From 04294770ae472e59655b5a66eb51b358b699e723 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Sun, 7 Aug 2022 01:36:48 +0900 Subject: [PATCH 01/29] =?UTF-8?q?C=20API=E3=81=AE=E6=94=B9=E5=96=84?= =?UTF-8?q?=E3=82=92=E8=A1=8C=E3=81=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From 60bc0861b2061d33a8f72e6de41f47b3805e4d3b Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Sat, 20 Aug 2022 10:50:36 +0900 Subject: [PATCH 02/29] =?UTF-8?q?C=20API=E5=AE=9A=E7=BE=A9=E3=82=92?= =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core_c_api/src/lib.rs | 120 ++++++++++---------------- 1 file changed, 46 insertions(+), 74 deletions(-) diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index 37300bbde..5a4a69038 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -79,18 +79,22 @@ fn convert_result(result: Result) -> (Option, VoicevoxResultCode) { } } -// FIXME:各関数の戻り値をboolからVoicevoxResultCodeに変えてこのstatic変数を削除する -static ERROR_MESSAGE: Lazy> = Lazy::new(|| Mutex::new(String::new())); - -fn set_message(message: &str) { - ERROR_MESSAGE - .lock() - .unwrap() - .replace_range(.., &format!("{}\0", message)); +#[repr(C)] +pub struct VoicevoxInitializeOptions { + use_cuda: bool, + cpu_num_threads: u32, + load_all_models: bool, + open_jtalk_dict_dir: *const c_char, } #[no_mangle] -pub extern "C" fn initialize(use_gpu: bool, cpu_num_threads: c_int, load_all_models: bool) -> bool { +pub extern "C" fn voicevox_default_initialize_options() -> VoicevoxInitializeOptions { + unimplemented!() +} + +#[no_mangle] +pub extern "C" fn voicevox_initialize(options: VoicevoxInitializeOptions) -> VoicevoxResultCode { + unimplemented!(); let result = lock_internal().initialize(use_gpu, cpu_num_threads as usize, load_all_models); //TODO: VoicevoxResultCodeを返すようにする if let Some(err) = result.err() { @@ -102,7 +106,8 @@ pub extern "C" fn initialize(use_gpu: bool, cpu_num_threads: c_int, load_all_mod } #[no_mangle] -pub extern "C" fn load_model(speaker_id: i64) -> bool { +pub extern "C" fn voicevox_load_model(speaker_id: i64) -> VoicevoxResultCode { + unimplemented!(); let result = lock_internal().load_model(speaker_id as usize); //TODO: VoicevoxResultCodeを返すようにする if let Some(err) = result.err() { @@ -114,37 +119,34 @@ pub extern "C" fn load_model(speaker_id: i64) -> bool { } #[no_mangle] -pub extern "C" fn is_model_loaded(speaker_id: i64) -> bool { +pub extern "C" fn voicevox_is_model_loaded(speaker_id: i64) -> VoicevoxResultCode { + unimplemented!(); lock_internal().is_model_loaded(speaker_id as usize) } #[no_mangle] -pub extern "C" fn finalize() { +pub extern "C" fn voicevox_finalize() { lock_internal().finalize() } #[no_mangle] -pub extern "C" fn metas() -> *const c_char { +pub extern "C" fn voicevox_get_metas_json() -> *const c_char { lock_internal().metas().as_ptr() } #[no_mangle] -pub extern "C" fn last_error_message() -> *const c_char { - ERROR_MESSAGE.lock().unwrap().as_ptr() as *const c_char -} - -#[no_mangle] -pub extern "C" fn supported_devices() -> *const c_char { +pub extern "C" fn voicevox_get_supported_devices_json() -> *const c_char { lock_internal().supported_devices().as_ptr() } #[no_mangle] -pub extern "C" fn yukarin_s_forward( +pub extern "C" fn voicevox_yukarin_s_forward( length: i64, phoneme_list: *mut i64, speaker_id: *mut i64, output: *mut f32, -) -> bool { +) -> VoicevoxResultCode { + unimplemented!(); let result = lock_internal().yukarin_s_forward( unsafe { std::slice::from_raw_parts_mut(phoneme_list, length as usize) }, unsafe { *speaker_id as usize }, @@ -164,7 +166,7 @@ pub extern "C" fn yukarin_s_forward( } #[no_mangle] -pub extern "C" fn yukarin_sa_forward( +pub extern "C" fn voicevox_yukarin_sa_forward( length: i64, vowel_phoneme_list: *mut i64, consonant_phoneme_list: *mut i64, @@ -174,7 +176,8 @@ pub extern "C" fn yukarin_sa_forward( end_accent_phrase_list: *mut i64, speaker_id: *mut i64, output: *mut f32, -) -> bool { +) -> VoicevoxResultCode { + unimplemented!(); let result = lock_internal().yukarin_sa_forward( length, unsafe { std::slice::from_raw_parts(vowel_phoneme_list, length as usize) }, @@ -200,7 +203,7 @@ pub extern "C" fn yukarin_sa_forward( } #[no_mangle] -pub extern "C" fn decode_forward( +pub extern "C" fn voicevox_decode_forward( length: i64, phoneme_size: i64, f0: *mut f32, @@ -232,24 +235,24 @@ pub extern "C" fn decode_forward( } } +#[repr(C)] +pub struct VoicevoxAudioQueryOptions { + kana: bool, +} + #[no_mangle] -pub extern "C" fn voicevox_load_openjtalk_dict(dict_path: *const c_char) -> VoicevoxResultCode { - let (_, result_code) = { - if let Ok(dict_path) = unsafe { CStr::from_ptr(dict_path) }.to_str() { - convert_result(lock_internal().voicevox_load_openjtalk_dict(dict_path)) - } else { - (None, VoicevoxResultCode::VOICEVOX_RESULT_INVALID_UTF8_INPUT) - } - }; - result_code +pub extern "C" fn voicevox_default_audio_query_options() -> VoicevoxAudioQueryOptions { + unimplemented!() } #[no_mangle] pub extern "C" fn voicevox_audio_query( text: *const c_char, speaker_id: i64, + options: VoicevoxAudioQueryOptions, output_audio_query_json: *mut *mut c_char, ) -> VoicevoxResultCode { + unimplemented!(); let text = unsafe { CStr::from_ptr(text) }; let audio_query = &match create_audio_query(text, speaker_id, Internal::voicevox_audio_query) { @@ -263,26 +266,6 @@ pub extern "C" fn voicevox_audio_query( VoicevoxResultCode::VOICEVOX_RESULT_SUCCEED } -#[no_mangle] -pub extern "C" fn voicevox_audio_query_from_kana( - text: *const c_char, - speaker_id: i64, - output_audio_query_json: *mut *mut c_char, -) -> VoicevoxResultCode { - let text = unsafe { CStr::from_ptr(text) }; - - let audio_query = - &match create_audio_query(text, speaker_id, Internal::voicevox_audio_query_from_kana) { - Ok(audio_query) => audio_query, - Err(result_code) => return result_code, - }; - - unsafe { - write_json_to_ptr(output_audio_query_json, audio_query); - } - VoicevoxResultCode::VOICEVOX_RESULT_SUCCEED -} - fn create_audio_query( japanese_or_kana: &CStr, speaker_id: i64, @@ -355,38 +338,27 @@ fn ensure_utf8(s: &CStr) -> std::result::Result<&str, VoicevoxResultCode> { .map_err(|_| VoicevoxResultCode::VOICEVOX_RESULT_INVALID_UTF8_INPUT) } +#[repr(C)] +pub struct VoicevoxTtsOptions { + kana: bool, +} + #[no_mangle] -pub extern "C" fn voicevox_tts( - text: *const c_char, - speaker_id: i64, - output_binary_size: *mut c_int, - output_wav: *mut *mut u8, -) -> VoicevoxResultCode { - let (output_opt, result_code) = { - if let Ok(text) = unsafe { CStr::from_ptr(text) }.to_str() { - convert_result(lock_internal().voicevox_tts(text, speaker_id as usize)) - } else { - (None, VoicevoxResultCode::VOICEVOX_RESULT_INVALID_UTF8_INPUT) - } - }; - if let Some(output) = output_opt { - unsafe { - write_wav_to_ptr(output_wav, output_binary_size, output.as_slice()); - } - } - result_code +pub fn voicevox_default_tts_options() -> VoicevoxTtsOptions { + unimplemented!() } #[no_mangle] -pub extern "C" fn voicevox_tts_from_kana( +pub extern "C" fn voicevox_tts( text: *const c_char, speaker_id: i64, + options: VoicevoxTtsOptions, output_binary_size: *mut c_int, output_wav: *mut *mut u8, ) -> VoicevoxResultCode { let (output_opt, result_code) = { if let Ok(text) = unsafe { CStr::from_ptr(text) }.to_str() { - convert_result(lock_internal().voicevox_tts_from_kana(text, speaker_id as usize)) + convert_result(lock_internal().voicevox_tts(text, speaker_id as usize)) } else { (None, VoicevoxResultCode::VOICEVOX_RESULT_INVALID_UTF8_INPUT) } From af7a3fead0a61c535a270df203eaac5a45d6ab1a Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Sat, 20 Aug 2022 11:04:41 +0900 Subject: [PATCH 03/29] =?UTF-8?q?voicevox=5Fcore=E5=81=B4=E3=81=AE?= =?UTF-8?q?=E5=AE=9F=E8=A3=85=E3=82=82=E5=A4=89=E6=9B=B4=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/error.rs | 2 +- crates/voicevox_core/src/publish.rs | 92 ++++++++------------------- crates/voicevox_core_c_api/src/lib.rs | 18 +++--- 3 files changed, 38 insertions(+), 74 deletions(-) diff --git a/crates/voicevox_core/src/error.rs b/crates/voicevox_core/src/error.rs index 357127120..06b759206 100644 --- a/crates/voicevox_core/src/error.rs +++ b/crates/voicevox_core/src/error.rs @@ -58,7 +58,7 @@ pub enum Error { } fn base_error_message(result_code: VoicevoxResultCode) -> &'static str { - let c_message: &'static str = crate::voicevox_error_result_to_message(result_code); + let c_message: &'static str = crate::error_result_to_message(result_code); &c_message[..(c_message.len() - 1)] } diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index b11c8e294..99c0b3f22 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -6,9 +6,9 @@ use onnxruntime::{ session::{AnyArray, NdArray}, }; use result_code::VoicevoxResultCode; -use std::collections::BTreeMap; use std::ffi::CStr; use std::sync::Mutex; +use std::{collections::BTreeMap, path::PathBuf}; use status::*; use std::ffi::CString; @@ -32,12 +32,8 @@ impl VoicevoxCore { }) } - pub fn initialize( - &mut self, - use_gpu: bool, - cpu_num_threads: usize, - load_all_models: bool, - ) -> Result<()> { + pub fn initialize(&mut self, options: InitializeOptions) -> Result<()> { + unimplemented!(); self.synthesis_engine.inference_core_mut().initialize( use_gpu, cpu_num_threads, @@ -61,11 +57,11 @@ impl VoicevoxCore { self.synthesis_engine.inference_core_mut().finalize() } - pub fn metas(&self) -> &'static CStr { + pub fn get_metas_json(&self) -> &'static CStr { &METAS_CSTRING } - pub fn supported_devices(&self) -> &'static CStr { + pub fn get_supported_devices_json(&self) -> &'static CStr { &SUPPORTED_DEVICES_CSTRING } @@ -122,15 +118,7 @@ impl VoicevoxCore { ) } - pub fn voicevox_load_openjtalk_dict(&mut self, dict_path: &str) -> Result<()> { - self.synthesis_engine.load_openjtalk_dict(dict_path) - } - - pub fn voicevox_audio_query( - &mut self, - text: &str, - speaker_id: usize, - ) -> Result { + pub fn audio_query(&mut self, text: &str, speaker_id: usize) -> Result { if !self.synthesis_engine.is_openjtalk_dict_loaded() { return Err(Error::NotLoadedOpenjtalkDict); } @@ -152,48 +140,36 @@ impl VoicevoxCore { )) } - pub fn voicevox_audio_query_from_kana( - &mut self, - text: &str, - speaker_id: usize, - ) -> Result { - let accent_phrases = parse_kana(text)?; - let accent_phrases = self - .synthesis_engine - .replace_mora_data(&accent_phrases, speaker_id)?; - - Ok(AudioQueryModel::new( - accent_phrases, - 1., - 0., - 1., - 1., - 0.1, - 0.1, - SynthesisEngine::DEFAULT_SAMPLING_RATE, - false, - "".into(), - )) - } - - pub fn voicevox_synthesis( + pub fn synthesis( &mut self, audio_query: &AudioQueryModel, speaker_id: usize, + options: SynthesisOptions, ) -> Result> { + unimplemented!(); self.synthesis_engine .synthesis_wave_format(audio_query, speaker_id, true) // TODO: 疑問文化を設定可能にする } - pub fn voicevox_tts(&mut self, text: &str, speaker_id: usize) -> Result> { - let audio_query = &self.voicevox_audio_query(text, speaker_id)?; - self.voicevox_synthesis(audio_query, speaker_id) + pub fn tts(&mut self, text: &str, speaker_id: usize, options: TtsOptions) -> Result> { + let audio_query = &self.audio_query(text, speaker_id)?; + self.synthesis(audio_query, speaker_id) } +} - pub fn voicevox_tts_from_kana(&mut self, text: &str, speaker_id: usize) -> Result> { - let audio_query = &self.voicevox_audio_query_from_kana(text, speaker_id)?; - self.voicevox_synthesis(audio_query, speaker_id) - } +pub struct InitializeOptions { + use_cuda: bool, + cpu_num_threads: u32, + load_all_models: bool, + open_jtalk_dict_dir: Option, +} + +pub struct SynthesisOptions { + kana: bool, +} + +pub struct TtsOptions { + kana: bool, } #[derive(new)] @@ -506,7 +482,7 @@ fn get_model_index_and_speaker_id(speaker_id: usize) -> Option<(usize, usize)> { SPEAKER_ID_MAP.get(&speaker_id).copied() } -pub const fn voicevox_error_result_to_message(result_code: VoicevoxResultCode) -> &'static str { +pub const fn error_result_to_message(result_code: VoicevoxResultCode) -> &'static str { // C APIのため、messageには必ず末尾にNULL文字を追加する use VoicevoxResultCode::*; match result_code { @@ -635,7 +611,7 @@ mod tests { #[rstest] fn supported_devices_works() { let internal = VoicevoxCore::new_with_mutex(); - let cstr_result = internal.lock().unwrap().supported_devices(); + let cstr_result = internal.lock().unwrap().get_supported_devices_json(); assert!(cstr_result.to_str().is_ok(), "{:?}", cstr_result); let json_result: std::result::Result = @@ -736,16 +712,4 @@ mod tests { assert!(result.is_ok(), "{:?}", result); assert_eq!(result.unwrap().len(), F0_LENGTH * 256); } - - #[rstest] - #[async_std::test] - async fn voicevox_load_openjtalk_dict_works() { - let internal = VoicevoxCore::new_with_mutex(); - let open_jtalk_dic_dir = download_open_jtalk_dict_if_no_exists().await; - let result = internal - .lock() - .unwrap() - .voicevox_load_openjtalk_dict(open_jtalk_dic_dir.to_str().unwrap()); - assert_eq!(result, Ok(())); - } } diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index 5a4a69038..91550f843 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -22,9 +22,10 @@ fn lock_internal() -> MutexGuard<'static, Internal> { } /* - * Cの関数として公開するための型や関数を定義するこれらの実装はinternal.rsに定義してある同名関数にある - * この関数ではinternal.rsにある同名関数の呼び出しと、その戻り値をCの形式に変換する処理のみとする + * Cの関数として公開するための型や関数を定義するこれらの実装はvoicevox_core/publish.rsに定義してある対応する関数にある + * この関数ではvoicevox_core/publish.rsにある対応する関数の呼び出しと、その戻り値をCの形式に変換する処理のみとする * これはC文脈の処理と実装をわけるためと、内部実装の変更がAPIに影響を与えにくくするためである + * voicevox_core/publish.rsにある対応する関数とはこのファイルに定義してある公開関数からvoicevoxプレフィックスを取り除いた名前の関数である */ pub use voicevox_core::result_code::VoicevoxResultCode; @@ -131,12 +132,12 @@ pub extern "C" fn voicevox_finalize() { #[no_mangle] pub extern "C" fn voicevox_get_metas_json() -> *const c_char { - lock_internal().metas().as_ptr() + lock_internal().get_metas_json().as_ptr() } #[no_mangle] pub extern "C" fn voicevox_get_supported_devices_json() -> *const c_char { - lock_internal().supported_devices().as_ptr() + lock_internal().get_supported_devices_json().as_ptr() } #[no_mangle] @@ -255,7 +256,7 @@ pub extern "C" fn voicevox_audio_query( unimplemented!(); let text = unsafe { CStr::from_ptr(text) }; - let audio_query = &match create_audio_query(text, speaker_id, Internal::voicevox_audio_query) { + let audio_query = &match create_audio_query(text, speaker_id, Internal::audio_query) { Ok(audio_query) => audio_query, Err(result_code) => return result_code, }; @@ -319,8 +320,7 @@ pub extern "C" fn voicevox_synthesis( let speaker_id = speaker_id as usize; - let (wav, result_code) = - convert_result(lock_internal().voicevox_synthesis(audio_query, speaker_id)); + let (wav, result_code) = convert_result(lock_internal().synthesis(audio_query, speaker_id)); let wav = &if let Some(wav) = wav { wav } else { @@ -358,7 +358,7 @@ pub extern "C" fn voicevox_tts( ) -> VoicevoxResultCode { let (output_opt, result_code) = { if let Ok(text) = unsafe { CStr::from_ptr(text) }.to_str() { - convert_result(lock_internal().voicevox_tts(text, speaker_id as usize)) + convert_result(lock_internal().tts(text, speaker_id as usize)) } else { (None, VoicevoxResultCode::VOICEVOX_RESULT_INVALID_UTF8_INPUT) } @@ -389,7 +389,7 @@ pub extern "C" fn voicevox_wav_free(wav: *mut u8) { pub extern "C" fn voicevox_error_result_to_message( result_code: VoicevoxResultCode, ) -> *const c_char { - voicevox_core::voicevox_error_result_to_message(result_code).as_ptr() as *const c_char + voicevox_core::error_result_to_message(result_code).as_ptr() as *const c_char } #[cfg(test)] From 466fdd99d6aa81cf4f26315b096529b3688e8a24 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Sun, 21 Aug 2022 10:33:56 +0900 Subject: [PATCH 04/29] =?UTF-8?q?cpu=5Fnum=5Fthreads=E3=82=92u16=E3=81=AB?= =?UTF-8?q?=E5=A4=89=E6=9B=B4=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/publish.rs | 2 +- crates/voicevox_core_c_api/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index 99c0b3f22..9b134a175 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -159,7 +159,7 @@ impl VoicevoxCore { pub struct InitializeOptions { use_cuda: bool, - cpu_num_threads: u32, + cpu_num_threads: u16, load_all_models: bool, open_jtalk_dict_dir: Option, } diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index 91550f843..9bc0e2800 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -83,7 +83,7 @@ fn convert_result(result: Result) -> (Option, VoicevoxResultCode) { #[repr(C)] pub struct VoicevoxInitializeOptions { use_cuda: bool, - cpu_num_threads: u32, + cpu_num_threads: u16, load_all_models: bool, open_jtalk_dict_dir: *const c_char, } From b5635cf48257860fc2de41256a2e54e1eae9f77c Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Sun, 21 Aug 2022 17:27:39 +0900 Subject: [PATCH 05/29] =?UTF-8?q?helper=E9=96=A2=E6=95=B0=E3=82=92?= =?UTF-8?q?=E5=88=86=E9=9B=A2=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core_c_api/src/helpers.rs | 92 +++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 crates/voicevox_core_c_api/src/helpers.rs diff --git a/crates/voicevox_core_c_api/src/helpers.rs b/crates/voicevox_core_c_api/src/helpers.rs new file mode 100644 index 000000000..80c81a6a7 --- /dev/null +++ b/crates/voicevox_core_c_api/src/helpers.rs @@ -0,0 +1,92 @@ +use super::*; + +pub(crate) fn convert_result(result: Result) -> (Option, VoicevoxResultCode) { + match result { + Ok(target) => (Some(target), VoicevoxResultCode::VOICEVOX_RESULT_SUCCEED), + Err(err) => { + eprintln!("{}", err); + dbg!(&err); + match err { + Error::NotLoadedOpenjtalkDict => ( + None, + VoicevoxResultCode::VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT, + ), + Error::CantGpuSupport => { + (None, VoicevoxResultCode::VOICEVOX_RESULT_CANT_GPU_SUPPORT) + } + Error::LoadModel(_) => { + (None, VoicevoxResultCode::VOICEVOX_RESULT_FAILED_LOAD_MODEL) + } + Error::LoadMetas(_) => { + (None, VoicevoxResultCode::VOICEVOX_RESULT_FAILED_LOAD_METAS) + } + Error::GetSupportedDevices(_) => ( + None, + VoicevoxResultCode::VOICEVOX_RESULT_FAILED_GET_SUPPORTED_DEVICES, + ), + Error::UninitializedStatus => ( + None, + VoicevoxResultCode::VOICEVOX_RESULT_UNINITIALIZED_STATUS, + ), + Error::InvalidSpeakerId { .. } => { + (None, VoicevoxResultCode::VOICEVOX_RESULT_INVALID_SPEAKER_ID) + } + Error::InvalidModelIndex { .. } => ( + None, + VoicevoxResultCode::VOICEVOX_RESULT_INVALID_MODEL_INDEX, + ), + Error::InferenceFailed => { + (None, VoicevoxResultCode::VOICEVOX_RESULT_INFERENCE_FAILED) + } + Error::FailedExtractFullContextLabel(_) => ( + None, + VoicevoxResultCode::VOICEVOX_RESULT_FAILED_EXTRACT_FULL_CONTEXT_LABEL, + ), + Error::FailedParseKana(_) => { + (None, VoicevoxResultCode::VOICEVOX_RESULT_FAILED_PARSE_KANA) + } + } + } + } +} + +pub(crate) fn create_audio_query( + japanese_or_kana: &CStr, + speaker_id: i64, + method: fn(&mut Internal, &str, usize) -> Result, +) -> std::result::Result { + let japanese_or_kana = ensure_utf8(japanese_or_kana)?; + let speaker_id = speaker_id as usize; + + let (audio_query, result_code) = + convert_result(method(&mut lock_internal(), japanese_or_kana, speaker_id)); + let audio_query = audio_query.ok_or(result_code)?; + Ok(CString::new(audio_query_model_to_json(&audio_query)).expect("should not contain '\\0'")) +} + +fn audio_query_model_to_json(audio_query_model: &AudioQueryModel) -> String { + serde_json::to_string(audio_query_model).expect("should be always valid") +} + +pub(crate) unsafe fn write_json_to_ptr(output_ptr: *mut *mut c_char, json: &CStr) { + let n = json.to_bytes_with_nul().len(); + let json_heap = libc::malloc(n); + libc::memcpy(json_heap, json.as_ptr() as *const c_void, n); + output_ptr.write(json_heap as *mut c_char); +} + +pub(crate) unsafe fn write_wav_to_ptr( + output_wav_ptr: *mut *mut u8, + output_size_ptr: *mut c_int, + data: &[u8], +) { + output_size_ptr.write(data.len() as c_int); + let wav_heap = libc::malloc(data.len()); + libc::memcpy(wav_heap, data.as_ptr() as *const c_void, data.len()); + output_wav_ptr.write(wav_heap as *mut u8); +} + +pub(crate) fn ensure_utf8(s: &CStr) -> std::result::Result<&str, VoicevoxResultCode> { + s.to_str() + .map_err(|_| VoicevoxResultCode::VOICEVOX_RESULT_INVALID_UTF8_INPUT) +} From 43e2c519e4d3219180cb14269545024a4e0ee46c Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Sun, 21 Aug 2022 17:27:49 +0900 Subject: [PATCH 06/29] =?UTF-8?q?SynthesisOptions=E3=81=AF=E4=B8=8D?= =?UTF-8?q?=E8=A6=81=E3=81=9D=E3=81=86=E3=81=A0=E3=81=A3=E3=81=9F=E3=81=AE?= =?UTF-8?q?=E3=81=A7=E5=89=8A=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/publish.rs | 5 -- crates/voicevox_core_c_api/src/lib.rs | 89 +-------------------------- 2 files changed, 2 insertions(+), 92 deletions(-) diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index 9b134a175..bdd3910d7 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -144,7 +144,6 @@ impl VoicevoxCore { &mut self, audio_query: &AudioQueryModel, speaker_id: usize, - options: SynthesisOptions, ) -> Result> { unimplemented!(); self.synthesis_engine @@ -164,10 +163,6 @@ pub struct InitializeOptions { open_jtalk_dict_dir: Option, } -pub struct SynthesisOptions { - kana: bool, -} - pub struct TtsOptions { kana: bool, } diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index 9bc0e2800..240ddc093 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -1,6 +1,8 @@ // TODO: ドキュメントを作成する段階になったらこのallowを外し、各pointerを使用している関数にunsafeとSafety documentを追加する #![allow(clippy::not_unsafe_ptr_arg_deref)] +mod helpers; +use helpers::*; use libc::c_void; use once_cell::sync::Lazy; use std::ffi::{CStr, CString}; @@ -30,56 +32,6 @@ fn lock_internal() -> MutexGuard<'static, Internal> { pub use voicevox_core::result_code::VoicevoxResultCode; -fn convert_result(result: Result) -> (Option, VoicevoxResultCode) { - match result { - Ok(target) => (Some(target), VoicevoxResultCode::VOICEVOX_RESULT_SUCCEED), - Err(err) => { - eprintln!("{}", err); - dbg!(&err); - match err { - Error::NotLoadedOpenjtalkDict => ( - None, - VoicevoxResultCode::VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT, - ), - Error::CantGpuSupport => { - (None, VoicevoxResultCode::VOICEVOX_RESULT_CANT_GPU_SUPPORT) - } - Error::LoadModel(_) => { - (None, VoicevoxResultCode::VOICEVOX_RESULT_FAILED_LOAD_MODEL) - } - Error::LoadMetas(_) => { - (None, VoicevoxResultCode::VOICEVOX_RESULT_FAILED_LOAD_METAS) - } - Error::GetSupportedDevices(_) => ( - None, - VoicevoxResultCode::VOICEVOX_RESULT_FAILED_GET_SUPPORTED_DEVICES, - ), - Error::UninitializedStatus => ( - None, - VoicevoxResultCode::VOICEVOX_RESULT_UNINITIALIZED_STATUS, - ), - Error::InvalidSpeakerId { .. } => { - (None, VoicevoxResultCode::VOICEVOX_RESULT_INVALID_SPEAKER_ID) - } - Error::InvalidModelIndex { .. } => ( - None, - VoicevoxResultCode::VOICEVOX_RESULT_INVALID_MODEL_INDEX, - ), - Error::InferenceFailed => { - (None, VoicevoxResultCode::VOICEVOX_RESULT_INFERENCE_FAILED) - } - Error::FailedExtractFullContextLabel(_) => ( - None, - VoicevoxResultCode::VOICEVOX_RESULT_FAILED_EXTRACT_FULL_CONTEXT_LABEL, - ), - Error::FailedParseKana(_) => { - (None, VoicevoxResultCode::VOICEVOX_RESULT_FAILED_PARSE_KANA) - } - } - } - } -} - #[repr(C)] pub struct VoicevoxInitializeOptions { use_cuda: bool, @@ -267,38 +219,6 @@ pub extern "C" fn voicevox_audio_query( VoicevoxResultCode::VOICEVOX_RESULT_SUCCEED } -fn create_audio_query( - japanese_or_kana: &CStr, - speaker_id: i64, - method: fn(&mut Internal, &str, usize) -> Result, -) -> std::result::Result { - let japanese_or_kana = ensure_utf8(japanese_or_kana)?; - let speaker_id = speaker_id as usize; - - let (audio_query, result_code) = - convert_result(method(&mut lock_internal(), japanese_or_kana, speaker_id)); - let audio_query = audio_query.ok_or(result_code)?; - Ok(CString::new(audio_query_model_to_json(&audio_query)).expect("should not contain '\\0'")) -} - -fn audio_query_model_to_json(audio_query_model: &AudioQueryModel) -> String { - serde_json::to_string(audio_query_model).expect("should be always valid") -} - -unsafe fn write_json_to_ptr(output_ptr: *mut *mut c_char, json: &CStr) { - let n = json.to_bytes_with_nul().len(); - let json_heap = libc::malloc(n); - libc::memcpy(json_heap, json.as_ptr() as *const c_void, n); - output_ptr.write(json_heap as *mut c_char); -} - -unsafe fn write_wav_to_ptr(output_wav_ptr: *mut *mut u8, output_size_ptr: *mut c_int, data: &[u8]) { - output_size_ptr.write(data.len() as c_int); - let wav_heap = libc::malloc(data.len()); - libc::memcpy(wav_heap, data.as_ptr() as *const c_void, data.len()); - output_wav_ptr.write(wav_heap as *mut u8); -} - #[no_mangle] pub extern "C" fn voicevox_synthesis( audio_query_json: *const c_char, @@ -333,11 +253,6 @@ pub extern "C" fn voicevox_synthesis( VoicevoxResultCode::VOICEVOX_RESULT_SUCCEED } -fn ensure_utf8(s: &CStr) -> std::result::Result<&str, VoicevoxResultCode> { - s.to_str() - .map_err(|_| VoicevoxResultCode::VOICEVOX_RESULT_INVALID_UTF8_INPUT) -} - #[repr(C)] pub struct VoicevoxTtsOptions { kana: bool, From b5cd93ee85585a884281a8de11106b9efb4bca4c Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Sun, 21 Aug 2022 17:34:19 +0900 Subject: [PATCH 07/29] =?UTF-8?q?SynthesisOptions=E3=82=92=E8=BF=BD?= =?UTF-8?q?=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/publish.rs | 3 +++ crates/voicevox_core_c_api/src/lib.rs | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index bdd3910d7..056afe852 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -143,6 +143,7 @@ impl VoicevoxCore { pub fn synthesis( &mut self, audio_query: &AudioQueryModel, + options: SynthesisOptions, speaker_id: usize, ) -> Result> { unimplemented!(); @@ -163,6 +164,8 @@ pub struct InitializeOptions { open_jtalk_dict_dir: Option, } +pub struct SynthesisOptions {} + pub struct TtsOptions { kana: bool, } diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index 240ddc093..cafe71b7f 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -219,10 +219,14 @@ pub extern "C" fn voicevox_audio_query( VoicevoxResultCode::VOICEVOX_RESULT_SUCCEED } +#[repr(C)] +pub struct VoicevoxSynthesisOptions {} + #[no_mangle] pub extern "C" fn voicevox_synthesis( audio_query_json: *const c_char, speaker_id: i64, + options: VoicevoxSynthesisOptions, output_binary_size: *mut c_int, output_wav: *mut *mut u8, ) -> VoicevoxResultCode { From 2ece20685cc60c99fedd0a538d24dccb2ba150e5 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Sun, 21 Aug 2022 17:40:10 +0900 Subject: [PATCH 08/29] =?UTF-8?q?decode=5Fforward=20->=20decode=E3=81=AB?= =?UTF-8?q?=E5=A4=89=E6=9B=B4=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../voicevox_core/src/engine/synthesis_engine.rs | 2 +- crates/voicevox_core/src/publish.rs | 15 +++++++-------- crates/voicevox_core_c_api/src/lib.rs | 6 +++--- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/crates/voicevox_core/src/engine/synthesis_engine.rs b/crates/voicevox_core/src/engine/synthesis_engine.rs index e3fae420d..f8286ccfa 100644 --- a/crates/voicevox_core/src/engine/synthesis_engine.rs +++ b/crates/voicevox_core/src/engine/synthesis_engine.rs @@ -410,7 +410,7 @@ impl SynthesisEngine { // 2次元のvectorを1次元に変換し、アドレスを連続させる let flatten_phoneme = phoneme.into_iter().flatten().collect::>(); - self.inference_core_mut().decode_forward( + self.inference_core_mut().decode( f0.len(), OjtPhoneme::num_phoneme(), &f0, diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index 056afe852..b3d39c518 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -101,7 +101,7 @@ impl VoicevoxCore { ) } - pub fn decode_forward( + pub fn decode( &mut self, length: usize, phoneme_size: usize, @@ -109,7 +109,7 @@ impl VoicevoxCore { phoneme: &[f32], speaker_id: usize, ) -> Result> { - self.synthesis_engine.inference_core_mut().decode_forward( + self.synthesis_engine.inference_core_mut().decode( length, phoneme_size, f0, @@ -350,7 +350,7 @@ impl InferenceCore { status.yukarin_sa_session_run(model_index, input_tensors) } - pub fn decode_forward( + pub fn decode( &mut self, length: usize, phoneme_size: usize, @@ -701,11 +701,10 @@ mod tests { set_one(30, 45..60); set_one(0, 60..69); - let result = - internal - .lock() - .unwrap() - .decode_forward(F0_LENGTH, PHONEME_SIZE, &f0, &phoneme, 0); + let result = internal + .lock() + .unwrap() + .decode(F0_LENGTH, PHONEME_SIZE, &f0, &phoneme, 0); assert!(result.is_ok(), "{:?}", result); assert_eq!(result.unwrap().len(), F0_LENGTH * 256); diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index cafe71b7f..47ffe48a8 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -156,17 +156,17 @@ pub extern "C" fn voicevox_yukarin_sa_forward( } #[no_mangle] -pub extern "C" fn voicevox_decode_forward( +pub extern "C" fn voicevox_decode( length: i64, phoneme_size: i64, f0: *mut f32, phoneme: *mut f32, speaker_id: *mut i64, output: *mut f32, -) -> bool { +) -> VoicevoxResultCode { let length = length as usize; let phoneme_size = phoneme_size as usize; - let result = lock_internal().decode_forward( + let result = lock_internal().decode( length, phoneme_size, unsafe { std::slice::from_raw_parts(f0, length) }, From 3a80b7e424037f995eeec190705696c06160bda9 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Sun, 21 Aug 2022 17:41:57 +0900 Subject: [PATCH 09/29] =?UTF-8?q?yukarin=5Fsa=5Fforward=20->=20predict=5Fi?= =?UTF-8?q?ntonation=20=E3=81=AB=E5=A4=89=E6=9B=B4=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/engine/synthesis_engine.rs | 2 +- crates/voicevox_core/src/publish.rs | 8 ++++---- crates/voicevox_core_c_api/src/lib.rs | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/crates/voicevox_core/src/engine/synthesis_engine.rs b/crates/voicevox_core/src/engine/synthesis_engine.rs index f8286ccfa..0bcef0160 100644 --- a/crates/voicevox_core/src/engine/synthesis_engine.rs +++ b/crates/voicevox_core/src/engine/synthesis_engine.rs @@ -250,7 +250,7 @@ impl SynthesisEngine { end_accent_phrase_list.push(base_end_accent_phrase_list[vowel_index as usize]); } - let mut f0_list = self.inference_core_mut().yukarin_sa_forward( + let mut f0_list = self.inference_core_mut().predict_intonation( vowel_phoneme_list.len() as i64, &vowel_phoneme_list, &consonant_phoneme_list, diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index b3d39c518..63c729ee1 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -76,7 +76,7 @@ impl VoicevoxCore { } #[allow(clippy::too_many_arguments)] - pub fn yukarin_sa_forward( + pub fn predict_intonation( &mut self, length: i64, vowel_phoneme_list: &[i64], @@ -89,7 +89,7 @@ impl VoicevoxCore { ) -> Result> { self.synthesis_engine .inference_core_mut() - .yukarin_sa_forward( + .predict_intonation( length, vowel_phoneme_list, consonant_phoneme_list, @@ -291,7 +291,7 @@ impl InferenceCore { } #[allow(clippy::too_many_arguments)] - pub fn yukarin_sa_forward( + pub fn predict_intonation( &mut self, length: i64, vowel_phoneme_list: &[i64], @@ -659,7 +659,7 @@ mod tests { let start_accent_phrase_list = [0, 1, 0, 0, 0]; let end_accent_phrase_list = [0, 0, 0, 1, 0]; - let result = internal.lock().unwrap().yukarin_sa_forward( + let result = internal.lock().unwrap().predict_intonation( vowel_phoneme_list.len() as i64, &vowel_phoneme_list, &consonant_phoneme_list, diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index 47ffe48a8..97824547b 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -119,7 +119,7 @@ pub extern "C" fn voicevox_yukarin_s_forward( } #[no_mangle] -pub extern "C" fn voicevox_yukarin_sa_forward( +pub extern "C" fn voicevox_predict_intonation( length: i64, vowel_phoneme_list: *mut i64, consonant_phoneme_list: *mut i64, @@ -131,7 +131,7 @@ pub extern "C" fn voicevox_yukarin_sa_forward( output: *mut f32, ) -> VoicevoxResultCode { unimplemented!(); - let result = lock_internal().yukarin_sa_forward( + let result = lock_internal().predict_intonation( length, unsafe { std::slice::from_raw_parts(vowel_phoneme_list, length as usize) }, unsafe { std::slice::from_raw_parts(consonant_phoneme_list, length as usize) }, From e3a34690b0500b3a7dda72e717486debf5d200c5 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Sun, 21 Aug 2022 17:44:03 +0900 Subject: [PATCH 10/29] =?UTF-8?q?yukarin=5Fs=5Fforward=20->=20predict=5Fdu?= =?UTF-8?q?ration=E3=81=AB=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/engine/synthesis_engine.rs | 2 +- crates/voicevox_core/src/publish.rs | 8 ++++---- crates/voicevox_core_c_api/src/lib.rs | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/crates/voicevox_core/src/engine/synthesis_engine.rs b/crates/voicevox_core/src/engine/synthesis_engine.rs index 0bcef0160..1ea88fe5f 100644 --- a/crates/voicevox_core/src/engine/synthesis_engine.rs +++ b/crates/voicevox_core/src/engine/synthesis_engine.rs @@ -136,7 +136,7 @@ impl SynthesisEngine { .collect(); let phoneme_length = self .inference_core_mut() - .yukarin_s_forward(&phoneme_list_s, speaker_id)?; + .predict_duration(&phoneme_list_s, speaker_id)?; let mut index = 0; let new_accent_phrases = accent_phrases diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index 63c729ee1..5d654beda 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -65,14 +65,14 @@ impl VoicevoxCore { &SUPPORTED_DEVICES_CSTRING } - pub fn yukarin_s_forward( + pub fn predict_duration( &mut self, phoneme_list: &[i64], speaker_id: usize, ) -> Result> { self.synthesis_engine .inference_core_mut() - .yukarin_s_forward(phoneme_list, speaker_id) + .predict_duration(phoneme_list, speaker_id) } #[allow(clippy::too_many_arguments)] @@ -244,7 +244,7 @@ impl InferenceCore { self.status_option = None; } - pub fn yukarin_s_forward( + pub fn predict_duration( &mut self, phoneme_list: &[i64], speaker_id: usize, @@ -640,7 +640,7 @@ mod tests { 30, 35, 14, 23, 7, 21, 14, 43, 30, 30, 23, 30, 35, 30, 0, ]; - let result = internal.lock().unwrap().yukarin_s_forward(&phoneme_list, 0); + let result = internal.lock().unwrap().predict_duration(&phoneme_list, 0); assert!(result.is_ok(), "{:?}", result); assert_eq!(result.unwrap().len(), phoneme_list.len()); diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index 97824547b..3ea9ffd89 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -93,14 +93,14 @@ pub extern "C" fn voicevox_get_supported_devices_json() -> *const c_char { } #[no_mangle] -pub extern "C" fn voicevox_yukarin_s_forward( +pub extern "C" fn voicevox_predict_duration( length: i64, phoneme_list: *mut i64, speaker_id: *mut i64, output: *mut f32, ) -> VoicevoxResultCode { unimplemented!(); - let result = lock_internal().yukarin_s_forward( + let result = lock_internal().predict_duration( unsafe { std::slice::from_raw_parts_mut(phoneme_list, length as usize) }, unsafe { *speaker_id as usize }, ); From 317d432ebdbc5f35e16fccb13aeb768748b68b1a Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Mon, 22 Aug 2022 21:18:08 +0900 Subject: [PATCH 11/29] =?UTF-8?q?=E3=81=A8=E3=82=8A=E3=81=82=E3=81=88?= =?UTF-8?q?=E3=81=9A=E3=82=B3=E3=83=B3=E3=83=91=E3=82=A4=E3=83=AB=E3=83=BB?= =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E3=81=8C=E9=80=9A=E3=82=8B=E3=82=88?= =?UTF-8?q?=E3=81=86=E3=81=AB=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/engine/synthesis_engine.rs | 2 +- crates/voicevox_core/src/publish.rs | 113 +++++++--- crates/voicevox_core/src/status.rs | 6 +- crates/voicevox_core_c_api/src/helpers.rs | 19 +- crates/voicevox_core_c_api/src/lib.rs | 194 ++++++++++-------- 5 files changed, 213 insertions(+), 121 deletions(-) diff --git a/crates/voicevox_core/src/engine/synthesis_engine.rs b/crates/voicevox_core/src/engine/synthesis_engine.rs index 1ea88fe5f..d0f0b3fad 100644 --- a/crates/voicevox_core/src/engine/synthesis_engine.rs +++ b/crates/voicevox_core/src/engine/synthesis_engine.rs @@ -251,7 +251,7 @@ impl SynthesisEngine { } let mut f0_list = self.inference_core_mut().predict_intonation( - vowel_phoneme_list.len() as i64, + vowel_phoneme_list.len(), &vowel_phoneme_list, &consonant_phoneme_list, &start_accent_list, diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index 5d654beda..a8c088f7a 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -33,11 +33,10 @@ impl VoicevoxCore { } pub fn initialize(&mut self, options: InitializeOptions) -> Result<()> { - unimplemented!(); self.synthesis_engine.inference_core_mut().initialize( - use_gpu, - cpu_num_threads, - load_all_models, + options.use_gpu, + options.cpu_num_threads, + options.load_all_models, ) } @@ -78,7 +77,7 @@ impl VoicevoxCore { #[allow(clippy::too_many_arguments)] pub fn predict_intonation( &mut self, - length: i64, + length: usize, vowel_phoneme_list: &[i64], consonant_phoneme_list: &[i64], start_accent_list: &[i64], @@ -118,13 +117,21 @@ impl VoicevoxCore { ) } - pub fn audio_query(&mut self, text: &str, speaker_id: usize) -> Result { + pub fn audio_query( + &mut self, + text: &str, + speaker_id: usize, + options: AudioQueryOptions, + ) -> Result { if !self.synthesis_engine.is_openjtalk_dict_loaded() { return Err(Error::NotLoadedOpenjtalkDict); } - let accent_phrases = self - .synthesis_engine - .create_accent_phrases(text, speaker_id)?; + let accent_phrases = if options.kana { + parse_kana(text)? + } else { + self.synthesis_engine + .create_accent_phrases(text, speaker_id)? + }; Ok(AudioQueryModel::new( accent_phrases, @@ -143,31 +150,57 @@ impl VoicevoxCore { pub fn synthesis( &mut self, audio_query: &AudioQueryModel, - options: SynthesisOptions, speaker_id: usize, + options: SynthesisOptions, ) -> Result> { - unimplemented!(); self.synthesis_engine .synthesis_wave_format(audio_query, speaker_id, true) // TODO: 疑問文化を設定可能にする } pub fn tts(&mut self, text: &str, speaker_id: usize, options: TtsOptions) -> Result> { - let audio_query = &self.audio_query(text, speaker_id)?; - self.synthesis(audio_query, speaker_id) + let audio_query = &self.audio_query( + text, + speaker_id, + AudioQueryOptions::from_tts_options(&options), + )?; + self.synthesis( + audio_query, + speaker_id, + SynthesisOptions::from_tts_options(&options), + ) + } +} + +#[derive(Default)] +pub struct AudioQueryOptions { + pub kana: bool, +} + +impl AudioQueryOptions { + fn from_tts_options(options: &TtsOptions) -> Self { + Self { kana: options.kana } } } +#[derive(Default)] pub struct InitializeOptions { - use_cuda: bool, - cpu_num_threads: u16, - load_all_models: bool, - open_jtalk_dict_dir: Option, + pub use_gpu: bool, + pub cpu_num_threads: u16, + pub load_all_models: bool, + pub open_jtalk_dict_dir: Option, } pub struct SynthesisOptions {} +impl SynthesisOptions { + fn from_tts_options(_: &TtsOptions) -> Self { + Self {} + } +} + +#[derive(Default)] pub struct TtsOptions { - kana: bool, + pub kana: bool, } #[derive(new)] @@ -180,7 +213,7 @@ impl InferenceCore { pub fn initialize( &mut self, use_gpu: bool, - cpu_num_threads: usize, + cpu_num_threads: u16, load_all_models: bool, ) -> Result<()> { self.initialized = false; @@ -293,7 +326,7 @@ impl InferenceCore { #[allow(clippy::too_many_arguments)] pub fn predict_intonation( &mut self, - length: i64, + length: usize, vowel_phoneme_list: &[i64], consonant_phoneme_list: &[i64], start_accent_list: &[i64], @@ -326,7 +359,7 @@ impl InferenceCore { return Err(Error::InvalidModelIndex { model_index }); } - let mut length_array = NdArray::new(ndarray::arr0(length)); + let mut length_array = NdArray::new(ndarray::arr0(length as i64)); let mut vowel_phoneme_list_array = NdArray::new(ndarray::arr1(vowel_phoneme_list)); let mut consonant_phoneme_list_array = NdArray::new(ndarray::arr1(consonant_phoneme_list)); let mut start_accent_list_array = NdArray::new(ndarray::arr1(start_accent_list)); @@ -521,7 +554,10 @@ mod tests { #[rstest] fn finalize_works() { let internal = VoicevoxCore::new_with_mutex(); - let result = internal.lock().unwrap().initialize(false, 0, false); + let result = internal + .lock() + .unwrap() + .initialize(InitializeOptions::default()); assert_eq!(Ok(()), result); internal.lock().unwrap().finalize(); assert_eq!( @@ -561,7 +597,7 @@ mod tests { internal .lock() .unwrap() - .initialize(false, 0, false) + .initialize(InitializeOptions::default()) .unwrap(); let result = internal.lock().unwrap().load_model(speaker_id); assert_eq!( @@ -584,7 +620,7 @@ mod tests { internal .lock() .unwrap() - .initialize(false, 0, false) + .initialize(InitializeOptions::default()) .unwrap(); assert!( !internal.lock().unwrap().is_model_loaded(speaker_id), @@ -632,7 +668,14 @@ mod tests { #[rstest] fn yukarin_s_forward_works() { let internal = VoicevoxCore::new_with_mutex(); - internal.lock().unwrap().initialize(false, 0, true).unwrap(); + internal + .lock() + .unwrap() + .initialize(InitializeOptions { + load_all_models: true, + ..Default::default() + }) + .unwrap(); // 「こんにちは、音声合成の世界へようこそ」という文章を変換して得た phoneme_list let phoneme_list = [ @@ -649,7 +692,14 @@ mod tests { #[rstest] fn yukarin_sa_forward_works() { let internal = VoicevoxCore::new_with_mutex(); - internal.lock().unwrap().initialize(false, 0, true).unwrap(); + internal + .lock() + .unwrap() + .initialize(InitializeOptions { + load_all_models: true, + ..Default::default() + }) + .unwrap(); // 「テスト」という文章に対応する入力 let vowel_phoneme_list = [0, 14, 6, 30, 0]; @@ -660,7 +710,7 @@ mod tests { let end_accent_phrase_list = [0, 0, 0, 1, 0]; let result = internal.lock().unwrap().predict_intonation( - vowel_phoneme_list.len() as i64, + vowel_phoneme_list.len(), &vowel_phoneme_list, &consonant_phoneme_list, &start_accent_list, @@ -677,7 +727,14 @@ mod tests { #[rstest] fn decode_forward_works() { let internal = VoicevoxCore::new_with_mutex(); - internal.lock().unwrap().initialize(false, 0, true).unwrap(); + internal + .lock() + .unwrap() + .initialize(InitializeOptions { + load_all_models: true, + ..Default::default() + }) + .unwrap(); // 「テスト」という文章に対応する入力 const F0_LENGTH: usize = 69; diff --git a/crates/voicevox_core/src/status.rs b/crates/voicevox_core/src/status.rs index f87327efa..7a29ca051 100644 --- a/crates/voicevox_core/src/status.rs +++ b/crates/voicevox_core/src/status.rs @@ -29,7 +29,7 @@ struct StatusModels { #[derive(new, Getters)] struct SessionOptions { - cpu_num_threads: usize, + cpu_num_threads: u16, use_gpu: bool, } @@ -106,7 +106,7 @@ impl Status { pub const MODELS_COUNT: usize = Self::MODELS.len(); - pub fn new(use_gpu: bool, cpu_num_threads: usize) -> Self { + pub fn new(use_gpu: bool, cpu_num_threads: u16) -> Self { Self { models: StatusModels { yukarin_s: BTreeMap::new(), @@ -261,7 +261,7 @@ mod tests { #[case(false, 4)] #[case(false, 8)] #[case(false, 0)] - fn status_new_works(#[case] use_gpu: bool, #[case] cpu_num_threads: usize) { + fn status_new_works(#[case] use_gpu: bool, #[case] cpu_num_threads: u16) { let status = Status::new(use_gpu, cpu_num_threads); assert_eq!(false, status.light_session_options.use_gpu); assert_eq!(use_gpu, status.heavy_session_options.use_gpu); diff --git a/crates/voicevox_core_c_api/src/helpers.rs b/crates/voicevox_core_c_api/src/helpers.rs index 80c81a6a7..a33562be4 100644 --- a/crates/voicevox_core_c_api/src/helpers.rs +++ b/crates/voicevox_core_c_api/src/helpers.rs @@ -52,14 +52,23 @@ pub(crate) fn convert_result(result: Result) -> (Option, VoicevoxResult pub(crate) fn create_audio_query( japanese_or_kana: &CStr, - speaker_id: i64, - method: fn(&mut Internal, &str, usize) -> Result, + speaker_id: usize, + method: fn( + &mut Internal, + &str, + usize, + voicevox_core::AudioQueryOptions, + ) -> Result, + options: VoicevoxAudioQueryOptions, ) -> std::result::Result { let japanese_or_kana = ensure_utf8(japanese_or_kana)?; - let speaker_id = speaker_id as usize; - let (audio_query, result_code) = - convert_result(method(&mut lock_internal(), japanese_or_kana, speaker_id)); + let (audio_query, result_code) = convert_result(method( + &mut lock_internal(), + japanese_or_kana, + speaker_id, + options.into(), + )); let audio_query = audio_query.ok_or(result_code)?; Ok(CString::new(audio_query_model_to_json(&audio_query)).expect("should not contain '\\0'")) } diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index 3ea9ffd89..abab8f174 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -7,6 +7,8 @@ use libc::c_void; use once_cell::sync::Lazy; use std::ffi::{CStr, CString}; use std::os::raw::{c_char, c_int}; +use std::path::PathBuf; +use std::ptr::null; use std::sync::{Mutex, MutexGuard}; use voicevox_core::AudioQueryModel; use voicevox_core::VoicevoxCore; @@ -34,47 +36,62 @@ pub use voicevox_core::result_code::VoicevoxResultCode; #[repr(C)] pub struct VoicevoxInitializeOptions { - use_cuda: bool, + use_gpu: bool, cpu_num_threads: u16, load_all_models: bool, open_jtalk_dict_dir: *const c_char, } +impl VoicevoxInitializeOptions { + fn from_default_options(options: voicevox_core::InitializeOptions) -> Self { + Self { + use_gpu: options.use_gpu, + cpu_num_threads: options.cpu_num_threads, + load_all_models: options.load_all_models, + open_jtalk_dict_dir: null(), + } + } + + fn try_into_options( + self, + ) -> std::result::Result { + let open_jtalk_dict_dir = ensure_utf8(unsafe { CStr::from_ptr(self.open_jtalk_dict_dir) })?; + Ok(voicevox_core::InitializeOptions { + use_gpu: self.use_gpu, + cpu_num_threads: self.cpu_num_threads, + load_all_models: self.load_all_models, + open_jtalk_dict_dir: Some(PathBuf::from(open_jtalk_dict_dir)), + }) + } +} + #[no_mangle] pub extern "C" fn voicevox_default_initialize_options() -> VoicevoxInitializeOptions { - unimplemented!() + VoicevoxInitializeOptions::from_default_options(voicevox_core::InitializeOptions::default()) } #[no_mangle] pub extern "C" fn voicevox_initialize(options: VoicevoxInitializeOptions) -> VoicevoxResultCode { - unimplemented!(); - let result = lock_internal().initialize(use_gpu, cpu_num_threads as usize, load_all_models); - //TODO: VoicevoxResultCodeを返すようにする - if let Some(err) = result.err() { - set_message(&format!("{}", err)); - false - } else { - true + match options.try_into_options() { + Ok(options) => { + let result = lock_internal().initialize(options); + let (_, result_code) = convert_result(result); + result_code + } + Err(result_code) => result_code, } } #[no_mangle] -pub extern "C" fn voicevox_load_model(speaker_id: i64) -> VoicevoxResultCode { - unimplemented!(); - let result = lock_internal().load_model(speaker_id as usize); - //TODO: VoicevoxResultCodeを返すようにする - if let Some(err) = result.err() { - set_message(&format!("{}", err)); - false - } else { - true - } +pub extern "C" fn voicevox_load_model(speaker_id: usize) -> VoicevoxResultCode { + let result = lock_internal().load_model(speaker_id); + let (_, result_code) = convert_result(result); + result_code } #[no_mangle] -pub extern "C" fn voicevox_is_model_loaded(speaker_id: i64) -> VoicevoxResultCode { - unimplemented!(); - lock_internal().is_model_loaded(speaker_id as usize) +pub extern "C" fn voicevox_is_model_loaded(speaker_id: usize) -> bool { + lock_internal().is_model_loaded(speaker_id) } #[no_mangle] @@ -94,74 +111,63 @@ pub extern "C" fn voicevox_get_supported_devices_json() -> *const c_char { #[no_mangle] pub extern "C" fn voicevox_predict_duration( - length: i64, + length: usize, phoneme_list: *mut i64, - speaker_id: *mut i64, + speaker_id: usize, output: *mut f32, ) -> VoicevoxResultCode { - unimplemented!(); let result = lock_internal().predict_duration( - unsafe { std::slice::from_raw_parts_mut(phoneme_list, length as usize) }, - unsafe { *speaker_id as usize }, + unsafe { std::slice::from_raw_parts_mut(phoneme_list, length) }, + speaker_id, ); - //TODO: VoicevoxResultCodeを返すようにする - match result { - Ok(output_vec) => { - let output_slice = unsafe { std::slice::from_raw_parts_mut(output, length as usize) }; + + let (output_vec, result_code) = convert_result(result); + if result_code == VoicevoxResultCode::VOICEVOX_RESULT_SUCCEED { + if let Some(output_vec) = output_vec { + let output_slice = unsafe { std::slice::from_raw_parts_mut(output, length) }; output_slice.clone_from_slice(&output_vec); - true - } - Err(err) => { - set_message(&format!("{}", err)); - false } } + result_code } #[no_mangle] pub extern "C" fn voicevox_predict_intonation( - length: i64, + length: usize, vowel_phoneme_list: *mut i64, consonant_phoneme_list: *mut i64, start_accent_list: *mut i64, end_accent_list: *mut i64, start_accent_phrase_list: *mut i64, end_accent_phrase_list: *mut i64, - speaker_id: *mut i64, + speaker_id: usize, output: *mut f32, ) -> VoicevoxResultCode { - unimplemented!(); let result = lock_internal().predict_intonation( length, - unsafe { std::slice::from_raw_parts(vowel_phoneme_list, length as usize) }, - unsafe { std::slice::from_raw_parts(consonant_phoneme_list, length as usize) }, - unsafe { std::slice::from_raw_parts(start_accent_list, length as usize) }, - unsafe { std::slice::from_raw_parts(end_accent_list, length as usize) }, - unsafe { std::slice::from_raw_parts(start_accent_phrase_list, length as usize) }, - unsafe { std::slice::from_raw_parts(end_accent_phrase_list, length as usize) }, - unsafe { *speaker_id as usize }, + unsafe { std::slice::from_raw_parts(vowel_phoneme_list, length) }, + unsafe { std::slice::from_raw_parts(consonant_phoneme_list, length) }, + unsafe { std::slice::from_raw_parts(start_accent_list, length) }, + unsafe { std::slice::from_raw_parts(end_accent_list, length) }, + unsafe { std::slice::from_raw_parts(start_accent_phrase_list, length) }, + unsafe { std::slice::from_raw_parts(end_accent_phrase_list, length) }, + speaker_id, ); - //TODO: VoicevoxResultCodeを返すようにする - match result { - Ok(output_vec) => { - let output_slice = unsafe { std::slice::from_raw_parts_mut(output, length as usize) }; - output_slice.clone_from_slice(&output_vec); - true - } - Err(err) => { - set_message(&format!("{}", err)); - false - } + let (output_vec, result_code) = convert_result(result); + if let Some(output_vec) = output_vec { + let output_slice = unsafe { std::slice::from_raw_parts_mut(output, length) }; + output_slice.clone_from_slice(&output_vec); } + result_code } #[no_mangle] pub extern "C" fn voicevox_decode( - length: i64, + length: usize, phoneme_size: i64, f0: *mut f32, phoneme: *mut f32, - speaker_id: *mut i64, + speaker_id: usize, output: *mut f32, ) -> VoicevoxResultCode { let length = length as usize; @@ -171,21 +177,14 @@ pub extern "C" fn voicevox_decode( phoneme_size, unsafe { std::slice::from_raw_parts(f0, length) }, unsafe { std::slice::from_raw_parts(phoneme, phoneme_size * length) }, - unsafe { *speaker_id as usize }, + speaker_id, ); - //TODO: VoicevoxResultCodeを返すようにする - match result { - Ok(output_vec) => { - let output_slice = - unsafe { std::slice::from_raw_parts_mut(output, (length as usize) * 256) }; - output_slice.clone_from_slice(&output_vec); - true - } - Err(err) => { - set_message(&format!("{}", err)); - false - } + let (output_vec, result_code) = convert_result(result); + if let Some(output_vec) = output_vec { + let output_slice = unsafe { std::slice::from_raw_parts_mut(output, length) }; + output_slice.clone_from_slice(&output_vec); } + result_code } #[repr(C)] @@ -193,22 +192,32 @@ pub struct VoicevoxAudioQueryOptions { kana: bool, } +impl From for VoicevoxAudioQueryOptions { + fn from(options: voicevox_core::AudioQueryOptions) -> Self { + Self { kana: options.kana } + } +} +impl From for voicevox_core::AudioQueryOptions { + fn from(options: VoicevoxAudioQueryOptions) -> Self { + Self { kana: options.kana } + } +} + #[no_mangle] pub extern "C" fn voicevox_default_audio_query_options() -> VoicevoxAudioQueryOptions { - unimplemented!() + voicevox_core::AudioQueryOptions::default().into() } #[no_mangle] pub extern "C" fn voicevox_audio_query( text: *const c_char, - speaker_id: i64, + speaker_id: usize, options: VoicevoxAudioQueryOptions, output_audio_query_json: *mut *mut c_char, ) -> VoicevoxResultCode { - unimplemented!(); let text = unsafe { CStr::from_ptr(text) }; - let audio_query = &match create_audio_query(text, speaker_id, Internal::audio_query) { + let audio_query = &match create_audio_query(text, speaker_id, Internal::audio_query, options) { Ok(audio_query) => audio_query, Err(result_code) => return result_code, }; @@ -222,10 +231,16 @@ pub extern "C" fn voicevox_audio_query( #[repr(C)] pub struct VoicevoxSynthesisOptions {} +impl From for voicevox_core::SynthesisOptions { + fn from(_: VoicevoxSynthesisOptions) -> Self { + Self {} + } +} + #[no_mangle] pub extern "C" fn voicevox_synthesis( audio_query_json: *const c_char, - speaker_id: i64, + speaker_id: usize, options: VoicevoxSynthesisOptions, output_binary_size: *mut c_int, output_wav: *mut *mut u8, @@ -242,9 +257,8 @@ pub extern "C" fn voicevox_synthesis( return VoicevoxResultCode::VOICEVOX_RESULT_INVALID_AUDIO_QUERY; }; - let speaker_id = speaker_id as usize; - - let (wav, result_code) = convert_result(lock_internal().synthesis(audio_query, speaker_id)); + let (wav, result_code) = + convert_result(lock_internal().synthesis(audio_query, speaker_id, options.into())); let wav = &if let Some(wav) = wav { wav } else { @@ -262,22 +276,34 @@ pub struct VoicevoxTtsOptions { kana: bool, } +impl From for VoicevoxTtsOptions { + fn from(options: voicevox_core::TtsOptions) -> Self { + Self { kana: options.kana } + } +} + +impl From for voicevox_core::TtsOptions { + fn from(options: VoicevoxTtsOptions) -> Self { + Self { kana: options.kana } + } +} + #[no_mangle] pub fn voicevox_default_tts_options() -> VoicevoxTtsOptions { - unimplemented!() + voicevox_core::TtsOptions::default().into() } #[no_mangle] pub extern "C" fn voicevox_tts( text: *const c_char, - speaker_id: i64, + speaker_id: usize, options: VoicevoxTtsOptions, output_binary_size: *mut c_int, output_wav: *mut *mut u8, ) -> VoicevoxResultCode { let (output_opt, result_code) = { if let Ok(text) = unsafe { CStr::from_ptr(text) }.to_str() { - convert_result(lock_internal().tts(text, speaker_id as usize)) + convert_result(lock_internal().tts(text, speaker_id, options.into())) } else { (None, VoicevoxResultCode::VOICEVOX_RESULT_INVALID_UTF8_INPUT) } From 911fdd68f30fad45c7171d5ae5de4a8242c01038 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Mon, 22 Aug 2022 21:27:03 +0900 Subject: [PATCH 12/29] =?UTF-8?q?clippy=E3=81=AE=E3=82=A8=E3=83=A9?= =?UTF-8?q?=E3=83=BC=E3=82=92=E4=BF=AE=E6=AD=A3=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/publish.rs | 10 ++++++++-- crates/voicevox_core_c_api/src/lib.rs | 6 +++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index a8c088f7a..07296b120 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -37,7 +37,12 @@ impl VoicevoxCore { options.use_gpu, options.cpu_num_threads, options.load_all_models, - ) + )?; + if let Some(open_jtalk_dict_dir) = options.open_jtalk_dict_dir { + self.synthesis_engine + .load_openjtalk_dict(open_jtalk_dict_dir)?; + } + Ok(()) } pub fn load_model(&mut self, speaker_id: usize) -> Result<()> { @@ -151,7 +156,8 @@ impl VoicevoxCore { &mut self, audio_query: &AudioQueryModel, speaker_id: usize, - options: SynthesisOptions, + // TODO: SynthesisOptions を使用した機能を提供する + #[allow(unused_variables)] options: SynthesisOptions, ) -> Result> { self.synthesis_engine .synthesis_wave_format(audio_query, speaker_id, true) // TODO: 疑問文化を設定可能にする diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index abab8f174..565a4bea1 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -229,7 +229,11 @@ pub extern "C" fn voicevox_audio_query( } #[repr(C)] -pub struct VoicevoxSynthesisOptions {} +pub struct VoicevoxSynthesisOptions { + // improper_ctypes_definitions を避けるためフィールドを持っておく + // TODO: improper_ctypes_definitionsを使用した機能を作る + enable_interrogative_upspeak: bool, +} impl From for voicevox_core::SynthesisOptions { fn from(_: VoicevoxSynthesisOptions) -> Self { From 0a5db301050c0a602ac1bc6d062ed543cbd46420 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Mon, 22 Aug 2022 21:33:59 +0900 Subject: [PATCH 13/29] =?UTF-8?q?=E5=86=85=E9=83=A8=E3=81=AEyukarin=5Fs,yu?= =?UTF-8?q?karin=5Fsa=E3=81=AB=E3=81=A4=E3=81=84=E3=81=A6=E5=90=8D?= =?UTF-8?q?=E5=89=8D=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/include_models.rs | 8 +-- crates/voicevox_core/src/publish.rs | 8 +-- crates/voicevox_core/src/status.rs | 48 +++++++++--------- .../{yukarin_s.onnx => predict_duration.onnx} | Bin ...ukarin_sa.onnx => predict_intonation.onnx} | Bin 5 files changed, 33 insertions(+), 31 deletions(-) rename model/{yukarin_s.onnx => predict_duration.onnx} (100%) rename model/{yukarin_sa.onnx => predict_intonation.onnx} (100%) diff --git a/crates/voicevox_core/src/include_models.rs b/crates/voicevox_core/src/include_models.rs index 7dbb8f01c..f9c06ecc8 100644 --- a/crates/voicevox_core/src/include_models.rs +++ b/crates/voicevox_core/src/include_models.rs @@ -1,12 +1,12 @@ [ Model{ - yukarin_s_model: include_bytes!(concat!( + predict_duration_model: include_bytes!(concat!( env!("CARGO_WORKSPACE_DIR"), - "/model/yukarin_s.onnx" + "/model/predict_duration.onnx" )), - yukarin_sa_model: include_bytes!(concat!( + predict_intonation_model: include_bytes!(concat!( env!("CARGO_WORKSPACE_DIR"), - "/model/yukarin_sa.onnx" + "/model/predict_intonation.onnx" )), decode_model: include_bytes!(concat!( env!("CARGO_WORKSPACE_DIR"), diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index 07296b120..5f9caff87 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -318,7 +318,7 @@ impl InferenceCore { let input_tensors: Vec<&mut dyn AnyArray> = vec![&mut phoneme_list_array, &mut speaker_id_array]; - let mut output = status.yukarin_s_session_run(model_index, input_tensors)?; + let mut output = status.predict_duration_session_run(model_index, input_tensors)?; for output_item in output.iter_mut() { if *output_item < PHONEME_LENGTH_MINIMAL { @@ -386,7 +386,7 @@ impl InferenceCore { &mut speaker_id_array, ]; - status.yukarin_sa_session_run(model_index, input_tensors) + status.predict_intonation_session_run(model_index, input_tensors) } pub fn decode( @@ -672,7 +672,7 @@ mod tests { } #[rstest] - fn yukarin_s_forward_works() { + fn predict_duration_works() { let internal = VoicevoxCore::new_with_mutex(); internal .lock() @@ -696,7 +696,7 @@ mod tests { } #[rstest] - fn yukarin_sa_forward_works() { + fn predict_intonation_works() { let internal = VoicevoxCore::new_with_mutex(); internal .lock() diff --git a/crates/voicevox_core/src/status.rs b/crates/voicevox_core/src/status.rs index 7a29ca051..bf392658b 100644 --- a/crates/voicevox_core/src/status.rs +++ b/crates/voicevox_core/src/status.rs @@ -22,8 +22,8 @@ pub struct Status { } struct StatusModels { - yukarin_s: BTreeMap>, - yukarin_sa: BTreeMap>, + predict_duration: BTreeMap>, + predict_intonation: BTreeMap>, decode: BTreeMap>, } @@ -34,8 +34,8 @@ struct SessionOptions { } struct Model { - yukarin_s_model: &'static [u8], - yukarin_sa_model: &'static [u8], + predict_duration_model: &'static [u8], + predict_intonation_model: &'static [u8], decode_model: &'static [u8], } @@ -109,8 +109,8 @@ impl Status { pub fn new(use_gpu: bool, cpu_num_threads: u16) -> Self { Self { models: StatusModels { - yukarin_s: BTreeMap::new(), - yukarin_sa: BTreeMap::new(), + predict_duration: BTreeMap::new(), + predict_intonation: BTreeMap::new(), decode: BTreeMap::new(), }, light_session_options: SessionOptions::new(cpu_num_threads, false), @@ -135,20 +135,22 @@ impl Status { pub fn load_model(&mut self, model_index: usize) -> Result<()> { if model_index < Self::MODELS.len() { let model = &Self::MODELS[model_index]; - let yukarin_s_session = self - .new_session(model.yukarin_s_model, &self.light_session_options) + let predict_duration_session = self + .new_session(model.predict_duration_model, &self.light_session_options) .map_err(Error::LoadModel)?; - let yukarin_sa_session = self - .new_session(model.yukarin_sa_model, &self.light_session_options) + let predict_intonation_session = self + .new_session(model.predict_intonation_model, &self.light_session_options) .map_err(Error::LoadModel)?; let decode_model = self .new_session(model.decode_model, &self.heavy_session_options) .map_err(Error::LoadModel)?; - self.models.yukarin_s.insert(model_index, yukarin_s_session); self.models - .yukarin_sa - .insert(model_index, yukarin_sa_session); + .predict_duration + .insert(model_index, predict_duration_session); + self.models + .predict_intonation + .insert(model_index, predict_intonation_session); self.models.decode.insert(model_index, decode_model); @@ -159,8 +161,8 @@ impl Status { } pub fn is_model_loaded(&self, model_index: usize) -> bool { - self.models.yukarin_sa.contains_key(&model_index) - && self.models.yukarin_s.contains_key(&model_index) + self.models.predict_intonation.contains_key(&model_index) + && self.models.predict_duration.contains_key(&model_index) && self.models.decode.contains_key(&model_index) } @@ -198,12 +200,12 @@ impl Status { self.supported_styles.contains(&speaker_id) } - pub fn yukarin_s_session_run( + pub fn predict_duration_session_run( &mut self, model_index: usize, inputs: Vec<&mut dyn AnyArray>, ) -> Result> { - if let Some(model) = self.models.yukarin_s.get_mut(&model_index) { + if let Some(model) = self.models.predict_duration.get_mut(&model_index) { if let Ok(output_tensors) = model.run(inputs) { Ok(output_tensors[0].as_slice().unwrap().to_owned()) } else { @@ -214,12 +216,12 @@ impl Status { } } - pub fn yukarin_sa_session_run( + pub fn predict_intonation_session_run( &mut self, model_index: usize, inputs: Vec<&mut dyn AnyArray>, ) -> Result> { - if let Some(model) = self.models.yukarin_sa.get_mut(&model_index) { + if let Some(model) = self.models.predict_intonation.get_mut(&model_index) { if let Ok(output_tensors) = model.run(inputs) { Ok(output_tensors[0].as_slice().unwrap().to_owned()) } else { @@ -273,8 +275,8 @@ mod tests { cpu_num_threads, status.heavy_session_options.cpu_num_threads ); - assert!(status.models.yukarin_s.is_empty()); - assert!(status.models.yukarin_sa.is_empty()); + assert!(status.models.predict_duration.is_empty()); + assert!(status.models.predict_intonation.is_empty()); assert!(status.models.decode.is_empty()); assert!(status.supported_styles.is_empty()); } @@ -302,8 +304,8 @@ mod tests { let mut status = Status::new(false, 0); let result = status.load_model(0); assert_eq!(Ok(()), result); - assert_eq!(1, status.models.yukarin_s.len()); - assert_eq!(1, status.models.yukarin_sa.len()); + assert_eq!(1, status.models.predict_duration.len()); + assert_eq!(1, status.models.predict_intonation.len()); assert_eq!(1, status.models.decode.len()); } diff --git a/model/yukarin_s.onnx b/model/predict_duration.onnx similarity index 100% rename from model/yukarin_s.onnx rename to model/predict_duration.onnx diff --git a/model/yukarin_sa.onnx b/model/predict_intonation.onnx similarity index 100% rename from model/yukarin_sa.onnx rename to model/predict_intonation.onnx From 5fe6665fd84fd07d988aebdee311371eb53ec626 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Mon, 22 Aug 2022 21:34:46 +0900 Subject: [PATCH 14/29] =?UTF-8?q?decode=5Fforward=E3=81=A8=E3=81=AA?= =?UTF-8?q?=E3=81=A3=E3=81=A6=E3=81=84=E3=81=9F=E3=81=A8=E3=81=93=E3=82=8D?= =?UTF-8?q?=E3=82=92decode=E3=81=AB=E4=BF=AE=E6=AD=A3=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/publish.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index 5f9caff87..ba8edfe44 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -731,7 +731,7 @@ mod tests { } #[rstest] - fn decode_forward_works() { + fn decode_works() { let internal = VoicevoxCore::new_with_mutex(); internal .lock() From b62595403322439de46dea4fd15b6d0dca0cf9f9 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Mon, 22 Aug 2022 21:49:25 +0900 Subject: [PATCH 15/29] =?UTF-8?q?unix=E3=81=AE=E3=83=86=E3=82=B9=E3=83=88?= =?UTF-8?q?=E3=81=8C=E9=80=9A=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=E4=BF=AE?= =?UTF-8?q?=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core_c_api/src/lib.rs | 2 +- example/cpp/unix/simple_tts.cpp | 20 +++++++------------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index 565a4bea1..e2d2ae145 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -293,7 +293,7 @@ impl From for voicevox_core::TtsOptions { } #[no_mangle] -pub fn voicevox_default_tts_options() -> VoicevoxTtsOptions { +pub extern "C" fn voicevox_default_tts_options() -> VoicevoxTtsOptions { voicevox_core::TtsOptions::default().into() } diff --git a/example/cpp/unix/simple_tts.cpp b/example/cpp/unix/simple_tts.cpp index 0e7e39add..fd08fd426 100644 --- a/example/cpp/unix/simple_tts.cpp +++ b/example/cpp/unix/simple_tts.cpp @@ -17,29 +17,23 @@ int main(int argc, char *argv[]) { std::cout << "coreの初期化中..." << std::endl; - if (!initialize(false, 0, true)) { + auto initialize_options = voicevox_default_initialize_options(); + initialize_options.load_all_models = true; + initialize_options.open_jtalk_dict_dir = open_jtalk_dict_path.c_str(); + if (voicevox_initialize(initialize_options) != VOICEVOX_RESULT_SUCCEED) { std::cout << "coreの初期化に失敗しました" << std::endl; return 1; } - VoicevoxResultCode result; - - std::cout << "openjtalk辞書の読み込み中..." << std::endl; - - result = voicevox_load_openjtalk_dict(open_jtalk_dict_path.c_str()); - if (result != VOICEVOX_RESULT_SUCCEED) { - std::cout << voicevox_error_result_to_message(result) << std::endl; - return 1; - } - std::cout << "音声生成中..." << std::endl; int64_t speaker_id = 0; int output_binary_size = 0; uint8_t *output_wav = nullptr; - result = - voicevox_tts(text.c_str(), speaker_id, &output_binary_size, &output_wav); + auto result = + voicevox_tts(text.c_str(), speaker_id, voicevox_default_tts_options(), + &output_binary_size, &output_wav); if (result != VOICEVOX_RESULT_SUCCEED) { std::cout << voicevox_error_result_to_message(result) << std::endl; return 1; From 01d6ecae107210d2af79205a42c96f3533f7e390 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Mon, 22 Aug 2022 21:57:05 +0900 Subject: [PATCH 16/29] =?UTF-8?q?API=E5=AE=9A=E7=BE=A9=E3=81=A8=E3=81=9D?= =?UTF-8?q?=E3=81=93=E3=81=BE=E3=81=A7=E9=96=A2=E4=BF=82=E3=81=AA=E3=81=84?= =?UTF-8?q?=E5=AE=9F=E8=A3=85=E3=81=AFhelper=E3=81=AB=E7=A7=BB=E5=8B=95?= =?UTF-8?q?=E3=81=95=E3=81=9B=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core_c_api/src/helpers.rs | 17 +++++++++++++++++ crates/voicevox_core_c_api/src/lib.rs | 17 ----------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/crates/voicevox_core_c_api/src/helpers.rs b/crates/voicevox_core_c_api/src/helpers.rs index a33562be4..fbbd7707b 100644 --- a/crates/voicevox_core_c_api/src/helpers.rs +++ b/crates/voicevox_core_c_api/src/helpers.rs @@ -99,3 +99,20 @@ pub(crate) fn ensure_utf8(s: &CStr) -> std::result::Result<&str, VoicevoxResultC s.to_str() .map_err(|_| VoicevoxResultCode::VOICEVOX_RESULT_INVALID_UTF8_INPUT) } + +impl From for VoicevoxAudioQueryOptions { + fn from(options: voicevox_core::AudioQueryOptions) -> Self { + Self { kana: options.kana } + } +} +impl From for voicevox_core::AudioQueryOptions { + fn from(options: VoicevoxAudioQueryOptions) -> Self { + Self { kana: options.kana } + } +} + +impl From for voicevox_core::SynthesisOptions { + fn from(_: VoicevoxSynthesisOptions) -> Self { + Self {} + } +} diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index e2d2ae145..94487b959 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -192,17 +192,6 @@ pub struct VoicevoxAudioQueryOptions { kana: bool, } -impl From for VoicevoxAudioQueryOptions { - fn from(options: voicevox_core::AudioQueryOptions) -> Self { - Self { kana: options.kana } - } -} -impl From for voicevox_core::AudioQueryOptions { - fn from(options: VoicevoxAudioQueryOptions) -> Self { - Self { kana: options.kana } - } -} - #[no_mangle] pub extern "C" fn voicevox_default_audio_query_options() -> VoicevoxAudioQueryOptions { voicevox_core::AudioQueryOptions::default().into() @@ -235,12 +224,6 @@ pub struct VoicevoxSynthesisOptions { enable_interrogative_upspeak: bool, } -impl From for voicevox_core::SynthesisOptions { - fn from(_: VoicevoxSynthesisOptions) -> Self { - Self {} - } -} - #[no_mangle] pub extern "C" fn voicevox_synthesis( audio_query_json: *const c_char, From 7f043487aa6fdb872f9097cf10c2c8bc9e15f3e8 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Tue, 23 Aug 2022 00:50:33 +0900 Subject: [PATCH 17/29] =?UTF-8?q?VoicevoxInitializeOptions=E3=82=92?= =?UTF-8?q?=E3=82=B3=E3=83=A1=E3=83=B3=E3=83=88=E3=82=92=E5=85=83=E3=81=AB?= =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://github.com/VOICEVOX/voicevox_core/pull/217/files/01d6ecae107210d2af79205a42c96f3533f7e390#r951545467 https://github.com/VOICEVOX/voicevox_core/pull/217/files/01d6ecae107210d2af79205a42c96f3533f7e390#r951564769 --- crates/voicevox_core_c_api/src/helpers.rs | 26 ++++++++++++++++++++++ crates/voicevox_core_c_api/src/lib.rs | 27 ++--------------------- 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/crates/voicevox_core_c_api/src/helpers.rs b/crates/voicevox_core_c_api/src/helpers.rs index fbbd7707b..0f304da74 100644 --- a/crates/voicevox_core_c_api/src/helpers.rs +++ b/crates/voicevox_core_c_api/src/helpers.rs @@ -116,3 +116,29 @@ impl From for voicevox_core::SynthesisOptions { Self {} } } + +impl Default for VoicevoxInitializeOptions { + fn default() -> Self { + let options = voicevox_core::InitializeOptions::default(); + Self { + use_gpu: options.use_gpu, + cpu_num_threads: options.cpu_num_threads, + load_all_models: options.load_all_models, + open_jtalk_dict_dir: null(), + } + } +} + +impl VoicevoxInitializeOptions { + pub(crate) unsafe fn try_into_options( + self, + ) -> std::result::Result { + let open_jtalk_dict_dir = ensure_utf8(CStr::from_ptr(self.open_jtalk_dict_dir))?; + Ok(voicevox_core::InitializeOptions { + use_gpu: self.use_gpu, + cpu_num_threads: self.cpu_num_threads, + load_all_models: self.load_all_models, + open_jtalk_dict_dir: Some(PathBuf::from(open_jtalk_dict_dir)), + }) + } +} diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index 94487b959..d7257a610 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -42,37 +42,14 @@ pub struct VoicevoxInitializeOptions { open_jtalk_dict_dir: *const c_char, } -impl VoicevoxInitializeOptions { - fn from_default_options(options: voicevox_core::InitializeOptions) -> Self { - Self { - use_gpu: options.use_gpu, - cpu_num_threads: options.cpu_num_threads, - load_all_models: options.load_all_models, - open_jtalk_dict_dir: null(), - } - } - - fn try_into_options( - self, - ) -> std::result::Result { - let open_jtalk_dict_dir = ensure_utf8(unsafe { CStr::from_ptr(self.open_jtalk_dict_dir) })?; - Ok(voicevox_core::InitializeOptions { - use_gpu: self.use_gpu, - cpu_num_threads: self.cpu_num_threads, - load_all_models: self.load_all_models, - open_jtalk_dict_dir: Some(PathBuf::from(open_jtalk_dict_dir)), - }) - } -} - #[no_mangle] pub extern "C" fn voicevox_default_initialize_options() -> VoicevoxInitializeOptions { - VoicevoxInitializeOptions::from_default_options(voicevox_core::InitializeOptions::default()) + VoicevoxInitializeOptions::default() } #[no_mangle] pub extern "C" fn voicevox_initialize(options: VoicevoxInitializeOptions) -> VoicevoxResultCode { - match options.try_into_options() { + match unsafe { options.try_into_options() } { Ok(options) => { let result = lock_internal().initialize(options); let (_, result_code) = convert_result(result); From d92c0e47851759ea54188f072f4627790e7f938f Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Tue, 23 Aug 2022 01:32:29 +0900 Subject: [PATCH 18/29] =?UTF-8?q?TtsOptions=E3=81=AE=E5=A4=89=E6=8F=9B?= =?UTF-8?q?=E3=82=92From=20trait=E3=81=A7=E8=A1=8C=E3=81=86=E3=82=88?= =?UTF-8?q?=E3=81=86=E3=81=AB=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/publish.rs | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index ba8edfe44..bf439caa9 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -164,16 +164,8 @@ impl VoicevoxCore { } pub fn tts(&mut self, text: &str, speaker_id: usize, options: TtsOptions) -> Result> { - let audio_query = &self.audio_query( - text, - speaker_id, - AudioQueryOptions::from_tts_options(&options), - )?; - self.synthesis( - audio_query, - speaker_id, - SynthesisOptions::from_tts_options(&options), - ) + let audio_query = &self.audio_query(text, speaker_id, AudioQueryOptions::from(&options))?; + self.synthesis(audio_query, speaker_id, SynthesisOptions::from(&options)) } } @@ -182,8 +174,8 @@ pub struct AudioQueryOptions { pub kana: bool, } -impl AudioQueryOptions { - fn from_tts_options(options: &TtsOptions) -> Self { +impl From<&TtsOptions> for AudioQueryOptions { + fn from(options: &TtsOptions) -> Self { Self { kana: options.kana } } } @@ -198,8 +190,9 @@ pub struct InitializeOptions { pub struct SynthesisOptions {} -impl SynthesisOptions { - fn from_tts_options(_: &TtsOptions) -> Self { +impl From<&TtsOptions> for SynthesisOptions { + fn from(_: &TtsOptions) -> Self { + // TODO:変換の必要性が出たら実装する Self {} } } From 58921871823c47324e993daa423cc230f12801d1 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Tue, 23 Aug 2022 07:39:20 +0900 Subject: [PATCH 19/29] =?UTF-8?q?=E7=96=91=E5=95=8F=E6=96=87=E3=83=A2?= =?UTF-8?q?=E3=83=BC=E3=83=89=E3=82=92=E5=AE=9F=E8=A3=85=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/publish.rs | 22 +++++++++++++-------- crates/voicevox_core_c_api/src/helpers.rs | 24 +++++++++++++++++++++-- crates/voicevox_core_c_api/src/lib.rs | 15 +------------- 3 files changed, 37 insertions(+), 24 deletions(-) diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index bf439caa9..445069898 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -156,11 +156,13 @@ impl VoicevoxCore { &mut self, audio_query: &AudioQueryModel, speaker_id: usize, - // TODO: SynthesisOptions を使用した機能を提供する - #[allow(unused_variables)] options: SynthesisOptions, + options: SynthesisOptions, ) -> Result> { - self.synthesis_engine - .synthesis_wave_format(audio_query, speaker_id, true) // TODO: 疑問文化を設定可能にする + self.synthesis_engine.synthesis_wave_format( + audio_query, + speaker_id, + options.enable_interrogative_upspeak, + ) } pub fn tts(&mut self, text: &str, speaker_id: usize, options: TtsOptions) -> Result> { @@ -188,18 +190,22 @@ pub struct InitializeOptions { pub open_jtalk_dict_dir: Option, } -pub struct SynthesisOptions {} +pub struct SynthesisOptions { + pub enable_interrogative_upspeak: bool, +} impl From<&TtsOptions> for SynthesisOptions { - fn from(_: &TtsOptions) -> Self { - // TODO:変換の必要性が出たら実装する - Self {} + fn from(options: &TtsOptions) -> Self { + Self { + enable_interrogative_upspeak: options.enable_interrogative_upspeak, + } } } #[derive(Default)] pub struct TtsOptions { pub kana: bool, + pub enable_interrogative_upspeak: bool, } #[derive(new)] diff --git a/crates/voicevox_core_c_api/src/helpers.rs b/crates/voicevox_core_c_api/src/helpers.rs index 0f304da74..64d8aa037 100644 --- a/crates/voicevox_core_c_api/src/helpers.rs +++ b/crates/voicevox_core_c_api/src/helpers.rs @@ -112,8 +112,10 @@ impl From for voicevox_core::AudioQueryOptions { } impl From for voicevox_core::SynthesisOptions { - fn from(_: VoicevoxSynthesisOptions) -> Self { - Self {} + fn from(options: VoicevoxSynthesisOptions) -> Self { + Self { + enable_interrogative_upspeak: options.enable_interrogative_upspeak, + } } } @@ -142,3 +144,21 @@ impl VoicevoxInitializeOptions { }) } } + +impl From for VoicevoxTtsOptions { + fn from(options: voicevox_core::TtsOptions) -> Self { + Self { + kana: options.kana, + enable_interrogative_upspeak: options.enable_interrogative_upspeak, + } + } +} + +impl From for voicevox_core::TtsOptions { + fn from(options: VoicevoxTtsOptions) -> Self { + Self { + kana: options.kana, + enable_interrogative_upspeak: options.enable_interrogative_upspeak, + } + } +} diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index d7257a610..4596d9704 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -196,8 +196,6 @@ pub extern "C" fn voicevox_audio_query( #[repr(C)] pub struct VoicevoxSynthesisOptions { - // improper_ctypes_definitions を避けるためフィールドを持っておく - // TODO: improper_ctypes_definitionsを使用した機能を作る enable_interrogative_upspeak: bool, } @@ -238,18 +236,7 @@ pub extern "C" fn voicevox_synthesis( #[repr(C)] pub struct VoicevoxTtsOptions { kana: bool, -} - -impl From for VoicevoxTtsOptions { - fn from(options: voicevox_core::TtsOptions) -> Self { - Self { kana: options.kana } - } -} - -impl From for voicevox_core::TtsOptions { - fn from(options: VoicevoxTtsOptions) -> Self { - Self { kana: options.kana } - } + enable_interrogative_upspeak: bool, } #[no_mangle] From 3f53f077fb3d94f05cb8ec9168e9925cd039c8b1 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Tue, 23 Aug 2022 07:47:09 +0900 Subject: [PATCH 20/29] =?UTF-8?q?voicevox=5Fdefault=5Fsynthesis=5Foptions?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0=E3=80=81enable=5Finterrogative=5Fup?= =?UTF-8?q?speak=E3=81=8C=E3=83=87=E3=83=95=E3=82=A9=E3=83=AB=E3=83=88?= =?UTF-8?q?=E3=81=A7true=E3=81=AB=E3=81=AA=E3=82=8B=E3=82=88=E3=81=86?= =?UTF-8?q?=E3=81=AB=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/publish.rs | 10 +++++++++- crates/voicevox_core_c_api/src/helpers.rs | 9 +++++++++ crates/voicevox_core_c_api/src/lib.rs | 4 ++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index 445069898..f4bc62607 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -202,12 +202,20 @@ impl From<&TtsOptions> for SynthesisOptions { } } -#[derive(Default)] pub struct TtsOptions { pub kana: bool, pub enable_interrogative_upspeak: bool, } +impl Default for TtsOptions { + fn default() -> Self { + Self { + enable_interrogative_upspeak: true, + kana: false, + } + } +} + #[derive(new)] pub struct InferenceCore { initialized: bool, diff --git a/crates/voicevox_core_c_api/src/helpers.rs b/crates/voicevox_core_c_api/src/helpers.rs index 64d8aa037..6616b4043 100644 --- a/crates/voicevox_core_c_api/src/helpers.rs +++ b/crates/voicevox_core_c_api/src/helpers.rs @@ -162,3 +162,12 @@ impl From for voicevox_core::TtsOptions { } } } + +impl Default for VoicevoxSynthesisOptions { + fn default() -> Self { + let options = voicevox_core::TtsOptions::default(); + Self { + enable_interrogative_upspeak: options.enable_interrogative_upspeak, + } + } +} diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index 4596d9704..10ff3c1d1 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -199,6 +199,10 @@ pub struct VoicevoxSynthesisOptions { enable_interrogative_upspeak: bool, } +pub extern "C" fn voicevox_default_synthesis_options() -> VoicevoxSynthesisOptions { + VoicevoxSynthesisOptions::default() +} + #[no_mangle] pub extern "C" fn voicevox_synthesis( audio_query_json: *const c_char, From 3e1136f9c2e7202f7f91362a2efee667b679f6f9 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Tue, 23 Aug 2022 07:56:33 +0900 Subject: [PATCH 21/29] =?UTF-8?q?kana=E3=81=AF=20default=E3=81=A8=E3=81=99?= =?UTF-8?q?=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/publish.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index f4bc62607..6f1cc32df 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -211,7 +211,7 @@ impl Default for TtsOptions { fn default() -> Self { Self { enable_interrogative_upspeak: true, - kana: false, + kana: Default::default(), } } } From f697e09f684d626f6bd68cecbffb7fd871fa9128 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Tue, 23 Aug 2022 21:32:27 +0900 Subject: [PATCH 22/29] =?UTF-8?q?use=5Fgpu=20->=20AccelerationMode?= =?UTF-8?q?=E3=81=AB=E5=A4=89=E6=9B=B4=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/publish.rs | 33 +++++++++++++++++++++-- crates/voicevox_core_c_api/src/helpers.rs | 26 ++++++++++++++++-- crates/voicevox_core_c_api/src/lib.rs | 11 +++++++- 3 files changed, 65 insertions(+), 5 deletions(-) diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index 6f1cc32df..491e41901 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -33,8 +33,24 @@ impl VoicevoxCore { } pub fn initialize(&mut self, options: InitializeOptions) -> Result<()> { + let use_gpu = match options.acceleration_mode { + AccelerationMode::Auto => { + let supported_devices = SupportedDevices::get_supported_devices()?; + + cfg_if! { + if #[cfg(feature="directml")]{ + *supported_devices.dml() + + } else { + *supported_devices.cuda() + } + } + } + AccelerationMode::Cpu => false, + AccelerationMode::Gpu => true, + }; self.synthesis_engine.inference_core_mut().initialize( - options.use_gpu, + use_gpu, options.cpu_num_threads, options.load_all_models, )?; @@ -182,9 +198,22 @@ impl From<&TtsOptions> for AudioQueryOptions { } } +#[derive(Debug, PartialEq, Eq)] +pub enum AccelerationMode { + Auto, + Cpu, + Gpu, +} + +impl Default for AccelerationMode { + fn default() -> Self { + Self::Auto + } +} + #[derive(Default)] pub struct InitializeOptions { - pub use_gpu: bool, + pub acceleration_mode: AccelerationMode, pub cpu_num_threads: u16, pub load_all_models: bool, pub open_jtalk_dict_dir: Option, diff --git a/crates/voicevox_core_c_api/src/helpers.rs b/crates/voicevox_core_c_api/src/helpers.rs index 6616b4043..66105e37d 100644 --- a/crates/voicevox_core_c_api/src/helpers.rs +++ b/crates/voicevox_core_c_api/src/helpers.rs @@ -119,11 +119,33 @@ impl From for voicevox_core::SynthesisOptions { } } +impl From for VoicevoxAccelerationMode { + fn from(mode: voicevox_core::AccelerationMode) -> Self { + use voicevox_core::AccelerationMode::*; + match mode { + Auto => Self::VOICEVOX_ACCELERATION_MODE_AUTO, + Cpu => Self::VOICEVOX_ACCELERATION_MODE_CPU, + Gpu => Self::VOICEVOX_ACCELERATION_MODE_GPU, + } + } +} + +impl From for voicevox_core::AccelerationMode { + fn from(mode: VoicevoxAccelerationMode) -> Self { + use VoicevoxAccelerationMode::*; + match mode { + VOICEVOX_ACCELERATION_MODE_AUTO => Self::Auto, + VOICEVOX_ACCELERATION_MODE_CPU => Self::Cpu, + VOICEVOX_ACCELERATION_MODE_GPU => Self::Gpu, + } + } +} + impl Default for VoicevoxInitializeOptions { fn default() -> Self { let options = voicevox_core::InitializeOptions::default(); Self { - use_gpu: options.use_gpu, + acceleration_mode: options.acceleration_mode.into(), cpu_num_threads: options.cpu_num_threads, load_all_models: options.load_all_models, open_jtalk_dict_dir: null(), @@ -137,7 +159,7 @@ impl VoicevoxInitializeOptions { ) -> std::result::Result { let open_jtalk_dict_dir = ensure_utf8(CStr::from_ptr(self.open_jtalk_dict_dir))?; Ok(voicevox_core::InitializeOptions { - use_gpu: self.use_gpu, + acceleration_mode: self.acceleration_mode.into(), cpu_num_threads: self.cpu_num_threads, load_all_models: self.load_all_models, open_jtalk_dict_dir: Some(PathBuf::from(open_jtalk_dict_dir)), diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index 10ff3c1d1..daea243a5 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -34,9 +34,18 @@ fn lock_internal() -> MutexGuard<'static, Internal> { pub use voicevox_core::result_code::VoicevoxResultCode; +#[repr(i32)] +#[derive(Debug, PartialEq, Eq)] +#[allow(non_camel_case_types)] +pub enum VoicevoxAccelerationMode { + VOICEVOX_ACCELERATION_MODE_AUTO = 1, + VOICEVOX_ACCELERATION_MODE_CPU = 2, + VOICEVOX_ACCELERATION_MODE_GPU = 3, +} + #[repr(C)] pub struct VoicevoxInitializeOptions { - use_gpu: bool, + acceleration_mode: VoicevoxAccelerationMode, cpu_num_threads: u16, load_all_models: bool, open_jtalk_dict_dir: *const c_char, From 07bc35bb930c998690f5786de848e0202d500e8a Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Tue, 23 Aug 2022 21:49:58 +0900 Subject: [PATCH 23/29] =?UTF-8?q?GPU=E4=BD=BF=E7=94=A8=E4=B8=AD=E3=81=8B?= =?UTF-8?q?=E3=81=A9=E3=81=86=E3=81=8B=E5=88=A4=E5=AE=9A=E3=81=99=E3=82=8B?= =?UTF-8?q?API=E9=96=A2=E6=95=B0=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=97?= =?UTF-8?q?=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/publish.rs | 35 +++++++++++++++++++++++++-- crates/voicevox_core_c_api/src/lib.rs | 5 ++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index 491e41901..9684923b0 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -20,6 +20,7 @@ static SPEAKER_ID_MAP: Lazy> = pub struct VoicevoxCore { synthesis_engine: SynthesisEngine, + use_gpu: bool, } impl VoicevoxCore { @@ -29,6 +30,7 @@ impl VoicevoxCore { InferenceCore::new(false, None), OpenJtalk::initialize(), ), + use_gpu: false, }) } @@ -49,6 +51,7 @@ impl VoicevoxCore { AccelerationMode::Cpu => false, AccelerationMode::Gpu => true, }; + self.use_gpu = use_gpu; self.synthesis_engine.inference_core_mut().initialize( use_gpu, options.cpu_num_threads, @@ -61,6 +64,10 @@ impl VoicevoxCore { Ok(()) } + pub fn is_use_gpu(&self) -> bool { + self.use_gpu + } + pub fn load_model(&mut self, speaker_id: usize) -> Result<()> { self.synthesis_engine .inference_core_mut() @@ -639,7 +646,10 @@ mod tests { internal .lock() .unwrap() - .initialize(InitializeOptions::default()) + .initialize(InitializeOptions { + acceleration_mode: AccelerationMode::Cpu, + ..Default::default() + }) .unwrap(); let result = internal.lock().unwrap().load_model(speaker_id); assert_eq!( @@ -648,6 +658,21 @@ mod tests { ); } + #[rstest] + fn is_use_gpu_works() { + let internal = VoicevoxCore::new_with_mutex(); + assert_eq!(false, internal.lock().unwrap().is_use_gpu()); + internal + .lock() + .unwrap() + .initialize(InitializeOptions { + acceleration_mode: AccelerationMode::Cpu, + ..Default::default() + }) + .unwrap(); + assert_eq!(false, internal.lock().unwrap().is_use_gpu()); + } + #[rstest] #[case(0, true)] #[case(1, true)] @@ -662,7 +687,10 @@ mod tests { internal .lock() .unwrap() - .initialize(InitializeOptions::default()) + .initialize(InitializeOptions { + acceleration_mode: AccelerationMode::Cpu, + ..Default::default() + }) .unwrap(); assert!( !internal.lock().unwrap().is_model_loaded(speaker_id), @@ -715,6 +743,7 @@ mod tests { .unwrap() .initialize(InitializeOptions { load_all_models: true, + acceleration_mode: AccelerationMode::Cpu, ..Default::default() }) .unwrap(); @@ -739,6 +768,7 @@ mod tests { .unwrap() .initialize(InitializeOptions { load_all_models: true, + acceleration_mode: AccelerationMode::Cpu, ..Default::default() }) .unwrap(); @@ -773,6 +803,7 @@ mod tests { .lock() .unwrap() .initialize(InitializeOptions { + acceleration_mode: AccelerationMode::Cpu, load_all_models: true, ..Default::default() }) diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index daea243a5..8b911d55a 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -75,6 +75,11 @@ pub extern "C" fn voicevox_load_model(speaker_id: usize) -> VoicevoxResultCode { result_code } +#[no_mangle] +pub extern "C" fn voicevox_is_use_gpu() -> bool { + lock_internal().is_use_gpu() +} + #[no_mangle] pub extern "C" fn voicevox_is_model_loaded(speaker_id: usize) -> bool { lock_internal().is_model_loaded(speaker_id) From d1e2c4bebb2fd3ba5a0542e1d638cf6b98a00157 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Tue, 23 Aug 2022 22:28:02 +0900 Subject: [PATCH 24/29] =?UTF-8?q?default=E3=81=AE=E8=A8=98=E8=BF=B0?= =?UTF-8?q?=E3=82=92=E6=96=B0=E3=81=97=E3=81=84=E5=BD=A2=E3=81=AB=E3=81=97?= =?UTF-8?q?=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/publish.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index 9684923b0..c7f710f18 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -205,19 +205,14 @@ impl From<&TtsOptions> for AudioQueryOptions { } } -#[derive(Debug, PartialEq, Eq)] +#[derive(Default, Debug, PartialEq, Eq)] pub enum AccelerationMode { + #[default] Auto, Cpu, Gpu, } -impl Default for AccelerationMode { - fn default() -> Self { - Self::Auto - } -} - #[derive(Default)] pub struct InitializeOptions { pub acceleration_mode: AccelerationMode, From 7e50250e64585d97bcd73630a593c237b4458887 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Wed, 24 Aug 2022 02:17:17 +0900 Subject: [PATCH 25/29] =?UTF-8?q?Auto=3D0=E3=81=A8=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core_c_api/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index 8b911d55a..d475f705f 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -38,9 +38,9 @@ pub use voicevox_core::result_code::VoicevoxResultCode; #[derive(Debug, PartialEq, Eq)] #[allow(non_camel_case_types)] pub enum VoicevoxAccelerationMode { - VOICEVOX_ACCELERATION_MODE_AUTO = 1, - VOICEVOX_ACCELERATION_MODE_CPU = 2, - VOICEVOX_ACCELERATION_MODE_GPU = 3, + VOICEVOX_ACCELERATION_MODE_AUTO = 0, + VOICEVOX_ACCELERATION_MODE_CPU = 1, + VOICEVOX_ACCELERATION_MODE_GPU = 2, } #[repr(C)] From f6e4d9bb597f8dcf2c00c50e37ba30a19b4d5693 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Thu, 25 Aug 2022 19:40:21 +0900 Subject: [PATCH 26/29] =?UTF-8?q?is=5Fgpu=5Fmode=E3=81=AB=E5=90=8D?= =?UTF-8?q?=E7=A7=B0=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/publish.rs | 6 +++--- crates/voicevox_core_c_api/src/lib.rs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index c7f710f18..fa3563c43 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -64,7 +64,7 @@ impl VoicevoxCore { Ok(()) } - pub fn is_use_gpu(&self) -> bool { + pub fn is_gpu_mode(&self) -> bool { self.use_gpu } @@ -656,7 +656,7 @@ mod tests { #[rstest] fn is_use_gpu_works() { let internal = VoicevoxCore::new_with_mutex(); - assert_eq!(false, internal.lock().unwrap().is_use_gpu()); + assert_eq!(false, internal.lock().unwrap().is_gpu_mode()); internal .lock() .unwrap() @@ -665,7 +665,7 @@ mod tests { ..Default::default() }) .unwrap(); - assert_eq!(false, internal.lock().unwrap().is_use_gpu()); + assert_eq!(false, internal.lock().unwrap().is_gpu_mode()); } #[rstest] diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index d475f705f..36a40c547 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -76,8 +76,8 @@ pub extern "C" fn voicevox_load_model(speaker_id: usize) -> VoicevoxResultCode { } #[no_mangle] -pub extern "C" fn voicevox_is_use_gpu() -> bool { - lock_internal().is_use_gpu() +pub extern "C" fn voicevox_is_gpu_mode() -> bool { + lock_internal().is_gpu_mode() } #[no_mangle] From a9c2ceee234fb70d5b5f45348f52685d40b45122 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Thu, 25 Aug 2022 19:41:27 +0900 Subject: [PATCH 27/29] =?UTF-8?q?default=E3=82=AA=E3=83=97=E3=82=B7?= =?UTF-8?q?=E3=83=A7=E3=83=B3=E7=94=9F=E6=88=90=E9=96=A2=E6=95=B0=E3=81=AB?= =?UTF-8?q?make=E3=82=92=E3=81=A4=E3=81=91=E3=81=9F=E3=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core_c_api/src/lib.rs | 8 ++++---- example/cpp/unix/simple_tts.cpp | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index 36a40c547..2b1e4f51b 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -52,7 +52,7 @@ pub struct VoicevoxInitializeOptions { } #[no_mangle] -pub extern "C" fn voicevox_default_initialize_options() -> VoicevoxInitializeOptions { +pub extern "C" fn voicevox_make_default_initialize_options() -> VoicevoxInitializeOptions { VoicevoxInitializeOptions::default() } @@ -184,7 +184,7 @@ pub struct VoicevoxAudioQueryOptions { } #[no_mangle] -pub extern "C" fn voicevox_default_audio_query_options() -> VoicevoxAudioQueryOptions { +pub extern "C" fn voicevox_make_default_audio_query_options() -> VoicevoxAudioQueryOptions { voicevox_core::AudioQueryOptions::default().into() } @@ -213,7 +213,7 @@ pub struct VoicevoxSynthesisOptions { enable_interrogative_upspeak: bool, } -pub extern "C" fn voicevox_default_synthesis_options() -> VoicevoxSynthesisOptions { +pub extern "C" fn voicevox_make_default_synthesis_options() -> VoicevoxSynthesisOptions { VoicevoxSynthesisOptions::default() } @@ -258,7 +258,7 @@ pub struct VoicevoxTtsOptions { } #[no_mangle] -pub extern "C" fn voicevox_default_tts_options() -> VoicevoxTtsOptions { +pub extern "C" fn voicevox_make_default_tts_options() -> VoicevoxTtsOptions { voicevox_core::TtsOptions::default().into() } diff --git a/example/cpp/unix/simple_tts.cpp b/example/cpp/unix/simple_tts.cpp index fd08fd426..a2f07c72c 100644 --- a/example/cpp/unix/simple_tts.cpp +++ b/example/cpp/unix/simple_tts.cpp @@ -17,7 +17,7 @@ int main(int argc, char *argv[]) { std::cout << "coreの初期化中..." << std::endl; - auto initialize_options = voicevox_default_initialize_options(); + auto initialize_options = voicevox_make_default_initialize_options(); initialize_options.load_all_models = true; initialize_options.open_jtalk_dict_dir = open_jtalk_dict_path.c_str(); if (voicevox_initialize(initialize_options) != VOICEVOX_RESULT_SUCCEED) { @@ -31,9 +31,9 @@ int main(int argc, char *argv[]) { int output_binary_size = 0; uint8_t *output_wav = nullptr; - auto result = - voicevox_tts(text.c_str(), speaker_id, voicevox_default_tts_options(), - &output_binary_size, &output_wav); + auto result = voicevox_tts(text.c_str(), speaker_id, + voicevox_make_default_tts_options(), + &output_binary_size, &output_wav); if (result != VOICEVOX_RESULT_SUCCEED) { std::cout << voicevox_error_result_to_message(result) << std::endl; return 1; From a599e17d48c8f6c6c97ed9e83c84c68f16920fc2 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Fri, 26 Aug 2022 22:05:56 +0900 Subject: [PATCH 28/29] =?UTF-8?q?output=5Fbinary=5Fsize->output=5Fwav=5Fsi?= =?UTF-8?q?ze=E3=81=AB=E5=A4=89=E6=9B=B4=20=E5=9E=8B=E3=82=92c=5Fint->usiz?= =?UTF-8?q?e=E3=81=AB=E5=A4=89=E6=9B=B4=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core_c_api/src/helpers.rs | 4 ++-- crates/voicevox_core_c_api/src/lib.rs | 10 +++++----- example/cpp/unix/simple_tts.cpp | 7 +++---- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/crates/voicevox_core_c_api/src/helpers.rs b/crates/voicevox_core_c_api/src/helpers.rs index 66105e37d..4aad6b861 100644 --- a/crates/voicevox_core_c_api/src/helpers.rs +++ b/crates/voicevox_core_c_api/src/helpers.rs @@ -86,10 +86,10 @@ pub(crate) unsafe fn write_json_to_ptr(output_ptr: *mut *mut c_char, json: &CStr pub(crate) unsafe fn write_wav_to_ptr( output_wav_ptr: *mut *mut u8, - output_size_ptr: *mut c_int, + output_size_ptr: *mut usize, data: &[u8], ) { - output_size_ptr.write(data.len() as c_int); + output_size_ptr.write(data.len()); let wav_heap = libc::malloc(data.len()); libc::memcpy(wav_heap, data.as_ptr() as *const c_void, data.len()); output_wav_ptr.write(wav_heap as *mut u8); diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index 2b1e4f51b..159e90f9f 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -6,7 +6,7 @@ use helpers::*; use libc::c_void; use once_cell::sync::Lazy; use std::ffi::{CStr, CString}; -use std::os::raw::{c_char, c_int}; +use std::os::raw::c_char; use std::path::PathBuf; use std::ptr::null; use std::sync::{Mutex, MutexGuard}; @@ -222,7 +222,7 @@ pub extern "C" fn voicevox_synthesis( audio_query_json: *const c_char, speaker_id: usize, options: VoicevoxSynthesisOptions, - output_binary_size: *mut c_int, + output_wav_size: *mut usize, output_wav: *mut *mut u8, ) -> VoicevoxResultCode { let audio_query_json = unsafe { CStr::from_ptr(audio_query_json) }; @@ -246,7 +246,7 @@ pub extern "C" fn voicevox_synthesis( }; unsafe { - write_wav_to_ptr(output_wav, output_binary_size, wav); + write_wav_to_ptr(output_wav, output_wav_size, wav); } VoicevoxResultCode::VOICEVOX_RESULT_SUCCEED } @@ -267,7 +267,7 @@ pub extern "C" fn voicevox_tts( text: *const c_char, speaker_id: usize, options: VoicevoxTtsOptions, - output_binary_size: *mut c_int, + output_wav_size: *mut usize, output_wav: *mut *mut u8, ) -> VoicevoxResultCode { let (output_opt, result_code) = { @@ -279,7 +279,7 @@ pub extern "C" fn voicevox_tts( }; if let Some(output) = output_opt { unsafe { - write_wav_to_ptr(output_wav, output_binary_size, output.as_slice()); + write_wav_to_ptr(output_wav, output_wav_size, output.as_slice()); } } result_code diff --git a/example/cpp/unix/simple_tts.cpp b/example/cpp/unix/simple_tts.cpp index a2f07c72c..4a9d8dea9 100644 --- a/example/cpp/unix/simple_tts.cpp +++ b/example/cpp/unix/simple_tts.cpp @@ -28,12 +28,12 @@ int main(int argc, char *argv[]) { std::cout << "音声生成中..." << std::endl; int64_t speaker_id = 0; - int output_binary_size = 0; + size_t output_wav_size = 0; uint8_t *output_wav = nullptr; auto result = voicevox_tts(text.c_str(), speaker_id, voicevox_make_default_tts_options(), - &output_binary_size, &output_wav); + &output_wav_size, &output_wav); if (result != VOICEVOX_RESULT_SUCCEED) { std::cout << voicevox_error_result_to_message(result) << std::endl; return 1; @@ -42,8 +42,7 @@ int main(int argc, char *argv[]) { std::cout << "音声ファイル保存中..." << std::endl; std::ofstream wav_file(OUTPUT_WAV_NAME, std::ios::binary); - wav_file.write(reinterpret_cast(output_wav), - output_binary_size); + wav_file.write(reinterpret_cast(output_wav), output_wav_size); voicevox_wav_free(output_wav); std::cout << "音声ファイル保存完了 (" << OUTPUT_WAV_NAME << ")" << std::endl; From d5af118513671600454ce55096c970eaa72fe5a4 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Sat, 27 Aug 2022 01:42:10 +0900 Subject: [PATCH 29/29] =?UTF-8?q?speaker=5Fid=E3=82=92u32=E3=81=AB?= =?UTF-8?q?=E5=A4=89=E6=9B=B4=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/engine/synthesis_engine.rs | 12 ++--- crates/voicevox_core/src/error.rs | 2 +- crates/voicevox_core/src/publish.rs | 46 ++++++++----------- crates/voicevox_core/src/status.rs | 6 +-- crates/voicevox_core_c_api/src/helpers.rs | 4 +- crates/voicevox_core_c_api/src/lib.rs | 16 +++---- 6 files changed, 39 insertions(+), 47 deletions(-) diff --git a/crates/voicevox_core/src/engine/synthesis_engine.rs b/crates/voicevox_core/src/engine/synthesis_engine.rs index d0f0b3fad..3c3c02bc7 100644 --- a/crates/voicevox_core/src/engine/synthesis_engine.rs +++ b/crates/voicevox_core/src/engine/synthesis_engine.rs @@ -38,7 +38,7 @@ impl SynthesisEngine { pub fn create_accent_phrases( &mut self, text: impl AsRef, - speaker_id: usize, + speaker_id: u32, ) -> Result> { if text.as_ref().is_empty() { return Ok(Vec::new()); @@ -115,7 +115,7 @@ impl SynthesisEngine { pub fn replace_mora_data( &mut self, accent_phrases: &[AccentPhraseModel], - speaker_id: usize, + speaker_id: u32, ) -> Result> { let accent_phrases = self.replace_phoneme_length(accent_phrases, speaker_id)?; self.replace_mora_pitch(&accent_phrases, speaker_id) @@ -124,7 +124,7 @@ impl SynthesisEngine { pub fn replace_phoneme_length( &mut self, accent_phrases: &[AccentPhraseModel], - speaker_id: usize, + speaker_id: u32, ) -> Result> { let (_, phoneme_data_list) = SynthesisEngine::initial_process(accent_phrases); @@ -188,7 +188,7 @@ impl SynthesisEngine { pub fn replace_mora_pitch( &mut self, accent_phrases: &[AccentPhraseModel], - speaker_id: usize, + speaker_id: u32, ) -> Result> { let (_, phoneme_data_list) = SynthesisEngine::initial_process(accent_phrases); @@ -315,7 +315,7 @@ impl SynthesisEngine { pub fn synthesis( &mut self, query: &AudioQueryModel, - speaker_id: usize, + speaker_id: u32, enable_interrogative_upspeak: bool, ) -> Result> { let speed_scale = *query.speed_scale(); @@ -422,7 +422,7 @@ impl SynthesisEngine { pub fn synthesis_wave_format( &mut self, query: &AudioQueryModel, - speaker_id: usize, + speaker_id: u32, enable_interrogative_upspeak: bool, ) -> Result> { let wave = self.synthesis(query, speaker_id, enable_interrogative_upspeak)?; diff --git a/crates/voicevox_core/src/error.rs b/crates/voicevox_core/src/error.rs index 06b759206..c81f4570e 100644 --- a/crates/voicevox_core/src/error.rs +++ b/crates/voicevox_core/src/error.rs @@ -39,7 +39,7 @@ pub enum Error { UninitializedStatus, #[error("{},{0}", base_error_message(VOICEVOX_RESULT_INVALID_SPEAKER_ID))] - InvalidSpeakerId { speaker_id: usize }, + InvalidSpeakerId { speaker_id: u32 }, #[error("{},{0}", base_error_message(VOICEVOX_RESULT_INVALID_MODEL_INDEX))] InvalidModelIndex { model_index: usize }, diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index fa3563c43..42514d15d 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -15,7 +15,7 @@ use std::ffi::CString; const PHONEME_LENGTH_MINIMAL: f32 = 0.01; -static SPEAKER_ID_MAP: Lazy> = +static SPEAKER_ID_MAP: Lazy> = Lazy::new(|| include!("include_speaker_id_map.rs").into_iter().collect()); pub struct VoicevoxCore { @@ -68,13 +68,13 @@ impl VoicevoxCore { self.use_gpu } - pub fn load_model(&mut self, speaker_id: usize) -> Result<()> { + pub fn load_model(&mut self, speaker_id: u32) -> Result<()> { self.synthesis_engine .inference_core_mut() .load_model(speaker_id) } - pub fn is_model_loaded(&self, speaker_id: usize) -> bool { + pub fn is_model_loaded(&self, speaker_id: u32) -> bool { self.synthesis_engine .inference_core() .is_model_loaded(speaker_id) @@ -92,11 +92,7 @@ impl VoicevoxCore { &SUPPORTED_DEVICES_CSTRING } - pub fn predict_duration( - &mut self, - phoneme_list: &[i64], - speaker_id: usize, - ) -> Result> { + pub fn predict_duration(&mut self, phoneme_list: &[i64], speaker_id: u32) -> Result> { self.synthesis_engine .inference_core_mut() .predict_duration(phoneme_list, speaker_id) @@ -112,7 +108,7 @@ impl VoicevoxCore { end_accent_list: &[i64], start_accent_phrase_list: &[i64], end_accent_phrase_list: &[i64], - speaker_id: usize, + speaker_id: u32, ) -> Result> { self.synthesis_engine .inference_core_mut() @@ -134,7 +130,7 @@ impl VoicevoxCore { phoneme_size: usize, f0: &[f32], phoneme: &[f32], - speaker_id: usize, + speaker_id: u32, ) -> Result> { self.synthesis_engine.inference_core_mut().decode( length, @@ -148,7 +144,7 @@ impl VoicevoxCore { pub fn audio_query( &mut self, text: &str, - speaker_id: usize, + speaker_id: u32, options: AudioQueryOptions, ) -> Result { if !self.synthesis_engine.is_openjtalk_dict_loaded() { @@ -178,7 +174,7 @@ impl VoicevoxCore { pub fn synthesis( &mut self, audio_query: &AudioQueryModel, - speaker_id: usize, + speaker_id: u32, options: SynthesisOptions, ) -> Result> { self.synthesis_engine.synthesis_wave_format( @@ -188,7 +184,7 @@ impl VoicevoxCore { ) } - pub fn tts(&mut self, text: &str, speaker_id: usize, options: TtsOptions) -> Result> { + pub fn tts(&mut self, text: &str, speaker_id: u32, options: TtsOptions) -> Result> { let audio_query = &self.audio_query(text, speaker_id, AudioQueryOptions::from(&options))?; self.synthesis(audio_query, speaker_id, SynthesisOptions::from(&options)) } @@ -290,7 +286,7 @@ impl InferenceCore { } } } - pub fn load_model(&mut self, speaker_id: usize) -> Result<()> { + pub fn load_model(&mut self, speaker_id: u32) -> Result<()> { if self.initialized { let status = self .status_option @@ -305,7 +301,7 @@ impl InferenceCore { Err(Error::UninitializedStatus) } } - pub fn is_model_loaded(&self, speaker_id: usize) -> bool { + pub fn is_model_loaded(&self, speaker_id: u32) -> bool { if let Some(status) = self.status_option.as_ref() { if let Some((model_index, _)) = get_model_index_and_speaker_id(speaker_id) { status.is_model_loaded(model_index) @@ -321,11 +317,7 @@ impl InferenceCore { self.status_option = None; } - pub fn predict_duration( - &mut self, - phoneme_list: &[i64], - speaker_id: usize, - ) -> Result> { + pub fn predict_duration(&mut self, phoneme_list: &[i64], speaker_id: u32) -> Result> { if !self.initialized { return Err(Error::UninitializedStatus); } @@ -377,7 +369,7 @@ impl InferenceCore { end_accent_list: &[i64], start_accent_phrase_list: &[i64], end_accent_phrase_list: &[i64], - speaker_id: usize, + speaker_id: u32, ) -> Result> { if !self.initialized { return Err(Error::UninitializedStatus); @@ -433,7 +425,7 @@ impl InferenceCore { phoneme_size: usize, f0: &[f32], phoneme: &[f32], - speaker_id: usize, + speaker_id: u32, ) -> Result> { if !self.initialized { return Err(Error::UninitializedStatus); @@ -553,7 +545,7 @@ static SUPPORTED_DEVICES_CSTRING: Lazy = Lazy::new(|| { .unwrap() }); -fn get_model_index_and_speaker_id(speaker_id: usize) -> Option<(usize, usize)> { +fn get_model_index_and_speaker_id(speaker_id: u32) -> Option<(usize, u32)> { SPEAKER_ID_MAP.get(&speaker_id).copied() } @@ -630,7 +622,7 @@ mod tests { #[case(1, Err(Error::UninitializedStatus), Ok(()))] #[case(999, Err(Error::UninitializedStatus), Err(Error::InvalidSpeakerId{speaker_id:999}))] fn load_model_works( - #[case] speaker_id: usize, + #[case] speaker_id: u32, #[case] expected_result_at_uninitialized: Result<()>, #[case] expected_result_at_initialized: Result<()>, ) { @@ -672,7 +664,7 @@ mod tests { #[case(0, true)] #[case(1, true)] #[case(999, false)] - fn is_model_loaded_works(#[case] speaker_id: usize, #[case] expected: bool) { + fn is_model_loaded_works(#[case] speaker_id: u32, #[case] expected: bool) { let internal = VoicevoxCore::new_with_mutex(); assert!( !internal.lock().unwrap().is_model_loaded(speaker_id), @@ -723,8 +715,8 @@ mod tests { #[case(1, Some((0,1)))] #[case(999, None)] fn get_model_index_and_speaker_id_works( - #[case] speaker_id: usize, - #[case] expected: Option<(usize, usize)>, + #[case] speaker_id: u32, + #[case] expected: Option<(usize, u32)>, ) { let actual = get_model_index_and_speaker_id(speaker_id); assert_eq!(expected, actual); diff --git a/crates/voicevox_core/src/status.rs b/crates/voicevox_core/src/status.rs index bf392658b..2c2865d79 100644 --- a/crates/voicevox_core/src/status.rs +++ b/crates/voicevox_core/src/status.rs @@ -18,7 +18,7 @@ pub struct Status { models: StatusModels, light_session_options: SessionOptions, // 軽いモデルはこちらを使う heavy_session_options: SessionOptions, // 重いモデルはこちらを使う - supported_styles: BTreeSet, + supported_styles: BTreeSet, } struct StatusModels { @@ -125,7 +125,7 @@ impl Status { for meta in metas.iter() { for style in meta.styles().iter() { - self.supported_styles.insert(*style.id() as usize); + self.supported_styles.insert(*style.id() as u32); } } @@ -196,7 +196,7 @@ impl Status { Ok(session_builder.with_model_from_memory(model_bytes)?) } - pub fn validate_speaker_id(&self, speaker_id: usize) -> bool { + pub fn validate_speaker_id(&self, speaker_id: u32) -> bool { self.supported_styles.contains(&speaker_id) } diff --git a/crates/voicevox_core_c_api/src/helpers.rs b/crates/voicevox_core_c_api/src/helpers.rs index 4aad6b861..f8f045cf7 100644 --- a/crates/voicevox_core_c_api/src/helpers.rs +++ b/crates/voicevox_core_c_api/src/helpers.rs @@ -52,11 +52,11 @@ pub(crate) fn convert_result(result: Result) -> (Option, VoicevoxResult pub(crate) fn create_audio_query( japanese_or_kana: &CStr, - speaker_id: usize, + speaker_id: u32, method: fn( &mut Internal, &str, - usize, + u32, voicevox_core::AudioQueryOptions, ) -> Result, options: VoicevoxAudioQueryOptions, diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index 159e90f9f..b8a6de8c3 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -69,7 +69,7 @@ pub extern "C" fn voicevox_initialize(options: VoicevoxInitializeOptions) -> Voi } #[no_mangle] -pub extern "C" fn voicevox_load_model(speaker_id: usize) -> VoicevoxResultCode { +pub extern "C" fn voicevox_load_model(speaker_id: u32) -> VoicevoxResultCode { let result = lock_internal().load_model(speaker_id); let (_, result_code) = convert_result(result); result_code @@ -81,7 +81,7 @@ pub extern "C" fn voicevox_is_gpu_mode() -> bool { } #[no_mangle] -pub extern "C" fn voicevox_is_model_loaded(speaker_id: usize) -> bool { +pub extern "C" fn voicevox_is_model_loaded(speaker_id: u32) -> bool { lock_internal().is_model_loaded(speaker_id) } @@ -104,7 +104,7 @@ pub extern "C" fn voicevox_get_supported_devices_json() -> *const c_char { pub extern "C" fn voicevox_predict_duration( length: usize, phoneme_list: *mut i64, - speaker_id: usize, + speaker_id: u32, output: *mut f32, ) -> VoicevoxResultCode { let result = lock_internal().predict_duration( @@ -131,7 +131,7 @@ pub extern "C" fn voicevox_predict_intonation( end_accent_list: *mut i64, start_accent_phrase_list: *mut i64, end_accent_phrase_list: *mut i64, - speaker_id: usize, + speaker_id: u32, output: *mut f32, ) -> VoicevoxResultCode { let result = lock_internal().predict_intonation( @@ -158,7 +158,7 @@ pub extern "C" fn voicevox_decode( phoneme_size: i64, f0: *mut f32, phoneme: *mut f32, - speaker_id: usize, + speaker_id: u32, output: *mut f32, ) -> VoicevoxResultCode { let length = length as usize; @@ -191,7 +191,7 @@ pub extern "C" fn voicevox_make_default_audio_query_options() -> VoicevoxAudioQu #[no_mangle] pub extern "C" fn voicevox_audio_query( text: *const c_char, - speaker_id: usize, + speaker_id: u32, options: VoicevoxAudioQueryOptions, output_audio_query_json: *mut *mut c_char, ) -> VoicevoxResultCode { @@ -220,7 +220,7 @@ pub extern "C" fn voicevox_make_default_synthesis_options() -> VoicevoxSynthesis #[no_mangle] pub extern "C" fn voicevox_synthesis( audio_query_json: *const c_char, - speaker_id: usize, + speaker_id: u32, options: VoicevoxSynthesisOptions, output_wav_size: *mut usize, output_wav: *mut *mut u8, @@ -265,7 +265,7 @@ pub extern "C" fn voicevox_make_default_tts_options() -> VoicevoxTtsOptions { #[no_mangle] pub extern "C" fn voicevox_tts( text: *const c_char, - speaker_id: usize, + speaker_id: u32, options: VoicevoxTtsOptions, output_wav_size: *mut usize, output_wav: *mut *mut u8,