From dd2ece823b619e6d93dadf87df5fd19398b0359f Mon Sep 17 00:00:00 2001
From: Ryo Yamashita
Date: Mon, 4 Nov 2024 14:56:16 +0900
Subject: [PATCH] fix: resolve async-related TODOs and FIXMEs (#868)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This makes two changes to the async API:

* In `load_voice_model`, run the creation of the ONNX Runtime `Session`
  (which is fairly heavy) on the thread pool.
* In the Python API's `Synthesizer`, make the `Closable` mechanism fulfill
  its role properly.
---
 crates/voicevox_core/src/synthesizer.rs       |  8 ++-
 crates/voicevox_core/src/voice_model.rs       |  9 +--
 .../voicevox_core_python_api/src/convert.rs   |  5 +-
 crates/voicevox_core_python_api/src/lib.rs    | 64 ++++++++++++-------
 4 files changed, 54 insertions(+), 32 deletions(-)

diff --git a/crates/voicevox_core/src/synthesizer.rs b/crates/voicevox_core/src/synthesizer.rs
index b30d7c3ca..b68d1ab2c 100644
--- a/crates/voicevox_core/src/synthesizer.rs
+++ b/crates/voicevox_core/src/synthesizer.rs
@@ -248,9 +248,11 @@ mod inner {
             &self,
             model: &voice_model::Inner,
         ) -> crate::Result<()> {
-            let model_bytes = &model.read_inference_models().await?;
-            // TODO: 重い操作なので、asyncにする
-            self.status.insert_model(model.header(), model_bytes)
+            let model_bytes = model.read_inference_models().await?;
+
+            let status = self.status.clone();
+            let header = model.header().clone();
+            A::unblock(move || status.insert_model(&header, &model_bytes)).await
         }
 
         pub(super) fn unload_voice_model(&self, voice_model_id: VoiceModelId) -> Result<()> {
diff --git a/crates/voicevox_core/src/voice_model.rs b/crates/voicevox_core/src/voice_model.rs
index c2920398b..9914409eb 100644
--- a/crates/voicevox_core/src/voice_model.rs
+++ b/crates/voicevox_core/src/voice_model.rs
@@ -63,7 +63,7 @@ impl VoiceModelId {
 
 #[self_referencing]
 pub(crate) struct Inner {
-    header: VoiceModelHeader,
+    header: Arc<VoiceModelHeader>,
 
     #[borrows(header)]
     #[not_covariant]
@@ -126,11 +126,12 @@ impl Inner {
             )
         })?;
 
-        let header = VoiceModelHeader::new(manifest, metas, path)?;
+        let header = VoiceModelHeader::new(manifest, metas, path)?.into();
 
         InnerTryBuilder {
             header,
-            inference_model_entries_builder: |VoiceModelHeader { manifest, .. }| {
+            inference_model_entries_builder: |header| {
+                let VoiceModelHeader { manifest, .. } = &**header;
                 manifest
                     .domains()
                     .each_ref()
@@ -182,7 +183,7 @@ impl Inner {
         &self.borrow_header().metas
     }
 
-    pub(crate) fn header(&self) -> &VoiceModelHeader {
+    pub(crate) fn header(&self) -> &Arc<VoiceModelHeader> {
         self.borrow_header()
     }
 
diff --git a/crates/voicevox_core_python_api/src/convert.rs b/crates/voicevox_core_python_api/src/convert.rs
index d4a867606..711da5fe4 100644
--- a/crates/voicevox_core_python_api/src/convert.rs
+++ b/crates/voicevox_core_python_api/src/convert.rs
@@ -111,7 +111,7 @@ pub(crate) fn async_modify_accent_phrases<'py, Fun, Fut>(
 ) -> PyResult<&'py PyAny>
 where
     Fun: FnOnce(Vec<AccentPhrase>, StyleId) -> Fut + Send + 'static,
-    Fut: Future<Output = voicevox_core::Result<Vec<AccentPhrase>>> + Send + 'static,
+    Fut: Future<Output = PyResult<Vec<AccentPhrase>>> + Send + 'static,
 {
     let rust_accent_phrases = accent_phrases
         .iter()
@@ -121,10 +121,9 @@ where
         py,
         pyo3_asyncio::tokio::get_current_locals(py)?,
        async move {
-            let replaced_accent_phrases = method(rust_accent_phrases, speaker_id).await;
+            let replaced_accent_phrases = method(rust_accent_phrases, speaker_id).await?;
             Python::with_gil(|py| {
                 let replaced_accent_phrases = replaced_accent_phrases
-                    .into_py_result(py)?
                     .iter()
                     .map(move |accent_phrase| {
                         to_pydantic_dataclass(
diff --git a/crates/voicevox_core_python_api/src/lib.rs b/crates/voicevox_core_python_api/src/lib.rs
index a2d1c2475..52ba87c2c 100644
--- a/crates/voicevox_core_python_api/src/lib.rs
+++ b/crates/voicevox_core_python_api/src/lib.rs
@@ -1055,11 +1055,9 @@ mod asyncio {
 
     #[pyclass]
     pub(crate) struct Synthesizer {
-        // FIXME: `Arc`ではなく、`Arc>`を
-        // `clone`する
         synthesizer: Arc<
             Closable<
-                Arc<voicevox_core::nonblocking::Synthesizer<voicevox_core::nonblocking::OpenJtalk>>,
+                voicevox_core::nonblocking::Synthesizer<voicevox_core::nonblocking::OpenJtalk>,
                 Self,
                 Tokio,
             >,
@@ -1090,7 +1088,7 @@ mod asyncio {
                     cpu_num_threads,
                 },
             );
-            let synthesizer = Python::with_gil(|py| synthesizer.into_py_result(py))?.into();
+            let synthesizer = Python::with_gil(|py| synthesizer.into_py_result(py))?;
             let synthesizer = Closable::new(synthesizer).into();
             Ok(Self { synthesizer })
         }
@@ -1139,9 +1137,12 @@ mod asyncio {
             py: Python<'py>,
         ) -> PyResult<&'py PyAny> {
             let model: VoiceModelFile = model.extract()?;
-            let synthesizer = self.synthesizer.read()?.clone();
+            let synthesizer = self.synthesizer.clone();
             pyo3_asyncio::tokio::future_into_py(py, async move {
-                let result = synthesizer.load_voice_model(&*model.model.read()?).await;
+                let result = synthesizer
+                    .read()?
+                    .load_voice_model(&*model.model.read()?)
+                    .await;
                 Python::with_gil(|py| result.into_py_result(py))
             })
         }
@@ -1173,13 +1174,14 @@ mod asyncio {
             style_id: u32,
             py: Python<'py>,
         ) -> PyResult<&'py PyAny> {
-            let synthesizer = self.synthesizer.read()?.clone();
+            let synthesizer = self.synthesizer.clone();
             let kana = kana.to_owned();
             pyo3_asyncio::tokio::future_into_py_with_locals(
                 py,
                 pyo3_asyncio::tokio::get_current_locals(py)?,
                 async move {
                     let audio_query = synthesizer
+                        .read()?
                         .audio_query_from_kana(&kana, StyleId::new(style_id))
                         .await;
 
@@ -1201,13 +1203,16 @@ mod asyncio {
             style_id: u32,
             py: Python<'py>,
         ) -> PyResult<&'py PyAny> {
-            let synthesizer = self.synthesizer.read()?.clone();
+            let synthesizer = self.synthesizer.clone();
             let text = text.to_owned();
             pyo3_asyncio::tokio::future_into_py_with_locals(
                 py,
                 pyo3_asyncio::tokio::get_current_locals(py)?,
                 async move {
-                    let audio_query = synthesizer.audio_query(&text, StyleId::new(style_id)).await;
+                    let audio_query = synthesizer
+                        .read()?
+                        .audio_query(&text, StyleId::new(style_id))
+                        .await;
 
                     Python::with_gil(|py| {
                         let audio_query = audio_query.into_py_result(py)?;
@@ -1225,13 +1230,14 @@ mod asyncio {
             style_id: u32,
             py: Python<'py>,
         ) -> PyResult<&'py PyAny> {
-            let synthesizer = self.synthesizer.read()?.clone();
+            let synthesizer = self.synthesizer.clone();
             let kana = kana.to_owned();
             pyo3_asyncio::tokio::future_into_py_with_locals(
                 py,
                 pyo3_asyncio::tokio::get_current_locals(py)?,
                 async move {
                     let accent_phrases = synthesizer
+                        .read()?
                         .create_accent_phrases_from_kana(&kana, StyleId::new(style_id))
                         .await;
                     Python::with_gil(|py| {
@@ -1254,13 +1260,14 @@ mod asyncio {
             style_id: u32,
             py: Python<'py>,
         ) -> PyResult<&'py PyAny> {
-            let synthesizer = self.synthesizer.read()?.clone();
+            let synthesizer = self.synthesizer.clone();
             let text = text.to_owned();
             pyo3_asyncio::tokio::future_into_py_with_locals(
                 py,
                 pyo3_asyncio::tokio::get_current_locals(py)?,
                 async move {
                     let accent_phrases = synthesizer
+                        .read()?
                         .create_accent_phrases(&text, StyleId::new(style_id))
                         .await;
                     Python::with_gil(|py| {
@@ -1283,12 +1290,15 @@ mod asyncio {
             style_id: u32,
             py: Python<'py>,
         ) -> PyResult<&'py PyAny> {
-            let synthesizer = self.synthesizer.read()?.clone();
+            let synthesizer = self.synthesizer.clone();
             crate::convert::async_modify_accent_phrases(
                 accent_phrases,
                 StyleId::new(style_id),
                 py,
-                |a, s| async move { synthesizer.replace_mora_data(&a, s).await },
+                |a, s| async move {
+                    let result = synthesizer.read()?.replace_mora_data(&a, s).await;
+                    Python::with_gil(|py| result.into_py_result(py))
+                },
             )
         }
 
@@ -1298,12 +1308,15 @@ mod asyncio {
             style_id: u32,
             py: Python<'py>,
         ) -> PyResult<&'py PyAny> {
-            let synthesizer = self.synthesizer.read()?.clone();
+            let synthesizer = self.synthesizer.clone();
             crate::convert::async_modify_accent_phrases(
                 accent_phrases,
                 StyleId::new(style_id),
                 py,
-                |a, s| async move { synthesizer.replace_phoneme_length(&a, s).await },
+                |a, s| async move {
+                    let result = synthesizer.read()?.replace_phoneme_length(&a, s).await;
+                    Python::with_gil(|py| result.into_py_result(py))
+                },
             )
         }
 
@@ -1313,12 +1326,15 @@ mod asyncio {
             style_id: u32,
             py: Python<'py>,
         ) -> PyResult<&'py PyAny> {
-            let synthesizer = self.synthesizer.read()?.clone();
+            let synthesizer = self.synthesizer.clone();
             crate::convert::async_modify_accent_phrases(
                 accent_phrases,
                 StyleId::new(style_id),
                 py,
-                |a, s| async move { synthesizer.replace_mora_pitch(&a, s).await },
+                |a, s| async move {
+                    let result = synthesizer.read()?.replace_mora_pitch(&a, s).await;
+                    Python::with_gil(|py| result.into_py_result(py))
+                },
             )
         }
 
@@ -1330,12 +1346,13 @@ mod asyncio {
             enable_interrogative_upspeak: bool,
             py: Python<'py>,
         ) -> PyResult<&'py PyAny> {
-            let synthesizer = self.synthesizer.read()?.clone();
+            let synthesizer = self.synthesizer.clone();
             pyo3_asyncio::tokio::future_into_py_with_locals(
                 py,
                 pyo3_asyncio::tokio::get_current_locals(py)?,
                 async move {
                     let wav = synthesizer
+                        .read()?
                         .synthesis(
                             &audio_query,
                             StyleId::new(style_id),
@@ -1368,13 +1385,16 @@ mod asyncio {
             let options = TtsOptions {
                 enable_interrogative_upspeak,
             };
-            let synthesizer = self.synthesizer.read()?.clone();
+            let synthesizer = self.synthesizer.clone();
             let kana = kana.to_owned();
             pyo3_asyncio::tokio::future_into_py_with_locals(
                 py,
                 pyo3_asyncio::tokio::get_current_locals(py)?,
                 async move {
-                    let wav = synthesizer.tts_from_kana(&kana, style_id, &options).await;
+                    let wav = synthesizer
+                        .read()?
+                        .tts_from_kana(&kana, style_id, &options)
+                        .await;
 
                     Python::with_gil(|py| {
                         let wav = wav.into_py_result(py)?;
@@ -1400,13 +1420,13 @@ mod asyncio {
             let options = TtsOptions {
                 enable_interrogative_upspeak,
             };
-            let synthesizer = self.synthesizer.read()?.clone();
+            let synthesizer = self.synthesizer.clone();
             let text = text.to_owned();
             pyo3_asyncio::tokio::future_into_py_with_locals(
                 py,
                 pyo3_asyncio::tokio::get_current_locals(py)?,
                 async move {
-                    let wav = synthesizer.tts(&text, style_id, &options).await;
+                    let wav = synthesizer.read()?.tts(&text, style_id, &options).await;
 
                     Python::with_gil(|py| {
                         let wav = wav.into_py_result(py)?;
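Note on the `load_voice_model` change above: the point is to move the blocking ONNX Runtime `Session` construction off the async executor and onto the thread pool. Below is a minimal, self-contained sketch of that offloading pattern using `tokio::task::spawn_blocking` directly (presumably what `A::unblock` resolves to for the Tokio-backed variant); the `Status` type and `insert_model_blocking` method are illustrative stand-ins, not the crate's actual API.

// Requires the `tokio` crate with the "rt-multi-thread" and "macros" features.
use std::sync::Arc;

// Hypothetical stand-in for the crate's `Status`; not the real API.
struct Status;

impl Status {
    // Pretend this builds an expensive ONNX Runtime `Session` from the bytes.
    fn insert_model_blocking(&self, model_bytes: &[u8]) -> usize {
        model_bytes.len()
    }
}

// Run the heavy, blocking work on the blocking thread pool so the async task
// driving this future is not stalled. This mirrors the shape of the patched
// `load_voice_model`: clone the shared state, move owned values into a
// `'static` closure, then await the handle.
async fn load(status: Arc<Status>, model_bytes: Vec<u8>) -> usize {
    let status = Arc::clone(&status);
    tokio::task::spawn_blocking(move || status.insert_model_blocking(&model_bytes))
        .await
        .expect("the blocking task panicked")
}

#[tokio::main]
async fn main() {
    let n = load(Arc::new(Status), vec![0u8; 1024]).await;
    println!("inserted {n} bytes");
}

As in the patch, the shared state is cloned into an owned `Arc` and moved into the closure, because the closure handed to the thread pool must be `'static` and cannot borrow from the async task.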