Skip to content

Commit

Permalink
fix: 非同期関連のtodoとfixmeを解消 (#868)
Browse files Browse the repository at this point in the history
非同期APIに関して以下の二つを行う。

* `load_voice_model`にて、ONNX Runtimeの`Session`作成(結構重い)をスレッドプール上でやるようにする。
* Python APIの`Synthesizer`にて、`Closable`の機構が適切に役割を果たすようにする。
  • Loading branch information
qryxip authored Nov 4, 2024
1 parent a8131d9 commit dd2ece8
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 32 deletions.
8 changes: 5 additions & 3 deletions crates/voicevox_core/src/synthesizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,9 +248,11 @@ mod inner {
&self,
model: &voice_model::Inner<A>,
) -> crate::Result<()> {
let model_bytes = &model.read_inference_models().await?;
// TODO: 重い操作なので、asyncにする
self.status.insert_model(model.header(), model_bytes)
let model_bytes = model.read_inference_models().await?;

let status = self.status.clone();
let header = model.header().clone();
A::unblock(move || status.insert_model(&header, &model_bytes)).await
}

pub(super) fn unload_voice_model(&self, voice_model_id: VoiceModelId) -> Result<()> {
Expand Down
9 changes: 5 additions & 4 deletions crates/voicevox_core/src/voice_model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ impl VoiceModelId {

#[self_referencing]
pub(crate) struct Inner<A: Async> {
header: VoiceModelHeader,
header: Arc<VoiceModelHeader>,

#[borrows(header)]
#[not_covariant]
Expand Down Expand Up @@ -126,11 +126,12 @@ impl<A: Async> Inner<A> {
)
})?;

let header = VoiceModelHeader::new(manifest, metas, path)?;
let header = VoiceModelHeader::new(manifest, metas, path)?.into();

InnerTryBuilder {
header,
inference_model_entries_builder: |VoiceModelHeader { manifest, .. }| {
inference_model_entries_builder: |header| {
let VoiceModelHeader { manifest, .. } = &**header;
manifest
.domains()
.each_ref()
Expand Down Expand Up @@ -182,7 +183,7 @@ impl<A: Async> Inner<A> {
&self.borrow_header().metas
}

pub(crate) fn header(&self) -> &VoiceModelHeader {
pub(crate) fn header(&self) -> &Arc<VoiceModelHeader> {
self.borrow_header()
}

Expand Down
5 changes: 2 additions & 3 deletions crates/voicevox_core_python_api/src/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ pub(crate) fn async_modify_accent_phrases<'py, Fun, Fut>(
) -> PyResult<&'py PyAny>
where
Fun: FnOnce(Vec<AccentPhrase>, StyleId) -> Fut + Send + 'static,
Fut: Future<Output = voicevox_core::Result<Vec<AccentPhrase>>> + Send + 'static,
Fut: Future<Output = PyResult<Vec<AccentPhrase>>> + Send + 'static,
{
let rust_accent_phrases = accent_phrases
.iter()
Expand All @@ -121,10 +121,9 @@ where
py,
pyo3_asyncio::tokio::get_current_locals(py)?,
async move {
let replaced_accent_phrases = method(rust_accent_phrases, speaker_id).await;
let replaced_accent_phrases = method(rust_accent_phrases, speaker_id).await?;
Python::with_gil(|py| {
let replaced_accent_phrases = replaced_accent_phrases
.into_py_result(py)?
.iter()
.map(move |accent_phrase| {
to_pydantic_dataclass(
Expand Down
64 changes: 42 additions & 22 deletions crates/voicevox_core_python_api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1055,11 +1055,9 @@ mod asyncio {

#[pyclass]
pub(crate) struct Synthesizer {
// FIXME: `Arc<voicevox_core::nonblocking::Synthesizer>`ではなく、`Arc<Closable<_>>`を
// `clone`する
synthesizer: Arc<
Closable<
Arc<voicevox_core::nonblocking::Synthesizer<voicevox_core::nonblocking::OpenJtalk>>,
voicevox_core::nonblocking::Synthesizer<voicevox_core::nonblocking::OpenJtalk>,
Self,
Tokio,
>,
Expand Down Expand Up @@ -1090,7 +1088,7 @@ mod asyncio {
cpu_num_threads,
},
);
let synthesizer = Python::with_gil(|py| synthesizer.into_py_result(py))?.into();
let synthesizer = Python::with_gil(|py| synthesizer.into_py_result(py))?;
let synthesizer = Closable::new(synthesizer).into();
Ok(Self { synthesizer })
}
Expand Down Expand Up @@ -1139,9 +1137,12 @@ mod asyncio {
py: Python<'py>,
) -> PyResult<&'py PyAny> {
let model: VoiceModelFile = model.extract()?;
let synthesizer = self.synthesizer.read()?.clone();
let synthesizer = self.synthesizer.clone();
pyo3_asyncio::tokio::future_into_py(py, async move {
let result = synthesizer.load_voice_model(&*model.model.read()?).await;
let result = synthesizer
.read()?
.load_voice_model(&*model.model.read()?)
.await;
Python::with_gil(|py| result.into_py_result(py))
})
}
Expand Down Expand Up @@ -1173,13 +1174,14 @@ mod asyncio {
style_id: u32,
py: Python<'py>,
) -> PyResult<&'py PyAny> {
let synthesizer = self.synthesizer.read()?.clone();
let synthesizer = self.synthesizer.clone();
let kana = kana.to_owned();
pyo3_asyncio::tokio::future_into_py_with_locals(
py,
pyo3_asyncio::tokio::get_current_locals(py)?,
async move {
let audio_query = synthesizer
.read()?
.audio_query_from_kana(&kana, StyleId::new(style_id))
.await;

Expand All @@ -1201,13 +1203,16 @@ mod asyncio {
style_id: u32,
py: Python<'py>,
) -> PyResult<&'py PyAny> {
let synthesizer = self.synthesizer.read()?.clone();
let synthesizer = self.synthesizer.clone();
let text = text.to_owned();
pyo3_asyncio::tokio::future_into_py_with_locals(
py,
pyo3_asyncio::tokio::get_current_locals(py)?,
async move {
let audio_query = synthesizer.audio_query(&text, StyleId::new(style_id)).await;
let audio_query = synthesizer
.read()?
.audio_query(&text, StyleId::new(style_id))
.await;

Python::with_gil(|py| {
let audio_query = audio_query.into_py_result(py)?;
Expand All @@ -1225,13 +1230,14 @@ mod asyncio {
style_id: u32,
py: Python<'py>,
) -> PyResult<&'py PyAny> {
let synthesizer = self.synthesizer.read()?.clone();
let synthesizer = self.synthesizer.clone();
let kana = kana.to_owned();
pyo3_asyncio::tokio::future_into_py_with_locals(
py,
pyo3_asyncio::tokio::get_current_locals(py)?,
async move {
let accent_phrases = synthesizer
.read()?
.create_accent_phrases_from_kana(&kana, StyleId::new(style_id))
.await;
Python::with_gil(|py| {
Expand All @@ -1254,13 +1260,14 @@ mod asyncio {
style_id: u32,
py: Python<'py>,
) -> PyResult<&'py PyAny> {
let synthesizer = self.synthesizer.read()?.clone();
let synthesizer = self.synthesizer.clone();
let text = text.to_owned();
pyo3_asyncio::tokio::future_into_py_with_locals(
py,
pyo3_asyncio::tokio::get_current_locals(py)?,
async move {
let accent_phrases = synthesizer
.read()?
.create_accent_phrases(&text, StyleId::new(style_id))
.await;
Python::with_gil(|py| {
Expand All @@ -1283,12 +1290,15 @@ mod asyncio {
style_id: u32,
py: Python<'py>,
) -> PyResult<&'py PyAny> {
let synthesizer = self.synthesizer.read()?.clone();
let synthesizer = self.synthesizer.clone();
crate::convert::async_modify_accent_phrases(
accent_phrases,
StyleId::new(style_id),
py,
|a, s| async move { synthesizer.replace_mora_data(&a, s).await },
|a, s| async move {
let result = synthesizer.read()?.replace_mora_data(&a, s).await;
Python::with_gil(|py| result.into_py_result(py))
},
)
}

Expand All @@ -1298,12 +1308,15 @@ mod asyncio {
style_id: u32,
py: Python<'py>,
) -> PyResult<&'py PyAny> {
let synthesizer = self.synthesizer.read()?.clone();
let synthesizer = self.synthesizer.clone();
crate::convert::async_modify_accent_phrases(
accent_phrases,
StyleId::new(style_id),
py,
|a, s| async move { synthesizer.replace_phoneme_length(&a, s).await },
|a, s| async move {
let result = synthesizer.read()?.replace_phoneme_length(&a, s).await;
Python::with_gil(|py| result.into_py_result(py))
},
)
}

Expand All @@ -1313,12 +1326,15 @@ mod asyncio {
style_id: u32,
py: Python<'py>,
) -> PyResult<&'py PyAny> {
let synthesizer = self.synthesizer.read()?.clone();
let synthesizer = self.synthesizer.clone();
crate::convert::async_modify_accent_phrases(
accent_phrases,
StyleId::new(style_id),
py,
|a, s| async move { synthesizer.replace_mora_pitch(&a, s).await },
|a, s| async move {
let result = synthesizer.read()?.replace_mora_pitch(&a, s).await;
Python::with_gil(|py| result.into_py_result(py))
},
)
}

Expand All @@ -1330,12 +1346,13 @@ mod asyncio {
enable_interrogative_upspeak: bool,
py: Python<'py>,
) -> PyResult<&'py PyAny> {
let synthesizer = self.synthesizer.read()?.clone();
let synthesizer = self.synthesizer.clone();
pyo3_asyncio::tokio::future_into_py_with_locals(
py,
pyo3_asyncio::tokio::get_current_locals(py)?,
async move {
let wav = synthesizer
.read()?
.synthesis(
&audio_query,
StyleId::new(style_id),
Expand Down Expand Up @@ -1368,13 +1385,16 @@ mod asyncio {
let options = TtsOptions {
enable_interrogative_upspeak,
};
let synthesizer = self.synthesizer.read()?.clone();
let synthesizer = self.synthesizer.clone();
let kana = kana.to_owned();
pyo3_asyncio::tokio::future_into_py_with_locals(
py,
pyo3_asyncio::tokio::get_current_locals(py)?,
async move {
let wav = synthesizer.tts_from_kana(&kana, style_id, &options).await;
let wav = synthesizer
.read()?
.tts_from_kana(&kana, style_id, &options)
.await;

Python::with_gil(|py| {
let wav = wav.into_py_result(py)?;
Expand All @@ -1400,13 +1420,13 @@ mod asyncio {
let options = TtsOptions {
enable_interrogative_upspeak,
};
let synthesizer = self.synthesizer.read()?.clone();
let synthesizer = self.synthesizer.clone();
let text = text.to_owned();
pyo3_asyncio::tokio::future_into_py_with_locals(
py,
pyo3_asyncio::tokio::get_current_locals(py)?,
async move {
let wav = synthesizer.tts(&text, style_id, &options).await;
let wav = synthesizer.read()?.tts(&text, style_id, &options).await;

Python::with_gil(|py| {
let wav = wav.into_py_result(py)?;
Expand Down

0 comments on commit dd2ece8

Please sign in to comment.