Merge branch 'main' into feat-rename-audio-query-to-create-audio-query

VOICEVOX · Dec 1, 2024 · 87dd095
2 parents 3122a44 + c61d5db
commit 87dd095
Show file tree

Hide file tree

Showing 5 changed files with 166 additions and 168 deletions.
diff --git a/crates/voicevox_core/src/engine/model.rs b/crates/voicevox_core/src/engine/model.rs
@@ -84,10 +84,10 @@ pub struct AudioQuery {
     pub pause_length_scale: (),
     /// \[読み取り専用\] AquesTalk風記法。
     ///
-    /// [`Synthesizer::audio_query`]が返すもののみ`Some`となる。入力としてのAudioQueryでは無視され
+    /// [`Synthesizer::create_audio_query`]が返すもののみ`Some`となる。入力としてのAudioQueryでは無視され
     /// る。
     ///
-    /// [`Synthesizer::audio_query`]: crate::blocking::Synthesizer::audio_query
+    /// [`Synthesizer::create_audio_query`]: crate::blocking::Synthesizer::create_audio_query
     pub kana: Option<String>,
 }
 

diff --git a/crates/voicevox_core_python_api/python/voicevox_core/_models.py b/crates/voicevox_core_python_api/python/voicevox_core/_models.py
@@ -218,8 +218,8 @@ class AudioQuery:
     """
     [読み取り専用] AquesTalk風記法。
 
-    :func:`Synthesizer.audio_query` が返すもののみ ``str`` となる。入力としてのAudioQueryでは無視さ
-    れる。
+    :func:`Synthesizer.create_audio_query` が返すもののみ ``str`` となる。入力として
+    のAudioQueryでは無視される。
     """
 
 

diff --git a/docs/guide/user/usage.md b/docs/guide/user/usage.md
@@ -120,7 +120,7 @@ with VoiceModelFile.open("model/0.vvm") as model:
 ```python
 text = "サンプル音声です"
 style_id = 0
-audio_query = synthesizer.audio_query(text, style_id)
+audio_query = synthesizer.create_audio_query(text, style_id)
 pprint(audio_query)
 ```
 

diff --git a/example/python/run-asyncio.py b/example/python/run-asyncio.py
@@ -6,110 +6,109 @@
 import logging
 from argparse import ArgumentParser
 from pathlib import Path
-from typing import Tuple
 
 from voicevox_core import AccelerationMode, AudioQuery
 from voicevox_core.asyncio import Onnxruntime, OpenJtalk, Synthesizer, VoiceModelFile
 
 
+@dataclasses.dataclass
+class Args:
+    mode: AccelerationMode
+    vvm: Path
+    onnxruntime: str
+    dict_dir: Path
+    text: str
+    out: Path
+    style_id: int
+
+    @staticmethod
+    def parse_args() -> "Args":
+        argparser = ArgumentParser()
+        argparser.add_argument(
+            "--mode",
+            default="AUTO",
+            type=AccelerationMode,
+            help='モード ("AUTO", "CPU", "GPU")',
+        )
+        argparser.add_argument(
+            "vvm",
+            type=Path,
+            help="vvmファイルへのパス",
+        )
+        argparser.add_argument(
+            "--onnxruntime",
+            default=Onnxruntime.LIB_VERSIONED_FILENAME,
+            help="ONNX Runtimeのライブラリのfilename",
+        )
+        argparser.add_argument(
+            "--dict-dir",
+            default="./open_jtalk_dic_utf_8-1.11",
+            type=Path,
+            help="Open JTalkの辞書ディレクトリ",
+        )
+        argparser.add_argument(
+            "--text",
+            default="この音声は、ボイスボックスを使用して、出力されています。",
+            help="読み上げさせたい文章",
+        )
+        argparser.add_argument(
+            "--out",
+            default="./output.wav",
+            type=Path,
+            help="出力wavファイルのパス",
+        )
+        argparser.add_argument(
+            "--style-id",
+            default=0,
+            type=int,
+            help="話者IDを指定",
+        )
+        args = argparser.parse_args()
+        return Args(
+            args.mode,
+            args.vvm,
+            args.onnxruntime,
+            args.dict_dir,
+            args.text,
+            args.out,
+            args.style_id,
+        )
+
+
 async def main() -> None:
     logging.basicConfig(format="[%(levelname)s] %(name)s: %(message)s")
     logger = logging.getLogger(__name__)
     logger.setLevel("DEBUG")
     logging.getLogger("voicevox_core_python_api").setLevel("DEBUG")
     logging.getLogger("voicevox_core").setLevel("DEBUG")
 
-    (
-        acceleration_mode,
-        vvm_path,
-        onnxruntime_filename,
-        open_jtalk_dict_dir,
-        text,
-        out,
-        style_id,
-    ) = parse_args()
+    args = Args.parse_args()
 
-    logger.info("%s", f"Loading ONNX Runtime ({onnxruntime_filename=})")
-    onnxruntime = await Onnxruntime.load_once(filename=onnxruntime_filename)
+    logger.info("%s", f"Loading ONNX Runtime ({args.onnxruntime=})")
+    onnxruntime = await Onnxruntime.load_once(filename=args.onnxruntime)
 
     logger.debug("%s", f"{onnxruntime.supported_devices()=}")
 
-    logger.info("%s", f"Initializing ({acceleration_mode=}, {open_jtalk_dict_dir=})")
+    logger.info("%s", f"Initializing ({args.mode=}, {args.dict_dir=})")
     synthesizer = Synthesizer(
-        onnxruntime,
-        await OpenJtalk.new(open_jtalk_dict_dir),
-        acceleration_mode=acceleration_mode,
+        onnxruntime, await OpenJtalk.new(args.dict_dir), acceleration_mode=args.mode
     )
 
     logger.debug("%s", f"{synthesizer.metas=}")
     logger.debug("%s", f"{synthesizer.is_gpu_mode=}")
 
-    logger.info("%s", f"Loading `{vvm_path}`")
-    async with await VoiceModelFile.open(vvm_path) as model:
+    logger.info("%s", f"Loading `{args.vvm}`")
+    async with await VoiceModelFile.open(args.vvm) as model:
         await synthesizer.load_voice_model(model)
 
-    logger.info("%s", f"Creating an AudioQuery from {text!r}")
-    audio_query = await synthesizer.create_audio_query(text, style_id)
+    logger.info("%s", f"Creating an AudioQuery from {args.text!r}")
+    audio_query = await synthesizer.create_audio_query(args.text, args.style_id)
 
     logger.info("%s", f"Synthesizing with {display_as_json(audio_query)}")
-    wav = await synthesizer.synthesis(audio_query, style_id)
-
-    out.write_bytes(wav)
-    logger.info("%s", f"Wrote `{out}`")
+    wav = await synthesizer.synthesis(audio_query, args.style_id)
 
-
-def parse_args() -> Tuple[AccelerationMode, Path, str, Path, str, Path, int]:
-    argparser = ArgumentParser()
-    argparser.add_argument(
-        "--mode",
-        default="AUTO",
-        type=AccelerationMode,
-        help='モード ("AUTO", "CPU", "GPU")',
-    )
-    argparser.add_argument(
-        "vvm",
-        type=Path,
-        help="vvmファイルへのパス",
-    )
-    argparser.add_argument(
-        "--onnxruntime",
-        default=Onnxruntime.LIB_VERSIONED_FILENAME,
-        help="ONNX Runtimeのライブラリのfilename",
-    )
-    argparser.add_argument(
-        "--dict-dir",
-        default="./open_jtalk_dic_utf_8-1.11",
-        type=Path,
-        help="Open JTalkの辞書ディレクトリ",
-    )
-    argparser.add_argument(
-        "--text",
-        default="この音声は、ボイスボックスを使用して、出力されています。",
-        help="読み上げさせたい文章",
-    )
-    argparser.add_argument(
-        "--out",
-        default="./output.wav",
-        type=Path,
-        help="出力wavファイルのパス",
-    )
-    argparser.add_argument(
-        "--style-id",
-        default=0,
-        type=int,
-        help="話者IDを指定",
-    )
-    args = argparser.parse_args()
-    # FIXME: 流石に多くなってきたので、`dataclass`化する
-    return (
-        args.mode,
-        args.vvm,
-        args.onnxruntime,
-        args.dict_dir,
-        args.text,
-        args.out,
-        args.style_id,
-    )
+    args.out.write_bytes(wav)
+    logger.info("%s", f"Wrote `{args.out}`")
 
 
 def display_as_json(audio_query: AudioQuery) -> str: