Merge branch 'master' into merge-engine-manifest.py

Hiroshiba · Nov 15, 2024 · commit 474b60d
2 parents: bcb480a + 03397d6

Showing 11 changed files with 92 additions and 31 deletions.
diff --git a/.gitignore b/.gitignore
@@ -2,6 +2,8 @@
 ## Artifact of generating licenses
 /licenses.json
 licenses_venv/
+## Presets
+presets.yaml
 
 # Copied from `https://github.com/github/gitignore/blob/main/Python.gitignore` @2022-01-10
 # Byte-compiled / optimized / DLL files

diff --git a/Dockerfile b/Dockerfile
@@ -229,7 +229,7 @@ COPY --from=download-onnxruntime-env /opt/onnxruntime /opt/onnxruntime
 # Add local files
 ADD ./voicevox_engine /opt/voicevox_engine/voicevox_engine
 ADD ./docs /opt/voicevox_engine/docs
-ADD ./run.py ./presets.yaml ./engine_manifest.json /opt/voicevox_engine/
+ADD ./run.py ./engine_manifest.json /opt/voicevox_engine/
 ADD ./resources /opt/voicevox_engine/resources
 ADD ./tools/generate_licenses.py /opt/voicevox_engine/tools/
 ADD ./tools/licenses /opt/voicevox_engine/tools/licenses

diff --git a/presets.yaml b/presets.yaml
diff --git a/run.py b/run.py
@@ -360,11 +360,10 @@ def main() -> None:
         env_preset_path = Path(envs.env_preset_path)
     else:
         env_preset_path = None
-    root_preset_path = engine_root() / "presets.yaml"
+    default_preset_path = engine_root() / "presets.yaml"
     preset_path = select_first_not_none(
-        [args.preset_file, env_preset_path, root_preset_path]
+        [args.preset_file, env_preset_path, default_preset_path]
     )
-    # ファイルの存在に関わらず指定されたパスをプリセットファイルとして使用する
     preset_manager = PresetManager(preset_path)
 
     use_dict = UserDictionary()

diff --git a/run.spec b/run.spec
@@ -7,7 +7,6 @@ datas = [
     ('resources', 'resources'),
     ('engine_manifest.json', '.'),
     ('licenses.json', '.'),
-    ('presets.yaml', '.'),
 ]
 datas += collect_data_files('pyopenjtalk')
 

diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
@@ -4,6 +4,7 @@
 from typing import Any
 
 import pytest
+import yaml
 from fastapi import FastAPI
 from fastapi.testclient import TestClient
 
@@ -35,8 +36,9 @@ def app_params(tmp_path: Path) -> dict[str, Any]:
     setting_loader = SettingHandler(tmp_path / "not_exist.yaml")
 
     # テスト用に隔離されたプリセットを生成する
-    preset_path = Path("./presets.yaml")
-    preset_manager = PresetManager(_copy_under_dir(preset_path, tmp_path))
+    preset_path = tmp_path / "presets.yaml"
+    _generate_preset(preset_path)
+    preset_manager = PresetManager(preset_path)
 
     # テスト用に隔離されたユーザー辞書を生成する
     user_dict = UserDictionary(
@@ -75,6 +77,28 @@ def client(app: FastAPI) -> TestClient:
     return TestClient(app)
 
 
+def _generate_preset(preset_path: Path) -> None:
+    """指定パス下にプリセットファイルを生成する。"""
+    contents = [
+        {
+            "id": 1,
+            "name": "サンプルプリセット",
+            "speaker_uuid": "7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff",
+            "style_id": 0,
+            "speedScale": 1,
+            "pitchScale": 0,
+            "intonationScale": 1,
+            "volumeScale": 1,
+            "prePhonemeLength": 0.1,
+            "postPhonemeLength": 0.1,
+            "pauseLength": None,
+            "pauseLengthScale": 1,
+        }
+    ]
+    with open(preset_path, mode="w", encoding="utf-8") as f:
+        yaml.safe_dump(contents, f, allow_unicode=True, sort_keys=False)
+
+
 def _generate_user_dict(dir_path: Path) -> Path:
     """指定されたディレクトリ下にユーザー辞書ファイルを生成し、生成されたファイルのパスを返す。"""
     contents = {

diff --git a/test/unit/preset/test_preset.py b/test/unit/preset/test_preset.py
@@ -52,11 +52,10 @@ def test_empty_file() -> None:
         preset_manager.load_presets()
 
 
-def test_not_exist_file() -> None:
-    preset_manager = PresetManager(preset_path=Path("test/presets-dummy.yaml"))
-    true_msg = "プリセットの設定ファイルが見つかりません"
-    with pytest.raises(PresetInternalError, match=true_msg):
-        preset_manager.load_presets()
+def test_not_exist_file(tmp_path: Path) -> None:
+    preset_manager = PresetManager(preset_path=tmp_path / "presets-dummy.yaml")
+    presets = preset_manager.load_presets()
+    assert len(presets) == 0
 
 
 def test_add_preset(tmp_path: Path) -> None:

diff --git a/tools/make_docs.py b/tools/make_docs.py
@@ -47,6 +47,7 @@ def generate_api_docs_html(schema: str) -> str:
     core_manager.register_core(CoreAdapter(MockCoreWrapper()), "mock")
     tts_engines = TTSEngineManager()
     tts_engines.register_engine(MockTTSEngine(), "mock")
+    preset_path = engine_root() / "presets.yaml"
     engine_manifest = load_manifest(engine_manifest_path())
     library_manager = LibraryManager(
         get_save_dir() / "installed_libraries",
@@ -61,9 +62,7 @@ def generate_api_docs_html(schema: str) -> str:
         tts_engines=tts_engines,
         core_manager=core_manager,
         setting_loader=SettingHandler(USER_SETTING_PATH),
-        preset_manager=PresetManager(  # FIXME: impl MockPresetManager
-            preset_path=engine_root() / "presets.yaml",
-        ),
+        preset_manager=PresetManager(preset_path),
         user_dict=UserDictionary(),
         engine_manifest=engine_manifest,
         library_manager=library_manager,

diff --git a/voicevox_engine/preset/preset_manager.py b/voicevox_engine/preset/preset_manager.py
@@ -29,10 +29,13 @@ class PresetManager:
     """
 
     def __init__(self, preset_path: Path):
-        """プリセットの設定ファイルへのパスからプリセットマネージャーを生成する"""
+        """プリセットマネージャーを生成する。プリセットファイルが存在しない場合は新規作成する。"""
+
         self.presets: list[Preset] = []  # 全プリセットのキャッシュ
         self.last_modified_time = 0.0
         self.preset_path = preset_path
+        if not self.preset_path.exists():
+            self.preset_path.write_text("[]")
 
     def _refresh_cache(self) -> None:
         """プリセットの設定ファイルの最新状態をキャッシュへ反映する"""

diff --git a/voicevox_engine/tts_pipeline/mora_mapping.py b/voicevox_engine/tts_pipeline/mora_mapping.py
@@ -56,6 +56,7 @@
     "ゥ",
     "ウ",
     "ウィ",
+    "ウゥ",
     "ウェ",
     "ウォ",
     "ェ",
@@ -65,18 +66,30 @@
     "カ",
     "ガ",
     "キ",
+    "キィ",
     "キェ",
     "キャ",
     "キュ",
     "キョ",
     "ギ",
+    "ギィ",
     "ギェ",
     "ギャ",
     "ギュ",
     "ギョ",
     "ク",
+    "クァ",
+    "クィ",
+    "クゥ",
+    "クェ",
+    "クォ",
     "クヮ",
     "グ",
+    "グァ",
+    "グィ",
+    "グゥ",
+    "グェ",
+    "グォ",
     "グヮ",
     "ケ",
     "ゲ",
@@ -110,6 +123,10 @@
     "チュ",
     "チョ",
     "ヂ",
+    "ヂェ",
+    "ヂャ",
+    "ヂュ",
+    "ヂョ",
     "ッ",
     "ツ",
     "ツァ",
@@ -119,6 +136,7 @@
     "ヅ",
     "テ",
     "ティ",
+    "テェ",
     "テャ",
     "テュ",
     "テョ",
@@ -134,6 +152,7 @@
     "ドゥ",
     "ナ",
     "ニ",
+    "ニィ",
     "ニェ",
     "ニャ",
     "ニュ",
@@ -145,16 +164,19 @@
     "バ",
     "パ",
     "ヒ",
+    "ヒィ",
     "ヒェ",
     "ヒャ",
     "ヒュ",
     "ヒョ",
     "ビ",
+    "ビィ",
     "ビェ",
     "ビャ",
     "ビュ",
     "ビョ",
     "ピ",
+    "ピィ",
     "ピェ",
     "ピャ",
     "ピュ",
@@ -174,6 +196,7 @@
     "ポ",
     "マ",
     "ミ",
+    "ミィ",
     "ミェ",
     "ミャ",
     "ミュ",
@@ -189,6 +212,7 @@
     "ヨ",
     "ラ",
     "リ",
+    "リィ",
     "リェ",
     "リャ",
     "リュ",
@@ -228,6 +252,7 @@
     ("リュ", "ry", "u"),
     ("リャ", "ry", "a"),
     ("リェ", "ry", "e"),
+    ("リィ", "ry", "i"),
     ("リ", "r", "i"),
     ("ラ", "r", "a"),
     ("ヨ", "y", "o"),
@@ -240,6 +265,7 @@
     ("ミュ", "my", "u"),
     ("ミャ", "my", "a"),
     ("ミェ", "my", "e"),
+    ("ミィ", "my", "i"),
     ("ミ", "m", "i"),
     ("マ", "m", "a"),
     ("ポ", "p", "o"),
@@ -259,16 +285,19 @@
     ("ピュ", "py", "u"),
     ("ピャ", "py", "a"),
     ("ピェ", "py", "e"),
+    ("ピィ", "py", "i"),
     ("ピ", "p", "i"),
     ("ビョ", "by", "o"),
     ("ビュ", "by", "u"),
     ("ビャ", "by", "a"),
     ("ビェ", "by", "e"),
+    ("ビィ", "by", "i"),
     ("ビ", "b", "i"),
     ("ヒョ", "hy", "o"),
     ("ヒュ", "hy", "u"),
     ("ヒャ", "hy", "a"),
     ("ヒェ", "hy", "e"),
+    ("ヒィ", "hy", "i"),
     ("ヒ", "h", "i"),
     ("パ", "p", "a"),
     ("バ", "b", "a"),
@@ -280,6 +309,7 @@
     ("ニュ", "ny", "u"),
     ("ニャ", "ny", "a"),
     ("ニェ", "ny", "e"),
+    ("ニィ", "ny", "i"),
     ("ニ", "n", "i"),
     ("ナ", "n", "a"),
     ("ドゥ", "d", "u"),
@@ -295,6 +325,7 @@
     ("テョ", "ty", "o"),
     ("テュ", "ty", "u"),
     ("テャ", "ty", "a"),
+    ("テェ", "ty", "e"),
     ("ティ", "t", "i"),
     ("テ", "t", "e"),
     ("ツォ", "ts", "o"),
@@ -335,25 +366,36 @@
     ("ゲ", "g", "e"),
     ("ケ", "k", "e"),
     ("グヮ", "gw", "a"),
+    ("グォ", "gw", "o"),
+    ("グェ", "gw", "e"),
+    ("グゥ", "gw", "u"),
+    ("グィ", "gw", "i"),
     ("グ", "g", "u"),
     ("クヮ", "kw", "a"),
+    ("クォ", "kw", "o"),
+    ("クェ", "kw", "e"),
+    ("クゥ", "kw", "u"),
+    ("クィ", "kw", "i"),
     ("ク", "k", "u"),
     ("ギョ", "gy", "o"),
     ("ギュ", "gy", "u"),
     ("ギャ", "gy", "a"),
     ("ギェ", "gy", "e"),
+    ("ギィ", "gy", "i"),
     ("ギ", "g", "i"),
     ("キョ", "ky", "o"),
     ("キュ", "ky", "u"),
     ("キャ", "ky", "a"),
     ("キェ", "ky", "e"),
+    ("キィ", "ky", "i"),
     ("キ", "k", "i"),
     ("ガ", "g", "a"),
     ("カ", "k", "a"),
     ("オ", None, "o"),
     ("エ", None, "e"),
     ("ウォ", "w", "o"),
     ("ウェ", "w", "e"),
+    ("ウゥ", "w", "u"),
     ("ウィ", "w", "i"),
     ("ウ", None, "u"),
     ("イェ", "y", "e"),
@@ -371,7 +413,13 @@
     ("ョ", "y", "o"),
     ("ュ", "y", "u"),
     ("ヅ", "z", "u"),
+    ("ヂョ", "j", "o"),
+    ("ヂュ", "j", "u"),
+    ("ヂャ", "j", "a"),
+    ("ヂェ", "j", "e"),
     ("ヂ", "j", "i"),
+    ("グァ", "gw", "a"),
+    ("クァ", "kw", "a"),
     ("ヶ", "k", "e"),
     ("ャ", "y", "a"),
     ("ォ", None, "o"),

diff --git a/voicevox_engine/user_dict/model.py b/voicevox_engine/user_dict/model.py
@@ -90,10 +90,10 @@ def check_is_katakana(cls, pronunciation: str) -> str:
     def check_mora_count_and_accent_type(self) -> Self:
         if self.mora_count is None:
             rule_others = (
-                "[イ][ェ]|[ヴ][ャュョ]|[トド][ゥ]|[テデ][ィャュョ]|[デ][ェ]|[クグ][ヮ]"
+                "[イ][ェ]|[ヴ][ャュョ]|[ウクグトド][ゥ]|[テデ][ィェャュョ]|[クグ][ヮ]"
             )
-            rule_line_i = "[キシチニヒミリギジビピ][ェャュョ]"
-            rule_line_u = "[ツフヴ][ァ]|[ウスツフヴズ][ィ]|[ウツフヴ][ェォ]"
+            rule_line_i = "[キシチニヒミリギジヂビピ][ェャュョ]|[キニヒミリギビピ][ィ]"
+            rule_line_u = "[クツフヴグ][ァ]|[ウクスツフヴグズ][ィ]|[ウクツフヴグ][ェォ]"
             rule_one_mora = "[ァ-ヴー]"
             self.mora_count = len(
                 findall(