diff --git a/.gitignore b/.gitignore index bfcae394d..70a8aa797 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ ## Artifact of generating licenses /licenses.json licenses_venv/ +## Presets +presets.yaml # Copied from `https://github.com/github/gitignore/blob/main/Python.gitignore` @2022-01-10 # Byte-compiled / optimized / DLL files diff --git a/Dockerfile b/Dockerfile index 4ebdab3fd..d42733f1f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -229,7 +229,7 @@ COPY --from=download-onnxruntime-env /opt/onnxruntime /opt/onnxruntime # Add local files ADD ./voicevox_engine /opt/voicevox_engine/voicevox_engine ADD ./docs /opt/voicevox_engine/docs -ADD ./run.py ./presets.yaml ./engine_manifest.json /opt/voicevox_engine/ +ADD ./run.py ./engine_manifest.json /opt/voicevox_engine/ ADD ./resources /opt/voicevox_engine/resources ADD ./tools/generate_licenses.py /opt/voicevox_engine/tools/ ADD ./tools/licenses /opt/voicevox_engine/tools/licenses diff --git a/presets.yaml b/presets.yaml deleted file mode 100644 index 6934b90d5..000000000 --- a/presets.yaml +++ /dev/null @@ -1,12 +0,0 @@ -- id: 1 - name: サンプルプリセット - speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff - style_id: 0 - speedScale: 1 - pitchScale: 0 - intonationScale: 1 - volumeScale: 1 - prePhonemeLength: 0.1 - postPhonemeLength: 0.1 - pauseLength: null - pauseLengthScale: 1 diff --git a/run.py b/run.py index 302f762f4..b17d29cae 100644 --- a/run.py +++ b/run.py @@ -360,11 +360,10 @@ def main() -> None: env_preset_path = Path(envs.env_preset_path) else: env_preset_path = None - root_preset_path = engine_root() / "presets.yaml" + default_preset_path = engine_root() / "presets.yaml" preset_path = select_first_not_none( - [args.preset_file, env_preset_path, root_preset_path] + [args.preset_file, env_preset_path, default_preset_path] ) - # ファイルの存在に関わらず指定されたパスをプリセットファイルとして使用する preset_manager = PresetManager(preset_path) use_dict = UserDictionary() diff --git a/run.spec b/run.spec index 7f3f7a1c9..0b70b71d1 100644 --- a/run.spec +++ b/run.spec @@ -7,7 +7,6 @@ datas = [ ('resources', 'resources'), ('engine_manifest.json', '.'), ('licenses.json', '.'), - ('presets.yaml', '.'), ] datas += collect_data_files('pyopenjtalk') diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py index c70a12121..1cb36e879 100644 --- a/test/e2e/conftest.py +++ b/test/e2e/conftest.py @@ -4,6 +4,7 @@ from typing import Any import pytest +import yaml from fastapi import FastAPI from fastapi.testclient import TestClient @@ -35,8 +36,9 @@ def app_params(tmp_path: Path) -> dict[str, Any]: setting_loader = SettingHandler(tmp_path / "not_exist.yaml") # テスト用に隔離されたプリセットを生成する - preset_path = Path("./presets.yaml") - preset_manager = PresetManager(_copy_under_dir(preset_path, tmp_path)) + preset_path = tmp_path / "presets.yaml" + _generate_preset(preset_path) + preset_manager = PresetManager(preset_path) # テスト用に隔離されたユーザー辞書を生成する user_dict = UserDictionary( @@ -75,6 +77,28 @@ def client(app: FastAPI) -> TestClient: return TestClient(app) +def _generate_preset(preset_path: Path) -> None: + """指定パス下にプリセットファイルを生成する。""" + contents = [ + { + "id": 1, + "name": "サンプルプリセット", + "speaker_uuid": "7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff", + "style_id": 0, + "speedScale": 1, + "pitchScale": 0, + "intonationScale": 1, + "volumeScale": 1, + "prePhonemeLength": 0.1, + "postPhonemeLength": 0.1, + "pauseLength": None, + "pauseLengthScale": 1, + } + ] + with open(preset_path, mode="w", encoding="utf-8") as f: + yaml.safe_dump(contents, f, allow_unicode=True, sort_keys=False) + + def _generate_user_dict(dir_path: Path) -> Path: """指定されたディレクトリ下にユーザー辞書ファイルを生成し、生成されたファイルのパスを返す。""" contents = { diff --git a/test/unit/preset/test_preset.py b/test/unit/preset/test_preset.py index 10f983b35..0efbfcfff 100644 --- a/test/unit/preset/test_preset.py +++ b/test/unit/preset/test_preset.py @@ -52,11 +52,10 @@ def test_empty_file() -> None: preset_manager.load_presets() -def test_not_exist_file() -> None: - preset_manager = PresetManager(preset_path=Path("test/presets-dummy.yaml")) - true_msg = "プリセットの設定ファイルが見つかりません" - with pytest.raises(PresetInternalError, match=true_msg): - preset_manager.load_presets() +def test_not_exist_file(tmp_path: Path) -> None: + preset_manager = PresetManager(preset_path=tmp_path / "presets-dummy.yaml") + presets = preset_manager.load_presets() + assert len(presets) == 0 def test_add_preset(tmp_path: Path) -> None: diff --git a/tools/make_docs.py b/tools/make_docs.py index 88eb55d14..e9483d0f0 100644 --- a/tools/make_docs.py +++ b/tools/make_docs.py @@ -47,6 +47,7 @@ def generate_api_docs_html(schema: str) -> str: core_manager.register_core(CoreAdapter(MockCoreWrapper()), "mock") tts_engines = TTSEngineManager() tts_engines.register_engine(MockTTSEngine(), "mock") + preset_path = engine_root() / "presets.yaml" engine_manifest = load_manifest(engine_manifest_path()) library_manager = LibraryManager( get_save_dir() / "installed_libraries", @@ -61,9 +62,7 @@ def generate_api_docs_html(schema: str) -> str: tts_engines=tts_engines, core_manager=core_manager, setting_loader=SettingHandler(USER_SETTING_PATH), - preset_manager=PresetManager( # FIXME: impl MockPresetManager - preset_path=engine_root() / "presets.yaml", - ), + preset_manager=PresetManager(preset_path), user_dict=UserDictionary(), engine_manifest=engine_manifest, library_manager=library_manager, diff --git a/voicevox_engine/preset/preset_manager.py b/voicevox_engine/preset/preset_manager.py index 32453441f..7029db043 100644 --- a/voicevox_engine/preset/preset_manager.py +++ b/voicevox_engine/preset/preset_manager.py @@ -29,10 +29,13 @@ class PresetManager: """ def __init__(self, preset_path: Path): - """プリセットの設定ファイルへのパスからプリセットマネージャーを生成する""" + """プリセットマネージャーを生成する。プリセットファイルが存在しない場合は新規作成する。""" + self.presets: list[Preset] = [] # 全プリセットのキャッシュ self.last_modified_time = 0.0 self.preset_path = preset_path + if not self.preset_path.exists(): + self.preset_path.write_text("[]") def _refresh_cache(self) -> None: """プリセットの設定ファイルの最新状態をキャッシュへ反映する""" diff --git a/voicevox_engine/tts_pipeline/mora_mapping.py b/voicevox_engine/tts_pipeline/mora_mapping.py index bd7df524b..f0c5f1010 100644 --- a/voicevox_engine/tts_pipeline/mora_mapping.py +++ b/voicevox_engine/tts_pipeline/mora_mapping.py @@ -56,6 +56,7 @@ "ゥ", "ウ", "ウィ", + "ウゥ", "ウェ", "ウォ", "ェ", @@ -65,18 +66,30 @@ "カ", "ガ", "キ", + "キィ", "キェ", "キャ", "キュ", "キョ", "ギ", + "ギィ", "ギェ", "ギャ", "ギュ", "ギョ", "ク", + "クァ", + "クィ", + "クゥ", + "クェ", + "クォ", "クヮ", "グ", + "グァ", + "グィ", + "グゥ", + "グェ", + "グォ", "グヮ", "ケ", "ゲ", @@ -110,6 +123,10 @@ "チュ", "チョ", "ヂ", + "ヂェ", + "ヂャ", + "ヂュ", + "ヂョ", "ッ", "ツ", "ツァ", @@ -119,6 +136,7 @@ "ヅ", "テ", "ティ", + "テェ", "テャ", "テュ", "テョ", @@ -134,6 +152,7 @@ "ドゥ", "ナ", "ニ", + "ニィ", "ニェ", "ニャ", "ニュ", @@ -145,16 +164,19 @@ "バ", "パ", "ヒ", + "ヒィ", "ヒェ", "ヒャ", "ヒュ", "ヒョ", "ビ", + "ビィ", "ビェ", "ビャ", "ビュ", "ビョ", "ピ", + "ピィ", "ピェ", "ピャ", "ピュ", @@ -174,6 +196,7 @@ "ポ", "マ", "ミ", + "ミィ", "ミェ", "ミャ", "ミュ", @@ -189,6 +212,7 @@ "ヨ", "ラ", "リ", + "リィ", "リェ", "リャ", "リュ", @@ -228,6 +252,7 @@ ("リュ", "ry", "u"), ("リャ", "ry", "a"), ("リェ", "ry", "e"), + ("リィ", "ry", "i"), ("リ", "r", "i"), ("ラ", "r", "a"), ("ヨ", "y", "o"), @@ -240,6 +265,7 @@ ("ミュ", "my", "u"), ("ミャ", "my", "a"), ("ミェ", "my", "e"), + ("ミィ", "my", "i"), ("ミ", "m", "i"), ("マ", "m", "a"), ("ポ", "p", "o"), @@ -259,16 +285,19 @@ ("ピュ", "py", "u"), ("ピャ", "py", "a"), ("ピェ", "py", "e"), + ("ピィ", "py", "i"), ("ピ", "p", "i"), ("ビョ", "by", "o"), ("ビュ", "by", "u"), ("ビャ", "by", "a"), ("ビェ", "by", "e"), + ("ビィ", "by", "i"), ("ビ", "b", "i"), ("ヒョ", "hy", "o"), ("ヒュ", "hy", "u"), ("ヒャ", "hy", "a"), ("ヒェ", "hy", "e"), + ("ヒィ", "hy", "i"), ("ヒ", "h", "i"), ("パ", "p", "a"), ("バ", "b", "a"), @@ -280,6 +309,7 @@ ("ニュ", "ny", "u"), ("ニャ", "ny", "a"), ("ニェ", "ny", "e"), + ("ニィ", "ny", "i"), ("ニ", "n", "i"), ("ナ", "n", "a"), ("ドゥ", "d", "u"), @@ -295,6 +325,7 @@ ("テョ", "ty", "o"), ("テュ", "ty", "u"), ("テャ", "ty", "a"), + ("テェ", "ty", "e"), ("ティ", "t", "i"), ("テ", "t", "e"), ("ツォ", "ts", "o"), @@ -335,18 +366,28 @@ ("ゲ", "g", "e"), ("ケ", "k", "e"), ("グヮ", "gw", "a"), + ("グォ", "gw", "o"), + ("グェ", "gw", "e"), + ("グゥ", "gw", "u"), + ("グィ", "gw", "i"), ("グ", "g", "u"), ("クヮ", "kw", "a"), + ("クォ", "kw", "o"), + ("クェ", "kw", "e"), + ("クゥ", "kw", "u"), + ("クィ", "kw", "i"), ("ク", "k", "u"), ("ギョ", "gy", "o"), ("ギュ", "gy", "u"), ("ギャ", "gy", "a"), ("ギェ", "gy", "e"), + ("ギィ", "gy", "i"), ("ギ", "g", "i"), ("キョ", "ky", "o"), ("キュ", "ky", "u"), ("キャ", "ky", "a"), ("キェ", "ky", "e"), + ("キィ", "ky", "i"), ("キ", "k", "i"), ("ガ", "g", "a"), ("カ", "k", "a"), @@ -354,6 +395,7 @@ ("エ", None, "e"), ("ウォ", "w", "o"), ("ウェ", "w", "e"), + ("ウゥ", "w", "u"), ("ウィ", "w", "i"), ("ウ", None, "u"), ("イェ", "y", "e"), @@ -371,7 +413,13 @@ ("ョ", "y", "o"), ("ュ", "y", "u"), ("ヅ", "z", "u"), + ("ヂョ", "j", "o"), + ("ヂュ", "j", "u"), + ("ヂャ", "j", "a"), + ("ヂェ", "j", "e"), ("ヂ", "j", "i"), + ("グァ", "gw", "a"), + ("クァ", "kw", "a"), ("ヶ", "k", "e"), ("ャ", "y", "a"), ("ォ", None, "o"), diff --git a/voicevox_engine/user_dict/model.py b/voicevox_engine/user_dict/model.py index 355564f3e..0cc0dd88b 100644 --- a/voicevox_engine/user_dict/model.py +++ b/voicevox_engine/user_dict/model.py @@ -90,10 +90,10 @@ def check_is_katakana(cls, pronunciation: str) -> str: def check_mora_count_and_accent_type(self) -> Self: if self.mora_count is None: rule_others = ( - "[イ][ェ]|[ヴ][ャュョ]|[トド][ゥ]|[テデ][ィャュョ]|[デ][ェ]|[クグ][ヮ]" + "[イ][ェ]|[ヴ][ャュョ]|[ウクグトド][ゥ]|[テデ][ィェャュョ]|[クグ][ヮ]" ) - rule_line_i = "[キシチニヒミリギジビピ][ェャュョ]" - rule_line_u = "[ツフヴ][ァ]|[ウスツフヴズ][ィ]|[ウツフヴ][ェォ]" + rule_line_i = "[キシチニヒミリギジヂビピ][ェャュョ]|[キニヒミリギビピ][ィ]" + rule_line_u = "[クツフヴグ][ァ]|[ウクスツフヴグズ][ィ]|[ウクツフヴグ][ェォ]" rule_one_mora = "[ァ-ヴー]" self.mora_count = len( findall(