diff --git a/run.py b/run.py
index 34671965a..66a30de4f 100644
--- a/run.py
+++ b/run.py
@@ -54,6 +54,7 @@
     Speaker,
     SpeakerInfo,
     StyleIdNotFoundError,
+    StylePitchRange,
     SupportedDevicesInfo,
     UserDictWord,
     VvlibManifest,
@@ -537,6 +538,23 @@ def multi_synthesis(
             background=BackgroundTask(delete_file, f.name),
         )
 
+    @app.post(
+        "/optimal_pitch",
+        response_model=StylePitchRange,
+        tags=["その他"],
+        summary="指定したスタイルに対して最適なピッチ範囲を得る",
+    )
+    def optimal_pitch(style_id: StyleId) -> StylePitchRange:
+        # Look up the optimal pitch range registered for `style_id`; an
+        # unknown style is reported to the client as HTTP 404.
+        try:
+            low, high = metas_store.get_style_optimal_pitch_range(style_id)
+        except StyleIdNotFoundError as e:
+            raise HTTPException(
+                status_code=404, detail=f"該当するスタイル(style_id={e.style_id})が見つかりません"
+            )
+        return StylePitchRange(low=low, high=high)
+
     @app.post(
         "/morphable_targets",
         response_model=list[dict[str, MorphableTargetInfo]],
diff --git a/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/metas.json b/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/metas.json
index 577af6f58..4b367c9a3 100644
--- a/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/metas.json
+++ b/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/metas.json
@@ -1,3 +1,10 @@
 {
-  "supported_features": { "permitted_synthesis_morphing": "NOTHING" }
+  "supported_features": { "permitted_synthesis_morphing": "NOTHING" },
+  "range": [
+    {
+      "style_id": 8,
+      "low": 5.07,
+      "high": 6.5
+    }
+  ]
 }
diff --git a/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/metas.json b/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/metas.json
index 41570a1fc..e050d24db 100644
--- a/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/metas.json
+++ b/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/metas.json
@@ -1,3 +1,15 @@
 {
-  "supported_features": { "permitted_synthesis_morphing": "SELF_ONLY" }
+  "supported_features": { "permitted_synthesis_morphing": "SELF_ONLY" },
+  "range": [
+    {
+      "style_id": 1,
+      "low": 5.38,
+      "high": 6.44
+    },
+    {
+      "style_id": 3,
+      "low": 5.11,
+      "high": 6.5
+    }
+  ]
 }
diff --git a/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/metas.json b/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/metas.json
index 0967ef424..ee91b61e1 100644
--- a/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/metas.json
+++ b/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/metas.json
@@ -1 +1,14 @@
-{}
+{
+  "range": [
+    {
+      "style_id": 2,
+      "low": 5.16,
+      "high": 6.2
+    },
+    {
+      "style_id": 0,
+      "low": 5.21,
+      "high": 6.13
+    }
+  ]
+}
diff --git a/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/metas.json b/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/metas.json
index 218fe4c1f..7ef5d064f 100644
--- a/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/metas.json
+++ b/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/metas.json
@@ -1,3 +1,4 @@
 {
-  "supported_features": { "permitted_synthesis_morphing": "ALL" }
+  "supported_features": { "permitted_synthesis_morphing": "ALL" },
+  "range": []
 }
diff --git a/voicevox_engine/metas/Metas.py b/voicevox_engine/metas/Metas.py
index eda288eb8..94dcf6e37 100644
--- a/voicevox_engine/metas/Metas.py
+++ b/voicevox_engine/metas/Metas.py
@@ -6,6 +6,7 @@
 # NOTE: 循環importを防ぐためにとりあえずここに書いている
 # FIXME: 他のmodelに依存せず、全modelから参照できる場所に配置する
 StyleId = NewType("StyleId", int)
+PitchRange = NewType("PitchRange", float)
 
 StyleType = Literal["talk", "singing_teacher", "frame_decode", "sing"]
 
@@ -28,6 +29,13 @@ class SpeakerStyle(BaseModel):
     )
 
 
+# One style's optimal pitch bounds; mirrors an entry of a "range" array.
+class SpeakerOptimalPitchRangeItem(BaseModel):
+    style_id: StyleId
+    high: PitchRange  # upper bound of the range
+    low: PitchRange  # lower bound of the range
+
+
 class SpeakerSupportPermittedSynthesisMorphing(str, Enum):
     ALL = "ALL"  # 全て許可
     SELF_ONLY = "SELF_ONLY"  # 同じ話者内でのみ許可
@@ -67,6 +75,10 @@ class EngineSpeaker(BaseModel):
     supported_features: SpeakerSupportedFeatures = Field(
         title="話者の対応機能", default_factory=SpeakerSupportedFeatures
     )
+    # Optimal pitch range per style; defaults to an empty list when absent.
+    range: List[SpeakerOptimalPitchRangeItem] = Field(
+        title="話者の最適ピッチ範囲", default_factory=list
+    )
 
 
 class Speaker(CoreSpeaker, EngineSpeaker):
diff --git a/voicevox_engine/metas/MetasStore.py b/voicevox_engine/metas/MetasStore.py
index bcea6c1ce..4eaa64b54 100644
--- a/voicevox_engine/metas/MetasStore.py
+++ b/voicevox_engine/metas/MetasStore.py
@@ -11,6 +11,7 @@
     StyleId,
     StyleType,
 )
+from voicevox_engine.model import StyleIdNotFoundError
 
 if TYPE_CHECKING:
     from voicevox_engine.core.core_adapter import CoreAdapter
@@ -61,6 +62,21 @@ def load_combined_metas(self, core: "CoreAdapter") -> List[Speaker]:
             for speaker_meta in core_metas
         ]
 
+    def get_style_optimal_pitch_range(self, style_id: StyleId) -> tuple[float, float]:
+        """
+        Return the ``(low, high)`` optimal pitch range registered for a style.
+
+        Raises StyleIdNotFoundError when no loaded speaker metadata lists
+        ``style_id`` in its ``range`` entries.
+        """
+        for meta in self._loaded_metas.values():
+            for pitch_range in meta.range:
+                if pitch_range.style_id == style_id:
+                    return (pitch_range.low, pitch_range.high)
+        # Reached only after every speaker's entries were scanned without a
+        # match, so the style is unknown to all loaded metadata.
+        raise StyleIdNotFoundError(style_id=style_id)
+
 
 def construct_lookup(
     speakers: List[Speaker],
diff --git a/voicevox_engine/model.py b/voicevox_engine/model.py
index f9e50b62c..6df0ee8cd 100644
--- a/voicevox_engine/model.py
+++ b/voicevox_engine/model.py
@@ -155,6 +155,12 @@ class MorphableTargetInfo(BaseModel):
     # reason: Optional[str] = Field(title="is_morphableがfalseである場合、その理由")
 
 
+# Lower and upper bound of the optimal pitch range of a single style.
+class StylePitchRange(BaseModel):
+    low: float
+    high: float
+
+
 class StyleIdNotFoundError(LookupError):
     def __init__(self, style_id: int, *args: object, **kywrds: object) -> None:
         self.style_id = style_id