From a41a509bc22b3e621785e4ff37d1b03d10f8cef2 Mon Sep 17 00:00:00 2001 From: qwerty2501 <939468+qwerty2501@users.noreply.github.com> Date: Tue, 26 Jul 2022 00:28:22 +0900 Subject: [PATCH] =?UTF-8?q?python=E3=81=AEexample=E3=82=92=E6=96=B0?= =?UTF-8?q?=E3=81=97=E3=81=84=E5=BD=A2=E3=81=AB=E5=A4=89=E6=9B=B4=E3=81=97?= =?UTF-8?q?=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit refs #128 --- README.md | 8 ++ example/python/.gitignore | 5 + example/python/core.py | 159 ++++++++++++++++++++++++++++++++ example/python/requirements.txt | 1 + 4 files changed, 173 insertions(+) create mode 100644 example/python/core.py create mode 100644 example/python/requirements.txt diff --git a/README.md b/README.md index 94e128342..07c65c260 100644 --- a/README.md +++ b/README.md @@ -116,6 +116,14 @@ sudo apt install libgomp1 ## サンプル実行 +まず Open JTalk 辞書フォルダを配置します。 http://open-jtalk.sourceforge.net/ を開き、Dictionary for Open JTalk 欄の Binary Package (UTF-8)をクリックして「open_jtalk_dic_utf_8-1.11.tar.gz」をダウンロードします。 +これを展開してできた「open_jtalk_dic_utf_8-1.11」フォルダを example/python に配置します。 + +- バージョン 0.12 以降の voicevox_core, onnxruntime ライブラリ(配布ページ: https://github.com/VOICEVOX/voicevox_core/releases )を example/python に配置する + - Linux の場合:`voicevox_core-linux-{お使いのCPUアーキテクチャ}-cpu-{バージョン}.zip` 内の 全ての so file + - macOS の場合:`voicevox_core-osx-{お使いのCPUアーキテクチャ}-cpu-{バージョン}.zip` 内の 全ての dylib file + - Windows の場合:`voicevox_core-windows-{お使いのCPUアーキテクチャ}-cpu-{バージョン}.zip` 内の 全ての dll file + ```bash cd example/python diff --git a/example/python/.gitignore b/example/python/.gitignore index a0f63e6cc..c19d96764 100644 --- a/example/python/.gitignore +++ b/example/python/.gitignore @@ -139,3 +139,8 @@ cython_debug/ # OpenJTalk-dictionary's dir open_jtalk_dic_utf_8-* + +# shared library +*.so.* +*.dylib +*.dll diff --git a/example/python/core.py b/example/python/core.py new file mode 100644 index 000000000..8319ccca4 --- /dev/null +++ b/example/python/core.py @@ -0,0 +1,159
@@
+"""ctypes bindings for the VOICEVOX core shared library located next to this file."""
+from ctypes import *
+import platform
+import os
+from pathlib import Path
+import numpy
+
+# numpy ndarray types
+int64_dim1_type = numpy.ctypeslib.ndpointer(dtype=numpy.int64, ndim=1)
+float32_dim1_type = numpy.ctypeslib.ndpointer(dtype=numpy.float32, ndim=1)
+int64_dim2_type = numpy.ctypeslib.ndpointer(dtype=numpy.int64, ndim=2)
+float32_dim2_type = numpy.ctypeslib.ndpointer(dtype=numpy.float32, ndim=2)
+
+get_os = platform.system()
+
+# Library file name for each supported platform ("" when the platform is unknown)
+lib_file = {"Windows": "core.dll", "Darwin": "libcore.dylib", "Linux": "libcore.so"}.get(get_os, "")
+if not lib_file:
+    # Fail here: an empty name would resolve to this directory, which always exists
+    raise OSError(f"サポートされていないOSです: {get_os}")
+
+# Load the library; resolve() anchors the path to this file even if __file__ is relative
+core_dll_path = Path(__file__).resolve().parent / lib_file
+if not core_dll_path.exists():
+    raise Exception(f"coreライブラリファイルが{core_dll_path}に存在しません")
+lib = cdll.LoadLibrary(str(core_dll_path))
+
+# C function signatures
+lib.initialize.argtypes = (c_bool, c_int, c_bool)
+lib.initialize.restype = c_bool
+
+lib.load_model.argtypes = (c_int64,)
+lib.load_model.restype = c_bool
+
+lib.is_model_loaded.argtypes = (c_int64,)
+lib.is_model_loaded.restype = c_bool
+
+lib.finalize.argtypes = ()
+
+lib.metas.restype = c_char_p
+
+lib.supported_devices.restype = c_char_p
+
+lib.yukarin_s_forward.argtypes = (
+    c_int64, int64_dim1_type, int64_dim1_type, float32_dim1_type)
+lib.yukarin_s_forward.restype = c_bool
+
+lib.yukarin_sa_forward.argtypes = (c_int64, int64_dim2_type, int64_dim2_type, int64_dim2_type,
+                                   int64_dim2_type, int64_dim2_type, int64_dim2_type, int64_dim1_type, float32_dim2_type)
+lib.yukarin_sa_forward.restype = c_bool
+
+lib.decode_forward.argtypes = (
+    c_int64, c_int64, float32_dim2_type, float32_dim2_type, int64_dim1_type, float32_dim1_type)
+lib.decode_forward.restype = c_bool
+
+lib.last_error_message.restype = c_char_p
+
+lib.voicevox_load_openjtalk_dict.argtypes = (c_char_p,)
+lib.voicevox_load_openjtalk_dict.restype = c_int
+
+lib.voicevox_tts.argtypes = (c_char_p, c_int64, POINTER(c_int), POINTER(POINTER(c_uint8)))
+lib.voicevox_tts.restype = c_int
+
+lib.voicevox_tts_from_kana.argtypes = (c_char_p, c_int64, POINTER(c_int), POINTER(POINTER(c_uint8)))
+lib.voicevox_tts_from_kana.restype = c_int
+
+lib.voicevox_wav_free.argtypes = (POINTER(c_uint8),)
+
+lib.voicevox_error_result_to_message.argtypes = (c_int,)
+# Needed so the result is bytes (ctypes defaults restype to c_int); callers .decode() it
+lib.voicevox_error_result_to_message.restype = c_char_p
+
+# Wrapper functions
+def initialize(use_gpu: bool, cpu_num_threads=0, load_all_models=True):
+    """Initialize the core; raise Exception with its last error message on failure."""
+    success = lib.initialize(use_gpu, cpu_num_threads, load_all_models)
+    if not success:
+        raise Exception(lib.last_error_message().decode())
+
+def load_model(speaker_id: int):
+    """Load the model for ``speaker_id``; raise Exception on failure."""
+    success = lib.load_model(speaker_id)
+    if not success:
+        raise Exception(lib.last_error_message().decode())
+
+def is_model_loaded(speaker_id: int) -> bool:
+    """Return the core's answer to whether the model for ``speaker_id`` is loaded."""
+    return lib.is_model_loaded(speaker_id)
+
+def metas() -> str:
+    """Return the decoded string reported by the core's ``metas`` function."""
+    return lib.metas().decode()
+
+def supported_devices() -> str:
+    """Return the decoded string reported by the core's ``supported_devices`` function."""
+    return lib.supported_devices().decode()
+
+def yukarin_s_forward(length: int, phoneme_list: numpy.ndarray, speaker_id: numpy.ndarray) -> numpy.ndarray:
+    """Run ``yukarin_s_forward``; return its float32 output of shape ``(length,)``."""
+    output = numpy.zeros((length, ), dtype=numpy.float32)
+    success = lib.yukarin_s_forward(length, phoneme_list, speaker_id, output)
+    if not success:
+        raise Exception(lib.last_error_message().decode())
+    return output
+
+def yukarin_sa_forward(
+    length: int, vowel_phoneme_list, consonant_phoneme_list, start_accent_list, end_accent_list,
+    start_accent_phrase_list, end_accent_phrase_list, speaker_id
+):
+    """Run ``yukarin_sa_forward``; return its float32 output of shape ``(len(speaker_id), length)``."""
+    output = numpy.empty((len(speaker_id), length,), dtype=numpy.float32)
+    success = lib.yukarin_sa_forward(
+        length, vowel_phoneme_list, consonant_phoneme_list, start_accent_list, end_accent_list, start_accent_phrase_list, end_accent_phrase_list, speaker_id, output
+    )
+    if not success:
+        raise Exception(lib.last_error_message().decode())
+    return output
+
+def decode_forward(length: int, phoneme_size: int, f0, phoneme, speaker_id):
+    """Run ``decode_forward``; return its float32 output of shape ``(length * 256,)``."""
+    output = numpy.empty((length*256,), dtype=numpy.float32)
+    success = lib.decode_forward(length, phoneme_size, f0, phoneme, speaker_id, output)
+    if not success:
+        raise Exception(lib.last_error_message().decode())
+    return output
+
+def voicevox_load_openjtalk_dict(dict_path: str):
+    """Load the Open JTalk dictionary at ``dict_path``; raise Exception on a non-zero result."""
+    errno = lib.voicevox_load_openjtalk_dict(dict_path.encode())
+    if errno != 0:
+        raise Exception(lib.voicevox_error_result_to_message(errno).decode())
+
+def _take_wav(errno: int, size, wav) -> bytes:
+    """Raise on a non-zero result code; otherwise copy the wav out and free the core's buffer."""
+    if errno != 0:
+        raise Exception(lib.voicevox_error_result_to_message(errno).decode())
+    try:
+        # string_at copies the data, so the core's buffer can be released right away
+        return string_at(wav, size.value)
+    finally:
+        lib.voicevox_wav_free(wav)
+
+def voicevox_tts(text: str, speaker_id: int) -> bytes:
+    """Call the core's ``voicevox_tts`` for ``text`` and ``speaker_id``; return the wav data as bytes."""
+    output_binary_size = c_int()
+    output_wav = POINTER(c_uint8)()
+    errno = lib.voicevox_tts(text.encode(), speaker_id, byref(output_binary_size), byref(output_wav))
+    return _take_wav(errno, output_binary_size, output_wav)
+
+def voicevox_tts_from_kana(text: str, speaker_id: int) -> bytes:
+    """Call the core's ``voicevox_tts_from_kana`` for ``text`` and ``speaker_id``; return the wav data as bytes."""
+    output_binary_size = c_int()
+    output_wav = POINTER(c_uint8)()
+    errno = lib.voicevox_tts_from_kana(text.encode(), speaker_id, byref(output_binary_size), byref(output_wav))
+    return _take_wav(errno, output_binary_size, output_wav)
+
+def finalize():
+    """Call the core's ``finalize`` function."""
+    lib.finalize()
diff --git a/example/python/requirements.txt b/example/python/requirements.txt
new file mode 100644
index 000000000..24ce15ab7
--- /dev/null
+++ b/example/python/requirements.txt
@@ -0,0 +1 @@
+numpy