Skip to content

Commit

Permalink
pythonのexampleを新しい形に変更した
Browse files Browse the repository at this point in the history
  • Loading branch information
qwerty2501 committed Jul 25, 2022
1 parent 1284e26 commit a41a509
Show file tree
Hide file tree
Showing 4 changed files with 173 additions and 0 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,14 @@ sudo apt install libgomp1

## サンプル実行

まず Open JTalk 辞書フォルダを配置します。 http://open-jtalk.sourceforge.net/ を開き、Dictionary for Open JTalk 欄の Binary Package (UTF-8)をクリックして「open_jtalk_dic_utf_8-1.11.tar.gz」をダウンロードします。
これを展開してできた「open_jtalk_dic_utf_8-1.11」フォルダを example/python に配置します。

- バージョン 0.12 以降の voicevox_core, onnxruntime ライブラリ(配布ページ: https://github.com/VOICEVOX/voicevox_core/releases )を example/python に配置する
- Linux の場合:`voicevox_core-linux-{お使いのCPUアーキテクチャ}-cpu-{バージョン}.zip` 内の 全ての so file
- macOS の場合:`voicevox_core-osx-{お使いのCPUアーキテクチャ}-cpu-{バージョン}.zip` 内の 全ての dylib file
- macOS の場合:`voicevox_core-windows-{お使いのCPUアーキテクチャ}-cpu-{バージョン}.zip` 内の 全ての dll file

```bash
cd example/python

Expand Down
5 changes: 5 additions & 0 deletions example/python/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,8 @@ cython_debug/

# OpenJTalk-dictionary's dir
open_jtalk_dic_utf_8-*

# shared library
*.so.*
*.dylib
*.dll
159 changes: 159 additions & 0 deletions example/python/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
from ctypes import *
import platform
import os
from pathlib import Path
import numpy

# numpy ndarray types
int64_dim1_type = numpy.ctypeslib.ndpointer(dtype=numpy.int64, ndim=1)
float32_dim1_type = numpy.ctypeslib.ndpointer(dtype=numpy.float32, ndim=1)
int64_dim2_type = numpy.ctypeslib.ndpointer(dtype=numpy.int64, ndim=2)
float32_dim2_type = numpy.ctypeslib.ndpointer(dtype=numpy.float32, ndim=2)

get_os = platform.system()

lib_file = ""
if get_os == "Windows":
lib_file = "core.dll"
elif get_os == "Darwin":
lib_file = "libcore.dylib"
elif get_os == "Linux":
lib_file = "libcore.so"

# ライブラリ読み込み
core_dll_path = Path(os.path.dirname(__file__) + f"/{lib_file}")
if not os.path.exists(core_dll_path):
raise Exception(f"coreライブラリファイルが{core_dll_path}に存在しません")
lib = cdll.LoadLibrary(str(core_dll_path))

# 関数型定義
lib.initialize.argtypes = (c_bool, c_int, c_bool)
lib.initialize.restype = c_bool

lib.load_model.argtypes = (c_int64,)
lib.load_model.restype = c_bool

lib.is_model_loaded.argtypes = (c_int64,)
lib.is_model_loaded.restype = c_bool

lib.finalize.argtypes = ()

lib.metas.restype = c_char_p

lib.supported_devices.restype = c_char_p

lib.yukarin_s_forward.argtypes = (
c_int64, int64_dim1_type, int64_dim1_type, float32_dim1_type)
lib.yukarin_s_forward.restype = c_bool

lib.yukarin_sa_forward.argtypes = (c_int64, int64_dim2_type, int64_dim2_type, int64_dim2_type,
int64_dim2_type, int64_dim2_type, int64_dim2_type, int64_dim1_type, float32_dim2_type)
lib.yukarin_sa_forward.restype = c_bool

lib.decode_forward.argtypes = (
c_int64, c_int64, float32_dim2_type, float32_dim2_type, int64_dim1_type, float32_dim1_type)
lib.decode_forward.restype = c_bool

lib.last_error_message.restype = c_char_p

lib.voicevox_load_openjtalk_dict.argtypes = (c_char_p,)
lib.voicevox_load_openjtalk_dict.restype = c_int

lib.voicevox_tts.argtypes = (c_char_p, c_int64, POINTER(c_int), POINTER(POINTER(c_uint8)))
lib.voicevox_tts.restype = c_int

lib.voicevox_tts_from_kana.argtypes = (c_char_p, c_int64, POINTER(c_int), POINTER(POINTER(c_uint8)))
lib.voicevox_tts_from_kana.restype = c_int

lib.voicevox_wav_free.argtypes = (POINTER(c_uint8),)

lib.voicevox_error_result_to_message.argtypes = (c_int,)
lib.voicevox_load_openjtalk_dict.argtypes = (c_char_p,)

# ラッパー関数
def initialize(use_gpu: bool, cpu_num_threads=0, load_all_models=True):
success = lib.initialize(use_gpu, cpu_num_threads, load_all_models)
if not success:
raise Exception(lib.last_error_message().decode())

def load_model(speaker_id: int):
success = lib.load_model(speaker_id)
if not success:
raise Exception(lib.last_error_message().decode())

def is_model_loaded(speaker_id: int) -> bool:
return lib.is_model_loaded(speaker_id)

def metas() -> str:
return lib.metas().decode()


def supported_devices() -> str:
return lib.supported_devices().decode()


def yukarin_s_forward(length: int, phoneme_list: numpy.ndarray, speaker_id: numpy.ndarray) -> numpy.ndarray:
output = numpy.zeros((length, ), dtype=numpy.float32)
success = lib.yukarin_s_forward(length, phoneme_list, speaker_id, output)
if not success:
raise Exception(lib.last_error_message().decode())
return output


def yukarin_sa_forward(
length: int,
vowel_phoneme_list,
consonant_phoneme_list,
start_accent_list,
end_accent_list,
start_accent_phrase_list,
end_accent_phrase_list,
speaker_id
):
output = numpy.empty((len(speaker_id), length,), dtype=numpy.float32)
success = lib.yukarin_sa_forward(
length, vowel_phoneme_list, consonant_phoneme_list, start_accent_list, end_accent_list, start_accent_phrase_list, end_accent_phrase_list, speaker_id, output
)
if not success:
raise Exception(lib.last_error_message().decode())
return output


def decode_forward(length: int, phoneme_size: int, f0, phoneme, speaker_id):
output = numpy.empty((length*256,), dtype=numpy.float32)
success = lib.decode_forward(
length, phoneme_size, f0, phoneme, speaker_id, output
)
if not success:
raise Exception(lib.last_error_message().decode())
return output

def voicevox_load_openjtalk_dict(dict_path: str):
errno = lib.voicevox_load_openjtalk_dict(dict_path.encode())
if errno != 0:
raise Exception(lib.voicevox_error_result_to_message(errno).decode())

def voicevox_tts(text: str, speaker_id: int) -> bytes:
output_binary_size = c_int()
output_wav = POINTER(c_uint8)()
errno = lib.voicevox_tts(text.encode(), speaker_id, byref(output_binary_size), byref(output_wav))
if errno != 0:
raise Exception(lib.voicevox_error_result_to_message(errno).decode())
output = create_string_buffer(output_binary_size.value * sizeof(c_uint8))
memmove(output, output_wav, output_binary_size.value * sizeof(c_uint8))
lib.voicevox_wav_free(output_wav)
return output

def voicevox_tts_from_kana(text: str, speaker_id: int) -> bytes:
output_binary_size = c_int()
output_wav = POINTER(c_uint8)()
errno = lib.voicevox_tts_from_kana(text.encode(), speaker_id, byref(output_binary_size), byref(output_wav))
if errno != 0:
raise Exception(lib.voicevox_error_result_to_message(errno).decode())
output = create_string_buffer(output_binary_size.value * sizeof(c_uint8))
memmove(output, output_wav, output_binary_size.value * sizeof(c_uint8))
lib.voicevox_wav_free(output_wav)
return output

def finalize():
lib.finalize()
1 change: 1 addition & 0 deletions example/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
numpy

0 comments on commit a41a509

Please sign in to comment.