Skip to content

Commit

Permalink
Revert "[Rust]mainとのconflict解消 (VOICEVOX#204)"
Browse files Browse the repository at this point in the history
This reverts commit db28cbf.
  • Loading branch information
qwerty2501 committed Jul 24, 2022
1 parent db28cbf commit e7c20f8
Show file tree
Hide file tree
Showing 10 changed files with 1,132 additions and 11 deletions.
3 changes: 0 additions & 3 deletions example/python/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,3 @@ dmypy.json

# Cython debug symbols
cython_debug/

# OpenJTalk-dictionary's dir
open_jtalk_dic_utf_8-*
226 changes: 226 additions & 0 deletions example/python/acoustic_feature_extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
from abc import abstractmethod
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import List, Sequence

import numpy


@dataclass
class SamplingData:
    """An array of samples together with the rate it was sampled at."""

    array: numpy.ndarray  # shape: (N, ?)
    rate: float

    def resample(self, sampling_rate: float, index: int = 0, length: int = None):
        """Return rows of ``array`` picked at ``sampling_rate``.

        Args:
            sampling_rate: Rate of the returned sequence.
            index: Offset, counted in output samples, of the first row picked.
            length: Number of rows to return. When ``None`` it is derived from
                the array length and the ratio of the two rates.

        Returns:
            ``array`` indexed by the truncated source positions.

        A single random offset in ``[0, 1)`` is added to every output
        position before it is scaled back to a source index.
        """
        if length is None:
            length = int(len(self.array) / self.rate * sampling_rate)

        # Number of source samples covered by one output sample.
        step = self.rate / sampling_rate
        jitter = numpy.random.rand()
        positions = (jitter + index + numpy.arange(length)) * step
        return self.array[positions.astype(int)]


class BasePhoneme(object):
    """A phoneme label with its start and end time.

    Subclasses supply the class attributes ``phoneme_list`` (the allowed
    labels), ``num_phoneme`` and ``space_phoneme``, and implement
    ``convert``.
    """

    phoneme_list: Sequence[str]
    num_phoneme: int
    space_phoneme: str

    def __init__(
        self,
        phoneme: str,
        start: float,
        end: float,
    ):
        """Store the label and the times rounded to two decimals."""
        self.phoneme = phoneme
        self.start = numpy.round(start, decimals=2)
        self.end = numpy.round(end, decimals=2)

    def __repr__(self):
        return f"Phoneme(phoneme='{self.phoneme}', start={self.start}, end={self.end})"

    def __eq__(self, o: object):
        """Compare label, start and end; any non-phoneme object is unequal."""
        return isinstance(o, BasePhoneme) and (
            self.phoneme == o.phoneme and self.start == o.start and self.end == o.end
        )

    def verify(self):
        """Assert that the label is one of ``phoneme_list``."""
        assert self.phoneme in self.phoneme_list, f"{self.phoneme} is not defined."

    @property
    def phoneme_id(self):
        """Position of the label inside ``phoneme_list``."""
        return self.phoneme_list.index(self.phoneme)

    @property
    def duration(self):
        """Difference between ``end`` and ``start``."""
        return self.end - self.start

    @property
    def onehot(self):
        """Boolean vector of length ``num_phoneme``, True at ``phoneme_id``."""
        array = numpy.zeros(self.num_phoneme, dtype=bool)
        array[self.phoneme_id] = True
        return array

    @classmethod
    def parse(cls, s: str):
        """Build a phoneme from a whitespace-separated ``start end label`` line.

        >>> BasePhoneme.parse('1.7425000 1.9125000 o:')
        Phoneme(phoneme='o:', start=1.74, end=1.91)
        """
        words = s.split()
        return cls(
            start=float(words[0]),
            end=float(words[1]),
            phoneme=words[2],
        )

    @classmethod
    @abstractmethod
    def convert(cls, phonemes: List["BasePhoneme"]) -> List["BasePhoneme"]:
        """Normalise a freshly parsed phoneme list; implemented by subclasses."""
        pass

    @classmethod
    def load_julius_list(cls, path: Path):
        """Read a label file, convert it and verify every phoneme.

        Args:
            path: Text file with one ``start end label`` entry per line.

        Returns:
            The list returned by ``convert``.

        Raises:
            AssertionError: A label is not in ``phoneme_list``.
        """
        # Skip whitespace-only lines as well as empty ones: parse() would
        # otherwise fail with IndexError on a line that has no fields.
        phonemes = [cls.parse(s) for s in path.read_text().split("\n") if s.strip()]
        phonemes = cls.convert(phonemes)

        for phoneme in phonemes:
            phoneme.verify()
        return phonemes

    @classmethod
    def save_julius_list(cls, phonemes: List["BasePhoneme"], path: Path):
        """Write phonemes as tab-separated ``start end label`` lines.

        Times are written with two decimals; lines are joined with a newline
        and no trailing newline is added.
        """
        text = "\n".join(
            [
                f"{numpy.round(p.start, decimals=2):.2f}\t"
                f"{numpy.round(p.end, decimals=2):.2f}\t"
                f"{p.phoneme}"
                for p in phonemes
            ]
        )
        path.write_text(text)


class JvsPhoneme(BasePhoneme):
    """Phoneme restricted to the label set listed in ``phoneme_list`` below."""

    phoneme_list = (
        "pau",
        "I",
        "N",
        "U",
        "a",
        "b",
        "by",
        "ch",
        "cl",
        "d",
        "dy",
        "e",
        "f",
        "g",
        "gy",
        "h",
        "hy",
        "i",
        "j",
        "k",
        "ky",
        "m",
        "my",
        "n",
        "ny",
        "o",
        "p",
        "py",
        "r",
        "ry",
        "s",
        "sh",
        "t",
        "ts",
        "u",
        "v",
        "w",
        "y",
        "z",
    )
    num_phoneme = len(phoneme_list)
    space_phoneme = "pau"

    @classmethod
    def convert(cls, phonemes: List["JvsPhoneme"]):
        """Replace a leading/trailing label containing "sil" by ``space_phoneme``.

        The list is modified in place and returned. An empty list is returned
        unchanged instead of raising ``IndexError``.
        """
        if not phonemes:
            return phonemes

        # Only the two ends are inspected; for a single-element list both
        # names refer to the same object, which is harmless.
        for edge in (phonemes[0], phonemes[-1]):
            if "sil" in edge.phoneme:
                edge.phoneme = cls.space_phoneme
        return phonemes


class OjtPhoneme(BasePhoneme):
    """Phoneme restricted to the label set listed in ``phoneme_list`` below."""

    phoneme_list = (
        "pau",
        "A",
        "E",
        "I",
        "N",
        "O",
        "U",
        "a",
        "b",
        "by",
        "ch",
        "cl",
        "d",
        "dy",
        "e",
        "f",
        "g",
        "gw",
        "gy",
        "h",
        "hy",
        "i",
        "j",
        "k",
        "kw",
        "ky",
        "m",
        "my",
        "n",
        "ny",
        "o",
        "p",
        "py",
        "r",
        "ry",
        "s",
        "sh",
        "t",
        "ts",
        "ty",
        "u",
        "v",
        "w",
        "y",
        "z",
    )
    num_phoneme = len(phoneme_list)
    space_phoneme = "pau"

    @classmethod
    def convert(cls, phonemes: List["OjtPhoneme"]):
        """Replace a leading/trailing label containing "sil" by ``space_phoneme``.

        The list is modified in place and returned. An empty list is returned
        unchanged instead of raising ``IndexError``.
        """
        if not phonemes:
            return phonemes

        # Only the two ends are inspected; for a single-element list both
        # names refer to the same object, which is harmless.
        for edge in (phonemes[0], phonemes[-1]):
            if "sil" in edge.phoneme:
                edge.phoneme = cls.space_phoneme
        return phonemes


class PhonemeType(str, Enum):
    """Names of the available phoneme label sets.

    Members are also ``str`` instances, so they compare equal to their
    plain-string value.
    """

    jvs = "jvs"
    openjtalk = "openjtalk"


# Lookup table: PhonemeType member -> phoneme class handling that label set.
phoneme_type_to_class = {PhonemeType.jvs: JvsPhoneme, PhonemeType.openjtalk: OjtPhoneme}
41 changes: 41 additions & 0 deletions example/python/core.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from libcpp cimport bool

# Declarations of the C functions provided by "core.h". Each function is
# declared under a ``c_``-prefixed Cython name; the quoted string is the
# actual C symbol name.
#
# NOTE(review): core.pyx passes numpy int64 buffers cast to ``long *`` for the
# ``long *`` parameters below. ``long`` is not 64 bits wide on every platform —
# confirm these declarations against the parameter types in core.h.
cdef extern from "core.h":
    # Takes a root directory path and a GPU flag; the bool result is treated
    # as a success flag by core.pyx.
    bool c_initialize "initialize" (
        const char *root_dir_path,
        bool use_gpu
    )

    void c_finalize "finalize" ()

    # Returns a C string; core.pyx decodes it to a Python str.
    const char *c_metas "metas" ()

    # ``output`` is a caller-allocated float buffer; core.pyx allocates
    # ``length`` elements for it.
    bool c_yukarin_s_forward "yukarin_s_forward" (
        int length,
        long *phoneme_list,
        long *speaker_id,
        float *output
    )

    # ``output`` is a caller-allocated float buffer; core.pyx allocates
    # ``len(speaker_id) * length`` elements for it.
    bool c_yukarin_sa_forward "yukarin_sa_forward" (
        int length,
        long *vowel_phoneme_list,
        long *consonant_phoneme_list,
        long *start_accent_list,
        long *end_accent_list,
        long *start_accent_phrase_list,
        long *end_accent_phrase_list,
        long *speaker_id,
        float *output
    )

    # ``output`` is a caller-allocated float buffer; core.pyx allocates
    # ``length * 256`` elements for it.
    bool c_decode_forward "decode_forward" (
        int length,
        int phoneme_size,
        float *f0,
        float *phoneme,
        long *speaker_id,
        float *output
    )

    # Message read by core.pyx after one of the functions above returns false.
    const char *c_last_error_message "last_error_message" ()
80 changes: 80 additions & 0 deletions example/python/core.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
cimport numpy
import numpy

from libcpp cimport bool

cpdef initialize(
    str root_dir_path,
    bool use_gpu,
):
    """Call the C ``initialize`` function.

    ``root_dir_path`` is encoded to bytes before being passed on together
    with ``use_gpu``.

    Raises:
        Exception: The C call returned false; the message is the decoded
            result of ``last_error_message``.
    """
    cdef bool ok = c_initialize(root_dir_path.encode(), use_gpu)
    if not ok:
        raise Exception(c_last_error_message().decode())

cpdef finalize():
    """Call the C ``finalize`` function; returns nothing."""
    c_finalize()

cpdef metas():
    """Return the C string produced by ``metas`` decoded to a Python str."""
    return c_metas().decode()

cpdef numpy.ndarray[numpy.float32_t, ndim=1] yukarin_s_forward(
    int length,
    numpy.ndarray[numpy.int64_t, ndim=1] phoneme_list,
    numpy.ndarray[numpy.int64_t, ndim=1] speaker_id,
):
    """Run the C ``yukarin_s_forward`` function.

    Args:
        length: Passed to C as-is; also the size of the returned array.
        phoneme_list: 1-D int64 array.
        speaker_id: 1-D int64 array.

    Returns:
        A float32 array of ``length`` elements filled by the C call.

    Raises:
        Exception: The C call returned false; the message is the decoded
            result of ``last_error_message``.
    """
    # The C function only receives raw pointers and knows nothing about
    # strides, so make sure the memory it reads is C-contiguous.
    # ascontiguousarray returns the input itself when it already is.
    cdef numpy.ndarray[numpy.int64_t, ndim=1] phoneme_list_c = numpy.ascontiguousarray(phoneme_list)
    cdef numpy.ndarray[numpy.int64_t, ndim=1] speaker_id_c = numpy.ascontiguousarray(speaker_id)
    cdef numpy.ndarray[numpy.float32_t, ndim=1] output = numpy.zeros((length,), dtype=numpy.float32)
    cdef bool success = c_yukarin_s_forward(
        length,
        <long*> phoneme_list_c.data,
        <long*> speaker_id_c.data,
        <float*> output.data,
    )
    if not success:
        raise Exception(c_last_error_message().decode())
    return output


cpdef numpy.ndarray[numpy.float32_t, ndim=2] yukarin_sa_forward(
    int length,
    numpy.ndarray[numpy.int64_t, ndim=2] vowel_phoneme_list,
    numpy.ndarray[numpy.int64_t, ndim=2] consonant_phoneme_list,
    numpy.ndarray[numpy.int64_t, ndim=2] start_accent_list,
    numpy.ndarray[numpy.int64_t, ndim=2] end_accent_list,
    numpy.ndarray[numpy.int64_t, ndim=2] start_accent_phrase_list,
    numpy.ndarray[numpy.int64_t, ndim=2] end_accent_phrase_list,
    numpy.ndarray[numpy.int64_t, ndim=1] speaker_id,
):
    """Run the C ``yukarin_sa_forward`` function.

    Args:
        length: Passed to C as-is; also the second dimension of the result.
        vowel_phoneme_list, consonant_phoneme_list, start_accent_list,
        end_accent_list, start_accent_phrase_list, end_accent_phrase_list:
            2-D int64 arrays.
        speaker_id: 1-D int64 array; its length is the first dimension of the
            result.

    Returns:
        A float32 array of shape ``(len(speaker_id), length)`` filled by the
        C call.

    Raises:
        Exception: The C call returned false; the message is the decoded
            result of ``last_error_message``.
    """
    # The C function only receives raw pointers and knows nothing about
    # strides, so make sure the memory it reads is C-contiguous.
    # ascontiguousarray returns the input itself when it already is.
    cdef numpy.ndarray[numpy.int64_t, ndim=2] vowel_c = numpy.ascontiguousarray(vowel_phoneme_list)
    cdef numpy.ndarray[numpy.int64_t, ndim=2] consonant_c = numpy.ascontiguousarray(consonant_phoneme_list)
    cdef numpy.ndarray[numpy.int64_t, ndim=2] start_accent_c = numpy.ascontiguousarray(start_accent_list)
    cdef numpy.ndarray[numpy.int64_t, ndim=2] end_accent_c = numpy.ascontiguousarray(end_accent_list)
    cdef numpy.ndarray[numpy.int64_t, ndim=2] start_phrase_c = numpy.ascontiguousarray(start_accent_phrase_list)
    cdef numpy.ndarray[numpy.int64_t, ndim=2] end_phrase_c = numpy.ascontiguousarray(end_accent_phrase_list)
    cdef numpy.ndarray[numpy.int64_t, ndim=1] speaker_id_c = numpy.ascontiguousarray(speaker_id)
    cdef numpy.ndarray[numpy.float32_t, ndim=2] output = numpy.empty((len(speaker_id), length,), dtype=numpy.float32)
    cdef bool success = c_yukarin_sa_forward(
        length,
        <long*> vowel_c.data,
        <long*> consonant_c.data,
        <long*> start_accent_c.data,
        <long*> end_accent_c.data,
        <long*> start_phrase_c.data,
        <long*> end_phrase_c.data,
        <long*> speaker_id_c.data,
        <float*> output.data,
    )
    if not success:
        raise Exception(c_last_error_message().decode())
    return output

cpdef numpy.ndarray[numpy.float32_t, ndim=1] decode_forward(
    int length,
    int phoneme_size,
    numpy.ndarray[numpy.float32_t, ndim=2] f0,
    numpy.ndarray[numpy.float32_t, ndim=2] phoneme,
    numpy.ndarray[numpy.int64_t, ndim=1] speaker_id,
):
    """Run the C ``decode_forward`` function.

    Args:
        length: Passed to C as-is; the result holds ``length * 256`` values.
        phoneme_size: Passed to C as-is.
        f0: 2-D float32 array.
        phoneme: 2-D float32 array.
        speaker_id: 1-D int64 array.

    Returns:
        A float32 array of ``length * 256`` elements filled by the C call.

    Raises:
        Exception: The C call returned false; the message is the decoded
            result of ``last_error_message``.
    """
    # The C function only receives raw pointers and knows nothing about
    # strides, so make sure the memory it reads is C-contiguous.
    # ascontiguousarray returns the input itself when it already is.
    cdef numpy.ndarray[numpy.float32_t, ndim=2] f0_c = numpy.ascontiguousarray(f0)
    cdef numpy.ndarray[numpy.float32_t, ndim=2] phoneme_c = numpy.ascontiguousarray(phoneme)
    cdef numpy.ndarray[numpy.int64_t, ndim=1] speaker_id_c = numpy.ascontiguousarray(speaker_id)
    cdef numpy.ndarray[numpy.float32_t, ndim=1] output = numpy.empty((length*256,), dtype=numpy.float32)
    cdef bool success = c_decode_forward(
        length,
        phoneme_size,
        <float*> f0_c.data,
        <float*> phoneme_c.data,
        <long*> speaker_id_c.data,
        <float*> output.data,
    )
    if not success:
        raise Exception(c_last_error_message().decode())
    return output
Loading

0 comments on commit e7c20f8

Please sign in to comment.