forked from VOICEVOX/voicevox_core
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Revert "[Rust]mainとのconflict解消 (VOICEVOX#204)"
This reverts commit db28cbf.
- Loading branch information
1 parent
db28cbf
commit e7c20f8
Showing
10 changed files
with
1,132 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -136,6 +136,3 @@ dmypy.json | |
|
||
# Cython debug symbols | ||
cython_debug/ | ||
|
||
# OpenJTalk-dictionary's dir | ||
open_jtalk_dic_utf_8-* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,226 @@ | ||
from abc import abstractmethod | ||
from dataclasses import dataclass | ||
from enum import Enum | ||
from pathlib import Path | ||
from typing import List, Sequence | ||
|
||
import numpy | ||
|
||
|
||
@dataclass
class SamplingData:
    """A sampled signal: an array of samples plus its sampling rate."""

    array: numpy.ndarray  # shape: (N, ?)
    rate: float

    def resample(self, sampling_rate: float, index: int = 0, length: int = None):
        """Return samples of ``array`` picked at ``sampling_rate``.

        Each output position is mapped back to a source position and
        truncated to an integer index, so samples are picked rather than
        interpolated.  One random offset in [0, 1) is added to every output
        position before scaling, so the result is generally not
        deterministic.

        Args:
            sampling_rate: Rate of the returned samples.
            index: Position of the first returned sample, in units of the
                output rate.
            length: Number of samples to return.  ``None`` derives it from
                the length of ``array`` converted to ``sampling_rate``.

        Returns:
            ``array`` indexed along its first axis at the computed positions.
        """
        if length is None:
            length = int(len(self.array) / self.rate * sampling_rate)

        # Source samples advanced per output sample.
        step = self.rate / sampling_rate
        # A single shared sub-sample offset for the whole call.
        jitter = numpy.random.rand()
        positions = (jitter + index + numpy.arange(length)) * step
        return self.array[positions.astype(int)]
|
||
|
||
class BasePhoneme(object):
    """One labelled phoneme segment with start and end times.

    Subclasses provide the phoneme inventory through the class attributes
    below and implement ``convert``.  Because ``__eq__`` is overridden
    without ``__hash__``, instances are unhashable.
    """

    phoneme_list: Sequence[str]  # inventory; position is the phoneme id
    num_phoneme: int  # size of the one-hot vector built by ``onehot``
    space_phoneme: str  # symbol subclasses substitute for silence labels

    def __init__(
        self,
        phoneme: str,
        start: float,
        end: float,
    ):
        self.phoneme = phoneme
        # Times are kept at two-decimal resolution.
        self.start = numpy.round(start, decimals=2)
        self.end = numpy.round(end, decimals=2)

    def __repr__(self):
        return f"Phoneme(phoneme='{self.phoneme}', start={self.start}, end={self.end})"

    def __eq__(self, o: object):
        return isinstance(o, BasePhoneme) and (
            self.phoneme == o.phoneme and self.start == o.start and self.end == o.end
        )

    def verify(self):
        """Raise ``AssertionError`` if the phoneme is not in ``phoneme_list``."""
        # Raised explicitly (not via ``assert``) so the check also runs
        # when Python is started with -O.
        if self.phoneme not in self.phoneme_list:
            raise AssertionError(f"{self.phoneme} is not defined.")

    @property
    def phoneme_id(self):
        """Index of this phoneme inside ``phoneme_list``."""
        return self.phoneme_list.index(self.phoneme)

    @property
    def duration(self):
        """Segment length, ``end - start``."""
        return self.end - self.start

    @property
    def onehot(self):
        """Boolean vector of size ``num_phoneme`` set only at ``phoneme_id``."""
        array = numpy.zeros(self.num_phoneme, dtype=bool)
        array[self.phoneme_id] = True
        return array

    @classmethod
    def parse(cls, s: str):
        """
        Build an instance from a whitespace-separated "start end phoneme" line.

        >>> BasePhoneme.parse('1.7425000 1.9125000 o:')
        Phoneme(phoneme='o:', start=1.74, end=1.91)
        """
        words = s.split()
        return cls(
            start=float(words[0]),
            end=float(words[1]),
            phoneme=words[2],
        )

    @classmethod
    @abstractmethod
    def convert(cls, phonemes: List["BasePhoneme"]) -> List["BasePhoneme"]:
        # This class does not use ABCMeta, so the decorator is not enforced
        # at instantiation; the base implementation simply returns None.
        pass

    @classmethod
    def load_julius_list(cls, path: Path):
        """Read a label file, then convert and verify every parsed phoneme.

        Lines that are empty or contain only whitespace (for example a lone
        carriage return) are skipped instead of being handed to ``parse``.

        Raises:
            AssertionError: a phoneme is missing from ``phoneme_list``.
        """
        lines = path.read_text().split("\n")
        phonemes = [cls.parse(line) for line in lines if line.strip()]
        phonemes = cls.convert(phonemes)

        for phoneme in phonemes:
            phoneme.verify()
        return phonemes

    @classmethod
    def save_julius_list(cls, phonemes: List["BasePhoneme"], path: Path):
        """Write one tab-separated "start end phoneme" line per phoneme.

        Lines are joined with newlines; no trailing newline is written.
        """
        text = "\n".join(
            [
                f"{numpy.round(p.start, decimals=2):.2f}\t"
                f"{numpy.round(p.end, decimals=2):.2f}\t"
                f"{p.phoneme}"
                for p in phonemes
            ]
        )
        path.write_text(text)
|
||
|
||
class JvsPhoneme(BasePhoneme):
    """Phoneme whose inventory is the 39-symbol list below."""

    # Order matters: a symbol's position is its phoneme id.
    phoneme_list = (
        "pau",
        "I",
        "N",
        "U",
        "a",
        "b",
        "by",
        "ch",
        "cl",
        "d",
        "dy",
        "e",
        "f",
        "g",
        "gy",
        "h",
        "hy",
        "i",
        "j",
        "k",
        "ky",
        "m",
        "my",
        "n",
        "ny",
        "o",
        "p",
        "py",
        "r",
        "ry",
        "s",
        "sh",
        "t",
        "ts",
        "u",
        "v",
        "w",
        "y",
        "z",
    )
    num_phoneme = len(phoneme_list)
    space_phoneme = "pau"

    @classmethod
    def convert(cls, phonemes: List["JvsPhoneme"]):
        """Rename a leading/trailing label containing "sil" to ``space_phoneme``.

        The list is modified in place and returned.  An empty list is
        returned unchanged instead of raising ``IndexError``.
        """
        if not phonemes:
            return phonemes

        # Only the two boundary entries are inspected.
        for edge in (phonemes[0], phonemes[-1]):
            if "sil" in edge.phoneme:
                edge.phoneme = cls.space_phoneme
        return phonemes
|
||
|
||
class OjtPhoneme(BasePhoneme):
    """Phoneme whose inventory is the 45-symbol list below."""

    # Order matters: a symbol's position is its phoneme id.
    phoneme_list = (
        "pau",
        "A",
        "E",
        "I",
        "N",
        "O",
        "U",
        "a",
        "b",
        "by",
        "ch",
        "cl",
        "d",
        "dy",
        "e",
        "f",
        "g",
        "gw",
        "gy",
        "h",
        "hy",
        "i",
        "j",
        "k",
        "kw",
        "ky",
        "m",
        "my",
        "n",
        "ny",
        "o",
        "p",
        "py",
        "r",
        "ry",
        "s",
        "sh",
        "t",
        "ts",
        "ty",
        "u",
        "v",
        "w",
        "y",
        "z",
    )
    num_phoneme = len(phoneme_list)
    space_phoneme = "pau"

    @classmethod
    def convert(cls, phonemes: List["OjtPhoneme"]):
        """Rename a leading/trailing label containing "sil" to ``space_phoneme``.

        The list is modified in place and returned.  An empty list is
        returned unchanged instead of raising ``IndexError``.
        """
        if not phonemes:
            return phonemes

        # Only the two boundary entries are inspected.
        for edge in (phonemes[0], phonemes[-1]):
            if "sil" in edge.phoneme:
                edge.phoneme = cls.space_phoneme
        return phonemes
|
||
|
||
class PhonemeType(str, Enum):
    """Names of the supported phoneme inventories.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    jvs = "jvs"
    openjtalk = "openjtalk"
|
||
|
||
# Lookup from a PhonemeType member to the phoneme class implementing it.
phoneme_type_to_class = {
    PhonemeType.jvs: JvsPhoneme,
    PhonemeType.openjtalk: OjtPhoneme,
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
from libcpp cimport bool | ||
|
||
cdef extern from "core.h":
    # C functions declared in core.h.  Each one is bound under a ``c_``
    # prefixed Cython name; the quoted string is the actual C symbol.
    #
    # NOTE(review): the wrappers pass int64 numpy buffers cast to ``long *``.
    # ``long`` is not 64 bits wide on every platform -- confirm these
    # parameter types against core.h.

    bool c_initialize "initialize" (
        const char *root_dir_path,
        bool use_gpu
    )

    void c_finalize "finalize" ()

    const char *c_metas "metas" ()

    bool c_yukarin_s_forward "yukarin_s_forward" (
        int length,
        long *phoneme_list,
        long *speaker_id,
        float *output
    )

    bool c_yukarin_sa_forward "yukarin_sa_forward" (
        int length,
        long *vowel_phoneme_list,
        long *consonant_phoneme_list,
        long *start_accent_list,
        long *end_accent_list,
        long *start_accent_phrase_list,
        long *end_accent_phrase_list,
        long *speaker_id,
        float *output
    )

    bool c_decode_forward "decode_forward" (
        int length,
        int phoneme_size,
        float *f0,
        float *phoneme,
        long *speaker_id,
        float *output
    )

    const char *c_last_error_message "last_error_message" ()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
cimport numpy | ||
import numpy | ||
|
||
from libcpp cimport bool | ||
|
||
cpdef initialize(
    str root_dir_path,
    bool use_gpu,
):
    """Call the core library's ``initialize`` with the given settings.

    ``root_dir_path`` is encoded to bytes before being handed to C.

    Raises:
        Exception: the C call reported failure; the message is the text
            returned by ``last_error_message``.
    """
    cdef bool success = c_initialize(
        root_dir_path.encode(),
        use_gpu,
    )
    if not success:
        raise Exception(c_last_error_message().decode())
|
||
cpdef finalize():
    """Call the core library's ``finalize``."""
    c_finalize()
|
||
cpdef metas():
    """Return the string produced by the core library's ``metas``, decoded to ``str``."""
    return c_metas().decode()
|
||
cpdef numpy.ndarray[numpy.float32_t, ndim=1] yukarin_s_forward(
    int length,
    numpy.ndarray[numpy.int64_t, ndim=1] phoneme_list,
    numpy.ndarray[numpy.int64_t, ndim=1] speaker_id,
):
    """Run the core library's ``yukarin_s_forward`` and return its output.

    A zero-filled float32 array of ``length`` elements is allocated and
    handed to the C function as its output buffer.

    Raises:
        Exception: the C call reported failure; the message is the text
            returned by ``last_error_message``.
    """
    # NOTE(review): the raw ``.data`` pointers are passed as-is, so the
    # input arrays are presumably expected to be C-contiguous -- confirm
    # with callers.
    cdef numpy.ndarray[numpy.float32_t, ndim=1] output = numpy.zeros((length,), dtype=numpy.float32)
    cdef bool success = c_yukarin_s_forward(
        length,
        <long*> phoneme_list.data,
        <long*> speaker_id.data,
        <float*> output.data,
    )
    if not success:
        raise Exception(c_last_error_message().decode())
    return output
|
||
|
||
cpdef numpy.ndarray[numpy.float32_t, ndim=2] yukarin_sa_forward(
    int length,
    numpy.ndarray[numpy.int64_t, ndim=2] vowel_phoneme_list,
    numpy.ndarray[numpy.int64_t, ndim=2] consonant_phoneme_list,
    numpy.ndarray[numpy.int64_t, ndim=2] start_accent_list,
    numpy.ndarray[numpy.int64_t, ndim=2] end_accent_list,
    numpy.ndarray[numpy.int64_t, ndim=2] start_accent_phrase_list,
    numpy.ndarray[numpy.int64_t, ndim=2] end_accent_phrase_list,
    numpy.ndarray[numpy.int64_t, ndim=1] speaker_id,
):
    """Run the core library's ``yukarin_sa_forward`` and return its output.

    An uninitialised float32 array of shape ``(len(speaker_id), length)``
    is allocated and handed to the C function as its output buffer.

    Raises:
        Exception: the C call reported failure; the message is the text
            returned by ``last_error_message``.
    """
    # NOTE(review): the raw ``.data`` pointers are passed as-is, so the
    # input arrays are presumably expected to be C-contiguous -- confirm
    # with callers.
    cdef numpy.ndarray[numpy.float32_t, ndim=2] output = numpy.empty((len(speaker_id), length,), dtype=numpy.float32)
    cdef bool success = c_yukarin_sa_forward(
        length,
        <long*> vowel_phoneme_list.data,
        <long*> consonant_phoneme_list.data,
        <long*> start_accent_list.data,
        <long*> end_accent_list.data,
        <long*> start_accent_phrase_list.data,
        <long*> end_accent_phrase_list.data,
        <long*> speaker_id.data,
        <float*> output.data,
    )
    if not success:
        raise Exception(c_last_error_message().decode())
    return output
|
||
cpdef numpy.ndarray[numpy.float32_t, ndim=1] decode_forward(
    int length,
    int phoneme_size,
    numpy.ndarray[numpy.float32_t, ndim=2] f0,
    numpy.ndarray[numpy.float32_t, ndim=2] phoneme,
    numpy.ndarray[numpy.int64_t, ndim=1] speaker_id,
):
    """Run the core library's ``decode_forward`` and return its output.

    An uninitialised float32 array of ``length * 256`` elements is
    allocated and handed to the C function as its output buffer.

    Raises:
        Exception: the C call reported failure; the message is the text
            returned by ``last_error_message``.
    """
    # NOTE(review): 256 is presumably the number of output samples per
    # input frame -- confirm against core.h.  The raw ``.data`` pointers
    # are passed as-is, so inputs are presumably expected to be
    # C-contiguous.
    cdef numpy.ndarray[numpy.float32_t, ndim=1] output = numpy.empty((length*256,), dtype=numpy.float32)
    cdef bool success = c_decode_forward(
        length,
        phoneme_size,
        <float*> f0.data,
        <float*> phoneme.data,
        <long*> speaker_id.data,
        <float*> output.data,
    )
    if not success:
        raise Exception(c_last_error_message().decode())
    return output
Oops, something went wrong.