Commit

Use previous commit from RVC CLI
aitronz committed Jan 20, 2024
1 parent 747765a commit e009d28
Showing 6 changed files with 129 additions and 44 deletions.
rvc/train/extract/extract_feature_print.py (4 changes: 2 additions & 2 deletions)
@@ -13,11 +13,11 @@
 i_part = int(sys.argv[3])

 if len(sys.argv) == 7:
-    exp_dir, version, is_half = sys.argv[4], sys.argv[5], bool(sys.argv[6])
+    exp_dir, version, is_half = sys.argv[4], sys.argv[5], sys.argv[6]
 else:
     i_gpu, exp_dir = sys.argv[4], sys.argv[5]
     os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu)
-    version, is_half = sys.argv[6], bool(sys.argv[7])
+    version, is_half = sys.argv[6], sys.argv[7]


 def forward_dml(ctx, x, scale):
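
A note on the is_half change above: bool() over an argv string is True for any non-empty string, so bool("False") would still be truthy; the commit passes the raw string through instead. The sketch below is illustrative only and not part of the commit, and parse_flag is a hypothetical helper.

import sys

def parse_flag(value: str) -> bool:
    # Hypothetical helper: explicit string-to-bool parsing.
    return value.strip().lower() in ("1", "true", "yes")

if __name__ == "__main__":
    raw = sys.argv[1] if len(sys.argv) > 1 else "False"
    print(bool(raw))        # True even for "False" -- the behaviour being dropped
    print(parse_flag(raw))  # False for "False"
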
rvc/train/index_generator.py (35 changes: 10 additions & 25 deletions)
@@ -45,39 +45,24 @@

     n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)

-    # index_trained
-    index_trained = faiss.index_factory(
-        256 if version == "v1" else 768, f"IVF{n_ivf},Flat"
-    )
-    index_ivf_trained = faiss.extract_index_ivf(index_trained)
-    index_ivf_trained.nprobe = 1
-    index_trained.train(big_npy)
+    index = faiss.index_factory(256 if version == "v1" else 768, f"IVF{n_ivf},Flat")

-    index_filename_trained = (
-        f"trained_IVF{n_ivf}_Flat_nprobe_{index_ivf_trained.nprobe}_{version}.index"
-    )
-    index_filepath_trained = os.path.join(exp_dir, index_filename_trained)
+    index_ivf = faiss.extract_index_ivf(index)
+    index_ivf.nprobe = 1
+    index.train(big_npy)

-    faiss.write_index(index_trained, index_filepath_trained)
-
-    # index_added
-    index_added = faiss.index_factory(
-        256 if version == "v1" else 768, f"IVF{n_ivf},Flat"
+    index_filename = (
+        f"trained_IVF{n_ivf}_Flat_nprobe_{index_ivf.nprobe}_{version}.index"
     )
-    index_ivf_added = faiss.extract_index_ivf(index_added)
-    index_ivf_added.nprobe = 1
-    index_added.train(big_npy)
+    index_filepath = os.path.join(exp_dir, index_filename)

-    index_filename_added = (
-        f"added_IVF{n_ivf}_Flat_nprobe_{index_ivf_added.nprobe}_{version}.index"
-    )
-    index_filepath_added = os.path.join(exp_dir, index_filename_added)
+    faiss.write_index(index, index_filepath)

     batch_size_add = 8192
     for i in range(0, big_npy.shape[0], batch_size_add):
-        index_added.add(big_npy[i : i + batch_size_add])
+        index.add(big_npy[i : i + batch_size_add])

-    faiss.write_index(index_added, index_filepath_added)
+    faiss.write_index(index, index_filepath)

 except Exception as error:
     print(f"Failed to train index: {error}")
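
For readers unfamiliar with FAISS, here is a minimal self-contained sketch of the same build flow the simplified code above follows (factory string, nprobe, train, batched add, write) plus a query against the saved file. Random vectors stand in for the real extracted features, the 768 dimension matches the non-v1 branch above, and the file name is illustrative.

import faiss
import numpy as np

feats = np.random.rand(10_000, 768).astype(np.float32)  # placeholder for big_npy
# The // 39 cap keeps at least ~39 training vectors per IVF cell, the point
# below which FAISS warns about under-trained clustering.
n_ivf = min(int(16 * np.sqrt(feats.shape[0])), feats.shape[0] // 39)

index = faiss.index_factory(768, f"IVF{n_ivf},Flat")
index_ivf = faiss.extract_index_ivf(index)
index_ivf.nprobe = 1                       # search only the closest cluster
index.train(feats)                         # learn the IVF centroids

for i in range(0, feats.shape[0], 8192):   # add in batches, as the script does
    index.add(feats[i : i + 8192])

faiss.write_index(index, "example.index")

loaded = faiss.read_index("example.index")
scores, ids = loaded.search(feats[:1], 8)  # distances and ids of the 8 nearest rows
print(ids)
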
rvc/train/losses.py (1 change: 0 additions & 1 deletion)
@@ -1,6 +1,5 @@
 import torch

-
 def feature_loss(fmap_r, fmap_g):
     loss = 0
     for dr, dg in zip(fmap_r, fmap_g):
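
The feature_loss shown truncated above is the feature-matching loss used across the HiFi-GAN/VITS family that RVC builds on. Its full body is not part of this diff; the common form, for reference, looks roughly like the sketch below and may differ in detail from the file.

import torch

def feature_loss(fmap_r, fmap_g):
    # L1 distance between real and generated discriminator feature maps,
    # summed over every layer of every sub-discriminator.
    loss = 0
    for dr, dg in zip(fmap_r, fmap_g):
        for rl, gl in zip(dr, dg):
            rl = rl.float().detach()
            gl = gl.float()
            loss += torch.mean(torch.abs(rl - gl))
    return loss * 2
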
rvc/train/slicer.py (71 changes: 70 additions & 1 deletion)
@@ -1,9 +1,9 @@
 import os
+from argparse import ArgumentParser
 import librosa
 import soundfile
 import numpy as np


 class Slicer:
     def __init__(
         self,
@@ -172,3 +172,72 @@ def get_rms(

     power = np.mean(np.abs(x) ** 2, axis=-2, keepdims=True)
     return np.sqrt(power)


+def main():
+    parser = ArgumentParser()
+    parser.add_argument("audio", type=str, help="The audio to be sliced")
+    parser.add_argument(
+        "--out", type=str, help="Output directory of the sliced audio clips"
+    )
+    parser.add_argument(
+        "--db_thresh",
+        type=float,
+        default=-40,
+        help="The dB threshold for silence detection",
+    )
+    parser.add_argument(
+        "--min_length",
+        type=int,
+        default=5000,
+        help="The minimum milliseconds required for each sliced audio clip",
+    )
+    parser.add_argument(
+        "--min_interval",
+        type=int,
+        default=300,
+        help="The minimum milliseconds for a silence part to be sliced",
+    )
+    parser.add_argument(
+        "--hop_size", type=int, default=10, help="Frame length in milliseconds"
+    )
+    parser.add_argument(
+        "--max_sil_kept",
+        type=int,
+        default=500,
+        help="The maximum silence length kept around the sliced clip, presented in milliseconds",
+    )
+    args = parser.parse_args()
+
+    out = args.out or os.path.dirname(os.path.abspath(args.audio))
+    audio, sr = librosa.load(args.audio, sr=None, mono=False)
+
+    slicer = Slicer(
+        sr=sr,
+        threshold=args.db_thresh,
+        min_length=args.min_length,
+        min_interval=args.min_interval,
+        hop_size=args.hop_size,
+        max_sil_kept=args.max_sil_kept,
+    )
+
+    chunks = slicer.slice(audio)
+
+    if not os.path.exists(out):
+        os.makedirs(out)
+
+    for i, chunk in enumerate(chunks):
+        if len(chunk.shape) > 1:
+            chunk = chunk.T
+        soundfile.write(
+            os.path.join(
+                out,
+                f"{os.path.basename(args.audio).rsplit('.', maxsplit=1)[0]}_{i}.wav",
+            ),
+            chunk,
+            sr,
+        )
+
+
+if __name__ == "__main__":
+    main()
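
A short usage sketch for the class the new main() above wraps; the audio path and output directory are illustrative, and the keyword values simply mirror the CLI defaults. Roughly equivalent to running python rvc/train/slicer.py vocals.wav --out sliced.

import os

import librosa
import soundfile

from slicer import Slicer  # assumes rvc/train/ is on sys.path

audio, sr = librosa.load("vocals.wav", sr=None, mono=False)  # illustrative file
slicer = Slicer(
    sr=sr,
    threshold=-40,
    min_length=5000,
    min_interval=300,
    hop_size=10,
    max_sil_kept=500,
)

os.makedirs("sliced", exist_ok=True)
for i, chunk in enumerate(slicer.slice(audio)):
    if chunk.ndim > 1:
        chunk = chunk.T  # soundfile expects (frames, channels)
    soundfile.write(f"sliced/vocals_{i}.wav", chunk, sr)
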
rvc/train/train.py (25 changes: 12 additions & 13 deletions)
@@ -43,7 +43,7 @@
     kl_loss,
 )
 from mel_processing import mel_spectrogram_torch, spec_to_mel_torch
-from process_ckpt import save_final, extract_small_model
+from process_ckpt import save_final

 from rvc.lib.infer_pack import commons

@@ -90,7 +90,7 @@ def record(self):
         elapsed_time = round(elapsed_time, 1)
         elapsed_time_str = str(datetime.timedelta(seconds=int(elapsed_time)))
         current_time = datetime.datetime.now().strftime("%H:%M:%S")
-        return f"time={current_time} | training_speed={elapsed_time_str}"
+        return f"[{current_time}] | ({elapsed_time_str})"


 def main():
@@ -201,7 +201,6 @@ def run(
         net_d = DDP(net_d)

     try:
-        print("Starting training...")
         _, _, _, epoch_str = load_checkpoint(
             latest_checkpoint_path(hps.model_dir, "D_*.pth"), net_d, optim_d
         )
@@ -478,6 +477,10 @@ def train_and_evaluate(rank, epoch, hps, nets, optims, scaler, loaders, writers,
                 if loss_kl > 9:
                     loss_kl = 9

+                # print([global_step, lr])
+                print(
+                    f"[loss_disc={loss_disc:.3f} | loss_gen={loss_gen:.3f} | loss_fm={loss_fm:.3f} | loss_mel={loss_mel:.3f} | loss_kl={loss_kl:.3f}]"
+                )
                 scalar_dict = {
                     "loss/g/total": loss_gen_all,
                     "loss/d/total": loss_disc,
@@ -556,15 +559,15 @@ def train_and_evaluate(rank, epoch, hps, nets, optims, scaler, loaders, writers,
             else:
                 ckpt = net_g.state_dict()
             print(
-                "saving ckpt %s_e%s:%s"
+                "Saving small model... %s_e%s:%s"
                 % (
                     hps.name,
                     epoch,
-                    extract_small_model(
+                    save_final(
                         ckpt,
                         hps.sample_rate,
                         hps.if_f0,
-                        hps.name + "_e%s_s%s" % (epoch, global_step),
+                        hps.name + f"_e{epoch}_s{global_step}",
                         epoch,
                         hps.version,
                         hps,
@@ -573,20 +576,16 @@ def train_and_evaluate(rank, epoch, hps, nets, optims, scaler, loaders, writers,
             )

     if rank == 0:
-        print(
-            f"{hps.name} | epoch={epoch} | step={global_step} | {epoch_recorder.record()} | loss_disc={loss_disc:.3f} | loss_gen={loss_gen:.3f} | loss_fm={loss_fm:.3f} | loss_mel={loss_mel:.3f} | loss_kl={loss_kl:.3f}"
-        )
+        print(f"Epoch {epoch}: {epoch_recorder.record()}")
     if epoch >= hps.total_epoch and rank == 0:
-        print(
-            f"Training has been successfully completed with {epoch} epoch and {global_step} steps."
-        )
+        print("The training is over, finalizing the program...")

         if hasattr(net_g, "module"):
             ckpt = net_g.module.state_dict()
         else:
             ckpt = net_g.state_dict()
         print(
-            "Saving final checkpoint: %s"
+            "Saving final model... %s"
             % (
                 save_final(
                     ckpt, hps.sample_rate, hps.if_f0, hps.name, epoch, hps.version, hps
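
To see what the trimmed-down epoch log above actually prints, here is a standalone reconstruction of an EpochRecorder-style helper based only on the lines visible in this diff; the real class in train.py may differ in detail.

import datetime
import time

class EpochRecorder:
    def __init__(self):
        self.last_time = time.time()

    def record(self):
        now = time.time()
        elapsed_time = round(now - self.last_time, 1)
        self.last_time = now
        elapsed_time_str = str(datetime.timedelta(seconds=int(elapsed_time)))
        current_time = datetime.datetime.now().strftime("%H:%M:%S")
        return f"[{current_time}] | ({elapsed_time_str})"

epoch_recorder = EpochRecorder()
time.sleep(1.2)
print(f"Epoch 1: {epoch_recorder.record()}")  # e.g. Epoch 1: [14:03:07] | (0:00:01)
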
rvc/train/utils.py (37 changes: 35 additions & 2 deletions)
@@ -83,12 +83,12 @@ def load_checkpoint(checkpoint_path, model, optimizer=None, load_opt=1):
     learning_rate = checkpoint_dict["learning_rate"]
     if optimizer is not None and load_opt == 1:
         optimizer.load_state_dict(checkpoint_dict["optimizer"])
-    print(f"Loaded checkpoint '{checkpoint_path}' (epoch {iteration})")
+    print(f"Loaded checkpoint '{checkpoint_path}' (epochs {iteration})")
     return model, optimizer, learning_rate, iteration


 def save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path):
-    print(f"Saving model '{checkpoint_path}' (epoch {iteration})")
+    print(f"Saving model '{checkpoint_path}' (epochs {iteration})")
     if hasattr(model, "module"):
         state_dict = model.module.state_dict()
     else:
@@ -218,6 +218,34 @@ def get_hparams():
         required=True,
         help="if caching the dataset in GPU memory, 1 or 0",
     )
+    parser.add_argument(
+        "-sof",
+        "--stop_on_fit",
+        type=int,
+        required=False,
+        help="if retraining mode collapses, 1 or 0",
+    )
+    parser.add_argument(
+        "-sm",
+        "--smoothness",
+        type=float,
+        required=False,
+        help="smoothness for --stop_on_fit",
+    )
+    parser.add_argument(
+        "-rc",
+        "--retrain_collapse",
+        type=int,
+        required=False,
+        help="if retraining mode collapses, 1 or 0",
+    )
+    parser.add_argument(
+        "-ct",
+        "--collapse_threshold",
+        type=int,
+        required=False,
+        help="if retraining mode collapses, 1 or 0",
+    )
     args = parser.parse_args()
     name = args.experiment_dir
     experiment_dir = os.path.join("./logs", args.experiment_dir)
@@ -239,6 +267,11 @@
     hparams.if_latest = args.if_latest
     hparams.save_every_weights = args.save_every_weights
     hparams.if_cache_data_in_gpu = args.if_cache_data_in_gpu
+    hparams.if_stop_on_fit = args.stop_on_fit
+    hparams.smoothness = args.smoothness
+    hparams.if_retrain_collapse = args.retrain_collapse
+    if args.collapse_threshold != None:
+        hparams.collapse_threshold = args.collapse_threshold * 0.01
     hparams.data.training_files = f"{experiment_dir}/filelist.txt"
     return hparams

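
A minimal sketch, not from the repository, of how the four new optional flags land on the hyperparameter object; SimpleNamespace stands in for the project's hparams container, and the example argv shows the percent-to-fraction conversion done by collapse_threshold * 0.01.

import argparse
from types import SimpleNamespace

parser = argparse.ArgumentParser()
parser.add_argument("-sof", "--stop_on_fit", type=int, required=False)
parser.add_argument("-sm", "--smoothness", type=float, required=False)
parser.add_argument("-rc", "--retrain_collapse", type=int, required=False)
parser.add_argument("-ct", "--collapse_threshold", type=int, required=False)
args = parser.parse_args(["-rc", "1", "-ct", "25"])  # example invocation

hparams = SimpleNamespace()
hparams.if_stop_on_fit = args.stop_on_fit        # None when the flag is omitted
hparams.smoothness = args.smoothness             # None when the flag is omitted
hparams.if_retrain_collapse = args.retrain_collapse
if args.collapse_threshold is not None:          # the commit itself writes `!= None`
    hparams.collapse_threshold = args.collapse_threshold * 0.01  # 25 -> 0.25

print(vars(hparams))
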
