From 68610880c91012967bf6b0e89cad7d0ff684eff1 Mon Sep 17 00:00:00 2001 From: Lucas Newman Date: Sun, 1 Dec 2024 12:15:55 -0800 Subject: [PATCH] 0.2.0 --- README.md | 16 +++++++++++++--- f5_tts_mlx/generate.py | 6 ++++-- pyproject.toml | 2 +- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 2f25da0..677f9c6 100644 --- a/README.md +++ b/README.md @@ -16,12 +16,22 @@ F5 is an evolution of [E2 TTS](https://arxiv.org/abs/2406.18009v2) and improves pip install f5-tts-mlx ``` -## Usage +## Basic Usage ```bash python -m f5_tts_mlx.generate --text "The quick brown fox jumped over the lazy dog." ``` +You can also use a pipe to generate speech from the output of another process, for instance from a language model: + +```bash +mlx_lm.generate --model mlx-community/Llama-3.2-1B-Instruct-4bit --verbose false \ +--prompt "Write a concise paragraph explaining wavelets as used in signal processing." \ +| python -m f5_tts_mlx.generate +``` + +## Voice Matching + If you want to use your own reference audio sample, make sure it's a mono, 24kHz wav file of around 5-10 seconds: ```bash @@ -39,9 +49,9 @@ ffmpeg -i /path/to/audio.wav -ac 1 -ar 24000 -sample_fmt s16 -t 10 /path/to/outp See [here](./f5_tts_mlx) for more options to customize generation. 
-— +## From Python -You can load a pretrained model from Python like this: +You can load a pretrained model from Python: ```python from f5_tts_mlx.generate import generate diff --git a/f5_tts_mlx/generate.py b/f5_tts_mlx/generate.py index a3faadb..2ce0279 100644 --- a/f5_tts_mlx/generate.py +++ b/f5_tts_mlx/generate.py @@ -185,7 +185,8 @@ def generate( if output_path is not None: sf.write(output_path, np.array(wave), SAMPLE_RATE) - player.stop() + if player is not None: + player.stop() else: start_date = datetime.datetime.now() @@ -229,7 +230,8 @@ def generate( if output_path is not None: sf.write(output_path, np.array(wave), SAMPLE_RATE) - player.stop() + if player is not None: + player.stop() if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index 9c0eb01..87a0005 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" [project] name = "f5-tts-mlx" -version = "0.1.9" +version = "0.2.0" authors = [{name = "Lucas Newman", email = "lucasnewman@me.com"}] license = {text = "MIT"} description = "F5-TTS - MLX"