From 1af6236aa541165ea30a5572a738a7914600e8ca Mon Sep 17 00:00:00 2001
From: Duy Huynh
Date: Fri, 29 Mar 2024 14:33:32 +0700
Subject: [PATCH] feat: add tts docstring

---
 app.py |  8 ++++----
 tts.py | 29 ++++++++++++++++++++++++++++-
 2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/app.py b/app.py
index c6643db..830ba3d 100644
--- a/app.py
+++ b/app.py
@@ -62,14 +62,14 @@ def callback(indata, frames, time, status):
 
 
 if __name__ == "__main__":
-    console.print(
-        "[cyan]Assistant started! Press Ctrl+C to exit."
-    )
+    console.print("[cyan]Assistant started! Press Ctrl+C to exit.")
 
     try:
         while True:
             # Wait for the user to press Enter to start recording
-            console.input("Press Enter to start recording, then press Enter again to stop.")
+            console.input(
+                "Press Enter to start recording, then press Enter again to stop."
+            )
             data_queue = Queue()  # type: ignore[var-annotated]
             stop_event = threading.Event()
 
diff --git a/tts.py b/tts.py
index bb7e4dc..13b9613 100644
--- a/tts.py
+++ b/tts.py
@@ -12,12 +12,29 @@
 
 class TextToSpeechService:
     def __init__(self, device: str = "cuda" if torch.cuda.is_available() else "cpu"):
+        """
+        Initializes the TextToSpeechService class.
+
+        Args:
+            device (str, optional): The device to be used for the model, either "cuda" if a GPU is available or "cpu".
+                Defaults to "cuda" if available, otherwise "cpu".
+        """
         self.device = device
         self.processor = AutoProcessor.from_pretrained("suno/bark-small")
         self.model = BarkModel.from_pretrained("suno/bark-small")
        self.model.to(self.device)
 
-    def synthesize(self, text: str, voice_preset: str):
+    def synthesize(self, text: str, voice_preset: str = "v2/en_speaker_1"):
+        """
+        Synthesizes audio from the given text using the specified voice preset.
+
+        Args:
+            text (str): The input text to be synthesized.
+            voice_preset (str, optional): The voice preset to be used for the synthesis. Defaults to "v2/en_speaker_1".
+
+        Returns:
+            tuple: A tuple containing the sample rate and the generated audio array.
+        """
         inputs = self.processor(text, voice_preset=voice_preset, return_tensors="pt")
         inputs = {k: v.to(self.device) for k, v in inputs.items()}
 
@@ -29,6 +46,16 @@ def synthesize(self, text: str, voice_preset: str):
         return sample_rate, audio_array
 
     def long_form_synthesize(self, text: str, voice_preset: str = "v2/en_speaker_1"):
+        """
+        Synthesizes audio from the given long-form text using the specified voice preset.
+
+        Args:
+            text (str): The input text to be synthesized.
+            voice_preset (str, optional): The voice preset to be used for the synthesis. Defaults to "v2/en_speaker_1".
+
+        Returns:
+            tuple: A tuple containing the sample rate and the generated audio array.
+        """
         pieces = []
         sentences = nltk.sent_tokenize(text)
         silence = np.zeros(int(0.25 * self.model.generation_config.sample_rate))