From 5791a951e4859dffe9002f0794dcab2f89927f5d Mon Sep 17 00:00:00 2001
From: David de la Iglesia Castro <daviddelaiglesiacastro@gmail.com>
Date: Wed, 4 Dec 2024 12:40:41 +0100
Subject: [PATCH] fix(docs): Update broken links. (#36)

* fix(docs): Update broken links.

* Add search function in docs

* Add basic docstring to config.py

* Add docstring to save_waveform_as_file

* Add missing references and fix a typo

---------

Co-authored-by: Kostis-S-Z <kostissz@pm.me>
---
 docs/api.md                                    |  4 ++++
 docs/customization.md                          |  2 +-
 docs/index.md                                  |  2 +-
 docs/step-by-step-guide.md                     | 18 +++++++++---------
 mkdocs.yml                                     |  1 +
 .../podcast_maker/config.py                    |  9 +++++++++
 .../podcast_maker/script_to_audio.py           |  9 +++++++++
 7 files changed, 34 insertions(+), 11 deletions(-)
diff --git a/docs/api.md b/docs/api.md
index 2995f23..c781ee2 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -7,3 +7,7 @@
 ::: document_to_podcast.inference.text_to_text
 
 ::: document_to_podcast.inference.text_to_speech
+
+::: document_to_podcast.podcast_maker.script_to_audio
+
+::: document_to_podcast.podcast_maker.config
diff --git a/docs/customization.md b/docs/customization.md
index 92b68b3..8cab6e0 100644
--- a/docs/customization.md
+++ b/docs/customization.md
@@ -86,4 +86,4 @@ def load_text_to_speech_model_and_tokenizer():
 
 ## 🤝 **Contributing to the Blueprint**
 
-Want to help improve or extend this Blueprint? Check out the **[Future Features & Contributions Guide](../future-features-contributions)** to see how you can contribute your ideas, code, or feedback to make this Blueprint even better!
+Want to help improve or extend this Blueprint? Check out the **[Future Features & Contributions Guide](future-features-contributions.md)** to see how you can contribute your ideas, code, or feedback to make this Blueprint even better!
diff --git a/docs/index.md b/docs/index.md
index b5345e2..e0ea982 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,7 +1,7 @@
 # **Document-to-Podcast Blueprint**
 
 <div style="text-align: center;">
-  <img src="../images/document-to-podcast-diagram.png" alt="Project Logo" style="width: 100%; margin-bottom: 1px; margin-top: 1px;">
+  <img src="images/document-to-podcast-diagram.png" alt="Project Logo" style="width: 100%; margin-bottom: 1px; margin-top: 1px;">
 </div>
 
 Blueprints empower developers to easily integrate AI capabilities into their projects using open-source models and tools.
diff --git a/docs/step-by-step-guide.md b/docs/step-by-step-guide.md
index 1fc3bbe..a84b10a 100644
--- a/docs/step-by-step-guide.md
+++ b/docs/step-by-step-guide.md
@@ -1,6 +1,6 @@
 # **Step-by-Step Guide: How the Document-to-Podcast Blueprint Works**
 
-Transforming static documents into engaging podcast episodes involves a  integration of pre-processing, LLM-powered transcript generation, and text-to-speech generation. Here's how it all works under the hood:
+Transforming static documents into engaging podcast episodes involves an integration of pre-processing, LLM-powered transcript generation, and text-to-speech generation. Here's how it all works under the hood:
 
 ---
 
@@ -33,7 +33,7 @@ Cleaner input data ensures that the model works with reliable and consistent inf
 ### ⚙️ **Key Components in this Doc Pre-Processing**
  **1 - File Loading**
 
-   - Uses functions defined in `data_loaders.py`
+   - Uses functions defined in [`data_loaders.py`](api.md#document_to_podcast.preprocessing.data_loaders)
 
    - Supports `.html`, `.pdf`, `.txt`, and `.docx` formats.
 
@@ -41,7 +41,7 @@ Cleaner input data ensures that the model works with reliable and consistent inf
 
  **2 - Text Cleaning**
 
-   - Uses functions defined in [`data_cleaners.py`](../api/#document_to_podcast.inference.data_cleaners)
+   - Uses functions defined in [`data_cleaners.py`](api.md#document_to_podcast.preprocessing.data_cleaners)
 
    - Removes unwanted elements like URLs, email addresses, and special characters using Python's `re` library, which leverages **Regular Expressions** (regex) to identify and manipulate specific patterns in text.
 
@@ -55,7 +55,7 @@ In this step, the pre-processed text is transformed into a conversational podcas
 
  **1 - Model Loading**
 
-   - The [`model_loader.py`](../api/#document_to_podcast.inference.model_loaders) script is responsible for loading GGUF-type models using the `llama_cpp` library.
+   - The [`model_loaders.py`](api.md#document_to_podcast.inference.model_loaders) script is responsible for loading GGUF-type models using the `llama_cpp` library.
 
    - The function `load_llama_cpp_model` takes a model ID in the format `{org}/{repo}/{filename}` and loads the specified model.
 
@@ -63,7 +63,7 @@ In this step, the pre-processed text is transformed into a conversational podcas
 
  **2 - Text-to-Text Generation**
 
-   - The [`text_to_text.py`](../api/#document_to_podcast.inference.text_to_text) script manages the interaction with the language model, converting input text into a structured conversational podcast script.
+   - The [`text_to_text.py`](api.md#document_to_podcast.inference.text_to_text) script manages the interaction with the language model, converting input text into a structured conversational podcast script.
 
    - It uses the `chat_completion` function to process the input text and a customizable system prompt, guiding the language to generate a text output (e.g. a coherent podcast script between speakers).
 
@@ -80,7 +80,7 @@ In this final step, the generated podcast transcript is brought to life as an au
 
 **1 - Text-to-Speech Audio Generation**
 
-   - The `text_to_speech.py` script converts text into audio using a specified TTS model and tokenizer.
+   - The [`text_to_speech.py`](api.md#document_to_podcast.inference.text_to_speech) script converts text into audio using a specified TTS model and tokenizer.
 
    - A **speaker profile** defines the voice characteristics (e.g., tone, speed, clarity) for each speaker.
 
@@ -88,7 +88,7 @@ In this final step, the generated podcast transcript is brought to life as an au
 
 **2 - Parsing and Combining Voices**
 
-- The `script_to_audio.py` script ensures each speaker’s dialogue is spoken in their unique voice.
+- The [`script_to_audio.py`](api.md#document_to_podcast.podcast_maker.script_to_audio) script ensures each speaker’s dialogue is spoken in their unique voice.
 
 - The function `parse_script_to_waveform` splits the dialogue script by speakers and uses `text_to_speech` to generate audio for each speaker, stitching them together into a full podcast.
 
@@ -145,8 +145,8 @@ This demo uses [Streamlit](https://streamlit.io/), an open-source Python framewo
 
 ## 🎨 **Customizing the Blueprint**
 
-To better understand how you can tailor this Blueprint to suit your specific needs, please visit the **[Customization Guide](../customization)**.
+To better understand how you can tailor this Blueprint to suit your specific needs, please visit the **[Customization Guide](customization.md)**.
 
 ## 🤝 **Contributing to the Blueprint**
 
-Want to help improve or extend this Blueprint? Check out the **[Future Features & Contributions Guide](../future-features-contributions)** to see how you can contribute your ideas, code, or feedback to make this Blueprint even better!
+Want to help improve or extend this Blueprint? Check out the **[Future Features & Contributions Guide](future-features-contributions.md)** to see how you can contribute your ideas, code, or feedback to make this Blueprint even better!
diff --git a/mkdocs.yml b/mkdocs.yml
index 3d3940c..78e501d 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -34,6 +34,7 @@ markdown_extensions:
   - pymdownx.superfences
 
 plugins:
+- search
 - mkdocstrings:
     handlers:
       python:
diff --git a/src/document_to_podcast/podcast_maker/config.py b/src/document_to_podcast/podcast_maker/config.py
index 47cd1b7..50e737a 100644
--- a/src/document_to_podcast/podcast_maker/config.py
+++ b/src/document_to_podcast/podcast_maker/config.py
@@ -4,6 +4,10 @@
 
 
 class SpeakerConfig(BaseModel):
+    """
+    Pydantic model that stores configuration of an individual speaker for the TTS model.
+    """
+
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
     model: PreTrainedModel
@@ -16,5 +20,10 @@ class SpeakerConfig(BaseModel):
 
 
 class PodcastConfig(BaseModel):
+    """
+    Pydantic model that stores configuration of all the speakers for the TTS model. This allows different speakers to
+    use different models and configurations.
+    """
+
     speakers: Dict[str, SpeakerConfig]
     sampling_rate: int = 44_100
diff --git a/src/document_to_podcast/podcast_maker/script_to_audio.py b/src/document_to_podcast/podcast_maker/script_to_audio.py
index 403ca00..ef63e94 100644
--- a/src/document_to_podcast/podcast_maker/script_to_audio.py
+++ b/src/document_to_podcast/podcast_maker/script_to_audio.py
@@ -40,6 +40,15 @@ def parse_script_to_waveform(script: str, podcast_config: PodcastConfig):
 def save_waveform_as_file(
     waveform: np.ndarray, sampling_rate: int, filename: str
 ) -> None:
+    """
+    Save the output of the TTS (a numpy waveform) to a .wav file using the soundfile library.
+
+    Args:
+        waveform: 2D numpy array of a waveform
+        sampling_rate: Usually 44100 Hz, but check the specifications of the TTS model you are using.
+        filename: the destination filename to save the audio
+
+    """
     sf.write(filename, waveform, sampling_rate)