DSProjects2024 · raaguln · Mar 8, 2024 · Mar 8, 2024 · Mar 8, 2024 · Mar 8, 2024
diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml
@@ -41,7 +41,7 @@ jobs:
     # GitHub will run the tests for each of these Python versions.
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10"]
 
     # The actual workflow steps!
     steps:

diff --git a/.gitignore b/.gitignore
@@ -4,3 +4,4 @@ personal
 .ipynb*
 data/test
 .streamlit
+secrets.toml
diff --git a/environment.yml b/environment.yml
@@ -2,6 +2,7 @@ name: thronetalks
 channels:
   - defaults
 dependencies:
+  - pip
   - pip:
       - -r requirements.txt
 prefix: /Users/abhinavduvvuri/opt/anaconda3/envs/thronetalks
diff --git a/requirements.txt b/requirements.txt
@@ -4,6 +4,6 @@ nltk==3.8.1
 numpy==1.24.3
 openai==1.13.3
 pandas==2.0.3
-scikit_learn==1.3.0
+scikit_learn==1.0.2
 streamlit==1.32.0
-wordcloud==1.9.3
+wordcloud==1.9.3
diff --git a/thronetalk-game-of-thrones-summarizer/app.py b/thronetalk-game-of-thrones-summarizer/app.py
@@ -6,7 +6,7 @@
 import time
 import matplotlib.pyplot as plt
 from wordcloud import WordCloud
-from utils.model import model
+from utils.model import Model
 from utils.visualization_generator import VisualizationGenerator
 from utils.data_analysis import DataAnalysis
 
@@ -17,7 +17,7 @@
 csv_file_path = os.path.join(current_directory, 'data', 'Season_Episode_MultiEpisode.csv')
 
 #st.image("back.jpg", use_column_width=True)
-
+st. set_page_config(layout="wide") 
 def get_base64(bin_file):
     with open(bin_file, 'rb') as f:
         data = f.read()
@@ -93,13 +93,12 @@ def remove_zeros(lst):
     cleaned_data = pd.read_csv(f'{current_directory}/data/ouput_dialogues.csv')
     data_analysis = DataAnalysis(cleaned_data)
     top_3_characters, top_3_characters_dialogues = data_analysis.get_top_n_characters(
-        n_char=3,
         from_season=int(season_from),
         to_season=int(season_to),
         from_episode=int(from_ep_no),
         to_episode=int(to_ep_no)
     )
-    characters = top_3_characters
+    characters = top_3_characters[:3] #for first 3
     st.subheader(out_text_temp2)
 
     vg = VisualizationGenerator(
@@ -108,10 +107,10 @@ def remove_zeros(lst):
         int(season_to),
         int(to_ep_no)
     )
-    line_chart = vg.sentimentAnalysisVisualization(characters)
+    line_chart = vg.sentiment_analysis_visualization(characters)
     st.line_chart(line_chart)
     columns = st.columns(len(characters))
-    wordcloud = vg.multiWordCloud(characters)
+    wordcloud = vg.multi_word_cloud(characters)
 
     # Display word cloud on Streamlit UI
     plots = []
@@ -127,7 +126,7 @@ def remove_zeros(lst):
             st.pyplot(plots[i])
 
     def spinner_loading_summary():
-        got = model(season_from,from_ep_no, season_to, to_ep_no)
+        got = Model(season_from,from_ep_no, season_to, to_ep_no)
         time.sleep(1)
         return got.summarize()
 

diff --git a/thronetalk-game-of-thrones-summarizer/tests/test_visualization_generator.py b/thronetalk-game-of-thrones-summarizer/tests/test_visualization_generator.py
@@ -36,30 +36,30 @@ def test_wordcloud_error(self):
         '''Edge tests for wordcloud generation function'''
         v_g = VisualizationGenerator(1,1,1,2)
         with self.assertRaises(TypeError):
-            v_g.multiWordCloud() # pylint: disable=no-value-for-parameter
+            v_g.multi_word_cloud() # pylint: disable=no-value-for-parameter
         with self.assertRaises(ValueError):
-            v_g.multiWordCloud([])
+            v_g.multi_word_cloud([])
         with self.assertRaises(ValueError):
-            v_g.multiWordCloud(['', ''])
+            v_g.multi_word_cloud(['', ''])
 
     def test_sentiment_analysis_visualization_error(self):
         '''Edge tests for sentiment analysis viz generation function'''
         v_g = VisualizationGenerator(1,1,1,2)
         with self.assertRaises(TypeError):
-            v_g.sentimentAnalysisVisualization() # pylint: disable=no-value-for-parameter
+            v_g.sentiment_analysis_visualization() # pylint: disable=no-value-for-parameter
         with self.assertRaises(ValueError):
-            v_g.sentimentAnalysisVisualization([])
+            v_g.sentiment_analysis_visualization([])
         with self.assertRaises(ValueError):
-            v_g.sentimentAnalysisVisualization(['', ''])
+            v_g.sentiment_analysis_visualization(['', ''])
 
     # Smoke tests
     # @patch('scripts.visualization_generator.pd.read_csv',
     #        side_effect=mock_functions.mocked_read_csv_ouput_dialogues)
     # def test_smoke_test(self):
     #     top_3_characters = ["eddard","catelyn","robert"]
     #     vg = VisualizationGenerator(1,1,1,3)
-    #     vg.multiWordCloud(top_3_characters)
-    #     vg.sentimentAnalysisVisualization(top_3_characters)
+    #     vg.multi_word_cloud(top_3_characters)
+    #     vg.sentiment_analysis_visualization(top_3_characters)
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/thronetalk-game-of-thrones-summarizer/utils/__init__.py b/thronetalk-game-of-thrones-summarizer/utils/__init__.py
@@ -1,2 +1,14 @@
-'''init file for utils'''
+
+"""
+utils module for Streamlit App
+
+This module provides helper functions for building a Streamlit application
+related to Game of Thrones. It includes functionalities for:
+
+* Creating Wordcloud visualizations based on Game of Thrones characters
+* Performing sentiment analysis on Game of Thrones characters
+* Summarizing Game of Thrones seasons and episodes
+
+Use these functions within your Streamlit app to enhance its capabilities.
+"""
 from .visualization_generator import VisualizationGenerator
diff --git a/thronetalk-game-of-thrones-summarizer/utils/model.py b/thronetalk-game-of-thrones-summarizer/utils/model.py
@@ -1,48 +1,93 @@
-import os
+"""
+This module provides a class (`Model`) to summarize the plot of Game of Thrones (GoT) 
+based on user-specified episode and season ranges.
+
+The `Model` class utilizes the Azure OpenAI API to generate summaries through conversation prompts. 
+It first constructs the prompt based on the provided season and episode information and then 
+calls the Azure OpenAI API to obtain the summarized text.
+"""
 from openai import AzureOpenAI
 import streamlit as st
-class model:
-    def __init__(self, seasonFrom=1, episodeFrom=1  , seasonTo=1, episodeTo=1):
-        self.episodeFrom = episodeFrom
-        self.episodeTo = episodeTo
-        self.seasonFrom = seasonFrom
-        self.seasonTo = seasonTo
-
-    def createSummarizerInput(self):
-        if self.episodeFrom == self.episodeTo and self.seasonFrom == self.seasonTo:
-            messageText = [{"role":"system","content":"Summarize Game of thrones season "+ str(self.seasonFrom) + " episode "+ str(self.episodeFrom) + " in 300 words."}]
-
+
+class Model:
+    """
+    A class to create a summary of the GoT plot.
+    """
+    def __init__(self, season_from=1, episode_from=1, season_to=1, episode_to=1):
+        """
+        Initializes the summarizer with episode and season information.
+
+        Args:
+            season_from: The starting season number (inclusive).
+            episode_from: The starting episode number (inclusive) within the starting season.
+            season_to: The ending season number (inclusive).
+            episode_to: The ending episode number (inclusive) within the ending season.
+        """
+        self.episode_from = episode_from
+        self.episode_to = episode_to
+        self.season_from = season_from
+        self.season_to = season_to
+
+    def create_summarizer_input(self):
+        """
+        Creates the summary prompt from the stored season and episode range.
+
+        Returns:
+            A list containing a dictionary for the prompt.
+        """
+        if self.episode_from == self.episode_to and self.season_from == self.season_to:
+            message_text = [{
+                "role": "system",
+                "content": f'''Summarize Game of thrones season {str(self.season_from)}
+                  episode {str(self.episode_from)} in 300 words.'''
+                }]
         else:
-            messageText = [{"role":"system","content":"Summarize Game of thrones from season "+ str(self.seasonFrom) + " episode "+ str(self.episodeFrom) + " to season " + str(self.seasonTo) + " episode " + str(self.episodeTo) + " in 300 words."}]
-        return messageText 
+            message_text = [{
+                "role": "system",
+                "content": f'''Summarize Game of thrones from season {str(self.season_from)}
+                 episode {str(self.episode_from)} to season {str(self.season_to)}
+                 episode {str(self.episode_to)} in 300 words.'''
+                }]
+        return message_text
+
+    def azure_api_call(self, message_text):
+        """
+        Calls the Azure OpenAI API with the prompt `message_text`.
 
-    def azureAPICall(self, messageText):
+        Args:
+            message_text: A list of dictionaries containing the role ("system" or "user")
+            and content of the messages.
+
+        Returns:
+            The completed response from the Azure OpenAI API as a string.
+        """
         client = AzureOpenAI(
-            azure_endpoint = st.secrets["AZURE_ENDPOINT"], 
+            azure_endpoint = st.secrets["AZURE_ENDPOINT"],
             api_key = st.secrets["AZURE_OPENAI_KEY"],
             api_version="2024-02-15-preview"
-)
-        
+        )
+
         completion = client.chat.completions.create(
-        model="ThroneTalk", # model = "deployment_name"
-        messages = messageText,
-        temperature=0.7,
-        max_tokens=800,
-        top_p=0.95,
-        frequency_penalty=0,
-        presence_penalty=0,
-        stop=None
+            model="ThroneTalk", # model = "deployment_name"
+            messages = message_text,
+            temperature=0.7,
+            max_tokens=800,
+            top_p=0.95,
+            frequency_penalty=0,
+            presence_penalty=0,
+            stop=None
         )
-        #completion = 'This is a test completion. API has been commented out '+ str(messageText)
         return completion.choices[0].message.content
-    
+
     def summarize(self):
+        """
+        Summarizes the configured episode range using the Azure OpenAI API.
+        Builds the prompt with `create_summarizer_input` and sends it via `azure_api_call`.
+
+        Returns:
+            The summarized text as a string.
+        """
         summary = ''
-        messageText = self.createSummarizerInput()
-        summary = self.azureAPICall(messageText)
-        #summary = self.extractOutput(rawData)
+        message_text = self.create_summarizer_input()
+        summary = self.azure_api_call(message_text)
         return summary
-
-if __name__ == '__main__':
-    got = model(1,1,2,2)
-    # print(got.summarize())