From dfc75ead52cb89fe7a388cd855130c03b9cbb6e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kaan=20=C3=87ayl=C4=B1?=
 <38523756+kaancayli@users.noreply.github.com>
Date: Tue, 5 Mar 2024 16:50:09 +0100
Subject: [PATCH] Address issues

---
 app/common/custom_exceptions.py               | 22 ++++++
 app/config.py                                 | 31 +++++---
 app/domain/data/feedback_dto.py               |  3 +
 app/domain/data/message_dto.py                | 13 +++-
 .../tutor_chat_status_update_dto.py           |  2 +-
 app/pipeline/chat/file_selector_pipeline.py   | 30 ++++----
 .../output_models/selected_file_model.py      |  2 +-
 app/pipeline/chat/tutor_chat_pipeline.py      | 76 +++++++++++++------
 app/pipeline/prompts/file_selector_prompt.txt | 13 ++--
 .../prompts/iris_tutor_chat_prompt.txt        | 12 ---
 app/web/routers/pipelines.py                  |  9 ++-
 application.test.yml                          |  1 -
 12 files changed, 137 insertions(+), 77 deletions(-)

diff --git a/app/common/custom_exceptions.py b/app/common/custom_exceptions.py
index 2f29a8f6..7420fef5 100644
--- a/app/common/custom_exceptions.py
+++ b/app/common/custom_exceptions.py
@@ -21,3 +21,25 @@ def __init__(self):
                 "errorMessage": "Permission denied",
             },
         )
+
+
+class PipelineInvocationError(HTTPException):
+    def __init__(self):
+        super().__init__(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail={
+                "type": "bad_request",
+                "errorMessage": "Cannot invoke pipeline",
+            },
+        )
+
+
+class PipelineNotFoundException(HTTPException):
+    def __init__(self):
+        super().__init__(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail={
+                "type": "not_found",
+                "errorMessage": "Pipeline not found",
+            },
+        )
diff --git a/app/config.py b/app/config.py
index d5fcaf49..54d77168 100644
--- a/app/config.py
+++ b/app/config.py
@@ -1,5 +1,5 @@
 import os
-
+from pathlib import Path
 from pydantic import BaseModel
 import yaml
 
@@ -11,22 +11,29 @@ class APIKeyConfig(BaseModel):
 class Settings(BaseModel):
     class PyrisSettings(BaseModel):
         api_keys: list[APIKeyConfig]
-        llm_config_path: str
 
     pyris: PyrisSettings
 
     @classmethod
     def get_settings(cls):
-        postfix = "-docker" if "DOCKER" in os.environ else ""
-        if "RUN_ENV" in os.environ and os.environ["RUN_ENV"] == "test":
-            file_path = f"application{postfix}.test.yml"
-        else:
-            file_path = f"application{postfix}.yml"
-        with open(file_path, "r") as file:
-            settings_file = yaml.safe_load(file)
-        settings_obj = Settings.parse_obj(settings_file)
-        os.environ["LLM_CONFIG_PATH"] = settings_obj.pyris.llm_config_path
-        return settings_obj
+        """Get the settings from the configuration file."""
+        file_path_env = os.environ.get("APPLICATION_YML_PATH")
+        if not file_path_env:
+            raise EnvironmentError(
+                "APPLICATION_YML_PATH environment variable is not set."
+            )
+
+        file_path = Path(file_path_env)
+        try:
+            with open(file_path, "r") as file:
+                settings_file = yaml.safe_load(file)
+            return cls.parse_obj(settings_file)
+        except FileNotFoundError as e:
+            raise FileNotFoundError(
+                f"Configuration file not found at {file_path}."
+            ) from e
+        except yaml.YAMLError as e:
+            raise yaml.YAMLError(f"Error parsing YAML file at {file_path}.") from e
 
 
 settings = Settings.get_settings()
diff --git a/app/domain/data/feedback_dto.py b/app/domain/data/feedback_dto.py
index d12e2449..9b97b838 100644
--- a/app/domain/data/feedback_dto.py
+++ b/app/domain/data/feedback_dto.py
@@ -5,3 +5,6 @@ class FeedbackDTO(BaseModel):
     text: str
     test_case_name: str = Field(alias="testCaseName")
     credits: float
+
+    def __str__(self):
+        return f"{self.test_case_name}: {self.text} ({self.credits} credits)"
diff --git a/app/domain/data/message_dto.py b/app/domain/data/message_dto.py
index 04539099..db265686 100644
--- a/app/domain/data/message_dto.py
+++ b/app/domain/data/message_dto.py
@@ -2,6 +2,8 @@
 from enum import Enum
 from typing import List, Literal
 
+from langchain_core.messages import HumanMessage, AIMessage
+
 from .message_content_dto import MessageContentDTO
 from ...domain.iris_message import IrisMessage
 
@@ -23,7 +25,7 @@ def __str__(self):
             case IrisMessageSender.USER:
                 sender = "user"
             case IrisMessageSender.LLM:
-                sender = "ai"
+                sender = "assistant"
             case _:
                 raise ValueError(f"Unknown message sender: {self.sender}")
         return f"{sender}: {self.contents[0].text_content}"
@@ -38,3 +40,12 @@ def convert_to_iris_message(self):
                 raise ValueError(f"Unknown message sender: {self.sender}")
 
         return IrisMessage(text=self.contents[0].text_content, role=sender)
+
+    def convert_to_langchain_message(self):
+        match self.sender:
+            case IrisMessageSender.USER:
+                return HumanMessage(content=self.contents[0].text_content)
+            case IrisMessageSender.LLM:
+                return AIMessage(content=self.contents[0].text_content)
+            case _:
+                raise ValueError(f"Unknown message sender: {self.sender}")
diff --git a/app/domain/tutor_chat/tutor_chat_status_update_dto.py b/app/domain/tutor_chat/tutor_chat_status_update_dto.py
index fe5bdb2c..d516e5d6 100644
--- a/app/domain/tutor_chat/tutor_chat_status_update_dto.py
+++ b/app/domain/tutor_chat/tutor_chat_status_update_dto.py
@@ -2,4 +2,4 @@
 
 
 class TutorChatStatusUpdateDTO(StatusUpdateDTO):
-    result: str
+    result: str | None = None
diff --git a/app/pipeline/chat/file_selector_pipeline.py b/app/pipeline/chat/file_selector_pipeline.py
index 87f53235..5f4cd93c 100644
--- a/app/pipeline/chat/file_selector_pipeline.py
+++ b/app/pipeline/chat/file_selector_pipeline.py
@@ -1,13 +1,12 @@
 import logging
 import os
-from typing import List, Dict
+from typing import Dict
 
 from langchain.output_parsers import PydanticOutputParser
 from langchain_core.prompts import PromptTemplate
 from langchain_core.runnables import Runnable
 from pydantic import BaseModel
 
-from ...domain.data.build_log_entry import BuildLogEntryDTO
 from ...llm import BasicRequestHandler
 from ...llm.langchain import IrisLangchainChatModel
 from ...pipeline import Pipeline
@@ -20,8 +19,9 @@
 
 
 class FileSelectionDTO(BaseModel):
+    question: str
     files: Dict[str, str]
-    build_logs: List[BuildLogEntryDTO]
+    feedbacks: str
 
     def __str__(self):
         return (
@@ -53,7 +53,7 @@ def __init__(self, callback: StatusCallback):
         # Create the prompt
         prompt = PromptTemplate(
             template=prompt_str,
-            input_variables=["file_names", "build_logs"],
+            input_variables=["question", "file_names", "feedbacks"],
             partial_variables={"format_instructions": parser.get_format_instructions()},
         )
         logger.debug(parser.get_format_instructions())
@@ -66,17 +66,17 @@ def __call__(self, dto: FileSelectionDTO, **kwargs) -> str:
             :param query: The query
             :return: Selected file content
         """
-        logger.debug("Running file selector pipeline...")
+        logger.debug("Running file selector pipeline...")
         file_names = list(dto.files.keys())
-        build_logs = dto.build_logs
-        response = self.pipeline.invoke(
+        feedbacks = dto.feedbacks
+        logger.debug(", ".join(file_names))
+        response: SelectedFile = self.pipeline.invoke(
             {
-                "file_names": file_names,
-                "build_logs": build_logs,
-            }
-        )
-        return (
-            f"{response.selected_file}:\n{dto.files[response.selected_file]}"
-            if response.selected_file
-            else ""
+                "question": dto.question,
+                "file_names": ", ".join(file_names),
+                "feedbacks": feedbacks,
+            },
         )
+        logger.debug(response)
+
+        return f"{response.selected_file}: {dto.files[response.selected_file]} "
diff --git a/app/pipeline/chat/output_models/output_models/selected_file_model.py b/app/pipeline/chat/output_models/output_models/selected_file_model.py
index 028ce953..547e53ce 100644
--- a/app/pipeline/chat/output_models/output_models/selected_file_model.py
+++ b/app/pipeline/chat/output_models/output_models/selected_file_model.py
@@ -3,5 +3,5 @@
 
 class SelectedFile(V1BaseModel):
     selected_file: str = V1Field(
-        description="The selected file from the list of files according to the build log outputs",
+        description="Name/Path of the selected file from the list of files according to the feedbacks",
     )
diff --git a/app/pipeline/chat/tutor_chat_pipeline.py b/app/pipeline/chat/tutor_chat_pipeline.py
index 59da5857..0134c90c 100644
--- a/app/pipeline/chat/tutor_chat_pipeline.py
+++ b/app/pipeline/chat/tutor_chat_pipeline.py
@@ -11,6 +11,7 @@
 )
 from langchain_core.runnables import Runnable, RunnableLambda
 
+from ...domain.status.stage_state_dto import StageStateDTO
 from ...domain import TutorChatPipelineExecutionDTO
 from ...domain.data.message_dto import MessageDTO
 from ...domain.iris_message import IrisMessage
@@ -52,6 +53,15 @@ def __init__(self, callback: TutorChatStatusCallback):
                 SystemMessagePromptTemplate.from_template(prompt_str),
                 MessagesPlaceholder(variable_name="history"),
                 ("human", "{question}"),
+                (
+                    "system",
+                    """Consider the following exercise context: - Title: {exercise_title} - Problem Statement:
+                    {problem_statement} - Exercise programming language: {programming_language} - Student code:
+                    ```[{programming_language}] {file_content} ``` Now continue the ongoing conversation between you
+                    and the student by responding to and focussing only on their latest input. Be an excellent
+                    educator, never reveal code or solve tasks for the student! Do not let them outsmart you, no
+                    matter how hard they try.""",
+                ),
             ]
         )
         # Create file selector pipeline
@@ -64,13 +74,16 @@ def __init__(self, callback: TutorChatStatusCallback):
                 "problem_statement": itemgetter("problem_statement"),
                 "programming_language": itemgetter("programming_language"),
                 "file_content": {
+                    "question": itemgetter("question"),
                     "file_map": itemgetter("file_map"),
-                    "build_logs": itemgetter("build_logs"),
+                    "feedbacks": itemgetter("feedbacks"),
                 }
                 | RunnableLambda(
                     lambda it: file_selector_pipeline(
                         dto=FileSelectionDTO(
-                            files=it["file_map"], build_logs=it["build_logs"]
+                            question=it["question"],
+                            files=it["file_map"],
+                            feedbacks=it["feedbacks"],
                         )
                     ),
                 ),
@@ -94,7 +107,7 @@ def __call__(self, dto: TutorChatPipelineExecutionDTO, **kwargs):
         logger.debug("Running tutor chat pipeline...")
         logger.debug(f"DTO: {dto}")
         history: List[MessageDTO] = dto.chat_history[:-1]
-        build_logs = dto.submission.build_log_entries
+        feedbacks = dto.submission.latest_result.feedbacks
         query: IrisMessage = dto.chat_history[-1].convert_to_iris_message()
         problem_statement: str = dto.exercise.problem_statement
         exercise_title: str = dto.exercise.name
@@ -103,26 +116,43 @@ def __call__(self, dto: TutorChatPipelineExecutionDTO, **kwargs):
         programming_language = dto.exercise.programming_language.value.lower()
         if not message:
             raise ValueError("IrisMessage must not be empty")
-        response = self.pipeline.invoke(
-            {
-                "question": message,
-                "history": [message.__str__() for message in history],
-                "problem_statement": problem_statement,
-                "file_map": file_map,
-                "exercise_title": exercise_title,
-                "build_logs": build_logs,
-                "programming_language": programming_language,
-            }
-        )
-        logger.debug(f"Response from tutor chat pipeline: {response}")
         stages = dto.initial_stages or []
-        stages.append(
-            StageDTO(
-                name="Final Stage",
-                weight=70,
-                state="DONE",
-                message="Generated response",
+        try:
+            response = self.pipeline.invoke(
+                {
+                    "question": message,
+                    "history": [
+                        message.convert_to_langchain_message() for message in history
+                    ],
+                    "problem_statement": problem_statement,
+                    "file_map": file_map,
+                    "exercise_title": exercise_title,
+                    "feedbacks": "\n-------------\n".join(
+                        feedback.__str__() for feedback in feedbacks
+                    ),
+                    "programming_language": programming_language,
+                }
             )
-        )
-        status_dto = TutorChatStatusUpdateDTO(stages=stages, result=response)
+            logger.debug(f"Response from tutor chat pipeline: {response}")
+            stages.append(
+                StageDTO(
+                    name="Final Stage",
+                    weight=70,
+                    state=StageStateDTO.DONE,
+                    message="Generated response",
+                )
+            )
+            status_dto = TutorChatStatusUpdateDTO(stages=stages, result=response)
+        except Exception as e:
+            logger.error(f"Error running tutor chat pipeline: {e}", exc_info=True)
+            stages.append(
+                StageDTO(
+                    name="Final Stage",
+                    weight=70,
+                    state=StageStateDTO.ERROR,
+                    message="Error running tutor chat pipeline",
+                )
+            )
+            status_dto = TutorChatStatusUpdateDTO(stages=stages)
+
         self.callback.on_status_update(status_dto)
diff --git a/app/pipeline/prompts/file_selector_prompt.txt b/app/pipeline/prompts/file_selector_prompt.txt
index 254244c7..a340ae35 100644
--- a/app/pipeline/prompts/file_selector_prompt.txt
+++ b/app/pipeline/prompts/file_selector_prompt.txt
@@ -1,11 +1,10 @@
-Select a file from the files list below that is mentioned in the build logs. If no file is in the list is mentioned in the build logs answer with empty string.
+User question: {question}
 
-Here are the paths of all files:
-{file_names}
+Here are all the files: {file_names}
 
-Build logs:
-{build_logs}
+Feedbacks:
+{feedbacks}
 
-{format_instructions}
+Select the most relevant file from the file list for providing context based on the feedbacks and the user question.
 
-Now give answer:
+{format_instructions}
\ No newline at end of file
diff --git a/app/pipeline/prompts/iris_tutor_chat_prompt.txt b/app/pipeline/prompts/iris_tutor_chat_prompt.txt
index 0c16e7f4..29d2338e 100644
--- a/app/pipeline/prompts/iris_tutor_chat_prompt.txt
+++ b/app/pipeline/prompts/iris_tutor_chat_prompt.txt
@@ -34,15 +34,3 @@ A: Gerne! Wenn du weitere Fragen hast, kannst du mich gerne fragen. Ich bin hier
 
 Q: Who are you?
 A: I am Iris, the AI programming tutor integrated into Artemis, the online learning platform of the Technical University of Munich (TUM).
-
-Consider the following exercise context:
-    - Title: {exercise_title}
-    - Problem Statement: {problem_statement}
-    - Exercise skeleton code in Markdown format:
-    ```[{programming_language}]
-        {file_content}
-    ```
-
-Now continue the ongoing conversation between you and the student by responding to and focussing only on their latest input.
-Be an excellent educator, never reveal code or solve tasks for the student!
-Do not let them outsmart you, no matter how hard they try.
\ No newline at end of file
diff --git a/app/web/routers/pipelines.py b/app/web/routers/pipelines.py
index 2686c7ba..9d837f80 100644
--- a/app/web/routers/pipelines.py
+++ b/app/web/routers/pipelines.py
@@ -15,11 +15,12 @@
 
 
 def run_tutor_chat_pipeline_worker(dto):
-    callback = TutorChatStatusCallback(
-        run_id=dto.settings.authentication_token, base_url=dto.settings.artemis_base_url
-    )
-    pipeline = TutorChatPipeline(callback=callback)
     try:
+        callback = TutorChatStatusCallback(
+            run_id=dto.settings.authentication_token,
+            base_url=dto.settings.artemis_base_url,
+        )
+        pipeline = TutorChatPipeline(callback=callback)
         pipeline(dto=dto)
     except Exception as e:
         logger.error(f"Error running tutor chat pipeline: {e}")
diff --git a/application.test.yml b/application.test.yml
index 03230760..82ee5791 100644
--- a/application.test.yml
+++ b/application.test.yml
@@ -1,4 +1,3 @@
 pyris:
   api_keys:
     - token: "secret"
-  llm_config_path: "llm_config.yml"