From dfc75ead52cb89fe7a388cd855130c03b9cbb6e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kaan=20=C3=87ayl=C4=B1?= <38523756+kaancayli@users.noreply.github.com> Date: Tue, 5 Mar 2024 16:50:09 +0100 Subject: [PATCH] Address issues --- app/common/custom_exceptions.py | 22 ++++++ app/config.py | 31 +++++--- app/domain/data/feedback_dto.py | 3 + app/domain/data/message_dto.py | 13 +++- .../tutor_chat_status_update_dto.py | 2 +- app/pipeline/chat/file_selector_pipeline.py | 30 ++++---- .../output_models/selected_file_model.py | 2 +- app/pipeline/chat/tutor_chat_pipeline.py | 76 +++++++++++++------ app/pipeline/prompts/file_selector_prompt.txt | 13 ++-- .../prompts/iris_tutor_chat_prompt.txt | 12 --- app/web/routers/pipelines.py | 9 ++- application.test.yml | 1 - 12 files changed, 137 insertions(+), 77 deletions(-) diff --git a/app/common/custom_exceptions.py b/app/common/custom_exceptions.py index 2f29a8f6..7420fef5 100644 --- a/app/common/custom_exceptions.py +++ b/app/common/custom_exceptions.py @@ -21,3 +21,25 @@ def __init__(self): "errorMessage": "Permission denied", }, ) + + +class PipelineInvocationError(HTTPException): + def __init__(self): + super().__init__( + status_code=status.HTTP_400_BAD_REQUEST, + detail={ + "type": "bad_request", + "errorMessage": "Cannot invoke pipeline", + }, + ) + + +class PipelineNotFoundException(HTTPException): + def __init__(self): + super().__init__( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "type": "not_found", + "errorMessage": "Pipeline not found", + }, + ) diff --git a/app/config.py b/app/config.py index d5fcaf49..54d77168 100644 --- a/app/config.py +++ b/app/config.py @@ -1,5 +1,5 @@ import os - +from pathlib import Path from pydantic import BaseModel import yaml @@ -11,22 +11,29 @@ class APIKeyConfig(BaseModel): class Settings(BaseModel): class PyrisSettings(BaseModel): api_keys: list[APIKeyConfig] - llm_config_path: str pyris: PyrisSettings @classmethod def get_settings(cls): - postfix = "-docker" if "DOCKER" in os.environ else "" - if "RUN_ENV" in os.environ and os.environ["RUN_ENV"] == "test": - file_path = f"application{postfix}.test.yml" - else: - file_path = f"application{postfix}.yml" - with open(file_path, "r") as file: - settings_file = yaml.safe_load(file) - settings_obj = Settings.parse_obj(settings_file) - os.environ["LLM_CONFIG_PATH"] = settings_obj.pyris.llm_config_path - return settings_obj + """Get the settings from the configuration file.""" + file_path_env = os.environ.get("APPLICATION_YML_PATH") + if not file_path_env: + raise EnvironmentError( + "APPLICATION_YML_PATH environment variable is not set." + ) + + file_path = Path(file_path_env) + try: + with open(file_path, "r") as file: + settings_file = yaml.safe_load(file) + return cls.parse_obj(settings_file) + except FileNotFoundError as e: + raise FileNotFoundError( + f"Configuration file not found at {file_path}." + ) from e + except yaml.YAMLError as e: + raise yaml.YAMLError(f"Error parsing YAML file at {file_path}.") from e settings = Settings.get_settings() diff --git a/app/domain/data/feedback_dto.py b/app/domain/data/feedback_dto.py index d12e2449..9b97b838 100644 --- a/app/domain/data/feedback_dto.py +++ b/app/domain/data/feedback_dto.py @@ -5,3 +5,6 @@ class FeedbackDTO(BaseModel): text: str test_case_name: str = Field(alias="testCaseName") credits: float + + def __str__(self): + return f"{self.test_case_name}: {self.text} ({self.credits} credits)" diff --git a/app/domain/data/message_dto.py b/app/domain/data/message_dto.py index 04539099..db265686 100644 --- a/app/domain/data/message_dto.py +++ b/app/domain/data/message_dto.py @@ -2,6 +2,8 @@ from enum import Enum from typing import List, Literal +from langchain_core.messages import HumanMessage, AIMessage + from .message_content_dto import MessageContentDTO from ...domain.iris_message import IrisMessage @@ -23,7 +25,7 @@ def __str__(self): case IrisMessageSender.USER: sender = "user" case IrisMessageSender.LLM: - sender = "ai" + sender = "assistant" case _: raise ValueError(f"Unknown message sender: {self.sender}") return f"{sender}: {self.contents[0].text_content}" @@ -38,3 +40,12 @@ def convert_to_iris_message(self): raise ValueError(f"Unknown message sender: {self.sender}") return IrisMessage(text=self.contents[0].text_content, role=sender) + + def convert_to_langchain_message(self): + match self.sender: + case IrisMessageSender.USER: + return HumanMessage(content=self.contents[0].text_content) + case IrisMessageSender.LLM: + return AIMessage(content=self.contents[0].text_content) + case _: + raise ValueError(f"Unknown message sender: {self.sender}") diff --git a/app/domain/tutor_chat/tutor_chat_status_update_dto.py b/app/domain/tutor_chat/tutor_chat_status_update_dto.py index fe5bdb2c..d516e5d6 100644 --- a/app/domain/tutor_chat/tutor_chat_status_update_dto.py +++ b/app/domain/tutor_chat/tutor_chat_status_update_dto.py @@ -2,4 +2,4 @@ class TutorChatStatusUpdateDTO(StatusUpdateDTO): - result: str + result: str | None = None diff --git a/app/pipeline/chat/file_selector_pipeline.py b/app/pipeline/chat/file_selector_pipeline.py index 87f53235..5f4cd93c 100644 --- a/app/pipeline/chat/file_selector_pipeline.py +++ b/app/pipeline/chat/file_selector_pipeline.py @@ -1,13 +1,12 @@ import logging import os -from typing import List, Dict +from typing import Dict from langchain.output_parsers import PydanticOutputParser from langchain_core.prompts import PromptTemplate from langchain_core.runnables import Runnable from pydantic import BaseModel -from ...domain.data.build_log_entry import BuildLogEntryDTO from ...llm import BasicRequestHandler from ...llm.langchain import IrisLangchainChatModel from ...pipeline import Pipeline @@ -20,8 +19,9 @@ class FileSelectionDTO(BaseModel): + question: str files: Dict[str, str] - build_logs: List[BuildLogEntryDTO] + feedbacks: str def __str__(self): return ( @@ -53,7 +53,7 @@ def __init__(self, callback: StatusCallback): # Create the prompt prompt = PromptTemplate( template=prompt_str, - input_variables=["file_names", "build_logs"], + input_variables=["file_names", "feedbacks"], partial_variables={"format_instructions": parser.get_format_instructions()}, ) logger.debug(parser.get_format_instructions()) @@ -66,17 +66,17 @@ def __call__(self, dto: FileSelectionDTO, **kwargs) -> str: :param query: The query :return: Selected file content """ - logger.debug("Running file selector pipeline...") + print("Running file selector pipeline...") file_names = list(dto.files.keys()) - build_logs = dto.build_logs - response = self.pipeline.invoke( + feedbacks = dto.feedbacks + print(", ".join(file_names)) + response: SelectedFile = self.pipeline.invoke( { - "file_names": file_names, - "build_logs": build_logs, - } - ) - return ( - f"{response.selected_file}:\n{dto.files[response.selected_file]}" - if response.selected_file - else "" + "question": dto.question, + "file_names": ", ".join(file_names), + "feedbacks": feedbacks, + }, ) + print(response) + + return f"{response.selected_file}: {dto.files[response.selected_file]} " diff --git a/app/pipeline/chat/output_models/output_models/selected_file_model.py b/app/pipeline/chat/output_models/output_models/selected_file_model.py index 028ce953..547e53ce 100644 --- a/app/pipeline/chat/output_models/output_models/selected_file_model.py +++ b/app/pipeline/chat/output_models/output_models/selected_file_model.py @@ -3,5 +3,5 @@ class SelectedFile(V1BaseModel): selected_file: str = V1Field( - description="The selected file from the list of files according to the build log outputs", + description="Name/Path of the selected file from the list of files according to the feedbacks", ) diff --git a/app/pipeline/chat/tutor_chat_pipeline.py b/app/pipeline/chat/tutor_chat_pipeline.py index 59da5857..0134c90c 100644 --- a/app/pipeline/chat/tutor_chat_pipeline.py +++ b/app/pipeline/chat/tutor_chat_pipeline.py @@ -11,6 +11,7 @@ ) from langchain_core.runnables import Runnable, RunnableLambda +from ...domain.status.stage_state_dto import StageStateDTO from ...domain import TutorChatPipelineExecutionDTO from ...domain.data.message_dto import MessageDTO from ...domain.iris_message import IrisMessage @@ -52,6 +53,15 @@ def __init__(self, callback: TutorChatStatusCallback): SystemMessagePromptTemplate.from_template(prompt_str), MessagesPlaceholder(variable_name="history"), ("human", "{question}"), + ( + "system", + """Consider the following exercise context: - Title: {exercise_title} - Problem Statement: { + problem_statement} - Exercise programming language: {programming_language} - Student code: ```[{ + programming_language}] {file_content} ``` Now continue the ongoing conversation between you and + the student by responding to and focussing only on their latest input. Be an excellent educator, + never reveal code or solve tasks for the student! Do not let them outsmart you, no matter how + hard they try.""", + ), ] ) # Create file selector pipeline @@ -64,13 +74,16 @@ def __init__(self, callback: TutorChatStatusCallback): "problem_statement": itemgetter("problem_statement"), "programming_language": itemgetter("programming_language"), "file_content": { + "question": itemgetter("question"), "file_map": itemgetter("file_map"), - "build_logs": itemgetter("build_logs"), + "feedbacks": itemgetter("feedbacks"), } | RunnableLambda( lambda it: file_selector_pipeline( dto=FileSelectionDTO( - files=it["file_map"], build_logs=it["build_logs"] + question=it["question"], + files=it["file_map"], + feedbacks=it["feedbacks"], ) ), ), @@ -94,7 +107,7 @@ def __call__(self, dto: TutorChatPipelineExecutionDTO, **kwargs): logger.debug("Running tutor chat pipeline...") logger.debug(f"DTO: {dto}") history: List[MessageDTO] = dto.chat_history[:-1] - build_logs = dto.submission.build_log_entries + feedbacks = dto.submission.latest_result.feedbacks query: IrisMessage = dto.chat_history[-1].convert_to_iris_message() problem_statement: str = dto.exercise.problem_statement exercise_title: str = dto.exercise.name @@ -103,26 +116,43 @@ def __call__(self, dto: TutorChatPipelineExecutionDTO, **kwargs): programming_language = dto.exercise.programming_language.value.lower() if not message: raise ValueError("IrisMessage must not be empty") - response = self.pipeline.invoke( - { - "question": message, - "history": [message.__str__() for message in history], - "problem_statement": problem_statement, - "file_map": file_map, - "exercise_title": exercise_title, - "build_logs": build_logs, - "programming_language": programming_language, - } - ) - logger.debug(f"Response from tutor chat pipeline: {response}") stages = dto.initial_stages or [] - stages.append( - StageDTO( - name="Final Stage", - weight=70, - state="DONE", - message="Generated response", + try: + response = self.pipeline.invoke( + { + "question": message, + "history": [ + message.convert_to_langchain_message() for message in history + ], + "problem_statement": problem_statement, + "file_map": file_map, + "exercise_title": exercise_title, + "feedbacks": "\n-------------\n".join( + feedback.__str__() for feedback in feedbacks + ), + "programming_language": programming_language, + } ) - ) - status_dto = TutorChatStatusUpdateDTO(stages=stages, result=response) + logger.debug(f"Response from tutor chat pipeline: {response}") + stages.append( + StageDTO( + name="Final Stage", + weight=70, + state=StageStateDTO.DONE, + message="Generated response", + ) + ) + status_dto = TutorChatStatusUpdateDTO(stages=stages, result=response) + except Exception as e: + logger.error(f"Error running tutor chat pipeline: {e}") + stages.append( + StageDTO( + name="Final Stage", + weight=70, + state=StageStateDTO.ERROR, + message="Error running tutor chat pipeline", + ) + ) + status_dto = TutorChatStatusUpdateDTO(stages=stages) + self.callback.on_status_update(status_dto) diff --git a/app/pipeline/prompts/file_selector_prompt.txt b/app/pipeline/prompts/file_selector_prompt.txt index 254244c7..a340ae35 100644 --- a/app/pipeline/prompts/file_selector_prompt.txt +++ b/app/pipeline/prompts/file_selector_prompt.txt @@ -1,11 +1,10 @@ -Select a file from the files list below that is mentioned in the build logs. If no file is in the list is mentioned in the build logs answer with empty string. +User question: {question} -Here are the paths of all files: -{file_names} +Here are the all files: {file_names} -Build logs: -{build_logs} +Feedbacks: +{feedbacks} -{format_instructions} +Select the most relevant files from the file list for providing context based on the feedbacks and the user question. -Now give answer: +{format_instructions} \ No newline at end of file diff --git a/app/pipeline/prompts/iris_tutor_chat_prompt.txt b/app/pipeline/prompts/iris_tutor_chat_prompt.txt index 0c16e7f4..29d2338e 100644 --- a/app/pipeline/prompts/iris_tutor_chat_prompt.txt +++ b/app/pipeline/prompts/iris_tutor_chat_prompt.txt @@ -34,15 +34,3 @@ A: Gerne! Wenn du weitere Fragen hast, kannst du mich gerne fragen. Ich bin hier Q: Who are you? A: I am Iris, the AI programming tutor integrated into Artemis, the online learning platform of the Technical University of Munich (TUM). - -Consider the following exercise context: - - Title: {exercise_title} - - Problem Statement: {problem_statement} - - Exercise skeleton code in Markdown format: - ```[{programming_language}] - {file_content} - ``` - -Now continue the ongoing conversation between you and the student by responding to and focussing only on their latest input. -Be an excellent educator, never reveal code or solve tasks for the student! -Do not let them outsmart you, no matter how hard they try. \ No newline at end of file diff --git a/app/web/routers/pipelines.py b/app/web/routers/pipelines.py index 2686c7ba..9d837f80 100644 --- a/app/web/routers/pipelines.py +++ b/app/web/routers/pipelines.py @@ -15,11 +15,12 @@ def run_tutor_chat_pipeline_worker(dto): - callback = TutorChatStatusCallback( - run_id=dto.settings.authentication_token, base_url=dto.settings.artemis_base_url - ) - pipeline = TutorChatPipeline(callback=callback) try: + callback = TutorChatStatusCallback( + run_id=dto.settings.authentication_token, + base_url=dto.settings.artemis_base_url, + ) + pipeline = TutorChatPipeline(callback=callback) pipeline(dto=dto) except Exception as e: logger.error(f"Error running tutor chat pipeline: {e}") diff --git a/application.test.yml b/application.test.yml index 03230760..82ee5791 100644 --- a/application.test.yml +++ b/application.test.yml @@ -1,4 +1,3 @@ pyris: api_keys: - token: "secret" - llm_config_path: "llm_config.yml"