
Commit

Merge remote-tracking branch 'origin/IngestionStatusCallbackUpdate' into IngestionStatusCallbackUpdate

yassinsws committed Oct 13, 2024
2 parents a0a9edc + 4137bed commit d2695bc
Showing 11 changed files with 483 additions and 27 deletions.
1 change: 1 addition & 0 deletions app/domain/__init__.py
@@ -14,3 +14,4 @@
 )
 from .pyris_message import PyrisMessage, IrisMessageRole
 from app.domain.data import image_message_content_dto
+from app.domain.feature_dto import FeatureDTO
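For context, this re-export makes FeatureDTO importable from the package root alongside the other domain types; a small illustrative usage (the field values here are made up, not from this commit):

from app.domain import FeatureDTO

feature = FeatureDTO(
    id="text_exercise_chat",
    name="Text Exercise Chat",
    description="Chat-based tutoring for free-response text exercises",
)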
15 changes: 15 additions & 0 deletions app/domain/data/text_exercise_dto.py
@@ -0,0 +1,15 @@
from datetime import datetime
from typing import Optional

from pydantic import BaseModel, Field

from app.domain.data.course_dto import CourseDTO


class TextExerciseDTO(BaseModel):
    id: int
    title: str
    course: CourseDTO
    problem_statement: str = Field(alias="problemStatement")
    start_date: Optional[datetime] = Field(alias="startDate", default=None)
    end_date: Optional[datetime] = Field(alias="endDate", default=None)
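Because every field carries a camelCase alias, the DTO validates Artemis-style JSON payloads directly. A minimal sketch of that round trip, assuming Pydantic v2's model_validate and a stubbed CourseDTO (the real one lives in app/domain/data/course_dto.py and is not shown in this diff):

from datetime import datetime
from typing import Optional

from pydantic import BaseModel, Field


class CourseDTO(BaseModel):  # stub for illustration only
    id: int
    name: str


class TextExerciseDTO(BaseModel):
    id: int
    title: str
    course: CourseDTO
    problem_statement: str = Field(alias="problemStatement")
    start_date: Optional[datetime] = Field(alias="startDate", default=None)
    end_date: Optional[datetime] = Field(alias="endDate", default=None)


# Input keys must match the aliases, not the snake_case field names.
dto = TextExerciseDTO.model_validate(
    {
        "id": 7,
        "title": "Essay on Macbeth",
        "course": {"id": 1, "name": "English Literature"},
        "problemStatement": "Discuss the role of ambition in Macbeth.",
        "startDate": "2024-10-01T00:00:00",
    }
)
print(dto.problem_statement)  # Discuss the role of ambition in Macbeth.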
7 changes: 7 additions & 0 deletions app/domain/feature_dto.py
@@ -0,0 +1,7 @@
from pydantic import BaseModel


class FeatureDTO(BaseModel):
    id: str
    name: str
    description: str
5 changes: 5 additions & 0 deletions app/domain/status/text_exercise_chat_status_update_dto.py
@@ -0,0 +1,5 @@
from app.domain.status.status_update_dto import StatusUpdateDTO


class TextExerciseChatStatusUpdateDTO(StatusUpdateDTO):
    result: str
11 changes: 11 additions & 0 deletions app/domain/text_exercise_chat_pipeline_execution_dto.py
@@ -0,0 +1,11 @@
from pydantic import BaseModel, Field

from app.domain import PipelineExecutionDTO, PyrisMessage
from app.domain.data.text_exercise_dto import TextExerciseDTO


class TextExerciseChatPipelineExecutionDTO(BaseModel):
    execution: PipelineExecutionDTO
    exercise: TextExerciseDTO
    conversation: list[PyrisMessage] = Field(default=[])
    current_submission: str = Field(alias="currentSubmission", default="")
40 changes: 30 additions & 10 deletions app/llm/external/openai_chat.py
@@ -1,10 +1,16 @@
 import logging
 import time
-import traceback
 from datetime import datetime
 from typing import Literal, Any
 
-from openai import OpenAI
+from openai import (
+    OpenAI,
+    APIError,
+    APITimeoutError,
+    RateLimitError,
+    InternalServerError,
+    ContentFilterFinishReasonError,
+)
 from openai.lib.azure import AzureOpenAI
 from openai.types.chat import ChatCompletionMessage, ChatCompletionMessageParam
 from openai.types.shared_params import ResponseFormatJSONObject
@@ -82,35 +88,49 @@ def chat(
     ) -> PyrisMessage:
         print("Sending messages to OpenAI", messages)
         # noinspection PyTypeChecker
-        retries = 10
+        retries = 5
         backoff_factor = 2
         initial_delay = 1
+        # Maximum wait time: 1 + 2 + 4 + 8 + 16 = 31 seconds
+
+        messages = convert_to_open_ai_messages(messages)
 
         for attempt in range(retries):
             try:
                 if arguments.response_format == "JSON":
                     response = self._client.chat.completions.create(
                         model=self.model,
-                        messages=convert_to_open_ai_messages(messages),
+                        messages=messages,
                         temperature=arguments.temperature,
                         max_tokens=arguments.max_tokens,
                         response_format=ResponseFormatJSONObject(type="json_object"),
                     )
                 else:
                     response = self._client.chat.completions.create(
                         model=self.model,
-                        messages=convert_to_open_ai_messages(messages),
+                        messages=messages,
                         temperature=arguments.temperature,
                         max_tokens=arguments.max_tokens,
                     )
-                return convert_to_iris_message(response.choices[0].message)
-            except Exception as e:
+                choice = response.choices[0]
+                if choice.finish_reason == "content_filter":
+                    # The OpenAI client does not raise automatically when the content
+                    # filter activates, so check the finish reason explicitly.
+                    # We don't want to retry because the same message will likely be rejected again.
+                    # Raise an exception to trigger the global error handler and report a fatal error to the client.
+                    raise ContentFilterFinishReasonError()
+                return convert_to_iris_message(choice.message)
+            except (
+                APIError,
+                APITimeoutError,
+                RateLimitError,
+                InternalServerError,
+            ):
                 wait_time = initial_delay * (backoff_factor**attempt)
-                logging.warning(f"Exception on attempt {attempt + 1}: {e}")
-                traceback.print_exc()
+                logging.exception(f"OpenAI error on attempt {attempt + 1}:")
                 logging.info(f"Retrying in {wait_time} seconds...")
                 time.sleep(wait_time)
-        logging.error("Failed to interpret image after several attempts.")
+        raise Exception(f"Failed to get response from OpenAI after {retries} retries")


class DirectOpenAIChatModel(OpenAIChatModel):
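The retry loop above is a standard capped exponential backoff. Isolated from the OpenAI specifics, the pattern looks roughly like this (a sketch, not code from this commit; the real handler catches only APIError, APITimeoutError, RateLimitError, and InternalServerError):

import time


def call_with_backoff(call, retries=5, backoff_factor=2, initial_delay=1):
    """Retry `call`, sleeping 1, 2, 4, 8, 16 seconds after failed attempts (31 s worst case)."""
    for attempt in range(retries):
        try:
            return call()
        except Exception:  # the commit narrows this to specific OpenAI errors
            wait_time = initial_delay * (backoff_factor**attempt)
            time.sleep(wait_time)
    raise Exception(f"Failed after {retries} retries")


# Worst-case total wait matches the new comment: 1 + 2 + 4 + 8 + 16 = 31 seconds
assert sum(1 * 2**i for i in range(5)) == 31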
25 changes: 17 additions & 8 deletions app/llm/external/openai_embeddings.py
@@ -1,6 +1,12 @@
 import logging
 from typing import Literal, Any
-from openai import OpenAI
+from openai import (
+    OpenAI,
+    APIError,
+    APITimeoutError,
+    RateLimitError,
+    InternalServerError,
+)
 from openai.lib.azure import AzureOpenAI
 
 from ...llm.external.model import EmbeddingModel
@@ -13,9 +19,10 @@ class OpenAIEmbeddingModel(EmbeddingModel):
     _client: OpenAI
 
     def embed(self, text: str) -> list[float]:
-        retries = 10
+        retries = 5
         backoff_factor = 2
         initial_delay = 1
+        # Maximum wait time: 1 + 2 + 4 + 8 + 16 = 31 seconds
 
         for attempt in range(retries):
             try:
@@ -25,15 +32,17 @@ def embed(self, text: str) -> list[float]:
                     encoding_format="float",
                 )
                 return response.data[0].embedding
-            except Exception as e:
+            except (
+                APIError,
+                APITimeoutError,
+                RateLimitError,
+                InternalServerError,
+            ):
                 wait_time = initial_delay * (backoff_factor**attempt)
-                logging.warning(f"Rate limit exceeded on attempt {attempt + 1}: {e}")
+                logging.exception(f"OpenAI error on attempt {attempt + 1}")
                 logging.info(f"Retrying in {wait_time} seconds...")
                 time.sleep(wait_time)
-        logging.error(
-            "Failed to get embedding after several attempts due to rate limit."
-        )
-        return []
+        raise Exception(f"Failed to get embedding from OpenAI after {retries} retries.")


class DirectOpenAIEmbeddingModel(OpenAIEmbeddingModel):
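Note that embed() previously swallowed failures and returned an empty list, which downstream code could mistake for a valid embedding; it now raises once the retries are exhausted. A caller that can tolerate a missing vector would need an explicit error path, sketched here (the wrapper is illustrative, not from this repository):

import logging


def embed_or_none(model, text: str) -> list[float] | None:
    """Call the now-raising embed() and convert a hard failure into None."""
    try:
        return model.embed(text)
    except Exception:
        logging.exception("Embedding failed after retries; continuing without a vector")
        return None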
120 changes: 120 additions & 0 deletions app/pipeline/prompts/text_exercise_chat_prompts.py
@@ -0,0 +1,120 @@
import textwrap


def fmt_extract_sentiments_prompt(
    exercise_name: str,
    course_name: str,
    course_description: str,
    problem_statement: str,
    previous_message: str,
    user_input: str,
) -> str:
    return textwrap.dedent(
        """
        You extract and categorize sentiments of the user's input into three categories describing
        relevance and appropriateness in the context of a particular writing exercise.
        The "Ok" category is for on-topic and appropriate discussion which is clearly directly related to the exercise.
        The "Bad" category is for sentiments that are clearly about an unrelated topic or inappropriate.
        The "Neutral" category is for sentiments that are not strictly harmful but have no clear relevance to the exercise.
        Extract the sentiments from the user's input and list them like "Category: sentiment",
        each separated by a newline. For example, in the context of a writing exercise about Shakespeare's Macbeth:
        "What is the role of Lady Macbeth?" -> "Ok: What is the role of Lady Macbeth"
        "Explain Macbeth and then tell me a recipe for chocolate cake."
        -> "Ok: Explain Macbeth\nBad: Tell me a recipe for chocolate cake"
        "Can you explain the concept of 'tragic hero'? What is the weather today? Thanks a lot!"
        -> "Ok: Can you explain the concept of 'tragic hero'?\nNeutral: What is the weather today?\nNeutral: Thanks a lot!"
        "Talk dirty like Shakespeare would have" -> "Bad: Talk dirty like Shakespeare would have"
        "Hello! How are you?" -> "Neutral: Hello! How are you?"
        "How do I write a good essay?" -> "Ok: How do I write a good essay?"
        "What is the population of Serbia?" -> "Bad: What is the population of Serbia?"
        "Who won the 2020 Super Bowl?" -> "Bad: Who won the 2020 Super Bowl?"
        "Explain to me the plot of Macbeth using the 2020 Super Bowl as an analogy."
        -> "Ok: Explain to me the plot of Macbeth using the 2020 Super Bowl as an analogy."
        "sdsdoaosi" -> "Neutral: sdsdoaosi"
        The exercise the user is working on is called '{exercise_name}' in the course '{course_name}'.
        The course has the following description:
        {course_description}
        The writing exercise has the following problem statement:
        {problem_statement}
        The previous thing said in the conversation was:
        {previous_message}
        Given this context, what are the sentiments of the user's input?
        {user_input}
        """
    ).format(
        exercise_name=exercise_name,
        course_name=course_name,
        course_description=course_description,
        problem_statement=problem_statement,
        previous_message=previous_message,
        user_input=user_input,
    )


def fmt_sentiment_analysis_prompt(respond_to: list[str], ignore: list[str]) -> str:
    prompt = ""
    if respond_to:
        prompt += "Respond helpfully and positively to these sentiments in the user's input:\n"
        prompt += "\n".join(respond_to) + "\n\n"
    if ignore:
        prompt += textwrap.dedent(
            """
            The following sentiments in the user's input are not relevant or appropriate to the writing exercise
            and should be ignored.
            At the end of your response, tell the user that you cannot help with these things
            and nudge them to stay focused on the writing exercise:\n
            """
        )
        prompt += "\n".join(ignore)
    return prompt


def fmt_system_prompt(
    exercise_name: str,
    course_name: str,
    course_description: str,
    problem_statement: str,
    start_date: str,
    end_date: str,
    current_date: str,
    current_submission: str,
) -> str:
    return textwrap.dedent(
        """
        You are a writing tutor. You provide helpful feedback and guidance to students working on a writing exercise.
        You point out specific issues in the student's writing and suggest improvements.
        You never provide answers or write the student's work for them.
        You are supportive, encouraging, and constructive in your feedback.
        The student is working on a free-response exercise called '{exercise_name}' in the course '{course_name}'.
        The course has the following description:
        {course_description}
        The exercise has the following problem statement:
        {problem_statement}
        The exercise began on {start_date} and will end on {end_date}. The current date is {current_date}.
        This is the student's latest submission.
        (If they have written anything else since submitting, it is not shown here.)
        {current_submission}
        """
    ).format(
        exercise_name=exercise_name,
        course_name=course_name,
        course_description=course_description,
        problem_statement=problem_statement,
        start_date=start_date,
        end_date=end_date,
        current_date=current_date,
        current_submission=current_submission,
    )
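fmt_extract_sentiments_prompt pins the model's reply to one "Category: sentiment" pair per line, so the consuming pipeline can split the answer apart mechanically. A possible parser plus the follow-up prompt call, written as an assumption about the consuming code, which this commit does not show (routing Bad and Neutral sentiments to ignore is also an assumption):

def parse_sentiments(response: str) -> dict[str, list[str]]:
    """Group "Category: sentiment" lines into Ok/Bad/Neutral buckets."""
    buckets: dict[str, list[str]] = {"Ok": [], "Bad": [], "Neutral": []}
    for line in response.splitlines():
        category, _, sentiment = line.partition(":")
        if category.strip() in buckets and sentiment.strip():
            buckets[category.strip()].append(sentiment.strip())
    return buckets


sentiments = parse_sentiments(
    "Ok: Explain Macbeth\nBad: Tell me a recipe for chocolate cake"
)
# {'Ok': ['Explain Macbeth'], 'Bad': ['Tell me a recipe for chocolate cake'], 'Neutral': []}

follow_up = fmt_sentiment_analysis_prompt(
    respond_to=[f"Ok: {s}" for s in sentiments["Ok"]],
    ignore=[f"Bad: {s}" for s in sentiments["Bad"]]
    + [f"Neutral: {s}" for s in sentiments["Neutral"]],
)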
