diff --git a/apps/ai_api/eda_ai_api/api/routes/supervisor.py b/apps/ai_api/eda_ai_api/api/routes/supervisor.py
index e80b185..69f3e7d 100644
--- a/apps/ai_api/eda_ai_api/api/routes/supervisor.py
+++ b/apps/ai_api/eda_ai_api/api/routes/supervisor.py
@@ -10,6 +10,10 @@ from proposal_writer.crew import ProposalWriterCrew
 from eda_ai_api.models.supervisor import SupervisorRequest, SupervisorResponse
+from eda_ai_api.utils.audio_converter import convert_ogg
+from eda_ai_api.utils.transcriber import transcribe_audio
+
+import os
+import tempfile
 
 router = APIRouter()
@@ -69,16 +73,112 @@ async def supervisor_route(
     message: Optional[str] = Form(None),
     audio: Optional[UploadFile] = File(None),
 ) -> SupervisorResponse:
+    # Accepted MIME types, mapped to the file extension used for temp files
+    ALLOWED_FORMATS = {
+        "audio/mpeg": "mp3",
+        "audio/mp4": "m4a",
+        "audio/mpga": "mpga",
+        "audio/wav": "wav",
+        "audio/webm": "webm",
+        "audio/ogg": "ogg",
+    }
+
+    def detect_content_type(file: UploadFile) -> Optional[str]:
+        """Detect the uploaded file's content type, falling back to its extension."""
+        if hasattr(file, "content_type") and file.content_type:
+            return file.content_type
+
+        if hasattr(file, "mime_type") and file.mime_type:
+            return file.mime_type
+
+        ext = os.path.splitext(file.filename)[1].lower()
+        return {
+            ".mp3": "audio/mpeg",
+            ".mp4": "audio/mp4",
+            ".mpeg": "audio/mpeg",
+            ".mpga": "audio/mpga",
+            ".m4a": "audio/mp4",
+            ".wav": "audio/wav",
+            ".webm": "audio/webm",
+            ".ogg": "audio/ogg",
+        }.get(ext)
+
     try:
         if audio:
-            if audio.content_type != "audio/ogg":
-                return SupervisorResponse(
-                    result="Error: Only .ogg audio files are supported"
-                )
+            content_type = detect_content_type(audio)
+            content = await audio.read()
+
+            try:
+                audio_path = ""
+                # Default to mp3 if content type detection failed
+                if not content_type:
+                    content_type = "audio/mpeg"
+
+                if content_type == "audio/ogg":
+                    audio_path = convert_ogg(content, output_format="mp3")
+                else:
+                    with tempfile.NamedTemporaryFile(
+                        suffix=f".{ALLOWED_FORMATS.get(content_type, 'mp3')}",
+                        delete=False,
+                    ) as temp_file:
+                        temp_file.write(content)
+                        audio_path = temp_file.name
+
+                transcription = transcribe_audio(audio_path)
+                print("\n==================================================")
+                print(f"   TRANSCRIPTION: {transcription}")
+                print("==================================================\n")
+
+                if os.path.exists(audio_path):
+                    os.unlink(audio_path)
+
+                # Process the transcription through the router chain
+                decision = router_chain.run(message=transcription).strip().lower()
+
+                # Continue with the existing decision handling logic
+                if decision == "discovery":
+                    topics_raw = topic_chain.run(message=transcription)
+                    topics = [t.strip() for t in topics_raw.split(",") if t.strip()][:5]
+                    if not topics:
+                        topics = ["AI", "Technology"]
+                    result = (
+                        OpportunityFinderCrew()
+                        .crew()
+                        .kickoff(inputs={"topics": ", ".join(topics)})
+                    )
+
+                elif decision == "proposal":
+                    extracted = proposal_chain.run(message=transcription).split("|")
+                    community_project = (
+                        extracted[0].strip() if len(extracted) > 0 else "unknown"
+                    )
+                    grant_call = (
+                        extracted[1].strip() if len(extracted) > 1 else "unknown"
+                    )
+                    result = (
+                        ProposalWriterCrew(
+                            community_project=community_project, grant_call=grant_call
+                        )
+                        .crew()
+                        .kickoff()
+                    )
+
+                elif decision == "heartbeat":
+                    result = {"is_alive": True}
+
+                elif decision == "onboarding":
+                    result = OnboardingCrew().crew().kickoff()
+
+                else:
+                    result = {"error": f"Unknown decision type: {decision}"}
+
+                return SupervisorResponse(result=str(result))
 
-            # Here you would add whisper transcription later
-            # For now just acknowledge we received the audio
-            return SupervisorResponse(result=f"Received audio file: {audio.filename}")
+            except Exception as e:
+                # Clean up the temp file even when transcription or routing fails
+                if os.path.exists(audio_path):
+                    os.unlink(audio_path)
+                return SupervisorResponse(result=f"Error processing audio: {str(e)}")
 
         elif message:
             # Existing message handling logic
diff --git a/apps/ai_api/eda_ai_api/utils/audio_converter.py b/apps/ai_api/eda_ai_api/utils/audio_converter.py
new file mode 100644
index 0000000..1f2f3d3
--- /dev/null
+++ b/apps/ai_api/eda_ai_api/utils/audio_converter.py
@@ -0,0 +1,57 @@
+import os
+import tempfile
+from pathlib import Path
+from typing import Literal
+
+import ffmpeg
+
+AudioFormat = Literal["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"]
+
+
+def convert_ogg(
+    input_file: str | Path | bytes,
+    output_format: AudioFormat = "mp3",
+    output_path: str | Path | None = None,
+) -> str:
+    """
+    Convert an OGG audio file to another format using ffmpeg.
+
+    Args:
+        input_file: Path to the input OGG file, or its raw bytes
+        output_format: Desired output format
+        output_path: Optional output path. If None, uses a temporary file
+
+    Returns:
+        str: Path to the converted audio file
+    """
+    try:
+        # Handle bytes input by writing it to a temp file first
+        if isinstance(input_file, bytes):
+            with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as temp_ogg:
+                temp_ogg.write(input_file)
+                input_file = temp_ogg.name
+
+        # If no output path was specified, create a unique temp file
+        # (a fixed name would let concurrent conversions clobber each other)
+        if output_path is None:
+            with tempfile.NamedTemporaryFile(
+                suffix=f".{output_format}", delete=False
+            ) as temp_out:
+                output_path = temp_out.name
+
+        # Ensure the output directory exists
+        os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
+
+        # Convert the audio using ffmpeg
+        stream = ffmpeg.input(str(input_file))
+        stream = ffmpeg.output(stream, str(output_path))
+        ffmpeg.run(
+            stream, overwrite_output=True, capture_stdout=True, capture_stderr=True
+        )
+
+        # Clean up the temp input file if we created one
+        if isinstance(input_file, str) and input_file.startswith(tempfile.gettempdir()):
+            os.unlink(input_file)
+
+        return str(output_path)
+
+    except ffmpeg.Error as e:
+        raise RuntimeError(f"FFmpeg error: {e.stderr.decode()}") from e
+    except Exception as e:
+        raise RuntimeError(f"Error converting audio: {str(e)}") from e
diff --git a/apps/ai_api/eda_ai_api/utils/transcriber.py b/apps/ai_api/eda_ai_api/utils/transcriber.py
new file mode 100644
index 0000000..4fc9685
--- /dev/null
+++ b/apps/ai_api/eda_ai_api/utils/transcriber.py
@@ -0,0 +1,34 @@
+from groq import Groq
+from loguru import logger
+
+
+def transcribe_audio(audio_path: str, language: str = "en") -> str:
+    """
+    Transcribe an audio file using Groq's Whisper API.
+
+    Args:
+        audio_path: Path to the audio file
+        language: Language code (default: "en"; currently not forwarded to the
+            API, so Whisper auto-detects the language)
+
+    Returns:
+        str: Transcribed text
+    """
+    try:
+        client = Groq()
+
+        with open(audio_path, "rb") as file:
+            transcription = client.audio.transcriptions.create(
+                file=(audio_path, file.read()),
+                model="whisper-large-v3-turbo",
+                response_format="json",
+                # language=language,
+                temperature=0.0,
+            )
+
+        logger.info(f"Transcription result: {transcription.text}")
+        return transcription.text
+
+    except Exception as e:
+        logger.error(f"Error transcribing audio: {str(e)}")
+        raise RuntimeError(f"Transcription failed: {str(e)}") from e
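
Reviewer note: a quick way to exercise the new audio branch end to end. This is a sketch, not part of the diff; the mount path is an assumption (the @router.post decorator sits outside this hunk) and voice_note.ogg is a hypothetical local file, so adjust both to your setup.

import httpx

# Assumed mount point; check the actual @router.post path in supervisor.py.
URL = "http://localhost:8000/supervisor"

with open("voice_note.ogg", "rb") as f:  # hypothetical test recording
    response = httpx.post(
        URL,
        files={"audio": ("voice_note.ogg", f, "audio/ogg")},
        timeout=120.0,  # crew kickoffs can take a while
    )

print(response.json()["result"])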
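The two utilities also compose on their own, which is handy for testing transcription without the HTTP layer. A minimal sketch, assuming a local sample.ogg exists and GROQ_API_KEY is set in the environment (the Groq() client reads it from there):

import os

from eda_ai_api.utils.audio_converter import convert_ogg
from eda_ai_api.utils.transcriber import transcribe_audio

with open("sample.ogg", "rb") as f:  # hypothetical input file
    mp3_path = convert_ogg(f.read(), output_format="mp3")

try:
    print(transcribe_audio(mp3_path))
finally:
    # convert_ogg leaves its output on disk, so the caller cleans up
    if os.path.exists(mp3_path):
        os.unlink(mp3_path)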
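One deployment caveat: the ffmpeg import in audio_converter.py is the ffmpeg-python bindings, which shell out to an ffmpeg executable at runtime, so the binary must be present in the runtime image. A preflight check along these lines (again a sketch, not part of the diff) fails faster and more clearly than a mid-request FFmpeg error:

import shutil

# ffmpeg-python wraps the ffmpeg CLI; without the binary, every conversion fails.
if shutil.which("ffmpeg") is None:
    raise RuntimeError("ffmpeg binary not found on PATH; install it before using convert_ogg")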