Skip to content

Commit

Permalink
feat: audio transcription
Browse files Browse the repository at this point in the history
  • Loading branch information
Luisotee committed Nov 27, 2024
1 parent 868d2b7 commit 41fc958
Show file tree
Hide file tree
Showing 3 changed files with 198 additions and 7 deletions.
114 changes: 107 additions & 7 deletions apps/ai_api/eda_ai_api/api/routes/supervisor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
from proposal_writer.crew import ProposalWriterCrew

from eda_ai_api.models.supervisor import SupervisorRequest, SupervisorResponse
from eda_ai_api.utils.audio_converter import convert_ogg
from eda_ai_api.utils.transcriber import transcribe_audio

import tempfile

router = APIRouter()

Expand Down Expand Up @@ -69,16 +73,112 @@
async def supervisor_route(
message: Optional[str] = Form(None), audio: Optional[UploadFile] = File(None)
) -> SupervisorResponse:
# Maps each accepted audio MIME type to the file suffix used when the upload
# is written to a temporary file.
# Every MIME type must appear exactly once: a repeated key in a dict literal
# silently keeps only the last value. The values below are the ones that were
# effective at runtime ("mpeg" for audio/mpeg, "m4a" for audio/mp4).
ALLOWED_FORMATS = {
    "audio/mpeg": "mpeg",
    "audio/mp4": "m4a",
    "audio/mpga": "mpga",
    "audio/wav": "wav",
    "audio/webm": "webm",
    "audio/ogg": "ogg",
}

def detect_content_type(file: UploadFile) -> Optional[str]:
    """Return the bare MIME type of an uploaded audio file, or None if unknown.

    The type declared on the upload (``content_type``, then ``mime_type``) is
    preferred. If neither is set, the type is guessed from the filename
    extension.

    Args:
        file: The uploaded file object.

    Returns:
        A lower-cased MIME type without parameters (e.g. ``"audio/ogg"``), or
        None when nothing is declared and the extension is missing or not
        recognised.
    """
    declared = getattr(file, "content_type", None) or getattr(
        file, "mime_type", None
    )
    if declared:
        # A Content-Type value may carry parameters ("audio/ogg; codecs=opus").
        # Callers compare against bare types, so keep only the media type.
        media_type = declared.split(";", 1)[0].strip().lower()
        if media_type:
            return media_type

    # The filename can be absent on an upload; splitext(None) would raise.
    filename = getattr(file, "filename", None)
    if not filename:
        return None

    ext = os.path.splitext(filename)[1].lower()
    return {
        ".mp3": "audio/mpeg",
        ".mp4": "audio/mp4",
        ".mpeg": "audio/mpeg",
        ".mpga": "audio/mpga",
        ".m4a": "audio/mp4",
        ".wav": "audio/wav",
        ".webm": "audio/webm",
        ".ogg": "audio/ogg",
    }.get(ext)

try:
if audio:
if audio.content_type != "audio/ogg":
return SupervisorResponse(
result="Error: Only .ogg audio files are supported"
)
content_type = detect_content_type(audio)
content = await audio.read()

try:
audio_path = ""
# Default to mp3 if content type detection failed
if not content_type:
content_type = "audio/mpeg"

if content_type == "audio/ogg":
audio_path = convert_ogg(content, output_format="mp3")
else:
with tempfile.NamedTemporaryFile(
suffix=f".{ALLOWED_FORMATS.get(content_type, 'mp3')}",
delete=False,
) as temp_file:
temp_file.write(content)
audio_path = temp_file.name

transcription = transcribe_audio(audio_path)
print("\n==================================================")
print(f" TRANSCRIPTION: {transcription}")
print("==================================================\n")

if os.path.exists(audio_path):
os.unlink(audio_path)

# Process transcription through the router chain
decision = router_chain.run(message=transcription).strip().lower()

# Continue with existing decision handling logic...
if decision == "discovery":
topics_raw = topic_chain.run(message=transcription)
topics = [t.strip() for t in topics_raw.split(",") if t.strip()][:5]
if not topics:
topics = ["AI", "Technology"]
result = (
OpportunityFinderCrew()
.crew()
.kickoff(inputs={"topics": ", ".join(topics)})
)

elif decision == "proposal":
extracted = proposal_chain.run(message=transcription).split("|")
community_project = (
extracted[0].strip() if len(extracted) > 0 else "unknown"
)
grant_call = (
extracted[1].strip() if len(extracted) > 1 else "unknown"
)
result = (
ProposalWriterCrew(
community_project=community_project, grant_call=grant_call
)
.crew()
.kickoff()
)

elif decision == "heartbeat":
result = {"is_alive": True}

elif decision == "onboarding":
result = OnboardingCrew().crew().kickoff()

else:
result = {"error": f"Unknown decision type: {decision}"}

return SupervisorResponse(result=str(result))

# Here you would add whisper transcription later
# For now just acknowledge we received the audio
return SupervisorResponse(result=f"Received audio file: {audio.filename}")
except Exception as e:
if os.path.exists(audio_path):
os.unlink(audio_path)
return SupervisorResponse(result=f"Error processing audio: {str(e)}")

elif message:
# Existing message handling logic
Expand Down
57 changes: 57 additions & 0 deletions apps/ai_api/eda_ai_api/utils/audio_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import os
from pathlib import Path
import tempfile
from typing import Literal
import ffmpeg

# Output formats that convert_ogg can be asked to produce; used as the type of
# its `output_format` parameter.
AudioFormat = Literal["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"]


def convert_ogg(
    input_file: str | Path | bytes,
    output_format: AudioFormat = "mp3",
    output_path: str | Path | None = None,
) -> str:
    """
    Convert OGG audio to another format using ffmpeg.

    Args:
        input_file: Path to the input OGG file, or the raw OGG bytes
        output_format: Desired output format (used as the suffix of the
            generated file when output_path is None)
        output_path: Optional output path. If None, a uniquely named
            temporary file is created and the caller must delete it

    Returns:
        str: Path to the converted audio file

    Raises:
        RuntimeError: If ffmpeg fails or any other error occurs
    """
    temp_input: str | None = None  # temp .ogg created here for bytes input
    temp_output: str | None = None  # temp output created here, if any
    converted = False

    try:
        # Handle bytes input by writing to a temp file first
        if isinstance(input_file, bytes):
            with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as temp_ogg:
                temp_input = temp_ogg.name
                temp_ogg.write(input_file)
            input_file = temp_input

        if output_path is None:
            # A unique name per call: a fixed shared filename would let
            # concurrent conversions overwrite each other's output.
            fd, temp_output = tempfile.mkstemp(
                prefix="converted_audio_", suffix=f".{output_format}"
            )
            os.close(fd)
            output_path = temp_output
        else:
            # Ensure the caller-supplied output directory exists
            os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)

        # Convert audio using ffmpeg; overwrite_output is required because
        # mkstemp has already created the (empty) destination file.
        stream = ffmpeg.input(str(input_file))
        stream = ffmpeg.output(stream, str(output_path))
        ffmpeg.run(
            stream, overwrite_output=True, capture_stdout=True, capture_stderr=True
        )

        converted = True
        return str(output_path)

    except ffmpeg.Error as e:
        detail = e.stderr.decode(errors="replace") if e.stderr else str(e)
        raise RuntimeError(f"FFmpeg error: {detail}") from e
    except Exception as e:
        raise RuntimeError(f"Error converting audio: {str(e)}") from e
    finally:
        # Remove only files created by this call: always the temp input, and
        # the temp output when conversion failed. Caller-owned paths are never
        # deleted, even if they live in the temp directory.
        leftovers = [temp_input]
        if not converted:
            leftovers.append(temp_output)
        for leftover in leftovers:
            if leftover is None:
                continue
            try:
                os.unlink(leftover)
            except OSError:
                # Best-effort cleanup; must not mask the original error.
                pass
34 changes: 34 additions & 0 deletions apps/ai_api/eda_ai_api/utils/transcriber.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import os
from groq import Groq
from loguru import logger


def transcribe_audio(audio_path: str, language: str = "en") -> str:
    """
    Transcribe audio file using Groq's Whisper API.

    Args:
        audio_path: Path to the audio file
        language: Language code (default: "en"). NOTE: accepted for interface
            compatibility but currently NOT sent with the request, so it has
            no effect on the result.

    Returns:
        str: Transcribed text

    Raises:
        RuntimeError: If the file cannot be read or the API call fails
    """
    try:
        client = Groq()

        # Read the audio up front so the file handle is closed before the
        # network call is made.
        with open(audio_path, "rb") as file:
            audio_bytes = file.read()

        transcription = client.audio.transcriptions.create(
            # Send only the base name: the extension is kept, but the server's
            # local directory layout is not exposed to the API.
            file=(os.path.basename(audio_path), audio_bytes),
            model="whisper-large-v3-turbo",
            response_format="json",
            # NOTE(review): `language` is deliberately not forwarded here,
            # presumably so the service detects the spoken language itself.
            # Confirm before wiring it in.
            temperature=0.0,
        )

        logger.info(f"Transcription result: {transcription.text}")
        return transcription.text

    except Exception as e:
        logger.error(f"Error transcribing audio: {str(e)}")
        raise RuntimeError(f"Transcription failed: {str(e)}") from e

0 comments on commit 41fc958

Please sign in to comment.