diff --git a/.changeset/olive-kangaroos-yawn.md b/.changeset/olive-kangaroos-yawn.md new file mode 100644 index 000000000..553940297 --- /dev/null +++ b/.changeset/olive-kangaroos-yawn.md @@ -0,0 +1,5 @@ +--- +"livekit-plugins-deepgram": patch +--- + +Added support for custom deepgram base url diff --git a/livekit-plugins/livekit-plugins-deepgram/livekit/plugins/deepgram/stt.py b/livekit-plugins/livekit-plugins-deepgram/livekit/plugins/deepgram/stt.py index c163aeb3a..ccb35b5cf 100644 --- a/livekit-plugins/livekit-plugins-deepgram/livekit/plugins/deepgram/stt.py +++ b/livekit-plugins/livekit-plugins-deepgram/livekit/plugins/deepgram/stt.py @@ -42,7 +42,6 @@ from .models import DeepgramLanguages, DeepgramModels BASE_URL = "https://api.deepgram.com/v1/listen" -BASE_URL_WS = "wss://api.deepgram.com/v1/listen" # This is the magic number during testing that we use to determine if a frame is loud enough @@ -126,6 +125,7 @@ def __init__( profanity_filter: bool = False, api_key: str | None = None, http_session: aiohttp.ClientSession | None = None, + base_url: str = BASE_URL, energy_filter: AudioEnergyFilter | bool = False, ) -> None: """ @@ -140,6 +140,7 @@ def __init__( streaming=True, interim_results=interim_results ) ) + self._base_url = base_url api_key = api_key or os.environ.get("DEEPGRAM_API_KEY") if api_key is None: @@ -210,7 +211,7 @@ async def _recognize_impl( try: async with self._ensure_session().post( - url=_to_deepgram_url(recognize_config), + url=_to_deepgram_url(recognize_config, self._base_url, websocket=False), data=rtc.combine_audio_frames(buffer).to_wav_bytes(), headers={ "Authorization": f"Token {self._api_key}", @@ -253,6 +254,7 @@ def stream( opts=config, api_key=self._api_key, http_session=self._ensure_session(), + base_url=self._base_url, ) self._active_streams.add(stream) return stream @@ -290,6 +292,7 @@ def __init__( conn_options: APIConnectOptions, api_key: str, http_session: aiohttp.ClientSession, + base_url: str, ) -> None: super().__init__( stt=stt, conn_options=conn_options, sample_rate=opts.sample_rate @@ -301,6 +304,7 @@ def __init__( self._opts = opts self._api_key = api_key self._session = http_session + self._base_url = base_url self._speaking = False self._audio_duration_collector = PeriodicCollector( callback=self._on_audio_duration_report, @@ -695,7 +699,7 @@ def prerecorded_transcription_to_speech_event( ) -def _to_deepgram_url(opts: dict, *, websocket: bool = False) -> str: +def _to_deepgram_url(opts: dict, base_url: str, *, websocket: bool) -> str: if opts.get("keywords"): # convert keywords to a list of "keyword:intensifier" opts["keywords"] = [ @@ -704,5 +708,11 @@ def _to_deepgram_url(opts: dict, *, websocket: bool = False) -> str: # lowercase bools opts = {k: str(v).lower() if isinstance(v, bool) else v for k, v in opts.items()} - base_url = BASE_URL_WS if websocket else BASE_URL + + if websocket and base_url.startswith("http"): + base_url = base_url.replace("http", "ws", 1) + + elif not websocket and base_url.startswith("ws"): + base_url = base_url.replace("ws", "http", 1) + return f"{base_url}?{urlencode(opts, doseq=True)}"