hugobloem · hugobloem · Nov 23, 2024 · Nov 23, 2024
diff --git a/wyoming_microsoft_tts/__main__.py b/wyoming_microsoft_tts/__main__.py
@@ -96,12 +96,18 @@ async def main() -> None:
     _LOGGER.debug("Arguments parsed successfully.")
 
     # Load voice info
-    voices_info = get_voices(
-        args.download_dir,
-        update_voices=args.update_voices,
-        region=args.service_region,
-        key=args.subscription_key,
-    )
+    try:
+        _LOGGER.info("Starting voices loading process.")
+        voices_info = get_voices(
+            args.download_dir,
+            update_voices=args.update_voices,
+            region=args.service_region,
+            key=args.subscription_key,
+        )
+        _LOGGER.info("Voices loaded successfully.")
+    except Exception as e:
+        _LOGGER.error(f"Failed to load voices: {e}")
+        return
 
     # Resolve aliases for backwards compatibility with old voice names
     aliases_info: dict[str, Any] = {}
@@ -166,13 +172,16 @@ async def main() -> None:
     server = AsyncServer.from_uri(args.uri)
 
     _LOGGER.info("Ready")
-    await server.run(
-        partial(
-            MicrosoftEventHandler,
-            wyoming_info,
-            args,
+    try:
+        await server.run(
+            partial(
+                MicrosoftEventHandler,
+                wyoming_info,
+                args,
+            )
         )
-    )
+    except Exception as e:
+        _LOGGER.error(f"An error occurred while running the server: {e}")
 
 
 # -----------------------------------------------------------------------------

diff --git a/wyoming_microsoft_tts/handler.py b/wyoming_microsoft_tts/handler.py
@@ -67,41 +67,51 @@ async def handle_event(self, event: Event) -> bool:
             if not has_punctuation:
                 text = text + self.cli_args.auto_punctuation[0]
 
-        output_path = self.microsoft_tts.synthesize(text=synthesize.text, voice=voice)
-
-        wav_file: wave.Wave_read = wave.open(output_path, "rb")
-        with wav_file:
-            rate = wav_file.getframerate()
-            width = wav_file.getsampwidth()
-            channels = wav_file.getnchannels()
-
-            await self.write_event(
-                AudioStart(
-                    rate=rate,
-                    width=width,
-                    channels=channels,
-                ).event(),
-            )
-
-            # Audio
-            audio_bytes = wav_file.readframes(wav_file.getnframes())
-            bytes_per_sample = width * channels
-            bytes_per_chunk = bytes_per_sample * self.cli_args.samples_per_chunk
-            num_chunks = int(math.ceil(len(audio_bytes) / bytes_per_chunk))
-
-            # Split into chunks
-            for i in range(num_chunks):
-                offset = i * bytes_per_chunk
-                chunk = audio_bytes[offset : offset + bytes_per_chunk]
+        _LOGGER.debug("Synthesizing: %s", text)
+        try:
+            output_path = self.microsoft_tts.synthesize(text=text, voice=voice)
+        except Exception as e:
+            _LOGGER.error("Failed to synthesize text: %s", e)
+            return False
+
+        _LOGGER.debug("Synthesized text")
+        try:
+            wav_file: wave.Wave_read = wave.open(output_path, "rb")
+            with wav_file:
+                rate = wav_file.getframerate()
+                width = wav_file.getsampwidth()
+                channels = wav_file.getnchannels()
+
                 await self.write_event(
-                    AudioChunk(
-                        audio=chunk,
+                    AudioStart(
                         rate=rate,
                         width=width,
                         channels=channels,
                     ).event(),
                 )
 
+                # Audio
+                audio_bytes = wav_file.readframes(wav_file.getnframes())
+                bytes_per_sample = width * channels
+                bytes_per_chunk = bytes_per_sample * self.cli_args.samples_per_chunk
+                num_chunks = int(math.ceil(len(audio_bytes) / bytes_per_chunk))
+
+                # Split into chunks
+                for i in range(num_chunks):
+                    offset = i * bytes_per_chunk
+                    chunk = audio_bytes[offset : offset + bytes_per_chunk]
+                    await self.write_event(
+                        AudioChunk(
+                            audio=chunk,
+                            rate=rate,
+                            width=width,
+                            channels=channels,
+                        ).event(),
+                    )
+        except Exception as e:
+            _LOGGER.error("Failed to send audio: %s", e)
+            return False
+
         await self.write_event(AudioStop().event())
         _LOGGER.debug("Completed request")