From d6303beebb64ea2e6011af710b550e8de824e8d7 Mon Sep 17 00:00:00 2001 From: Markus Hennerbichler Date: Thu, 29 Feb 2024 13:34:53 +0000 Subject: [PATCH] Add example on how to read and transcribe microphone input (#88) --- examples/README.md | 8 ++- examples/transcribe_from_microphone.py | 71 ++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 2 deletions(-) create mode 100644 examples/transcribe_from_microphone.py diff --git a/examples/README.md b/examples/README.md index c3c3be0..10ba352 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,6 +1,8 @@ # Examples -This folder provides some examples of how the Speechmatics python client can be used to build different systems. The current examples include: +This folder provides some examples of how the Speechmatics python client can be used to build different systems. +Each of the examples should have a separate README with all the necessary steps to get them up and running. +The current examples include: 1. [notification_flow](./notification_flow/README.md) (webhooks) @@ -18,4 +20,6 @@ Demonstrates how to run a websocket server that acts as a proxy to a speechmatic Tools for batch synchronising a local folder of audio into a searchable database of transcriptions. -Each of the examples should have a separate README with all the necessary steps to get them up and running. +5. [Microphone transcription example](./transcribe_from_microphone.py) + +This shows how the `sounddevice` python package can be used to stream audio from a microphone to Speechmatics. 
diff --git a/examples/transcribe_from_microphone.py b/examples/transcribe_from_microphone.py
new file mode 100644
index 0000000..471bff2
--- /dev/null
+++ b/examples/transcribe_from_microphone.py
@@ -0,0 +1,108 @@
+"""Stream microphone audio to Speechmatics for real-time transcription.
+
+Audio is captured with the ``sounddevice`` package and passed to the
+Speechmatics websocket client.
+
+Usage:
+    python transcribe_from_microphone.py -a AUTH_TOKEN [-d DEVICE]
+        [-l LANGUAGE] [-m MAX_DELAY]
+"""
+
+import speechmatics
+import speechmatics.models
+import speechmatics.client
+import speechmatics.cli
+import asyncio
+import argparse
+import sys
+import sounddevice as sd
+
+
+class RawInputStreamWrapper:
+    """Wrap a ``sd.RawInputStream`` so that ``read()`` returns ``bytes``."""
+
+    def __init__(self, wrapped: sd.RawInputStream):
+        """Keep a reference to the input stream that ``read`` pulls from."""
+        self.wrapped: sd.RawInputStream = wrapped
+
+    def read(self, frames):
+        """Read ``frames`` frames from the wrapped stream as ``bytes``."""
+        # Only the first element of the wrapped read() result is used.
+        # NOTE(review): per the sounddevice docs the second element is the
+        # overflow flag, so overflows go unreported here - confirm that is OK.
+        return bytes(self.wrapped.read(frames)[0])
+
+
+async def transcribe_from_device(device, speechmatics_client, language: str, max_delay):
+    """Open a microphone stream on ``device`` and run a transcription on it.
+
+    Args:
+        device: Input device, passed unchanged to ``sd.RawInputStream``.
+        speechmatics_client: Client whose ``run`` coroutine is awaited with
+            the wrapped stream, the transcription config and audio settings.
+        language: Language code placed in the transcription config.
+        max_delay: ``max_delay`` value placed in the transcription config.
+    """
+    frame_rate = 44_100
+    with sd.RawInputStream(
+        device=device, channels=1, samplerate=frame_rate, dtype="float32"
+    ) as stream:
+        settings = speechmatics.models.AudioSettings(
+            sample_rate=frame_rate,
+            # Choose the little- or big-endian float32 encoding name to match
+            # the host byte order.
+            encoding="pcm_f32" + ("le" if sys.byteorder == "little" else "be"),
+        )
+
+        conf = speechmatics.models.TranscriptionConfig(
+            language=language,
+            operating_point="enhanced",
+            max_delay=max_delay,
+            enable_partials=True,
+            enable_entities=True,
+        )
+        await speechmatics_client.run(RawInputStreamWrapper(stream), conf, settings)
+
+
+def main(args):
+    """Create the websocket client from ``args`` and run the transcription.
+
+    Args:
+        args: Parsed command-line namespace providing ``auth_token``,
+            ``device``, ``language`` and ``max_delay``.
+    """
+    speechmatics_client = speechmatics.client.WebsocketClient(
+        connection_settings_or_auth_token=args.auth_token
+    )
+    # Register the handlers supplied by speechmatics.cli on the client;
+    # ``transcripts`` is the (initially empty) object handed to them.
+    transcripts = speechmatics.cli.Transcripts(text="", json=[])
+    speechmatics.cli.add_printing_handlers(speechmatics_client, transcripts)
+
+    asyncio.run(
+        transcribe_from_device(
+            args.device, speechmatics_client, args.language, args.max_delay
+        )
+    )
+
+
+def int_or_str(text):
+    """Return ``int(text)`` if ``text`` parses as an integer, else ``text``."""
+    try:
+        return int(text)
+    except ValueError:
+        return text
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Speechmatics Microphone Realtime Transcription example"
+    )
+    parser.add_argument(
+        "-d", "--device", type=int_or_str, help="input device (numeric ID or substring)"
+    )
+    parser.add_argument("-a", "--auth_token", type=str, required=True)
+    parser.add_argument("-l", "--language", type=str, default="en")
+    parser.add_argument("-m", "--max_delay", type=float, default=2.0)
+
+    main(parser.parse_args())