Add streaming lag as a replacement for max-delay in streaming mode

speechmatics · Oct 16, 2024 · ed5b55c · ed5b55c
1 parent e47cd2f
commit ed5b55c
Show file tree

Hide file tree

Showing 5 changed files with 17 additions and 2 deletions.
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-2.0.2
+2.0.3
diff --git a/speechmatics/cli.py b/speechmatics/cli.py
@@ -213,6 +213,7 @@ def get_transcription_config(
         "output_locale",
         "operating_point",
         "max_delay",
+        "streaming_lag",
         "max_delay_mode",
         "diarization",
         "channel_diarization_labels",
@@ -228,6 +229,9 @@ def get_transcription_config(
         "enable_transcription_partials",
     ]:
         config[option] = True if args.get(option) else config.get(option)
+    if config.get("streaming_mode") and config.get("max_delay"):
+        if config.get("streaming_lag"):
+            LOGGER.warning("When using streaming mode only streaming_lag will be used. max_delay isn't used.")
 
     if args.get("volume_threshold") is not None:
         config["audio_filtering_config"] = {

diff --git a/speechmatics/cli_parser.py b/speechmatics/cli_parser.py
@@ -385,7 +385,7 @@ def get_arg_parser():
         "--streaming-mode",
         default=False,
         action="store_true",
-        help="Whether to run the engine in streaming mode. Internal Speechmatics use only.",
+        help="Whether to run the engine in streaming mode.",
     )
     rt_transcribe_command_parser.add_argument(
         "--enable-partials",
@@ -429,6 +429,11 @@ def get_arg_parser():
         type=float,
         help="Maximum acceptable delay before sending a piece of transcript.",
     )
+    rt_transcribe_command_parser.add_argument(
+        "--streaming-lag",
+        type=float,
+        help="Maximum streaming delay before sending a word.",
+    )
     rt_transcribe_command_parser.add_argument(
         "--max-delay-mode",
         default="flexible",

diff --git a/speechmatics/models.py b/speechmatics/models.py
@@ -240,6 +240,9 @@ class TranscriptionConfig(_TranscriptionConfig):
     max_delay: float = None
     """Maximum acceptable delay."""
 
+    streaming_lag: float = None
+    """Maximum acceptable streaming delay."""
+
     max_delay_mode: str = None
     """Determines whether the threshold specified in max_delay can be exceeded
     if a potential entity is detected. Flexible means if a potential entity

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -613,6 +613,8 @@ def test_rt_main_with_all_options(mock_server, tmp_path):
         "none",
         "--max-delay",
         "5.0",
+        "--streaming-lag",  # max-delay and streaming-lag should not normally be combined, but it is fine for this test
+        "0.8",
         "--max-delay-mode",
         "fixed",
         "--chunk-size",
@@ -654,6 +656,7 @@ def test_rt_main_with_all_options(mock_server, tmp_path):
     )  # noqa
     assert msg["transcription_config"]["diarization"] == "none"
     assert msg["transcription_config"]["max_delay"] == 5.0
+    assert msg["transcription_config"]["streaming_lag"] == 0.8
     assert msg["transcription_config"]["max_delay_mode"] == "fixed"
     assert msg["transcription_config"].get("operating_point") is None
     assert (