Skip to content

Commit

Permalink
add streaming lag as a replacement for max-delay in streaming mode
Browse files Browse the repository at this point in the history
  • Loading branch information
Georgios Hadjiharalambous committed Oct 16, 2024
1 parent e47cd2f commit ed5b55c
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 2 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.0.2
2.0.3
4 changes: 4 additions & 0 deletions speechmatics/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ def get_transcription_config(
"output_locale",
"operating_point",
"max_delay",
"streaming_lag",
"max_delay_mode",
"diarization",
"channel_diarization_labels",
Expand All @@ -228,6 +229,9 @@ def get_transcription_config(
"enable_transcription_partials",
]:
config[option] = True if args.get(option) else config.get(option)
if config.get("streaming_mode") and config.get("max_delay"):
if config.get("streaming_lag"):
LOGGER.warning("When using streaming mode only streaming_lag will be used. max_delay isn't used.")

if args.get("volume_threshold") is not None:
config["audio_filtering_config"] = {
Expand Down
7 changes: 6 additions & 1 deletion speechmatics/cli_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ def get_arg_parser():
"--streaming-mode",
default=False,
action="store_true",
help="Whether to run the engine in streaming mode. Internal Speechmatics use only.",
help="Whether to run the engine in streaming mode.",
)
rt_transcribe_command_parser.add_argument(
"--enable-partials",
Expand Down Expand Up @@ -429,6 +429,11 @@ def get_arg_parser():
type=float,
help="Maximum acceptable delay before sending a piece of transcript.",
)
rt_transcribe_command_parser.add_argument(
"--streaming-lag",
type=float,
help="Maximum streaming delay before sending a word.",
)
rt_transcribe_command_parser.add_argument(
"--max-delay-mode",
default="flexible",
Expand Down
3 changes: 3 additions & 0 deletions speechmatics/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,9 @@ class TranscriptionConfig(_TranscriptionConfig):
max_delay: float = None
"""Maximum acceptable delay."""

streaming_lag: float = None
"""Maximum acceptable streaming delay."""

max_delay_mode: str = None
"""Determines whether the threshold specified in max_delay can be exceeded
if a potential entity is detected. Flexible means if a potential entity
Expand Down
3 changes: 3 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,8 @@ def test_rt_main_with_all_options(mock_server, tmp_path):
"none",
"--max-delay",
"5.0",
"--streaming-lag", # we shouldn't have max-delay and streaming lag for this test is ok
"0.8",
"--max-delay-mode",
"fixed",
"--chunk-size",
Expand Down Expand Up @@ -654,6 +656,7 @@ def test_rt_main_with_all_options(mock_server, tmp_path):
) # noqa
assert msg["transcription_config"]["diarization"] == "none"
assert msg["transcription_config"]["max_delay"] == 5.0
assert msg["transcription_config"]["streaming_lag"] == 0.8
assert msg["transcription_config"]["max_delay_mode"] == "fixed"
assert msg["transcription_config"].get("operating_point") is None
assert (
Expand Down

0 comments on commit ed5b55c

Please sign in to comment.