diff --git a/README.md b/README.md index 7f501e1..0932c9a 100644 --- a/README.md +++ b/README.md @@ -152,6 +152,8 @@ python3 ./stream-translator-gpt/translator.py | `--cookies` | | Used to open member-only stream, this parameter will be passed directly to yt-dlp. | | `--input_proxy` | | Use the specified HTTP/HTTPS/SOCKS proxy for yt-dlp, e.g. http://127.0.0.1:7890. | | `--device_index` | | The index of the device that needs to be recorded. If not set, the system default recording device will be used. | +| `--print_all_devices` | | Print all audio devices info then exit. | +| `--device_recording_interval` | 0.5 | The shorter the recording interval, the lower the latency, but it will increase CPU usage. It is recommended to set it between 0.1 and 1.0. | | **Audio Slicing Options** | | `--frame_duration` | 0.1 | The unit that processes live streaming data in seconds, should be >= 0.03 | | `--continuous_no_speech_threshold` | 0.5 | Slice if there is no speech for a continuous period in second. 
| diff --git a/README_CN.md b/README_CN.md index e49cbc2..14dcdc6 100644 --- a/README_CN.md +++ b/README_CN.md @@ -150,6 +150,8 @@ python3 ./stream-translator-gpt/translator.py | `--cookies` | | 用于打开仅会员可看的直播流,此参数将直接传递给yt-dlp。 | | `--input_proxy` | | 为 yt-dlp 使用指定的 HTTP/HTTPS/SOCKS 代理,例如 http://127.0.0.1:7890。 | | `--device_index` | | 音频输入设备的index。如果未设置,则使用系统默认音频输入设备。 | +| `--print_all_devices` | | 打印所有音频设备信息然后退出。 | +| `--device_recording_interval` | 0.5 | 录音间隔越短,延迟越低,但会增加CPU使用率。建议将其设置在0.1到1.0之间。 | | **音频切割选项** | | `--frame_duration` | 0.1 | 处理实时流数据的单位(以秒为单位),需大于等于0.03。 | | `--continuous_no_speech_threshold` | 0.5 | 如果连续一段时间内没有语音,则进行切片(以秒为单位)。 | diff --git a/stream_translator_gpt/audio_getter.py b/stream_translator_gpt/audio_getter.py index b2e56a4..b6cb540 100644 --- a/stream_translator_gpt/audio_getter.py +++ b/stream_translator_gpt/audio_getter.py @@ -124,19 +124,22 @@ def loop(self, output_queue: queue.SimpleQueue[np.array]): class DeviceAudioGetter(LoopWorkerBase): - def __init__(self, device_index: int, frame_duration: float) -> None: + def __init__(self, device_index: int, frame_duration: float, recording_interval: float) -> None: import sounddevice as sd if device_index: sd.default.device[0] = device_index sd.default.dtype[0] = np.float32 self.frame_duration = frame_duration + self.recording_frame_num = max(1, round(recording_interval / frame_duration)) print('Recording device: {}'.format(sd.query_devices(sd.default.device[0])['name'])) def loop(self, output_queue: queue.SimpleQueue[np.array]): import sounddevice as sd while True: - audio = sd.rec(frames=round(SAMPLE_RATE * self.frame_duration), + audio = sd.rec(frames=round(SAMPLE_RATE * self.frame_duration * self.recording_frame_num), samplerate=SAMPLE_RATE, channels=1, blocking=True).flatten() - output_queue.put(audio) + split_audios = np.array_split(audio, self.recording_frame_num) + for split_audio in split_audios: + output_queue.put(split_audio) diff --git a/stream_translator_gpt/translator.py 
b/stream_translator_gpt/translator.py index bf62302..7a83226 100644 --- a/stream_translator_gpt/translator.py +++ b/stream_translator_gpt/translator.py @@ -21,7 +21,7 @@ def _start_daemon_thread(func, *args, **kwargs): thread.start() -def main(url, format, cookies, input_proxy, device_index, frame_duration, +def main(url, format, cookies, input_proxy, device_index, device_recording_interval, frame_duration, continuous_no_speech_threshold, min_audio_length, max_audio_length, prefix_retention_length, vad_threshold, model, language, use_faster_whisper, use_whisper_api, whisper_filters, openai_api_key, google_api_key, gpt_translation_prompt, @@ -141,6 +141,7 @@ def main(url, format, cookies, input_proxy, device_index, frame_duration, DeviceAudioGetter.work( device_index=device_index, frame_duration=frame_duration, + recording_interval=device_recording_interval, output_queue=getter_to_slicer_queue, ) elif os.path.isabs(url): @@ -196,6 +197,11 @@ def cli(): parser.add_argument('--print_all_devices', action='store_true', help='Print all audio devices info then exit.') + parser.add_argument('--device_recording_interval', + type=float, + default=0.5, + help='The shorter the recording interval, the lower the latency, ' + 'but it will increase CPU usage. It is recommended to set it between 0.1 and 1.0.') parser.add_argument('--frame_duration', type=float, default=0.1,