diff --git a/README.md b/README.md index 8196284..b78d220 100644 --- a/README.md +++ b/README.md @@ -148,7 +148,6 @@ python3 ./stream-translator-gpt/translator.py | `URL` | | The URL of the stream. If a local file path is filled in, it will be used as input. If fill in "device", the input will be obtained from your PC device. | | `--format` | wa* | Stream format code, this parameter will be passed directly to yt-dlp. | | `--cookies` | | Used to open member-only stream, this parameter will be passed directly to yt-dlp. | -| `--direct_url` | | Set this flag to pass the URL directly to ffmpeg. Otherwise, yt-dlp is used to obtain the stream URL. | | `--device_index` | | The index of the device that needs to be recorded. If not set, the system default recording device will be used. | | **Audio Slicing Options** | | `--frame_duration` | 0.1 | The unit that processes live streaming data in seconds. | diff --git a/README_CN.md b/README_CN.md index 8d0feb5..b46c688 100644 --- a/README_CN.md +++ b/README_CN.md @@ -146,7 +146,6 @@ python3 ./stream-translator-gpt/translator.py | `URL` | | 直播流的URL。如果填写了本地文件路径,它将被用作输入。如果填写"device",输入将从您的PC音频设备获取。 | | `--format` | wa* | 直播流格式代码,此参数将直接传递给yt-dlp。 | | `--cookies` | | 用于打开仅会员可看的直播流,此参数将直接传递给yt-dlp。 | -| `--direct_url` | | 设置此标志以直接将URL传递给ffmpeg。否则,yt-dlp用于获取流URL。 | | `--device_index` | | 音频输入设备的index。如果未设置,则使用系统默认音频输入设备。 | | **音频切割选项** | | `--frame_duration` | 0.1 | 处理实时流数据的单位(以秒为单位)。 | diff --git a/stream_translator_gpt/audio_getter.py b/stream_translator_gpt/audio_getter.py index ad92e6c..95be25d 100644 --- a/stream_translator_gpt/audio_getter.py +++ b/stream_translator_gpt/audio_getter.py @@ -1,3 +1,4 @@ +import os import queue import signal import subprocess @@ -21,20 +22,7 @@ def _transport(ytdlp_proc, ffmpeg_proc): ffmpeg_proc.kill() -def _open_stream(url: str, direct_url: bool, format: str, cookies: str): - if direct_url: - try: - process = (ffmpeg.input( - url, loglevel='panic').output('pipe:', - format='s16le', - acodec='pcm_s16le', - ac=1, - ar=SAMPLE_RATE).run_async(pipe_stdout=True)) - except ffmpeg.Error as e: - raise RuntimeError(f'Failed to load audio: {e.stderr.decode()}') from e - - return process, None - +def _open_stream(url: str, format: str, cookies: str): cmd = ['yt-dlp', url, '-f', format, '-o', '-', '-q'] if cookies: cmd.extend(['--cookies', cookies]) @@ -58,19 +46,29 @@ def _open_stream(url: str, direct_url: bool, format: str, cookies: str): class StreamAudioGetter(LoopWorkerBase): - def __init__(self, url: str, direct_url: bool, format: str, cookies: str, + def __init__(self, url: str, format: str, cookies: str, frame_duration: float) -> None: + self._cleanup_ytdlp_cache() + print('Opening stream: {}'.format(url)) - self.ffmpeg_process, self.ytdlp_process = _open_stream(url, direct_url, format, cookies) + self.ffmpeg_process, self.ytdlp_process = _open_stream(url, format, cookies) self.byte_size = round(frame_duration * SAMPLE_RATE * 2) # Factor 2 comes from reading the int16 stream as bytes signal.signal(signal.SIGINT, self._exit_handler) + + def __del__(self): + self._cleanup_ytdlp_cache() def _exit_handler(self, signum, frame): self.ffmpeg_process.kill() if self.ytdlp_process: self.ytdlp_process.kill() sys.exit(0) + + def _cleanup_ytdlp_cache(self): + for file in os.listdir('./'): + if file.startswith('--Frag'): + os.remove(file) def loop(self, output_queue: queue.SimpleQueue[np.array]): while self.ffmpeg_process.poll() is None: diff --git a/stream_translator_gpt/audio_transcriber.py b/stream_translator_gpt/audio_transcriber.py index 7bce552..470ae80 100644 --- a/stream_translator_gpt/audio_transcriber.py +++ b/stream_translator_gpt/audio_transcriber.py @@ -8,7 +8,7 @@ from . import filters from .common import TranslationTask, SAMPLE_RATE, LoopWorkerBase, sec2str -TEMP_AUDIO_FILE_NAME = 'temp.wav' +TEMP_AUDIO_FILE_NAME = '_whisper_api_temp.wav' def _filter_text(text: str, whisper_filters: str): @@ -79,6 +79,10 @@ class RemoteOpenaiWhisper(OpenaiWhisper): def __init__(self, language: str) -> None: self.client = OpenAI() self.language = language + + def __del__(self): + if os.path.exists(TEMP_AUDIO_FILE_NAME): + os.remove(TEMP_AUDIO_FILE_NAME) def transcribe(self, audio: np.array, **transcribe_options) -> str: with open(TEMP_AUDIO_FILE_NAME, 'wb') as audio_file: diff --git a/stream_translator_gpt/translator.py b/stream_translator_gpt/translator.py index ff41c7c..96c9d94 100644 --- a/stream_translator_gpt/translator.py +++ b/stream_translator_gpt/translator.py @@ -20,7 +20,7 @@ def _start_daemon_thread(func, *args, **kwargs): thread.start() -def main(url, format, cookies, direct_url, device_index, frame_duration, +def main(url, format, cookies, device_index, frame_duration, continuous_no_speech_threshold, min_audio_length, max_audio_length, prefix_retention_length, vad_threshold, model, language, use_faster_whisper, use_whisper_api, whisper_filters, openai_api_key, google_api_key, gpt_translation_prompt, @@ -122,7 +122,6 @@ def main(url, format, cookies, direct_url, device_index, frame_duration, output_queue=getter_to_slicer_queue) else: StreamAudioGetter.work(url=url, - direct_url=direct_url, format=format, cookies=cookies, frame_duration=frame_duration, @@ -152,10 +151,6 @@ def cli(): default=None, help='Used to open member-only stream, ' 'this parameter will be passed directly to yt-dlp.') - parser.add_argument('--direct_url', - action='store_true', - help='Set this flag to pass the URL directly to ffmpeg. ' - 'Otherwise, yt-dlp is used to obtain the stream URL.') parser.add_argument('--device_index', type=int, default=None, @@ -333,9 +328,4 @@ def cli(): if args['beam_size'] == 0: args['beam_size'] = None - # Remove yt-dlp cache - for file in os.listdir('./'): - if file.startswith('--Frag'): - os.remove(file) - main(url, **args)