Readability

ionic-bond · Mar 26, 2024 · 223c23a · 223c23a
1 parent 1c7b688
commit 223c23a
Show file tree

Hide file tree

Showing 5 changed files with 20 additions and 30 deletions.
diff --git a/README.md b/README.md
@@ -148,7 +148,6 @@ python3 ./stream-translator-gpt/translator.py
 | `URL`                              |               | The URL of the stream. If a local file path is filled in, it will be used as input. If "device" is filled in, the input will be obtained from your PC audio device.                                      |
 | `--format`                         | wa*           | Stream format code, this parameter will be passed directly to yt-dlp.                                                                                                                                    |
 | `--cookies`                        |               | Used to open member-only stream, this parameter will be passed directly to yt-dlp.                                                                                                                       |
-| `--direct_url`                     |               | Set this flag to pass the URL directly to ffmpeg. Otherwise, yt-dlp is used to obtain the stream URL.                                                                                                    |
 | `--device_index`                   |               | The index of the device that needs to be recorded. If not set, the system default recording device will be used.                                                                                         |
 | **Audio Slicing Options**          |
 | `--frame_duration`                 | 0.1           | The unit, in seconds, in which live streaming data is processed.                                                                                                                                         |

diff --git a/README_CN.md b/README_CN.md
@@ -146,7 +146,6 @@ python3 ./stream-translator-gpt/translator.py
 | `URL`                              |               | 直播流的URL。如果填写了本地文件路径，它将被用作输入。如果填写"device"，输入将从您的PC音频设备获取。                      |
 | `--format`                         | wa*           | 直播流格式代码，此参数将直接传递给yt-dlp。                                                                               |
 | `--cookies`                        |               | 用于打开仅会员可看的直播流，此参数将直接传递给yt-dlp。                                                                   |
-| `--direct_url`                     |               | 设置此标志以直接将URL传递给ffmpeg。否则，yt-dlp用于获取流URL。                                                           |
 | `--device_index`                   |               | 音频输入设备的index。如果未设置，则使用系统默认音频输入设备。                                                            |
 | **音频切割选项**                   |
 | `--frame_duration`                 | 0.1           | 处理实时流数据的单位（以秒为单位）。                                                                                     |

diff --git a/stream_translator_gpt/audio_getter.py b/stream_translator_gpt/audio_getter.py
@@ -1,3 +1,4 @@
+import os
 import queue
 import signal
 import subprocess
@@ -21,20 +22,7 @@ def _transport(ytdlp_proc, ffmpeg_proc):
     ffmpeg_proc.kill()
 
 
-def _open_stream(url: str, direct_url: bool, format: str, cookies: str):
-    if direct_url:
-        try:
-            process = (ffmpeg.input(
-                url, loglevel='panic').output('pipe:',
-                                              format='s16le',
-                                              acodec='pcm_s16le',
-                                              ac=1,
-                                              ar=SAMPLE_RATE).run_async(pipe_stdout=True))
-        except ffmpeg.Error as e:
-            raise RuntimeError(f'Failed to load audio: {e.stderr.decode()}') from e
-
-        return process, None
-
+def _open_stream(url: str, format: str, cookies: str):
     cmd = ['yt-dlp', url, '-f', format, '-o', '-', '-q']
     if cookies:
         cmd.extend(['--cookies', cookies])
@@ -58,19 +46,29 @@ def _open_stream(url: str, direct_url: bool, format: str, cookies: str):
 
 class StreamAudioGetter(LoopWorkerBase):
 
-    def __init__(self, url: str, direct_url: bool, format: str, cookies: str,
+    def __init__(self, url: str, format: str, cookies: str,
                  frame_duration: float) -> None:
+        self._cleanup_ytdlp_cache()
+
         print('Opening stream: {}'.format(url))
-        self.ffmpeg_process, self.ytdlp_process = _open_stream(url, direct_url, format, cookies)
+        self.ffmpeg_process, self.ytdlp_process = _open_stream(url, format, cookies)
         self.byte_size = round(frame_duration * SAMPLE_RATE *
                                2)  # Factor 2 comes from reading the int16 stream as bytes
         signal.signal(signal.SIGINT, self._exit_handler)
+
+    def __del__(self):
+        self._cleanup_ytdlp_cache()
 
     def _exit_handler(self, signum, frame):
         self.ffmpeg_process.kill()
         if self.ytdlp_process:
             self.ytdlp_process.kill()
         sys.exit(0)
+
+    def _cleanup_ytdlp_cache(self):
+        for file in os.listdir('./'):
+            if file.startswith('--Frag'):
+                os.remove(file)
 
     def loop(self, output_queue: queue.SimpleQueue[np.array]):
         while self.ffmpeg_process.poll() is None:

diff --git a/stream_translator_gpt/audio_transcriber.py b/stream_translator_gpt/audio_transcriber.py
@@ -8,7 +8,7 @@
 from . import filters
 from .common import TranslationTask, SAMPLE_RATE, LoopWorkerBase, sec2str
 
-TEMP_AUDIO_FILE_NAME = 'temp.wav'
+TEMP_AUDIO_FILE_NAME = '_whisper_api_temp.wav'
 
 
 def _filter_text(text: str, whisper_filters: str):
@@ -79,6 +79,10 @@ class RemoteOpenaiWhisper(OpenaiWhisper):
     def __init__(self, language: str) -> None:
         self.client = OpenAI()
         self.language = language
+
+    def __del__(self):
+        if os.path.exists(TEMP_AUDIO_FILE_NAME):
+            os.remove(TEMP_AUDIO_FILE_NAME)
 
     def transcribe(self, audio: np.array, **transcribe_options) -> str:
         with open(TEMP_AUDIO_FILE_NAME, 'wb') as audio_file:

diff --git a/stream_translator_gpt/translator.py b/stream_translator_gpt/translator.py
@@ -20,7 +20,7 @@ def _start_daemon_thread(func, *args, **kwargs):
     thread.start()
 
 
-def main(url, format, cookies, direct_url, device_index, frame_duration,
+def main(url, format, cookies, device_index, frame_duration,
          continuous_no_speech_threshold, min_audio_length, max_audio_length,
          prefix_retention_length, vad_threshold, model, language, use_faster_whisper,
          use_whisper_api, whisper_filters, openai_api_key, google_api_key, gpt_translation_prompt,
@@ -122,7 +122,6 @@ def main(url, format, cookies, direct_url, device_index, frame_duration,
                                   output_queue=getter_to_slicer_queue)
     else:
         StreamAudioGetter.work(url=url,
-                               direct_url=direct_url,
                                format=format,
                                cookies=cookies,
                                frame_duration=frame_duration,
@@ -152,10 +151,6 @@ def cli():
                         default=None,
                         help='Used to open member-only stream, '
                         'this parameter will be passed directly to yt-dlp.')
-    parser.add_argument('--direct_url',
-                        action='store_true',
-                        help='Set this flag to pass the URL directly to ffmpeg. '
-                        'Otherwise, yt-dlp is used to obtain the stream URL.')
     parser.add_argument('--device_index',
                         type=int,
                         default=None,
@@ -333,9 +328,4 @@ def cli():
     if args['beam_size'] == 0:
         args['beam_size'] = None
 
-    # Remove yt-dlp cache
-    for file in os.listdir('./'):
-        if file.startswith('--Frag'):
-            os.remove(file)
-
     main(url, **args)