diff --git a/README.md b/README.md index 7c6613c..db0d154 100644 --- a/README.md +++ b/README.md @@ -162,7 +162,7 @@ python3 ./stream-translator-gpt/translator.py | `--gpt_model` | gpt-3.5-turbo | GPT model name, gpt-3.5-turbo or gpt-4. (If using Gemini, not need to change this) | | `--gpt_translation_prompt` | | If set, will translate the result text to target language via GPT / Gemini API (According to which API key is filled in). Example: "Translate from Japanese to Chinese" | | `--gpt_translation_history_size` | 0 | The number of previous messages sent when calling the GPT / Gemini API. If the history size is 0, the translation will be run parallelly. If the history size > 0, the translation will be run serially. | -| `--gpt_translation_timeout` | 15 | If the GPT / Gemini translation exceeds this number of seconds, the translation will be discarded. | +| `--gpt_translation_timeout` | 10 | If the GPT / Gemini translation exceeds this number of seconds, the translation will be discarded. | | `--gpt_base_url` | | Customize the API endpoint of GPT. | | `--retry_if_translation_fails` | | Retry when translation times out/fails. Used to generate subtitles offline. | | **Output Options** | diff --git a/README_CN.md b/README_CN.md index a014ee5..0aafff8 100644 --- a/README_CN.md +++ b/README_CN.md @@ -160,7 +160,7 @@ python3 ./stream-translator-gpt/translator.py | `--gpt_model` | gpt-3.5-turbo | GPT模型名称,gpt-3.5-turbo或gpt-4。(如果使用Gemini,则无需更改此设置) | | `--gpt_translation_prompt` | | 如果设置了该选项,将通过GPT / Gemini API(根据填写的API密钥决定)将结果文本翻译成目标语言。例如:"从日语翻译成中文" | | `--gpt_translation_history_size` | 0 | 调用GPT / Gemini API时发送的先前消息数量。如果历史记录大小为0,则会并行运行翻译。如果历史记录大小> 0,则会串行运行翻译。 | -| `--gpt_translation_timeout` | 15 | 如果GPT / Gemini的翻译超过这个秒数,那么该次的翻译将被丢弃。 | +| `--gpt_translation_timeout` | 10 | 如果GPT / Gemini的翻译超过这个秒数,那么该次的翻译将被丢弃。 | | `--gpt_base_url` | | 自定义GPT的API地址。 | | `--retry_if_translation_fails` | | 当翻译超时/失败时重试。用于离线生成字幕。 | | **输出选项** | diff --git a/pyproject.toml b/pyproject.toml index f31f3e8..904420c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ dependencies = [ "faster-whisper>=0.8.0,<1.0.0", "openai>=1.0,<2.0", "google-generativeai<1.0", + "discord.py>=2.0,<3.0", ] [project.scripts] diff --git a/requirements.txt b/requirements.txt index 900aa80..c6086d1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,10 @@ numpy scipy -ffmpeg-python==0.2.0 -openai-whisper==20231117 -faster-whisper==0.10.1 -openai==1.6.0 -google-generativeai yt-dlp -sounddevice +ffmpeg-python>=0.2.0,<0.3 +sounddevice<1.0 +openai-whisper<=20231117 +faster-whisper>=0.8.0,<1.0.0 +openai>=1.0,<2.0 +google-generativeai<1.0 +discord.py>=2.0,<3.0 diff --git a/stream_translator_gpt/result_exporter.py b/stream_translator_gpt/result_exporter.py index 367bcf4..bcb733b 100644 --- a/stream_translator_gpt/result_exporter.py +++ b/stream_translator_gpt/result_exporter.py @@ -1,3 +1,4 @@ +import discord import queue import requests from datetime import datetime @@ -8,7 +9,7 @@ def _send_to_cqhttp(url: str, token: str, text: str): headers = {'Authorization': 'Bearer {}'.format(token)} if token else None data = {'message': text} - requests.post(url, headers=headers, data=data) + requests.post(url, headers=headers, data=data, timeout=10) def _sec2str(second: float): @@ -20,11 +21,23 @@ def _sec2str(second: float): class ResultExporter(LoopWorkerBase): - def __init__(self) -> None: - pass + def __init__(self, discord_token: str) -> None: + self.discord_client = None + if discord_token: + self.discord_client = discord.Client(intents=discord.Intents.default()) + self.discord_client.run(discord_token) + + def send_to_discord(self, channel_id: int, text: str) -> None: + if not self.discord_client: + return + + @self.discord_client.event + async def on_ready(): + channel = self.discord_client.get_channel(channel_id) + await channel.send(text) def loop(self, input_queue: queue.SimpleQueue[TranslationTask], output_whisper_result: bool, - output_timestamps: bool, cqhttp_url: str, cqhttp_token: str): + output_timestamps: bool, cqhttp_url: str, cqhttp_token: str, discord_channel_id: int): while True: task = input_queue.get() timestamp_text = '{} --> {}'.format(_sec2str(task.time_range[0]), @@ -41,3 +54,5 @@ def loop(self, input_queue: queue.SimpleQueue[TranslationTask], output_whisper_r text_to_send = text_to_send.strip() if cqhttp_url: _send_to_cqhttp(cqhttp_url, cqhttp_token, text_to_send) + if discord_channel_id: + self.send_to_discord(discord_channel_id, text_to_send) \ No newline at end of file diff --git a/stream_translator_gpt/translator.py b/stream_translator_gpt/translator.py index 4974b42..2b9fbbb 100644 --- a/stream_translator_gpt/translator.py +++ b/stream_translator_gpt/translator.py @@ -24,9 +24,9 @@ def main(url, format, cookies, direct_url, device_index, frame_duration, continuous_no_speech_threshold, min_audio_length, max_audio_length, prefix_retention_length, vad_threshold, model, language, use_faster_whisper, use_whisper_api, whisper_filters, openai_api_key, google_api_key, gpt_translation_prompt, - gpt_translation_history_size, gpt_model, gpt_translation_timeout, + gpt_translation_history_size, gpt_model, gpt_translation_timeout, gpt_base_url, retry_if_translation_fails, output_timestamps, hide_transcribe_result, cqhttp_url, - cqhttp_token, gpt_base_url, **transcribe_options): + cqhttp_token, discord_channel_id, discord_token, **transcribe_options): if openai_api_key: os.environ['OPENAI_API_KEY'] = openai_api_key @@ -46,6 +46,8 @@ def main(url, format, cookies, direct_url, device_index, frame_duration, output_timestamps=output_timestamps, cqhttp_url=cqhttp_url, cqhttp_token=cqhttp_token, + discord_channel_id=discord_channel_id, + discord_token=discord_token, input_queue=translator_to_exporter_queue) if gpt_translation_prompt: if google_api_key: @@ -251,7 +253,7 @@ def cli(): 'If the history size > 0, the translation will be run serially.') parser.add_argument('--gpt_translation_timeout', type=int, - default=15, + default=10, help='If the GPT / Gemini translation exceeds this number of seconds, ' 'the translation will be discarded.') parser.add_argument('--gpt_base_url', @@ -277,6 +279,14 @@ def cli(): default=None, help='Token of cqhttp, if it is not set on the server side, ' 'it does not need to fill in.') + parser.add_argument('--discord_channel_id', + type=int, + default=None, + help='If set, will send the result text to the discord channel.') + parser.add_argument('--discord_token', + type=str, + default=None, + help='Token of discord bot.') args = parser.parse_args().__dict__ url = args.pop('URL')