Skip to content

Commit

Permalink
support recording device
Browse files Browse the repository at this point in the history
  • Loading branch information
ionic-bond committed Dec 26, 2023
1 parent dabd1eb commit 7f9fcc1
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 19 deletions.
16 changes: 16 additions & 0 deletions audio_getter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import ffmpeg
import numpy as np
import sounddevice as sd

from common import SAMPLE_RATE

Expand Down Expand Up @@ -85,3 +86,18 @@ def work(self, output_queue: queue.SimpleQueue[np.array]):
self.ffmpeg_process.kill()
if self.ytdlp_process:
self.ytdlp_process.kill()


class DeviceAudioGetter():
    """Capture fixed-length audio frames from a local recording device.

    Recording goes through the ``sounddevice`` module (imported as ``sd``);
    every frame is a one-dimensional array recorded at ``SAMPLE_RATE`` with a
    single channel.
    """

    def __init__(self, device_index: int, frame_duration: float) -> None:
        """Select the input device and remember the frame length.

        Args:
            device_index: Index of the input device to record from, or
                ``None`` to keep the current ``sounddevice`` default input.
            frame_duration: Length of each recorded frame, in seconds.
        """
        # Compare against None explicitly: 0 is a valid device index, and a
        # plain truthiness test would silently ignore ``--device_index 0``.
        if device_index is not None:
            sd.default.device[0] = device_index
        sd.default.dtype[0] = np.float32
        self.frame_duration = frame_duration
        print("Recording device: {}".format(sd.query_devices(sd.default.device[0])['name']))

    def work(self, output_queue: queue.SimpleQueue[np.array]):
        """Record frames forever and push each one onto ``output_queue``.

        This method never returns; it is meant to run in its own thread.
        """
        # The frame size does not change between iterations, so compute it once.
        frames_per_chunk = round(SAMPLE_RATE * self.frame_duration)
        # NOTE(review): every sd.rec() call looks like an independent recording,
        # so samples arriving between two calls may be dropped -- confirm; a
        # persistent sd.InputStream would avoid that if it matters.
        while True:
            audio = sd.rec(frames=frames_per_chunk,
                           samplerate=SAMPLE_RATE,
                           channels=1,
                           blocking=True).flatten()
            output_queue.put(audio)
35 changes: 19 additions & 16 deletions gpt_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,31 @@
from collections import deque
from datetime import datetime, timedelta

from openai import OpenAI
from openai import OpenAI, APITimeoutError, APIConnectionError

from common import TranslationTask


def _translate_by_gpt(client, translation_task, assistant_prompt, model, history_messages=[]):
# https://platform.openai.com/docs/api-reference/chat/create?lang=python
system_prompt = "You are a translation engine."
messages = [{"role": "system", "content": system_prompt}]
messages.extend(history_messages)
messages.append({"role": "user", "content": assistant_prompt})
messages.append({"role": "user", "content": translation_task.transcribed_text})
completion = client.chat.completions.create(
model=model,
temperature=0,
max_tokens=1000,
top_p=1,
frequency_penalty=1,
presence_penalty=1,
messages=messages,
)
translation_task.translated_text = completion.choices[0].message.content
try:
system_prompt = "You are a translation engine."
messages = [{"role": "system", "content": system_prompt}]
messages.extend(history_messages)
messages.append({"role": "user", "content": assistant_prompt})
messages.append({"role": "user", "content": translation_task.transcribed_text})
completion = client.chat.completions.create(
model=model,
temperature=0,
max_tokens=1000,
top_p=1,
frequency_penalty=1,
presence_penalty=1,
messages=messages,
)
translation_task.translated_text = completion.choices[0].message.content
except (APITimeoutError, APIConnectionError) as e:
print(e)


class ParallelTranslator():
Expand Down
14 changes: 11 additions & 3 deletions translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import threading
import time

from audio_getter import StreamAudioGetter
from audio_getter import StreamAudioGetter, DeviceAudioGetter
from audio_slicer import AudioSlicer
from audio_transcriber import OpenaiWhisper, FasterWhisper, RemoteOpenaiWhisper
from gpt_translator import ParallelTranslator, SerialTranslator
Expand All @@ -18,7 +18,7 @@ def _start_daemon_thread(func, *args, **kwargs):
thread.start()


def main(url, format, direct_url, cookies, frame_duration, continuous_no_speech_threshold,
def main(url, format, direct_url, cookies, device_index, frame_duration, continuous_no_speech_threshold,
min_audio_length, max_audio_length, prefix_retention_length, vad_threshold, model,
use_faster_whisper, use_whisper_api, whisper_filters, output_timestamps,
gpt_translation_prompt, gpt_translation_history_size, openai_api_key, gpt_model,
Expand Down Expand Up @@ -48,7 +48,10 @@ def main(url, format, direct_url, cookies, frame_duration, continuous_no_speech_
audio_transcriber = OpenaiWhisper(model)
audio_slicer = AudioSlicer(frame_duration, continuous_no_speech_threshold, min_audio_length,
max_audio_length, prefix_retention_length, vad_threshold)
audio_getter = StreamAudioGetter(url, direct_url, format, cookies, frame_duration)
if url.lower() == 'device':
audio_getter = DeviceAudioGetter(device_index, frame_duration)
else:
audio_getter = StreamAudioGetter(url, direct_url, format, cookies, frame_duration)

getter_to_slicer_queue = queue.SimpleQueue()
slicer_to_transcriber_queue = queue.SimpleQueue()
Expand Down Expand Up @@ -91,6 +94,11 @@ def cli():
default=None,
help='Used to open member-only stream, '
'this parameter will be passed directly to yt-dlp.')
parser.add_argument('--device_index',
type=int,
default=None,
help='The index of the device that needs to be recorded. '
'If not set, the system default recording device will be used.')
parser.add_argument('--frame_duration',
type=float,
default=0.1,
Expand Down

0 comments on commit 7f9fcc1

Please sign in to comment.