diff --git a/AudioToText.ipynb b/AudioToText.ipynb
index 0965089..f415699 100644
--- a/AudioToText.ipynb
+++ b/AudioToText.ipynb
@@ -65,7 +65,7 @@
     "  NO_ROOT_WARNING = '|& grep -v \"WARNING: Running pip as the \\'root\\' user\"' # running in Colab\n",
     "\n",
     "  !pip install --no-warn-script-location --user --upgrade pip {NO_ROOT_WARNING}\n",
-    "  !pip install --root-user-action=ignore git+https://github.com/openai/whisper.git@v20231117 openai==0.28 numpy scipy deepl pydub cohere ffmpeg-python torch==2.1.0 tensorflow-probability==0.23.0 typing-extensions==4.9.0"
+    "  !pip install --root-user-action=ignore git+https://github.com/openai/whisper.git@v20231117 openai==1.8.0 numpy scipy deepl pydub cohere ffmpeg-python torch==2.1.0 tensorflow-probability==0.23.0 typing-extensions==4.9.0"
    ]
   },
   {
@@ -308,10 +308,10 @@
     "\n",
     "import torch\n",
     "\n",
-    "import openai\n",
-    "\n",
     "import math\n",
     "\n",
+    "from openai import OpenAI\n",
+    "\n",
     "# select task\n",
     "\n",
     "task = \"Transcribe\" #@param [\"Transcribe\", \"Translate to English\"]\n",
@@ -422,12 +422,14 @@
     "}\n",
     "\n",
     "if api_key:\n",
-    "  openai.api_key = api_key\n",
+    "  api_client = OpenAI(api_key=api_key)\n",
     "\n",
     "  api_supported_formats = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']\n",
     "  api_max_bytes = 25 * 1024 * 1024 # 25 MB\n",
     "\n",
-    "  api_transcribe = getattr(openai.Audio, task)\n",
+    "  api_transcribe = api_client.audio.transcriptions if task == 'transcribe' else api_client.audio.translations\n",
+    "  api_transcribe = api_transcribe.create\n",
+    "\n",
     "  api_model = 'whisper-1' # large-v2\n",
     "\n",
     "  # https://platform.openai.com/docs/api-reference/audio?lang=python\n",
@@ -578,7 +580,8 @@
     "  for api_audio_chunk_path in api_audio_chunks:\n",
     "    ## API request\n",
     "    with open(api_audio_chunk_path, 'rb') as api_audio_file:\n",
-    "      api_result = api_transcribe(api_model, api_audio_file, **api_options)\n",
+    "      api_result = api_transcribe(model=api_model, file=api_audio_file, **api_options)\n",
+    "      api_result = api_result.model_dump() # to dict\n",
     "\n",
     "    api_segments = api_result['segments']\n",
     "\n",
diff --git a/audiototext.py b/audiototext.py
index e5f5c9b..7110e0e 100644
--- a/audiototext.py
+++ b/audiototext.py
@@ -64,7 +64,7 @@ if not args.skip_install:
 
   os.system("pip install --user --upgrade pip")
-  os.system("pip install git+https://github.com/openai/whisper.git@v20231117 openai==0.28 numpy scipy deepl pydub cohere ffmpeg-python torch==2.1.0 tensorflow-probability==0.23.0 typing-extensions==4.9.0")
+  os.system("pip install git+https://github.com/openai/whisper.git@v20231117 openai==1.8.0 numpy scipy deepl pydub cohere ffmpeg-python torch==2.1.0 tensorflow-probability==0.23.0 typing-extensions==4.9.0")
 
   print()
 
 """## [Step 2] 📁 Upload your audio files to this folder
@@ -110,10 +110,10 @@
 
 import torch
 
-import openai
-
 import math
 
+from openai import OpenAI
+
 # select task
 
 task = args.task
@@ -208,12 +208,14 @@
 }
 
 if args.api_key:
-  openai.api_key = args.api_key
+  api_client = OpenAI(api_key=args.api_key)
 
   api_supported_formats = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']
   api_max_bytes = 25 * 1024 * 1024 # 25 MB
 
-  api_transcribe = getattr(openai.Audio, task)
+  api_transcribe = api_client.audio.transcriptions if task == 'transcribe' else api_client.audio.translations
+  api_transcribe = api_transcribe.create
+
   api_model = 'whisper-1' # large-v2
 
   # https://platform.openai.com/docs/api-reference/audio?lang=python
@@ -364,7 +366,8 @@ def raw_split(big_chunk):
   for api_audio_chunk_path in api_audio_chunks:
     ## API request
     with open(api_audio_chunk_path, 'rb') as api_audio_file:
-      api_result = api_transcribe(api_model, api_audio_file, **api_options)
+      api_result = api_transcribe(model=api_model, file=api_audio_file, **api_options)
+      api_result = api_result.model_dump() # to dict
 
     api_segments = api_result['segments']
 
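Migration note: openai>=1.0 removes the module-level `openai.api_key` and the `openai.Audio.transcribe`/`openai.Audio.translate` entry points. Calls now go through an `OpenAI` client instance, arguments must be passed by keyword, and responses come back as pydantic models instead of plain dicts, which is why the diff adds `api_result.model_dump()` before indexing `api_result['segments']`. A minimal sketch of the new call pattern, assuming a hypothetical local file `audio.mp3` and the `verbose_json` response format (the format that exposes `segments`):

    from openai import OpenAI

    client = OpenAI(api_key="sk-...")  # placeholder key

    with open("audio.mp3", "rb") as audio_file:
        # transcriptions keeps the source language; translations outputs English
        result = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
            response_format="verbose_json",  # includes per-segment timestamps
        )

    result = result.model_dump()  # pydantic model -> dict, as in the diff
    for segment in result["segments"]:
        print(f"[{segment['start']:.2f} -> {segment['end']:.2f}] {segment['text']}")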