Merge pull request #67 from cyberofficial/dev-testing

Dev testing -> Master Various fixes and improvements.
cyberofficial · Nov 20, 2023 · 471278f · 471278f
2 parents 3d82436 + e374e25
commit 471278f
Show file tree

Hide file tree

Showing 10 changed files with 119 additions and 54 deletions.
diff --git a/.gitignore b/.gitignore
@@ -22,4 +22,6 @@ modules/__pycache__/model_downloader.cpython-310.pyc
 modules/__pycache__/languages.cpython-310.pyc
 modules/__pycache__/discord.cpython-310.pyc
 modules/__pycache__/console_settings.cpython-310.pyc
-.idea
+.idea
+cookies/
+patchfile.patch
diff --git a/README.md b/README.md
@@ -16,9 +16,10 @@ Learn about it here: [https://jb.gg/OpenSourceSupport](https://jb.gg/OpenSourceS
 
 
 ### Downloads
-| Version (Click to DL) | Portable Included | Type | Notes |
-| ------- |-------------------| ---- | ----- |
+| Version (Click to DL)                                                           | Portable Included | Type | Notes                                                                                                                |
+|---------------------------------------------------------------------------------|-------------------| ---- |----------------------------------------------------------------------------------------------------------------------|
 | [1.0.9986](https://github.com/cyberofficial/Synthalingua/releases/tag/1.0.9986) | Yes               | Stable | various updates and bug fixes, added noise gate supression argument. `--mic_calibration_time` check arguments below. |
+| [Pre Release \| 1.0.9989](https://github.com/cyberofficial/Synthalingua/releases/tag/1.0.9989) | Building...       | Pre-Release | Update for Model V3. Various bug fixes. New command line arguments                                                   |
 
 ### Badges
 [![CodeQL](https://github.com/cyberofficial/Synthalingua/actions/workflows/codeql.yml/badge.svg)](https://github.com/cyberofficial/Synthalingua/actions/workflows/codeql.yml)
@@ -111,6 +112,8 @@ Note:
 
 The tool will work on any system that meets the minimum requirements. The tool will work better on systems that meet the recommended requirements. The tool will work best on systems that meet the best performance requirements. You can mix and match the requirements to get the best performance. For example, you can have a CPU that meets the best performance requirements and a GPU that meets the moderate requirements. The tool will work best on systems that meet the best performance requirements.
 
+### A microphone is required! You need either a hardware microphone or a software (virtual) audio input source. See issue [#63](https://github.com/cyberofficial/Synthalingua/issues/63) for additional information.
+
 ## Installation
 1. Download and install [Python 3.10.9](https://www.python.org/downloads/release/python-3109/).
      * Make sure to check the box that says "Add Python to PATH" when installing. If you don't check the box, you will have to manually add Python to your PATH. You can check this guide: [How to add Python to PATH](https://datatofish.com/add-python-to-windows-path/).
@@ -158,6 +161,8 @@ This script uses argparse to accept command line arguments. The following option
 | `--portnumber` | Set the port number for the web server. If no number is set then the web server will not start. |
 | `--retry` | Retries translations and transcription if they fail. |
 | `--about` | Shows about the app. |
+| `--save_transcript` | Saves the transcript to a text file. |
+| `--save_folder` | Set the folder to save the transcript to. |
 
 # Things to note!
 - When crafting your command line arguments, you need to make sure you adjust the energy threshold to your liking. The default is 100, but you can adjust it to your liking. The higher the number, the harder it is to trigger the audio detection. The lower the number, the easier it is to trigger the audio detection. I recommend you start with 100 and adjust it from there. I seen best results with 250-500.
@@ -231,6 +236,8 @@ If you encounter any issues with the tool, here are some common problems and the
     Try this fix: https://github.com/cyberofficial/Real-Time-Translation/issues/2#issuecomment-1491098222
 * Translator can't pickup stream sound
     * Check out this discussion thread for a possible fix: [#12 Discussion](https://github.com/cyberofficial/Synthalingua/discussions/12)
+* Error: Audio source must be entered before adjusting.
+    * You need to make sure you have a microphone set up. See issue [#63](https://github.com/cyberofficial/Synthalingua/issues/63) for additional information.
 
 # Additional Information
 * Models used are from OpenAI Whisper - [Whisper](https://github.com/openai/whisper)

diff --git a/modules/imports.py b/modules/imports.py
@@ -37,6 +37,7 @@
     from prettytable import PrettyTable
     from dateutil.tz import tzlocal
     from tzlocal import get_localzone
+
 except Exception as e:
     print("Error Loading Primary Imports")
     print("Check to make sure you have all the required modules installed.")
@@ -57,6 +58,7 @@
     from modules import parser_args
     from modules.languages import get_valid_languages
     from modules import api_backend
+    #from modules import microphone_check
 except Exception as e:
     print("Error Loading Extensions")
     print("Check the Modules folder and see if there are any missing or corrupted files.")

diff --git a/modules/microphone_check.py b/modules/microphone_check.py
@@ -0,0 +1,15 @@
+from modules.imports import *
+
+print("Microphone Check Module Loaded")
+
+
+def microphone_check():
+    """Open a microphone and sample ambient noise; return True on success (errors propagate)."""
+    print("Checking for microphone...")
+    # Build the Recognizer here: speech_recognition.__main__ is the package's demo script and runs when imported.
+    recognizer = sr.Recognizer()
+    with sr.Microphone() as source:
+        recognizer.adjust_for_ambient_noise(source)
+    print("Microphone check complete.")
+    print("\n\n")
+    return True
diff --git a/modules/parser_args.py b/modules/parser_args.py
@@ -33,7 +33,7 @@ def set_model_by_ram(ram, language, target_language):
         else:
             model = "medium"
     elif ram == "12gb":
-        model = "large"
+        model = "large-v3"
         if language == "en" or language == "English":
             red_text = Fore.RED + Back.BLACK
             green_text = Fore.GREEN + Back.BLACK
@@ -78,6 +78,8 @@ def parse_arguments():
     parser.add_argument(
     "--portnumber", default=None, help="Port number to run the web server on. If not specified, the web server will not run.", type=valid_port_number)
     parser.add_argument("--about", action='store_true', help="About the project.")
+    parser.add_argument("--save_transcript", action='store_true', help="Save the transcript to a file.")
+    parser.add_argument("--save_folder", default="out", help="Folder to save the transcript to.")
     args = parser.parse_args()
     return args
 

diff --git a/modules/version_checker.py b/modules/version_checker.py
@@ -1,6 +1,6 @@
 from modules.imports import *
 
-version = "1.0.9986"
+version = "1.0.9989"
 ScriptCreator = "cyberofficial"
 GitHubRepo = "https://github.com/cyberofficial/Synthalingua"
 repo_owner = "cyberofficial"

diff --git a/requirements_static.txt → requirements.txt b/requirements_static.txt → requirements.txt
diff --git a/setup.bash b/setup.bash
@@ -30,7 +30,7 @@ python -m pip install --upgrade pip
 echo "Installing Requirements..."
 pip install wheel
 pip install setuptools-rust
-pip install -r requirements_static.txt
+pip install -r requirements.txt
 
 echo "Fixing CUDA Since Whisper installs non-gpu version."
 pip uninstall --yes torch torchvision torchaudio

diff --git a/setup.bat b/setup.bat
@@ -1,54 +1,72 @@
 @echo off
 setlocal enabledelayedexpansion
-Title Realtime Whipser Translation App
-cls
+Title Realtime Whisper Translation App Setup
+
+:check_python
+echo Checking for Python installation...
+where python >nul 2>&1
+if !errorlevel! neq 0 (
+    echo Python is not installed or not in the PATH. Please install Python before continuing.
+    exit /b
+)
 
+:prepare_environment
+cls
 if exist "data_whisper" (
-    set /p reinstall="Python environment already exists. Do you want to reinstall? [y/n]: "
-    if /i "!reinstall!"=="y" (
+    set /p reinstall="Python environment 'data_whisper' already exists. Reinstall it? [Y/N]: "
+    if /i "!reinstall!"=="Y" (
         echo Deleting existing environment...
-        REM call data_whisper\Scripts\deactivate.bat :: Not Needed for now
         rmdir /s /q data_whisper
     ) else (
-        echo Exiting...
+        echo Operation cancelled by user.
         pause
         exit /b
     )
 )
 
-Echo Creating python environment...
+echo Creating a new Python virtual environment...
 python -m venv data_whisper
 
-Echo Created Env...
-
+echo Activating the environment...
 call data_whisper\Scripts\activate.bat
-Echo Installing Whisper
-Echo Updating pip
+
+:install_dependencies
+echo Upgrading pip to the latest version...
 python.exe -m pip install --upgrade pip
 
-Echo Installing Requirements...
+echo Installing wheel and setuptools-rust...
 pip install wheel
 pip install setuptools-rust
-pip install -r requirements_static.txt
 
-:cuda-patch
-Echo Fixing CUDA Since Whisper installs non gpu version.
+echo Checking for 'requirements.txt'...
+if not exist "requirements.txt" (
+    echo 'requirements.txt' not found. Please ensure it is in the current directory.
+    exit /b
+)
+
+echo Installing requirements from 'requirements.txt'...
+pip install -r requirements.txt
+
+:cuda_patch
+echo Applying CUDA patch to install GPU versions of PyTorch packages...
 pip uninstall --yes torch torchvision torchaudio
 pip cache purge
-pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
+pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
 
-Echo. Setup Completed!
+echo Whisper translation environment setup completed!
 
+:create_shortcut
+echo Creating a shortcut batch file for the translation app...
+(
+    echo @echo off
+    echo cls
+    echo call "data_whisper\Scripts\activate.bat"
+    echo python "transcribe_audio.py" --ram 4gb --non_english --translate
+    echo pause
+) > "livetranslation.bat"
 
-:creating shortcut
-Echo Creating example shortcut in %cd%
-Echo You can edit with notepad anytime.
-Echo.
-Echo @echo off > livetranslation.bat
-Echo cls >> livetranslation.bat
-Echo call "data_whisper\Scripts\activate.bat" >> livetranslation.bat
-Echo python "transcribe_audio.py" --ram 4gb --non_english --translate >> livetranslation.bat
-Echo pause >> livetranslation.bat
-pause
+echo Shortcut 'livetranslation.bat' created in the current directory.
+echo You can edit this file with notepad if necessary.
 
-:eof
+pause
+exit /b
diff --git a/transcribe_audio.py b/transcribe_audio.py
@@ -105,6 +105,13 @@ def is_input_device(device_index):
     recorder.dynamic_energy_threshold = False
     reset_text = Style.RESET_ALL
 
+    #mic_check = microphone_check()
+    #if mic_check == False:
+    #    print("Microphone check failed. Exiting...")
+    #    print("If you are using a virtual audio cable, please make sure it is set as the default input device.")
+    #    print("If you are windows, make sure the microphone is set as the default input device, check the privacy settings, and make sure the microphone is plugged in, if you are using a virtual audio cable, make sure it is set as the default input device.")
+    #    sys.exit(1)
+
     def mic_calibration():
         print("Starting mic calibration...")
         with sr.Microphone() as source:
@@ -206,15 +213,17 @@ def mic_calibration():
     try:
         source, mic_name = get_microphone_source(args)
     except ValueError as e:
-        print(e)
-        sys.exit(0)
+        print("It may look like the microphone is not working, make sure your microphone is plugged in and working, or make sure your privacy settings allow microphone access, or make sure you have a microphone selected, or make sure you have a softwaare microphone selected: ie: Voicemeeter, VB-Cable, etc.")
+        print("Error Message:\n" + str(e))
+        sys.exit(1)  # `source` was never bound here, so the `with source as s:` that follows cannot run
 
     with source as s:
         try:
             recorder.adjust_for_ambient_noise(s)
             print(f"Microphone set to: {mic_name}")
         except AssertionError as e:
-            print(e)
+            print("It may look like the microphone is not working, make sure your microphone is plugged in and working, or make sure your privacy settings allow microphone access, or make sure you have a microphone selected, or make sure you have a softwaare microphone selected: ie: Voicemeeter, VB-Cable, etc.")
+            print("Error Message:\n" + str(e))
 
     #if args.language == "en" or args.language == "English":
     #    model += ".en"
@@ -393,7 +402,11 @@ def mic_calibration():
 
                 audio = whisper.load_audio(temp_file)
                 audio = whisper.pad_or_trim(audio)
-                mel = whisper.log_mel_spectrogram(audio).to(device)
+                # if ram is set to 12 use n_mels=128 else use n_mels=80
+                if args.ram == "12gb":
+                    mel = whisper.log_mel_spectrogram(audio, n_mels=128).to(device)
+                else:
+                    mel = whisper.log_mel_spectrogram(audio, n_mels=80).to(device)
 
                 if ".en" in model:
                     detected_language = "English"
@@ -617,22 +630,28 @@ def mic_calibration():
                 send_to_discord_webhook(webhook_url, "Service has stopped.")
             # break
 
-            if not os.path.isdir('out'):
-                os.mkdir('out')
-
-            transcript = os.path.join(os.getcwd(), 'out', 'transcription.txt')
-            if os.path.isfile(transcript):
-                transcript = os.path.join(os.getcwd(), 'out', 'transcription_' + str(len(os.listdir('out'))) + '.txt')
-            transcription_file = open(transcript, 'w',  encoding='utf-8')
-
-            for original_text, translated_text, transcribed_text, detected_language in transcription:
-                transcription_file.write(f"-=-=-=-=-=-=-=-\nOriginal ({detected_language}): {original_text}\n")
-                if translated_text:
-                    transcription_file.write(f"Translation: {translated_text}\n")
-                if transcribed_text:
-                    transcription_file.write(f"Transcription: {transcribed_text}\n")
-            transcription_file.close()
-            print(f"Transcription was saved to {transcript}")
+            if args.save_transcript:
+                # Use --save_folder (the documented option); fall back to "out" when it is empty.
+                out = args.save_folder or "out"
+                # makedirs tolerates an already-existing folder and creates nested paths.
+                os.makedirs(out, exist_ok=True)
+
+                transcript = os.path.join(os.getcwd(), out, 'transcription.txt')
+                if os.path.isfile(transcript):
+                    # Do not overwrite an earlier transcript: suffix the name with the
+                    # number of entries currently in the folder.
+                    transcript = os.path.join(os.getcwd(), out, 'transcription_' + str(len(os.listdir(out))) + '.txt')
+
+                # The context manager closes the file even if a write raises.
+                with open(transcript, 'w', encoding='utf-8') as transcription_file:
+                    for original_text, translated_text, transcribed_text, detected_language in transcription:
+                        transcription_file.write(f"-=-=-=-=-=-=-=-\nOriginal ({detected_language}): {original_text}\n")
+                        if translated_text:
+                            transcription_file.write(f"Translation: {translated_text}\n")
+                        if transcribed_text:
+                            transcription_file.write(f"Transcription: {transcribed_text}\n")
+
+                print(f"Transcription was saved to {transcript}")
 
             if args.portnumber:
                 api_backend.kill_server()