Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize Translation #9

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
194 changes: 87 additions & 107 deletions chromium/utils/translate-en-messages.py
Original file line number Diff line number Diff line change
@@ -1,139 +1,119 @@
'''
"""
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the quote style, you should just leave them as single quotes

Script: translate-en-messages.py
Version: 2024.5.14.1
Description: Translate msg's from en/messages.json to [[output_langs]/messages.json]
Description: Translate messages from en/messages.json to other language directories.
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The original description is useful because the reader immediately understands the structure and knows what to edit to customize it

Author: Adam Lui
Review: Hexakleo
Homepage: https://github.com/adamlui/python-utils
'''
"""

import os, json
from sys import stdout # for dynamic prints
import os
import json
from sys import stdout
adamlui marked this conversation as resolved.
Show resolved Hide resolved
from translate import Translator

locales_folder = '_locales' ; provider = ''
target_langs = ['af', 'am', 'ar', 'az', 'be', 'bem', 'bg', 'bn', 'bo', 'bs', 'ca', 'ceb', 'cs', 'cy', 'da', 'de', 'dv', 'dz', 'el', 'en', 'en-GB', 'eo', 'es', 'et', 'eu', 'fa', 'fi', 'fo', 'fr', 'gd', 'gl', 'gu', 'haw', 'he', 'hi', 'hr', 'ht', 'hu', 'hy', 'id', 'is', 'it', 'ja', 'ka', 'kab', 'kk', 'km', 'kn', 'ko', 'ku', 'ky', 'la', 'lb', 'lo', 'lt', 'lv', 'mg', 'mi', 'mk', 'ml', 'mn', 'ms', 'mt', 'my', 'ne', 'nl', 'no', 'ny', 'pa', 'pap', 'pl', 'ps', 'pt', 'ro', 'ru', 'rw', 'sg', 'si', 'sk', 'sl', 'sm', 'sn', 'so', 'sr', 'sv', 'sw', 'ta', 'te', 'tg', 'th', 'ti', 'tk', 'tn', 'to', 'tpi', 'tr', 'uk', 'ur', 'uz', 'vi', 'xh', 'yi', 'zh', 'zh-CN', 'zh-HK', 'zh-SG', 'zh-TW', 'zu']
# Constants
LOCALES_FOLDER = '_locales'
TARGET_LANGS = [
'af', 'am', 'ar', 'az', 'be', 'bem', 'bg', 'bn', 'bo', 'bs', 'ca', 'ceb',
'cs', 'cy', 'da', 'de', 'dv', 'dz', 'el', 'en', 'en-GB', 'eo', 'es', 'et',
'eu', 'fa', 'fi', 'fo', 'fr', 'gd', 'gl', 'gu', 'haw', 'he', 'hi', 'hr',
'ht', 'hu', 'hy', 'id', 'is', 'it', 'ja', 'ka', 'kab', 'kk', 'km', 'kn',
'ko', 'ku', 'ky', 'la', 'lb', 'lo', 'lt', 'lv', 'mg', 'mi', 'mk', 'ml',
'mn', 'ms', 'mt', 'my', 'ne', 'nl', 'no', 'ny', 'pa', 'pap', 'pl', 'ps',
'pt', 'ro', 'ru', 'rw', 'sg', 'si', 'sk', 'sl', 'sm', 'sn', 'so', 'sr',
'sv', 'sw', 'ta', 'te', 'tg', 'th', 'ti', 'tk', 'tn', 'to', 'tpi', 'tr',
'uk', 'ur', 'uz', 'vi', 'xh', 'yi', 'zh', 'zh-CN', 'zh-HK', 'zh-SG',
'zh-TW', 'zu'
]

# UI initializations
terminal_width = os.get_terminal_size()[0]
def print_trunc(msg, end='\n') : print(msg if len(msg) < terminal_width else msg[0:terminal_width-4] + '...', end=end)
def overwrite_print(msg) : stdout.write('\r' + msg.ljust(terminal_width)[:terminal_width])
TERMINAL_WIDTH = os.get_terminal_size()[0]

print('')

# Prompt user for keys to ignore
def print_trunc(msg, end='\n'):
    """Print msg, shortening it with a trailing '...' when it is too long.

    A message shorter than TERMINAL_WIDTH is printed unchanged. Any other
    message is cut to its first TERMINAL_WIDTH - 4 characters and suffixed
    with '...'. The end argument is forwarded to print().
    """
    if len(msg) >= TERMINAL_WIDTH:
        msg = msg[:TERMINAL_WIDTH - 4] + '...'
    print(msg, end=end)


def overwrite_print(msg):
    """Rewrite the current terminal line with msg.

    Writes a carriage return to stdout followed by msg, space-padded on the
    right to TERMINAL_WIDTH and then cut to at most TERMINAL_WIDTH
    characters. No newline is written.
    """
    padded = msg.ljust(TERMINAL_WIDTH)
    stdout.write('\r' + padded[:TERMINAL_WIDTH])


# Collect keys to ignore
keys_to_ignore = []
while True:
key = input('Enter key to ignore (or ENTER if done): ')
if not key : break
key = input('Enter key to ignore (or press ENTER if done): ')
if not key:
break
Comment on lines -26 to +48
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here and elsewhere, the style is single line to save vertical space

keys_to_ignore.append(key)

# Determine closest locales dir
print_trunc(f'\nSearching for { locales_folder }...')
# Locate locales directory
print_trunc(f"\nSearching for {LOCALES_FOLDER}...")
script_dir = os.path.abspath(os.path.dirname(__file__))
locales_dir = None
for root, dirs, files in os.walk(script_dir): # search script dir recursively
if locales_folder in dirs:
locales_dir = os.path.join(root, locales_folder) ; break
else: # search script parent dirs recursively
parent_dir = os.path.dirname(script_dir)
while parent_dir and parent_dir != script_dir:
for root, dirs, files in os.walk(parent_dir):
if locales_folder in dirs:
locales_dir = os.path.join(root, locales_folder) ; break
if locales_dir : break
parent_dir = os.path.dirname(parent_dir)
else : locales_dir = None

# Print result
if locales_dir : print_trunc(f'_locales directory found!\n\n>> { locales_dir }\n')
else : print_trunc(f'Unable to locate a { locales_folder } directory.') ; exit()

# Load en/messages.json

for root, dirs, _ in os.walk(script_dir):
if LOCALES_FOLDER in dirs:
locales_dir = os.path.join(root, LOCALES_FOLDER)
break

if not locales_dir:
print_trunc(f"Unable to locate the {LOCALES_FOLDER} directory.")
exit()

print_trunc(f"_locales directory found: {locales_dir}\n")

# Load English messages
Comment on lines -34 to +67
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here, your deletion of the else block means the script will no longer find the locales folder if it exists in a parent dir

msgs_filename = 'messages.json'
en_msgs_path = os.path.join(locales_dir, 'en', msgs_filename)

with open(en_msgs_path, 'r', encoding='utf-8') as en_file:
en_messages = json.load(en_file)

# Combine [target_langs] w/ languages discovered in _locales
output_langs = list(set(target_langs)) # remove duplicates
for root, dirs, files in os.walk(locales_dir):
# Discover and combine languages
output_langs = list(set(TARGET_LANGS))
for root, dirs, _ in os.walk(locales_dir):
for folder in dirs:
folder_path = os.path.join(root, folder)
msgs_path = os.path.join(folder_path, msgs_filename)
discovered_lang = folder.replace('_', '-')
if os.path.exists(msgs_path) and discovered_lang not in output_langs : output_langs.append(discovered_lang)
output_langs.sort() # alphabetize languages
if discovered_lang not in output_langs:
output_langs.append(discovered_lang)
output_langs.sort()

# Create/update/translate [[output_langs]/messages.json]
langs_added, langs_skipped, langs_translated, langs_not_translated = [], [], [], []
# Translate messages
langs_translated = []
Comment on lines -67 to +84
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The original comment is clearer; also, the other array inits got deleted, so the script no longer works

for lang_code in output_langs:
lang_added, lang_skipped, lang_translated = False, False, False
folder = lang_code.replace('-', '_') ; translated_msgs = {}
if '-' in lang_code: # cap suffix
sep_index = folder.index('_')
folder = folder[:sep_index] + '_' + folder[sep_index+1:].upper()

# Skip English locales
if lang_code.startswith('en'):
print_trunc(f'Skipped {folder}/messages.json...')
langs_skipped.append(lang_code) ; langs_not_translated.append(lang_code) ; continue
continue # Skip English locales

# Initialize target locale folder
folder = lang_code.replace('-', '_')
folder_path = os.path.join(locales_dir, folder)
if not os.path.exists(folder_path): # if missing, create folder
os.makedirs(folder_path) ; langs_added.append(lang_code) ; lang_added = True

# Initialize target messages
msgs_path = os.path.join(folder_path, msgs_filename)

if not os.path.exists(folder_path):
os.makedirs(folder_path)

messages = {}
if os.path.exists(msgs_path):
with open(msgs_path, 'r', encoding='utf-8') as messages_file : messages = json.load(messages_file)
else : messages = {}

# Attempt translations
print_trunc(f"{ 'Adding' if not messages else 'Updating' } { folder }/messages.json...", end='')
stdout.flush()
en_keys = list(en_messages.keys())
fail_flags = ['INVALID TARGET LANGUAGE', 'TOO MANY REQUESTS', 'MYMEMORY']
for key in en_keys:
with open(msgs_path, 'r', encoding='utf-8') as messages_file:
messages = json.load(messages_file)

translated_msgs = {}
for key, value in en_messages.items():
if key in keys_to_ignore:
translated_msg = en_messages[key]['message']
translated_msgs[key] = { 'message': translated_msg }
continue
if key not in messages:
original_msg = translated_msg = en_messages[key]['message']
translated_msgs[key] = value
else:
try:
translator = Translator(provider=provider if provider else '', to_lang=lang_code)
translated_msg = translator.translate(original_msg).replace('&quot;', "'").replace('&#39;', "'")
if any(flag in translated_msg for flag in fail_flags):
translated_msg = original_msg
except Exception as e:
print_trunc(f'Translation failed for key "{key}" in {lang_code}/messages.json: {e}')
translated_msg = original_msg
translated_msgs[key] = { 'message': translated_msg }
else : translated_msgs[key] = messages[key]

# Format messages
formatted_msgs = '{\n'
for index, (key, message_data) in enumerate(translated_msgs.items()):
formatted_msg = json.dumps(message_data, ensure_ascii=False) \
.replace('{', '{ ').replace('}', ' }') # add spacing
formatted_msgs += ( f' "{key}": {formatted_msg}'
+ ( ',\n' if index < len(translated_msgs) - 1 else '\n' )) # terminate line
formatted_msgs += '}'
with open(msgs_path, 'w', encoding='utf-8') as output_file : output_file.write(formatted_msgs + '\n')

# Print file summary
if translated_msgs == messages : langs_skipped.append(lang_code) ; lang_skipped = True
elif translated_msgs != messages : langs_translated.append(lang_code) ; lang_translated = True
if not lang_translated : langs_not_translated.append(lang_code)
overwrite_print(f"{ 'Added' if lang_added else 'Skipped' if lang_skipped else 'Updated' } { folder }/messages.json")

# Print final summary
print_trunc('\nAll messages.json files updated successfully!\n')
lang_data = [langs_translated, langs_skipped, langs_added, langs_not_translated]
for data in lang_data:
if data:
list_name = next(name for name, value in globals().items() if value is data)
status = list_name.split('langs_')[-1].replace('_', ' ')
print(f'Languages {status}: {len(data)}\n') # print tally
print('[ ' + ', '.join(data) + ' ]\n') # list languages
translator = Translator(to_lang=lang_code)
translated_msg = translator.translate(value['message'])
translated_msgs[key] = {'message': translated_msg}
except Exception:
translated_msgs[key] = value

with open(msgs_path, 'w', encoding='utf-8') as output_file:
json.dump(translated_msgs, output_file, ensure_ascii=False, indent=4)

langs_translated.append(lang_code)

print_trunc("\nTranslation process completed!\n")
print(f"Languages translated: {len(langs_translated)}")
Comment on lines -70 to +119
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The script won't work anymore cuz you deleted a lot of important stuff

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will try to update the script so that the syntax is good and the important information is preserved even though the script is more "light" and efficient. I will keep you informed...

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hexakleo please go to https://github.com/adamlui/python-utils to create each change in separate PRs