-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #38 from Unitystation-fork/37-add-wikimedias-conve…
…rtion-folder- 37 Add wikimedia's convertion folder
- Loading branch information
Showing
2 changed files
with
148 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
## Extract.py | ||
import os | ||
import requests | ||
from bs4 import BeautifulSoup | ||
|
||
# Extract the wikicode of a MediaWiki page and save it as ./Result/<title>.wiki.
#
# The page is expected to be an edit / "view source" page: the wikicode is read
# from the <textarea name="wpTextbox1"> element and the page title from the
# first link inside <div id="contentSub">.

# Make sure the Result output folder exists.
result_dir = "./Result"
os.makedirs(result_dir, exist_ok=True)

# Ask the user for the URL of the target page.
url = input("Entrez l'URL de la page : ")

# Download the page. The timeout keeps the script from hanging forever, and
# network-level failures (DNS error, invalid URL, timeout, ...) are reported
# the same way as a non-200 HTTP status instead of ending in a traceback.
try:
    response = requests.get(url, timeout=30)
except requests.RequestException:
    response = None
if response is None or response.status_code != 200:
    print("La requête a échoué. Vérifiez l'URL ou votre connexion Internet.")
    raise SystemExit(1)

# Parse the HTML of the page.
soup = BeautifulSoup(response.text, 'html.parser')

# Extract the page title, checking each step so that a page with a different
# layout yields a clear message rather than an AttributeError.
content_sub = soup.find('div', {'id': 'contentSub'})
title_link = content_sub.find('a') if content_sub else None
title = title_link.get('title') if title_link else None
if not title:
    print("Le titre de la page n'a pas été trouvé.")
    raise SystemExit(1)

# Build a safe file name: spaces become underscores, and path separators
# (sub-page titles such as "Guide/Intro") are neutralised so the file always
# lands directly inside result_dir.
title = title.replace(' ', '_')
for separator in ('/', '\\'):
    title = title.replace(separator, '_')

# Extract the page source. get_text() returns only the text inside the
# element, never the <textarea> markup itself, so no tag stripping is needed
# afterwards (stripping could only damage wikicode that mentions those tags).
textarea = soup.find('textarea', {'name': 'wpTextbox1'})
if textarea is None:
    print("La balise textarea n'a pas été trouvée.")
    raise SystemExit(1)
wikicode = textarea.get_text()

# Save the wikicode to the result file.
output_filename = os.path.join(result_dir, title + ".wiki")
with open(output_filename, 'w', encoding='utf-8') as output_file:
    output_file.write(wikicode)

# Print a confirmation message in yellow.
print("\033[93mExtraction réussie. Le code source a été enregistré dans " + output_filename + ".\033[0m")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
# ecrit via Chat-GPT, conversion de wikicode vers MD par Fr_Dae pour Unionrolistes.fr / unitystation-fork | ||
# CCBYNA | ||
import re | ||
import sys | ||
import os | ||
import readline | ||
|
||
def main():
    """Convert one ``.wiki`` file, or every ``.wiki`` file of a folder, to Markdown.

    The input path is taken from ``sys.argv[1]`` and the output folder from
    ``sys.argv[2]``. Whichever is missing or empty is asked for interactively.
    An empty output answer means the current directory.
    """
    input_path = sys.argv[1] if len(sys.argv) > 1 else None
    output_path = sys.argv[2] if len(sys.argv) > 2 else None

    if not input_path:
        input_path = input("\033[93mEntrez le chemin du fichier ou du dossier d'entrée : \033[0m")

    if not output_path:
        output_path = input("\033[93mEntrez le chemin du dossier de sortie : \033[0m")
    # An empty path cannot be created as a folder; fall back to the current one.
    output_path = output_path or "."

    if os.path.isfile(input_path):
        # Create the output folder here as well, as the directory mode does.
        os.makedirs(output_path, exist_ok=True)
        # Swap only the real extension: str.replace would rewrite every
        # ".wiki" in the name and leave other extensions untouched, which
        # could make the output overwrite the input file.
        stem = os.path.splitext(os.path.basename(input_path))[0]
        convert_file(input_path, os.path.join(output_path, stem + ".md"))
    elif os.path.isdir(input_path):
        convert_files_in_directory(input_path, output_path)
    else:
        print("Chemin d'entrée non valide.")
|
||
def _wiki_link_target(page):
    """Return a wiki page or file name as a Markdown link destination.

    Markdown destinations cannot contain raw spaces, so they become
    underscores.
    """
    return page.strip().replace(' ', '_')


def _heading_to_markdown(match):
    """Turn a matched ``== Title ==`` line into a Markdown heading line."""
    # Level mapping: "==" -> "#", "===" -> "##", ..., "=======" -> "######".
    return '#' * (len(match.group(1)) - 1) + ' ' + match.group(2)


def _pre_to_markdown(match):
    """Turn a matched ``<pre>`` element into Markdown code."""
    content = match.group(1)
    if '\n' not in content:
        return '```' + content + '```'
    # A multi-line block needs the fences on their own lines.
    return '```\n' + content.strip('\n') + '\n```'


def convert_wikicode_to_markdown(wikicode):
    """Convert a MediaWiki wikicode string to Markdown and return it.

    Handled constructs: headings, images, external links, internal links
    (plain and piped), ``<ref>`` citations, bold, italic, ``<u>``,
    ``<s>``/``<strike>``, HTML comments and ``<pre>`` blocks. Bullet lists
    (``* item``) are already valid Markdown and pass through unchanged.
    Numbered lists and tables are not converted.
    """
    # Headings: anchored to whole lines so that an inline "a == b == c" is
    # left alone, and padding around the title is dropped. The back-reference
    # requires the same number of "=" on both sides.
    wikicode = re.sub(
        r'^(={2,7})[ \t]*(.+?)[ \t]*\1[ \t]*$',
        _heading_to_markdown,
        wikicode,
        flags=re.MULTILINE,
    )

    # Images must be handled before generic [[...]] links, otherwise the
    # generic rule consumes them first. Display options after "|" are dropped.
    wikicode = re.sub(
        r'\[\[(?:File|Image):([^\]|]+)(?:\|[^\]]*)?\]\]',
        lambda m: f'![{m.group(1).strip()}]({_wiki_link_target(m.group(1))})',
        wikicode,
    )

    # External links: "[url label]" -> "[label](url)", bare "[url]" -> "<url>".
    # The look-behind skips brackets that belong to a "[[...]]" internal link.
    wikicode = re.sub(
        r'(?<!\[)\[((?:https?|ftp)://[^\s\]]+)[ \t]+([^\]]+?)[ \t]*\]',
        r'[\2](\1)',
        wikicode,
    )
    wikicode = re.sub(r'(?<!\[)\[((?:https?|ftp)://[^\s\]]+)\]', r'<\1>', wikicode)

    # Piped internal links "[[Page|label]]" before plain "[[Page]]" links.
    wikicode = re.sub(
        r'\[\[([^\]|]+)\|([^\]]+)\]\]',
        lambda m: f'[{m.group(2).strip()}]({_wiki_link_target(m.group(1))})',
        wikicode,
    )
    wikicode = re.sub(
        r'\[\[([^\]|]+)\]\]',
        lambda m: f'[{m.group(1).strip()}]({_wiki_link_target(m.group(1))})',
        wikicode,
    )

    # Citations: drop self-closing re-uses (<ref name="x" />), then keep only
    # the text of <ref ...>text</ref>, without the tag attributes. "\b" keeps
    # <references /> from being mistaken for a <ref> tag.
    wikicode = re.sub(r'<ref\b[^>]*/>', '', wikicode)
    wikicode = re.sub(r'<ref\b[^>]*>(.*?)</ref>', r'[\1]', wikicode, flags=re.DOTALL)

    # Bold must be converted before italic: ''' contains ''.
    wikicode = re.sub(r"'''(.*?)'''", r'**\1**', wikicode)
    wikicode = re.sub(r"''(.*?)''", r'*\1*', wikicode)
    # Underlined text.
    wikicode = re.sub(r'<u>(.*?)</u>', r'__\1__', wikicode)
    # Struck-through text; the group of the alternative that did not match is empty.
    wikicode = re.sub(r'<s>(.*?)</s>|<strike>(.*?)</strike>', r'~~\1\2~~', wikicode)

    # Remove HTML comments, including those spanning several lines.
    wikicode = re.sub(r'<!--(.*?)-->', '', wikicode, flags=re.DOTALL)
    # Preformatted text becomes Markdown code, including multi-line blocks.
    wikicode = re.sub(r'<pre>(.*?)</pre>', _pre_to_markdown, wikicode, flags=re.DOTALL)

    return wikicode
|
||
def convert_file(input_file, output_file):
    """Read a wikicode file, convert it to Markdown and write the result.

    Both files are handled as UTF-8 text. Nothing is raised to the caller:
    success and failure are each reported with a printed message.
    """
    try:
        # Read and convert in one go; the source file is closed before writing.
        with open(input_file, mode='r', encoding='utf-8') as source:
            markdown_text = convert_wikicode_to_markdown(source.read())

        with open(output_file, mode='w', encoding='utf-8') as target:
            target.write(markdown_text)

        print(f"Conversion terminée. Fichier Markdown généré avec succès : {output_file}")
    except FileNotFoundError:
        print("Le fichier d'entrée n'a pas été trouvé.")
    except Exception as error:
        # Best-effort tool: report any other failure and let the caller go on.
        print(f"Une erreur s'est produite : {error!s}")
|
||
def convert_files_in_directory(input_directory, output_directory):
    """Convert every ``.wiki`` file found under a folder to Markdown.

    The input folder is walked recursively. Its sub-folder layout is
    reproduced under the output folder, so two files with the same name in
    different sub-folders no longer overwrite each other. Files sitting
    directly in the input folder land directly in the output folder.
    """
    # exist_ok avoids the check-then-create race; an empty path means "here".
    os.makedirs(output_directory or ".", exist_ok=True)

    suffix = ".wiki"
    for root, _, files in os.walk(input_directory):
        relative_root = os.path.relpath(root, input_directory)
        if relative_root == os.curdir:
            target_directory = output_directory
        else:
            target_directory = os.path.join(output_directory, relative_root)

        for filename in files:
            if not filename.endswith(suffix):
                continue
            # Create sub-folders only when they will receive a file.
            os.makedirs(target_directory or ".", exist_ok=True)
            # Swap only the trailing extension; str.replace would also
            # rewrite a ".wiki" appearing in the middle of the name.
            stem = filename[:-len(suffix)]
            convert_file(
                os.path.join(root, filename),
                os.path.join(target_directory, stem + ".md"),
            )
|
||
# Run the converter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()