-
Notifications
You must be signed in to change notification settings - Fork 2
/
run_extraction.py
45 lines (30 loc) · 1.3 KB
/
run_extraction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from functions import *
# Extraction driver: every volume goes through the same four steps, so the
# steps live in one helper and the per-volume settings live in a table.

# Intermediate XML file; it is rewritten for each volume before unescaping.
TEMP_FILE = "bomber_output/temp.xml"

# One entry per volume: (volume number, input text file, final XML output file).
VOLUMES = (
    (1, "bomber_input/BB1_CLEAN_H_C_V2.txt", "bomber_output/bomber_output_part1.xml"),
    (2, "bomber_input/BB2_CLEAN_H_C_V2.txt", "bomber_output/bomber_output_part2.xml"),
)


def _extract_volume(volume, input_file, output_file, temp_file=TEMP_FILE):
    """Run the extraction pipeline for a single volume.

    Args:
        volume: Volume number, passed through to ``create_data_dic`` and
            ``insert_cities_xml``.
        input_file: Path of the text file to read.
        output_file: Path of the final XML file.
        temp_file: Path of the intermediate XML file handed to ``unescape``.
    """
    print("Configuration:\nVolume:", volume, "\nInput file:", input_file, "\nOutput file:", output_file)

    # Open the input and keep its text in a variable.
    bomber_text = file_open(input_file)

    # Extract the data from the text.
    bomber_dic = create_data_dic(bomber_text, volume)

    # Export the data as an XML file (written to the temporary path first).
    insert_cities_xml("bomber_xml", bomber_dic, temp_file, input_file, volume)

    # Unescape the HTML entities of the generated XML into the final file.
    unescape(temp_file, output_file)


print("Starting...\n\n")

_last_position = len(VOLUMES) - 1
for _position, _settings in enumerate(VOLUMES):
    _extract_volume(*_settings)
    # Intermediate volumes report success; the final one closes the run.
    print("\n\nFinished!" if _position == _last_position else "\nSuccess!\n\n")