-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse_logs.py
141 lines (121 loc) · 4.44 KB
/
parse_logs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import os
import sys
from datetime import datetime
import pandas as pd
import numpy as np
# Run banner: report how, where and when this script was invoked before any
# parsing starts.
host_info = os.uname()  # queried once; the three fields below come from it
heavy_rule = "=" * 80
light_rule = "-" * 80

print(heavy_rule)
print("Parse log files generated by `simulation.py` extracting key information")
print(light_rule)
print("Command: {}".format(" ".join(sys.argv)))
print("Run on host: {}".format(host_info.nodename))
print("Operating system: {}".format(host_info.sysname))
print("Machine: {}".format(host_info.machine))
print("Started at: {}".format(datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
print(heavy_rule)
print("")
# Input logs: every entry of `log_dir` whose name contains "sim", in sorted
# order so the output table is deterministic.
log_dir = "stdout_sim"
logs = sorted(name for name in os.listdir(log_dir) if "sim" in name)

# Output directory and file
output_dir = "summary_tables"
# `exist_ok=True` creates the directory only if needed, without the
# check-then-create race of testing `os.path.exists` first.
os.makedirs(output_dir, exist_ok=True)
# Both outputs share a date-stamped base name; the extension is added per file.
output_fn = "%s_parsed-logs" % datetime.today().strftime('%Y-%m-%d')
output_path = os.path.join(output_dir, output_fn + ".txt")

# Goal is to generate both a .csv and a .txt.
# `csv_dt` collects one entry per log in each list (column-oriented), ready to
# be turned into a DataFrame.
csv_dt = {
    "expt_name": [],
    "param_set": [],
    "sim_dir": [],
    "x_h_init": [],
    "total_gens": [],
    "t_elapsed": [],
    "state": [],  # running, complete, extinct, failed
}

print("Parsing...")
print("  Log directory: %s" % log_dir)
print("  No. logs: %d" % len(logs))
print("  Output path: %s" % output_path)
# Parse every log: write a fixed-width summary table to `output_path` and
# accumulate the same records in `csv_dt`.
with open(output_path, "w") as parsed_logs:
    # HEADER
    parsed_logs.write(
        "Logs Parsed On: %s\n" % datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    )
    parsed_logs.write("No. of Logs: %d\n" % len(logs))
    parsed_logs.write("-" * 80 + "\n")

    # Prepare Table Columns (left-aligned, fixed widths)
    row_format = "{0[0]:<15}{0[1]:<20}{0[2]:<30}{0[3]:<15}{0[4]:<20}"
    fields = ["Log Name", "Experiment Name", "Output Directory", "State", "Time Elapsed"]
    parsed_logs.write(row_format.format(fields) + "\n")

    # Parse each log
    for log in logs:
        # The context manager closes the log as soon as it has been read, so
        # handles do not accumulate across a large log directory.
        with open(os.path.join(log_dir, log), "r") as f:
            lines = f.read().splitlines()  # memory intensive: whole log is loaded

        # Start every log from NaN so a field that is absent from this log is
        # recorded as missing instead of carrying over the previous log's value.
        param_set = sim_dir = x_h_init = total_gens = t_elapsed = np.nan

        # The experiment name is taken from the eighth line of the log. A log
        # too short (or without a ": " separator there) gets a missing name
        # rather than aborting the whole run.
        expt_name = np.nan
        if len(lines) > 7:
            name_parts = lines[7].strip().split(": ")
            if len(name_parts) > 1:
                expt_name = name_parts[1]

        if lines and lines[-1] == "Running simulation.py...":
            # Nothing has been logged after the start-up message yet.
            state = "running"
        else:
            # Assume failure until a completion or extinction marker is seen;
            # if both markers occur, the one appearing later in the log wins.
            state = "failed"
            for line in lines:
                if "Parameter File:" in line:
                    param_set = line.strip().split(": ")[1]
                    # Keep only the second "/"-separated component of the path.
                    param_set = param_set.split("/")[1]
                elif "Output Directory:" in line:
                    sim_dir = line.strip().split(": ")[1]
                elif "Time elapsed:" in line:
                    t_elapsed = line.strip().split(": ")[1]
                elif "Expected Human Prevalence:" in line:
                    x_h_init = line.strip().split(": ")[1]
                elif "Total Generations:" in line:
                    total_gens = line.strip().split(": ")[1]
                elif "Final Simulation State:" in line:
                    state = "complete"
                elif "Human parasite population currently extinct!" in line:
                    state = "extinct"

        # Store
        csv_dt["expt_name"].append(expt_name)
        csv_dt["param_set"].append(param_set)
        csv_dt["sim_dir"].append(sim_dir)
        csv_dt["x_h_init"].append(x_h_init)
        csv_dt["total_gens"].append(total_gens)
        csv_dt["t_elapsed"].append(t_elapsed)
        csv_dt["state"].append(state)

        # Write
        values = [log, expt_name, sim_dir, state, t_elapsed]
        parsed_logs.write(row_format.format(values) + "\n")

print("Done.")
print("")
# Munge .csv file
print("Munging .csv file...")
csv_df = pd.DataFrame(csv_dt)


def _hms_to_seconds(t_hms):
    """Convert an elapsed-time string to total seconds.

    The string is split on spaces into up to three tokens, read in order as
    hours, minutes and seconds. The last character of each token is dropped
    before integer conversion (presumably a unit suffix, e.g. "1h 2m 3s" --
    confirm against the format written by `simulation.py`).

    Returns NaN when `t_hms` is not a string, i.e. when no elapsed time was
    recorded for the log.
    """
    if not isinstance(t_hms, str):
        return np.nan
    return sum(
        unit * int(token[:-1])
        for unit, token in zip([3600, 60, 1], t_hms.split(" "))
    )


# Only finished simulations ("complete" or "extinct") get a duration in
# seconds; every other state is left as NaN.
csv_df["t_seconds"] = [
    _hms_to_seconds(t_elapsed) if state in ("complete", "extinct") else np.nan
    for state, t_elapsed in zip(csv_df["state"], csv_df["t_elapsed"])
]

# Convert to float (values were parsed from the logs as text; missing ones are NaN)
csv_df["x_h_init"] = csv_df["x_h_init"].astype("float")
csv_df["total_gens"] = csv_df["total_gens"].astype("float")

# Write .csv file
csv_df.to_csv(os.path.join(output_dir, output_fn + ".csv"), index=False)
print("Done.")
print("")

print("-" * 80)
print("Finished at: %s" % datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
print("=" * 80)