forked from Morail/wiki-network
-
Notifications
You must be signed in to change notification settings - Fork 10
/
pywc_revision_merge.py
executable file
·40 lines (36 loc) · 1.21 KB
/
pywc_revision_merge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os
import sys
import csv
from collections import Counter
def main():
    """Merge per-file revision counts into one tab-separated file.

    Usage: ``pywc_revision_merge.py INPUT_DIR OUTPUT_FILE``

    Every file in INPUT_DIR whose name ends with ``.csv`` is read as
    tab-delimited data with a header row. Rows are grouped by their ``ns``
    and ``date`` columns, and every column whose value consists only of
    digits is summed within its group. One row per (ns, date) pair is then
    written to OUTPUT_FILE, ordered by ns and, within an ns, by date.
    Columns that never held a digit-only value for a group are left empty.

    Raises:
        SystemExit: if the arguments are missing, or if no ``.csv`` file
            with a header row was found (nothing is written in that case).
        KeyError: if a file's header has no ``date`` or ``ns`` column.
    """
    if len(sys.argv) < 3:
        sys.exit("usage: %s INPUT_DIR OUTPUT_FILE" % sys.argv[0])
    input_dir, output_file = sys.argv[1], sys.argv[2]

    totals = {}  # ns -> date -> Counter(column name -> summed value)
    fieldnames = []  # union of all input headers, in first-seen order

    # os.listdir() order is arbitrary; sorting keeps the output column
    # order reproducible from run to run.
    for filename in sorted(os.listdir(input_dir)):
        if not filename.endswith(".csv"):
            continue
        with open(os.path.join(input_dir, filename)) as source:
            reader = csv.DictReader(source, delimiter="\t")
            # An empty file has no header (fieldnames is None).
            for name in reader.fieldnames or ():
                if name not in fieldnames:
                    fieldnames.append(name)
            for line in reader:
                date = line["date"]
                ns = line["ns"]
                counts = totals.setdefault(ns, {}).setdefault(date, Counter())
                for key, value in line.items():
                    # Short rows yield None and over-long rows yield a list
                    # (under the None key); neither is a countable cell.
                    if isinstance(value, str) and value.isdigit():
                        counts[key] += int(value)

    if not fieldnames:
        sys.exit("no .csv file with a header row found in %s" % input_dir)

    # The output is opened only after all input was read successfully, so a
    # failure while reading never leaves a truncated output file behind.
    with open(output_file, "w") as target:
        out = csv.DictWriter(target, fieldnames=fieldnames, delimiter="\t")
        out.writeheader()
        for ns in sorted(totals):
            for date in sorted(totals[ns]):
                row = dict(totals[ns][date])
                # The grouping keys are written verbatim, replacing any sum
                # accumulated for them when their values happen to be digits.
                row["date"] = date
                row["ns"] = ns
                out.writerow(row)
# Run the merge only when this file is executed as a script, so importing
# the module has no side effects.
if __name__ == "__main__":
    main()