-
Notifications
You must be signed in to change notification settings - Fork 55
/
generate_metadata_pkl.py
executable file
·99 lines (80 loc) · 2.87 KB
/
generate_metadata_pkl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import argparse
import glob
import re
import cPickle as pickle
from dicom.sequence import Sequence
from log import print_to_file
from paths import LOGS_PATH, TRAIN_DATA_PATH, TEST_DATA_PATH
def read_slice(path):
    """Load a pickled slice file and return the object stored under 'data'.

    Args:
        path: Filesystem path of the pickle file to read.

    Returns:
        The value found under the 'data' key of the unpickled object.

    Raises:
        IOError: If the file cannot be opened.
        KeyError: If the unpickled mapping has no 'data' entry.
    """
    # Pickle streams are bytes, so open in binary mode; the context manager
    # closes the handle deterministically instead of leaking it.
    # NOTE(review): unpickling can execute arbitrary code -- only point this
    # at files produced by a trusted pipeline.
    with open(path, 'rb') as slice_file:
        return pickle.load(slice_file)['data']
def convert_to_number(value):
    """Turn a value into an int or float when its string form allows it.

    The value is first converted with ``str``. A string containing a "." is
    parsed as a float, anything else as an int.

    Args:
        value: Any object; only its ``str`` representation is inspected.

    Returns:
        An int or float when parsing succeeds, otherwise the string form of
        ``value`` (not the original object).
    """
    text = str(value)
    try:
        return float(text) if "." in text else int(text)
    except ValueError:
        # Not a number: fall back to the string form. Only ValueError is
        # caught so that e.g. KeyboardInterrupt is no longer swallowed.
        return text
def clean_metadata(metadatadict):
    """Normalise the values of a metadata dict in place and return it.

    * 'PatientAge' is parsed as an int after dropping its last character.
    * 'PatientSex' becomes 1 for 'F' and -1 for anything else.
    * Every other value goes through ``convert_to_number``; ``Sequence``
      and list values are converted element by element into a list.

    Args:
        metadatadict: Mutable mapping of metadata keys to raw values. It is
            modified in place.

    Returns:
        The same ``metadatadict`` object, with cleaned values.

    Raises:
        ValueError: If 'PatientAge' minus its last character is not an
            integer literal.
    """
    for key in sorted(metadatadict):
        value = metadatadict[key]
        if key == 'PatientAge':
            # The last character is dropped before parsing -- presumably the
            # unit suffix (e.g. "045Y"); TODO confirm how non-year units
            # should be handled.
            metadatadict[key] = int(value[:-1])
        elif key == 'PatientSex':
            # `elif` (not a second `if`) so the generic branch below cannot
            # run for 'PatientAge' and overwrite the parsed int with the
            # original string.
            metadatadict[key] = 1 if value == 'F' else -1
        else:
            if isinstance(value, Sequence):
                # Materialise as a plain list so it is handled like one below.
                value = list(value)
            if isinstance(value, list):
                metadatadict[key] = [convert_to_number(item) for item in value]
            else:
                metadatadict[key] = convert_to_number(value)
    return metadatadict
def read_metadata(path):
    """Read the metadata stored in a pickled slice file and clean it.

    Args:
        path: Filesystem path of the pickle file to read.

    Returns:
        The first element of the file's 'metadata' entry after it has been
        passed through ``clean_metadata``.

    Raises:
        IOError: If the file cannot be opened.
        KeyError: If the unpickled mapping has no 'metadata' entry.
    """
    # Binary mode matches what pickle expects, and the `with` block closes
    # the handle instead of leaving it open.
    # NOTE(review): unpickling can execute arbitrary code -- trusted files only.
    with open(path, 'rb') as slice_file:
        raw_metadata = pickle.load(slice_file)['metadata'][0]
    return clean_metadata(raw_metadata)
def get_patient_data(patient_data_path):
    """Collect the cleaned metadata of every slice pickle of one patient.

    Args:
        patient_data_path: Directory holding the patient's ``*.pkl`` slice
            files. It must end in ``/<digits>/study``; the digits are used as
            the patient id.

    Returns:
        A ``(patient_data, pid)`` tuple. ``patient_data`` is a list of dicts
        with keys 'metadata' and 'slice_id', ordered by the integer that
        precedes ``.pkl`` in each file name. ``pid`` is the patient id as a
        string.

    Raises:
        AttributeError: If the directory path or one of the file names does
            not match the expected pattern (the regex search yields None).
    """
    def slice_number(pkl_path):
        # Numeric sort key: the digits between the last "_" and ".pkl".
        return int(re.search(r'/*_(\d+)\.pkl$', pkl_path).group(1))

    slice_paths = glob.glob(patient_data_path + r'/*.pkl')
    slice_paths.sort(key=slice_number)

    pid = re.search(r'/(\d+)/study$', patient_data_path).group(1)

    patient_data = []
    for slice_path in slice_paths:
        # The slice id is the file name itself, e.g. "sax_12.pkl".
        name_match = re.search(r'/(((4ch)|(2ch)|(sax))_\d+\.pkl)$', slice_path)
        slice_id = name_match.group(1)
        entry = {'metadata': read_metadata(slice_path),
                 'slice_id': slice_id}
        patient_data.append(entry)
        print(slice_id)
    return patient_data, pid
def get_metadata(data_path):
    """Gather the metadata of all patients under ``data_path`` and pickle it.

    Every ``<data_path>*/study`` directory is treated as one patient. The
    result is written to ``<name>_metadata.pkl`` in the current working
    directory, where ``<name>`` is the second-to-last '/'-separated component
    of ``data_path``.

    Args:
        data_path: Path prefix of the dataset. Both the glob pattern and the
            output file name are built on the assumption that it ends with a
            trailing '/'.

    Returns:
        A dict mapping patient id -> {slice id -> cleaned metadata}.

    Raises:
        IndexError: If ``data_path`` contains no '/' at all.
    """
    patient_paths = sorted(glob.glob(data_path + '*/study'))
    metadata_dict = {}
    for patient_path in patient_paths:
        patient_data, pid = get_patient_data(patient_path)
        print("patient %s" % (pid,))
        metadata_dict[pid] = dict(
            (entry['slice_id'], entry['metadata']) for entry in patient_data)

    filename = data_path.split('/')[-2] + '_metadata.pkl'
    # Pickle output is bytes: write through a binary handle so the dump is
    # not subject to newline translation (and works where text-mode handles
    # reject bytes).
    with open(filename, 'wb') as f:
        pickle.dump(metadata_dict, f)
    # Two spaces on purpose: keeps the message identical to the historical
    # `print 'saved to ', filename` output.
    print('saved to  %s' % (filename,))
    return metadata_dict
if __name__ == '__main__':
    # Script entry point: build the metadata pickle for the train and test
    # sets while routing printed output through `print_to_file`.
    #
    # No options are defined at the moment (a required `-c/--config` option
    # used to be registered on the group below and is currently disabled).
    # Parsing still runs so that `--help` works and stray arguments are
    # rejected.
    parser = argparse.ArgumentParser(description=__doc__)
    required = parser.add_argument_group('required arguments')
    args = parser.parse_args()

    log_path = LOGS_PATH + "generate_metadata.log"
    data_paths = [TRAIN_DATA_PATH, TEST_DATA_PATH]

    with print_to_file(log_path):
        for data_path in data_paths:
            get_metadata(data_path)
        # NOTE(review): emitted inside the print_to_file context -- confirm
        # this is the intended placement.
        print("log saved to '%s'" % log_path)