forked from shwang54/PAC-Bayes-In-Medical-Imaging
-
Notifications
You must be signed in to change notification settings - Fork 0
/
make_split.py
92 lines (59 loc) · 2.07 KB
/
make_split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import random
from pathlib import Path
import os
from PIL import Image
# Input image directories, one per task. The script's entry point lists each
# directory to build that task's split files. Both paths are relative, so
# they resolve against the current working directory.
TASK1_IMG_DIR = 'ISIC2018/ISIC2018_Task1-2_Training_Input'
TASK3_IMG_DIR = 'ISIC2018/ISIC2018_Task3_Training_Input'
def isimage(fname):
    """Return True if PIL can open *fname* as an image, False otherwise.

    Args:
        fname: Path of the file to probe.

    Returns:
        True when ``Image.open`` succeeds; False when it raises ``OSError``
        (``IOError`` is an alias of ``OSError`` on Python 3).

    Any other exception raised by ``Image.open`` propagates to the caller.
    """
    try:
        # The context manager closes the underlying file as soon as the
        # probe succeeds, so scanning a large directory does not accumulate
        # open file handles.
        with Image.open(fname):
            return True
    except OSError:
        return False
def write(paths, fname):
    """Write each entry of *paths* to the text file *fname*, one per line.

    Args:
        paths: Iterable of entries; each is formatted with an f-string and
            followed by a newline.
        fname: Destination file; it is opened in ``'w'`` mode, so existing
            content is replaced.
    """
    lines = (f'{entry}\n' for entry in paths)
    with open(fname, 'w') as handle:
        handle.writelines(lines)
def _random_split(names, keep_prob):
    """Partition *names* into two lists using one random draw per name.

    Args:
        names: Sequence of items to distribute.
        keep_prob: An item goes to the first list when
            ``random.random() <= keep_prob`` and to the second otherwise.

    Returns:
        A ``(kept, rest)`` tuple of lists; each preserves the input order.
    """
    kept, rest = [], []
    for name in names:
        if random.random() <= keep_prob:
            kept.append(name)
        else:
            rest.append(name)
    return kept, rest


def main():
    """Generate the split files for every task.

    For each task directory the image file names are split three ways and
    written under ``path/<task_name>/``:

    * ``pac_bayes_full_train.txt`` / ``final_holdout.txt``: all images,
      each kept for training with probability 0.9.
    * ``hoeffding_train.txt`` / ``hoeffding_holdout.txt``: the training
      names split again with probability 0.9.
    * ``pac_bayes_prefix_bound.txt`` / ``pac_bayes_prefix.txt``: the
      training names split with probability 0.5.

    Split sizes are random, not exact fractions, because every file gets an
    independent draw.
    """
    random.seed(0)
    tasks = [(TASK1_IMG_DIR, 'task1'),
             (TASK3_IMG_DIR, 'task3')]
    for task_dir, task_name in tasks:
        out_dir = os.path.join('path', task_name)
        Path(out_dir).mkdir(exist_ok=True, parents=True)

        # os.listdir returns entries in arbitrary order. Sorting makes the
        # seeded draws land on the same files on every machine, so the
        # splits are actually reproducible.
        task_files = [fname for fname in sorted(os.listdir(task_dir))
                      if isimage(os.path.join(task_dir, fname))]

        train, final_holdout = _random_split(task_files, 0.9)
        write(final_holdout,
              os.path.join(out_dir, 'final_holdout.txt'))
        write(train,
              os.path.join(out_dir, 'pac_bayes_full_train.txt'))

        hoeffding_train, hoeffding_holdout = _random_split(train, 0.9)
        write(hoeffding_holdout,
              os.path.join(out_dir, 'hoeffding_holdout.txt'))
        write(hoeffding_train,
              os.path.join(out_dir, 'hoeffding_train.txt'))

        # Draws that pass the 0.5 threshold go to the bound set.
        bound, prefix = _random_split(train, 0.5)
        write(prefix,
              os.path.join(out_dir, 'pac_bayes_prefix.txt'))
        write(bound,
              os.path.join(out_dir, 'pac_bayes_prefix_bound.txt'))


if __name__ == '__main__':
    main()