-
Notifications
You must be signed in to change notification settings - Fork 604
/
dataset.py
111 lines (82 loc) · 3.9 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import os
import argparse
import random
import shutil
from shutil import copyfile
from misc import printProgressBar
def rm_mkdir(dir_path):
    """Recreate ``dir_path`` as a fresh, empty directory.

    If something already exists at ``dir_path`` it is removed recursively
    with ``shutil.rmtree`` first.  The directory is then created with
    ``os.makedirs``, so missing parent directories are created as well.
    Each step is reported on stdout.
    """
    stale = os.path.exists(dir_path)
    if stale:
        # Drop the previous contents so the new directory starts out empty.
        shutil.rmtree(dir_path)
        print('Remove path - %s' % dir_path)

    os.makedirs(dir_path)
    print('Create path - %s' % dir_path)
def _copy_split(pairs, src_dirs, dst_dirs, label):
    """Copy every (image, mask) filename pair of one split to its output folders.

    Args:
        pairs: list of ``(image_filename, mask_filename)`` tuples.
        src_dirs: ``(image_dir, mask_dir)`` the files are read from.
        dst_dirs: ``(image_dir, mask_dir)`` the files are written to.
        label: split name inserted into the progress-bar prefix
            (``'train'``, ``'valid'`` or ``'test'``).
    """
    total = len(pairs)
    for done, pair in enumerate(pairs, start=1):
        # pair, src_dirs and dst_dirs are all ordered (image, mask).
        for name, src_dir, dst_dir in zip(pair, src_dirs, dst_dirs):
            copyfile(os.path.join(src_dir, name), os.path.join(dst_dir, name))
        printProgressBar(done, total, prefix='Producing %s set:' % label,
                         suffix='Complete', length=50)


def main(config):
    """Randomly split the image/mask dataset into train, valid and test folders.

    Every ``.jpg`` file found in ``config.origin_data_path`` is paired with a
    mask named ``ISIC_<id>_segmentation.png`` in ``config.origin_GT_path``,
    where ``<id>`` is the part of the image name after its last underscore.
    The pairs are shuffled and copied into the six output directories, which
    are deleted and recreated first.

    Args:
        config: namespace providing ``train_ratio``, ``valid_ratio``,
            ``test_ratio``, ``origin_data_path``, ``origin_GT_path`` and the
            six output paths ``train_path``, ``train_GT_path``,
            ``valid_path``, ``valid_GT_path``, ``test_path``,
            ``test_GT_path``.

    Raises:
        ValueError: if any ratio is negative or the ratios do not sum to a
            positive number.
    """
    ratios = (config.train_ratio, config.valid_ratio, config.test_ratio)
    ratio_sum = config.train_ratio + config.valid_ratio + config.test_ratio
    if min(ratios) < 0 or ratio_sum <= 0:
        raise ValueError(
            'train/valid/test ratios must be non-negative and sum to a '
            'positive number, got %r' % (ratios,))

    # Read the source listing *before* wiping the output folders, so a bad
    # input path fails without destroying a previously generated split.
    # os.listdir returns names in arbitrary order; sorting makes the split
    # reproducible when the caller seeds the ``random`` module.
    pairs = []
    for filename in sorted(os.listdir(config.origin_data_path)):
        stem, ext = os.path.splitext(filename)
        if ext != '.jpg':
            continue
        image_id = stem.split('_')[-1]
        pairs.append(('ISIC_' + image_id + '.jpg',
                      'ISIC_' + image_id + '_segmentation.png'))

    for out_dir in (config.train_path, config.train_GT_path,
                    config.valid_path, config.valid_GT_path,
                    config.test_path, config.test_GT_path):
        rm_mkdir(out_dir)

    num_total = len(pairs)
    num_train = int((config.train_ratio / ratio_sum) * num_total)
    num_valid = int((config.valid_ratio / ratio_sum) * num_total)
    # The test split absorbs whatever the two truncations above left over.
    num_test = num_total - num_train - num_valid

    print('\nNum of train set : ', num_train)
    print('\nNum of valid set : ', num_valid)
    print('\nNum of test set : ', num_test)

    random.shuffle(pairs)
    train_end = num_train
    valid_end = num_train + num_valid

    src_dirs = (config.origin_data_path, config.origin_GT_path)
    _copy_split(pairs[:train_end], src_dirs,
                (config.train_path, config.train_GT_path), 'train')
    _copy_split(pairs[train_end:valid_end], src_dirs,
                (config.valid_path, config.valid_GT_path), 'valid')
    _copy_split(pairs[valid_end:], src_dirs,
                (config.test_path, config.test_GT_path), 'test')
if __name__ == '__main__':
    # Command-line entry point: collect the split ratios and the
    # source/destination folders, echo the parsed settings, then run main().
    arg_parser = argparse.ArgumentParser()

    # relative sizes of the train / valid / test splits
    for flag, default in (
        ('--train_ratio', 0.6),
        ('--valid_ratio', 0.2),
        ('--test_ratio', 0.2),
    ):
        arg_parser.add_argument(flag, type=float, default=default)

    # data path: the two source folders first, then the six output folders
    for flag, default in (
        ('--origin_data_path', '../ISIC/dataset/ISIC2018_Task1-2_Training_Input'),
        ('--origin_GT_path', '../ISIC/dataset/ISIC2018_Task1_Training_GroundTruth'),
        ('--train_path', './dataset/train/'),
        ('--train_GT_path', './dataset/train_GT/'),
        ('--valid_path', './dataset/valid/'),
        ('--valid_GT_path', './dataset/valid_GT/'),
        ('--test_path', './dataset/test/'),
        ('--test_GT_path', './dataset/test_GT/'),
    ):
        arg_parser.add_argument(flag, type=str, default=default)

    config = arg_parser.parse_args()
    print(config)
    main(config)