-
Notifications
You must be signed in to change notification settings - Fork 0
/
clean-train-MSE.py
81 lines (62 loc) · 2.88 KB
/
clean-train-MSE.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import os
import random
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms as T
from tqdm.auto import tqdm
from attacker import L2PGD, LinfPGD
from dataset import Cifar10, Cifar100
from model import resnet18_small as resnet18_small # wideresnet34 as resnet18_small
from runner import LinfRunner as DistRunner
from utils import get_device_id, Quick_MSELoss
from advertorch.attacks import LinfPGDAttack
def run(lr, epochs, batch_size):
    """Train a CIFAR-10 model with an MSE criterion under DDP and save it.

    Initializes the NCCL process group from environment variables, builds
    distributed train/test loaders, wraps the model in
    ``DistributedDataParallel``, runs clean (non-adversarial) training via the
    project runner, and finally pickles the model on rank 0.

    Args:
        lr: Learning rate handed to the SGD optimizer.
        epochs: Forwarded to the runner as its first argument.
        batch_size: Batch size of each process's ``DataLoader``.

    Raises:
        KeyError: If the ``DATAROOT`` environment variable is not set.
    """
    torch.distributed.init_process_group(
        backend='nccl',
        init_method='env://',
    )

    device_id = get_device_id()
    torch.cuda.set_device(device_id)
    device = f'cuda:{device_id}'

    train_transforms = T.Compose([
        T.RandomCrop(32, padding=4),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
    ])
    test_transforms = T.Compose([
        T.Resize((32, 32)),
        T.ToTensor(),
    ])

    # Each process gets its own shard of the data through DistributedSampler.
    # NOTE(review): shuffling across epochs requires sampler.set_epoch(); it is
    # not called here — confirm that the runner does it.
    train_dataset = Cifar10(os.environ['DATAROOT'], transform=train_transforms, train=True)
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        sampler=train_sampler,
        num_workers=4,
        pin_memory=False,
    )

    test_dataset = Cifar10(os.environ['DATAROOT'], transform=test_transforms, train=False)
    test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset)
    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        sampler=test_sampler,
        num_workers=4,
        pin_memory=False,
    )

    model = resnet18_small(n_class=train_dataset.class_num).to(device)
    model = nn.parallel.DistributedDataParallel(model, device_ids=[device_id], output_device=device_id)
    # model = nn.parallel.DataParallel(model, device_ids=[device_id], output_device=device_id)

    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=2e-4)
    # presumably the milestones are epoch indices — depends on how the runner
    # steps the scheduler; verify against runner.
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250, 350], gamma=0.1)

    # The attacker is still constructed and handed to the runner even though
    # training below is started with adv=False.
    # attacker = LinfPGD(model, epsilon=8/255, step=2/255, iterations=10, random_start=True)
    attacker = LinfPGDAttack(
        model, loss_fn=nn.CrossEntropyLoss(reduction="mean"), eps=8/255, eps_iter=2/255, nb_iter=10,
        rand_init=True, clip_min=0.0, clip_max=1.0, targeted=False,
    )

    # criterion = nn.CrossEntropyLoss()
    criterion = Quick_MSELoss(n_class=train_dataset.class_num)

    runner = DistRunner(epochs, model, train_loader, test_loader, criterion, optimizer, scheduler, attacker, device)
    runner.train(adv=False)

    # Only rank 0 writes the checkpoint so processes do not race on the file.
    if torch.distributed.get_rank() == 0:
        checkpoint_path = './checkpoint/clean-MSE-cifar10.pth'
        # Make sure the target directory exists; otherwise torch.save fails
        # only after the whole training run has already completed.
        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
        # Saves the whole DDP-wrapped module (not a state_dict); the format is
        # kept as-is so existing loading code continues to work.
        torch.save(model.cpu(), checkpoint_path)
        print('Save model.')
if __name__ == '__main__':
    # Hyperparameters for this run (trailing comments keep earlier values).
    lr = 4e-1
    epochs = 400
    batch_size = 64  # 128

    # Seed Python's and PyTorch's RNGs for reproducibility.
    manualSeed = 517  # 2077
    random.seed(manualSeed)
    torch.manual_seed(manualSeed)

    # Expand '~' explicitly: Python's file APIs do not perform shell-style
    # tilde expansion, so the literal string would name a directory called '~'
    # relative to the working directory unless the consumer expands it itself.
    os.environ['DATAROOT'] = os.path.expanduser('~/Datasets/cifar10')

    run(lr, epochs, batch_size)