-
Notifications
You must be signed in to change notification settings - Fork 0
/
train.py
74 lines (56 loc) · 1.87 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import os
import argparse
import warnings
import wandb
from pytorch_lightning.loggers import WandbLogger
import pytorch_lightning as pl
import yaml
from pytorch_lightning.callbacks import ModelCheckpoint
import torch
from model import MyModel
from data import MaskDataset
def train():
    """Run one W&B trial: build the model and dataset, then fit with Lightning.

    Takes no arguments; hyperparameters are read from ``wandb.config`` after
    ``wandb.init()`` has been called. The script passes this function to
    ``wandb.agent`` as the trial callable.
    """
    # `wandb.config` can only be read once a run has been initialised.
    wandb.init()
    run_logger = WandbLogger()

    print("CONFIG", wandb.config)

    net = MyModel(wandb.config)
    dataset = MaskDataset(wandb.config, net.transform)

    # Checkpoint file names are prefixed with the W&B run name; the remaining
    # placeholders are left as a plain template for ModelCheckpoint to fill.
    ckpt_filename = (
        f"{wandb.run.name}-" + "{epoch:02d}-{step:03d}-{valid_dataset_iou:.2f}"
    )
    # Checkpointing monitors 'valid_dataset_iou', where higher is better.
    best_iou_checkpoint = ModelCheckpoint(
        dirpath="./best_models_ckpt/",
        filename=ckpt_filename,
        monitor="valid_dataset_iou",
        mode="max",
    )

    if torch.cuda.is_available():
        gpu_setting = -1  # -1 means: use all gpus
    else:
        gpu_setting = None

    trainer = pl.Trainer(
        logger=run_logger,
        gpus=gpu_setting,
        max_epochs=10,
        callbacks=[best_iou_checkpoint],
    )
    trainer.fit(net, dataset)
if __name__ == "__main__":
    # Script entry point: parse CLI options, load the sweep definition from
    # YAML, inject the CLI values as fixed sweep parameters, and launch a
    # W&B agent that runs `train` for a limited number of trials.
    warnings.filterwarnings("ignore")

    parser = argparse.ArgumentParser(description="Mask segmentation")
    parser.add_argument("--data_path", type=str)
    parser.add_argument(
        "--cpu_number", default=os.cpu_count(), type=int, help="number of cpus"
    )
    args = parser.parse_args()

    wandb.login()

    # Create a dict based on the YAML config file.
    # safe_load is used instead of FullLoader: the sweep config only needs
    # plain mappings/lists/scalars, and the safe loader refuses to construct
    # Python-specific tagged objects. The file is read as UTF-8 explicitly so
    # parsing does not depend on the platform's locale encoding.
    with open("fpn_resnet34_config.yaml", encoding="utf-8") as f:
        sweep_config = yaml.safe_load(f)

    # Pass the command-line values to every trial as constant ("value")
    # sweep parameters, so `train` can read them from `wandb.config`.
    sweep_config["parameters"].update(
        {
            "data_path": {"value": args.data_path},
            "cpu_number": {"value": args.cpu_number},
        }
    )

    sweep_id = wandb.sweep(sweep_config, project="mask_segmentation")
    # count=3: this agent executes at most three trials.
    wandb.agent(sweep_id, train, count=3)