Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/metrics logging #132

Merged
merged 6 commits into from
Nov 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ __pycache__/
# vscode sftp settings
.vscode/sftp.json

# vscode launch settings
.vscode/launch.json

# redis
*.rdb

Expand Down
5 changes: 0 additions & 5 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,6 @@
// Python use black as formatter
"editor.defaultFormatter": null
},
"python.formatting.provider": "black",
thomasehuang marked this conversation as resolved.
Show resolved Hide resolved
"python.linting.pylintEnabled": true,
"python.linting.mypyEnabled": true,
"python.linting.pydocstyleEnabled": true,
"python.linting.pydocstyleArgs": ["--convention=google"],
"spellright.language": ["en"],
"spellright.documentTypes": ["markdown", "latex", "plaintext", "typescript"],
"files.exclude": {
Expand Down
7 changes: 6 additions & 1 deletion docs/source/user_guide/faster_rcnn_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import lightning.pytorch as pl
import numpy as np
import tempfile
from torch.optim import SGD
from torch.optim.lr_scheduler import LinearLR, MultiStepLR

Expand Down Expand Up @@ -52,7 +53,11 @@ def get_config() -> ExperimentConfig:
######################################################
## General Config ##
######################################################
config = get_default_cfg(exp_name="faster_rcnn_r50_fpn_coco")
tmpdir = tempfile.mkdtemp()

config = get_default_cfg(
exp_name="faster_rcnn_r50_fpn_coco", work_dir=tmpdir
)

# High level hyper parameters
params = ExperimentParameters()
Expand Down
2 changes: 2 additions & 0 deletions requirements/dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ pydocstyle
pylint
pytest
pytest-notebook
jupyter
nbdime==3.2.1
types-PyYAML
types-requests
types-tabulate
Expand Down
13 changes: 10 additions & 3 deletions tests/engine/loss_module_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,23 @@ def test_forward(self) -> None:
}
),
},
]
],
exclude_attributes=["dummy_attribute"],
)
x = torch.rand(2, 3, 4, 5)
y = torch.rand(2, 3, 4, 5)
losses = loss({"input": x}, {"target": y})
total_loss = sum(losses.values())
total_loss, metrics = loss({"input": x}, {"target": y})

self.assertAlmostEqual(
total_loss.item(),
0.7 * torch.nn.MSELoss()(x, y).item()
+ 0.3 * torch.nn.L1Loss()(x, y).item(),
places=3,
)

self.assertAlmostEqual(
metrics["loss"],
0.7 * torch.nn.MSELoss()(x, y).detach().cpu().item()
+ 0.3 * torch.nn.L1Loss()(x, y).detach().cpu().item(),
places=3,
)
44 changes: 24 additions & 20 deletions tests/engine/util_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,65 +47,69 @@ def test_apply_to_collection():

# test with data as namedtuple or dataclass
data_cls = Test(1, 2)
data = apply_to_collection(data_cls, int, lambda x: x * 2)
assert data == Test(2, 4)
data_cls = apply_to_collection(data_cls, int, lambda x: x * 2)
assert data_cls == Test(2, 4)

data_cls = Test(1, 2)
data = apply_to_collection(data_cls, (int, str), lambda x: x * 2)
assert data == Test(2, 4)
data_cls = apply_to_collection(data_cls, (int, str), lambda x: x * 2)
assert data_cls == Test(2, 4)

data_cls = Test(1, 2)
data = apply_to_collection(data_cls, int, lambda x: x * 2, wrong_dtype=str)
assert data == Test(2, 4)
data_cls = apply_to_collection(
data_cls, int, lambda x: x * 2, wrong_dtype=str
)
assert data_cls == Test(2, 4)

data_cls = Test(1, 2)
data = apply_to_collection(
data_cls = apply_to_collection(
data_cls,
int,
lambda x: x * 2,
wrong_dtype=(str, int),
include_none=False,
)
assert data == Test(1, 2)
assert data_cls == Test(1, 2)

data_cls = Test(1, 2)
data = apply_to_collection(
data_cls = apply_to_collection(
data_cls,
int,
lambda x: x * 2,
wrong_dtype=(str, int),
include_none=True,
)
assert data == Test(1, 2)
assert data_cls == Test(1, 2)

data_tup = namedtuple("test", "aaa bbb")(1, 2)
data = apply_to_collection(data_tup, int, lambda x: x * 2)
assert data == namedtuple("test", "aaa bbb")(2, 4)
data_tup = apply_to_collection(data_tup, int, lambda x: x * 2)
assert data_tup == namedtuple("test", "aaa bbb")(2, 4)

data_tup = namedtuple("test", "aaa bbb")(1, 2)
data = apply_to_collection(data_tup, (int, str), lambda x: x * 2)
assert data == namedtuple("test", "aaa bbb")(2, 4)
data_tup = apply_to_collection(data_tup, (int, str), lambda x: x * 2)
assert data_tup == namedtuple("test", "aaa bbb")(2, 4)

data_tup = namedtuple("test", "aaa bbb")(1, 2)
data = apply_to_collection(data_tup, int, lambda x: x * 2, wrong_dtype=str)
assert data == namedtuple("test", "aaa bbb")(2, 4)
data_tup = apply_to_collection(
data_tup, int, lambda x: x * 2, wrong_dtype=str
)
assert data_tup == namedtuple("test", "aaa bbb")(2, 4)

data_tup = namedtuple("test", "aaa bbb")(1, 2)
data = apply_to_collection(
data_tup = apply_to_collection(
data_tup,
int,
lambda x: x * 2,
wrong_dtype=(str, int),
include_none=False,
)
assert data == namedtuple("test", "aaa bbb")(1, 2)
assert data_tup == namedtuple("test", "aaa bbb")(1, 2)

data_tup = namedtuple("test", "aaa bbb")(1, 2)
data = apply_to_collection(
data_tup = apply_to_collection(
data_tup,
int,
lambda x: x * 2,
wrong_dtype=(str, int),
include_none=True,
)
assert data == namedtuple("test", "aaa bbb")(1, 2)
assert data_tup == namedtuple("test", "aaa bbb")(1, 2)
6 changes: 2 additions & 4 deletions tests/model/detect/mask_rcnn_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,15 +143,13 @@ def test_train(self):
outputs = mask_rcnn(inputs, images_hw, gt_boxes, gt_class_ids)
assert isinstance(outputs, MaskRCNNOut)

mask_losses = mask_rcnn_loss(outputs, data)
total_loss, metrics = mask_rcnn_loss(outputs, data)

total_loss = sum(mask_losses.values())
total_loss.backward()
optimizer.step()

# print statistics
losses = {"loss": total_loss, **mask_losses}
for k, loss in losses.items():
for k, loss in metrics.items():
if k in running_losses:
running_losses[k] += loss
else:
Expand Down
2 changes: 1 addition & 1 deletion tests/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def generate_instance_masks(
torch.random.set_rng_state(torch.manual_seed(0).get_state())
rand_mask = torch.randint(0, num_masks, (height, width))
mask_tensor = torch.stack(
[(rand_mask == i).type(torch.uint8) for i in range(num_masks)]
[(torch.eq(rand_mask, i)).type(torch.uint8) for i in range(num_masks)]
)
torch.random.set_rng_state(state)
return (
Expand Down
2 changes: 1 addition & 1 deletion tests/vis4d-test-data
Submodule vis4d-test-data updated 37 files
+7 −7 config_test/bdd100k/faster_rcnn/faster_rcnn_r50_1x_bdd100k.yaml
+7 −7 config_test/bdd100k/faster_rcnn/faster_rcnn_r50_3x_bdd100k.yaml
+7 −7 config_test/bdd100k/mask_rcnn/mask_rcnn_r50_1x_bdd100k.yaml
+7 −7 config_test/bdd100k/mask_rcnn/mask_rcnn_r50_3x_bdd100k.yaml
+7 −7 config_test/bdd100k/mask_rcnn/mask_rcnn_r50_5x_bdd100k.yaml
+6 −6 config_test/bdd100k/qdtrack/qdtrack_frcnn_r50_fpn_1x_bdd100k.yaml
+9 −7 config_test/bdd100k/qdtrack/qdtrack_yolox_x_50e_bdd100k.yaml
+7 −7 config_test/bdd100k/semantic_fpn/semantic_fpn_r101_80k_bdd100k.yaml
+7 −7 config_test/bdd100k/semantic_fpn/semantic_fpn_r50_40k_bdd100k.yaml
+7 −7 config_test/bdd100k/semantic_fpn/semantic_fpn_r50_80k_bdd100k.yaml
+7 −7 config_test/bevformer/bevformer_base.yaml
+7 −7 config_test/bevformer/bevformer_tiny.yaml
+7 −7 config_test/bevformer/bevformer_vis.yaml
+8 −8 config_test/cc_3dt/cc_3dt_frcnn_r101_fpn_kf3d_24e_nusc.yaml
+7 −7 config_test/cc_3dt/cc_3dt_frcnn_r101_fpn_pure_det_nusc.yaml
+8 −8 config_test/cc_3dt/cc_3dt_frcnn_r101_fpn_velo_lstm_24e_nusc.yaml
+8 −8 config_test/cc_3dt/cc_3dt_frcnn_r50_fpn_kf3d_12e_nusc.yaml
+8 −8 config_test/cc_3dt/cc_3dt_nusc_vis.yaml
+6 −6 config_test/cc_3dt/velo_lstm_frcnn_r101_fpn_100e_nusc.yaml
+7 −7 config_test/faster_rcnn/faster_rcnn_coco.yaml
+6 −6 config_test/fcn_resnet/fcn_resnet_coco.yaml
+7 −7 config_test/mask_rcnn/mask_rcnn_coco.yaml
+7 −7 config_test/retinanet/retinanet_coco.yaml
+7 −7 config_test/shift/faster_rcnn/faster_rcnn_r50_12e_shift.yaml
+7 −7 config_test/shift/faster_rcnn/faster_rcnn_r50_36e_shift.yaml
+7 −7 config_test/shift/faster_rcnn/faster_rcnn_r50_6e_shift_all_domains.yaml
+7 −7 config_test/shift/mask_rcnn/mask_rcnn_r50_12e_shift.yaml
+7 −7 config_test/shift/mask_rcnn/mask_rcnn_r50_36e_shift.yaml
+7 −7 config_test/shift/mask_rcnn/mask_rcnn_r50_6e_shift_all_domains.yaml
+7 −7 config_test/shift/semantic_fpn/semantic_fpn_r50_160k_shift.yaml
+7 −7 config_test/shift/semantic_fpn/semantic_fpn_r50_160k_shift_all_domains.yaml
+7 −7 config_test/shift/semantic_fpn/semantic_fpn_r50_40k_shift.yaml
+7 −7 config_test/shift/semantic_fpn/semantic_fpn_r50_40k_shift_all_domains.yaml
+6 −6 config_test/vit/vit_small_imagenet.yaml
+6 −6 config_test/vit/vit_tiny_imagenet.yaml
+8 −8 config_test/yolox/yolox_s_300e_coco.yaml
+8 −8 config_test/yolox/yolox_tiny_300e_coco.yaml
2 changes: 1 addition & 1 deletion vis4d/common/ckpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def load(module: nn.Module, prefix: str = "") -> None:
# recursively check parallel module in case that the model has a
# complicated structure, e.g., nn.Module(nn.Module(DDP))
if is_module_wrapper(module):
module = module.module # type: ignore
module = module.module
local_metadata = (
{} if metadata is None else metadata.get(prefix[:-1], {})
)
Expand Down
2 changes: 0 additions & 2 deletions vis4d/common/imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,6 @@ def is_torch_tf32_available() -> bool: # pragma: no cover
return not (
not torch.cuda.is_available()
or torch.version.cuda is None
or torch.cuda.get_device_properties(torch.cuda.current_device()).major
< 8
or int(torch.version.cuda.split(".", maxsplit=1)[0]) < 11
or version.parse(torch.__version__) < version.parse("1.7")
)
4 changes: 2 additions & 2 deletions vis4d/config/common/models/faster_rcnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def get_default_rpn_box_codec_cfg(
target_stds: tuple[float, ...] = (1.0, 1.0, 1.0, 1.0),
) -> tuple[ConfigDict, ConfigDict]:
"""Get default config for rpn box encoder and decoder."""
return tuple( # type: ignore
return tuple(
class_config(x, target_means=target_means, target_stds=target_stds)
for x in (DeltaXYWHBBoxEncoder, DeltaXYWHBBoxDecoder)
)
Expand All @@ -49,7 +49,7 @@ def get_default_rcnn_box_codec_cfg(
target_stds: tuple[float, ...] = (0.1, 0.1, 0.2, 0.2),
) -> tuple[ConfigDict, ConfigDict]:
"""Get default config for rcnn box encoder and decoder."""
return tuple( # type: ignore
return tuple(
class_config(x, target_means=target_means, target_stds=target_stds)
for x in (DeltaXYWHBBoxEncoder, DeltaXYWHBBoxDecoder)
)
Expand Down
4 changes: 3 additions & 1 deletion vis4d/data/transforms/pad.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@ def __call__(self, images: list[NDArrayF32]) -> list[NDArrayF32]:
for i, (image, h, w) in enumerate(zip(images, heights, widths)):
pad_param = (0, max_hw[1] - w, 0, max_hw[0] - h)
image_ = torch.from_numpy(image).permute(0, 3, 1, 2)
image_ = F.pad(image_, pad_param, self.mode, self.value)
image_ = F.pad( # pylint: disable=not-callable
image_, pad_param, self.mode, self.value
)
images[i] = image_.permute(0, 2, 3, 1).numpy()
return images

Expand Down
2 changes: 1 addition & 1 deletion vis4d/data/transforms/point_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def __call__(
selected_idxs = self._idx_sampler([coordinates])[0]
if selected_idxs.sum() >= self.min_pts:
sampled_idxs.append(selected_idxs)
return [np.stack(sampled_idxs)] * len(coordinates_list)
return [np.stack(sampled_idxs)] * len(coordinates_list) # type: ignore


@Transform([K.points3d, "transforms.sampling_idxs"], K.points3d)
Expand Down
44 changes: 39 additions & 5 deletions vis4d/engine/loss_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from typing import TypedDict, Union

import torch
from torch import Tensor, nn
from typing_extensions import NotRequired

Expand Down Expand Up @@ -72,14 +73,22 @@ class LossModule(nn.Module):
weighted by the corresponding weight and returned as a dictionary.
"""

def __init__(self, losses: list[LossDefinition] | LossDefinition) -> None:
def __init__(
self,
losses: list[LossDefinition] | LossDefinition,
exclude_attributes: list[str] | None = None,
) -> None:
"""Creates an instance of the class.

Each loss will be called with arguments matching the kwargs of the loss
function through its connector. By default, the weight is set to 1.0.

Args:
losses (list[LossDefinition]): List of loss definitions.
exclude_attributes (list[str] | None): List of attributes returned
by the losses that should be excluded from the total loss
computation. Use it to log metrics that should not be
optimised. Defaults to None.

Example:
>>> loss = LossModule(
Expand Down Expand Up @@ -127,7 +136,11 @@ def __init__(self, losses: list[LossDefinition] | LossDefinition) -> None:

self.losses.append(loss)

def forward(self, output: DictData, batch: DictData) -> LossesType:
self.exclude_attributes = exclude_attributes

def forward(
self, output: DictData, batch: DictData
) -> tuple[Tensor, dict[str, float]]:
"""Forward of loss module.

This function will call all loss functions and return a dictionary
Expand All @@ -142,7 +155,8 @@ def forward(self, output: DictData, batch: DictData) -> LossesType:
batch (DictData): Batch data.

Returns:
LossesType: The loss values.
total_loss: The total loss value.
metrics: The metrics dictionary.
"""
loss_dict: LossesType = {}

Expand Down Expand Up @@ -182,6 +196,26 @@ def forward(self, output: DictData, batch: DictData) -> LossesType:
while key in loss_dict:
key = "__" + key

loss_dict[key] = loss_weight * value
loss_dict[key] = torch.mul(loss_weight, value)

# Convert loss_dict to total loss and metrics dictionary
metrics: dict[str, float] = {}
if isinstance(loss_dict, Tensor):
total_loss = loss_dict
elif isinstance(loss_dict, dict):
keep_loss_dict: LossesType = {}
for k, v in loss_dict.items():
metrics[k] = v.detach().cpu().item()
if (
self.exclude_attributes is None
or k not in self.exclude_attributes
):
keep_loss_dict[k] = v
total_loss = sum(keep_loss_dict.values()) # type: ignore
else:
raise TypeError(
"Loss function must return a Tensor or a dict of Tensor"
)
metrics["loss"] = total_loss.detach().cpu().item()

return loss_dict
return total_loss, metrics
18 changes: 2 additions & 16 deletions vis4d/engine/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from __future__ import annotations

import torch
from torch import Tensor, nn
from torch import nn
from torch.optim.optimizer import Optimizer
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler
Expand Down Expand Up @@ -236,21 +236,7 @@ def fit(
# Forward + backward + optimize
output = model(**self.train_data_connector(data))

losses = loss_module(output, data)

metrics: dict[str, float] = {}
if isinstance(losses, Tensor):
total_loss = losses
elif isinstance(losses, dict):
total_loss = sum(losses.values()) # type: ignore
for k, v in losses.items():
metrics[k] = v.detach().cpu().item()
else:
raise TypeError(
"Loss function must return a Tensor or a dict of "
+ "Tensor"
)
metrics["loss"] = total_loss.detach().cpu().item()
total_loss, metrics = loss_module(output, data)

total_loss.backward()

Expand Down
4 changes: 2 additions & 2 deletions vis4d/model/adapter/ema.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def update(self, steps: int) -> None: # pylint: disable=unused-argument
"""Update the internal EMA model."""
self._update(
self.model,
update_fn=lambda e, m: self.decay * e + (1.0 - self.decay) * m,
update_fn=lambda e, m: self.decay * e + (1.0 - self.decay) * m, # type: ignore # pylint: disable=line-too-long
)

def set(self, model: nn.Module) -> None:
Expand Down Expand Up @@ -113,5 +113,5 @@ def update(self, steps: int) -> None:
)
self._update(
self.model,
update_fn=lambda e, m: decay * e + (1.0 - decay) * m,
update_fn=lambda e, m: decay * e + (1.0 - decay) * m, # type: ignore # pylint: disable=line-too-long
)
2 changes: 1 addition & 1 deletion vis4d/model/detect/mask_rcnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def forward_train(
assert outputs.sampled_proposals is not None
assert outputs.sampled_targets is not None
pos_proposals = apply_mask(
[label == 1 for label in outputs.sampled_targets.labels],
[torch.eq(label, 1) for label in outputs.sampled_targets.labels],
outputs.sampled_proposals.boxes,
)[0]
mask_outs = self.mask_head(features, pos_proposals)
Expand Down
Loading
Loading