From 36236f4e7a1bdefff378051b27bcffb45c888d78 Mon Sep 17 00:00:00 2001 From: Lea Vauchier Date: Mon, 11 Mar 2024 18:38:03 +0100 Subject: [PATCH] Fix edgecase with 1 input value that is not selected in subsample_data --- CHANGELOG.md | 1 + myria3d/pctl/transforms/transforms.py | 17 ++- tests/myria3d/models/test_model.py | 29 ++++- .../pctl/transforms/test_transforms.py | 112 +++++++++++++++++- 4 files changed, 150 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f278fb4a..2c3cfb6d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # CHANGELOG ### 3.8.2 - fix: type error in edge case when dropping points in transforms +- fix: points not dropped case when dropping points in transforms ### 3.8.1 - fix: propagate input las format to output las (in particular epsg which comes either from input or config) diff --git a/myria3d/pctl/transforms/transforms.py b/myria3d/pctl/transforms/transforms.py index ddc362d2..da314cad 100755 --- a/myria3d/pctl/transforms/transforms.py +++ b/myria3d/pctl/transforms/transforms.py @@ -27,16 +27,19 @@ def __call__(self, data: Data): return data -def subsample_data(data, num_nodes, choice): +def subsample_data(data, num_nodes, choice: torch.Tensor): # TODO: get num_nodes from data.num_nodes instead to simplify signature + out_nodes = torch.sum(choice) if choice.dtype == torch.bool else choice.size(0) for key, item in data: if key == "num_nodes": - data.num_nodes = choice.size(0) + data.num_nodes = out_nodes + elif key in ["copies", "idx_in_original_cloud"]: + # Do not subsample copies of the original point cloud or indices of the original points + # contained in the patch + continue elif bool(re.search("edge", key)): continue - elif torch.is_tensor(item) and item.size(0) == num_nodes and item.size(0) != 1: - data[key] = item[choice] - elif isinstance(item, np.ndarray) and item.shape[0] == num_nodes and item.shape[0] != 1: + elif torch.is_tensor(item) and item.size(0) == num_nodes: data[key] = item[choice] 
return data @@ -237,5 +240,9 @@ def __call__(self, data): if points_to_drop.sum() > 0: points_to_keep = torch.logical_not(points_to_drop) data = subsample_data(data, num_nodes=data.num_nodes, choice=points_to_keep) + # Here we also subsample these idx since we do not need to interpolate these points back + # It supposes that DropPointsByClass is run before copying the original point cloud + if "idx_in_original_cloud" in data: + data.idx_in_original_cloud = data.idx_in_original_cloud[points_to_keep.numpy()] return data diff --git a/tests/myria3d/models/test_model.py b/tests/myria3d/models/test_model.py index 42f94c10..f4487c4a 100644 --- a/tests/myria3d/models/test_model.py +++ b/tests/myria3d/models/test_model.py @@ -1,8 +1,9 @@ import hydra -from pytorch_lightning import LightningDataModule +from pytorch_lightning import LightningDataModule, LightningModule from tests.conftest import make_default_hydra_cfg from myria3d.models.model import Model +from myria3d.utils import utils # noqa def test_model_get_batch_tensor_by_enumeration(): @@ -11,7 +12,7 @@ def test_model_get_batch_tensor_by_enumeration(): "predict.src_las=tests/data/toy_dataset_src/862000_6652000.classified_toy_dataset.100mx100m.las", "datamodule.epsg=2154", "work_dir=./../../..", - "datamodule.subtile_width=1", + "datamodule.subtile_width=1", # Extreme case with very few points per subtile "datamodule.hdf5_file_path=null", ] ) @@ -26,3 +27,27 @@ def test_model_get_batch_tensor_by_enumeration(): for batch in datamodule.predict_dataloader(): # Check that no error is raised ("TypeError: object of type 'numpy.int64' has no len()") _ = model._get_batch_tensor_by_enumeration(batch.idx_in_original_cloud) + + +def test_model_forward(): + config = make_default_hydra_cfg( + overrides=[ + "predict.src_las=tests/data/toy_dataset_src/862000_6652000.classified_toy_dataset.100mx100m.las", + "datamodule.epsg=2154", + "work_dir=./../../..", + "datamodule.subtile_width=1", # Extreme case with very few points per subtile 
+ "datamodule.hdf5_file_path=null", + ] + ) + + datamodule: LightningDataModule = hydra.utils.instantiate(config.datamodule) + datamodule._set_predict_data(config.predict.src_las) + + model: LightningModule = hydra.utils.instantiate(config.model) + device = utils.define_device_from_config_param(config.predict.gpus) + model.to(device) + model.eval() + print(model.model) + for batch in datamodule.predict_dataloader(): + # Check that no error is raised + targets, logits = model.forward(batch) diff --git a/tests/myria3d/pctl/transforms/test_transforms.py b/tests/myria3d/pctl/transforms/test_transforms.py index bb0b09ec..3a01050e 100644 --- a/tests/myria3d/pctl/transforms/test_transforms.py +++ b/tests/myria3d/pctl/transforms/test_transforms.py @@ -3,7 +3,99 @@ import torch import torch_geometric -from myria3d.pctl.transforms.transforms import DropPointsByClass, TargetTransform +from myria3d.pctl.transforms.transforms import ( + DropPointsByClass, + MinimumNumNodes, + TargetTransform, + subsample_data, +) + + +@pytest.mark.parametrize( + "x,idx,choice,nb_out_nodes", + [ + # Standard use case with choice containing indices + ( + torch.Tensor([10, 11, 12, 13, 14]), + np.array([20, 21, 22, 23, 24]), + torch.IntTensor([0, 1, 4]), + 3, + ), + # Edge case with choice containing indices: select no point + ( + torch.Tensor([10, 11, 12, 13, 14]), + np.array([20, 21, 22, 23, 24]), + torch.IntTensor([]), + 0, + ), + # Edge case with choice containing indices: select one point + ( + torch.Tensor([10, 11, 12, 13, 14]), + np.array([20, 21, 22, 23, 24]), + torch.IntTensor([1]), + 1, + ), + # Edge case with choice containing indices: input array with one point + ( + torch.Tensor([10]), + np.array([20]), + torch.IntTensor([0]), + 1, + ), + # Edge case with choice containing indices: input array with one point + ( + torch.Tensor([10]), + np.array([20]), + torch.IntTensor([]), + 0, + ), + # Standard use case with choice as boolean array + ( + torch.Tensor([10, 11, 12, 13, 14]), +
np.array([20, 21, 22, 23, 24]), + torch.BoolTensor([True, True, False, True, False]), + 3, + ), + # Edge case with choice as boolean array: select no point + ( + torch.Tensor([10, 11, 12, 13, 14]), + np.array([20, 21, 22, 23, 24]), + torch.BoolTensor([False, False, False, False, False]), + 0, + ), + # Edge case with choice as boolean array: select one point + ( + torch.Tensor([10, 11, 12, 13, 14]), + np.array([20, 21, 22, 23, 24]), + torch.BoolTensor([False, True, False, False, False]), + 1, + ), + # Edge case with choice as boolean array: input array with one point + ( + torch.Tensor([10]), + np.array([20]), + torch.BoolTensor([True]), + 1, + ), + # Edge case with choice as boolean array: input array with one point + ( + torch.Tensor([10]), + np.array([20]), + torch.BoolTensor([False]), + 0, + ), + ], +) +def test_subsample_data(x, idx, choice, nb_out_nodes): + num_nodes = x.size(0) + data = torch_geometric.data.Data(x=x, idx_in_original_cloud=idx, num_nodes=num_nodes) + transformed_data = subsample_data(data, num_nodes, choice) + assert transformed_data.num_nodes == nb_out_nodes + assert isinstance(transformed_data.x, torch.Tensor) + assert transformed_data.x.size(0) == nb_out_nodes + assert isinstance(transformed_data.idx_in_original_cloud, np.ndarray) + # Check that "idx_in_original_cloud" key is not modified + assert transformed_data.idx_in_original_cloud.shape[0] == num_nodes def test_TargetTransform_with_valid_config(): @@ -42,7 +134,6 @@ def test_DropPointsByClass(): transformed_data = drop_transforms(data) assert torch.equal(transformed_data.y, torch.Tensor([1, 2])) assert transformed_data.x.size(0) == 2 - print(type(transformed_data.idx_in_original_cloud)) assert isinstance(transformed_data.idx_in_original_cloud, np.ndarray) assert transformed_data.idx_in_original_cloud.size == 2 assert np.all(transformed_data.idx_in_original_cloud == np.array([0, 3])) @@ -66,3 +157,20 @@ def test_DropPointsByClass(): assert 
isinstance(transformed_data.idx_in_original_cloud, np.ndarray) assert transformed_data.idx_in_original_cloud.shape[0] == 1 assert np.all(transformed_data.idx_in_original_cloud == np.array([0])) + + +@pytest.mark.parametrize("input_nodes,min_nodes", [(5, 10), (1, 10), (15, 10)]) +def test_MinimumNumNodes(input_nodes, min_nodes): + x = torch.rand((input_nodes, 3)) + idx = np.arange(input_nodes) # Not a tensor + data = torch_geometric.data.Data(x=x, idx_in_original_cloud=idx) + transform = MinimumNumNodes(min_nodes) + + transformed_data = transform(data) + expected_nodes = max(input_nodes, min_nodes) + assert transformed_data.num_nodes == expected_nodes + assert isinstance(transformed_data.x, torch.Tensor) + assert transformed_data.x.size(0) == expected_nodes + # Check that "idx_in_original_cloud" key is not modified + assert isinstance(transformed_data.idx_in_original_cloud, np.ndarray) + assert transformed_data.idx_in_original_cloud.shape[0] == input_nodes