diff --git a/config.yml b/config.yml index 1944cce..acc312c 100644 --- a/config.yml +++ b/config.yml @@ -14,4 +14,16 @@ wahab_labels_dir: "1Felix and Rich make models/Training dataset Tiffs/Training s # Where random stuff from the scripts should go, relative to the root of the repository script_output: "script_output/" -boring_script_output: "script_output/boring_stuff/" \ No newline at end of file +boring_script_output: "script_output/boring_stuff/" + +# Bad Dicoms +# These ones are in multiple training sets +duplicate_ids: + - 39 +# These ones are broken for various reasons +broken_ids: + - 92 + - 90 + - 69 + - 36 + - 166 diff --git a/fishjaw/model/model.py b/fishjaw/model/model.py index baf25a6..c93bff6 100644 --- a/fishjaw/model/model.py +++ b/fishjaw/model/model.py @@ -5,7 +5,6 @@ import os import pickle -from math import sqrt from dataclasses import dataclass from typing import Type, Any diff --git a/fishjaw/util/files.py b/fishjaw/util/files.py index c114399..ca4ea15 100644 --- a/fishjaw/util/files.py +++ b/fishjaw/util/files.py @@ -178,3 +178,19 @@ def boring_script_out_dir() -> pathlib.Path: if not retval.is_dir(): retval.mkdir() return retval + + +def broken_dicoms() -> set[int]: + """ + Get the IDs of the broken DICOMs + + """ + return set(util.config()["broken_ids"]) + + +def duplicate_dicoms() -> set[int]: + """ + Get the IDs of the duplicate DICOMs + + """ + return set(util.config()["duplicate_ids"]) diff --git a/scripts/arch_summary.py b/scripts/arch_summary.py index 2eb45a6..e2d3967 100644 --- a/scripts/arch_summary.py +++ b/scripts/arch_summary.py @@ -7,9 +7,9 @@ import torch from prettytable import PrettyTable +from monai.networks.nets import attentionunet from fishjaw.model import model, data -from monai.networks.nets import attentionunet def count_parameters(net: torch.nn.Module) -> None: diff --git a/scripts/create_dicoms.py b/scripts/create_dicoms.py index ed7a714..2db7044 100644 --- a/scripts/create_dicoms.py +++ b/scripts/create_dicoms.py @@ 
-282,19 +282,12 @@ def main(): """ config = util.userconf() - # Some might be duplicated between the different sets; we only want - # the whole jaws in this case - duplicates = {39} - - # I might know some are broken - this is usually because the label and - # Wahab's TIFF are different shapes. Maybe the 3D tiffs are broken? - broken = {92, 90, 69, 36, 166} - create_set_1(config) - create_set_2(config, ignore=broken) + create_set_2(config, ignore=files.broken_dicoms()) - # Ignore the duplicates here - create_set_3(config, ignore=duplicates | broken) + # Some might be duplicated between the different sets; we only want + # the whole jaws in this case + create_set_3(config, ignore=files.broken_dicoms() | files.duplicate_dicoms()) if __name__ == "__main__":