From cc2f12bcaeefe6d1fe2609ab8826be0fc23f47c7 Mon Sep 17 00:00:00 2001 From: Michael Tauraso Date: Wed, 11 Dec 2024 15:13:33 -0800 Subject: [PATCH] Adding guards in HSCDataSet for the manifest representation of un-downloaded files (#130) - Should fix issue #127. - Moved removal of incomplete downloads from the prune stage to the filesystem read stage. - Added a clearer error for the case where HSCDataSet arrives at an absurdly small image size to crop to. Co-authored-by: Drew Oldag <47493171+drewoldag@users.noreply.github.com> --- src/fibad/data_sets/hsc_data_set.py | 25 +++++++++++++++++-------- src/fibad/download.py | 2 +- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/fibad/data_sets/hsc_data_set.py b/src/fibad/data_sets/hsc_data_set.py index 359d3f1..1831026 100644 --- a/src/fibad/data_sets/hsc_data_set.py +++ b/src/fibad/data_sets/hsc_data_set.py @@ -520,17 +520,24 @@ def _read_filter_catalog( object_id = row["object_id"] filter = row["filter"] filename = row["filename"] + if "dim" in colnames: + dim = tuple(row["dim"]) + + # Skip over any files that are marked as didn't download. + # or have a dimension listed less than 1px x 1px + if filename == "Attempted" or min(dim) < 1: + continue + # Insert into the filter catalog. if object_id not in filter_catalog: filter_catalog[object_id] = {} - filter_catalog[object_id][filter] = filename - # Dimension is optional + # Dimension is optional, insert into dimension catalog. 
if "dim" in colnames: if object_id not in dim_catalog: dim_catalog[object_id] = [] - dim_catalog[object_id].append(tuple(row["dim"])) + dim_catalog[object_id].append(dim) return (filter_catalog, dim_catalog) if "dim" in colnames else filter_catalog @@ -632,11 +639,6 @@ def _prune_objects(self, filters_ref: list[str]): filters_ref = sorted(filters_ref) self.prune_count = 0 for index, (object_id, filters) in enumerate(self.files.items()): - # Drop objects that failed to download - if any("Attempted" in v for v in filters.items()): - msg = f"Attempted to download {object_id} but failed. Pruning." - self._mark_for_prune(object_id, msg) - # Drop objects with missing filters filters = sorted(list(filters)) if filters != filters_ref: @@ -726,6 +728,13 @@ def _check_file_dimensions(self) -> tuple[int, int]: finally: logger.warning(msg) + if min(cutout_height, cutout_width) < 1: + msg = "Automatic determination found an absurd dimension of " + msg += f"({cutout_width}px, {cutout_height}px)\n" + msg += "Please either correct the data source or set a static cutout size with the \n" + msg += "crop_to configuration in the [data_set] section of your fibad config.\n" + raise RuntimeError(msg) + return cutout_width, cutout_height def _rebuild_manifest(self, config): diff --git a/src/fibad/download.py b/src/fibad/download.py index 8f78da6..ae49d20 100644 --- a/src/fibad/download.py +++ b/src/fibad/download.py @@ -107,7 +107,7 @@ def _batched(iterable, n): while batch := tuple(itertools.islice(iterator, n)): yield batch - if len(self.rects) > num_threads: + if len(self.rects) < num_threads: msg = f"Only {len(self.rects)} sky locations, which is less than the number of threads, so we " msg += "will use only one thread." logger.info(msg)