Skip to content

Commit

Permalink
Adding guards in HSCDataSet for the manifest representation of un-downloaded files
Browse files Browse the repository at this point in the history

- Should fix issue #127.
- Moved removal of incomplete downloads from the prune stage to the f/s read stage
- Added a clearer error for the case where HSCDataSet arrives at an absurdly small
  image size to crop to.
  • Loading branch information
mtauraso committed Dec 6, 2024
1 parent 0467d4a commit 9520ec3
Showing 1 changed file with 17 additions and 8 deletions.
25 changes: 17 additions & 8 deletions src/fibad/data_sets/hsc_data_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,17 +520,24 @@ def _read_filter_catalog(
object_id = row["object_id"]
filter = row["filter"]
filename = row["filename"]
if "dim" in colnames:
dim = tuple(row["dim"])

Check warning on line 524 in src/fibad/data_sets/hsc_data_set.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/data_sets/hsc_data_set.py#L523-L524

Added lines #L523 - L524 were not covered by tests

# Skip over any files that are marked as having failed to download,
# or that have a listed dimension smaller than 1px x 1px.
if filename == "Attempted" or min(dim) < 1:
continue

Check warning on line 529 in src/fibad/data_sets/hsc_data_set.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/data_sets/hsc_data_set.py#L528-L529

Added lines #L528 - L529 were not covered by tests

# Insert into the filter catalog.
if object_id not in filter_catalog:
filter_catalog[object_id] = {}

filter_catalog[object_id][filter] = filename

# Dimension is optional
# Dimension is optional, insert into dimension catalog.
if "dim" in colnames:
if object_id not in dim_catalog:
dim_catalog[object_id] = []
dim_catalog[object_id].append(tuple(row["dim"]))
dim_catalog[object_id].append(dim)

Check warning on line 540 in src/fibad/data_sets/hsc_data_set.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/data_sets/hsc_data_set.py#L540

Added line #L540 was not covered by tests

return (filter_catalog, dim_catalog) if "dim" in colnames else filter_catalog

Expand Down Expand Up @@ -632,11 +639,6 @@ def _prune_objects(self, filters_ref: list[str]):
filters_ref = sorted(filters_ref)
self.prune_count = 0
for index, (object_id, filters) in enumerate(self.files.items()):
# Drop objects that failed to download
if any("Attempted" in v for v in filters.items()):
msg = f"Attempted to download {object_id} but failed. Pruning."
self._mark_for_prune(object_id, msg)

# Drop objects with missing filters
filters = sorted(list(filters))
if filters != filters_ref:
Expand Down Expand Up @@ -726,6 +728,13 @@ def _check_file_dimensions(self) -> tuple[int, int]:
finally:
logger.warning(msg)

if min(cutout_height, cutout_width) < 1:
msg = "Automatic determination found an absurd dimension of "
msg += f"({cutout_width}px, {cutout_height}px)\n"
msg += "Please either correct the data source or set a static cutout side with the \n"
msg += "crop_to configuration in the dataset section of the fibad config.\n"
raise RuntimeError(msg)

Check warning on line 736 in src/fibad/data_sets/hsc_data_set.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/data_sets/hsc_data_set.py#L732-L736

Added lines #L732 - L736 were not covered by tests

return cutout_width, cutout_height

def _rebuild_manifest(self, config):
Expand Down

0 comments on commit 9520ec3

Please sign in to comment.