Merge branch 'tickets/DM-45750'
kfindeisen committed Oct 23, 2024
2 parents 91ca182 + f445cea commit 2046575
Showing 47 changed files with 4,014 additions and 5,829 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -23,7 +23,7 @@ path | description
`config` | Dataset-specific configs to help Stack code work with this dataset.
`pipelines` | Dataset-specific pipelines to run on this dataset.
`dataIds.list` | List of dataIds in this repo. For use in running Tasks. Currently set to run all Ids.
-`preloaded` | A Gen 3 Butler repository containing HSC master calibs from the 2016 COSMOS campaign (or, where necessary, from 2015), coadded images for use as differencing templates, PS1 reference catalog in HTM format for regions overlapping any visit in the dataset, and a pretrained machine learning model for real/bogus classification.
+`preloaded` | A Gen 3 Butler repository containing HSC master calibs from the 2016 COSMOS campaign (or, where necessary, from 2015), coadded images for use as differencing templates, PS1 reference catalog in HTM format for regions overlapping any visit in the dataset, mock APDB outputs based on the raw images, and a pretrained machine learning model for real/bogus classification.
`scripts` | Scripts and data for generating this dataset.


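The expanded `preloaded` description can be checked against the repository itself with the Butler Python API. A minimal sketch, assuming the repo path `preloaded` relative to the dataset root (illustrative only, not part of the dataset):

import lsst.daf.butler

# Open the preloaded Gen 3 repository and list its registered dataset types,
# which should now include the mock APDB outputs.
butler = lsst.daf.butler.Butler("preloaded")
for dataset_type in butler.registry.queryDatasetTypes():
    print(dataset_type.name)
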
9,511 changes: 3,709 additions & 5,802 deletions config/export.yaml

Large diffs are not rendered by default.
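`config/export.yaml` is the Butler export manifest for the preloaded repository, regenerated wholesale by this commit. For reference, such a manifest is consumed with the same `Butler.import_` call used by `scripts/generate_ephemerides_gen3.py` below; a hedged sketch, where the target repo path `some_repo` is hypothetical:

from lsst.daf.butler import Butler

# Import the exported datasets and dimension records into another repo;
# transfer="copy" materializes the referenced files.
butler = Butler("some_repo", writeable=True)
butler.import_(directory="preloaded", filename="config/export.yaml", transfer="copy")
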

1 change: 1 addition & 0 deletions doc/ap_verify_ci_cosmos_pdr2/index.rst
@@ -32,6 +32,7 @@ It contains:
* biases, darks, brighter-fatter kernels, and g-band flats.
* reference catalogs for Pan-STARRS1, covering the raw images' footprint.
* image differencing templates coadded from 2014 COSMOS data, covering the raw images' footprint.
+* mock APDB catalogs based on processing the raw images in order
* the rbResnet50-DC2 pretrained machine learning model for real/bogus classification

.. _ap_verify_ci_cosmos_pdr2-contributing:
6 changes: 5 additions & 1 deletion pipelines/Ephemerides.yaml
@@ -1,4 +1,8 @@
description: Pipeline for downloading solar system ephemerides given raw images
instrument: lsst.obs.subaru.HyperSuprimeCam
tasks:
-  SkyBotEphemerisQuery: lsst.ap.association.skyBotEphemerisQuery.SkyBotEphemerisQueryTask
+  getRegionTimeFromVisit:
+    class: lsst.pipe.tasks.getRegionTimeFromVisit.GetRegionTimeFromVisitTask
+    config:
+      connections.dummy_visit: visit_dummy
+  mpSkyEphemerisQuery: lsst.ap.association.MPSkyEphemerisQueryTask
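To sanity-check the revised pipeline, it can be loaded and expanded with `lsst.pipe.base.Pipeline`; a sketch assuming the `fromFile` and `toExpandedPipeline` methods available in contemporary Stack releases:

from lsst.pipe.base import Pipeline

# Parse the pipeline YAML and expand it into per-task definitions;
# expect getRegionTimeFromVisit followed by mpSkyEphemerisQuery.
pipeline = Pipeline.fromFile("pipelines/Ephemerides.yaml")
for task_def in pipeline.toExpandedPipeline():
    print(task_def.label, "->", task_def.taskName)
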
30 binary files changed (contents not shown).
2 changes: 1 addition & 1 deletion preloaded/gen3.sqlite3
Git LFS file not shown
10 binary files changed (contents not shown).
67 changes: 43 additions & 24 deletions scripts/generate_ephemerides_gen3.py
@@ -25,6 +25,9 @@
Running this script allows for updates to the ephemerides to be incorporated
into the dataset.
+
+This script takes no command-line arguments; it infers everything it needs from
+the `preloaded/` repository.
"""

import glob
@@ -34,9 +37,11 @@
import sys
import tempfile

+import pandas
+
import lsst.log
import lsst.sphgeom
-from lsst.daf.butler import Butler, FileDataset
+from lsst.daf.butler import Butler, CollectionType, DatasetType
import lsst.obs.base


@@ -46,12 +51,14 @@

# Avoid explicit references to dataset package to maximize portability.
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
-PIPE_DIR = os.path.join(SCRIPT_DIR, "..", "pipelines")
-RAW_DIR = os.path.join(SCRIPT_DIR, "..", "raw")
+PIPE_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "pipelines"))
+RAW_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "raw"))
RAW_RUN = "raw"
EPHEM_DATASET = "visitSsObjects"
DEST_DIR = os.path.join(SCRIPT_DIR, "..", "preloaded")
DEST_RUN = "sso/cached"
VISIT_DATASET = "visit_dummy"
EPHEM_DATASET = "preloaded_SsObjects"
DEST_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "preloaded"))
DEST_COLLECTION = "sso"
DEST_RUN = DEST_COLLECTION + "/mpsky"


########################################
@@ -99,7 +106,7 @@ def _make_repo_with_instruments(repo_dir, instruments):


########################################
-# Ingest raws (needed for visitinfo)
+# Ingest raws (needed for visit records)

def _ingest_raws(repo, raw_dir, run):
"""Ingest this dataset's raws into a specific repo.
Expand All @@ -121,11 +128,33 @@ def _ingest_raws(repo, raw_dir, run):
definer.run(exposures)


+########################################
+# Dummy pipeline inputs
+
+def _make_visit_datasets(repo, run):
+    """Create stub datasets for running GetRegionTimeFromVisitTask.
+
+    Parameters
+    ----------
+    repo : `lsst.daf.butler.Butler`
+        A writeable Butler in which to create datasets.
+    run : `str`
+        The name of the run in which to create datasets.
+    """
+    dummy_type = DatasetType(VISIT_DATASET, {"instrument", "visit", "detector"}, "DataFrame")
+    repo.registry.registerDatasetType(dummy_type)
+    # Derive data IDs from the ingested raws so that unused detectors are excluded.
+    data_ids = {ref.dataId for ref in repo.query_datasets("raw", collections="*", find_first=False)}
+    exp_table = pandas.DataFrame()
+    for data_id in data_ids:
+        repo.put(exp_table, dummy_type, data_id, run=run)


########################################
# Download ephemerides

def _get_ephem(repo_dir, raw_collection, ephem_collection):
"""Run the task for downloading ephemerides.
"""Run the tasks for downloading ephemerides.

    Parameters
    ----------
@@ -175,25 +204,12 @@ def _transfer_ephems(ephem_type, src_repo, src_dir, run, dest_repo):
    dest_repo : `lsst.daf.butler.Butler`
        The repository to which to copy the datasets.
    """
-    # Need to transfer visit definitions as well; Butler.export is the easiest
+    # Need to transfer group definitions as well; Butler.export is the easiest
    # way to do this.
    with tempfile.NamedTemporaryFile(suffix=".yaml") as export_file:
        with src_repo.export(filename=export_file.name, transfer=None) as contents:
            contents.saveDatasets(src_repo.registry.queryDatasets(ephem_type, collections=run),
elements=["visit"])
# Because of how the temp repo was constructed, there should not be
# any visit/exposure records other than those needed to support the
# ephemerides datasets.
contents.saveDimensionData("visit_system",
src_repo.registry.queryDimensionRecords("visit_system"))
contents.saveDimensionData("visit",
src_repo.registry.queryDimensionRecords("visit"))
contents.saveDimensionData("exposure",
src_repo.registry.queryDimensionRecords("exposure"))
contents.saveDimensionData("visit_definition",
src_repo.registry.queryDimensionRecords("visit_definition"))
contents.saveDimensionData("visit_detector_region",
src_repo.registry.queryDimensionRecords("visit_detector_region"))
elements=["group"])
        # runs included automatically by saveDatasets
        dest_repo.import_(directory=src_dir, filename=export_file.name, transfer="copy")

@@ -206,12 +222,15 @@ def _transfer_ephems(ephem_type, src_repo, src_dir, run, dest_repo):
temp_repo = _make_repo_with_instruments(workspace, _get_instruments(DEST_DIR))
logging.info("Ingesting raws...")
_ingest_raws(temp_repo, RAW_DIR, RAW_RUN)
+_make_visit_datasets(temp_repo, RAW_RUN)
logging.info("Downloading ephemerides...")
_get_ephem(workspace, RAW_RUN, DEST_RUN)
temp_repo.registry.refresh() # Pipeline added dataset types
preloaded = Butler(DEST_DIR, writeable=True)
logging.debug("Preloaded repo has universe version %d.", preloaded.dimensions.version)
logging.info("Transferring ephemerides to dataset...")
_transfer_ephems(EPHEM_DATASET, temp_repo, workspace, DEST_RUN, preloaded)
+preloaded.registry.registerCollection(DEST_COLLECTION, CollectionType.CHAINED)
+preloaded.registry.setCollectionChain(DEST_COLLECTION, [DEST_RUN])

logging.info("Solar system catalogs copied to %s:%s", DEST_DIR, DEST_RUN)
logging.info("Solar system catalogs copied to %s:%s", DEST_DIR, DEST_COLLECTION)
