Skip to content

Commit

Permalink
Merge pull request #239 from lsst/tickets/DM-46599
Browse files Browse the repository at this point in the history
DM-46599: Stop using Butler regular expression searches
  • Loading branch information
dhirving authored Oct 15, 2024
2 parents 8818d04 + 4f84c7d commit 15331d9
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 14 deletions.
10 changes: 6 additions & 4 deletions python/lsst/ap/verify/ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@

import fnmatch
import os
import re
import shutil
from glob import glob
import logging
Expand Down Expand Up @@ -138,9 +137,12 @@ def _ensureRaws(self, processes):
RuntimeError
Raised if there are no files to ingest.
"""
# TODO: regex is workaround for DM-25945
rawCollectionFilter = re.compile(self.dataset.instrument.makeDefaultRawIngestRunName())
rawCollections = list(self.workspace.workButler.registry.queryCollections(rawCollectionFilter))
try:
collectionName = self.dataset.instrument.makeDefaultRawIngestRunName()
rawCollections = list(self.workspace.workButler.registry.queryCollections(collectionName))
except lsst.daf.butler.MissingCollectionError:
rawCollections = []

rawData = list(self.workspace.workButler.registry.queryDatasets(
'raw',
collections=rawCollections,
Expand Down
5 changes: 4 additions & 1 deletion python/lsst/ap/verify/pipeline_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,10 @@ def _getCollectionArguments(workspace, reuse):
# skip-existing-in would work around that, but would lead to a worse bug in
# the case that the user is alternating runs with and without --clean-run.
# registry.refresh()
oldRuns = list(registry.queryCollections(re.compile(workspace.outputName + r"/\d+T\d+Z")))
collectionPattern = re.compile(workspace.outputName + r"/\d+T\d+Z")
oldRuns = list(registry.queryCollections(workspace.outputName + "/*"))
oldRuns = [run for run in oldRuns if collectionPattern.fullmatch(run)]

if reuse and oldRuns:
args.extend(["--extend-run", "--skip-existing"])
return args
Expand Down
29 changes: 24 additions & 5 deletions python/lsst/ap/verify/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import abc
import os
import pathlib
import re
import stat

import lsst.daf.butler as dafButler
Expand Down Expand Up @@ -219,10 +218,31 @@ def _ensureCollection(self, registry, name, collectionType):
The type of collection to add. This field is ignored when
testing if a collection exists.
"""
matchingCollections = list(registry.queryCollections(re.compile(name)))
if not matchingCollections:
if not self._doesCollectionExist(registry, name):
registry.registerCollection(name, type=collectionType)

def _doesCollectionExist(self, registry, name):
"""Check if a collection exists in the registry.
Parameters
----------
registry : `lsst.daf.butler.Registry`
The repository that may contain the collection.
name : `str`
The name of the collection to check for existence.
Returns
-------
exists : `bool`
`True` if the collection exists in the registry, `False` otherwise.
"""
try:
matchingCollections = list(registry.queryCollections(name))
return len(matchingCollections) > 0
except dafButler.MissingCollectionError:
return False

@property
def workButler(self):
"""A Butler that can read and write to a Gen 3 repository (`lsst.daf.butler.Butler`, read-only).
Expand All @@ -247,8 +267,7 @@ def workButler(self):

# Create an output chain here, so that workButler can see it.
# Definition does not conflict with what pipetask --output uses.
# Regex is workaround for DM-25945.
if not list(queryButler.registry.queryCollections(re.compile(self.outputName))):
if not self._doesCollectionExist(queryButler.registry, self.outputName):
queryButler.registry.registerCollection(self.outputName,
dafButler.CollectionType.CHAINED)
queryButler.registry.setCollectionChain(self.outputName, inputs)
Expand Down
5 changes: 1 addition & 4 deletions tests/test_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

import os
import pickle
import re
import shutil
import tempfile
import unittest
Expand Down Expand Up @@ -133,9 +132,7 @@ def testCalibIngestDriver(self):
# queryDatasets cannot (yet) search CALIBRATION collections, so we
# instead search the RUN-type collections that calibrations are
# ingested into first before being associated with a validity range.
calibrationRunPattern = re.compile(
re.escape(self.dataset.instrument.makeCollectionName("calib") + "/") + ".+"
)
calibrationRunPattern = self.dataset.instrument.makeCollectionName("calib") + "/*"
calibrationRuns = list(
self.butler.registry.queryCollections(
calibrationRunPattern,
Expand Down

0 comments on commit 15331d9

Please sign in to comment.