Skip to content

Commit

Permalink
Merge pull request #18976 from mvdbeek/h5_disable_locking
Browse files (browse the repository at this point in the history)
[24.1] Disable locking when opening h5 files, add missing ``with``
  • Loading branch information
mvdbeek authored Oct 14, 2024
2 parents 612d698 + a42496e commit 3f7646d
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 36 deletions.
32 changes: 16 additions & 16 deletions lib/galaxy/datatypes/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -1299,7 +1299,7 @@ class Loom(H5):

def sniff(self, filename: str) -> bool:
if super().sniff(filename):
with h5py.File(filename, "r") as loom_file:
with h5py.File(filename, "r", locking=False) as loom_file:
# Check the optional but distinctive LOOM_SPEC_VERSION attribute
if bool(loom_file.attrs.get("LOOM_SPEC_VERSION")):
return True
Expand Down Expand Up @@ -1328,7 +1328,7 @@ def display_peek(self, dataset: DatasetProtocol) -> str:
def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None:
super().set_meta(dataset, overwrite=overwrite, **kwd)
try:
with h5py.File(dataset.get_file_name(), "r") as loom_file:
with h5py.File(dataset.get_file_name(), "r", locking=False) as loom_file:
dataset.metadata.title = loom_file.attrs.get("title")
dataset.metadata.description = loom_file.attrs.get("description")
dataset.metadata.url = loom_file.attrs.get("url")
Expand Down Expand Up @@ -1464,15 +1464,15 @@ class Anndata(H5):
def sniff(self, filename: str) -> bool:
if super().sniff(filename):
try:
with h5py.File(filename, "r") as f:
with h5py.File(filename, "r", locking=False) as f:
return all(attr in f for attr in ["X", "obs", "var"])
except Exception:
return False
return False

def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None:
super().set_meta(dataset, overwrite=overwrite, **kwd)
with h5py.File(dataset.get_file_name(), "r") as anndata_file:
with h5py.File(dataset.get_file_name(), "r", locking=False) as anndata_file:
dataset.metadata.title = anndata_file.attrs.get("title")
dataset.metadata.description = anndata_file.attrs.get("description")
dataset.metadata.url = anndata_file.attrs.get("url")
Expand Down Expand Up @@ -1822,15 +1822,15 @@ def sniff(self, filename: str) -> bool:
False
"""
if super().sniff(filename):
with h5py.File(filename, "r") as f:
with h5py.File(filename, "r", locking=False) as f:
required_fields = {"id", "format-url", "type", "generated-by", "creation-date", "nnz", "shape"}
return required_fields.issubset(f.attrs.keys())
return False

def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None:
super().set_meta(dataset, overwrite=overwrite, **kwd)
try:
with h5py.File(dataset.get_file_name(), "r") as f:
with h5py.File(dataset.get_file_name(), "r", locking=False) as f:
attributes = f.attrs

dataset.metadata.id = util.unicodify(attributes["id"])
Expand All @@ -1853,7 +1853,7 @@ def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
if not dataset.dataset.purged:
lines = ["Biom2 (HDF5) file"]
try:
with h5py.File(dataset.get_file_name()) as f:
with h5py.File(dataset.get_file_name(), locking=False) as f:
for k, v in f.attrs.items():
lines.append(f"{k}: {util.unicodify(v)}")
except Exception as e:
Expand Down Expand Up @@ -1900,7 +1900,7 @@ def sniff(self, filename: str) -> bool:

if super().sniff(filename):
keys = ["chroms", "bins", "pixels", "indexes"]
with h5py.File(filename, "r") as handle:
with h5py.File(filename, "r", locking=False) as handle:
fmt = util.unicodify(handle.attrs.get("format"))
url = util.unicodify(handle.attrs.get("format-url"))
if fmt == MAGIC or url == URL:
Expand Down Expand Up @@ -1956,7 +1956,7 @@ def sniff(self, filename: str) -> bool:

if super().sniff(filename):
keys0 = ["resolutions"]
with h5py.File(filename, "r") as handle:
with h5py.File(filename, "r", locking=False) as handle:
if not all(name in handle.keys() for name in keys0):
return False
res0 = next(iter(handle["resolutions"].keys()))
Expand Down Expand Up @@ -2022,7 +2022,7 @@ def set_meta(
params_file = dataset.metadata.spec[spec_key].param.new_file(
dataset=dataset, metadata_tmp_files_dir=metadata_tmp_files_dir
)
with h5py.File(dataset.get_file_name(), "r") as handle:
with h5py.File(dataset.get_file_name(), "r", locking=False) as handle:
hyper_params = handle[self.HYPERPARAMETER][()]
hyper_params = json.loads(util.unicodify(hyper_params))
with open(params_file.get_file_name(), "w") as f:
Expand All @@ -2036,7 +2036,7 @@ def set_meta(
def sniff(self, filename: str) -> bool:
if super().sniff(filename):
keys = [self.CONFIG]
with h5py.File(filename, "r") as handle:
with h5py.File(filename, "r", locking=False) as handle:
if not all(name in handle.keys() for name in keys):
return False
url = util.unicodify(handle.attrs.get(self.URL))
Expand All @@ -2046,7 +2046,7 @@ def sniff(self, filename: str) -> bool:

def get_attribute(self, filename: str, attr_key: str) -> str:
try:
with h5py.File(filename, "r") as handle:
with h5py.File(filename, "r", locking=False) as handle:
attr = util.unicodify(handle.attrs.get(attr_key))
return attr
except Exception as e:
Expand All @@ -2069,7 +2069,7 @@ def get_html_repr(self, filename: str) -> str:

def get_config_string(self, filename: str) -> str:
try:
with h5py.File(filename, "r") as handle:
with h5py.File(filename, "r", locking=False) as handle:
config = util.unicodify(handle[self.CONFIG][()])
return config
except Exception as e:
Expand Down Expand Up @@ -2109,7 +2109,7 @@ def display_data(

out_dict: Dict = {}
try:
with h5py.File(dataset.get_file_name(), "r") as handle:
with h5py.File(dataset.get_file_name(), "r", locking=False) as handle:
out_dict["Attributes"] = {}
attributes = handle.attrs
for k in set(attributes.keys()) - {self.HTTP_REPR, self.REPR, self.URL}:
Expand Down Expand Up @@ -2199,7 +2199,7 @@ class HexrdMaterials(H5):
def sniff(self, filename: str) -> bool:
if super().sniff(filename):
req = {"AtomData", "Atomtypes", "CrystalSystem", "LatticeParameters"}
with h5py.File(filename, "r") as mat_file:
with h5py.File(filename, "r", locking=False) as mat_file:
for k in mat_file.keys():
if isinstance(mat_file[k], h5py._hl.group.Group) and set(mat_file[k].keys()) >= req:
return True
Expand All @@ -2208,7 +2208,7 @@ def sniff(self, filename: str) -> bool:
def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None:
super().set_meta(dataset, overwrite=overwrite, **kwd)
try:
with h5py.File(dataset.get_file_name(), "r") as mat_file:
with h5py.File(dataset.get_file_name(), "r", locking=False) as mat_file:
dataset.metadata.materials = list(mat_file.keys())
sgn = {}
lp = {}
Expand Down
22 changes: 12 additions & 10 deletions lib/galaxy/tool_util/verify/asserts/hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ def assert_has_h5_attribute(output_bytes: bytes, key: str, value: str) -> None:
attribute"""
_assert_h5py()
output_temp = io.BytesIO(output_bytes)
local_attrs = h5py.File(output_temp, "r").attrs
assert (
key in local_attrs and str(local_attrs[key]) == value
), f"Not a HDF5 file or H5 attributes do not match:\n\t{list(local_attrs.items())}\n\n\t({key} : {value})"
with h5py.File(output_temp, "r", locking=False) as h5:
local_attrs = h5.attrs
assert (
key in local_attrs and str(local_attrs[key]) == value
), f"Not a HDF5 file or H5 attributes do not match:\n\t{list(local_attrs.items())}\n\n\t({key} : {value})"


# TODO: the function actually queries groups, so the function and argument names are misleading
Expand All @@ -36,9 +37,10 @@ def append_keys(key):
local_keys.append(key)
return None

h5py.File(output_temp, "r").visit(append_keys)
missing = 0
for key in h5_keys:
if key not in local_keys:
missing += 1
assert missing == 0, f"Not a HDF5 file or H5 keys missing:\n\t{local_keys}\n\t{h5_keys}"
with h5py.File(output_temp, "r", locking=False) as f:
f.visit(append_keys)
missing = 0
for key in h5_keys:
if key not in local_keys:
missing += 1
assert missing == 0, f"Not a HDF5 file or H5 keys missing:\n\t{local_keys}\n\t{h5_keys}"
18 changes: 9 additions & 9 deletions lib/galaxy/tools/recommendations.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,16 +131,16 @@ def __set_model(self, trans, remote_model_url):
Create model and associated dictionaries for recommendations
"""
self.tool_recommendation_model_path = self.__download_model(remote_model_url)
model_file = h5py.File(self.tool_recommendation_model_path, "r")
self.reverse_dictionary = json.loads(model_file["reverse_dict"][()].decode("utf-8"))
self.loaded_model = self.create_transformer_model(len(self.reverse_dictionary) + 1)
self.loaded_model.load_weights(self.tool_recommendation_model_path)
with h5py.File(self.tool_recommendation_model_path, "r", locking=False) as model_file:
self.reverse_dictionary = json.loads(model_file["reverse_dict"][()].decode("utf-8"))
self.loaded_model = self.create_transformer_model(len(self.reverse_dictionary) + 1)
self.loaded_model.load_weights(self.tool_recommendation_model_path)

self.model_data_dictionary = {v: k for k, v in self.reverse_dictionary.items()}
# set the list of compatible tools
self.compatible_tools = json.loads(model_file["compatible_tools"][()].decode("utf-8"))
tool_weights = json.loads(model_file["class_weights"][()].decode("utf-8"))
self.standard_connections = json.loads(model_file["standard_connections"][()].decode("utf-8"))
self.model_data_dictionary = {v: k for k, v in self.reverse_dictionary.items()}
# set the list of compatible tools
self.compatible_tools = json.loads(model_file["compatible_tools"][()].decode("utf-8"))
tool_weights = json.loads(model_file["class_weights"][()].decode("utf-8"))
self.standard_connections = json.loads(model_file["standard_connections"][()].decode("utf-8"))
# sort the tools' usage dictionary
tool_pos_sorted = [int(key) for key in tool_weights.keys()]
for k in tool_pos_sorted:
Expand Down
2 changes: 1 addition & 1 deletion test/unit/tool_util/verify/test_asserts.py
Original file line number Diff line number Diff line change
Expand Up @@ -1259,7 +1259,7 @@ def test_has_json_property_with_text_neg():
if h5py is not None:
with tempfile.NamedTemporaryFile(delete=False) as tmp:
h5name = tmp.name
with h5py.File(tmp.name, "w") as h5fh:
with h5py.File(tmp.name, "w", locking=False) as h5fh:
h5fh.attrs["myfileattr"] = "myfileattrvalue"
h5fh.attrs["myfileattrint"] = 1
dset = h5fh.create_dataset("myint", (100,), dtype="i")
Expand Down

0 comments on commit 3f7646d

Please sign in to comment.