-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
17c4e02
commit f90fdb4
Showing
3 changed files
with
77 additions
and
63 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
"""Script to convert a dataset to hdf5 format.""" | ||
|
||
from __future__ import annotations | ||
|
||
import argparse | ||
import os | ||
|
||
import numpy as np | ||
from tqdm import tqdm | ||
|
||
from vis4d.common.imports import H5PY_AVAILABLE | ||
|
||
# This script cannot do anything useful without h5py, so fail at import time
# with an actionable message instead of a NameError later on.
if not H5PY_AVAILABLE:
    raise ImportError("Please install h5py to enable HDF5Backend.")

import h5py
|
||
|
||
def convert_dataset(source_dir: str) -> None:
    """Convert a dataset to HDF5 format.

    This function converts an arbitrary directory to an HDF5 file. The keys
    inside the HDF5 file preserve the directory structure of the original.

    As an example, if you convert "/path/to/dataset" to HDF5, the resulting
    file will be: "/path/to/dataset.hdf5". The file "relative/path/to/file"
    will be stored at "relative/path/to/file" inside /path/to/dataset.hdf5.
    Every file is stored as a one-dimensional uint8 dataset holding its raw
    bytes.

    If the output file already exists, nothing is converted. If the
    conversion fails part-way, the incomplete output file is removed so that
    a later run does not mistake it for a finished conversion.

    Args:
        source_dir (str): The path to the dataset to convert.

    Raises:
        FileNotFoundError: If source_dir does not exist.
    """
    if not os.path.exists(source_dir):
        raise FileNotFoundError(f"No such file or directory: {source_dir}")

    source_dir = os.path.join(source_dir, "")  # must end with trailing slash
    # os.path.join appends the platform separator, so strip every separator
    # the platform understands rather than only "/".
    separators = os.sep + (os.altsep or "")
    hdf5_path = source_dir.rstrip(separators) + ".hdf5"
    if os.path.exists(hdf5_path):
        print(f"File {hdf5_path} already exists! Skipping {source_dir}")
        return

    print(f"Converting dataset at: {source_dir}")
    sub_dirs = list(os.walk(source_dir))
    file_count = sum(len(files) for (_, _, files) in sub_dirs)

    try:
        # The context manager guarantees the HDF5 handle is closed even if
        # reading or writing one of the files raises.
        with h5py.File(hdf5_path, mode="w") as hdf5_file:
            with tqdm(total=file_count) as pbar:
                for root, _, files in sub_dirs:
                    # relpath only strips the leading source_dir, whereas
                    # str.replace would also remove matching substrings
                    # further down the path.
                    rel_root = os.path.relpath(root, source_dir)
                    if rel_root == os.curdir:
                        g_name = ""
                    else:
                        # HDF5 uses "/" as group separator on all platforms.
                        g_name = rel_root.replace(os.sep, "/")
                    g = (
                        hdf5_file.create_group(g_name)
                        if g_name
                        else hdf5_file
                    )
                    for f in files:
                        filepath = os.path.join(root, f)
                        if os.path.isfile(filepath):
                            with open(filepath, "rb") as fp:
                                file_content = fp.read()
                            g.create_dataset(
                                f,
                                data=np.frombuffer(
                                    file_content, dtype="uint8"
                                ),
                            )
                        # Skipped entries were counted in file_count too, so
                        # advance the bar for them as well.
                        pbar.update()
    except BaseException:
        # hdf5_path did not exist before this call (checked above), so the
        # file is ours; drop the truncated output, otherwise the next run
        # would skip this dataset as "already exists".
        if os.path.exists(hdf5_path):
            os.remove(hdf5_path)
        raise

    print("done.")
|
||
|
||
if __name__ == "__main__":  # pragma: no cover
    # Command-line entry point: read the dataset root from -p/--path and
    # convert it to an HDF5 file.
    cli = argparse.ArgumentParser(
        description=(
            "Converts a dataset at the specified path to hdf5. The "
            "local directory structure is preserved in the hdf5 file."
        )
    )
    cli.add_argument(
        "-p",
        "--path",
        required=True,
        help="path to the root folder of a specific dataset to convert",
    )
    convert_dataset(cli.parse_args().path)