Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve usability of the library #56

Merged
merged 18 commits into from
Jan 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/basic_reading_and_writing_numpy.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
"metadata = metadata_dict[binary_filename]\n",
"\n",
"# Load the data\n",
"data = tsdf.load_binary_from_metadata(metadata)\n",
"data = tsdf.load_ndarray_from_binary(metadata)\n",
"\n",
"# Print some info\n",
"print(f\"Data type:\\t {data.dtype}\")\n",
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "tsdf"
version = "0.4.1"
version = "0.5.1"
description = "A Python library that provides methods for encoding and decoding TSDF (Time Series Data Format) data, which allows you to easily create, manipulate and serialize TSDF files in your Python code."
authors = ["Peter Kok <[email protected]>",
"Pablo Rodríguez <[email protected]>",
Expand Down
32 changes: 16 additions & 16 deletions src/tsdf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,34 +8,34 @@
load_metadata_from_path,
load_metadatas_from_dir,
load_metadata_string,
load_metadata_legacy_file
load_metadata_legacy_file,
)
from .write_tsdf import (
write_metadata,
)

from .write_binary import (
write_binary_file,
write_dataframe_to_binaries,
)
from .read_binary import (
load_binary_from_metadata,
load_binaries_to_dataframe,
load_ndarray_from_binary,
load_dataframe_from_binaries,
)

from .tsdfmetadata import TSDFMetadata

__all__ = [
'load_metadata_file',
'load_metadata_from_path',
'load_metadatas_from_dir',
'load_metadata_string',
'load_metadata_legacy_file',
'write_metadata',
'write_binary_file',
'write_dataframe_to_binaries',
'load_binary_from_metadata',
'load_binaries_to_dataframe',
'TSDFMetadata',
'constants'
"load_metadata_file",
"load_metadata_from_path",
"load_metadatas_from_dir",
"load_metadata_string",
"load_metadata_legacy_file",
"write_metadata",
"write_binary_file",
"write_dataframe_to_binaries",
"load_ndarray_from_binary",
"load_dataframe_from_binaries",
"TSDFMetadata",
"constants",
]
49 changes: 25 additions & 24 deletions src/tsdf/parse_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
from typing import Any, Dict, List

from tsdf import constants
from tsdf import tsdfmetadata
from tsdf import tsdfmetadata

def read_data(data: Any, source_path: str) -> Dict[str, 'tsdfmetadata.TSDFMetadata']:

def read_data(data: Any, source_path: str) -> Dict[str, "tsdfmetadata.TSDFMetadata"]:
"""
Function used to parse the JSON object containing TSDF metadata. It returns a
dictionary that maps each binary file name to the TSDFMetadata object describing the formatting of that binary file.
Expand All @@ -25,16 +26,18 @@ def read_data(data: Any, source_path: str) -> Dict[str, 'tsdfmetadata.TSDFMetada

# Check if the version is supported
version = data["metadata_version"]
if not version in constants.SUPPORTED_TSDF_VERSIONS:
raise tsdfmetadata.TSDFMetadataFieldValueError(f"TSDF file version {version} not supported.")
if version not in constants.SUPPORTED_TSDF_VERSIONS:
raise tsdfmetadata.TSDFMetadataFieldValueError(
f"TSDF file version {version} not supported."
)

defined_properties: Dict[str, Any] = {}
return _read_struct(data, defined_properties.copy(), source_path, version)


def _read_struct(
data: Any, defined_properties: Dict[str, Any], source_path, version: str
) -> Dict[str, 'tsdfmetadata.TSDFMetadata']:
) -> Dict[str, "tsdfmetadata.TSDFMetadata"]:
"""
Recursive method used to parse the TSDF metadata in a hierarchical
order (from the root towards the leaves).
Expand All @@ -48,7 +51,7 @@ def _read_struct(

:raises tsdf_metadata.TSDFMetadataFieldError: if the TSDF metadata file is missing a mandatory field.
"""
all_streams: Dict[str, 'tsdfmetadata.TSDFMetadata'] = {}
all_streams: Dict[str, "tsdfmetadata.TSDFMetadata"] = {}
remaining_data = {}
leaf: bool = True

Expand All @@ -66,16 +69,14 @@ def _read_struct(
if leaf:
try:
bin_file_name = defined_properties["file_name"]
path = os.path.split(source_path)
file_dir = os.path.join(path[0])
meta_file_name = path[1]
all_streams[bin_file_name] = tsdfmetadata.TSDFMetadata(
defined_properties, file_dir, meta_file_name
)
except tsdfmetadata.TSDFMetadataFieldError as exc:
raise tsdfmetadata.TSDFMetadataFieldError(
"A property 'file_name' is missing in the TSDF metadata file."
) from exc
except KeyError:
raise tsdfmetadata.TSDFMetadataFieldError.missing_field("file_name")
path = os.path.split(source_path)
file_dir = os.path.join(path[0])
meta_file_name = path[1]
all_streams[bin_file_name] = tsdfmetadata.TSDFMetadata(
defined_properties, file_dir, meta_file_name
)

# 3) If the current element is not a leaf, `remaining_data` will contain lower
# levels of the TSDF structure.
Expand Down Expand Up @@ -158,18 +159,18 @@ def contains_tsdf_mandatory_fields(dictionary: Dict[str, Any]) -> bool:
:raises tsdf_metadata.TSDFMetadataFieldValueError: if the TSDF metadata file contains an invalid value.
"""
version_key = "metadata_version"
if not version_key in dictionary.keys():
raise tsdfmetadata.TSDFMetadataFieldError(f"TSDF structure is missing key '{version_key}'")
if version_key not in dictionary.keys():
raise tsdfmetadata.TSDFMetadataFieldError.missing_field(version_key)

version = dictionary[version_key]
for key in constants.MANDATORY_TSDF_KEYS[version]:
if not key in dictionary.keys():
raise tsdfmetadata.TSDFMetadataFieldError(f"TSDF structure is missing key '{key}'")
if key not in dictionary.keys():
raise tsdfmetadata.TSDFMetadataFieldError.missing_field(key)
units = "units"
channels = "channels"
if len(dictionary[units]) != len(dictionary[channels]):
raise tsdfmetadata.TSDFMetadataFieldValueError(
f"TSDF structure requires equal number of {units} and {channels}"
f"TSDF metadata structure must specify equal number of {units} and {channels} for each binary file."
)

for key, value in dictionary.items():
Expand Down Expand Up @@ -203,8 +204,8 @@ def _check_tsdf_property_format(key: str, value, version: str) -> None:


def get_file_metadata_at_index(
metadata: Dict[str, 'tsdfmetadata.TSDFMetadata'], index: int
) -> 'tsdfmetadata.TSDFMetadata':
metadata: Dict[str, "tsdfmetadata.TSDFMetadata"], index: int
) -> "tsdfmetadata.TSDFMetadata":
"""
Returns the metadata object at the position defined by the index.

Expand All @@ -222,7 +223,7 @@ def get_file_metadata_at_index(
raise IndexError("The index is out of range.")


def confirm_dir_of_metadata(metadatas: List['tsdfmetadata.TSDFMetadata']) -> None:
def confirm_dir_of_metadata(metadatas: List["tsdfmetadata.TSDFMetadata"]) -> None:
"""
The method is used to confirm whether all the metadata files are expected in the same directory.

Expand Down
33 changes: 18 additions & 15 deletions src/tsdf/read_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,49 +8,52 @@
from typing import List, Union
import numpy as np
import pandas as pd
from tsdf import numpy_utils
from tsdf import numpy_utils
from tsdf import tsdfmetadata
from tsdf.constants import ConcatenationType


def load_binaries_to_dataframe(metadatas: '[tsdfmetadata.TSDFMetadata]', concatenation: ConcatenationType = ConcatenationType.none) -> Union[pd.DataFrame, List[pd.DataFrame]]:
def load_dataframe_from_binaries(
metadatas: List["tsdfmetadata.TSDFMetadata"],
concatenation: ConcatenationType = ConcatenationType.none,
) -> Union[pd.DataFrame, List[pd.DataFrame]]:
"""
Load binary files associated with TSDF and return a combined pandas DataFrame.
Load content of binary files associated with TSDF into a pandas DataFrame. The data frames can be concatenated horizontally (ConcatenationType.columns), vertically (ConcatenationType.rows) or provided as a list of data frames (ConcatenationType.none).

:param metadatas: list of TSDFMetadata objects.
:param concatenation: concatenation rule, i.e., determines whether the data frames (content of binary files) should be concatenated horizontally (ConcatenationType.columns), vertically (ConcatenationType.rows) or not concatenated (ConcatenationType.none), but provided as a list of data frames.
:param concatenation: concatenation rule, i.e., determines whether the data frames (content of binary files) should be concatenated horizontally (ConcatenationType.columns), vertically (ConcatenationType.rows) or provided as a list of data frames (ConcatenationType.none).

:return: pandas DataFrame containing the combined data, or a list of pandas DataFrames (one per binary file) when concatenation is ConcatenationType.none.
"""
# Load the data
dataFrames = []
data_frames = []
for metadata in metadatas:
data = load_binary_from_metadata(metadata)
data = load_ndarray_from_binary(metadata)
df = pd.DataFrame(data, columns=metadata.channels)
dataFrames.append(df)
data_frames.append(df)

# Merge the data
if concatenation == ConcatenationType.rows:
return pd.concat(dataFrames)
return pd.concat(data_frames)
elif concatenation == ConcatenationType.columns:
return pd.concat(dataFrames, axis=1)
return pd.concat(data_frames, axis=1)
elif concatenation == ConcatenationType.none:
return dataFrames
return data_frames


def load_binary_from_metadata(
metadata: 'tsdfmetadata.TSDFMetadata', start_row: int = 0, end_row: int = -1
def load_ndarray_from_binary(
metadata: "tsdfmetadata.TSDFMetadata", start_row: int = 0, end_row: int = -1
) -> np.ndarray:
"""
Use metadata properties to load and return numpy array from a binary file (located in the same directory where the metadata is saved).

:param metadata: TSDFMetadata object.
:param start_row: (optional) first row to load.
:param end_row: (optional) last row to load. If -1, load all rows.

:return: numpy array containing the data."""
metadata_dir = metadata.file_dir_path

bin_path = os.path.join(metadata_dir, metadata.file_name)
return _load_binary_file(
bin_path,
Expand All @@ -75,7 +78,7 @@ def _load_binary_file(
end_row: int = -1,
) -> np.ndarray:
"""
Use provided parameters to load and return a numpy array from a binary file
Use provided parameters to load and return a numpy array from a binary file.

:param bin_file_path: path to the binary file.
:param data_type: data type of the binary file.
Expand Down
9 changes: 6 additions & 3 deletions src/tsdf/tsdfmetadata.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import copy
from typing import Any, Dict, List
from numpy import ndarray
from tsdf import parse_metadata, read_binary

from tsdf import parse_metadata

class TSDFMetadataFieldError(Exception):
"Raised when the TSDFMetadata is missing an obligatory field."
pass
@classmethod
def missing_field(cls, field_name: str):
message = f"Value for the obligatory TSDF field '{field_name}' is missing in the provided TSDF metadata file."
return cls(message)


class TSDFMetadataFieldValueError(Exception):
Expand Down
20 changes: 13 additions & 7 deletions src/tsdf/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
from tsdf import read_tsdf, read_binary


def validate_tsdf_format(file_path):
try:
# Read the meta data (this will check for compulsory fields and such)
Expand All @@ -15,27 +16,31 @@ def validate_tsdf_format(file_path):

# Loop through all the files in the metadata
for file_name, file_metadata in metadata.items():

# print the file_metadata as json
# print(json.dumps(file_metadata.get_plain_tsdf_dict_copy(), indent=4))

# Load the binary data
binary_data = read_binary.load_binary_from_metadata(file_metadata)
binary_data = read_binary.load_ndarray_from_binary(file_metadata)

# Success message
print(f"Successfully loaded binary file {file_name}, resulting shape: {binary_data.shape}")
print(
f"Successfully loaded binary file {file_name}, resulting shape: {binary_data.shape}"
)

return True

except Exception as e:
print(f"Error while validating: {e}")
#traceback.print_exc()
# traceback.print_exc()
return False


def main():
# Parse the arguments
parser = argparse.ArgumentParser(description='Validate a file content against the TSDF format.')
parser.add_argument('file_path', help='Path to the file to validate')
parser = argparse.ArgumentParser(
description="Validate a file content against the TSDF format."
)
parser.add_argument("file_path", help="Path to the file to validate")
args = parser.parse_args()

# Perform validation
Expand All @@ -44,5 +49,6 @@ def main():
# Exit with error code 1 if the validation failed
exit(0 if is_valid else 1)

if __name__ == '__main__':

if __name__ == "__main__":
main()
Loading
Loading