Skip to content

Commit

Permalink
Fixes for converting strings to bytes (#289)
Browse files Browse the repository at this point in the history
* test edge cases

* convert strings with decimals and spaces

* mypy fixes
  • Loading branch information
TomNicholas authored Aug 2, 2023
1 parent 9ff0e4e commit 37ec354
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 21 deletions.
8 changes: 7 additions & 1 deletion cubed/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,12 +283,17 @@ def test_max_mem_deprecation_warning(self):
[
(500, 500),
(100_000, 100_000),
(50.0, 50),
("500B", 500),
("1kB", 1000),
("1MB", 1000**2),
("1GB", 1000**3),
("1TB", 1000**4),
("1PB", 1000**5),
("100_000", 100_000),
("1.2MB", 1.2 * 1000**2),
("1 MB", 1000**2),
("1.2 MB", 1.2 * 1000**2),
],
)
def test_convert_to_bytes(self, input_value, expected_value):
Expand All @@ -302,7 +307,8 @@ def test_convert_to_bytes(self, input_value, expected_value):
"1kb", # lower-case k is not valid
"invalid", # completely invalid input
-512, # negative integer
1000.0, # invalid type
"kB", # only unit, no value
"1.1B", # can't have a fractional number of bytes
],
)
def test_convert_to_bytes_error(self, input_value):
Expand Down
60 changes: 40 additions & 20 deletions cubed/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from operator import add
from pathlib import Path
from posixpath import join
from typing import Tuple, Union
from typing import Dict, Tuple, Union
from urllib.parse import quote, unquote, urlsplit, urlunsplit

import numpy as np
Expand Down Expand Up @@ -185,41 +185,61 @@ def extract_array_names(frame):
return array_names_to_variable_names


def convert_to_bytes(size: Union[int, float, str]) -> int:
    """
    Converts the input data size to bytes.

    The data size can be expressed as a number or as a string with an optional
    SI unit suffix such as '500B', '500kB', '2MB', or '1.5 GB'.

    Parameters
    ----------
    size: int, float, or str:
        Size of data. If numeric it should represent an integer >=0. If str it
        should be of form <value><unit>, where value is a decimal number and
        unit is one of B, kB, MB, GB, TB or PB. The unit may be omitted, in
        which case the value is taken to be in bytes. Spaces are ignored.

    Returns
    -------
    int: The size in bytes

    Raises
    ------
    ValueError
        If a string cannot be parsed, if the size is not a whole number of
        bytes, or if the size is negative.
    """
    # Imported locally so the function only relies on the typing names the
    # module already imports.
    from decimal import Decimal, InvalidOperation

    # SI (decimal) prefixes mapped to their power of 1000.
    units: Dict[str, int] = {"kB": 1, "MB": 2, "GB": 3, "TB": 4, "PB": 5}

    if isinstance(size, str):
        invalid_string = (
            f"Invalid value: {size!r}. Expected the string to be a numeric "
            "value ending with an SI prefix."
        )
        text = size.replace(" ", "")

        # Split off the unit. Slicing and endswith (rather than indexing) keep
        # an empty string from raising IndexError; it fails the numeric parse
        # below and is reported as a ValueError like any other bad input.
        if text[-2:] in units:
            number, power = text[:-2], units[text[-2:]]
        elif text.endswith("B"):
            number, power = text[:-1], 0
        else:
            number, power = text, 0

        try:
            value = Decimal(number)
        except InvalidOperation:
            raise ValueError(invalid_string) from None
        if not value.is_finite():
            # 'nan' and 'inf' parse as numbers but are not sizes.
            raise ValueError(invalid_string)

        # Scale with exact integer arithmetic. Binary floats cannot represent
        # most decimal fractions, so e.g. 1.001 * 1000 is not exactly 1001 and
        # a valid input such as '1.001kB' would wrongly look fractional.
        numerator, denominator = value.as_integer_ratio()
        size, remainder = divmod(numerator * 1000**power, denominator)
        if remainder:
            raise ValueError(
                f"Invalid value: {text}. Can't have a non-integer number of bytes"
            )
    elif isinstance(size, float):
        # is_integer() is False for nan and +/-inf, so those are rejected too.
        if not size.is_integer():
            raise ValueError(
                f"Invalid value: {size}. Can't have a non-integer number of bytes"
            )
        size = int(size)

    if size >= 0:
        return size
    raise ValueError(f"Invalid value: {size}. Must be a non-negative value")

0 comments on commit 37ec354

Please sign in to comment.