Skip to content

Commit

Permalink
Implement ragged_to_2darray and matrix_to_ragged (#158)
Browse files Browse the repository at this point in the history
* Implement ragged_to_2darray and matrix_to_ragged

* Bump version

* Rename new functions and update docstrings
  • Loading branch information
milancurcic authored Apr 11, 2023
1 parent acd35f9 commit 76e53ae
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 1 deletion.
68 changes: 68 additions & 0 deletions clouddrift/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,74 @@ def chunk(
return res


def regular_to_ragged(array: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """Flatten a NaN-padded two-dimensional array into a ragged array.

    Every NaN entry of ``array`` is dropped; the remaining values are
    returned in row-major order together with the number of values kept
    from each row.

    Parameters
    ----------
    array : np.ndarray
        A two-dimensional array in which NaN marks missing entries.

    Returns
    -------
    tuple[np.ndarray, np.ndarray]
        The one-dimensional ragged array of non-NaN values, and the count
        of non-NaN values in each row of ``array``.

    Examples
    --------
    >>> regular_to_ragged(np.array([[1, 2], [3, np.nan], [4, 5]]))
    (array([1., 2., 3., 4., 5.]), array([2, 1, 2]))

    See Also
    --------
    :func:`ragged_to_regular`
    """
    flat_values = array.ravel()
    # Build the validity mask once; it serves both the selection of values
    # and the per-row counts.
    is_valid = np.logical_not(np.isnan(array))
    return flat_values[is_valid.ravel()], is_valid.sum(axis=1)


def ragged_to_regular(
    ragged: Union[np.ndarray, pd.Series, xr.DataArray],
    rowsize: Union[list, np.ndarray, pd.Series, xr.DataArray],
) -> np.ndarray:
    """Convert a ragged array to a NaN-padded two-dimensional array.

    Each contiguous segment of ``ragged`` (as delimited by ``rowsize``)
    becomes one row of the output. Rows shorter than the longest row are
    padded on the right with NaN.

    Note: Although this function accepts parameters of type
    ``xarray.DataArray`` and ``pandas.Series``, they are converted to NumPy
    arrays internally; passing NumPy arrays avoids that conversion.

    Parameters
    ----------
    ragged : np.ndarray or pd.Series or xr.DataArray
        A one-dimensional ragged array.
    rowsize : list or np.ndarray[int] or pd.Series or xr.DataArray[int]
        The size of each row in the ragged array.

    Returns
    -------
    np.ndarray
        A two-dimensional array of shape ``(len(rowsize), max(rowsize))``.
        Floating-point (and complex) inputs keep their dtype; any other
        input dtype yields ``float64`` so that NaN can be represented. An
        empty ``rowsize`` yields an array of shape ``(0, 0)``.

    Raises
    ------
    ValueError
        If ``ragged`` has fewer elements than ``sum(rowsize)``.

    Examples
    --------
    >>> ragged_to_regular(np.array([1, 2, 3, 4, 5]), np.array([2, 1, 2]))
    array([[ 1.,  2.],
           [ 3., nan],
           [ 4.,  5.]])

    See Also
    --------
    :func:`regular_to_ragged`
    """
    # Work on plain NumPy arrays so that indexing is purely positional; a
    # pandas Series with a non-default index would otherwise be indexed by
    # label.
    values = np.asarray(ragged)
    sizes = np.asarray(rowsize).astype(np.int64)

    n_rows = sizes.size
    # max() of an empty sequence raises, so handle "no rows" explicitly.
    n_cols = int(sizes.max()) if n_rows else 0
    n_values = int(sizes.sum())

    if values.size < n_values:
        raise ValueError(
            f"ragged has {values.size} elements but rowsize requires {n_values}"
        )

    # NaN needs an inexact dtype: keep float/complex inputs as they are and
    # promote everything else (e.g. integers) to float64.
    if np.issubdtype(values.dtype, np.inexact):
        out_dtype = values.dtype
    else:
        out_dtype = np.float64
    res = np.full((n_rows, n_cols), np.nan, dtype=out_dtype)

    # True where a row holds real data. Boolean-mask assignment fills in
    # row-major order, which is exactly the layout of the ragged array.
    has_data = np.arange(n_cols) < sizes[:, np.newaxis]
    res[has_data] = values[:n_values]
    return res


def segment(
x: np.ndarray,
tolerance: Union[float, np.timedelta64, timedelta, pd.Timedelta],
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "clouddrift"
version = "0.10.0"
version = "0.11.0"
authors = [
{ name="Shane Elipot", email="[email protected]" },
{ name="Philippe Miron", email="[email protected]" },
Expand Down
45 changes: 45 additions & 0 deletions tests/analysis_tests.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from clouddrift.analysis import (
apply_ragged,
chunk,
regular_to_ragged,
ragged_to_regular,
segment,
subset,
velocity_from_position,
Expand Down Expand Up @@ -212,6 +214,49 @@ def test_segments_pandas(self):
)


class ragged_to_regular_tests(unittest.TestCase):
    """Tests for the ragged <-> regular (NaN-padded 2-D) array conversions."""

    def test_ragged_to_regular(self):
        ragged = np.array([1, 2, 3, 4, 5])
        rowsize = [2, 1, 2]
        expected = np.array([[1, 2], [3, np.nan], [4, 5]])

        # The same data wrapped in each supported container type.
        cases = {
            "numpy": (ragged, rowsize),
            "xarray": (xr.DataArray(data=ragged), xr.DataArray(data=rowsize)),
            "pandas": (pd.Series(data=ragged), pd.Series(data=rowsize)),
        }
        for container, (values, sizes) in cases.items():
            with self.subTest(container=container):
                # assert_array_equal checks the shape, treats NaNs in matching
                # positions as equal, and reports the differing elements.
                np.testing.assert_array_equal(
                    ragged_to_regular(values, sizes), expected
                )

    def test_regular_to_ragged(self):
        matrix = np.array([[1, 2], [3, np.nan], [4, 5]])
        expected = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
        expected_rowsize = np.array([2, 1, 2])

        result, rowsize = regular_to_ragged(matrix)
        np.testing.assert_array_equal(result, expected)
        np.testing.assert_array_equal(rowsize, expected_rowsize)

    def test_ragged_to_regular_roundtrip(self):
        ragged = np.array([1, 2, 3, 4, 5])
        rowsize = [2, 1, 2]

        new_ragged, new_rowsize = regular_to_ragged(ragged_to_regular(ragged, rowsize))
        np.testing.assert_array_equal(new_ragged, ragged)
        np.testing.assert_array_equal(new_rowsize, rowsize)


class velocity_from_position_tests(unittest.TestCase):
def setUp(self):
self.INPUT_SIZE = 100
Expand Down

0 comments on commit 76e53ae

Please sign in to comment.