Skip to content

Commit

Permalink
Merge branch 'main' into raw-gdp-files-adapter
Browse files Browse the repository at this point in the history
  • Loading branch information
philippemiron authored Aug 6, 2024
2 parents ca9314d + bfdc533 commit 4b3a916
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 2 deletions.
3 changes: 1 addition & 2 deletions clouddrift/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,8 @@ def gdp1h(decode_times: bool = True) -> xr.Dataset:
:func:`gdp6h`
"""

url = "https://noaa-oar-hourly-gdp-pds.s3.amazonaws.com/latest/gdp-v2.01.zarr"
url = "https://noaa-oar-hourly-gdp-pds.s3.amazonaws.com/latest/gdp-v2.01.1.zarr"
ds = xr.open_dataset(url, engine="zarr", decode_times=decode_times)
ds = ds.rename_vars({"ID": "id"}).assign_coords({"id": ds.ID}).drop_vars(["ids"])
return ds


Expand Down
54 changes: 54 additions & 0 deletions clouddrift/ragged.py
Original file line number Diff line number Diff line change
Expand Up @@ -854,6 +854,60 @@ def unpack(
return [unpacked[i] for i in rows]


def obs_index_to_row(
    index: int | list[int] | np.ndarray | xr.DataArray,
    rowsize: list[int] | np.ndarray | xr.DataArray,
) -> list:
    """Obtain a list of row indices from a list of observation indices of a ragged array.

    Parameters
    ----------
    index : int or list or np.ndarray or xr.DataArray
        An integer observation index or a sequence of observation indices of a
        ragged array. NumPy integer scalars (e.g. ``np.int64``) are accepted
        wherever a Python ``int`` is.
    rowsize : list or np.ndarray or xr.DataArray
        A sequence of row sizes of a ragged array.

    Returns
    -------
    list
        A list of row indices, one per input observation index. A scalar
        ``index`` yields a one-element list.

    Raises
    ------
    ValueError
        If ``index`` is not an integer or a sequence of integers, or if any
        index is negative or not smaller than the total number of observations
        implied by ``rowsize``.

    Examples
    --------
    To obtain the row index of observation 5 within a ragged array of three consecutive
    rows of sizes 2, 4, and 3:

    >>> obs_index_to_row(5, [2, 4, 3])
    [1]

    To obtain the row indices of observations 0, 2, and 4 within a ragged array of three consecutive
    rows of sizes 2, 4, and 3:

    >>> obs_index_to_row([0, 2, 4], [2, 4, 3])
    [0, 1, 1]
    """
    type_error_msg = "The index must be an integer or a list of integers."

    # Unwrap xarray containers so they share the ndarray code path below.
    if isinstance(index, xr.DataArray):
        index = index.values

    if isinstance(index, np.ndarray):
        # atleast_1d makes 0-d arrays iterable instead of raising TypeError.
        index_list = [int(i) for i in np.atleast_1d(index)]
    elif isinstance(index, (int, np.integer)):
        # np.integer covers scalars such as np.int64, which are not `int` subclasses.
        index_list = [int(index)]
    else:
        try:
            candidates = list(index)
        except TypeError as err:
            # Non-iterable, non-integer input (e.g. a float or None).
            raise ValueError(type_error_msg) from err
        if not all(isinstance(i, (int, np.integer)) for i in candidates):
            raise ValueError(type_error_msg)
        index_list = [int(i) for i in candidates]

    rowsize_index = rowsize_to_index(rowsize)

    # The first and last entries of rowsize_index bound the valid observation
    # indices: lower bound inclusive, upper bound exclusive.
    lower, upper = rowsize_index[0], rowsize_index[-1]
    if any(i < lower or i >= upper for i in index_list):
        raise ValueError("Input index out of bounds based on input rowsize")

    # side="right" places an index equal to a boundary into the row that starts
    # there; subtracting 1 converts the insertion point into a row number.
    return (np.searchsorted(rowsize_index, index_list, side="right") - 1).tolist()


def _mask_var(
var: xr.DataArray | list[xr.DataArray],
criterion: tuple | list | np.ndarray | xr.DataArray | bool | float | int | Callable,
Expand Down
57 changes: 57 additions & 0 deletions tests/ragged_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from clouddrift.ragged import (
apply_ragged,
chunk,
obs_index_to_row,
prune,
ragged_to_regular,
regular_to_ragged,
Expand Down Expand Up @@ -807,3 +808,59 @@ def test_unpack_rows(self):
for a, b in zip(unpack(x, rowsize, np.int64(0)), unpack(x, rowsize)[:1])
)
)


class obs_index_to_row_tests(unittest.TestCase):
    """Check ``obs_index_to_row`` for each supported container type of its inputs."""

    # Three consecutive rows holding 2, 5, and 3 observations (10 in total).
    ROWSIZE = [2, 5, 3]
    # Row expected for each of the observation indices 0..9 given ROWSIZE.
    EXPECTED = [0, 0, 1, 1, 1, 1, 1, 2, 2, 2]

    def test_obs_index_to_row(self):
        index = list(range(10))
        row = obs_index_to_row(index, self.ROWSIZE)
        # assertEqual also verifies that a plain list is returned and reports
        # the differing elements on failure.
        self.assertEqual(row, self.EXPECTED)

    def test_obs_index_to_row_scalar_index(self):
        # A single integer index yields a one-element list.
        self.assertEqual(obs_index_to_row(5, [2, 4, 3]), [1])

    def test_obs_index_to_row_array_like_rowsize(self):
        index = list(range(10))
        rowsize = xr.DataArray(data=self.ROWSIZE)
        row = obs_index_to_row(index, rowsize)
        self.assertEqual(row, self.EXPECTED)

    def test_obs_index_to_row_array_rowsize(self):
        index = list(range(10))
        rowsize = np.array(self.ROWSIZE)
        row = obs_index_to_row(index, rowsize)
        self.assertEqual(row, self.EXPECTED)

    def test_obs_index_to_row_array_like_index(self):
        index = xr.DataArray(data=list(range(10)))
        row = obs_index_to_row(index, self.ROWSIZE)
        self.assertEqual(row, self.EXPECTED)

    def test_obs_index_to_row_array_index(self):
        index = np.array(range(10))
        row = obs_index_to_row(index, self.ROWSIZE)
        self.assertEqual(row, self.EXPECTED)

    def test_obs_index_to_row_array_like(self):
        index = xr.DataArray(data=list(range(10)))
        rowsize = xr.DataArray(data=self.ROWSIZE)
        row = obs_index_to_row(index, rowsize)
        self.assertEqual(row, self.EXPECTED)

    def test_obs_index_to_row_array(self):
        index = np.array(range(10))
        rowsize = np.array(self.ROWSIZE)
        row = obs_index_to_row(index, rowsize)
        self.assertEqual(row, self.EXPECTED)

    def test_obs_index_to_row_out_of_bounds(self):
        # Index 10 is one past the last valid observation (0..9).
        with self.assertRaises(ValueError):
            obs_index_to_row(10, self.ROWSIZE)

    def test_obs_index_to_row_negative(self):
        # Negative indices are rejected rather than wrapping around.
        with self.assertRaises(ValueError):
            obs_index_to_row(-1, self.ROWSIZE)

0 comments on commit 4b3a916

Please sign in to comment.