Merge branch 'main' into raw-gdp-files-adapter

Cloud-Drift · Aug 6, 2024 · 4b3a916 · 4b3a916
2 parents ca9314d + bfdc533
commit 4b3a916
Show file tree

Hide file tree

Showing 3 changed files with 112 additions and 2 deletions.
diff --git a/clouddrift/datasets.py b/clouddrift/datasets.py
@@ -79,9 +79,8 @@ def gdp1h(decode_times: bool = True) -> xr.Dataset:
     :func:`gdp6h`
     """
 
-    url = "https://noaa-oar-hourly-gdp-pds.s3.amazonaws.com/latest/gdp-v2.01.zarr"
+    url = "https://noaa-oar-hourly-gdp-pds.s3.amazonaws.com/latest/gdp-v2.01.1.zarr"
     ds = xr.open_dataset(url, engine="zarr", decode_times=decode_times)
-    ds = ds.rename_vars({"ID": "id"}).assign_coords({"id": ds.ID}).drop_vars(["ids"])
     return ds
 
 

diff --git a/clouddrift/ragged.py b/clouddrift/ragged.py
@@ -854,6 +854,60 @@ def unpack(
     return [unpacked[i] for i in rows]
 
 
+def obs_index_to_row(
+    index: int | list[int] | np.ndarray | xr.DataArray,
+    rowsize: list[int] | np.ndarray | xr.DataArray,
+) -> list:
+    """Obtain a list of row indices from a list of observation indices of a ragged array.
+
+    Parameters
+    ----------
+    index : int or list or np.ndarray or xr.DataArray
+        An integer observation index or a sequence of observation indices of a
+        ragged array. NumPy integer scalars are accepted wherever a Python
+        ``int`` is.
+    rowsize : list or np.ndarray or xr.DataArray
+        A sequence of row sizes of a ragged array.
+
+    Returns
+    -------
+    list
+        A list of row indices, one per input observation index.
+
+    Raises
+    ------
+    ValueError
+        If ``index`` is neither an integer nor a sequence of integers, or if
+        any observation index is out of bounds for the given ``rowsize``.
+
+    Examples
+    --------
+    To obtain the row index of observation 5 within a ragged array of three consecutive
+    rows of sizes 2, 4, and 3:
+
+    >>> obs_index_to_row(5, [2, 4, 3])
+    [1]
+
+    To obtain the row indices of observations 0, 2, and 4 within a ragged array of three consecutive
+    rows of sizes 2, 4, and 3:
+
+    >>> obs_index_to_row([0, 2, 4], [2, 4, 3])
+    [0, 1, 1]
+
+    """
+    # NumPy integer scalars (e.g. np.int64) are not instances of ``int``, so
+    # both families have to be accepted explicitly.
+    integer_types = (int, np.integer)
+    type_error_message = "The index must be an integer or a list of integers."
+
+    # Normalize the input to a flat list of observation indices.
+    if isinstance(index, xr.DataArray):
+        index = index.values
+    if isinstance(index, np.ndarray):
+        # atleast_1d makes 0-d arrays iterable; elements are cast with int()
+        index_list = [int(i) for i in np.atleast_1d(index)]
+    elif isinstance(index, integer_types):
+        index_list = [int(index)]
+    else:
+        try:
+            index_list = list(index)
+        except TypeError:
+            # a non-iterable, non-integer scalar (e.g. a float) was passed
+            raise ValueError(type_error_message) from None
+
+    # if index is not a list of integers or integer-likes, raise an error
+    if not all(isinstance(i, integer_types) for i in index_list):
+        raise ValueError(type_error_message)
+
+    rowsize_index = rowsize_to_index(rowsize)
+
+    # The first and last entries of rowsize_index delimit the valid range of
+    # observation indices: lower bound inclusive, upper bound exclusive.
+    lower_bound = rowsize_index[0]
+    upper_bound = rowsize_index[-1]
+
+    # test that no index is out of bounds
+    if any(i < lower_bound or i >= upper_bound for i in index_list):
+        raise ValueError("Input index out of bounds based on input rowsize")
+
+    # side="right" maps an index equal to a row boundary to the row that
+    # starts there rather than to the preceding row.
+    return (np.searchsorted(rowsize_index, index_list, side="right") - 1).tolist()
+
+
 def _mask_var(
     var: xr.DataArray | list[xr.DataArray],
     criterion: tuple | list | np.ndarray | xr.DataArray | bool | float | int | Callable,

diff --git a/tests/ragged_tests.py b/tests/ragged_tests.py
@@ -11,6 +11,7 @@
 from clouddrift.ragged import (
     apply_ragged,
     chunk,
+    obs_index_to_row,
     prune,
     ragged_to_regular,
     regular_to_ragged,
@@ -807,3 +808,59 @@ def test_unpack_rows(self):
                 for a, b in zip(unpack(x, rowsize, np.int64(0)), unpack(x, rowsize)[:1])
             )
         )
+
+
+class obs_index_to_row_tests(unittest.TestCase):
+    """Check ``obs_index_to_row`` for each supported container type of its arguments.
+
+    Results are compared with ``assertEqual`` against a plain list so that a
+    failure reports the differing elements and the ``list`` return type is
+    verified as well.
+    """
+
+    def test_obs_index_to_row(self):
+        # list index, list rowsize
+        index = list(range(10))
+        rowsize = [2, 5, 3]
+        row = obs_index_to_row(index, rowsize)
+        self.assertEqual(row, [0, 0, 1, 1, 1, 1, 1, 2, 2, 2])
+
+    def test_obs_index_to_row_array_like_rowsize(self):
+        # list index, xr.DataArray rowsize
+        index = list(range(10))
+        rowsize = xr.DataArray(data=[2, 5, 3])
+        row = obs_index_to_row(index, rowsize)
+        self.assertEqual(row, [0, 0, 1, 1, 1, 1, 1, 2, 2, 2])
+
+    def test_obs_index_to_row_array_rowsize(self):
+        # list index, np.ndarray rowsize
+        index = list(range(10))
+        rowsize = np.array([2, 5, 3])
+        row = obs_index_to_row(index, rowsize)
+        self.assertEqual(row, [0, 0, 1, 1, 1, 1, 1, 2, 2, 2])
+
+    def test_obs_index_to_row_array_like_index(self):
+        # xr.DataArray index, list rowsize
+        index = xr.DataArray(data=list(range(10)))
+        rowsize = [2, 5, 3]
+        row = obs_index_to_row(index, rowsize)
+        self.assertEqual(row, [0, 0, 1, 1, 1, 1, 1, 2, 2, 2])
+
+    def test_obs_index_to_row_array_index(self):
+        # np.ndarray index, list rowsize
+        index = np.array(range(10))
+        rowsize = [2, 5, 3]
+        row = obs_index_to_row(index, rowsize)
+        self.assertEqual(row, [0, 0, 1, 1, 1, 1, 1, 2, 2, 2])
+
+    def test_obs_index_to_row_array_like(self):
+        # xr.DataArray for both arguments
+        index = xr.DataArray(data=list(range(10)))
+        rowsize = xr.DataArray(data=[2, 5, 3])
+        row = obs_index_to_row(index, rowsize)
+        self.assertEqual(row, [0, 0, 1, 1, 1, 1, 1, 2, 2, 2])
+
+    def test_obs_index_to_row_array(self):
+        # np.ndarray for both arguments
+        index = np.array(range(10))
+        rowsize = np.array([2, 5, 3])
+        row = obs_index_to_row(index, rowsize)
+        self.assertEqual(row, [0, 0, 1, 1, 1, 1, 1, 2, 2, 2])
+
+    def test_obs_index_to_row_out_of_bounds(self):
+        # 10 is one past the last valid observation index (2 + 5 + 3 = 10)
+        index = 10
+        rowsize = [2, 5, 3]
+        with self.assertRaises(ValueError):
+            obs_index_to_row(index, rowsize)
+
+    def test_obs_index_to_row_negative(self):
+        # negative indices are rejected rather than wrapped around
+        index = -1
+        rowsize = [2, 5, 3]
+        with self.assertRaises(ValueError):
+            obs_index_to_row(index, rowsize)