Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Standardized Streamflow Index and Standardized Groundwater level Index #1877

Draft
wants to merge 16 commits into
base: main
Choose a base branch
from
37 changes: 37 additions & 0 deletions tests/test_hydrology.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
from __future__ import annotations

from pathlib import Path

import numpy as np
import pytest

from xclim import indices as xci
from xclim import land
from xclim.core.units import convert_units_to


class TestBaseFlowIndex:
Expand All @@ -23,6 +28,38 @@ def test_simple(self, q_series):
np.testing.assert_array_equal(out, 2)


class TestStandardizedStreamflow:
nc_ds = Path("Raven", "q_sim.nc")
LamAdr marked this conversation as resolved.
Show resolved Hide resolved

@pytest.mark.slow
def test_3d_data_with_nans(self, open_dataset):
# test with data
ds = open_dataset(self.nc_ds)
q = ds.q_obs.sel(time=slice("2008")).rename("q")
qMM = convert_units_to(q, "mm**3/s", context="hydro") # noqa
# put a nan somewhere
qMM.values[10] = np.nan
q.values[10] = np.nan

out1 = land.standardized_streamflow_index(
q,
freq="MS",
window=1,
dist="genextreme",
method="APP",
fitkwargs={"floc": 0},
)
out2 = land.standardized_streamflow_index(
qMM,
freq="MS",
window=1,
dist="genextreme",
method="APP",
fitkwargs={"floc": 0},
)
np.testing.assert_array_almost_equal(out1, out2, 3)


class TestSnwMax:
def test_simple(self, snw_series):
a = np.zeros(366)
Expand Down
48 changes: 48 additions & 0 deletions tests/test_indices.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,54 @@ def test_standardized_precipitation_evapotranspiration_index(

np.testing.assert_allclose(spei.values, values, rtol=0, atol=diff_tol)

# reference results: Obtained with R package `standaRdized`
Copy link
Collaborator Author

@LamAdr LamAdr Aug 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@SarahG-579462 @coxipi
I couldn't find any other reference to test against for SSI (same for SGI I think). I could only replicate two conditions out of many. When the values get too far from zero, standaRdized outputs lots of NaNs and the non-NaNs are far away from us. Also in these replicable values, the offset is somewhat constant.

It looks like the problem is on their side?

Do you think this is still a valuable test then?

I'll also add a couple tests against ourselves.

@pytest.mark.slow
@pytest.mark.parametrize(
"freq, window, dist, method, values, diff_tol",
[
(
"D",
1,
"genextreme",
"ML",
[0.5331, 0.5338, 0.5098, 0.4656, 0.4937],
9e-2,
),
(
"D",
12,
"genextreme",
"ML",
[0.4414, 0.4695, 0.4861, 0.4838, 0.4877],
9e-2,
),
],
)
def test_standardized_streamflow_index(
self, open_dataset, freq, window, dist, method, values, diff_tol
):
ds = open_dataset("Raven/q_sim.nc")
q = ds.q_obs.rename("q")
q_cal = ds.q_sim.rename("q").fillna(ds.q_sim.mean())
if freq == "D":
q = q.sel(time=slice("2008-01-01", "2008-01-30")).fillna(ds.q_obs.mean())
else:
q = q.sel(time=slice("2008-01-01", "2008-12-31")).fillna(ds.q_obs.mean())
fitkwargs = {}
params = xci.stats.standardized_index_fit_params(
q_cal,
freq=freq,
window=window,
dist=dist,
method=method,
fitkwargs=fitkwargs,
zero_inflated=True,
)
ssi = xci.standardized_streamflow_index(q, params=params)
ssi = ssi.isel(time=slice(-11, -1, 2)).values.flatten()

np.testing.assert_allclose(ssi, values, rtol=0, atol=diff_tol)

@pytest.mark.parametrize(
"indexer",
[
Expand Down
28 changes: 27 additions & 1 deletion xclim/indicators/land/_streamflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,19 @@
from xclim.core.cfchecks import check_valid
from xclim.core.indicator import ResamplingIndicator
from xclim.core.units import declare_units
from xclim.indices import base_flow_index, generic, rb_flashiness_index
from xclim.indices import (
base_flow_index,
generic,
rb_flashiness_index,
standardized_streamflow_index,
)

__all__ = [
"base_flow_index",
"doy_qmax",
"doy_qmin",
"rb_flashiness_index",
"standardized_streamflow_index",
LamAdr marked this conversation as resolved.
Show resolved Hide resolved
]


Expand All @@ -25,6 +31,13 @@ def cfcheck(q):
check_valid(q, "standard_name", "water_volume_transport_in_river_channel")


class StandardizedIndexes(ResamplingIndicator):
"""Resampling but flexible inputs indicators."""

src_freq = ["D", "MS"]
context = "hydro"


base_flow_index = Streamflow(
title="Base flow index",
identifier="base_flow_index",
Expand Down Expand Up @@ -71,3 +84,16 @@ def cfcheck(q):
compute=declare_units(da="[discharge]")(generic.select_resample_op),
parameters=dict(op=generic.doymin, out_units=None),
)


standardized_streamflow_index = StandardizedIndexes(
title="Standardized Streamflow Index (SSI)",
identifier="ssi",
units="",
standard_name="ssi",
long_name="Standardized Streamflow Index (SSI)",
description="FILLME",
abstract="FILLME",
cell_methods="",
compute=standardized_streamflow_index,
)
128 changes: 128 additions & 0 deletions xclim/indices/_hydrology.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from xclim.core.calendar import get_calendar
from xclim.core.missing import at_least_n_valid
from xclim.core.units import declare_units, rate2amount
from xclim.core.utils import DateStr, Quantified
from xclim.indices.stats import standardized_index

from . import generic

Expand All @@ -19,6 +21,7 @@
"snow_melt_we_max",
"snw_max",
"snw_max_doy",
"standardized_streamflow_index",
]


Expand Down Expand Up @@ -104,6 +107,131 @@ def rb_flashiness_index(q: xarray.DataArray, freq: str = "YS") -> xarray.DataArr
return out


@declare_units(
q="[discharge]",
params="[]",
)
def standardized_streamflow_index(
q: xarray.DataArray,
freq: str | None = "MS",
window: int = 1,
dist: str = "genextreme",
method: str = "ML",
fitkwargs: dict | None = None,
cal_start: DateStr | None = None,
cal_end: DateStr | None = None,
params: Quantified | None = None,
**indexer,
) -> xarray.DataArray:
r"""Standardized Streamflow Index (SSI).

Parameters
----------
q : xarray.DataArray
Rate of river discharge.
freq : str, optional
Resampling frequency. A monthly or daily frequency is expected. Option `None` assumes that desired resampling
has already been applied input dataset and will skip the resampling step.
window : int
Averaging window length relative to the resampling frequency. For example, if `freq="MS"`,
i.e. a monthly resampling, the window is an integer number of months.
dist : {"genextreme", "fisk"}
Name of the univariate distribution. (see :py:mod:`scipy.stats`).
method : {'APP', 'ML', 'PWM'}
Name of the fitting method, such as `ML` (maximum likelihood), `APP` (approximate). The approximate method
uses a deterministic function that doesn't involve any optimization.
fitkwargs : dict, optional
Kwargs passed to ``xclim.indices.stats.fit`` used to impose values of certains parameters (`floc`, `fscale`).
cal_start : DateStr, optional
Start date of the calibration period. A `DateStr` is expected, that is a `str` in format `"YYYY-MM-DD"`.
Default option `None` means that the calibration period begins at the start of the input dataset.
cal_end : DateStr, optional
End date of the calibration period. A `DateStr` is expected, that is a `str` in format `"YYYY-MM-DD"`.
Default option `None` means that the calibration period finishes at the end of the input dataset.
params : xarray.DataArray
LamAdr marked this conversation as resolved.
Show resolved Hide resolved
Fit parameters.
The `params` can be computed using ``xclim.indices.stats.standardized_index_fit_params`` in advance.
The output can be given here as input, and it overrides other options.
\*\*indexer
Zeitsperre marked this conversation as resolved.
Show resolved Hide resolved
Indexing parameters to compute the indicator on a temporal subset of the data.
It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`.

Returns
-------
xarray.DataArray, [unitless]
Standardized Streamflow Index.

Notes
-----
* N-month SSI / N-day SSI is determined by choosing the `window = N` and the appropriate frequency `freq`.
* Supported statistical distributions are: ["genextreme", "fisk"], where "fisk" is scipy's implementation of
a log-logistic distribution
* If `params` is given as input, it overrides the `cal_start`, `cal_end`, `freq` and `window`, `dist` and `method` options.
* "APP" method only supports two-parameter distributions. Parameter `loc` needs to be fixed to use method `APP`.
* The standardized index is bounded by ±8.21. 8.21 is the largest standardized index as constrained by the float64 precision in
the inversion to the normal distribution.

Example
-------
>>> from datetime import datetime
>>> from xclim.indices import standardized_streamflow_index
>>> ds = xr.open_dataset(path_to_q_file)
>>> q = ds.q
>>> cal_start, cal_end = "1990-05-01", "1990-08-31"
>>> ssi_3 = standardized_streamflow_index(
... q,
... freq="MS",
... window=3,
... dist="genextreme",
... method="ML",
... cal_start=cal_start,
... cal_end=cal_end,
... ) # Computing SSI-3 months using a GEV distribution for the fit
>>> # Fitting parameters can also be obtained first, then re-used as input.
>>> from xclim.indices.stats import standardized_index_fit_params
>>> params = standardized_index_fit_params(
... q.sel(time=slice(cal_start, cal_end)),
... freq="MS",
... window=3,
... dist="genextreme",
... method="ML",
... ) # First getting params
>>> ssi_3 = standardized_streamflow_index(q, params=params)

References
----------
CHANGEME :cite:cts:`mckee_relationship_1993`
"""
fitkwargs = fitkwargs or {}
dist_methods = {"genextreme": ["ML", "APP", "PWM"], "fisk": ["ML", "APP"]}
if dist in dist_methods.keys():
if method not in dist_methods[dist]:
raise NotImplementedError(
f"{method} method is not implemented for {dist} distribution"
LamAdr marked this conversation as resolved.
Show resolved Hide resolved
)
else:
raise NotImplementedError(f"{dist} distribution is not yet implemented.")

# Precipitation is expected to be zero-inflated
# zero_inflated = True
zero_inflated = False
ssi = standardized_index(
q,
freq=freq,
window=window,
dist=dist,
method=method,
zero_inflated=zero_inflated,
fitkwargs=fitkwargs,
cal_start=cal_start,
cal_end=cal_end,
params=params,
**indexer,
)

return ssi


@declare_units(snd="[length]")
def snd_max(snd: xarray.DataArray, freq: str = "YS-JUL") -> xarray.DataArray:
"""Maximum snow depth.
Expand Down
6 changes: 5 additions & 1 deletion xclim/indices/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -767,7 +767,11 @@ def standardized_index_fit_params(
da = da + convert_units_to(offset, da, context="hydro")

# "WPM" method doesn't seem to work for gamma or pearson3
dist_and_methods = {"gamma": ["ML", "APP", "PWM"], "fisk": ["ML", "APP"]}
dist_and_methods = {
"gamma": ["ML", "APP", "PWM"],
"fisk": ["ML", "APP"],
"genextreme": ["ML", "APP", "PWM"],
}
dist = get_dist(dist)
if dist.name not in dist_and_methods:
raise NotImplementedError(f"The distribution `{dist.name}` is not supported.")
Expand Down
Loading