From e39c43b6fcf863052c4d0646ff22f6800b2eb970 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Mon, 28 Aug 2023 10:25:29 -0500 Subject: [PATCH 1/7] fixes and improvements for tidal filter --- oceans/filters.py | 48 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/oceans/filters.py b/oceans/filters.py index e053b3c..63b6254 100644 --- a/oceans/filters.py +++ b/oceans/filters.py @@ -415,10 +415,7 @@ def medfilt1(x, L=3): >>> L = 103 >>> xout = medfilt1(x=x, L=L) >>> ax = plt.subplot(212) - >>> ( - ... l1, - ... l2, - ... ) = ax.plot( + >>> (l1, l2,) = ax.plot( ... x ... ), ax.plot(xout) >>> ax.grid(True) @@ -570,7 +567,7 @@ def md_trenberth(x): return y -def pl33tn(x, dt=1.0, T=33.0, mode="valid"): +def pl33tn(x, dt=1.0, T=33.0, mode="valid", t=None): """ Computes low-passed series from `x` using pl33 filter, with optional sample interval `dt` (hours) and filter half-amplitude period T (hours) @@ -608,14 +605,26 @@ def pl33tn(x, dt=1.0, T=33.0, mode="valid"): """ import cf_xarray # noqa: F401 + import pandas as pd import xarray as xr if isinstance(x, xr.Dataset): raise TypeError("Input a DataArray not a Dataset.") + elif isinstance(x, pd.DataFrame): + raise TypeError("Input a Series not a DataFrame.") + if isinstance(x, pd.Series) and not isinstance( + x.index, + pd.core.indexes.datetimes.DatetimeIndex, + ): + raise TypeError("Input Series needs to have parsed datetime indices.") + + # find dt in units of hours if isinstance(x, xr.DataArray): - # find dt in units of hours - dt = (x.cf["T"][1] - x.cf["T"][0]) * 1e-9 / 3600 + dt = (x.cf["T"][1] - x.cf["T"][0]) / np.timedelta64(int(3.6e12)) + # dt = (x.cf["T"][1] - x.cf["T"][0]) * 1e-9 / 3600 + elif isinstance(x, pd.Series): + dt = (x.index[1] - x.index[0]) / pd.Timedelta("1H") pl33 = np.array( [ @@ -694,18 +703,20 @@ def pl33tn(x, dt=1.0, T=33.0, mode="valid"): dt = float(dt) * (33.0 / T) filter_time = np.arange(0.0, 33.0, dt, dtype="d") - # N = len(filter_time) + Nt = len(filter_time) filter_time = np.hstack((-filter_time[-1:0:-1], filter_time)) pl33 = np.interp(filter_time, _dt, pl33) pl33 /= pl33.sum() if isinstance(x, xr.DataArray): + x = x.interpolate_na(dim=x.cf["T"].name) + weight = xr.DataArray(pl33, dims=["window"]) xf = ( x.rolling({x.cf["T"].name: len(pl33)}, center=True) .construct({x.cf["T"].name: "window"}) - .dot(weight) + .dot(weight, dims="window") ) # update attrs attrs = { @@ -715,7 +726,26 @@ def pl33tn(x, dt=1.0, T=33.0, mode="valid"): } xf.attrs = attrs + elif isinstance(x, pd.Series): + xf = x.to_frame().apply(np.convolve, v=pl33, mode=mode) + + # nan out edges which are not good values anyway + if mode == "same": + xf[: Nt - 1] = np.nan + xf[-Nt + 2 :] = np.nan + else: # use numpy xf = np.convolve(x, pl33, mode=mode) + # times to match xf + if t is not None: + # Nt = len(filter_time) + tf = t[Nt - 1 : -Nt + 1] + return xf, tf + + # nan out edges which are not good values anyway + if mode == "same": + xf[: Nt - 1] = np.nan + xf[-Nt + 2 :] = np.nan + return xf From 9582b8d315cee8465e8542135f1ae3e85da49e7a Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Mon, 28 Aug 2023 13:18:21 -0500 Subject: [PATCH 2/7] Update oceans/filters.py change syntax of number Co-authored-by: Filipe --- oceans/filters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oceans/filters.py b/oceans/filters.py index 63b6254..d133af5 100644 --- a/oceans/filters.py +++ b/oceans/filters.py @@ -621,7 +621,7 @@ def pl33tn(x, dt=1.0, T=33.0, mode="valid", t=None): # find dt in units of hours if isinstance(x, xr.DataArray): - dt = (x.cf["T"][1] - x.cf["T"][0]) / np.timedelta64(int(3.6e12)) + dt = (x.cf["T"][1] - x.cf["T"][0]) / np.timedelta64(360_000_0000_000) # This is a bit more readable. # dt = (x.cf["T"][1] - x.cf["T"][0]) * 1e-9 / 3600 elif isinstance(x, pd.Series): dt = (x.index[1] - x.index[0]) / pd.Timedelta("1H") From 54daec94a21556b0e8af2935ca4661a5b5f09b0c Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Mon, 28 Aug 2023 13:19:21 -0500 Subject: [PATCH 3/7] combined two checks --- oceans/filters.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/oceans/filters.py b/oceans/filters.py index d133af5..fb868c8 100644 --- a/oceans/filters.py +++ b/oceans/filters.py @@ -608,10 +608,8 @@ def pl33tn(x, dt=1.0, T=33.0, mode="valid", t=None): import pandas as pd import xarray as xr - if isinstance(x, xr.Dataset): - raise TypeError("Input a DataArray not a Dataset.") - elif isinstance(x, pd.DataFrame): - raise TypeError("Input a Series not a DataFrame.") + if isinstance(x, (xr.Dataset,pd.DataFrame)): + raise TypeError("Input a DataArray not a Dataset, or a Series not a DataFrame.") if isinstance(x, pd.Series) and not isinstance( x.index, From 0addfed70f74f7d415fdb8b4ca0a4182171cd426 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Mon, 28 Aug 2023 13:20:26 -0500 Subject: [PATCH 4/7] precommit wanted a diff syntax --- oceans/filters.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/oceans/filters.py b/oceans/filters.py index fb868c8..79b952f 100644 --- a/oceans/filters.py +++ b/oceans/filters.py @@ -608,7 +608,7 @@ def pl33tn(x, dt=1.0, T=33.0, mode="valid", t=None): import pandas as pd import xarray as xr - if isinstance(x, (xr.Dataset,pd.DataFrame)): + if isinstance(x, xr.Dataset | pd.DataFrame): raise TypeError("Input a DataArray not a Dataset, or a Series not a DataFrame.") if isinstance(x, pd.Series) and not isinstance( @@ -619,7 +619,9 @@ def pl33tn(x, dt=1.0, T=33.0, mode="valid", t=None): # find dt in units of hours if isinstance(x, xr.DataArray): - dt = (x.cf["T"][1] - x.cf["T"][0]) / np.timedelta64(360_000_0000_000) # This is a bit more readable. + dt = (x.cf["T"][1] - x.cf["T"][0]) / np.timedelta64( + 360_000_0000_000, + ) # This is a bit more readable. # dt = (x.cf["T"][1] - x.cf["T"][0]) * 1e-9 / 3600 elif isinstance(x, pd.Series): dt = (x.index[1] - x.index[0]) / pd.Timedelta("1H") From b4b1a1e4d2cfd5d6f8275ecbbd0c40d10fd57487 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Mon, 28 Aug 2023 14:57:25 -0500 Subject: [PATCH 5/7] removed comments --- oceans/filters.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/oceans/filters.py b/oceans/filters.py index 79b952f..7e857e6 100644 --- a/oceans/filters.py +++ b/oceans/filters.py @@ -621,8 +621,7 @@ def pl33tn(x, dt=1.0, T=33.0, mode="valid", t=None): if isinstance(x, xr.DataArray): dt = (x.cf["T"][1] - x.cf["T"][0]) / np.timedelta64( 360_000_0000_000, - ) # This is a bit more readable. - # dt = (x.cf["T"][1] - x.cf["T"][0]) * 1e-9 / 3600 + ) elif isinstance(x, pd.Series): dt = (x.index[1] - x.index[0]) / pd.Timedelta("1H") From cea342e43a312d9206b9f372d03a7fdcd36f3a2f Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Tue, 12 Sep 2023 11:56:46 -0500 Subject: [PATCH 6/7] Update oceans/filters.py Co-authored-by: Filipe --- oceans/filters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oceans/filters.py b/oceans/filters.py index 7e857e6..ad630da 100644 --- a/oceans/filters.py +++ b/oceans/filters.py @@ -620,7 +620,7 @@ def pl33tn(x, dt=1.0, T=33.0, mode="valid", t=None): # find dt in units of hours if isinstance(x, xr.DataArray): dt = (x.cf["T"][1] - x.cf["T"][0]) / np.timedelta64( - 360_000_0000_000, + 360_000_000_000, ) elif isinstance(x, pd.Series): dt = (x.index[1] - x.index[0]) / pd.Timedelta("1H") From 4a3082318a5263385d324f50a21f7a1cd6c93d00 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Tue, 12 Sep 2023 12:10:22 -0500 Subject: [PATCH 7/7] linting fights on this --- oceans/filters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oceans/filters.py b/oceans/filters.py index 7e857e6..5b53a9b 100644 --- a/oceans/filters.py +++ b/oceans/filters.py @@ -608,7 +608,7 @@ def pl33tn(x, dt=1.0, T=33.0, mode="valid", t=None): import pandas as pd import xarray as xr - if isinstance(x, xr.Dataset | pd.DataFrame): + if isinstance(x, (xr.Dataset, pd.DataFrame)): raise TypeError("Input a DataArray not a Dataset, or a Series not a DataFrame.") if isinstance(x, pd.Series) and not isinstance(