Skip to content

Commit

Permalink
onset move daylight saving check to a checks module, add ambiguous in…
Browse files Browse the repository at this point in the history
…put and timezone to both xlsx and csv outputs
  • Loading branch information
JessyBarrette committed Aug 29, 2024
1 parent 6347627 commit c73550f
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 32 deletions.
33 changes: 33 additions & 0 deletions ocean_data_parser/parsers/checks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import pandas as pd
from loguru import logger

def check_daylight_saving(time: pd.Series) -> bool:
    """Detect time gaps that suggest an unhandled daylight saving shift.

    The sampling interval is estimated as the median difference between
    consecutive timestamps. A step equal to that interval minus one hour
    suggests a Fall shift, and a step equal to that interval plus one hour
    suggests a Spring shift. A warning is logged for each kind detected.

    Args:
        time: Series of timestamps in acquisition order.

    Returns:
        True if at least one Fall or Spring gap was detected, False otherwise.
    """
    dt = time.diff()
    sampling_interval = dt.median()
    one_hour = pd.Timedelta("1h")
    suspicious_gaps = {
        "Fall": sampling_interval - one_hour,
        "Spring": sampling_interval + one_hour,
    }

    has_issue = False
    for season, gap in suspicious_gaps.items():
        # NaN/NaT entries (e.g. the first diff) never compare equal, so they
        # are ignored by this test.
        if (dt == gap).any():
            # loguru formats messages with str.format ("{}"), not "%s", and
            # each warning must report the gap of the season it is about.
            logger.warning(
                "Time gaps (={}) for sampling interval of {} "
                "suggest a {} daylight saving issue is present",
                gap,
                sampling_interval,
                season,
            )
            has_issue = True
    return has_issue
50 changes: 18 additions & 32 deletions ocean_data_parser/parsers/onset.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import xarray

from ocean_data_parser.parsers.utils import standardize_dataset
from ocean_data_parser.parsers.checks import check_daylight_saving

GLOBAL_ATTRIBUTES = {"instrument_manufacturer": "Onset", "Convention": "CF-1.6"}

Check failure on line 21 in ocean_data_parser/parsers/onset.py

View workflow job for this annotation

GitHub Actions / testing

Ruff (I001)

ocean_data_parser/parsers/onset.py:10:1: I001 Import block is un-sorted or un-formatted

Expand Down Expand Up @@ -160,16 +161,20 @@ def csv(
standardize_variable_names: bool = True,
encoding: str = "UTF-8",
errors: str = "strict",
timezone: str = None,
timestamp_ambiguous: str="raise",
) -> xarray.Dataset:
"""Parses the Onset CSV format generate by HOBOware into a xarray object
Inputs:
Args:
path: The path to the CSV file
convert_units_to_si: Whether to standardize data units to SI units
standardize_variable_names: Rename the variable names a standardize name
convention
encoding: File encoding. Defaults to "utf-8"
errors: Error handling. Defaults to "strict"
timezone: Timezone to localize the time variable, overwrites the timezone in header
timestamp_ambiguous: How to handle ambiguous time stamps. Defaults to "raise"
Returns:
xarray.Dataset
"""
Expand Down Expand Up @@ -222,8 +227,8 @@ def csv(
df["Date Time"] = df["Date Time"].apply(
lambda x: pd.to_datetime(x, format=_get_time_format(x))
)

df["Date Time"] = df["Date Time"].dt.tz_localize(header["timezone"])
df["Date Time"] = df["Date Time"].dt.tz_localize(timezone or header["timezone"], ambiguous=timestamp_ambiguous)
check_daylight_saving(df["Date Time"])

# Convert to dataset
ds = df.to_xarray()
Expand Down Expand Up @@ -262,31 +267,6 @@ def csv(
"Unit conversion is not supported if standardize_variable_names=False"
)

# Test daylight saving issue
# TODO move this daylight saving detection test elsewhere
dt = ds["time"].diff("index")
sampling_interval = dt.median().values
dst_fall = -pd.Timedelta("1h") + sampling_interval
dst_spring = pd.Timedelta("1h") + sampling_interval
if any(dt == dst_fall):
logger.warning(
(
"Time gaps (=%s) for sampling interval of %s "
"suggest a Fall daylight saving issue is present"
),
dst_fall,
sampling_interval,
)
if any(dt == dst_spring):
logger.warning(
(
"Time gaps (=%s) for sampling interval of %s "
"suggest a Spring daylight saving issue is present"
),
dst_fall,
sampling_interval,
)

ds = standardize_dataset(ds)
return ds

Expand Down Expand Up @@ -352,11 +332,15 @@ def _farenheit_to_celsius(farenheit):
return (farenheit - 32.0) / 1.8000


def xlsx(path: str, timezone: str = None) -> xarray.Dataset:
def xlsx(path: str, timezone: str = None,ambiguous_timestamps:str="infer") -> xarray.Dataset:
"""Parses the Onset XLSX format generate by HOBOware into a xarray object
Inputs: path: The path to the XLSX file
Returns: xarray.Dataset
Args:
path: The path to the XLSX file
timezone: Timezone to localize the time variable, overwrites the timezone in header
ambiguous_timestamps: How to handle ambiguous time stamps. Defaults to "infer"
Returns:
xarray.Dataset
"""

def _format_detail_key(key):
Expand Down Expand Up @@ -423,9 +407,11 @@ def _get_column_and_unit(column):
# Convert to dataset
data["time"] = (
pd.to_datetime(data["time"], errors="coerce")
.dt.tz_localize(timezone or file_timezone)
.dt.tz_localize(timezone or file_timezone, ambiguous=ambiguous_timestamps)
.dt.tz_convert("UTC")
)
check_daylight_saving(data["time"])

ds = data.to_xarray()
for var in variable_attributes:
ds[var].attrs = variable_attributes[var]
Expand Down

0 comments on commit c73550f

Please sign in to comment.