Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make pme wiper compatible with pme parser #113

Merged
merged 8 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,23 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## `development`

### Added

- Add compatibility with PME wipers txt format.

### Fixed

- Fixed the `star_oddi` parser warning caused by the missing `dayfirst=True` input.
- Rename pme parsers by removing the `minidot_` prefix. The new functions are called `pme.txt`,
`pme.txts`, `pme.cat`. The old `minidot_*` names are kept as deprecated placeholders.

## `0.6.1` - 2024-08-30

### Added

- Add `onset.xlsx` parser
- Add `onset.xlsx` parser.
- Make `onset.xlsx` and `onset.csv` raise a `pytz.exception.AmbiguousTimeError`
when jumps associated with daylight saving time changes are detected.
- Add `star_oddi.DAT` ctd test file and fix timestamp format handling.
Expand Down
92 changes: 67 additions & 25 deletions ocean_data_parser/parsers/pme.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,46 @@
"Q ()": "q",
}

global_attributes = {"Conventions": "CF-1.6"}
default_global_attributes = {"Conventions": "CF-1.6"}


def minidot_txt(
# Deprecated functions
def minidot_txt(*args, **kwargs):
    """Deprecated alias of `txt`.

    Logs a deprecation warning, then forwards every positional and keyword
    argument to `txt` unchanged.

    Returns:
        Whatever `txt` returns for the given arguments.
    """
    logger.warning("minidot_txt is deprecated, use txt instead")
    parsed = txt(*args, **kwargs)
    return parsed


def minidot_txts(*args, **kwargs):
    """Deprecated alias of `txts`.

    Logs a deprecation warning, then forwards every positional and keyword
    argument to `txts` unchanged.

    Returns:
        Whatever `txts` returns for the given arguments.
    """
    logger.warning("minidot_txts is deprecated, use txts instead")
    parsed = txts(*args, **kwargs)
    return parsed


def minidot_cat(*args, **kwargs):
    """Deprecated alias of `cat`.

    Logs a deprecation warning, then forwards every positional and keyword
    argument to `cat` unchanged.

    Returns:
        Whatever `cat` returns for the given arguments.
    """
    logger.warning("minidot_cat is deprecated, use cat instead")
    parsed = cat(*args, **kwargs)
    return parsed


def _rename_variable(variable: str) -> str:
    """Translate a raw column header into a lowercase, underscore-separated name.

    Resolution order:
      1. An exact entry in ``VARIABLE_RENAMING_MAPPING`` wins.
      2. Headers containing ``"I (mA)"`` have that part replaced by
         ``"current"``.
      3. Headers containing ``" (Volt)"`` have that part replaced by
         ``"_volt"``.
      4. Otherwise everything from the first ``"("`` onward is dropped and
         surrounding whitespace is stripped.

    For cases 2-4 the remaining spaces become underscores and the result is
    lowercased.

    Args:
        variable (str): Column header as read from the file.

    Returns:
        str: The renamed variable.
    """

    def _snake(text: str) -> str:
        # Shared final normalization for every non-mapped header.
        return text.replace(" ", "_").lower()

    if variable in VARIABLE_RENAMING_MAPPING:
        return VARIABLE_RENAMING_MAPPING[variable]
    if "I (mA)" in variable:
        return _snake(variable.replace("I (mA)", "current"))
    if " (Volt)" in variable:
        return _snake(variable.replace(" (Volt)", "_volt"))

    # Keep only the text before the first opening parenthesis.
    base_name, *_ = variable.split("(")
    return _snake(base_name.strip())


def txt(
path: str,
rename_variables: bool = True,
encoding: str = "utf-8",
errors: str = "strict",
timezone: str = "UTC",
global_attributes: dict = None,
) -> xr.Dataset:
"""Parse PME MiniDot txt file

Expand All @@ -71,6 +102,8 @@ def minidot_txt(
rename_variables (bool, optional): Rename the variables to standardized names. Defaults to True.
encoding (str, optional): File encoding. Defaults to 'utf-8'.
errors (str, optional): Error handling. Defaults to 'strict'.
timezone (str, optional): Timezone to localize the time. Defaults to 'UTC'.
global_attributes (dict, optional): Global attributes to add to the dataset. Defaults to None (no extra attributes).

Returns:
xarray.Dataset
Expand All @@ -87,23 +120,28 @@ def _append_to_history(msg):
errors=errors,
) as f:
# Read the header
serial_number = f.readline().replace("\n", "")
logger.debug("Parse file from serial number: %s", serial_number)
metadata = re.search(
(
r"OS REV: (?P<software_version>\d+\.\d+)\s"
r"Sensor Cal: (?P<instrument_calibration>\d*)"
),
f.readline(),
)
header = [f.readline()]
while "Time (sec)" not in header[-1]:
header += [f.readline()]

# Parse metadata from header
metadata = {}
metadata["serial_number"] = header[0].replace("\n", "")
metadata["software_version"] = re.search(r"OS REV: (\d+\.\d+)\s", header[1])[1]
if "Sensor Cal" in header[1]:
metadata["instrument_calibration"] = re.search(
r"Sensor Cal: (\d*)", header[1]
)[1]
if len(header) > 2:
for key, value in re.findall("(\w+)\: ([^,\n]+)", "".join(header[2:-1])):
metadata[key.lower()] = value.strip()

# If metadata is null then it's likely not a minidot file
if metadata is None:
warnings.warn("Failed to read: {path}", RuntimeWarning)
return pd.DataFrame(), None

# Parse column names
columns = [item.strip() for item in f.readline().split(",")]
columns = [item.strip() for item in header[-1].split(",")]

# Read the data with pandas
df = pd.read_csv(
Expand All @@ -124,12 +162,11 @@ def _append_to_history(msg):

# Global attributes
ds.attrs = {
**global_attributes,
**metadata.groupdict(),
**default_global_attributes,
**metadata,
"instrument_manufacturer": "PME",
"instrument_model": "MiniDot",
"instrument_sn": serial_number,
"history": "",
**(global_attributes or {}),
}

# Retrieve raw saturation values from minidot
Expand All @@ -155,20 +192,27 @@ def _append_to_history(msg):
for var in ds.variables:
if var not in VARIABLE_ATTRIBUTES:
logger.warning("Unknown variable: %s", var)
if "(" in var and ")" in var:
variable, unit = var.split("(")
unit = unit.replace(")", "")
ds[var].attrs.update({"units": unit})
continue
ds[var].attrs.update(VARIABLE_ATTRIBUTES[var])

if rename_variables:
ds = ds.rename_vars(VARIABLE_RENAMING_MAPPING)
ds.attrs["history"] += (
f"\n{pd.Timestamp.now().isoformat()} Rename variables: {VARIABLE_RENAMING_MAPPING}"
)
variable_mapping = {
variable: _rename_variable(variable) for variable in ds.variables
}
ds = ds.rename_vars(variable_mapping)
ds.attrs["history"] += (
f"\n{pd.Timestamp.now().isoformat()} Rename variables: {variable_mapping}"
)

ds = standardize_dataset(ds)
return ds


def minidot_txts(
def txts(
paths: Union[list, str], encoding: str = "utf-8", errors: str = "strict"
) -> xr.Dataset:
"""Parse PME Minidots txt files
Expand Down Expand Up @@ -197,9 +241,7 @@ def minidot_txts(
return xr.merge(datasets)


def minidot_cat(
path: str, encoding: str = "utf-8", errors: str = "strict"
) -> xr.Dataset:
def cat(path: str, encoding: str = "utf-8", errors: str = "strict") -> xr.Dataset:
"""cat reads PME MiniDot concatenated CAT files

Args:
Expand Down
17 changes: 14 additions & 3 deletions ocean_data_parser/parsers/star_oddi.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@
"long_name": "Sound Velocity",
"standard_name": "speed_of_sound_in_sea_water",
},
"pressure": {
"long_name": "Pressure",
"standard_name": "sea_water_pressure",
},
}


Expand Down Expand Up @@ -108,6 +112,7 @@ def _standardize_attributes(item):
names=variables.keys(),
parse_dates=["time"],
date_format=date_format,
dayfirst=True,
)
if "time" in df:
df = df.set_index(["time"])
Expand Down Expand Up @@ -139,9 +144,15 @@ def _standardize_attributes(item):
)
),
"n_records": n_records,
"start_time": pd.to_datetime(start_time).isoformat(),
"end_time": pd.to_datetime(end_time).isoformat(),
"date_created": pd.to_datetime(metadata.pop("created")).isoformat(),
"start_time": pd.to_datetime(
start_time, format=date_format, dayfirst=True
).isoformat(),
"end_time": pd.to_datetime(
end_time, format=date_format, dayfirst=True
).isoformat(),
"date_created": pd.to_datetime(
metadata.pop("created"), format=date_format, dayfirst=True
).isoformat(),
"original_file_header": original_header,
}
# Add variable attributes
Expand Down
2 changes: 1 addition & 1 deletion ocean_data_parser/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def detect_file_format(file: str, encoding: str = "UTF-8") -> str:
elif ext == "MON":
parser = "van_essen_instruments.mon"
elif ext == "txt" and re.match(r"\d+\-\d+\s*\nOS REV\:", header):
parser = "pme.minidot_txt"
parser = "pme.txt"
elif ext == "txt" and re.match(r"Model\=.*\nFirmware\=.*\nSerial\=.*", header):
parser = "rbr.rtext"
elif ext == "txt" and "Front panel parameter change:" in header:
Expand Down
14 changes: 14 additions & 0 deletions tests/parsers_test_files/pme/wiper/2024-07-28 100000Z.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
5958-066383
OS REV: 2.30
Type: 0, Scrub: 1, Angle: 45
Timeout: 10, Threshold: 200
Time (sec), Bat (Volt), T (deg C), Wipes (), Cal Wipe Time (sec), Wipe Time (sec), Start I (mA), Ave I (mA), Peak I (mA), Final I (mA), Rsource (Ohm)
1722160800, +3.43, +25.154, 26, +6.0, +5.9, +84.5, +68.7, +79.6, 225.2, +1.1
1722171600, +3.48, +25.396, 27, +6.0, +6.0, +73.3, +70.8, +77.3, 230.3, +1.1
1722182400, +3.48, +25.396, 28, +6.0, +5.9, +76.5, +70.1, +78.6, 225.8, +1.1
1722193200, +3.48, +25.154, 29, +6.0, +6.0, +75.8, +70.4, +78.2, 230.1, +1.1
1722204000, +3.48, +25.396, 30, +6.0, +5.9, +75.4, +69.4, +79.1, 200.1, +1.1
1722214800, +3.43, +26.123, 31, +6.0, +5.9, +85.3, +67.4, +78.9, 225.5, +1.1
1722225600, +3.48, +25.639, 32, +6.0, +6.0, +76.1, +69.5, +77.7, 230.4, +1.1
1722236400, +3.48, +25.396, 33, +6.0, +5.9, +76.1, +68.8, +78.4, 218.3, +1.1
1722247200, +3.48, +25.396, 34, +6.0, +5.9, +73.9, +69.5, +77.3, 203.7, +1.1
14 changes: 14 additions & 0 deletions tests/parsers_test_files/pme/wiper/2024-07-29 130000Z.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
5958-066383
OS REV: 2.30
Type: 0, Scrub: 1, Angle: 45
Timeout: 10, Threshold: 200
Time (sec), Bat (Volt), T (deg C), Wipes (), Cal Wipe Time (sec), Wipe Time (sec), Start I (mA), Ave I (mA), Peak I (mA), Final I (mA), Rsource (Ohm)
1722258000, +3.48, +24.912, 35, +6.0, +5.9, +76.4, +69.2, +79.5, 217.1, +1.1
1722268800, +3.43, +25.396, 36, +6.0, +5.9, +83.9, +67.1, +78.6, 226.7, +1.1
1722279600, +3.48, +24.912, 37, +6.0, +6.0, +74.8, +69.4, +77.6, 228.8, +1.1
1722290400, +3.48, +24.670, 38, +6.0, +5.9, +76.0, +70.3, +77.9, 204.8, +1.1
1722301200, +3.48, +24.670, 39, +6.0, +6.0, +75.8, +70.2, +78.9, 231.4, +1.1
1722312000, +3.48, +25.154, 40, +6.0, +5.9, +75.6, +68.3, +79.2, 224.8, +1.1
1722322800, +3.43, +25.396, 41, +5.9, +5.8, +84.3, +66.5, +74.9, 207.4, +1.0
1722333600, +3.48, +24.427, 42, +5.9, +6.0, +73.3, +69.4, +77.7, 230.7, +1.1
1722344400, +3.48, +24.427, 43, +5.9, +5.9, +78.1, +68.9, +77.3, 209.0, +1.1
14 changes: 14 additions & 0 deletions tests/parsers_test_files/pme/wiper/2024-07-30 160000Z.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
5958-066383
OS REV: 2.30
Type: 0, Scrub: 1, Angle: 45
Timeout: 10, Threshold: 200
Time (sec), Bat (Volt), T (deg C), Wipes (), Cal Wipe Time (sec), Wipe Time (sec), Start I (mA), Ave I (mA), Peak I (mA), Final I (mA), Rsource (Ohm)
1722355200, +3.48, +23.700, 44, +5.9, +6.0, +73.6, +69.9, +78.2, 231.5, +1.1
1722366000, +3.48, +25.396, 45, +5.9, +5.9, +76.1, +69.4, +75.9, 209.6, +1.1
1722376800, +3.43, +25.154, 46, +6.0, +5.9, +83.3, +67.8, +79.2, 231.2, +1.1
1722387600, +3.48, +24.185, 47, +6.0, +6.0, +73.7, +70.1, +77.6, 231.6, +1.1
1722398400, +3.48, +24.427, 48, +6.0, +5.9, +76.5, +68.8, +79.5, 212.1, +1.1
1722409200, +3.48, +22.974, 49, +6.0, +6.0, +75.5, +70.1, +77.7, 228.9, +1.1
1722420000, +3.48, +22.974, 50, +6.0, +5.9, +76.6, +69.4, +78.3, 215.8, +1.1
1722430800, +3.42, +23.458, 51, +6.0, +5.9, +85.2, +67.9, +79.8, 231.7, +1.1
1722441600, +3.47, +24.185, 52, +6.0, +6.0, +73.2, +69.5, +78.0, 230.5, +1.1
2 changes: 1 addition & 1 deletion tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class TestPMEParsers:
"path", glob("tests/parsers_test_files/pme/**/*.txt", recursive=True)
)
def test_txt_parser(self, path, caplog):
ds = pme.minidot_txt(path)
ds = pme.txt(path)
review_parsed_dataset(ds, path, caplog)


Expand Down
Loading