fix: Fix datetime extraction (#226)

tlambert03 · Apr 22, 2024 · c1848a3
1 parent 0a46b98
commit c1848a3
Show file tree

Hide file tree

Showing 4 changed files with 40 additions and 36 deletions.
diff --git a/src/nd2/_ome.py b/src/nd2/_ome.py
@@ -56,7 +56,7 @@ def nd2_ome_metadata(
     rdr = cast("ModernReader", f._rdr)
     meta = f.metadata
     images = []
-    acquisition_date = rdr._acquisition_date()
+    acquisition_date = rdr._acquisition_datetime()
     uuid_ = f"urn:uuid:{uuid.uuid4()}"
     sizes = dict(f.sizes)
     n_positions = sizes.pop(AXIS.POSITION, 1)

diff --git a/src/nd2/_util.py b/src/nd2/_util.py
@@ -2,6 +2,7 @@
 
 import math
 import re
+from contextlib import suppress
 from datetime import datetime, timezone
 from itertools import product
 from typing import TYPE_CHECKING, BinaryIO, NamedTuple, cast
@@ -79,12 +80,18 @@ def is_new_format(path: str) -> bool:
         return fh.read(4) == NEW_HEADER_MAGIC
 
 
-def jdn_to_datetime(jdn: float, tz: timezone = timezone.utc) -> datetime:
-    return datetime.fromtimestamp((jdn - 2440587.5) * 86400.0, tz)
+JDN_UNIX_EPOCH = 2440587.5
+SECONDS_PER_DAY = 86400
 
 
-def rgb_int_to_tuple(rgb: int) -> tuple[int, int, int]:
-    return ((rgb & 255), (rgb >> 8 & 255), (rgb >> 16 & 255))
+def jdn_to_datetime(jdn: float, tz: timezone = timezone.utc) -> datetime:
+    seconds_since_epoch = (jdn - JDN_UNIX_EPOCH) * SECONDS_PER_DAY
+    # very negative values can cause OverflowError on Windows, and are meaningless
+    dt = datetime.fromtimestamp(max(seconds_since_epoch, 0), tz)
+    with suppress(ValueError, OSError):
+        # astimezone() without arguments will use the system's local timezone
+        return dt.astimezone()
+    return dt
 
 
 # these are used has headers in the events() table
@@ -133,15 +140,6 @@ class VoxelSize(NamedTuple):
 ]
 
 
-def parse_time(time_str: str) -> datetime:
-    for fmt_str in TIME_FMT_STRINGS:
-        try:
-            return datetime.strptime(time_str, fmt_str)
-        except ValueError:
-            continue
-    raise ValueError(f"Could not parse {time_str}")  # pragma: no cover
-
-
 def convert_records_to_dict_of_lists(
     records: ListOfDicts, null_val: Any = float("nan")
 ) -> DictOfLists:

diff --git a/src/nd2/index.py b/src/nd2/index.py
@@ -7,10 +7,22 @@
 from concurrent.futures import ThreadPoolExecutor
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Iterable, Iterator, Sequence, TypedDict, cast, no_type_check
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Iterable,
+    Iterator,
+    Sequence,
+    TypedDict,
+    cast,
+    no_type_check,
+)
 
 import nd2
 
+if TYPE_CHECKING:
+    from nd2.readers._modern.modern_reader import ModernReader
+
 try:
     import rich
 
@@ -47,26 +59,29 @@ def index_file(path: Path) -> Record:
     with nd2.ND2File(path) as nd:
         if nd.is_legacy:
             software: dict = {}
-            acquired: str | None = ""
+            acquired: datetime | None = None
             binary = False
         else:
-            software = nd._rdr._app_info()  # type: ignore
-            acquired = nd._rdr._acquisition_date()  # type: ignore
+            rdr = cast("ModernReader", nd._rdr)
+            software = rdr._app_info()
+            acquired = rdr._acquisition_datetime()
             binary = nd.binary_data is not None
 
         stat = path.stat()
         exp = [(x.type, x.count) for x in nd.experiment]
         axes, shape = zip(*nd.sizes.items())
         if isinstance(acquired, datetime):
-            acquired = acquired.strftime(TIME_FORMAT)
+            acq_str = acquired.strftime(TIME_FORMAT)
+        else:
+            acq_str = ""
 
         return Record(
             {
                 "path": str(path.resolve()),
                 "name": path.name,
                 "version": ".".join(map(str, nd.version)),
                 "kb": round(stat.st_size / 1000, 2),
-                "acquired": acquired or "",
+                "acquired": acq_str,
                 "experiment": ";".join([f"{t}:{c}" for t, c in exp]),
                 "dtype": str(nd.dtype),
                 "shape": list(shape),

diff --git a/src/nd2/readers/_modern/modern_reader.py b/src/nd2/readers/_modern/modern_reader.py
@@ -3,6 +3,7 @@
 import os
 import warnings
 import zlib
+from contextlib import suppress
 from typing import TYPE_CHECKING, Any, Iterable, Mapping, Sequence, cast
 
 import numpy as np
@@ -528,23 +529,13 @@ def _app_info(self) -> dict:
         k = b"CustomDataVar|AppInfo_V1_0!"
         return self._decode_chunk(k) if k in self.chunkmap else {}
 
-    def _acquisition_date(self) -> datetime.datetime | str | None:
-        """Try to extract acquisition date.
-
-        A best effort is made to extract a datetime object from the date string,
-        but if that fails, the raw string is returned.  Use isinstance() to
-        be safe.
-        """
-        date = self.text_info().get("date")
-        if date:
-            try:
-                return _util.parse_time(date)
-            except ValueError:
-                return date
-
+    def _acquisition_datetime(self) -> datetime.datetime | None:
+        """Try to extract acquisition date."""
         time = self._cached_global_metadata().get("time", {})
-        jdn = time.get("absoluteJulianDayNumber")
-        return _util.jdn_to_datetime(jdn) if jdn else None
+        if jdn := time.get("absoluteJulianDayNumber"):
+            with suppress(ValueError):
+                return _util.jdn_to_datetime(jdn)
+        return None
 
     def binary_data(self) -> BinaryLayers | None:
         from nd2._binary import BinaryLayer, BinaryLayers, decode_binary_mask