Skip to content

Commit

Permalink
Handle negative unspecified and negative unspecified + qualified
Browse files Browse the repository at this point in the history
Requires quite a few overrides of the lower_* and upper_* range methods to handle these dates properly, because padding works in the opposite direction for negative dates, especially when combined with month/day padding.
  • Loading branch information
ColeDCrawford committed May 28, 2024
1 parent ef24bc7 commit b53df4a
Show file tree
Hide file tree
Showing 3 changed files with 201 additions and 30 deletions.
2 changes: 1 addition & 1 deletion edtf/parser/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def f(toks):

# (* *** unspecified *** *)
yearWithOneOrTwoOrThreeUnspecifedDigits = Combine(
digit + (digit ^ "X") + (digit ^ "X") + "X"
Optional("-") + digit + (digit ^ "X") + (digit ^ "X") + "X"
)("year")
monthUnspecified = year + "-" + L("XX")("month")
dayUnspecified = yearMonth + "-" + L("XX")("day")
Expand Down
226 changes: 197 additions & 29 deletions edtf/parser/parser_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,16 +561,13 @@ def __init__(
int(significant_digits) if significant_digits else None
)
self.ua = ua if ua else None
self.negative = self.year.startswith("-")

def __str__(self):
r = self.year
if self.month:
r += f"-{self.month}"
if self.day:
r += f"-{self.day}"
base = super().__str__()
if self.ua:
r += str(self.ua)
return r
base += str(self.ua)
return base

def _get_fuzzy_padding(self, lean):
if not self.ua:
Expand All @@ -579,24 +576,16 @@ def _get_fuzzy_padding(self, lean):
padding = relativedelta()

if self.year:
if self.precision == PRECISION_MILLENIUM:
padding += relativedelta(
years=int(
multiplier * appsettings.PADDING_MILLENNIUM_PRECISION.years
)
)
elif self.precision == PRECISION_CENTURY:
padding += relativedelta(
years=int(multiplier * appsettings.PADDING_CENTURY_PRECISION.years)
)
elif self.precision == PRECISION_DECADE:
padding += relativedelta(
years=int(multiplier * appsettings.PADDING_DECADE_PRECISION.years)
)
else:
padding += relativedelta(
years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years)
)
year_no_symbol = self.year.lstrip("-")
years_padding = self._calculate_years_padding(multiplier, year_no_symbol)
# Reverse the padding for negative years and earliest calculations
# if self.negative:
# years_padding = -years_padding if lean == EARLIEST else years_padding
# else:
# years_padding = years_padding if lean == EARLIEST else -years_padding

padding += years_padding

if self.month:
padding += relativedelta(
months=int(multiplier * appsettings.PADDING_MONTH_PRECISION.months)
Expand All @@ -608,20 +597,199 @@ def _get_fuzzy_padding(self, lean):

return padding

def _calculate_years_padding(self, multiplier, year_no_symbol):
    """Return the year component of the fuzzy padding as a ``relativedelta``.

    The padding setting is picked from ``appsettings`` according to
    ``self.precision`` (millennium, century, decade, or the plain year
    setting for anything else). Its ``years`` value is scaled by
    ``multiplier`` and truncated to a whole number with ``int``.

    ``year_no_symbol`` is accepted for the caller's convenience but is not
    read by this method.
    """
    precision = self.precision
    if precision == PRECISION_MILLENIUM:
        setting = appsettings.PADDING_MILLENNIUM_PRECISION
    elif precision == PRECISION_CENTURY:
        setting = appsettings.PADDING_CENTURY_PRECISION
    elif precision == PRECISION_DECADE:
        setting = appsettings.PADDING_DECADE_PRECISION
    else:
        setting = appsettings.PADDING_YEAR_PRECISION

    whole_years = int(multiplier * setting.years)
    return relativedelta(years=whole_years)

def lower_fuzzy(self):
    """Return the fuzzy (padded) lower bound as a ``struct_time``.

    The strict lower bound is moved earlier by the fuzzy padding and then
    snapped to the start of its period so the result is a true lower bound:

    * year / decade / century / millennium precision -> 1 January
    * month precision -> first day of the resulting month
    * any other precision -> the padded value unchanged

    Previously the non-negative month-precision branch snapped to the *last*
    day of the month, which could place the fuzzy lower bound after the
    strict lower bound when no padding applies.
    """
    # Negative years are already accounted for by the lower_strict() override.
    strict_val = self.lower_strict()

    # Negative and non-negative years request the padding with different leans.
    lean = LATEST if self.negative else EARLIEST
    adjusted = apply_delta(sub, strict_val, self._get_fuzzy_padding(lean))

    precision = self.precision
    if precision in (
        PRECISION_YEAR,
        PRECISION_DECADE,
        PRECISION_CENTURY,
        PRECISION_MILLENIUM,
    ):
        month, day = 1, 1
    elif precision == PRECISION_MONTH:
        # A lower bound starts on the first day of the month.
        month, day = adjusted.tm_mon, 1
    else:
        return adjusted

    return struct_time(
        (adjusted.tm_year, month, day)
        + tuple(TIME_EMPTY_TIME)
        + tuple(TIME_EMPTY_EXTRAS)
    )

def upper_fuzzy(self):
    """Return the fuzzy (padded) upper bound as a ``struct_time``.

    The strict upper bound is moved later by the fuzzy padding and then
    snapped to the end of its period so the result is a true upper bound:

    * year / decade / century / millennium precision -> 31 December
    * month precision -> last day of the resulting month
    * any other precision -> the padded value unchanged

    Previously the non-negative month-precision branch snapped to the *first*
    day of the month, which could place the fuzzy upper bound before the
    strict upper bound when no padding applies. With that corrected, negative
    and non-negative years follow the same steps.
    """
    # Negative years are already accounted for by the upper_strict() override.
    strict_val = self.upper_strict()
    adjusted = apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST))

    precision = self.precision
    if precision in (
        PRECISION_YEAR,
        PRECISION_DECADE,
        PRECISION_CENTURY,
        PRECISION_MILLENIUM,
    ):
        month, day = 12, 31
    elif precision == PRECISION_MONTH:
        # An upper bound ends on the last day of the month; adding months can
        # land on a shorter day number (e.g. the 28th), so recompute it.
        month = adjusted.tm_mon
        day = calendar.monthrange(adjusted.tm_year, month)[1]
    else:
        return adjusted

    return struct_time(
        (adjusted.tm_year, month, day)
        + tuple(TIME_EMPTY_TIME)
        + tuple(TIME_EMPTY_EXTRAS)
    )

def lower_strict(self):
    """Return the strict lower bound as a ``struct_time``.

    Non-negative years delegate directly to ``_strict_date(lean=EARLIEST)``.

    For negative years the date is resolved with ``lean=LATEST`` to obtain the
    year, and the month/day are then rewritten according to ``self.precision``:

    * year / decade / century / millennium precision -> 1 January
    * month precision -> last day of the resolved month
    * any other precision -> the resolved date unchanged
    """
    if not self.negative:
        return self._strict_date(lean=EARLIEST)

    # LATEST lean yields the right year for a negative date; month and day
    # still need adjusting below.
    resolved = self._strict_date(lean=LATEST)
    precision = self.precision

    if precision in (
        PRECISION_YEAR,
        PRECISION_DECADE,
        PRECISION_CENTURY,
        PRECISION_MILLENIUM,
    ):
        month, day = 1, 1
    elif precision == PRECISION_MONTH:
        # NOTE(review): this snaps a *lower* bound to the month's last day,
        # mirroring upper_strict(); confirm this is intended.
        month = resolved.tm_mon
        day = calendar.monthrange(resolved.tm_year, month)[1]
    else:
        return resolved

    return struct_time(
        (resolved.tm_year, month, day)
        + tuple(TIME_EMPTY_TIME)
        + tuple(TIME_EMPTY_EXTRAS)
    )

def upper_strict(self):
    """Return the strict upper bound as a ``struct_time``.

    Non-negative years delegate directly to ``_strict_date(lean=LATEST)``.

    For negative years the date is resolved with ``lean=EARLIEST`` and the
    month/day are then rewritten according to ``self.precision``:

    * year / decade / century / millennium precision -> 31 December
    * month precision -> last day of the resolved month
    * any other precision -> the resolved date unchanged
    """
    if not self.negative:
        return self._strict_date(lean=LATEST)

    resolved = self._strict_date(lean=EARLIEST)
    precision = self.precision

    if precision in (
        PRECISION_YEAR,
        PRECISION_DECADE,
        PRECISION_CENTURY,
        PRECISION_MILLENIUM,
    ):
        month, day = 12, 31
    elif precision == PRECISION_MONTH:
        month = resolved.tm_mon
        day = calendar.monthrange(resolved.tm_year, month)[1]
    else:
        return resolved

    return struct_time(
        (resolved.tm_year, month, day)
        + tuple(TIME_EMPTY_TIME)
        + tuple(TIME_EMPTY_EXTRAS)
    )

@property
def precision(self):
if self.day:
return PRECISION_DAY
if self.month:
return PRECISION_MONTH
if self.year:
if self.year.isdigit():
year_no_symbol = self.year.lstrip("-")
if year_no_symbol.isdigit():
return PRECISION_YEAR
if len(self.year) == 4 and self.year.endswith("XXX"):
if len(year_no_symbol) == 4 and year_no_symbol.endswith("XXX"):
return PRECISION_MILLENIUM
if len(self.year) == 4 and self.year.endswith("XX"):
if len(year_no_symbol) == 4 and year_no_symbol.endswith("XX"):
return PRECISION_CENTURY
if len(self.year) == 4 and self.year.endswith("X"):
if len(year_no_symbol) == 4 and year_no_symbol.endswith("X"):
return PRECISION_DECADE
raise ValueError(f"Unspecified date {self} has no precision")

Expand Down
3 changes: 3 additions & 0 deletions edtf/parser/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@
("1999-01-XX", ("1999-01-01", "1999-01-31")),
# some day in 1999
("1999-XX-XX", ("1999-01-01", "1999-12-31")),
# negative unspecified year
("-01XX", ("-0199-01-01", "-0100-12-31")),
# Uncertain/Approximate lower boundary dates (BCE)
("-0275~", ("-0275-01-01", "-0275-12-31", "-0276-01-01", "-0274-12-31")),
("-0001~", ("-0001-01-01", "-0001-12-31", "-0002-01-01", "0000-12-31")),
Expand All @@ -92,6 +94,7 @@
("1XXX", ("1000-01-01", "1999-12-31")),
("1XXX~", ("1000-01-01", "1999-12-31", "0000-01-01", "2999-12-31")),
("156X~", ("1560-01-01", "1569-12-31", "1550-01-01", "1579-12-31")),
("-01XX~", ("-0199-01-01", "-0100-12-31", "-0299-01-01", "-0000-12-31")),
# L1 Extended Interval
# beginning unknown, end 2006
# for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years)
Expand Down

0 comments on commit b53df4a

Please sign in to comment.