diff --git a/edtf/appsettings.py b/edtf/appsettings.py
index e1bc821..e00a223 100644
--- a/edtf/appsettings.py
+++ b/edtf/appsettings.py
@@ -87,6 +87,13 @@
 PADDING_MONTH_PRECISION = EDTF.get("PADDING_MONTH_PRECISION", relativedelta(months=1))
 PADDING_YEAR_PRECISION = EDTF.get("PADDING_YEAR_PRECISION", relativedelta(years=1))
 PADDING_SEASON_PRECISION = EDTF.get("PADDING_SEASON_PRECISION", relativedelta(weeks=12))
+PADDING_DECADE_PRECISION = EDTF.get("PADDING_DECADE_PRECISION", relativedelta(years=10))
+PADDING_CENTURY_PRECISION = EDTF.get(
+    "PADDING_CENTURY_PRECISION", relativedelta(years=100)
+)
+PADDING_MILLENNIUM_PRECISION = EDTF.get(
+    "PADDING_MILLENNIUM_PRECISION", relativedelta(years=1000)
+)
 MULTIPLIER_IF_UNCERTAIN = EDTF.get("MULTIPLIER_IF_UNCERTAIN", 1.0)
 MULTIPLIER_IF_APPROXIMATE = EDTF.get("MULTIPLIER_IF_APPROXIMATE", 1.0)
 MULTIPLIER_IF_BOTH = EDTF.get("MULTIPLIER_IF_BOTH", 2.0)
diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py
index dc0f66d..f458b2b 100644
--- a/edtf/parser/grammar.py
+++ b/edtf/parser/grammar.py
@@ -161,17 +161,19 @@ def f(toks):
 Level1Interval.set_parser(level1Interval)
 
 # (* *** unspecified *** *)
-yearWithOneOrTwoUnspecifedDigits = Combine(digit + digit + (digit ^ "X") + "X")("year")
+yearWithOneOrTwoOrThreeUnspecifedDigits = Combine(
+    Optional("-") + digit + (digit ^ "X") + (digit ^ "X") + "X"
+)("year")
 monthUnspecified = year + "-" + L("XX")("month")
 dayUnspecified = yearMonth + "-" + L("XX")("day")
 dayAndMonthUnspecified = year + "-" + L("XX")("month") + "-" + L("XX")("day")
 
 unspecified = (
-    yearWithOneOrTwoUnspecifedDigits
+    yearWithOneOrTwoOrThreeUnspecifedDigits
     ^ monthUnspecified
     ^ dayUnspecified
     ^ dayAndMonthUnspecified
-)
+) + Optional(UASymbol)("ua")
 Unspecified.set_parser(unspecified)
 
 # (* *** uncertainOrApproxDate *** *)
diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py
index e12ecbd..a15cbf1 100644
--- a/edtf/parser/parser_classes.py
+++ b/edtf/parser/parser_classes.py
@@ -541,7 +541,152 @@ def precision(self):
 
 
 class Unspecified(Date):
-    pass
+    """A date with unspecified ("X") digits and an optional qualifier.
+
+    Covers values such as ``16XX``, ``-01XX~`` or ``1999-XX-XX``. The strict
+    bounds span every date the "X" placeholders can stand for; when a
+    qualifier (``ua``) is present the fuzzy bounds are widened by a padding
+    scaled to the precision of the value.
+    """
+
+    def __init__(
+        self,
+        year=None,
+        month=None,
+        day=None,
+        significant_digits=None,
+        ua=None,
+        **kwargs,
+    ):
+        """Store the parsed components and the optional qualifier ``ua``."""
+        super().__init__(
+            year=year,
+            month=month,
+            day=day,
+            significant_digits=significant_digits,
+            **kwargs,
+        )
+        self.ua = ua
+        # A leading "-" marks a negative year, which flips the lean that
+        # yields the earliest/latest year in the strict bounds.
+        self.negative = self.year.startswith("-")
+
+    def __str__(self):
+        """Return the date string followed by the qualifier, if any."""
+        base = super().__str__()
+        if self.ua:
+            base += str(self.ua)
+        return base
+
+    def _get_fuzzy_padding(self, lean):
+        """Return the padding between the strict and the fuzzy bounds.
+
+        Without a qualifier the padding is empty. Otherwise every component
+        that is present contributes its configured padding, scaled by the
+        qualifier's multiplier. ``lean`` is not used: the same padding
+        applies in both directions.
+        """
+        if not self.ua:
+            return relativedelta()
+        multiplier = self.ua._get_multiplier()
+        padding = relativedelta()
+
+        if self.year:
+            padding += self._years_padding(multiplier)
+        if self.month:
+            padding += relativedelta(
+                months=int(multiplier * appsettings.PADDING_MONTH_PRECISION.months)
+            )
+        if self.day:
+            padding += relativedelta(
+                days=int(multiplier * appsettings.PADDING_DAY_PRECISION.days)
+            )
+        return padding
+
+    def _years_padding(self, multiplier):
+        """Calculate year padding based on the precision."""
+        precision_settings = {
+            PRECISION_MILLENIUM: appsettings.PADDING_MILLENNIUM_PRECISION.years,
+            PRECISION_CENTURY: appsettings.PADDING_CENTURY_PRECISION.years,
+            PRECISION_DECADE: appsettings.PADDING_DECADE_PRECISION.years,
+            PRECISION_YEAR: appsettings.PADDING_YEAR_PRECISION.years,
+        }
+        # Precisions not listed above (month, day) add no year padding.
+        years = precision_settings.get(self.precision, 0)
+        return relativedelta(years=int(multiplier * years))
+
+    def lower_fuzzy(self):
+        """Return the lower strict bound moved earlier by the fuzzy padding."""
+        # Negative years are handled in the lower_strict() override.
+        return apply_delta(sub, self.lower_strict(), self._get_fuzzy_padding(EARLIEST))
+
+    def upper_fuzzy(self):
+        """Return the upper strict bound moved later by the fuzzy padding."""
+        # Negative years are handled in the upper_strict() override.
+        return apply_delta(add, self.upper_strict(), self._get_fuzzy_padding(LATEST))
+
+    def lower_strict(self):
+        """Return the earliest date this value can stand for.
+
+        For a negative year the largest magnitude is the earliest year, so the
+        year comes from the LATEST lean while month and day still come from the
+        EARLIEST lean; e.g. the lower bound of ``-1999-XX-XX`` is 1 January, not
+        a date in December.
+        """
+        earliest = self._strict_date(lean=EARLIEST)
+        if not self.negative:
+            return earliest
+        latest = self._strict_date(lean=LATEST)
+        return struct_time(
+            (latest.tm_year, earliest.tm_mon, earliest.tm_mday)
+            + tuple(TIME_EMPTY_TIME)
+            + tuple(TIME_EMPTY_EXTRAS)
+        )
+
+    def upper_strict(self):
+        """Return the latest date this value can stand for.
+
+        For a negative year the smallest magnitude is the latest year, so the
+        year comes from the EARLIEST lean while month and day come from the
+        LATEST lean.
+        """
+        latest = self._strict_date(lean=LATEST)
+        if not self.negative:
+            return latest
+        earliest = self._strict_date(lean=EARLIEST)
+        year, month = earliest.tm_year, latest.tm_mon
+        if earliest.tm_mday == latest.tm_mday:
+            # Both leans agree, so the day is fully specified.
+            day = latest.tm_mday
+        else:
+            # Open day: use the last day of the month in the resulting year.
+            day = calendar.monthrange(year, month)[1]
+        return struct_time(
+            (year, month, day) + tuple(TIME_EMPTY_TIME) + tuple(TIME_EMPTY_EXTRAS)
+        )
+
+    @property
+    def precision(self):
+        """Return the PRECISION_* constant for this value.
+
+        Raises:
+            ValueError: if no component yields a known precision.
+        """
+        if self.day:
+            return PRECISION_DAY
+        if self.month:
+            return PRECISION_MONTH
+        if self.year:
+            year_no_symbol = self.year.lstrip("-")
+            if year_no_symbol.isdigit():
+                return PRECISION_YEAR
+            if len(year_no_symbol) == 4 and year_no_symbol.endswith("XXX"):
+                return PRECISION_MILLENIUM
+            if len(year_no_symbol) == 4 and year_no_symbol.endswith("XX"):
+                return PRECISION_CENTURY
+            if len(year_no_symbol) == 4 and year_no_symbol.endswith("X"):
+                return PRECISION_DECADE
+        raise ValueError(f"Unspecified date {self} has no precision")
 
 
 class Level1Interval(Interval):
diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py
index 4932e95..199f245 100644
--- a/edtf/parser/tests.py
+++ b/edtf/parser/tests.py
@@ -81,10 +81,20 @@
     ("1999-01-XX", ("1999-01-01", "1999-01-31")),
     # some day in 1999
     ("1999-XX-XX", ("1999-01-01", "1999-12-31")),
+    # negative unspecified year
+    ("-01XX", ("-0199-01-01", "-0100-12-31")),
     # Uncertain/Approximate lower boundary dates (BCE)
     ("-0275~", ("-0275-01-01", "-0275-12-31", "-0276-01-01", "-0274-12-31")),
     ("-0001~", ("-0001-01-01", "-0001-12-31", "-0002-01-01", "0000-12-31")),
     ("0000~", ("0000-01-01", "0000-12-31", "-0001-01-01", "0001-12-31")),
+    # Unspecified and qualified
+    # "circa 17th century"
+    ("16XX~", ("1600-01-01", "1699-12-31", "1500-01-01", "1799-12-31")),
+    ("16XX%", ("1600-01-01", "1699-12-31", "1400-01-01", "1899-12-31")),
+    ("1XXX", ("1000-01-01", "1999-12-31")),
+    ("1XXX~", ("1000-01-01", "1999-12-31", "0000-01-01", "2999-12-31")),
+    ("156X~", ("1560-01-01", "1569-12-31", "1550-01-01", "1579-12-31")),
+    ("-01XX~", ("-0199-01-01", "-0100-12-31", "-0299-01-01", "0000-12-31")),
     # L1 Extended Interval
     # beginning unknown, end 2006
     # for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years)