diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py
index ae03251..f458b2b 100644
--- a/edtf/parser/grammar.py
+++ b/edtf/parser/grammar.py
@@ -162,7 +162,7 @@ def f(toks):
 
 # (* *** unspecified *** *)
 yearWithOneOrTwoOrThreeUnspecifedDigits = Combine(
-    digit + (digit ^ "X") + (digit ^ "X") + "X"
+    Optional("-") + digit + (digit ^ "X") + (digit ^ "X") + "X"
 )("year")
 monthUnspecified = year + "-" + L("XX")("month")
 dayUnspecified = yearMonth + "-" + L("XX")("day")
diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py
index 0bbf855..43f4a9c 100644
--- a/edtf/parser/parser_classes.py
+++ b/edtf/parser/parser_classes.py
@@ -561,16 +561,13 @@ def __init__(
             int(significant_digits) if significant_digits else None
         )
         self.ua = ua if ua else None
+        self.negative = self.year.startswith("-")
 
     def __str__(self):
-        r = self.year
-        if self.month:
-            r += f"-{self.month}"
-        if self.day:
-            r += f"-{self.day}"
+        base = super().__str__()
         if self.ua:
-            r += str(self.ua)
-        return r
+            base += str(self.ua)
+        return base
 
     def _get_fuzzy_padding(self, lean):
         if not self.ua:
@@ -579,24 +576,16 @@ def _get_fuzzy_padding(self, lean):
         padding = relativedelta()
 
         if self.year:
-            if self.precision == PRECISION_MILLENIUM:
-                padding += relativedelta(
-                    years=int(
-                        multiplier * appsettings.PADDING_MILLENNIUM_PRECISION.years
-                    )
-                )
-            elif self.precision == PRECISION_CENTURY:
-                padding += relativedelta(
-                    years=int(multiplier * appsettings.PADDING_CENTURY_PRECISION.years)
-                )
-            elif self.precision == PRECISION_DECADE:
-                padding += relativedelta(
-                    years=int(multiplier * appsettings.PADDING_DECADE_PRECISION.years)
-                )
-            else:
-                padding += relativedelta(
-                    years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years)
-                )
+            year_no_symbol = self.year.lstrip("-")
+            years_padding = self._calculate_years_padding(multiplier, year_no_symbol)
+            # Reverse the padding for negative years and earliest calculations
+            # if self.negative:
+            #     years_padding = -years_padding if lean == EARLIEST else years_padding
+            # else:
+            #     years_padding = years_padding if lean == EARLIEST else -years_padding
+
+            padding += years_padding
+
         if self.month:
             padding += relativedelta(
                 months=int(multiplier * appsettings.PADDING_MONTH_PRECISION.months)
@@ -608,6 +597,184 @@ def _get_fuzzy_padding(self, lean):
 
         return padding
 
+    def _calculate_years_padding(self, multiplier, year_no_symbol):
+        if self.precision == PRECISION_MILLENIUM:
+            return relativedelta(
+                years=int(multiplier * appsettings.PADDING_MILLENNIUM_PRECISION.years)
+            )
+        elif self.precision == PRECISION_CENTURY:
+            return relativedelta(
+                years=int(multiplier * appsettings.PADDING_CENTURY_PRECISION.years)
+            )
+        elif self.precision == PRECISION_DECADE:
+            return relativedelta(
+                years=int(multiplier * appsettings.PADDING_DECADE_PRECISION.years)
+            )
+        else:
+            return relativedelta(
+                years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years)
+            )
+
+    def lower_fuzzy(self):
+        time_empty_time_tuple = tuple(TIME_EMPTY_TIME)
+        time_empty_extras_tuple = tuple(TIME_EMPTY_EXTRAS)
+        strict_val = (
+            self.lower_strict()
+        )  # negative handled in the lower_strict() override
+
+        if self.negative:
+            adjusted = apply_delta(sub, strict_val, self._get_fuzzy_padding(LATEST))
+            if (
+                self.precision == PRECISION_YEAR
+                or self.precision == PRECISION_DECADE
+                or self.precision == PRECISION_CENTURY
+                or self.precision == PRECISION_MILLENIUM
+            ):
+                adjusted = struct_time(
+                    (adjusted.tm_year, 1, 1)
+                    + time_empty_time_tuple
+                    + time_empty_extras_tuple
+                )
+            elif self.precision == PRECISION_MONTH:
+                adjusted = struct_time(
+                    (adjusted.tm_year, adjusted.tm_mon, 1)
+                    + time_empty_time_tuple
+                    + time_empty_extras_tuple
+                )
+        else:
+            adjusted = apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST))
+            if (
+                self.precision == PRECISION_YEAR
+                or self.precision == PRECISION_DECADE
+                or self.precision == PRECISION_CENTURY
+                or self.precision == PRECISION_MILLENIUM
+            ):
+                adjusted = struct_time(
+                    (adjusted.tm_year, 1, 1)
+                    + time_empty_time_tuple
+                    + time_empty_extras_tuple
+                )
+            elif self.precision == PRECISION_MONTH:
+                days_in_month = calendar.monthrange(adjusted.tm_year, adjusted.tm_mon)[
+                    1
+                ]
+                adjusted = struct_time(
+                    (adjusted.tm_year, adjusted.tm_mon, days_in_month)
+                    + time_empty_time_tuple
+                    + time_empty_extras_tuple
+                )
+
+        return adjusted
+
+    def upper_fuzzy(self):
+        time_empty_time_tuple = tuple(TIME_EMPTY_TIME)
+        time_empty_extras_tuple = tuple(TIME_EMPTY_EXTRAS)
+        strict_val = (
+            self.upper_strict()
+        )  # negative handled in the upper_strict() override
+
+        if self.negative:
+            adjusted = apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST))
+            if (
+                self.precision == PRECISION_YEAR
+                or self.precision == PRECISION_DECADE
+                or self.precision == PRECISION_CENTURY
+                or self.precision == PRECISION_MILLENIUM
+            ):
+                adjusted = struct_time(
+                    (adjusted.tm_year, 12, 31)
+                    + time_empty_time_tuple
+                    + time_empty_extras_tuple
+                )
+            elif self.precision == PRECISION_MONTH:
+                days_in_month = calendar.monthrange(adjusted.tm_year, adjusted.tm_mon)[
+                    1
+                ]
+                adjusted = struct_time(
+                    (adjusted.tm_year, adjusted.tm_mon, days_in_month)
+                    + time_empty_time_tuple
+                    + time_empty_extras_tuple
+                )
+        else:
+            adjusted = apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST))
+            if (
+                self.precision == PRECISION_YEAR
+                or self.precision == PRECISION_DECADE
+                or self.precision == PRECISION_CENTURY
+                or self.precision == PRECISION_MILLENIUM
+            ):
+                adjusted = struct_time(
+                    (adjusted.tm_year, 12, 31)
+                    + time_empty_time_tuple
+                    + time_empty_extras_tuple
+                )
+            elif self.precision == PRECISION_MONTH:
+                adjusted = struct_time(
+                    (adjusted.tm_year, adjusted.tm_mon, 1)
+                    + time_empty_time_tuple
+                    + time_empty_extras_tuple
+                )
+
+        return adjusted
+
+    def lower_strict(self):
+        if self.negative:
+            strict_val = self._strict_date(
+                lean=LATEST
+            )  # gets the year right, but need to adjust day and month
+            if (
+                self.precision == PRECISION_YEAR
+                or self.precision == PRECISION_DECADE
+                or self.precision == PRECISION_CENTURY
+                or self.precision == PRECISION_MILLENIUM
+            ):
+                return struct_time(
+                    (strict_val.tm_year, 1, 1)
+                    + tuple(TIME_EMPTY_TIME)
+                    + tuple(TIME_EMPTY_EXTRAS)
+                )
+            elif self.precision == PRECISION_MONTH:
+                days_in_month = calendar.monthrange(
+                    strict_val.tm_year, strict_val.tm_mon
+                )[1]
+                return struct_time(
+                    (strict_val.tm_year, strict_val.tm_mon, days_in_month)
+                    + tuple(TIME_EMPTY_TIME)
+                    + tuple(TIME_EMPTY_EXTRAS)
+                )
+            else:
+                return strict_val
+        else:
+            return self._strict_date(lean=EARLIEST)
+
+    def upper_strict(self):
+        if self.negative:
+            strict_val = self._strict_date(lean=EARLIEST)
+            if (
+                self.precision == PRECISION_YEAR
+                or self.precision == PRECISION_DECADE
+                or self.precision == PRECISION_CENTURY
+                or self.precision == PRECISION_MILLENIUM
+            ):
+                return struct_time(
+                    (strict_val.tm_year, 12, 31)
+                    + tuple(TIME_EMPTY_TIME)
+                    + tuple(TIME_EMPTY_EXTRAS)
+                )
+            elif self.precision == PRECISION_MONTH:
+                days_in_month = calendar.monthrange(
+                    strict_val.tm_year, strict_val.tm_mon
+                )[1]
+                return struct_time(
+                    (strict_val.tm_year, strict_val.tm_mon, days_in_month)
+                    + tuple(TIME_EMPTY_TIME)
+                    + tuple(TIME_EMPTY_EXTRAS)
+                )
+            else:
+                return strict_val
+        else:
+            return self._strict_date(lean=LATEST)
+
     @property
     def precision(self):
         if self.day:
@@ -615,13 +782,14 @@ def precision(self):
         if self.month:
             return PRECISION_MONTH
         if self.year:
-            if self.year.isdigit():
+            year_no_symbol = self.year.lstrip("-")
+            if year_no_symbol.isdigit():
                 return PRECISION_YEAR
-            if len(self.year) == 4 and self.year.endswith("XXX"):
+            if len(year_no_symbol) == 4 and year_no_symbol.endswith("XXX"):
                 return PRECISION_MILLENIUM
-            if len(self.year) == 4 and self.year.endswith("XX"):
+            if len(year_no_symbol) == 4 and year_no_symbol.endswith("XX"):
                 return PRECISION_CENTURY
-            if len(self.year) == 4 and self.year.endswith("X"):
+            if len(year_no_symbol) == 4 and year_no_symbol.endswith("X"):
                 return PRECISION_DECADE
         raise ValueError(f"Unspecified date {self} has no precision")
 
diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py
index 464aca3..c89b3b8 100644
--- a/edtf/parser/tests.py
+++ b/edtf/parser/tests.py
@@ -81,6 +81,8 @@
     ("1999-01-XX", ("1999-01-01", "1999-01-31")),
     # some day in 1999
     ("1999-XX-XX", ("1999-01-01", "1999-12-31")),
+    # negative unspecified year
+    ("-01XX", ("-0199-01-01", "-0100-12-31")),
     # Uncertain/Approximate lower boundary dates (BCE)
     ("-0275~", ("-0275-01-01", "-0275-12-31", "-0276-01-01", "-0274-12-31")),
     ("-0001~", ("-0001-01-01", "-0001-12-31", "-0002-01-01", "0000-12-31")),
@@ -92,6 +94,7 @@
     ("1XXX", ("1000-01-01", "1999-12-31")),
     ("1XXX~", ("1000-01-01", "1999-12-31", "0000-01-01", "2999-12-31")),
     ("156X~", ("1560-01-01", "1569-12-31", "1550-01-01", "1579-12-31")),
+    ("-01XX~", ("-0199-01-01", "-0100-12-31", "-0299-01-01", "-0000-12-31")),
     # L1 Extended Interval
     # beginning unknown, end 2006
     # for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years)