From 8789cb427c180f161d9248c96d1e86363d257370 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Fri, 4 Feb 2022 21:23:52 +0100 Subject: [PATCH 001/102] update unspecified date character MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit the unspecified date character (formerly lower case ‘u’) is superseded by the character (upper case) 'X'; --- README.md | 24 ++++++++++++------------ edtf/parser/grammar.py | 26 +++++++++++++------------- edtf/parser/parser_classes.py | 8 ++++---- edtf/parser/tests.py | 22 +++++++++++----------- 4 files changed, 40 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index 8f5c13c..92cccee 100644 --- a/README.md +++ b/README.md @@ -108,10 +108,10 @@ Test coverage includes every example given in the spec table of features. * Unspecified dates: - >>> parse_edtf('1979-08-uu') # An unknown day in August 1979 - Unspecified: '1979-08-uu' - >>> parse_edtf('1979-uu') # Some month in 1979 - Unspecified: '1979-uu' + >>> parse_edtf('1979-08-XX') # An unknown day in August 1979 + Unspecified: '1979-08-XX' + >>> parse_edtf('1979-XX') # Some month in 1979 + Unspecified: '1979-XX' * Extended intervals: @@ -138,8 +138,8 @@ Test coverage includes every example given in the spec table of features. * Partial unspecified: - >>> parse_edtf('1979-uu-28') # The 28th day of an uncertain month in 1979 - PartialUnspecified: '1979-uu-28' + >>> parse_edtf('1979-XX-28') # The 28th day of an uncertain month in 1979 + PartialUnspecified: '1979-XX-28' * One of a set: @@ -204,8 +204,8 @@ The parser can parse strings such as: 'c1800s?' => '180x?~' # with uncertainty indicators, use the decade # unspecified parts - 'January 12' => 'uuuu-01-12' - 'January' => 'uuuu-01' + 'January 12' => 'XXXX-01-12' + 'January' => 'XXXX-01' '7/2008' => '2008-07' #seasons @@ -221,9 +221,9 @@ The parser can parse strings such as: # unspecified 'year in the 1860s' => '186u' #186x has decade precision, 186u has year precision. 
('year in the 1800s', '18xu') - 'month in 1872' => '1872-uu' - 'day in January 1872' => '1872-01-uu' - 'day in 1872' => '1872-uu-uu' + 'month in 1872' => '1872-XX' + 'day in January 1872' => '1872-01-XX' + 'day in 1872' => '1872-XX-XX' #centuries '1st century' => '00xx' @@ -231,7 +231,7 @@ The parser can parse strings such as: '19th century?' => '18xx?' # just showing off now... - 'a day in about Spring 1849?' => '1849-21-uu?~' + 'a day in about Spring 1849?' => '1849-21-XX?~' # simple ranges, which aren't as accurate as they could be. The parser is limited to only picking the first year range it finds. diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index c028c6e..12f440f 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -115,11 +115,11 @@ def f(toks): # (* *** unspecified *** *) yearWithOneOrTwoUnspecifedDigits = Combine( - digit + digit + (digit ^ 'u') + 'u' + digit + digit + (digit ^ 'X') + 'X' )("year") -monthUnspecified = year + "-" + L("uu")("month") -dayUnspecified = yearMonth + "-" + L("uu")("day") -dayAndMonthUnspecified = year + "-" + L("uu")("month") + "-" + L("uu")("day") +monthUnspecified = year + "-" + L("XX")("month") +dayUnspecified = yearMonth + "-" + L("XX")("day") +dayAndMonthUnspecified = year + "-" + L("XX")("month") + "-" + L("XX")("day") unspecified = yearWithOneOrTwoUnspecifedDigits \ ^ monthUnspecified \ @@ -142,26 +142,26 @@ def f(toks): # (* ** Internal Unspecified** *) -digitOrU = Word(nums + 'u', exact=1) +digitOrU = Word(nums + 'X', exact=1) -# 2-digit day with at least one 'u' present +# 2-digit day with at least one 'X' present dayWithU = Combine( ("u" + digitOrU) ^ - (digitOrU + 'u') + (digitOrU + 'X') )("day") -# 2-digit month with at least one 'u' present +# 2-digit month with at least one 'X' present monthWithU = Combine( oneOf("0u 1u") ^ ("u" + digitOrU) )("month") -# 4-digit year with at least one 'u' present +# 4-digit year with at least one 'X' present yearWithU = Combine( - ('u' + digitOrU + 
digitOrU + digitOrU) ^ - (digitOrU + 'u' + digitOrU + digitOrU) ^ - (digitOrU + digitOrU + 'u' + digitOrU) ^ - (digitOrU + digitOrU + digitOrU + 'u') + ('X' + digitOrU + digitOrU + digitOrU) ^ + (digitOrU + 'X' + digitOrU + digitOrU) ^ + (digitOrU + digitOrU + 'X' + digitOrU) ^ + (digitOrU + digitOrU + digitOrU + 'X') )("year") yearMonthWithU = ( diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index b670296..3f626b1 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -263,12 +263,12 @@ def isoformat(self, default=date.max): def _precise_year(self, lean): # Replace any ambiguous characters in the year string with 0s or 9s if lean == EARLIEST: - return int(re.sub(r'[xu]', r'0', self.year)) + return int(re.sub(r'X', r'0', self.year)) else: - return int(re.sub(r'[xu]', r'9', self.year)) + return int(re.sub(r'X', r'9', self.year)) def _precise_month(self, lean): - if self.month and self.month != "uu": + if self.month and self.month != "XX": try: return int(self.month) except ValueError as e: @@ -277,7 +277,7 @@ def _precise_month(self, lean): return 1 if lean == EARLIEST else 12 def _precise_day(self, lean): - if not self.day or self.day == 'uu': + if not self.day or self.day == "XX": if lean == EARLIEST: return 1 else: diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index f9dde42..54c34cb 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -64,15 +64,15 @@ ('1984?~', '1984-01-01', '1984-12-31', '1982-01-01', '1986-12-31'), # Unspecified # some unspecified year in the 1990s. - ('199u', '1990-01-01', '1999-12-31'), + ('199X', '1990-01-01', '1999-12-31'), # some unspecified year in the 1900s. 
- ('19uu', '1900-01-01', '1999-12-31'), + ('19XX', '1900-01-01', '1999-12-31'), # some month in 1999 - ('1999-uu', '1999-01-01', '1999-12-31'), + ('1999-XX', '1999-01-01', '1999-12-31'), # some day in January 1999 - ('1999-01-uu', '1999-01-01', '1999-01-31'), + ('1999-01-XX', '1999-01-01', '1999-01-31'), # some day in 1999 - ('1999-uu-uu', '1999-01-01', '1999-12-31'), + ('1999-XX-XX', '1999-01-01', '1999-12-31'), # Uncertain/Approximate lower boundary dates (BCE) ('-0275~', '-0275-01-01', '-0275-12-31', '-0276-01-01', '-0274-12-31'), @@ -144,14 +144,14 @@ ('2011-24~', '2011-12-01', '2011-12-31', '2011-09-08', '2012-03-24'), # Partial unspecified # December 25 sometime during the 1560s - ('156u-12-25', '1560-12-25', '1569-12-25'), + ('156X-12-25', '1560-12-25', '1569-12-25'), # December 25 sometime during the 1500s - ('15uu-12-25', '1500-12-25', '1599-12-25'), + ('15XX-12-25', '1500-12-25', '1599-12-25'), # Year and day of month specified, month unspecified - ('1560-uu-25', '1560-01-25', '1560-12-25'), - ('15uu-12-uu', '1500-12-01', '1599-12-31'), + ('1560-XX-25', '1560-01-25', '1560-12-25'), + ('15XX-12-XX', '1500-12-01', '1599-12-31'), # Day specified, year and month unspecified - ('uuuu-uu-23', '0000-01-23', '9999-12-23'), + ('XXXX-XX-23', '0000-01-23', '9999-12-23'), # One of a Set # One of the years 1667, 1668, 1670, 1671, 1672 (('[1667,1668, 1670..1672]', '[1667, 1668, 1670..1672]'), '1667-01-01', '1672-12-31'), @@ -177,7 +177,7 @@ # An interval in June 2004 beginning approximately the first and ending approximately the 20th. ('2004-06-(01)~/2004-06-(20)~', '2004-06-01', '2004-06-20', '2004-05-31', '2004-06-21'), # The interval began on an unspecified day in June 2004. 
- ('2004-06-uu/2004-07-03', '2004-06-01', '2004-07-03'), + ('2004-06-XX/2004-07-03', '2004-06-01', '2004-07-03'), # Year Requiring More than Four Digits - Exponential Form # the year 170000000 ('y17e7', '170000000-01-01', '170000000-12-31'), From 1895d6ba1ebde38234c9d2595216095fd5aab6fc Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Thu, 10 Feb 2022 13:28:56 +0100 Subject: [PATCH 002/102] update for uncertain AND approximate --- edtf/parser/grammar.py | 2 +- edtf/parser/parser_classes.py | 12 +++++++++++- edtf/parser/tests.py | 2 +- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 12f440f..54a9451 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -74,7 +74,7 @@ # (* ************************** Level 1 *************************** *) # (* ** Auxiliary Assignments for Level 1 ** *) -UASymbol = Combine(oneOf("? ~ ?~")) +UASymbol = Combine(oneOf("? ~ %")) UA.set_parser(UASymbol) seasonNumber = oneOf("21 22 23 24") diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 3f626b1..8f65c03 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -150,6 +150,13 @@ def set_is_uncertain(self, val): self._is_uncertain = val is_uncertain = property(get_is_uncertain, set_is_uncertain) + def get_is_uncertain_and_approximate(self): + return getattr(self, '_uncertain_and_approximate', False) + + def set_is_uncertain_and_approximate(self, val): + self._uncertain_and_approximate = val + is_uncertain_and_approximate = property(get_is_uncertain_and_approximate, set_is_uncertain_and_approximate) + def lower_fuzzy(self): strict_val = self.lower_strict() return apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) @@ -384,6 +391,7 @@ def __init__(self, *args): self.is_uncertain = "?" 
in ua self.is_approximate = "~" in ua + self.is_uncertain_and_approximate = "%" in ua def __str__(self): d = "" @@ -391,10 +399,12 @@ def __str__(self): d += "?" if self.is_approximate: d += "~" + if self.is_uncertain_and_approximate: + d += "%" return d def _get_multiplier(self): - if self.is_uncertain and self.is_approximate: + if self.is_uncertain_and_approximate: return appsettings.MULTIPLIER_IF_BOTH elif self.is_uncertain: return appsettings.MULTIPLIER_IF_UNCERTAIN diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 54c34cb..8f6dd01 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -61,7 +61,7 @@ # "approximately" the year 1984 ('1984~', '1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31'), # the year is approximately 1984 and even that is uncertain - ('1984?~', '1984-01-01', '1984-12-31', '1982-01-01', '1986-12-31'), + ('1984%', '1984-01-01', '1984-12-31', '1982-01-01', '1986-12-31'), # Unspecified # some unspecified year in the 1990s. ('199X', '1990-01-01', '1999-12-31'), From 8067056e5c645a9c9f09cdf856bf37d6f0bd5617 Mon Sep 17 00:00:00 2001 From: muellers-saw-leipzig Date: Mon, 27 Mar 2023 13:48:39 +0200 Subject: [PATCH 003/102] catch the right exception --- edtf/natlang/en.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index ec7842b..ff83034 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -1,6 +1,6 @@ """Utilities to derive an EDTF string from an (English) natural language string.""" from datetime import datetime -from dateutil.parser import parse +from dateutil.parser import parse, ParserError import re from edtf import appsettings from six.moves import xrange @@ -198,7 +198,9 @@ def text_to_edtf_date(text): default=DEFAULT_DATE_2 ) - except ValueError: + except ParserError as pe: + return + except Exception as e: return if dt1.date() == DEFAULT_DATE_1.date() and \ From 57e86860a93e517f073b84d83c0b2f802e5ac701 Mon Sep 17 00:00:00 2001 From: 
muellers-saw-leipzig Date: Mon, 27 Mar 2023 14:05:49 +0200 Subject: [PATCH 004/102] update test for uncertain AND approximate qualifier --- edtf/parser/tests.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 8f6dd01..43397f7 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -90,7 +90,7 @@ ('1984~/2004-06', '1984-01-01', '2004-06-30', '1983-01-01', '2004-06-30'), # interval beginning 1984 and ending approximately June 2004 ('1984/2004-06~', '1984-01-01', '2004-06-30', '1984-01-01', '2004-07-30'), - ('1984?/2004?~', '1984-01-01', '2004-12-31', '1983-01-01', '2006-12-31'), + ('1984?/2004%', '1984-01-01', '2004-12-31', '1983-01-01', '2006-12-31'), ('1984~/2004~', '1984-01-01', '2004-12-31', '1983-01-01', '2005-12-31'), # interval whose beginning is uncertain but thought to be 1984, and whose end is uncertain and approximate but thought to be 2004 ('1984-06?/2004-08?', '1984-06-01', '2004-08-31', '1984-05-01', '2004-09-30'), @@ -123,7 +123,7 @@ # day is approximate; year, month known ('2004-06-(11)~', '2004-06-11', '2004-06-10', '2004-06-12'), # Year known, month within year is approximate and uncertain - ('2004-(06)?~', '2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30'), + ('2004-(06)%', '2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30'), # Year known, month and day uncertain ('2004-(06-11)?', '2004-06-11', '2004-05-10', '2004-07-12'), # Year uncertain, month known, day approximate @@ -131,7 +131,7 @@ # Year uncertain and month is both uncertain and approximate ('(2004-(06)~)?', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), # This has the same meaning as the previous example. - ('2004?-(06)?~', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), + ('2004?-(06)%', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), # Year uncertain, month and day approximate. 
(('(2004)?-06-04~', '2004?-06-04~'), '2004-06-04', '2003-05-03', '2005-07-05'), # Year known, month and day approximate. Note that this has the same meaning as the following. From 2d241be5de711d96610cb56286b748e79925210c Mon Sep 17 00:00:00 2001 From: muellers-saw-leipzig Date: Mon, 27 Mar 2023 14:30:12 +0200 Subject: [PATCH 005/102] fixing/completing the update of unspecified date character ('X' instead of 'u') --- edtf/parser/grammar.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 54a9451..76bb31a 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -146,38 +146,38 @@ def f(toks): # 2-digit day with at least one 'X' present dayWithU = Combine( - ("u" + digitOrU) ^ - (digitOrU + 'X') + ("X" + digitOrU) + ^ (digitOrU + 'X') )("day") # 2-digit month with at least one 'X' present monthWithU = Combine( - oneOf("0u 1u") ^ - ("u" + digitOrU) + oneOf("0X 1X") + ^ ("X" + digitOrU) )("month") # 4-digit year with at least one 'X' present yearWithU = Combine( - ('X' + digitOrU + digitOrU + digitOrU) ^ - (digitOrU + 'X' + digitOrU + digitOrU) ^ - (digitOrU + digitOrU + 'X' + digitOrU) ^ - (digitOrU + digitOrU + digitOrU + 'X') + ('X' + digitOrU + digitOrU + digitOrU) + ^ (digitOrU + 'X' + digitOrU + digitOrU) + ^ (digitOrU + digitOrU + 'X' + digitOrU) + ^ (digitOrU + digitOrU + digitOrU + 'X') )("year") yearMonthWithU = ( - (Combine(year("") ^ yearWithU(""))("year") + "-" + monthWithU) ^ - (yearWithU + "-" + month) + (Combine(year("") ^ yearWithU(""))("year") + "-" + monthWithU) + ^ (yearWithU + "-" + month) ) monthDayWithU = ( - (Combine(month("") ^ monthWithU(""))("month") + "-" + dayWithU) ^ - (monthWithU + "-" + day) + (Combine(month("") ^ monthWithU(""))("month") + "-" + dayWithU) + ^ (monthWithU + "-" + day) ) yearMonthDayWithU = ( - (yearWithU + "-" + Combine(month("") ^ monthWithU(""))("month") + "-" + Combine(day("") ^ dayWithU(""))("day")) ^ - 
(year + "-" + monthWithU + "-" + Combine(day("") ^ dayWithU(""))("day")) ^ - (year + "-" + month + "-" + dayWithU) + (yearWithU + "-" + Combine(month("") ^ monthWithU(""))("month") + "-" + Combine(day("") ^ dayWithU(""))("day")) + ^ (year + "-" + monthWithU + "-" + Combine(day("") ^ dayWithU(""))("day")) + ^ (year + "-" + month + "-" + dayWithU) ) partialUnspecified = yearWithU ^ yearMonthWithU ^ yearMonthDayWithU From 3b08350fdbe1e0c0ba25820c351877635b8c7d3f Mon Sep 17 00:00:00 2001 From: muellers-saw-leipzig Date: Mon, 27 Mar 2023 15:29:51 +0200 Subject: [PATCH 006/102] code formatting: break-before-binary-operator style --- edtf/parser/grammar.py | 48 +++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 76bb31a..bf5552a 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -30,9 +30,9 @@ month = oneThru12("month") monthDay = ( - (oneOf("01 03 05 07 08 10 12")("month") + "-" + oneThru31("day")) ^ - (oneOf("04 06 09 11")("month") + "-" + oneThru30("day")) ^ - (L("02")("month") + "-" + oneThru29("day")) + (oneOf("01 03 05 07 08 10 12")("month") + "-" + oneThru31("day")) + ^ (oneOf("04 06 09 11")("month") + "-" + oneThru30("day")) + ^ (L("02")("month") + "-" + oneThru29("day")) ) # 4 digits, 0 to 9 @@ -50,13 +50,13 @@ Date.set_parser(date) zoneOffsetHour = oneThru13 -zoneOffset = L("Z") ^ ( - Regex("[+-]") + ( - zoneOffsetHour + Optional(":" + minute) ^ - L("14:00") ^ - ("00:" + oneThru59) - ) -) +zoneOffset = L("Z") \ + ^ (Regex("[+-]") + + (zoneOffsetHour + Optional(":" + minute) + ^ L("14:00") + ^ ("00:" + oneThru59) + ) + ) baseTime = Combine(hour + ":" + minute + ":" + second ^ "24:00:00") @@ -196,12 +196,12 @@ def f(toks): (year_with_brackets + UASymbol("year_ua") + "-" + month + Optional("-(" + day + ")" + UASymbol("day_ua"))) \ ^ (year_with_brackets + Optional(UASymbol)("year_ua") + "-" + monthDay + Optional(UASymbol)("month_day_ua")) \ ^ 
( - year_with_brackets + Optional(UASymbol)("year_ua") + "-(" + month + ")" + UASymbol("month_ua") + - Optional("-(" + day + ")" + UASymbol("day_ua")) + year_with_brackets + Optional(UASymbol)("year_ua") + "-(" + month + ")" + UASymbol("month_ua") + + Optional("-(" + day + ")" + UASymbol("day_ua")) ) \ ^ ( - year_with_brackets + Optional(UASymbol)("year_ua") + "-(" + month + ")" + UASymbol("month_ua") + - Optional("-" + day) + year_with_brackets + Optional(UASymbol)("year_ua") + "-(" + month + ")" + UASymbol("month_ua") + + Optional("-" + day) ) \ ^ (yearMonth + UASymbol("year_month_ua") + "-(" + day + ")" + UASymbol("day_ua")) \ ^ (yearMonth + UASymbol("year_month_ua") + "-" + day) \ @@ -213,7 +213,7 @@ def f(toks): PartialUncertainOrApproximate.set_parser(partialUncertainOrApproximate) dateWithInternalUncertainty = partialUncertainOrApproximate \ - ^ partialUnspecified + ^ partialUnspecified qualifyingString = Regex(r'\S') # any nonwhitespace char @@ -229,8 +229,8 @@ def f(toks): # (* ** level2Interval ** *) level2Interval = (dateOrSeason("lower") + "/" + dateWithInternalUncertainty("upper")) \ - ^ (dateWithInternalUncertainty("lower") + "/" + dateOrSeason("upper")) \ - ^ (dateWithInternalUncertainty("lower") + "/" + dateWithInternalUncertainty("upper")) + ^ (dateWithInternalUncertainty("lower") + "/" + dateOrSeason("upper")) \ + ^ (dateWithInternalUncertainty("lower") + "/" + dateWithInternalUncertainty("upper")) Level2Interval.set_parser(level2Interval) # (* ** Masked precision ** *) @@ -266,13 +266,13 @@ def f(toks): MultipleDates.set_parser(inclusiveList) level2Expression = partialUncertainOrApproximate \ - ^ partialUnspecified \ - ^ choiceList \ - ^ inclusiveList \ - ^ maskedPrecision \ - ^ level2Interval \ - ^ longYearScientific \ - ^ seasonQualified + ^ partialUnspecified \ + ^ choiceList \ + ^ inclusiveList \ + ^ maskedPrecision \ + ^ level2Interval \ + ^ longYearScientific \ + ^ seasonQualified # putting it all together edtfParser = 
level0Expression("level0") ^ level1Expression("level1") ^ level2Expression("level2") From 379314205989efe76d59b25e2933c28531b63bab Mon Sep 17 00:00:00 2001 From: muellers-saw-leipzig Date: Mon, 27 Mar 2023 16:03:01 +0200 Subject: [PATCH 007/102] latest spec definitions for exponential year: the year prefix 'y' and the exponential indicator 'e', both previously lowercase, are now 'Y' and 'E' (uppercase); the significant digit indicator 'p' is now 'S' (uppercase) --> significant digit is still NOT implemented yet --- edtf/parser/grammar.py | 6 +++--- edtf/parser/parser_classes.py | 6 +++--- edtf/parser/tests.py | 16 ++++++++-------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index bf5552a..761d83d 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -87,7 +87,7 @@ # (* *** Long Year - Simple Form *** *) -longYearSimple = "y" + Combine( +longYearSimple = "Y" + Combine( Optional("-") + positiveDigit + digit + digit + digit + OneOrMore(digit) )("year") LongYear.set_parser(longYearSimple) @@ -223,8 +223,8 @@ def f(toks): # (* ** Long Year - Scientific Form ** *) positiveInteger = Combine(positiveDigit + ZeroOrMore(digit)) -longYearScientific = "y" + Combine(Optional("-") + positiveInteger)("base") + "e" + \ - positiveInteger("exponent") + Optional("p" + positiveInteger("precision")) +longYearScientific = "Y" + Combine(Optional("-") + positiveInteger)("base") + "E" + \ + positiveInteger("exponent") + Optional("S" + positiveInteger("precision")) ExponentialYear.set_parser(longYearScientific) # (* ** level2Interval ** *) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 8f65c03..f6d73c2 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -464,7 +464,7 @@ def __init__(self, year): self.year = year def __str__(self): - return "y%s" % self.year + return "Y%s" % self.year def _precise_year(self): return int(self.year) @@ -728,7 +728,7 
@@ def _precise_year(self): def get_year(self): if self.precision: - return '%se%sp%s' % (self.base, self.exponent, self.precision) + return '%sE%sS%s' % (self.base, self.exponent, self.precision) else: - return '%se%s' % (self.base, self.exponent) + return '%sE%s' % (self.base, self.exponent) year = property(get_year) diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 43397f7..9f6e5de 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -98,9 +98,9 @@ ('1984-06-02?/unknown', '1984-06-02', '1994-06-02', '1984-06-01', '1994-06-02'), # Year exceeding 4 digits # the year 170000002 - ('y170000002', '170000002-01-01', '170000002-12-31'), + ('Y170000002', '170000002-01-01', '170000002-12-31'), # the year -170000002 - ('y-170000002', '-170000002-01-01', '-170000002-12-31'), + ('Y-170000002', '-170000002-01-01', '-170000002-12-31'), # Seasons # Spring, 2001 ('2001-21', '2001-03-01', '2001-05-31'), @@ -180,19 +180,19 @@ ('2004-06-XX/2004-07-03', '2004-06-01', '2004-07-03'), # Year Requiring More than Four Digits - Exponential Form # the year 170000000 - ('y17e7', '170000000-01-01', '170000000-12-31'), + ('Y17E7', '170000000-01-01', '170000000-12-31'), # the year -170000000 - ('y-17e7', '-170000000-01-01', '-170000000-12-31'), - # Some year between 171010000 and 171999999, estimated to be 171010000 ('p3' indicates a precision of 3 significant digits.) + ('Y-17E7', '-170000000-01-01', '-170000000-12-31'), + # Some year between 171010000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.) 
# TODO Not yet implemented, see https://github.com/ixc/python-edtf/issues/12 - # ('y17101e4p3', '171010000-01-01', '171999999-12-31'), + # ('Y17101E4S3', '171010000-01-01', '171999999-12-31'), ) BAD_EXAMPLES = ( None, '', 'not a edtf string', - 'y17e7-12-26', # not implemented + 'Y17E7-12-26', # not implemented '2016-13-08', # wrong day order '2016-02-39', # out of range '-0000-01-01', # negative zero year @@ -206,7 +206,7 @@ def test_non_parsing(self): def test_date_values(self): """ - Test that every EDTFObject can tell you its lower and upper + Test that everY EDTFObject can tell you its lower and upper fuzzy and strict dates, and that they're what we think they should be. """ From 59ce382f7cfc70877700d836e1f70bc7d8de87b2 Mon Sep 17 00:00:00 2001 From: muellers-saw-leipzig Date: Mon, 27 Mar 2023 16:11:40 +0200 Subject: [PATCH 008/102] latest spec definition: elimination of masked precision --- edtf/parser/grammar.py | 7 +++---- edtf/parser/tests.py | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 761d83d..3c4dace 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -233,9 +233,9 @@ def f(toks): ^ (dateWithInternalUncertainty("lower") + "/" + dateWithInternalUncertainty("upper")) Level2Interval.set_parser(level2Interval) -# (* ** Masked precision ** *) -maskedPrecision = Combine(digit + digit + ((digit + "x") ^ "xx"))("year") -MaskedPrecision.set_parser(maskedPrecision) +# (* ** Masked precision ** *) eliminated in latest specs +# maskedPrecision = Combine(digit + digit + ((digit + "x") ^ "xx"))("year") +# MaskedPrecision.set_parser(maskedPrecision) # (* ** Inclusive list and choice list** *) consecutives = (yearMonthDay("lower") + ".." 
+ yearMonthDay("upper")) \ @@ -269,7 +269,6 @@ def f(toks): ^ partialUnspecified \ ^ choiceList \ ^ inclusiveList \ - ^ maskedPrecision \ ^ level2Interval \ ^ longYearScientific \ ^ seasonQualified diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 9f6e5de..e35b423 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -168,11 +168,11 @@ (('{1667,1668, 1670..1672}', '{1667, 1668, 1670..1672}'), '1667-01-01', '1672-12-31'), # The year 1960 and the month December of 1961. ('{1960, 1961-12}', '1960-01-01', '1961-12-31'), - # Masked Precision + # Masked Precision --> eliminated # A date during the 1960s - ('196x', '1960-01-01', '1969-12-31'), + #('196x', '1960-01-01', '1969-12-31'), # A date during the 1900s - ('19xx', '1900-01-01', '1999-12-31'), + #('19xx', '1900-01-01', '1999-12-31'), # L2 Extended Interval # An interval in June 2004 beginning approximately the first and ending approximately the 20th. ('2004-06-(01)~/2004-06-(20)~', '2004-06-01', '2004-06-20', '2004-05-31', '2004-06-21'), From 4d6ed4907836edb7b0b82a0eaa9f8fd4480d1e3d Mon Sep 17 00:00:00 2001 From: muellers-saw-leipzig Date: Tue, 28 Mar 2023 09:36:17 +0200 Subject: [PATCH 009/102] bugfix: not recognized time when hour is 23 --- edtf/parser/grammar.py | 2 +- edtf/parser/tests.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 3c4dace..4c1cc0e 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -13,7 +13,7 @@ oneThru12 = oneOf(['%.2d' % i for i in range(1, 13)]) oneThru13 = oneOf(['%.2d' % i for i in range(1, 14)]) oneThru23 = oneOf(['%.2d' % i for i in range(1, 24)]) -zeroThru23 = oneOf(['%.2d' % i for i in range(0, 23)]) +zeroThru23 = oneOf(['%.2d' % i for i in range(0, 24)]) oneThru29 = oneOf(['%.2d' % i for i in range(1, 30)]) oneThru30 = oneOf(['%.2d' % i for i in range(1, 31)]) oneThru31 = oneOf(['%.2d' % i for i in range(1, 32)]) diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py 
index e35b423..538e924 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -39,6 +39,7 @@ ('2001-02-03T09:30:01', '2001-02-03'), ('2004-01-01T10:10:10Z', '2004-01-01'), ('2004-01-01T10:10:10+05:00', '2004-01-01'), + ('1985-04-12T23:20:30', '1985-04-12'), # An interval beginning sometime in 1964 and ending sometime in 2008. Year precision. ('1964/2008', '1964-01-01', '2008-12-31'), # An interval beginning sometime in June 2004 and ending sometime in August of 2006. Month precision. From 8d556f99cb43e0659113239ba202bd62702f9651 Mon Sep 17 00:00:00 2001 From: muellers-saw-leipzig Date: Thu, 30 Mar 2023 12:17:56 +0200 Subject: [PATCH 010/102] latest spec definition: the extended interval syntax keywords 'unknown' and 'open' have been replaced with null and the double-dot notation ['..'] respectively; general overwork of intervals and consecutives to deal with open beginnings and open ends --- edtf/appsettings.py | 2 + edtf/parser/grammar.py | 13 +++-- edtf/parser/parser_classes.py | 97 ++++++++++++++++++++++++----------- edtf/parser/tests.py | 20 +++++--- 4 files changed, 88 insertions(+), 44 deletions(-) diff --git a/edtf/appsettings.py b/edtf/appsettings.py index b23d0aa..38e7b9f 100644 --- a/edtf/appsettings.py +++ b/edtf/appsettings.py @@ -43,5 +43,7 @@ MULTIPLIER_IF_UNCERTAIN = EDTF.get('MULTIPLIER_IF_UNCERTAIN', 1.0) MULTIPLIER_IF_APPROXIMATE = EDTF.get('MULTIPLIER_IF_APPROXIMATE', 1.0) MULTIPLIER_IF_BOTH = EDTF.get('MULTIPLIER_IF_BOTH', 2.0) +BEGINNING_OF_TIME = EDTF.get("BEGINNING_OF_TIME", '-20000000') +END_OF_TIME = EDTF.get("BEGINNING_OF_TIME", '20000000') DELTA_IF_UNKNOWN = EDTF.get("DELTA_IF_UNKNOWN", relativedelta(years=10)) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 4c1cc0e..c454f34 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -94,7 +94,6 @@ # (* *** L1Interval *** *) uaDateOrSeason = dateOrSeason + Optional(UASymbol) -l1Start = uaDateOrSeason ^ "unknown" # bit of a kludge here to get the all the 
relevant tokens into the parse action @@ -106,11 +105,13 @@ def f(toks): return {'date': toks[0], 'ua': None} +l1Start = '..' ^ uaDateOrSeason l1Start.addParseAction(f) -l1End = uaDateOrSeason ^ "unknown" ^ "open" +l1End = uaDateOrSeason ^ '..' l1End.addParseAction(f) -level1Interval = l1Start("lower") + "/" + l1End("upper") +level1Interval = Optional(l1Start)("lower") + "/" + l1End("upper") \ + ^ l1Start("lower") + "/" + Optional(l1End("upper")) Level1Interval.set_parser(level1Interval) # (* *** unspecified *** *) @@ -249,11 +250,13 @@ def f(toks): ^ unspecified \ ^ consecutives -earlier = ".." + date("upper") +earlier = L("..").addParseAction(f)("lower") + date("upper").addParseAction(f) +later = date("lower").addParseAction(f) + L("..").addParseAction(f)("upper") + EarlierConsecutives.set_parser(earlier) -later = date("lower") + ".." LaterConsecutives.set_parser(later) + listContent = (earlier + ZeroOrMore("," + listElement)) \ ^ (Optional(earlier + ",") + ZeroOrMore(listElement + ",") + later) \ ^ (listElement + OneOrMore("," + listElement)) \ diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index f6d73c2..e0c6410 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -354,27 +354,16 @@ def __str__(self): def _strict_date(self, lean): if lean == EARLIEST: - try: - r = self.lower._strict_date(lean) - if r is None: - raise AttributeError - return r - except AttributeError: # it's a string, or no date. 
Result depends on the upper date - upper = self.upper._strict_date(LATEST) - return apply_delta(sub, upper, appsettings.DELTA_IF_UNKNOWN) + r = self.lower._strict_date(lean) else: - try: - r = self.upper._strict_date(lean) - if r is None: - raise AttributeError - return r - except AttributeError: # an 'unknown' or 'open' string - depends on the lower date - if self.upper and (self.upper == "open" or self.upper.date == "open"): - return dt_to_struct_time(date.today()) # it's still happening - else: - lower = self.lower._strict_date(EARLIEST) - return apply_delta(add, lower, appsettings.DELTA_IF_UNKNOWN) + r = self.upper._strict_date(lean) + return r + @property + def precision(self): + if self.lower.precision == self.upper.precision: + return self.lower.precision + return None # (* ************************** Level 1 *************************** *) @@ -386,7 +375,7 @@ def parse_action(cls, toks): return cls(*args) def __init__(self, *args): - assert len(args)==1 + assert len(args) == 1 ua = args[0] self.is_uncertain = "?" in ua @@ -424,10 +413,6 @@ def __str__(self): return str(self.date) def _strict_date(self, lean): - if self.date == "open": - return dt_to_struct_time(date.today()) - if self.date =="unknown": - return None # depends on the other date return self.date._strict_date(lean) def _get_fuzzy_padding(self, lean): @@ -443,14 +428,62 @@ def _get_fuzzy_padding(self, lean): return multiplier * appsettings.PADDING_YEAR_PRECISION +class UnspecifiedIntervalSection(EDTFObject): + + def __init__(self, sectionOpen=False, other_section_element=None): + if sectionOpen: + self.is_open = True + self.is_unknown = False + else: + self.is_open = False + self.is_unknown = True + self.other = other_section_element + + def __str__(self): + if self.is_unknown: + return "" + else: + return ".." 
+ + def _strict_date(self, lean): + if lean == EARLIEST: + if self.is_unknown: + upper = self.other._strict_date(LATEST) + return apply_delta(sub, upper, appsettings.DELTA_IF_UNKNOWN) + else: + return LongYear(appsettings.BEGINNING_OF_TIME)._strict_date(lean) + else: + if self.is_unknown: + lower = self.other._strict_date(EARLIEST) + return apply_delta(add, lower, appsettings.DELTA_IF_UNKNOWN) + else: + return LongYear(appsettings.END_OF_TIME)._strict_date(lean) + + @property + def precision(self): + return self.other.date.precision or PRECISION_YEAR + + class Unspecified(Date): pass class Level1Interval(Interval): - def __init__(self, lower, upper): - self.lower = UncertainOrApproximate(**lower) - self.upper = UncertainOrApproximate(**upper) + def __init__(self, lower=None, upper=None): + if lower: + if lower['date'] == '..': + self.lower = UnspecifiedIntervalSection(True, UncertainOrApproximate(**upper)) + else: + self.lower = UncertainOrApproximate(**lower) + else: + self.lower = UnspecifiedIntervalSection(False, UncertainOrApproximate(**upper)) + if upper: + if upper['date'] == '..': + self.upper = UnspecifiedIntervalSection(True, UncertainOrApproximate(**lower)) + else: + self.upper = UncertainOrApproximate(**upper) + else: + self.upper = UnspecifiedIntervalSection(False, UncertainOrApproximate(**lower)) def _get_fuzzy_padding(self, lean): if lean == EARLIEST: @@ -651,12 +684,14 @@ def __str__(self): return "%s..%s" % (self.lower or '', self.upper or '') -class EarlierConsecutives(Consecutives): - pass +class EarlierConsecutives(Level1Interval): + def __str__(self): + return "%s%s" % (self.lower, self.upper) -class LaterConsecutives(Consecutives): - pass +class LaterConsecutives(Level1Interval): + def __str__(self): + return "%s%s" % (self.lower, self.upper) class OneOfASet(EDTFObject): diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 538e924..05c9368 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -52,6 +52,8 @@ 
('2004-02-01/2005', '2004-02-01', '2005-12-31'), # An interval beginning sometime in 2005 and ending sometime in February 2006. ('2005/2006-02', '2005-01-01', '2006-02-28'), + # An interval beginning sometime in -2005 and ending sometime in February -2004. + ('-2005/-1999-02', '-2005-01-01', '-1999-02-28'), # ******************************* LEVEL 1 ********************************* # Uncertain/Approximate @@ -82,11 +84,13 @@ # L1 Extended Interval # beginning unknown, end 2006 - ('unknown/2006', '1996-12-31', '2006-12-31'), + ('/2006', '1996-12-31', '2006-12-31'), # beginning June 1, 2004, end unknown - ('2004-06-01/unknown', '2004-06-01', '2014-06-01'), + ('2004-06-01/', '2004-06-01', '2014-06-01'), + # beginning open, end 2006 + ('../2006', '-20000000-01-01', '2006-12-31'), # beginning January 1 2004 with no end date - ('2004-01-01/open', '2004-01-01', date.today().isoformat()), + ('2004-01-01/..', '2004-01-01', '20000000-12-31'), # interval beginning approximately 1984 and ending June 2004 ('1984~/2004-06', '1984-01-01', '2004-06-30', '1983-01-01', '2004-06-30'), # interval beginning 1984 and ending approximately June 2004 @@ -96,7 +100,7 @@ # interval whose beginning is uncertain but thought to be 1984, and whose end is uncertain and approximate but thought to be 2004 ('1984-06?/2004-08?', '1984-06-01', '2004-08-31', '1984-05-01', '2004-09-30'), ('1984-06-02?/2004-08-08~', '1984-06-02', '2004-08-08', '1984-06-01', '2004-08-09'), - ('1984-06-02?/unknown', '1984-06-02', '1994-06-02', '1984-06-01', '1994-06-02'), + ('1984-06-02?/', '1984-06-02', '1994-06-02', '1984-06-01', '1994-06-02'), # Year exceeding 4 digits # the year 170000002 ('Y170000002', '170000002-01-01', '170000002-12-31'), @@ -155,13 +159,13 @@ ('XXXX-XX-23', '0000-01-23', '9999-12-23'), # One of a Set # One of the years 1667, 1668, 1670, 1671, 1672 - (('[1667,1668, 1670..1672]', '[1667, 1668, 1670..1672]'), '1667-01-01', '1672-12-31'), + (('[1667,1668, 1670..1672]', '[1667, 1668, 1670..1672]'), 
'1667-01-01', '1672-12-31'), # December 3, 1760 or some earlier date - ('[..1760-12-03]', '1750-12-03', '1760-12-03'), + ('[..1760-12-03]', '-20000000-01-01', '1760-12-03'), # December 1760 or some later month - ('[1760-12..]', '1760-12-01', '1770-12-01'), + ('[1760-12..]', '1760-12-01', '20000000-12-31'), # January or February of 1760 or December 1760 or some later month - ('[1760-01, 1760-02, 1760-12..]', '1760-01-01', '1770-12-01'), + ('[1760-01, 1760-02, 1760-12..]', '1760-01-01', '20000000-12-31'), # Either the year 1667 or the month December of 1760. ('[1667, 1760-12]', '1667-01-01', '1760-12-31'), # Multiple Dates From 924b64bd7632da9bdf21bb14956d6ee9d3ea1360 Mon Sep 17 00:00:00 2001 From: muellers-saw-leipzig Date: Thu, 30 Mar 2023 14:43:43 +0200 Subject: [PATCH 011/102] implementation of Level2Season based on implementation of exisiting Season; year overflow for northern winter/southern sommer still not considered --- edtf/appsettings.py | 38 +++++++++++++++++++++++++++++++++++ edtf/parser/grammar.py | 9 ++++++++- edtf/parser/parser_classes.py | 5 ++++- edtf/parser/tests.py | 5 +++++ 4 files changed, 55 insertions(+), 2 deletions(-) diff --git a/edtf/appsettings.py b/edtf/appsettings.py index 38e7b9f..1f36d72 100644 --- a/edtf/appsettings.py +++ b/edtf/appsettings.py @@ -23,6 +23,44 @@ } ) +SEASON_L2_MONTHS_RANGE = EDTF.get('SEASON_L2_MONTHS_RANGE', { + # season id: [earliest_month, last_month] + 21: [3, 5], + 22: [6, 8], + 23: [9, 11], + # winter in the northern hemisphere wraps the end of the year, so + # Winter 2010 could wrap into 2011. 
+ # For simplicity, we assume it falls at the end of the year, esp since the + # spec says that sort order goes spring > summer > autumn > winter + 24: [12, 12], + # spring in the northern hemisphere + 25: [3, 5], + # summer in the northern hemisphere + 26: [6, 8], + # fall/autumn in the northern hemisphere + 27: [9, 11], + # winter in the northern hemisphere wraps the end of the year + 28: [12, 12], + # spring in the southern hemisphere + 29: [9, 11], + # summer in the southern hemisphere + 30: [12, 12], + # fall/autumn in the southern hemisphere + 31: [3, 5], + # winter in the southern hemisphere + 32: [6, 8], + 33: [1, 3], + 34: [4, 6], + 35: [7, 9], + 36: [10, 12], + 37: [1, 4], + 38: [5, 8], + 39: [9, 12], + 40: [1, 6], + 41: [7, 12] + } +) + DAY_FIRST = EDTF.get('DAY_FIRST', False) # Americans! SEASONS = EDTF.get('SEASONS', { diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index c454f34..8ced0b9 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -6,7 +6,7 @@ UncertainOrApproximate, Level1Interval, LongYear, Season, \ PartialUncertainOrApproximate, UA, PartialUnspecified, OneOfASet, \ Consecutives, EarlierConsecutives, LaterConsecutives, MultipleDates, \ - MaskedPrecision, Level2Interval, ExponentialYear + MaskedPrecision, Level2Interval, ExponentialYear, Level2Season from edtf.parser.edtf_exceptions import EDTFParseException @@ -268,12 +268,19 @@ def f(toks): inclusiveList = "{" + listContent + "}" MultipleDates.set_parser(inclusiveList) + +# (* *** L2 Season *** *) +seasonL2Number = oneOf("21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41") +l2season = year + "-" + seasonL2Number("season") +Level2Season.set_parser(l2season) + level2Expression = partialUncertainOrApproximate \ ^ partialUnspecified \ ^ choiceList \ ^ inclusiveList \ ^ level2Interval \ ^ longYearScientific \ + ^ l2season \ ^ seasonQualified # putting it all together diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 
e0c6410..5ad4c2e 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -524,7 +524,7 @@ def __str__(self): return "%s-%s" % (self.year, self.season) def _precise_month(self, lean): - rng = appsettings.SEASON_MONTHS_RANGE[int(self.season)] + rng = appsettings.SEASON_L2_MONTHS_RANGE[int(self.season)] if lean == EARLIEST: return rng[0] else: @@ -752,6 +752,9 @@ def __init__(self, lower, upper): self.upper = upper +class Level2Season(Season): + pass + class ExponentialYear(LongYear): def __init__(self, base, exponent, precision=None): self.base = base diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 05c9368..8bda551 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -191,6 +191,11 @@ # Some year between 171010000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.) # TODO Not yet implemented, see https://github.com/ixc/python-edtf/issues/12 # ('Y17101E4S3', '171010000-01-01', '171999999-12-31'), + # L2 Seasons + # Spring southern, 2001 + ('2001-29', '2001-09-01', '2001-11-30'), + # second quarter of 2001 + ('2001-34', '2001-04-01', '2001-06-30'), ) BAD_EXAMPLES = ( From bed138d0f0c6de5f739bc1ecbed3174c232f368b Mon Sep 17 00:00:00 2001 From: muellers-saw-leipzig Date: Tue, 4 Apr 2023 10:17:09 +0200 Subject: [PATCH 012/102] adding function to convert an old specification expression to a new specification expression --- edtf/__init__.py | 2 +- edtf/convert.py | 13 +++++++++++++ edtf/parser/tests.py | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/edtf/__init__.py b/edtf/__init__.py index a86232f..291cccc 100644 --- a/edtf/__init__.py +++ b/edtf/__init__.py @@ -3,4 +3,4 @@ from edtf.parser.parser_classes import * from edtf.convert import dt_to_struct_time, struct_time_to_date, \ struct_time_to_datetime, trim_struct_time, struct_time_to_jd, \ - jd_to_struct_time + jd_to_struct_time, old_specs_to_new_specs_expression diff --git a/edtf/convert.py 
b/edtf/convert.py index c1bfd3a..f8d070f 100644 --- a/edtf/convert.py +++ b/edtf/convert.py @@ -7,6 +7,19 @@ TIME_EMPTY_TIME = [0, 0, 0] # tm_hour, tm_min, tm_sec TIME_EMPTY_EXTRAS = [0, 0, -1] # tm_wday, tm_yday, tm_isdst +def old_specs_to_new_specs_expression(expression): + expression = expression.replace("unknown", "") + expression = expression.replace("open", "..") + expression = expression.replace("u", "X") + expression = expression.replace("x", "X") + expression = expression.replace("?~", "%") + expression = expression.replace("~?", "%") + expression = expression.replace("e", "E") + expression = expression.replace("y", "Y") + expression = expression.replace("p", "S") + + return expression + def dt_to_struct_time(dt): """ diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 8bda551..d0a2884 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -202,7 +202,7 @@ None, '', 'not a edtf string', - 'Y17E7-12-26', # not implemented + 'Y17E7-12-26', # Y indicates that the date is year only '2016-13-08', # wrong day order '2016-02-39', # out of range '-0000-01-01', # negative zero year From e53a11a34be06bf3e390d266727c072c1cae77a1 Mon Sep 17 00:00:00 2001 From: muellers-saw-leipzig Date: Tue, 4 Apr 2023 14:14:30 +0200 Subject: [PATCH 013/102] new specs for group qualification: at the first step added implementation of the new spec but keep the old way with brackets --- edtf/parser/grammar.py | 30 ++++++++++++-- edtf/parser/parser_classes.py | 57 ++++++++++++++++++++++++++- edtf/parser/tests.py | 74 ++++++++++++++++++++++++++++++----- 3 files changed, 146 insertions(+), 15 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 8ced0b9..b6dac72 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -1,12 +1,12 @@ -from pyparsing import Literal as L, ParseException, Optional, OneOrMore, \ - ZeroOrMore, oneOf, Regex, Combine, Word, NotAny, nums +from pyparsing import Literal as L, ParseException, Opt, Optional, 
OneOrMore, \ + ZeroOrMore, oneOf, Regex, Combine, Word, NotAny, nums, Group # (* ************************** Level 0 *************************** *) from edtf.parser.parser_classes import Date, DateAndTime, Interval, Unspecified, \ UncertainOrApproximate, Level1Interval, LongYear, Season, \ PartialUncertainOrApproximate, UA, PartialUnspecified, OneOfASet, \ Consecutives, EarlierConsecutives, LaterConsecutives, MultipleDates, \ - MaskedPrecision, Level2Interval, ExponentialYear, Level2Season + MaskedPrecision, Level2Interval, ExponentialYear, Level2Season, PartialUncertainOrApproximateNEW from edtf.parser.edtf_exceptions import EDTFParseException @@ -210,11 +210,32 @@ def f(toks): ^ (year + "-(" + monthDay + ")" + UASymbol("month_day_ua")) \ ^ (season("ssn") + UASymbol("season_ua")) + +# group qualification +# qualifier right of a component(date, month, day) applies to all components to the left +group_qual = yearMonth + UASymbol("year_month_ua") + "-" + day \ + ^ year + UASymbol("year_ua") + "-" + month + Opt("-" + day) + +# component qualification +# qualifier immediate left of a component (date, month, day) applies to that component only +qual_year = year ^ UASymbol("year_ua_b") + year ^ year + UASymbol("year_ua") +qual_month = month ^ UASymbol("month_ua") + month +qual_day = day ^ UASymbol("day_ua") + day + +indi_qual = UASymbol("year_ua_b") + year + Opt("-" + qual_month + Opt("-" + qual_day)) \ + ^ qual_year + "-" + UASymbol("month_ua") + month + Opt("-" + qual_day) \ + ^ qual_year + "-" + qual_month + "-" + UASymbol("day_ua") + day + + partialUncertainOrApproximate = IUABase ^ ("(" + IUABase + ")" + UASymbol("all_ua")) PartialUncertainOrApproximate.set_parser(partialUncertainOrApproximate) +partialUncertainOrApproximate_new = group_qual ^ indi_qual +PartialUncertainOrApproximateNEW.set_parser(partialUncertainOrApproximate_new) + dateWithInternalUncertainty = partialUncertainOrApproximate \ - ^ partialUnspecified + ^ partialUnspecified \ + ^ 
partialUncertainOrApproximate_new qualifyingString = Regex(r'\S') # any nonwhitespace char @@ -275,6 +296,7 @@ def f(toks): Level2Season.set_parser(l2season) level2Expression = partialUncertainOrApproximate \ + ^ partialUncertainOrApproximate_new \ ^ partialUnspecified \ ^ choiceList \ ^ inclusiveList \ diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 5ad4c2e..c049fb4 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -544,13 +544,14 @@ def __init__( self, year=None, month=None, day=None, year_ua=False, month_ua = False, day_ua = False, year_month_ua = False, month_day_ua = False, - ssn=None, season_ua=False, all_ua=False + ssn=None, season_ua=False, all_ua=False, year_ua_b = False ): self.year = year self.month = month self.day = day self.year_ua = year_ua + self.year_ua_b = year_ua_b self.month_ua = month_ua self.day_ua = day_ua @@ -632,6 +633,8 @@ def _get_fuzzy_padding(self, lean): if self.year_ua: result += appsettings.PADDING_YEAR_PRECISION * self.year_ua._get_multiplier() + if self.year_ua_b: + result += appsettings.PADDING_YEAR_PRECISION * self.year_ua_b._get_multiplier() if self.month_ua: result += appsettings.PADDING_MONTH_PRECISION * self.month_ua._get_multiplier() if self.day_ua: @@ -663,6 +666,58 @@ def _get_fuzzy_padding(self, lean): return result +class PartialUncertainOrApproximateNEW(PartialUncertainOrApproximate): + + def __str__(self): + + if self.season_ua: + return "%s%s" % (self.season, self.season_ua) + + if self.year_ua: + y = "%s%s" % (self.year, self.year_ua) + else: + if self.year_ua_b: + y = "%s%s" % (self.year_ua_b, self.year) + else: + y = str(self.year) + + if self.month_ua: + m = "%s%s" % (self.month_ua, self.month) + else: + m = str(self.month) + + if self.day: + if self.day_ua: + d = "%s%s" % (self.day_ua, self.day) + else: + d = str(self.day) + else: + d = None + + if self.year_month_ua: # year/month approximate. No brackets needed. 
+ ym = "%s-%s%s" % (y, m, self.year_month_ua) + if d: + result = "%s-%s" % (ym, d) + else: + result = ym + elif self.month_day_ua: + if self.year_ua: # we don't need the brackets round month and day + result = "%s-%s-%s%s" % (y, m, d, self.month_day_ua) + else: + result = "%s-(%s-%s)%s" % (y, m, d, self.month_day_ua) + else: + if d: + result = "%s-%s-%s" % (y, m, d) + else: + result = "%s-%s" % (y, m) + + if self.all_ua: + result = "(%s)%s" % (result, self.all_ua) + + return result + + + class PartialUnspecified(Unspecified): pass diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index d0a2884..ce48183 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -123,26 +123,47 @@ ('2004?-06-11', '2004-06-11', '2003-06-11', '2005-06-11'), # year and month are approximate; day known ('2004-06~-11', '2004-06-11', '2003-05-11', '2005-07-11'), - # uncertain month, year and day known + + # uncertain month, year and day known - OLD SPEC ('2004-(06)?-11', '2004-06-11', '2004-05-11', '2004-07-11'), - # day is approximate; year, month known + # uncertain month, year and day known - NEW SPEC + ('2004-?06-11', '2004-06-11', '2004-05-11', '2004-07-11'), + + # day is approximate; year, month known - OLD SPEC ('2004-06-(11)~', '2004-06-11', '2004-06-10', '2004-06-12'), - # Year known, month within year is approximate and uncertain + # day is approximate; year, month known - NEW SPEC + ('2004-06-~11', '2004-06-11', '2004-06-10', '2004-06-12'), + # Year known, month within year is approximate and uncertain - OLD SPEC ('2004-(06)%', '2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30'), - # Year known, month and day uncertain + # Year known, month within year is approximate and uncertain - NEW SPEC + ('2004-%06', '2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30'), + # Year known, month and day uncertain - OLD SPEC ('2004-(06-11)?', '2004-06-11', '2004-05-10', '2004-07-12'), - # Year uncertain, month known, day approximate + # Year known, month and day uncertain - 
NEW SPEC + ('2004-?06-?11', '2004-06-11', '2004-05-10', '2004-07-12'), + # Year uncertain, month known, day approximate - OLD SPEC ('2004?-06-(11)~', '2004-06-11', '2003-06-10', '2005-06-12'), - # Year uncertain and month is both uncertain and approximate + # Year uncertain, month known, day approximate - NEW SPEC + ('2004?-06-~11', '2004-06-11', '2003-06-10', '2005-06-12'), + # Year uncertain and month is both uncertain and approximate - OLD SPEC ('(2004-(06)~)?', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), - # This has the same meaning as the previous example. + # Year uncertain and month is both uncertain and approximate - NEW SPEC + ('?2004-%06', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), + # This has the same meaning as the previous example.- OLD SPEC ('2004?-(06)%', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), - # Year uncertain, month and day approximate. + # This has the same meaning as the previous example.- NEW SPEC + ('2004?-%06', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), + # Year uncertain, month and day approximate.- OLD SPEC (('(2004)?-06-04~', '2004?-06-04~'), '2004-06-04', '2003-05-03', '2005-07-05'), - # Year known, month and day approximate. Note that this has the same meaning as the following. + # Year uncertain, month and day approximate. - NEW SPEC + ('2004?-06-04~','2004-06-04', '2003-05-03', '2005-07-05'), + # Year known, month and day approximate. Note that this has the same meaning as the following.- OLD SPEC (('(2011)-06-04~', '2011-(06-04)~'), '2011-06-04', '2011-05-03', '2011-07-05'), - # Year known, month and day approximate. + # Year known, month and day approximate.- OLD SPEC ('2011-(06-04)~', '2011-06-04', '2011-05-03', '2011-07-05'), + # Year known, month and day approximate. 
- NEW SPEC + ('2011-~06-~04', '2011-06-04', '2011-05-03', '2011-07-05'), + # Approximate season (around Autumn 2011) ('2011-23~', '2011-09-01', '2011-11-30', '2011-06-09', '2012-02-22'), # Years wrapping @@ -214,6 +235,39 @@ def test_non_parsing(self): for i in BAD_EXAMPLES: self.assertRaises(EDTFParseException, parse, i) + def testInterval(self): + #expression = ('1984~/2004-06', '1984-01-01', '2004-06-30', '1983-01-01', '2004-06-30') + #expression = ('/2006', '1996-01-01', '2006-12-31') + #expression = ('../2006', '0001-01-01', '2006-12-31') + expression = ('../-2006', '-20000000-01-01', '-2006-12-31') + #expression = ('2006/', '2006-01-01', '9999-12-31') + i = expression[0] + expected_lower_strict = expression[1] + expected_upper_strict = expression[2] + + def iso_to_struct_time(iso_date): + """ Convert YYYY-mm-dd date strings to time structs """ + if iso_date[0] == '-': + is_negative = True + iso_date = iso_date[1:] + else: + is_negative = False + y, mo, d = [int(i) for i in iso_date.split('-')] + if is_negative: + y *= -1 + return struct_time( + [y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + + # Convert string date representations into `struct_time`s + expected_lower_strict = iso_to_struct_time(expected_lower_strict) + expected_upper_strict = iso_to_struct_time(expected_upper_strict) + + f = parse(i) + print(str(f.lower_strict()) + '/' + str(f.upper_strict())) + self.assertEqual(f.lower_strict(), expected_lower_strict) + self.assertEqual(f.upper_strict(), expected_upper_strict) + + def test_date_values(self): """ Test that everY EDTFObject can tell you its lower and upper From 3fa4156f91980f0336bcf636da09168eda8a7ed1 Mon Sep 17 00:00:00 2001 From: muellers-saw-leipzig Date: Tue, 4 Apr 2023 14:14:30 +0200 Subject: [PATCH 014/102] new specs for group qualification: at the first step added implementation of the new spec but keep the old way with brackets --- edtf/parser/grammar.py | 30 ++++++++++++-- edtf/parser/parser_classes.py | 57 
+++++++++++++++++++++++++- edtf/parser/tests.py | 75 ++++++++++++++++++++++++++++++----- 3 files changed, 147 insertions(+), 15 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 8ced0b9..b6dac72 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -1,12 +1,12 @@ -from pyparsing import Literal as L, ParseException, Optional, OneOrMore, \ - ZeroOrMore, oneOf, Regex, Combine, Word, NotAny, nums +from pyparsing import Literal as L, ParseException, Opt, Optional, OneOrMore, \ + ZeroOrMore, oneOf, Regex, Combine, Word, NotAny, nums, Group # (* ************************** Level 0 *************************** *) from edtf.parser.parser_classes import Date, DateAndTime, Interval, Unspecified, \ UncertainOrApproximate, Level1Interval, LongYear, Season, \ PartialUncertainOrApproximate, UA, PartialUnspecified, OneOfASet, \ Consecutives, EarlierConsecutives, LaterConsecutives, MultipleDates, \ - MaskedPrecision, Level2Interval, ExponentialYear, Level2Season + MaskedPrecision, Level2Interval, ExponentialYear, Level2Season, PartialUncertainOrApproximateNEW from edtf.parser.edtf_exceptions import EDTFParseException @@ -210,11 +210,32 @@ def f(toks): ^ (year + "-(" + monthDay + ")" + UASymbol("month_day_ua")) \ ^ (season("ssn") + UASymbol("season_ua")) + +# group qualification +# qualifier right of a component(date, month, day) applies to all components to the left +group_qual = yearMonth + UASymbol("year_month_ua") + "-" + day \ + ^ year + UASymbol("year_ua") + "-" + month + Opt("-" + day) + +# component qualification +# qualifier immediate left of a component (date, month, day) applies to that component only +qual_year = year ^ UASymbol("year_ua_b") + year ^ year + UASymbol("year_ua") +qual_month = month ^ UASymbol("month_ua") + month +qual_day = day ^ UASymbol("day_ua") + day + +indi_qual = UASymbol("year_ua_b") + year + Opt("-" + qual_month + Opt("-" + qual_day)) \ + ^ qual_year + "-" + UASymbol("month_ua") + month + Opt("-" + 
qual_day) \ + ^ qual_year + "-" + qual_month + "-" + UASymbol("day_ua") + day + + partialUncertainOrApproximate = IUABase ^ ("(" + IUABase + ")" + UASymbol("all_ua")) PartialUncertainOrApproximate.set_parser(partialUncertainOrApproximate) +partialUncertainOrApproximate_new = group_qual ^ indi_qual +PartialUncertainOrApproximateNEW.set_parser(partialUncertainOrApproximate_new) + dateWithInternalUncertainty = partialUncertainOrApproximate \ - ^ partialUnspecified + ^ partialUnspecified \ + ^ partialUncertainOrApproximate_new qualifyingString = Regex(r'\S') # any nonwhitespace char @@ -275,6 +296,7 @@ def f(toks): Level2Season.set_parser(l2season) level2Expression = partialUncertainOrApproximate \ + ^ partialUncertainOrApproximate_new \ ^ partialUnspecified \ ^ choiceList \ ^ inclusiveList \ diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 5ad4c2e..c049fb4 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -544,13 +544,14 @@ def __init__( self, year=None, month=None, day=None, year_ua=False, month_ua = False, day_ua = False, year_month_ua = False, month_day_ua = False, - ssn=None, season_ua=False, all_ua=False + ssn=None, season_ua=False, all_ua=False, year_ua_b = False ): self.year = year self.month = month self.day = day self.year_ua = year_ua + self.year_ua_b = year_ua_b self.month_ua = month_ua self.day_ua = day_ua @@ -632,6 +633,8 @@ def _get_fuzzy_padding(self, lean): if self.year_ua: result += appsettings.PADDING_YEAR_PRECISION * self.year_ua._get_multiplier() + if self.year_ua_b: + result += appsettings.PADDING_YEAR_PRECISION * self.year_ua_b._get_multiplier() if self.month_ua: result += appsettings.PADDING_MONTH_PRECISION * self.month_ua._get_multiplier() if self.day_ua: @@ -663,6 +666,58 @@ def _get_fuzzy_padding(self, lean): return result +class PartialUncertainOrApproximateNEW(PartialUncertainOrApproximate): + + def __str__(self): + + if self.season_ua: + return "%s%s" % (self.season, 
self.season_ua) + + if self.year_ua: + y = "%s%s" % (self.year, self.year_ua) + else: + if self.year_ua_b: + y = "%s%s" % (self.year_ua_b, self.year) + else: + y = str(self.year) + + if self.month_ua: + m = "%s%s" % (self.month_ua, self.month) + else: + m = str(self.month) + + if self.day: + if self.day_ua: + d = "%s%s" % (self.day_ua, self.day) + else: + d = str(self.day) + else: + d = None + + if self.year_month_ua: # year/month approximate. No brackets needed. + ym = "%s-%s%s" % (y, m, self.year_month_ua) + if d: + result = "%s-%s" % (ym, d) + else: + result = ym + elif self.month_day_ua: + if self.year_ua: # we don't need the brackets round month and day + result = "%s-%s-%s%s" % (y, m, d, self.month_day_ua) + else: + result = "%s-(%s-%s)%s" % (y, m, d, self.month_day_ua) + else: + if d: + result = "%s-%s-%s" % (y, m, d) + else: + result = "%s-%s" % (y, m) + + if self.all_ua: + result = "(%s)%s" % (result, self.all_ua) + + return result + + + class PartialUnspecified(Unspecified): pass diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index d0a2884..5c6caa9 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -123,26 +123,47 @@ ('2004?-06-11', '2004-06-11', '2003-06-11', '2005-06-11'), # year and month are approximate; day known ('2004-06~-11', '2004-06-11', '2003-05-11', '2005-07-11'), - # uncertain month, year and day known + + # uncertain month, year and day known - OLD SPEC ('2004-(06)?-11', '2004-06-11', '2004-05-11', '2004-07-11'), - # day is approximate; year, month known + # uncertain month, year and day known - NEW SPEC + ('2004-?06-11', '2004-06-11', '2004-05-11', '2004-07-11'), + + # day is approximate; year, month known - OLD SPEC ('2004-06-(11)~', '2004-06-11', '2004-06-10', '2004-06-12'), - # Year known, month within year is approximate and uncertain + # day is approximate; year, month known - NEW SPEC + ('2004-06-~11', '2004-06-11', '2004-06-10', '2004-06-12'), + # Year known, month within year is approximate and uncertain - OLD 
SPEC ('2004-(06)%', '2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30'), - # Year known, month and day uncertain + # Year known, month within year is approximate and uncertain - NEW SPEC + ('2004-%06', '2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30'), + # Year known, month and day uncertain - OLD SPEC ('2004-(06-11)?', '2004-06-11', '2004-05-10', '2004-07-12'), - # Year uncertain, month known, day approximate + # Year known, month and day uncertain - NEW SPEC + ('2004-?06-?11', '2004-06-11', '2004-05-10', '2004-07-12'), + # Year uncertain, month known, day approximate - OLD SPEC ('2004?-06-(11)~', '2004-06-11', '2003-06-10', '2005-06-12'), - # Year uncertain and month is both uncertain and approximate + # Year uncertain, month known, day approximate - NEW SPEC + ('2004?-06-~11', '2004-06-11', '2003-06-10', '2005-06-12'), + # Year uncertain and month is both uncertain and approximate - OLD SPEC ('(2004-(06)~)?', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), - # This has the same meaning as the previous example. + # Year uncertain and month is both uncertain and approximate - NEW SPEC + ('?2004-%06', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), + # This has the same meaning as the previous example.- OLD SPEC ('2004?-(06)%', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), - # Year uncertain, month and day approximate. + # This has the same meaning as the previous example.- NEW SPEC + ('2004?-%06', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), + # Year uncertain, month and day approximate.- OLD SPEC (('(2004)?-06-04~', '2004?-06-04~'), '2004-06-04', '2003-05-03', '2005-07-05'), - # Year known, month and day approximate. Note that this has the same meaning as the following. + # Year uncertain, month and day approximate. - NEW SPEC + ('2004?-06-04~','2004-06-04', '2003-05-03', '2005-07-05'), + # Year known, month and day approximate. 
Note that this has the same meaning as the following.- OLD SPEC (('(2011)-06-04~', '2011-(06-04)~'), '2011-06-04', '2011-05-03', '2011-07-05'), - # Year known, month and day approximate. + # Year known, month and day approximate.- OLD SPEC ('2011-(06-04)~', '2011-06-04', '2011-05-03', '2011-07-05'), + # Year known, month and day approximate. - NEW SPEC + ('2011-~06-~04', '2011-06-04', '2011-05-03', '2011-07-05'), + # Approximate season (around Autumn 2011) ('2011-23~', '2011-09-01', '2011-11-30', '2011-06-09', '2012-02-22'), # Years wrapping @@ -181,6 +202,7 @@ # L2 Extended Interval # An interval in June 2004 beginning approximately the first and ending approximately the 20th. ('2004-06-(01)~/2004-06-(20)~', '2004-06-01', '2004-06-20', '2004-05-31', '2004-06-21'), + ('2004-06-~01/2004-06-~20', '2004-06-01', '2004-06-20', '2004-05-31', '2004-06-21'), # The interval began on an unspecified day in June 2004. ('2004-06-XX/2004-07-03', '2004-06-01', '2004-07-03'), # Year Requiring More than Four Digits - Exponential Form @@ -214,6 +236,39 @@ def test_non_parsing(self): for i in BAD_EXAMPLES: self.assertRaises(EDTFParseException, parse, i) + def testInterval(self): + #expression = ('1984~/2004-06', '1984-01-01', '2004-06-30', '1983-01-01', '2004-06-30') + #expression = ('/2006', '1996-01-01', '2006-12-31') + #expression = ('../2006', '0001-01-01', '2006-12-31') + expression = ('../-2006', '-20000000-01-01', '-2006-12-31') + #expression = ('2006/', '2006-01-01', '9999-12-31') + i = expression[0] + expected_lower_strict = expression[1] + expected_upper_strict = expression[2] + + def iso_to_struct_time(iso_date): + """ Convert YYYY-mm-dd date strings to time structs """ + if iso_date[0] == '-': + is_negative = True + iso_date = iso_date[1:] + else: + is_negative = False + y, mo, d = [int(i) for i in iso_date.split('-')] + if is_negative: + y *= -1 + return struct_time( + [y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + + # Convert string date representations into 
`struct_time`s + expected_lower_strict = iso_to_struct_time(expected_lower_strict) + expected_upper_strict = iso_to_struct_time(expected_upper_strict) + + f = parse(i) + print(str(f.lower_strict()) + '/' + str(f.upper_strict())) + self.assertEqual(f.lower_strict(), expected_lower_strict) + self.assertEqual(f.upper_strict(), expected_upper_strict) + + def test_date_values(self): """ Test that everY EDTFObject can tell you its lower and upper From f15511b1df800e561f4441285f33c7451799e4a9 Mon Sep 17 00:00:00 2001 From: muellers-saw-leipzig Date: Tue, 4 Apr 2023 14:48:30 +0200 Subject: [PATCH 015/102] new specs for group qualification: at second step removed old grouping way with brackets --- edtf/parser/grammar.py | 37 ++------------------- edtf/parser/parser_classes.py | 61 ++++------------------------------- edtf/parser/tests.py | 56 +++++++++++++------------------- 3 files changed, 31 insertions(+), 123 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index b6dac72..2744933 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -6,7 +6,7 @@ UncertainOrApproximate, Level1Interval, LongYear, Season, \ PartialUncertainOrApproximate, UA, PartialUnspecified, OneOfASet, \ Consecutives, EarlierConsecutives, LaterConsecutives, MultipleDates, \ - MaskedPrecision, Level2Interval, ExponentialYear, Level2Season, PartialUncertainOrApproximateNEW + MaskedPrecision, Level2Interval, ExponentialYear, Level2Season from edtf.parser.edtf_exceptions import EDTFParseException @@ -186,31 +186,6 @@ def f(toks): # (* ** Internal Uncertain or Approximate** *) -# this line is out of spec, but the given examples (e.g. '(2004)?-06-04~') -# appear to require it. -year_with_brackets = year ^ ("(" + year + ")") - -# second clause below needed Optional() around the "year_ua" UASymbol, for dates -# like '(2011)-06-04~' to work. 
- -IUABase = \ - (year_with_brackets + UASymbol("year_ua") + "-" + month + Optional("-(" + day + ")" + UASymbol("day_ua"))) \ - ^ (year_with_brackets + Optional(UASymbol)("year_ua") + "-" + monthDay + Optional(UASymbol)("month_day_ua")) \ - ^ ( - year_with_brackets + Optional(UASymbol)("year_ua") + "-(" + month + ")" + UASymbol("month_ua") - + Optional("-(" + day + ")" + UASymbol("day_ua")) - ) \ - ^ ( - year_with_brackets + Optional(UASymbol)("year_ua") + "-(" + month + ")" + UASymbol("month_ua") - + Optional("-" + day) - ) \ - ^ (yearMonth + UASymbol("year_month_ua") + "-(" + day + ")" + UASymbol("day_ua")) \ - ^ (yearMonth + UASymbol("year_month_ua") + "-" + day) \ - ^ (yearMonth + "-(" + day + ")" + UASymbol("day_ua")) \ - ^ (year + "-(" + monthDay + ")" + UASymbol("month_day_ua")) \ - ^ (season("ssn") + UASymbol("season_ua")) - - # group qualification # qualifier right of a component(date, month, day) applies to all components to the left group_qual = yearMonth + UASymbol("year_month_ua") + "-" + day \ @@ -226,16 +201,11 @@ def f(toks): ^ qual_year + "-" + UASymbol("month_ua") + month + Opt("-" + qual_day) \ ^ qual_year + "-" + qual_month + "-" + UASymbol("day_ua") + day - -partialUncertainOrApproximate = IUABase ^ ("(" + IUABase + ")" + UASymbol("all_ua")) +partialUncertainOrApproximate = group_qual ^ indi_qual PartialUncertainOrApproximate.set_parser(partialUncertainOrApproximate) -partialUncertainOrApproximate_new = group_qual ^ indi_qual -PartialUncertainOrApproximateNEW.set_parser(partialUncertainOrApproximate_new) - dateWithInternalUncertainty = partialUncertainOrApproximate \ - ^ partialUnspecified \ - ^ partialUncertainOrApproximate_new + ^ partialUnspecified qualifyingString = Regex(r'\S') # any nonwhitespace char @@ -296,7 +266,6 @@ def f(toks): Level2Season.set_parser(l2season) level2Expression = partialUncertainOrApproximate \ - ^ partialUncertainOrApproximate_new \ ^ partialUnspecified \ ^ choiceList \ ^ inclusiveList \ diff --git 
a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index c049fb4..1a65718 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -571,16 +571,19 @@ def __str__(self): if self.year_ua: y = "%s%s" % (self.year, self.year_ua) else: - y = str(self.year) + if self.year_ua_b: + y = "%s%s" % (self.year_ua_b, self.year) + else: + y = str(self.year) if self.month_ua: - m = "(%s)%s" % (self.month, self.month_ua) + m = "%s%s" % (self.month_ua, self.month) else: m = str(self.month) if self.day: if self.day_ua: - d = "(%s)%s" % (self.day, self.day_ua) + d = "%s%s" % (self.day_ua, self.day) else: d = str(self.day) else: @@ -666,58 +669,6 @@ def _get_fuzzy_padding(self, lean): return result -class PartialUncertainOrApproximateNEW(PartialUncertainOrApproximate): - - def __str__(self): - - if self.season_ua: - return "%s%s" % (self.season, self.season_ua) - - if self.year_ua: - y = "%s%s" % (self.year, self.year_ua) - else: - if self.year_ua_b: - y = "%s%s" % (self.year_ua_b, self.year) - else: - y = str(self.year) - - if self.month_ua: - m = "%s%s" % (self.month_ua, self.month) - else: - m = str(self.month) - - if self.day: - if self.day_ua: - d = "%s%s" % (self.day_ua, self.day) - else: - d = str(self.day) - else: - d = None - - if self.year_month_ua: # year/month approximate. No brackets needed. 
- ym = "%s-%s%s" % (y, m, self.year_month_ua) - if d: - result = "%s-%s" % (ym, d) - else: - result = ym - elif self.month_day_ua: - if self.year_ua: # we don't need the brackets round month and day - result = "%s-%s-%s%s" % (y, m, d, self.month_day_ua) - else: - result = "%s-(%s-%s)%s" % (y, m, d, self.month_day_ua) - else: - if d: - result = "%s-%s-%s" % (y, m, d) - else: - result = "%s-%s" % (y, m) - - if self.all_ua: - result = "(%s)%s" % (result, self.all_ua) - - return result - - - class PartialUnspecified(Unspecified): pass diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 5c6caa9..55fa288 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -122,52 +122,31 @@ # uncertain year; month, day known ('2004?-06-11', '2004-06-11', '2003-06-11', '2005-06-11'), # year and month are approximate; day known - ('2004-06~-11', '2004-06-11', '2003-05-11', '2005-07-11'), - - # uncertain month, year and day known - OLD SPEC - ('2004-(06)?-11', '2004-06-11', '2004-05-11', '2004-07-11'), - # uncertain month, year and day known - NEW SPEC + ('2004-06~-11', '2004-06-11', '2003-05-11', '2005-07-11'), + # uncertain month, year and day known ('2004-?06-11', '2004-06-11', '2004-05-11', '2004-07-11'), - - # day is approximate; year, month known - OLD SPEC - ('2004-06-(11)~', '2004-06-11', '2004-06-10', '2004-06-12'), - # day is approximate; year, month known - NEW SPEC + # day is approximate; year, month known ('2004-06-~11', '2004-06-11', '2004-06-10', '2004-06-12'), - # Year known, month within year is approximate and uncertain - OLD SPEC - ('2004-(06)%', '2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30'), # Year known, month within year is approximate and uncertain - NEW SPEC ('2004-%06', '2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30'), - # Year known, month and day uncertain - OLD SPEC - ('2004-(06-11)?', '2004-06-11', '2004-05-10', '2004-07-12'), # Year known, month and day uncertain - NEW SPEC ('2004-?06-?11', '2004-06-11', '2004-05-10', 
'2004-07-12'), - # Year uncertain, month known, day approximate - OLD SPEC - ('2004?-06-(11)~', '2004-06-11', '2003-06-10', '2005-06-12'), # Year uncertain, month known, day approximate - NEW SPEC ('2004?-06-~11', '2004-06-11', '2003-06-10', '2005-06-12'), - # Year uncertain and month is both uncertain and approximate - OLD SPEC - ('(2004-(06)~)?', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), # Year uncertain and month is both uncertain and approximate - NEW SPEC ('?2004-%06', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), - # This has the same meaning as the previous example.- OLD SPEC - ('2004?-(06)%', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), # This has the same meaning as the previous example.- NEW SPEC ('2004?-%06', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), - # Year uncertain, month and day approximate.- OLD SPEC - (('(2004)?-06-04~', '2004?-06-04~'), '2004-06-04', '2003-05-03', '2005-07-05'), # Year uncertain, month and day approximate. - NEW SPEC - ('2004?-06-04~','2004-06-04', '2003-05-03', '2005-07-05'), - # Year known, month and day approximate. Note that this has the same meaning as the following.- OLD SPEC - (('(2011)-06-04~', '2011-(06-04)~'), '2011-06-04', '2011-05-03', '2011-07-05'), - # Year known, month and day approximate.- OLD SPEC - ('2011-(06-04)~', '2011-06-04', '2011-05-03', '2011-07-05'), + ('2004?-~06-~04','2004-06-04', '2003-05-03', '2005-07-05'), + # what about that? + #('2004?-06-04~','2004-06-04', '2003-05-03', '2005-07-05'), # Year known, month and day approximate. 
- NEW SPEC ('2011-~06-~04', '2011-06-04', '2011-05-03', '2011-07-05'), - # Approximate season (around Autumn 2011) - ('2011-23~', '2011-09-01', '2011-11-30', '2011-06-09', '2012-02-22'), + #('2011-23~', '2011-09-01', '2011-11-30', '2011-06-09', '2012-02-22'), # Years wrapping - ('2011-24~', '2011-12-01', '2011-12-31', '2011-09-08', '2012-03-24'), + #('2011-24~', '2011-12-01', '2011-12-31', '2011-09-08', '2012-03-24'), # Partial unspecified # December 25 sometime during the 1560s ('156X-12-25', '1560-12-25', '1569-12-25'), @@ -200,8 +179,7 @@ # A date during the 1900s #('19xx', '1900-01-01', '1999-12-31'), # L2 Extended Interval - # An interval in June 2004 beginning approximately the first and ending approximately the 20th. - ('2004-06-(01)~/2004-06-(20)~', '2004-06-01', '2004-06-20', '2004-05-31', '2004-06-21'), + ('2004-06-~01/2004-06-~20', '2004-06-01', '2004-06-20', '2004-05-31', '2004-06-21'), # The interval began on an unspecified day in June 2004. ('2004-06-XX/2004-07-03', '2004-06-01', '2004-07-03'), @@ -224,10 +202,20 @@ None, '', 'not a edtf string', - 'Y17E7-12-26', # Y indicates that the date is year only - '2016-13-08', # wrong day order - '2016-02-39', # out of range + 'Y17E7-12-26', # Y indicates that the date is year only + '2016-13-08', # wrong day order + '2016-02-39', # out of range '-0000-01-01', # negative zero year + '2004-(06)?-11', # uncertain month, year and day known - OLD SPEC + '2004-06-(11)~', # day is approximate; year, month known - OLD SPEC + '2004-(06)%', # Year known, month within year is approximate and uncertain - OLD SPEC + '2004-(06-11)?', # Year known, month and day uncertain - OLD SPEC + '2004?-06-(11)~', # Year uncertain, month known, day approximate - OLD SPEC + '(2004-(06)~)?', # Year uncertain and month is both uncertain and approximate - OLD SPEC + '(2004)?-06-04~', # Year uncertain, month and day approximate.- OLD SPEC + '(2011)-06-04~', # Year known, month and day approximate. 
Note that this has the same meaning as the following.- OLD SPEC + '2011-(06-04)~', # Year known, month and day approximate.- OLD SPEC + '2004-06-(01)~/2004-06-(20)~', # An interval in June 2004 beginning approximately the first and ending approximately the 20th - OLD SPEC ) From d0828eaeb6eb5af98858d509cfda152f3c38d048 Mon Sep 17 00:00:00 2001 From: muellers-saw-leipzig Date: Tue, 4 Apr 2023 14:52:05 +0200 Subject: [PATCH 016/102] renaming of variable names (U to X) --- edtf/parser/grammar.py | 44 +++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 2744933..1392180 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -143,45 +143,45 @@ def f(toks): # (* ** Internal Unspecified** *) -digitOrU = Word(nums + 'X', exact=1) +digitOrX = Word(nums + 'X', exact=1) # 2-digit day with at least one 'X' present -dayWithU = Combine( - ("X" + digitOrU) - ^ (digitOrU + 'X') +dayWithX = Combine( + ("X" + digitOrX) + ^ (digitOrX + 'X') )("day") # 2-digit month with at least one 'X' present -monthWithU = Combine( +monthWithX = Combine( oneOf("0X 1X") - ^ ("X" + digitOrU) + ^ ("X" + digitOrX) )("month") # 4-digit year with at least one 'X' present -yearWithU = Combine( - ('X' + digitOrU + digitOrU + digitOrU) - ^ (digitOrU + 'X' + digitOrU + digitOrU) - ^ (digitOrU + digitOrU + 'X' + digitOrU) - ^ (digitOrU + digitOrU + digitOrU + 'X') +yearWithX = Combine( + ('X' + digitOrX + digitOrX + digitOrX) + ^ (digitOrX + 'X' + digitOrX + digitOrX) + ^ (digitOrX + digitOrX + 'X' + digitOrX) + ^ (digitOrX + digitOrX + digitOrX + 'X') )("year") -yearMonthWithU = ( - (Combine(year("") ^ yearWithU(""))("year") + "-" + monthWithU) - ^ (yearWithU + "-" + month) +yearMonthWithX = ( + (Combine(year("") ^ yearWithX(""))("year") + "-" + monthWithX) + ^ (yearWithX + "-" + month) ) -monthDayWithU = ( - (Combine(month("") ^ monthWithU(""))("month") + "-" + dayWithU) - ^ (monthWithU + 
"-" + day) +monthDayWithX = ( + (Combine(month("") ^ monthWithX(""))("month") + "-" + dayWithX) + ^ (monthWithX + "-" + day) ) -yearMonthDayWithU = ( - (yearWithU + "-" + Combine(month("") ^ monthWithU(""))("month") + "-" + Combine(day("") ^ dayWithU(""))("day")) - ^ (year + "-" + monthWithU + "-" + Combine(day("") ^ dayWithU(""))("day")) - ^ (year + "-" + month + "-" + dayWithU) +yearMonthDayWithX = ( + (yearWithX + "-" + Combine(month("") ^ monthWithX(""))("month") + "-" + Combine(day("") ^ dayWithX(""))("day")) + ^ (year + "-" + monthWithX + "-" + Combine(day("") ^ dayWithX(""))("day")) + ^ (year + "-" + month + "-" + dayWithX) ) -partialUnspecified = yearWithU ^ yearMonthWithU ^ yearMonthDayWithU +partialUnspecified = yearWithX ^ yearMonthWithX ^ yearMonthDayWithX PartialUnspecified.set_parser(partialUnspecified) # (* ** Internal Uncertain or Approximate** *) From 978379951eeeffb7efa4ae767d5bcb78a00c564b Mon Sep 17 00:00:00 2001 From: muellers-saw-leipzig Date: Wed, 7 Jun 2023 10:34:01 +0200 Subject: [PATCH 017/102] using infinity for open end/start interval sections --- edtf/appsettings.py | 3 --- edtf/parser/parser_classes.py | 18 +++++++++++++++--- setup.py | 2 +- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/edtf/appsettings.py b/edtf/appsettings.py index 1f36d72..8904c58 100644 --- a/edtf/appsettings.py +++ b/edtf/appsettings.py @@ -81,7 +81,4 @@ MULTIPLIER_IF_UNCERTAIN = EDTF.get('MULTIPLIER_IF_UNCERTAIN', 1.0) MULTIPLIER_IF_APPROXIMATE = EDTF.get('MULTIPLIER_IF_APPROXIMATE', 1.0) MULTIPLIER_IF_BOTH = EDTF.get('MULTIPLIER_IF_BOTH', 2.0) -BEGINNING_OF_TIME = EDTF.get("BEGINNING_OF_TIME", '-20000000') -END_OF_TIME = EDTF.get("BEGINNING_OF_TIME", '20000000') - DELTA_IF_UNKNOWN = EDTF.get("DELTA_IF_UNKNOWN", relativedelta(years=10)) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 1a65718..6f20667 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -3,7 +3,7 @@ from time import 
struct_time from datetime import date, datetime from operator import add, sub - +import math from dateutil.relativedelta import relativedelta from edtf import appsettings @@ -451,13 +451,25 @@ def _strict_date(self, lean): upper = self.other._strict_date(LATEST) return apply_delta(sub, upper, appsettings.DELTA_IF_UNKNOWN) else: - return LongYear(appsettings.BEGINNING_OF_TIME)._strict_date(lean) + return struct_time( + ( + -math.inf, + 1, + 1, + ) + tuple(TIME_EMPTY_TIME) + tuple(TIME_EMPTY_EXTRAS) + ) else: if self.is_unknown: lower = self.other._strict_date(EARLIEST) return apply_delta(add, lower, appsettings.DELTA_IF_UNKNOWN) else: - return LongYear(appsettings.END_OF_TIME)._strict_date(lean) + return struct_time( + ( + math.inf, + 12, + 31, + ) + tuple(TIME_EMPTY_TIME) + tuple(TIME_EMPTY_EXTRAS) + ) @property def precision(self): diff --git a/setup.py b/setup.py index f0f1849..1636957 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ def readme(): ], }, setup_requires=[ - 'setuptools_scm', + 'setuptools_scm >=5.0.2, <6.0.0', ], keywords=[ 'edtf', From 300b4fe8bedbf3d1790dbef23613c95c3aa2bf6a Mon Sep 17 00:00:00 2001 From: muellers-saw-leipzig Date: Thu, 8 Jun 2023 15:54:21 +0200 Subject: [PATCH 018/102] returning math.inf for open intervall sections --- edtf/parser/parser_classes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 6f20667..993c562 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -451,6 +451,7 @@ def _strict_date(self, lean): upper = self.other._strict_date(LATEST) return apply_delta(sub, upper, appsettings.DELTA_IF_UNKNOWN) else: + return -math.inf return struct_time( ( -math.inf, @@ -463,6 +464,7 @@ def _strict_date(self, lean): lower = self.other._strict_date(EARLIEST) return apply_delta(add, lower, appsettings.DELTA_IF_UNKNOWN) else: + return math.inf return struct_time( ( math.inf, From 1e3a01176d981df3ce63b6f8232c80b9f50597e4 Mon Sep 
17 00:00:00 2001 From: Alastair Weakley Date: Sun, 5 May 2024 13:22:52 +1000 Subject: [PATCH 019/102] Merge remote-tracking branch 'saw-leipzig/master' --- .travis.yml | 2 +- LICENSE | 1 + README.md | 110 +-- changelog.rst | 19 + edtf/__init__.py | 6 - edtf/parser/__init__.py | 2 - edtf2/__init__.py | 6 + {edtf => edtf2}/appsettings.py | 0 {edtf => edtf2}/convert.py | 2 +- {edtf => edtf2}/fields.py | 6 +- {edtf => edtf2}/jdutil.py | 0 {edtf => edtf2}/natlang/__init__.py | 0 {edtf => edtf2}/natlang/en.py | 2 +- {edtf => edtf2}/natlang/tests.py | 2 +- edtf2/parser/__init__.py | 2 + {edtf => edtf2}/parser/edtf_exceptions.py | 0 {edtf => edtf2}/parser/grammar.py | 4 +- edtf2/parser/grammar_test.py | 296 ++++++++ {edtf => edtf2}/parser/parser_classes.py | 18 +- edtf2/parser/parser_classes_tests.py | 792 ++++++++++++++++++++++ {edtf => edtf2}/parser/tests.py | 6 +- {edtf => edtf2}/tests.py | 2 +- setup.py | 14 +- vagrant wheel install problems.txt | 5 + 24 files changed, 1201 insertions(+), 96 deletions(-) delete mode 100644 edtf/__init__.py delete mode 100644 edtf/parser/__init__.py create mode 100644 edtf2/__init__.py rename {edtf => edtf2}/appsettings.py (100%) rename {edtf => edtf2}/convert.py (99%) rename {edtf => edtf2}/fields.py (97%) rename {edtf => edtf2}/jdutil.py (100%) rename {edtf => edtf2}/natlang/__init__.py (100%) rename {edtf => edtf2}/natlang/en.py (99%) rename {edtf => edtf2}/natlang/tests.py (99%) create mode 100644 edtf2/parser/__init__.py rename {edtf => edtf2}/parser/edtf_exceptions.py (100%) rename {edtf => edtf2}/parser/grammar.py (98%) create mode 100644 edtf2/parser/grammar_test.py rename {edtf => edtf2}/parser/parser_classes.py (97%) create mode 100644 edtf2/parser/parser_classes_tests.py rename {edtf => edtf2}/parser/tests.py (98%) rename {edtf => edtf2}/tests.py (99%) create mode 100644 vagrant wheel install problems.txt diff --git a/.travis.yml b/.travis.yml index e3377a3..9b9049b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,6 @@ 
python: before_install: - pip install nose coverage 'django<2' script: -- nosetests --verbose --with-coverage --cover-package=edtf +- nosetests --verbose --with-coverage --cover-package=edtf2 after_success: - coverage report diff --git a/LICENSE b/LICENSE index 756b6a4..f697a39 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,7 @@ The MIT License (MIT) Copyright (c) 2015 The Interaction Consortium +Copyright (c) 2023 SAW Leipzig Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 92cccee..c300c7d 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,19 @@ -python-edtf -=========== +edtf2 +===== -An implementation of EDTF format in Python, together with utility functions for parsing natural language date texts, and converting EDTF dates to related Python `date` objects. +An implementation of EDTF format in Python, together with utility functions for parsing natural language date texts, and converting EDTF dates to related Python `date` or `struct_time` objects. See http://www.loc.gov/standards/datetime/ for the current draft specification. +This project is based on python-edtf and was developed to include the newest specification + ## To install - pip install edtf + pip install edtf2 ## To use - >>> from edtf import parse_edtf + >>> from edtf2 import parse_edtf # Parse an EDTF string to an EDTFObject >>> e = parse_edtf("1979-08~") # approx August 1979 >>> e @@ -29,16 +31,16 @@ See http://www.loc.gov/standards/datetime/ for the current draft specification. ((1979, 7, 1), (1979, 9, 30)) # Date intervals - >>> interval = parse_edtf("1979-08~/open") + >>> interval = parse_edtf("1979-08~/..") >>> interval - Level1Interval: '1979-08~/open' + Level1Interval: '1979-08~/..' # Intervals have lower and upper EDTF objects. 
>>> interval.lower, interval.upper - (UncertainOrApproximate: '1979-08~', UncertainOrApproximate: 'open') - >>> interval.lower.upper_strict()[:3] - (1979, 8, 31) - >>> interval.upper.lower_strict() # 'open' is interpreted to mean 'still happening'. - [Today's date] + (UncertainOrApproximate: '1979-08~', UnspecifiedIntervalSection: '..') + >>> interval.lower.lower_strict()[:3], interval.lower.upper_strict()[:3] + ((1979, 8, 1), (1979, 8, 31)) + >>> interval.upper.upper_strict() # '..' is interpreted to mean open interval and is returning -/+ math.inf + math.inf # Date collections >>> coll = parse_edtf('{1667,1668, 1670..1672}') @@ -47,7 +49,7 @@ See http://www.loc.gov/standards/datetime/ for the current draft specification. >>> coll.objects (Date: '1667', Date: '1668', Consecutives: '1670..1672') -The object returned by `parse_edtf()` is an instance of an `edtf.parser.parser_classes.EDTFObject` subclass, depending on the type of date that was parsed. These classes are: +The object returned by `parse_edtf()` is an instance of an `edtf2.parser.parser_classes.EDTFObject` subclass, depending on the type of date that was parsed. These classes are: # Level 0 Date @@ -58,6 +60,7 @@ The object returned by `parse_edtf()` is an instance of an `edtf.parser.parser_c UncertainOrApproximate Unspecified Level1Interval + UnspecifiedIntervalSection LongYear Season @@ -68,9 +71,10 @@ The object returned by `parse_edtf()` is an instance of an `edtf.parser.parser_c MultipleDates MaskedPrecision Level2Interval + Level2Season ExponentialYear -All of these implement `upper/lower_strict/fuzzy()` methods to derive Python `date` objects. +All of these implement `upper/lower_strict/fuzzy()` methods to derive `struct_time` objects, except of UnspecifiedIntervalSection, that can also return math.inf value The `*Interval` instances have `upper` and `lower` properties that are themselves `EDTFObject` instances. 
@@ -86,92 +90,92 @@ Test coverage includes every example given in the spec table of features. * Date: - >>> parse_edtf('1979-08') # August 1979 - Date: '1979-08' + >>> parse_edtf('1979-08') # August 1979 + Date: '1979-08' * Date and Time: - >>> parse_edtf('2004-01-01T10:10:10+05:00') - DateAndTime: '2004-01-01T10:10:10+05:00' + >>> parse_edtf('2004-01-01T10:10:10+05:00') + DateAndTime: '2004-01-01T10:10:10+05:00' * Interval (start/end): - >>> parse_edtf('1979-08-28/1979-09-25') # From August 28 to September 25 1979 - Interval: '1979-08-28/1979-09-25' + >>> parse_edtf('1979-08-28/1979-09-25') # From August 28 to September 25 1979 + Interval: '1979-08-28/1979-09-25' ### Level 1 Extensions * Uncertain/Approximate dates: - - >>> parse_edtf('1979-08-28~') # Approximately August 28th 1979 - UncertainOrApproximate: '1979-08-28~' + + >>> parse_edtf('1979-08-28~') # Approximately August 28th 1979 + UncertainOrApproximate: '1979-08-28~' * Unspecified dates: - >>> parse_edtf('1979-08-XX') # An unknown day in August 1979 - Unspecified: '1979-08-XX' - >>> parse_edtf('1979-XX') # Some month in 1979 - Unspecified: '1979-XX' + >>> parse_edtf('1979-08-XX') # An unknown day in August 1979 + Unspecified: '1979-08-XX' + >>> parse_edtf('1979-XX') # Some month in 1979 + Unspecified: '1979-XX' * Extended intervals: - >>> parse_edtf('1984-06-02?/2004-08-08~') - Level1Interval: '1984-06-02?/2004-08-08~' + >>> parse_edtf('1984-06-02?/2004-08-08~') + Level1Interval: '1984-06-02?/2004-08-08~' * Years exceeding four digits: - >>> parse_edtf('y-12000') # 12000 years BCE - LongYear: 'y-12000' + >>> parse_edtf('y-12000') # 12000 years BCE + LongYear: 'y-12000' * Season: - >>> parse_edtf('1979-22') # Summer 1979 - Season: '1979-22' + >>> parse_edtf('1979-22') # Summer 1979 + Season: '1979-22' ### Level 2 Extensions * Partial uncertain/approximate: - >>> parse_edtf('(2011)-06-04~') # year certain, month/day approximate. 
- # Note that the result text is normalized - PartialUncertainOrApproximate: '2011-(06-04)~' + >>> parse_edtf('(2011)-06-04~') # year certain, month/day approximate. + # Note that the result text is normalized + PartialUncertainOrApproximate: '2011-(06-04)~' * Partial unspecified: - >>> parse_edtf('1979-XX-28') # The 28th day of an uncertain month in 1979 - PartialUnspecified: '1979-XX-28' + >>> parse_edtf('1979-XX-28') # The 28th day of an uncertain month in 1979 + PartialUnspecified: '1979-XX-28' * One of a set: - >>> parse_edtf("[..1760-12-03,1762]") - OneOfASet: '[..1760-12-03, 1762]' + >>> parse_edtf("[..1760-12-03,1762]") + OneOfASet: '[..1760-12-03, 1762]' * Multiple dates: - >>> parse_edtf('{1667,1668, 1670..1672}') - MultipleDates: '{1667, 1668, 1670..1672}' + >>> parse_edtf('{1667,1668, 1670..1672}') + MultipleDates: '{1667, 1668, 1670..1672}' * Masked precision: - >>> parse_edtf('197x') # A date in the 1970s. - MaskedPrecision: '197x' + >>> parse_edtf('197x') # A date in the 1970s. 
+ MaskedPrecision: '197x' * Level 2 Extended intervals: - >>> parse_edtf('2004-06-(01)~/2004-06-(20)~') - Level2Interval: '2004-06-(01)~/2004-06-(20)~' + >>> parse_edtf('2004-06-(01)~/2004-06-(20)~') + Level2Interval: '2004-06-(01)~/2004-06-(20)~' * Year requiring more than 4 digits - exponential form: - >>> parse_edtf('y-17e7') - ExponentialYear: 'y-17e7' + >>> parse_edtf('y-17e7') + ExponentialYear: 'y-17e7' ### Natural language representation The library includes a basic English natural language parser (it's not yet smart enough to work with occasions such as 'Easter', or in other languages): - >>> from edtf import text_to_edtf + >>> from edtf2 import text_to_edtf >>> text_to_edtf("circa August 1979") '1979-08~' @@ -269,7 +273,7 @@ Because Python's `datetime` module does not support dates out side the range 1 A The `struct_time` representation is more difficult to work with, but can be sorted as-is which is the primary use-case, and can be converted relatively easily to `date` or `datetime` objects (provided the year is within 1 to 9999 AD) or to date objects in more flexible libraries like [astropy.time](http://docs.astropy.org/en/stable/time/index.html) for years outside these bounds. -If you are sure you are working with dates within the range supported by Python's `datetime` module, you can get these more convenient objects using the `edtf.struct_time_to_date` and `edtf.struct_time_to_datetime` functions. +If you are sure you are working with dates within the range supported by Python's `datetime` module, you can get these more convenient objects using the `edtf2.struct_time_to_date` and `edtf2.struct_time_to_datetime` functions. NOTE: This library previously did return `date` and `datetime` objects from methods by default before we switched to `struct_time`. See ticket https://github.com/ixc/python-edtf/issues/26. 
@@ -287,7 +291,7 @@ In an ascending sort (most recent last), sort by `lower_strict` to get a natural >>> e.lower_strict()[:3] # Show only interesting parts of struct_time (1912, 4, 01) - >>> from edtf import struct_time_to_date + >>> from edtf2 import struct_time_to_date >>> struct_time_to_date(e.lower_strict()) # Convert to date datetime.date(1912, 4, 01) @@ -332,7 +336,7 @@ Two EDTF dates are considered equal if their unicode() representations are the s ## Django ORM field -The `edtf.fields.EDTFField` implements a simple Django field that stores an EDTF object in the database. +The `edtf2.fields.EDTFField` implements a simple Django field that stores an EDTF object in the database. To store a natural language value on your model, define another field, and set the `natural_text_field` parameter of your `EDTFField`. @@ -344,7 +348,7 @@ When your model is saved, the `natural_text_field` value will be parsed to set t Example usage: from django.db import models - from edtf.fields import EDTFField + from edtf2.fields import EDTFField class MyModel(models.Model): date_display = models.CharField( diff --git a/changelog.rst b/changelog.rst index ea5b6fa..c844427 100644 --- a/changelog.rst +++ b/changelog.rst @@ -5,6 +5,25 @@ In development -------------- +5.0.0 (2023-10-04) +------------------ + +* Breaking Changes: Implementation of the newer specifications from `https://www.loc.gov/standards/datetime/`:: + + Differences + This specification differs from the earlier draft as follows: + + - the unspecified date character (formerly lower case ‘u’) is superseded by the character (upper case) 'X'; + - Masked precision is eliminated; + - the uncertain and approximate qualifiers, '?' 
and '~', when applied together, are combined into a single qualifier character '%'; + - “qualification from the left” is introduced and replaces the grouping mechanism using parentheses; + - the extended interval syntax keywords 'unknown' and 'open' have been replaced with null and the double-dot notation ['..'] respectively; + - the year prefix 'y' and the exponential indicator 'e', both previously lowercase, are now 'Y' and 'E' (uppercase); and + - the significant digit indicator 'p' is now 'S' (uppercase). + +* Renaming of the project to edtf2: As this project seems to have no longer support from the creator `The Interaction Consortium` we decided to fork it and release it under a new name by our own + + 4.0 (2018-05-31) ---------------- diff --git a/edtf/__init__.py b/edtf/__init__.py deleted file mode 100644 index 291cccc..0000000 --- a/edtf/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from edtf.parser.grammar import parse_edtf -from edtf.natlang import text_to_edtf -from edtf.parser.parser_classes import * -from edtf.convert import dt_to_struct_time, struct_time_to_date, \ - struct_time_to_datetime, trim_struct_time, struct_time_to_jd, \ - jd_to_struct_time, old_specs_to_new_specs_expression diff --git a/edtf/parser/__init__.py b/edtf/parser/__init__.py deleted file mode 100644 index e5a0e5f..0000000 --- a/edtf/parser/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from edtf.parser.grammar import parse_edtf -from edtf.parser.parser_classes import * diff --git a/edtf2/__init__.py b/edtf2/__init__.py new file mode 100644 index 0000000..d85a291 --- /dev/null +++ b/edtf2/__init__.py @@ -0,0 +1,6 @@ +from edtf2.parser.grammar import parse_edtf +from edtf2.natlang import text_to_edtf +from edtf2.parser.parser_classes import * +from edtf2.convert import dt_to_struct_time, struct_time_to_date, \ + struct_time_to_datetime, trim_struct_time, struct_time_to_jd, \ + jd_to_struct_time, old_specs_to_new_specs_expression diff --git a/edtf/appsettings.py b/edtf2/appsettings.py 
similarity index 100% rename from edtf/appsettings.py rename to edtf2/appsettings.py diff --git a/edtf/convert.py b/edtf2/convert.py similarity index 99% rename from edtf/convert.py rename to edtf2/convert.py index f8d070f..f87b301 100644 --- a/edtf/convert.py +++ b/edtf2/convert.py @@ -1,7 +1,7 @@ from time import struct_time from datetime import date, datetime -from edtf import jdutil +from edtf2 import jdutil TIME_EMPTY_TIME = [0, 0, 0] # tm_hour, tm_min, tm_sec diff --git a/edtf/fields.py b/edtf2/fields.py similarity index 97% rename from edtf/fields.py rename to edtf2/fields.py index 52b9171..49d8e4b 100644 --- a/edtf/fields.py +++ b/edtf2/fields.py @@ -6,9 +6,9 @@ from django.db import models from django.core.exceptions import FieldDoesNotExist -from edtf import parse_edtf, EDTFObject -from edtf.natlang import text_to_edtf -from edtf.convert import struct_time_to_date, struct_time_to_jd +from edtf2 import parse_edtf, EDTFObject +from edtf2.natlang import text_to_edtf +from edtf2.convert import struct_time_to_date, struct_time_to_jd DATE_ATTRS = ( 'lower_strict', diff --git a/edtf/jdutil.py b/edtf2/jdutil.py similarity index 100% rename from edtf/jdutil.py rename to edtf2/jdutil.py diff --git a/edtf/natlang/__init__.py b/edtf2/natlang/__init__.py similarity index 100% rename from edtf/natlang/__init__.py rename to edtf2/natlang/__init__.py diff --git a/edtf/natlang/en.py b/edtf2/natlang/en.py similarity index 99% rename from edtf/natlang/en.py rename to edtf2/natlang/en.py index ff83034..394588b 100644 --- a/edtf/natlang/en.py +++ b/edtf2/natlang/en.py @@ -2,7 +2,7 @@ from datetime import datetime from dateutil.parser import parse, ParserError import re -from edtf import appsettings +from edtf2 import appsettings from six.moves import xrange diff --git a/edtf/natlang/tests.py b/edtf2/natlang/tests.py similarity index 99% rename from edtf/natlang/tests.py rename to edtf2/natlang/tests.py index ea137d2..54c025f 100644 --- a/edtf/natlang/tests.py +++ 
b/edtf2/natlang/tests.py @@ -1,5 +1,5 @@ import unittest -from edtf.natlang.en import text_to_edtf +from edtf2.natlang.en import text_to_edtf # where examples are tuples, the second item is the normalised output EXAMPLES = ( diff --git a/edtf2/parser/__init__.py b/edtf2/parser/__init__.py new file mode 100644 index 0000000..bd24299 --- /dev/null +++ b/edtf2/parser/__init__.py @@ -0,0 +1,2 @@ +from edtf2.parser.grammar import parse_edtf +from edtf2.parser.parser_classes import * diff --git a/edtf/parser/edtf_exceptions.py b/edtf2/parser/edtf_exceptions.py similarity index 100% rename from edtf/parser/edtf_exceptions.py rename to edtf2/parser/edtf_exceptions.py diff --git a/edtf/parser/grammar.py b/edtf2/parser/grammar.py similarity index 98% rename from edtf/parser/grammar.py rename to edtf2/parser/grammar.py index 1392180..047b4a6 100644 --- a/edtf/parser/grammar.py +++ b/edtf2/parser/grammar.py @@ -2,13 +2,13 @@ ZeroOrMore, oneOf, Regex, Combine, Word, NotAny, nums, Group # (* ************************** Level 0 *************************** *) -from edtf.parser.parser_classes import Date, DateAndTime, Interval, Unspecified, \ +from edtf2.parser.parser_classes import Date, DateAndTime, Interval, Unspecified, \ UncertainOrApproximate, Level1Interval, LongYear, Season, \ PartialUncertainOrApproximate, UA, PartialUnspecified, OneOfASet, \ Consecutives, EarlierConsecutives, LaterConsecutives, MultipleDates, \ MaskedPrecision, Level2Interval, ExponentialYear, Level2Season -from edtf.parser.edtf_exceptions import EDTFParseException +from edtf2.parser.edtf_exceptions import EDTFParseException oneThru12 = oneOf(['%.2d' % i for i in range(1, 13)]) oneThru13 = oneOf(['%.2d' % i for i in range(1, 14)]) diff --git a/edtf2/parser/grammar_test.py b/edtf2/parser/grammar_test.py new file mode 100644 index 0000000..e43c8fa --- /dev/null +++ b/edtf2/parser/grammar_test.py @@ -0,0 +1,296 @@ +from pyparsing import Literal as L, ParseException, Optional, Opt, OneOrMore, \ + ZeroOrMore, 
oneOf, Regex, Combine, Word, NotAny, nums, FollowedBy + +# (* ************************** Level 0 *************************** *) +from edtf2.parser.parser_classes import Date, DateAndTime, Interval, Unspecified, \ + UncertainOrApproximate, Level1Interval, LongYear, Season, \ + PartialUncertainOrApproximate, UA, PartialUnspecified, OneOfASet, \ + Consecutives, EarlierConsecutives, LaterConsecutives, MultipleDates, \ + MaskedPrecision, Level2Interval, ExponentialYear, UnspecifiedIntervalSection, Testi + +from edtf2.parser.edtf_exceptions import EDTFParseException + +oneThru12 = oneOf(['%.2d' % i for i in range(1, 13)]) +oneThru13 = oneOf(['%.2d' % i for i in range(1, 14)]) +oneThru23 = oneOf(['%.2d' % i for i in range(1, 24)]) +zeroThru23 = oneOf(['%.2d' % i for i in range(0, 24)]) +oneThru29 = oneOf(['%.2d' % i for i in range(1, 30)]) +oneThru30 = oneOf(['%.2d' % i for i in range(1, 31)]) +oneThru31 = oneOf(['%.2d' % i for i in range(1, 32)]) +oneThru59 = oneOf(['%.2d' % i for i in range(1, 60)]) +zeroThru59 = oneOf(['%.2d' % i for i in range(0, 60)]) + +positiveDigit = Word(nums, exact=1, excludeChars='0') +digit = Word(nums, exact=1) + +second = zeroThru59 +minute = zeroThru59 +hour = zeroThru23 +day = oneThru31("day") + +month = oneThru12("month") +monthDay = ( + (oneOf("01 03 05 07 08 10 12")("month") + "-" + oneThru31("day")) + ^ (oneOf("04 06 09 11")("month") + "-" + oneThru30("day")) + ^ (L("02")("month") + "-" + oneThru29("day")) +) + +# 4 digits, 0 to 9 +positiveYear = Word(nums, exact=4) + +# Negative version of positive year, but "-0000" is illegal +negativeYear = NotAny(L("-0000")) + ("-" + positiveYear) + +year = Combine(positiveYear ^ negativeYear)("year") + +yearMonth = year + "-" + month +yearMonthDay = year + "-" + monthDay # o hai iso date + +date = Combine(year ^ yearMonth ^ yearMonthDay)("date") +Date.set_parser(date) + +zoneOffsetHour = oneThru13 +zoneOffset = L("Z") \ + ^ (Regex("[+-]") + + (zoneOffsetHour + Optional(":" + minute) + ^ L("14:00") 
+ ^ ("00:" + oneThru59) + ) + ) + +baseTime = Combine(hour + ":" + minute + ":" + second ^ "24:00:00") + +time = Combine(baseTime + Optional(zoneOffset))("time") + +dateAndTime = date + "T" + time +DateAndTime.set_parser(dateAndTime) + +l0Interval = date("lower") + "/" + date("upper") +Interval.set_parser(l0Interval) + +level0Expression = date ^ dateAndTime ^ l0Interval + + +# (* ************************** Level 1 *************************** *) + +# (* ** Auxiliary Assignments for Level 1 ** *) +UASymbol = Combine(oneOf("? ~ %")) +UA.set_parser(UASymbol) + +seasonNumber = oneOf("21 22 23 24") + +# (* *** Season (unqualified) *** *) +season = year + "-" + seasonNumber("season") +Season.set_parser(season) + +dateOrSeason = date("") ^ season + +# (* *** Long Year - Simple Form *** *) + +longYearSimple = "Y" + Combine( + Optional("-") + positiveDigit + digit + digit + digit + OneOrMore(digit) +)("year") +LongYear.set_parser(longYearSimple) + +# (* *** L1Interval *** *) +uaDateOrSeason = dateOrSeason + Optional(UASymbol) + + +#unspecifiedIntervalSec = L('..')('unknownOrOpen') + FollowedBy(L("/") + uaDateOrSeason)('other_section_element') +#Testi.set_parser(unspecifiedIntervalSec) + +# bit of a kludge here to get the all the relevant tokens into the parse action +# cleanly otherwise the parameter names are overlapped. +def f(toks): + try: + return {'date': toks[0], 'ua': toks[1]} + except IndexError: + return {'date': toks[0], 'ua': None} + + +l1Start = '..' ^ uaDateOrSeason +#l1Start = unspecifiedIntervalSec ^ uaDateOrSeason +l1Start.addParseAction(f) +l1End = uaDateOrSeason ^ '..' 
+l1End.addParseAction(f) + +#level1Interval = l1Start("lower") + "/" + l1End("upper") +level1Interval = Optional(l1Start)("lower") + "/" + l1End("upper") \ + ^ l1Start("lower") + "/" + Optional(l1End("upper")) +Level1Interval.set_parser(level1Interval) + +# (* *** unspecified *** *) +yearWithOneOrTwoUnspecifedDigits = Combine( + digit + digit + (digit ^ 'X') + 'X' +)("year") +monthUnspecified = year + "-" + L("XX")("month") +dayUnspecified = yearMonth + "-" + L("XX")("day") +dayAndMonthUnspecified = year + "-" + L("XX")("month") + "-" + L("XX")("day") + +unspecified = yearWithOneOrTwoUnspecifedDigits \ + ^ monthUnspecified \ + ^ dayUnspecified \ + ^ dayAndMonthUnspecified +Unspecified.set_parser(unspecified) + +# (* *** uncertainOrApproxDate *** *) + +uncertainOrApproxDate = date('date') + UASymbol("ua") +UncertainOrApproximate.set_parser(uncertainOrApproxDate) + +level1Expression = uncertainOrApproxDate \ + ^ unspecified \ + ^ level1Interval \ + ^ longYearSimple \ + ^ season + +# (* ************************** Level 2 *************************** *) + +# (* ** Internal Unspecified** *) + +digitOrU = Word(nums + 'X', exact=1) + +# 2-digit day with at least one 'X' present +dayWithU = Combine( + ("X" + digitOrU) + ^ (digitOrU + 'X') +)("day") + +# 2-digit month with at least one 'X' present +monthWithU = Combine( + oneOf("0X 1X") + ^ ("X" + digitOrU) +)("month") + +# 4-digit year with at least one 'X' present +yearWithU = Combine( + ('X' + digitOrU + digitOrU + digitOrU) + ^ (digitOrU + 'X' + digitOrU + digitOrU) + ^ (digitOrU + digitOrU + 'X' + digitOrU) + ^ (digitOrU + digitOrU + digitOrU + 'X') +)("year") + +yearMonthWithU = ( + (Combine(year("") ^ yearWithU(""))("year") + "-" + monthWithU) + ^ (yearWithU + "-" + month) +) + +monthDayWithU = ( + (Combine(month("") ^ monthWithU(""))("month") + "-" + dayWithU) + ^ (monthWithU + "-" + day) +) + +yearMonthDayWithU = ( + (yearWithU + "-" + Combine(month("") ^ monthWithU(""))("month") + "-" + Combine(day("") ^ 
dayWithU(""))("day")) + ^ (year + "-" + monthWithU + "-" + Combine(day("") ^ dayWithU(""))("day")) + ^ (year + "-" + month + "-" + dayWithU) +) + +partialUnspecified = yearWithU ^ yearMonthWithU ^ yearMonthDayWithU +PartialUnspecified.set_parser(partialUnspecified) + +# (* ** Internal Uncertain or Approximate** *) + +# this line is out of spec, but the given examples (e.g. '(2004)?-06-04~') +# appear to require it. +year_with_brackets = year ^ ("(" + year + ")") + +# second clause below needed Optional() around the "year_ua" UASymbol, for dates +# like '(2011)-06-04~' to work. + +IUABase = \ + (year_with_brackets + UASymbol("year_ua") + "-" + month + Optional("-(" + day + ")" + UASymbol("day_ua"))) \ + ^ (year_with_brackets + Optional(UASymbol)("year_ua") + "-" + monthDay + Optional(UASymbol)("month_day_ua")) \ + ^ ( + year_with_brackets + Optional(UASymbol)("year_ua") + "-(" + month + ")" + UASymbol("month_ua") + + Optional("-(" + day + ")" + UASymbol("day_ua")) + ) \ + ^ ( + year_with_brackets + Optional(UASymbol)("year_ua") + "-(" + month + ")" + UASymbol("month_ua") + + Optional("-" + day) + ) \ + ^ (yearMonth + UASymbol("year_month_ua") + "-(" + day + ")" + UASymbol("day_ua")) \ + ^ (yearMonth + UASymbol("year_month_ua") + "-" + day) \ + ^ (yearMonth + "-(" + day + ")" + UASymbol("day_ua")) \ + ^ (year + "-(" + monthDay + ")" + UASymbol("month_day_ua")) \ + ^ (season("ssn") + UASymbol("season_ua")) + +partialUncertainOrApproximate = IUABase ^ ("(" + IUABase + ")" + UASymbol("all_ua")) +PartialUncertainOrApproximate.set_parser(partialUncertainOrApproximate) + +dateWithInternalUncertainty = partialUncertainOrApproximate \ + ^ partialUnspecified + +qualifyingString = Regex(r'\S') # any nonwhitespace char + +# (* ** SeasonQualified ** *) +seasonQualifier = qualifyingString +seasonQualified = season + "^" + seasonQualifier + +# (* ** Long Year - Scientific Form ** *) +positiveInteger = Combine(positiveDigit + ZeroOrMore(digit)) +longYearScientific = "Y" + 
Combine(Optional("-") + positiveInteger)("base") + "E" + \ + positiveInteger("exponent") + Optional("S" + positiveInteger("precision")) +ExponentialYear.set_parser(longYearScientific) + +# (* ** level2Interval ** *) +level2Interval = (dateOrSeason("lower") + "/" + dateWithInternalUncertainty("upper")) \ + ^ (dateWithInternalUncertainty("lower") + "/" + dateOrSeason("upper")) \ + ^ (dateWithInternalUncertainty("lower") + "/" + dateWithInternalUncertainty("upper")) +Level2Interval.set_parser(level2Interval) + +# (* ** Masked precision ** *) eliminated in latest specs +# maskedPrecision = Combine(digit + digit + ((digit + "x") ^ "xx"))("year") +# MaskedPrecision.set_parser(maskedPrecision) + +# (* ** Inclusive list and choice list** *) +consecutives = (yearMonthDay("lower") + ".." + yearMonthDay("upper")) \ + ^ (yearMonth("lower") + ".." + yearMonth("upper")) \ + ^ (year("lower") + ".." + year("upper")) +Consecutives.set_parser(consecutives) + +listElement = date \ + ^ dateWithInternalUncertainty \ + ^ uncertainOrApproxDate \ + ^ unspecified \ + ^ consecutives + +earlier = ".." + date("upper") +EarlierConsecutives.set_parser(earlier) +later = date("lower") + ".." 
+LaterConsecutives.set_parser(later) + +listContent = (earlier + ZeroOrMore("," + listElement)) \ + ^ (Optional(earlier + ",") + ZeroOrMore(listElement + ",") + later) \ + ^ (listElement + OneOrMore("," + listElement)) \ + ^ consecutives + +choiceList = "[" + listContent + "]" +OneOfASet.set_parser(choiceList) + +inclusiveList = "{" + listContent + "}" +MultipleDates.set_parser(inclusiveList) + +level2Expression = partialUncertainOrApproximate \ + ^ partialUnspecified \ + ^ choiceList \ + ^ inclusiveList \ + ^ level2Interval \ + ^ longYearScientific \ + ^ seasonQualified + +# putting it all together +edtfParser = level0Expression("level0") ^ level1Expression("level1") ^ level2Expression("level2") + + +def parse_edtf(str, parseAll=True, fail_silently=False): + try: + if not str: + raise ParseException("You must supply some input text") + p = edtfParser.parseString(str.strip(), parseAll) + if p: + return p[0] + except ParseException as e: + if fail_silently: + return None + raise EDTFParseException(e) diff --git a/edtf/parser/parser_classes.py b/edtf2/parser/parser_classes.py similarity index 97% rename from edtf/parser/parser_classes.py rename to edtf2/parser/parser_classes.py index 993c562..36aca5a 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf2/parser/parser_classes.py @@ -6,8 +6,8 @@ import math from dateutil.relativedelta import relativedelta -from edtf import appsettings -from edtf.convert import dt_to_struct_time, trim_struct_time, \ +from edtf2 import appsettings +from edtf2.convert import dt_to_struct_time, trim_struct_time, \ TIME_EMPTY_TIME, TIME_EMPTY_EXTRAS EARLIEST = 'earliest' @@ -452,26 +452,12 @@ def _strict_date(self, lean): return apply_delta(sub, upper, appsettings.DELTA_IF_UNKNOWN) else: return -math.inf - return struct_time( - ( - -math.inf, - 1, - 1, - ) + tuple(TIME_EMPTY_TIME) + tuple(TIME_EMPTY_EXTRAS) - ) else: if self.is_unknown: lower = self.other._strict_date(EARLIEST) return apply_delta(add, lower, appsettings.DELTA_IF_UNKNOWN) 
else: return math.inf - return struct_time( - ( - math.inf, - 12, - 31, - ) + tuple(TIME_EMPTY_TIME) + tuple(TIME_EMPTY_EXTRAS) - ) @property def precision(self): diff --git a/edtf2/parser/parser_classes_tests.py b/edtf2/parser/parser_classes_tests.py new file mode 100644 index 0000000..4c916c0 --- /dev/null +++ b/edtf2/parser/parser_classes_tests.py @@ -0,0 +1,792 @@ +import calendar +import re +from time import struct_time +from datetime import date, datetime +from operator import add, sub + +from dateutil.relativedelta import relativedelta + +from edtf2 import appsettings +from edtf2.convert import dt_to_struct_time, trim_struct_time, \ + TIME_EMPTY_TIME, TIME_EMPTY_EXTRAS + +EARLIEST = 'earliest' +LATEST = 'latest' + +PRECISION_MILLENIUM = "millenium" +PRECISION_CENTURY = "century" +PRECISION_DECADE = "decade" +PRECISION_YEAR = "year" +PRECISION_MONTH = "month" +PRECISION_SEASON = "season" +PRECISION_DAY = "day" + + +def days_in_month(year, month): + """ + Return the number of days in the given year and month, where month is + 1=January to 12=December, and respecting leap years as identified by + `calendar.isleap()` + """ + return { + 1: 31, + 2: 29 if calendar.isleap(year) else 28, + 3: 31, + 4: 30, + 5: 31, + 6: 30, + 7: 31, + 8: 31, + 9: 30, + 10: 31, + 11: 30, + 12: 31, + }[month] + + +def apply_delta(op, time_struct, delta): + """ + Apply a `relativedelta` to a `struct_time` data structure. + + `op` is an operator function, probably always `add` or `sub`tract to + correspond to `a_date + a_delta` and `a_date - a_delta`. + + This function is required because we cannot use standard `datetime` module + objects for conversion when the date/time is, or will become, outside the + boundary years 1 AD to 9999 AD. 
+ """ + if not delta: + return time_struct # No work to do + + try: + dt_result = op(datetime(*time_struct[:6]), delta) + return dt_to_struct_time(dt_result) + except (OverflowError, ValueError): + # Year is not within supported 1 to 9999 AD range + pass + + # Here we fake the year to one in the acceptable range to avoid having to + # write our own date rolling logic + + # Adjust the year to be close to the 2000 millenium in 1,000 year + # increments to try and retain accurate relative leap years + actual_year = time_struct.tm_year + millenium = int(float(actual_year) / 1000) + millenium_diff = (2 - millenium) * 1000 + adjusted_year = actual_year + millenium_diff + # Apply delta to the date/time with adjusted year + dt = datetime(*(adjusted_year,) + time_struct[1:6]) + dt_result = op(dt, delta) + # Convert result year back to its original millenium + final_year = dt_result.year - millenium_diff + return struct_time( + (final_year,) + dt_result.timetuple()[1:6] + tuple(TIME_EMPTY_EXTRAS)) + + +class EDTFObject(object): + """ + Object to attact to a parser to become instantiated when the parser + completes. + """ + parser = None + + @classmethod + def set_parser(cls, p): + cls.parser = p + p.addParseAction(cls.parse_action) + + @classmethod + def parse_action(cls, toks): + kwargs = toks.asDict() + try: + return cls(**kwargs) # replace the token list with the class + except Exception as e: + print("trying to %s.__init__(**%s)" % (cls.__name__, kwargs)) + raise e + + @classmethod + def parse(cls, s): + return cls.parser.parseString(s)[0] + + def __repr__(self): + return "%s: '%s'" % (type(self).__name__, str(self)) + + def __init__(self, *args, **kwargs): + str = "%s.__init__(*%s, **%s)" % ( + type(self).__name__, + args, kwargs, + ) + raise NotImplementedError("%s is not implemented." 
% str) + + def __str__(self): + raise NotImplementedError + + def _strict_date(self, lean): + raise NotImplementedError + + def lower_strict(self): + return self._strict_date(lean=EARLIEST) + + def upper_strict(self): + return self._strict_date(lean=LATEST) + + def _get_fuzzy_padding(self, lean): + """ + Subclasses should override this to pad based on how precise they are. + """ + return relativedelta(0) + + def get_is_approximate(self): + return getattr(self, '_is_approximate', False) + + def set_is_approximate(self, val): + self._is_approximate = val + is_approximate = property(get_is_approximate, set_is_approximate) + + def get_is_uncertain(self): + return getattr(self, '_is_uncertain', False) + + def set_is_uncertain(self, val): + self._is_uncertain = val + is_uncertain = property(get_is_uncertain, set_is_uncertain) + + def get_is_uncertain_and_approximate(self): + return getattr(self, '_uncertain_and_approximate', False) + + def set_is_uncertain_and_approximate(self, val): + self._uncertain_and_approximate = val + is_uncertain_and_approximate = property(get_is_uncertain_and_approximate, set_is_uncertain_and_approximate) + + def lower_fuzzy(self): + strict_val = self.lower_strict() + return apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) + + def upper_fuzzy(self): + strict_val = self.upper_strict() + return apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) + + def __eq__(self, other): + if isinstance(other, EDTFObject): + return str(self) == str(other) + elif isinstance(other, date): + return str(self) == other.isoformat() + elif isinstance(other, struct_time): + return self._strict_date() == trim_struct_time(other) + return False + + def __ne__(self, other): + if isinstance(other, EDTFObject): + return str(self) != str(other) + elif isinstance(other, date): + return str(self) != other.isoformat() + elif isinstance(other, struct_time): + return self._strict_date() != trim_struct_time(other) + return True + + def __gt__(self, other): 
+ if isinstance(other, EDTFObject): + return self.lower_strict() > other.lower_strict() + elif isinstance(other, date): + return self.lower_strict() > dt_to_struct_time(other) + elif isinstance(other, struct_time): + return self.lower_strict() > trim_struct_time(other) + raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__)) + + def __ge__(self, other): + if isinstance(other, EDTFObject): + return self.lower_strict() >= other.lower_strict() + elif isinstance(other, date): + return self.lower_strict() >= dt_to_struct_time(other) + elif isinstance(other, struct_time): + return self.lower_strict() >= trim_struct_time(other) + raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__)) + + def __lt__(self, other): + if isinstance(other, EDTFObject): + return self.lower_strict() < other.lower_strict() + elif isinstance(other, date): + return self.lower_strict() < dt_to_struct_time(other) + elif isinstance(other, struct_time): + return self.lower_strict() < trim_struct_time(other) + raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__)) + + def __le__(self, other): + if isinstance(other, EDTFObject): + return self.lower_strict() <= other.lower_strict() + elif isinstance(other, date): + return self.lower_strict() <= dt_to_struct_time(other) + elif isinstance(other, struct_time): + return self.lower_strict() <= trim_struct_time(other) + raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__)) + + +# (* ************************** Level 0 *************************** *) + +class Date(EDTFObject): + + def set_year(self, y): + if y is None: + raise AttributeError("Year must not be None") + self._year = y + + def get_year(self): + return self._year + year = property(get_year, set_year) + + def set_month(self, m): + self._month = m + if m == None: + self.day = None + + def get_month(self): + return self._month + month = property(get_month, 
set_month) + + def __init__(self, year=None, month=None, day=None, **kwargs): + for param in ('date', 'lower', 'upper'): + if param in kwargs: + self.__init__(**kwargs[param]) + return + + self.year = year # Year is required, but sometimes passed in as a 'date' dict. + self.month = month + self.day = day + + def __str__(self): + r = self.year + if self.month: + r += "-%s" % self.month + if self.day: + r += "-%s" % self.day + return r + + def isoformat(self, default=date.max): + return "%s-%02d-%02d" % ( + self.year, + int(self.month or default.month), + int(self.day or default.day), + ) + + def _precise_year(self, lean): + # Replace any ambiguous characters in the year string with 0s or 9s + if lean == EARLIEST: + return int(re.sub(r'X', r'0', self.year)) + else: + return int(re.sub(r'X', r'9', self.year)) + + def _precise_month(self, lean): + if self.month and self.month != "XX": + try: + return int(self.month) + except ValueError as e: + raise ValueError("Couldn't convert %s to int (in %s)" % (self.month, self)) + else: + return 1 if lean == EARLIEST else 12 + + def _precise_day(self, lean): + if not self.day or self.day == "XX": + if lean == EARLIEST: + return 1 + else: + return days_in_month( + self._precise_year(LATEST), self._precise_month(LATEST) + ) + else: + return int(self.day) + + def _strict_date(self, lean): + """ + Return a `time.struct_time` representation of the date. 
+ """ + return struct_time( + ( + self._precise_year(lean), + self._precise_month(lean), + self._precise_day(lean), + ) + tuple(TIME_EMPTY_TIME) + tuple(TIME_EMPTY_EXTRAS) + ) + + @property + def precision(self): + if self.day: + return PRECISION_DAY + if self.month: + return PRECISION_MONTH + return PRECISION_YEAR + + +class DateAndTime(EDTFObject): + def __init__(self, date, time): + self.date = date + self.time = time + + def __str__(self): + return self.isoformat() + + def isoformat(self): + return self.date.isoformat() + "T" + self.time + + def _strict_date(self, lean): + return self.date._strict_date(lean) + + def __eq__(self, other): + if isinstance(other, datetime): + return self.isoformat() == other.isoformat() + elif isinstance(other, struct_time): + return self._strict_date() == trim_struct_time(other) + return super(DateAndTime, self).__eq__(other) + + def __ne__(self, other): + if isinstance(other, datetime): + return self.isoformat() != other.isoformat() + elif isinstance(other, struct_time): + return self._strict_date() != trim_struct_time(other) + return super(DateAndTime, self).__ne__(other) + + +class Interval(EDTFObject): + def __init__(self, lower, upper): + self.lower = lower + self.upper = upper + + def __str__(self): + return "%s/%s" % (self.lower, self.upper) + + def _strict_date(self, lean): + if lean == EARLIEST: + try: + r = self.lower._strict_date(lean) + if r is None: + raise AttributeError + return r + except AttributeError: # it's a string, or no date. 
Result depends on the upper date + upper = self.upper._strict_date(LATEST) + return apply_delta(sub, upper, appsettings.DELTA_IF_UNKNOWN) + else: + try: + r = self.upper._strict_date(lean) + if r is None: + raise AttributeError + return r + except AttributeError: # an 'unknown' or 'open' string - depends on the lower date + import pdb; pdb.set_trace() + if self.upper and (self.upper == "open" or self.upper.date == "open"): + return dt_to_struct_time(date.today()) # it's still happening + else: + lower = self.lower._strict_date(EARLIEST) + return apply_delta(add, lower, appsettings.DELTA_IF_UNKNOWN) + + +# (* ************************** Level 1 *************************** *) + + +class UA(EDTFObject): + @classmethod + def parse_action(cls, toks): + args = toks.asList() + return cls(*args) + + def __init__(self, *args): + assert len(args) == 1 + ua = args[0] + + self.is_uncertain = "?" in ua + self.is_approximate = "~" in ua + self.is_uncertain_and_approximate = "%" in ua + + def __str__(self): + d = "" + if self.is_uncertain: + d += "?" 
+ if self.is_approximate: + d += "~" + if self.is_uncertain_and_approximate: + d += "%" + return d + + def _get_multiplier(self): + if self.is_uncertain_and_approximate: + return appsettings.MULTIPLIER_IF_BOTH + elif self.is_uncertain: + return appsettings.MULTIPLIER_IF_UNCERTAIN + elif self.is_approximate: + return appsettings.MULTIPLIER_IF_APPROXIMATE + + +class UncertainOrApproximate(EDTFObject): + def __init__(self, date, ua): + self.date = date + self.ua = ua + + def __str__(self): + if self.ua: + return "%s%s" % (self.date, self.ua) + else: + return str(self.date) + + def _strict_date(self, lean): + if self.date == "open": + return None # depends on the other date + return dt_to_struct_time(date.today()) + if self.date =="unknown": + return None # depends on the other date + return self.date._strict_date(lean) + + def _get_fuzzy_padding(self, lean): + if not self.ua: + return relativedelta(0) + multiplier = self.ua._get_multiplier() + + if self.date.precision == PRECISION_DAY: + return multiplier * appsettings.PADDING_DAY_PRECISION + elif self.date.precision == PRECISION_MONTH: + return multiplier * appsettings.PADDING_MONTH_PRECISION + elif self.date.precision == PRECISION_YEAR: + return multiplier * appsettings.PADDING_YEAR_PRECISION + + + +class Testi(EDTFObject): + # @classmethod + # def parse_action(cls, toks): + # args = toks.asList() + # return cls(*args) + + def __init__(self, **args): + print(args) + +class UnspecifiedIntervalSection(EDTFObject): + + def __init__(self, sectionOpen=False, other_section_element=None): + if sectionOpen: + self.is_open = True + self.is_unknown = False + else: + self.is_open = False + self.is_unknown = True + self.other = other_section_element + + def __str__(self): + if self.is_unknown: + return "" + else: + return ".." 
+ + def _strict_date(self, lean): + #import pdb; pdb.set_trace() + if lean == EARLIEST: + if self.is_unknown: + upper = self.other._strict_date(LATEST) + return apply_delta(sub, upper, appsettings.DELTA_IF_UNKNOWN) + else: + return dt_to_struct_time(date.min) # from the beginning of time; *ahem, i mean python datetime + else: + if self.is_unknown: + lower = self.other._strict_date(EARLIEST) + return apply_delta(add, lower, appsettings.DELTA_IF_UNKNOWN) + else: + return dt_to_struct_time(date.max) # to then end of python datetime + + +class Unspecified(Date): + pass + + +class Level1Interval(Interval): + def __init__(self, lower=None, upper=None): + #import pdb; pdb.set_trace() + if lower: + if lower['date'] == '..': + self.lower = UnspecifiedIntervalSection(True, UncertainOrApproximate(**upper)) + else: + self.lower = UncertainOrApproximate(**lower) + else: + self.lower = UnspecifiedIntervalSection(False, UncertainOrApproximate(**upper)) + if upper: + if upper['date'] == '..': + self.upper = UnspecifiedIntervalSection(True, UncertainOrApproximate(**lower)) + else: + self.upper = UncertainOrApproximate(**upper) + else: + self.upper = UnspecifiedIntervalSection(False, UncertainOrApproximate(**lower)) + + def _get_fuzzy_padding(self, lean): + if lean == EARLIEST: + return self.lower._get_fuzzy_padding(lean) + elif lean == LATEST: + return self.upper._get_fuzzy_padding(lean) + + +class LongYear(EDTFObject): + def __init__(self, year): + self.year = year + + def __str__(self): + return "Y%s" % self.year + + def _precise_year(self): + return int(self.year) + + def _strict_date(self, lean): + py = self._precise_year() + if lean == EARLIEST: + return struct_time( + [py, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + else: + return struct_time( + [py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + + +class Season(Date): + def __init__(self, year, season, **kwargs): + self.year = year + self.season = season # use season to look up month + # day isn't part of the 'season' 
spec, but it helps the inherited + # `Date` methods do their thing. + self.day = None + + def __str__(self): + return "%s-%s" % (self.year, self.season) + + def _precise_month(self, lean): + rng = appsettings.SEASON_MONTHS_RANGE[int(self.season)] + if lean == EARLIEST: + return rng[0] + else: + return rng[1] + + +# (* ************************** Level 2 *************************** *) + + +class PartialUncertainOrApproximate(Date): + + def set_year(self, y): # Year can be None. + self._year = y + year = property(Date.get_year, set_year) + + def __init__( + self, year=None, month=None, day=None, + year_ua=False, month_ua = False, day_ua = False, + year_month_ua = False, month_day_ua = False, + ssn=None, season_ua=False, all_ua=False + ): + self.year = year + self.month = month + self.day = day + + self.year_ua = year_ua + self.month_ua = month_ua + self.day_ua = day_ua + + self.year_month_ua = year_month_ua + self.month_day_ua = month_day_ua + + self.season = ssn + self.season_ua = season_ua + + self.all_ua = all_ua + + def __str__(self): + + if self.season_ua: + return "%s%s" % (self.season, self.season_ua) + + if self.year_ua: + y = "%s%s" % (self.year, self.year_ua) + else: + y = str(self.year) + + if self.month_ua: + m = "(%s)%s" % (self.month, self.month_ua) + else: + m = str(self.month) + + if self.day: + if self.day_ua: + d = "(%s)%s" % (self.day, self.day_ua) + else: + d = str(self.day) + else: + d = None + + if self.year_month_ua: # year/month approximate. No brackets needed. 
+ ym = "%s-%s%s" % (y, m, self.year_month_ua) + if d: + result = "%s-%s" % (ym, d) + else: + result = ym + elif self.month_day_ua: + if self.year_ua: # we don't need the brackets round month and day + result = "%s-%s-%s%s" % (y, m, d, self.month_day_ua) + else: + result = "%s-(%s-%s)%s" % (y, m, d, self.month_day_ua) + else: + if d: + result = "%s-%s-%s" % (y, m, d) + else: + result = "%s-%s" % (y, m) + + if self.all_ua: + result = "(%s)%s" % (result, self.all_ua) + + return result + + def _precise_year(self, lean): + if self.season: + return self.season._precise_year(lean) + return super(PartialUncertainOrApproximate, self)._precise_year(lean) + + def _precise_month(self, lean): + if self.season: + return self.season._precise_month(lean) + return super(PartialUncertainOrApproximate, self)._precise_month(lean) + + def _precise_day(self, lean): + if self.season: + return self.season._precise_day(lean) + return super(PartialUncertainOrApproximate, self)._precise_day(lean) + + def _get_fuzzy_padding(self, lean): + """ + This is not a perfect interpretation as fuzziness is introduced for + redundant uncertainly modifiers e.g. (2006~)~ will get two sets of + fuzziness. 
+ """ + result = relativedelta(0) + + if self.year_ua: + result += appsettings.PADDING_YEAR_PRECISION * self.year_ua._get_multiplier() + if self.month_ua: + result += appsettings.PADDING_MONTH_PRECISION * self.month_ua._get_multiplier() + if self.day_ua: + result += appsettings.PADDING_DAY_PRECISION * self.day_ua._get_multiplier() + + if self.year_month_ua: + result += appsettings.PADDING_YEAR_PRECISION * self.year_month_ua._get_multiplier() + result += appsettings.PADDING_MONTH_PRECISION * self.year_month_ua._get_multiplier() + if self.month_day_ua: + result += appsettings.PADDING_DAY_PRECISION * self.month_day_ua._get_multiplier() + result += appsettings.PADDING_MONTH_PRECISION * self.month_day_ua._get_multiplier() + + if self.season_ua: + result += appsettings.PADDING_SEASON_PRECISION * self.season_ua._get_multiplier() + + if self.all_ua: + multiplier = self.all_ua._get_multiplier() + + if self.precision == PRECISION_DAY: + result += multiplier * appsettings.PADDING_DAY_PRECISION + result += multiplier * appsettings.PADDING_MONTH_PRECISION + result += multiplier * appsettings.PADDING_YEAR_PRECISION + elif self.precision == PRECISION_MONTH: + result += multiplier * appsettings.PADDING_MONTH_PRECISION + result += multiplier * appsettings.PADDING_YEAR_PRECISION + elif self.precision == PRECISION_YEAR: + result += multiplier * appsettings.PADDING_YEAR_PRECISION + + return result + + +class PartialUnspecified(Unspecified): + pass + + +class Consecutives(Interval): + # Treating Consecutive ranges as intervals where one bound is optional + def __init__(self, lower=None, upper=None): + if lower and not isinstance(lower, EDTFObject): + self.lower = Date.parse(lower) + else: + self.lower = lower + + if upper and not isinstance(upper, EDTFObject): + self.upper = Date.parse(upper) + else: + self.upper = upper + + def __str__(self): + return "%s..%s" % (self.lower or '', self.upper or '') + + +class EarlierConsecutives(Consecutives): + pass + + +class 
LaterConsecutives(Consecutives): + pass + + +class OneOfASet(EDTFObject): + @classmethod + def parse_action(cls, toks): + args = [t for t in toks.asList() if isinstance(t, EDTFObject)] + return cls(*args) + + def __init__(self, *args): + self.objects = args + + def __str__(self): + return "[%s]" % (", ".join([str(o) for o in self.objects])) + + def _strict_date(self, lean): + if lean == LATEST: + return max([x._strict_date(lean) for x in self.objects]) + else: + return min([x._strict_date(lean) for x in self.objects]) + + +class MultipleDates(EDTFObject): + @classmethod + def parse_action(cls, toks): + args = [t for t in toks.asList() if isinstance(t, EDTFObject)] + return cls(*args) + + def __init__(self, *args): + self.objects = args + + def __str__(self): + return "{%s}" % (", ".join([str(o) for o in self.objects])) + + def _strict_date(self, lean): + if lean == LATEST: + return max([x._strict_date(lean) for x in self.objects]) + else: + return min([x._strict_date(lean) for x in self.objects]) + + +class MaskedPrecision(Date): + pass + + +class Level2Interval(Level1Interval): + def __init__(self, lower, upper): + # Check whether incoming lower/upper values are single-item lists, and + # if so take just the first item. This works around what I *think* is a + # bug in the grammer that provides us with single-item lists of + # `PartialUncertainOrApproximate` items for lower/upper values. 
+ if isinstance(lower, (tuple, list)) and len(lower) == 1: + self.lower = lower[0] + else: + self.lower = lower + if isinstance(lower, (tuple, list)) and len(upper) == 1: + self.upper = upper[0] + else: + self.upper = upper + + +class ExponentialYear(LongYear): + def __init__(self, base, exponent, precision=None): + self.base = base + self.exponent = exponent + self.precision = precision + + def _precise_year(self): + return int(self.base) * 10 ** int(self.exponent) + + def get_year(self): + if self.precision: + return '%sE%sS%s' % (self.base, self.exponent, self.precision) + else: + return '%sE%s' % (self.base, self.exponent) + year = property(get_year) diff --git a/edtf/parser/tests.py b/edtf2/parser/tests.py similarity index 98% rename from edtf/parser/tests.py rename to edtf2/parser/tests.py index 55fa288..4c376d9 100644 --- a/edtf/parser/tests.py +++ b/edtf2/parser/tests.py @@ -3,10 +3,10 @@ from datetime import date from time import struct_time -from edtf.parser.grammar import parse_edtf as parse -from edtf.parser.parser_classes import EDTFObject, TIME_EMPTY_TIME, \ +from edtf2.parser.grammar import parse_edtf as parse +from edtf2.parser.parser_classes import EDTFObject, TIME_EMPTY_TIME, \ TIME_EMPTY_EXTRAS -from edtf.parser.edtf_exceptions import EDTFParseException +from edtf2.parser.edtf_exceptions import EDTFParseException # Example object types and attributes. # the first item in each tuple is the input EDTF string, and expected parse result. 
diff --git a/edtf/tests.py b/edtf2/tests.py similarity index 99% rename from edtf/tests.py rename to edtf2/tests.py index 0e49e67..f48abbd 100644 --- a/edtf/tests.py +++ b/edtf2/tests.py @@ -3,7 +3,7 @@ from time import struct_time from datetime import datetime, date -from edtf import convert +from edtf2 import convert class TestConversions(unittest.TestCase): diff --git a/setup.py b/setup.py index 1636957..641711d 100644 --- a/setup.py +++ b/setup.py @@ -3,16 +3,19 @@ import setuptools import sys +version = '5.0.0' + def readme(): with open('README.md') as f: return f.read() setuptools.setup( - name='edtf', - use_scm_version={'version_scheme': 'post-release'}, - url='https://github.com/ixc/python-edtf', - author='Greg Turner', - author_email='greg@interaction.net.au', + name='edtf2', + version=version, + #use_scm_version={'version_scheme': 'post-release'}, + url='https://github.com/saw-leipzig/python-edtf', + author='Sabine Müller', + author_email='muellers@saw-leipzig.de', description='Python implementation of Library of Congress EDTF (Extended ' 'Date Time Format) specification', long_description=readme(), @@ -39,7 +42,6 @@ def readme(): 'edtf', ], classifiers=[ - 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', 'Intended Audience :: End Users/Desktop', 'License :: OSI Approved :: MIT License', diff --git a/vagrant wheel install problems.txt b/vagrant wheel install problems.txt new file mode 100644 index 0000000..174f67e --- /dev/null +++ b/vagrant wheel install problems.txt @@ -0,0 +1,5 @@ +vagrant wheel install problems +https://stackoverflow.com/questions/56851961/how-to-fix-no-such-file-or-directory-error-in-setuptools-wheel-py157-convert + +from that link: +So it turns out that this problem was being caused by lag in Vagrant/Virtualbox's synced folders. I was trying to build the Python project inside a Vagrant VM shared from the host file system using a synced folder. 
Copying the project out of the synced folder into another folder in the VM allows it to build. Another dirty hack that worked was to add a time.sleep(1) in the setuptools/wheel.py source file on line 157 before the os.rename that was causing the OS Exception to be raised. This gives the file system a chance to sync, and therefore works around the issue. \ No newline at end of file From a095fc197aab890279a330607da843b74caf3f11 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Sun, 5 May 2024 13:40:42 +1000 Subject: [PATCH 020/102] Revert name change from 43bc40 and e03739 back to `edtf` --- .travis.yml | 2 +- README.md | 20 +++++++++---------- changelog.rst | 5 +++++ edtf/__init__.py | 6 ++++++ {edtf2 => edtf}/appsettings.py | 0 {edtf2 => edtf}/convert.py | 2 +- {edtf2 => edtf}/fields.py | 6 +++--- {edtf2 => edtf}/jdutil.py | 0 {edtf2 => edtf}/natlang/__init__.py | 0 {edtf2 => edtf}/natlang/en.py | 2 +- {edtf2 => edtf}/natlang/tests.py | 4 ++-- edtf/parser/__init__.py | 2 ++ {edtf2 => edtf}/parser/edtf_exceptions.py | 0 {edtf2 => edtf}/parser/grammar.py | 8 ++++---- {edtf2 => edtf}/parser/grammar_test.py | 6 +++--- {edtf2 => edtf}/parser/parser_classes.py | 6 +++--- .../parser/parser_classes_tests.py | 4 ++-- {edtf2 => edtf}/parser/tests.py | 14 ++++++------- {edtf2 => edtf}/tests.py | 2 +- edtf2/__init__.py | 6 ------ edtf2/parser/__init__.py | 2 -- setup.py | 8 ++++---- 22 files changed, 55 insertions(+), 50 deletions(-) create mode 100644 edtf/__init__.py rename {edtf2 => edtf}/appsettings.py (100%) rename {edtf2 => edtf}/convert.py (99%) rename {edtf2 => edtf}/fields.py (97%) rename {edtf2 => edtf}/jdutil.py (100%) rename {edtf2 => edtf}/natlang/__init__.py (100%) rename {edtf2 => edtf}/natlang/en.py (99%) rename {edtf2 => edtf}/natlang/tests.py (99%) create mode 100644 edtf/parser/__init__.py rename {edtf2 => edtf}/parser/edtf_exceptions.py (100%) rename {edtf2 => edtf}/parser/grammar.py (97%) rename {edtf2 => edtf}/parser/grammar_test.py (97%) rename 
{edtf2 => edtf}/parser/parser_classes.py (99%) rename {edtf2 => edtf}/parser/parser_classes_tests.py (99%) rename {edtf2 => edtf}/parser/tests.py (98%) rename {edtf2 => edtf}/tests.py (99%) delete mode 100644 edtf2/__init__.py delete mode 100644 edtf2/parser/__init__.py diff --git a/.travis.yml b/.travis.yml index 9b9049b..e3377a3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,6 @@ python: before_install: - pip install nose coverage 'django<2' script: -- nosetests --verbose --with-coverage --cover-package=edtf2 +- nosetests --verbose --with-coverage --cover-package=edtf after_success: - coverage report diff --git a/README.md b/README.md index c300c7d..76aec1a 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -edtf2 +edtf ===== An implementation of EDTF format in Python, together with utility functions for parsing natural language date texts, and converting EDTF dates to related Python `date` or `struct_time` objects. @@ -9,11 +9,11 @@ This project is based on python-edtf and was developed to include the newest spe ## To install - pip install edtf2 + pip install edtf ## To use - >>> from edtf2 import parse_edtf + >>> from edtf import parse_edtf # Parse an EDTF string to an EDTFObject >>> e = parse_edtf("1979-08~") # approx August 1979 >>> e @@ -49,7 +49,7 @@ This project is based on python-edtf and was developed to include the newest spe >>> coll.objects (Date: '1667', Date: '1668', Consecutives: '1670..1672') -The object returned by `parse_edtf()` is an instance of an `edtf2.parser.parser_classes.EDTFObject` subclass, depending on the type of date that was parsed. These classes are: +The object returned by `parse_edtf()` is an instance of an `edtf.parser.parser_classes.EDTFObject` subclass, depending on the type of date that was parsed. These classes are: # Level 0 Date @@ -106,7 +106,7 @@ Test coverage includes every example given in the spec table of features. 
### Level 1 Extensions * Uncertain/Approximate dates: - + >>> parse_edtf('1979-08-28~') # Approximately August 28th 1979 UncertainOrApproximate: '1979-08-28~' @@ -175,7 +175,7 @@ Test coverage includes every example given in the spec table of features. The library includes a basic English natural language parser (it's not yet smart enough to work with occasions such as 'Easter', or in other languages): - >>> from edtf2 import text_to_edtf + >>> from edtf import text_to_edtf >>> text_to_edtf("circa August 1979") '1979-08~' @@ -273,7 +273,7 @@ Because Python's `datetime` module does not support dates out side the range 1 A The `struct_time` representation is more difficult to work with, but can be sorted as-is which is the primary use-case, and can be converted relatively easily to `date` or `datetime` objects (provided the year is within 1 to 9999 AD) or to date objects in more flexible libraries like [astropy.time](http://docs.astropy.org/en/stable/time/index.html) for years outside these bounds. -If you are sure you are working with dates within the range supported by Python's `datetime` module, you can get these more convenient objects using the `edtf2.struct_time_to_date` and `edtf2.struct_time_to_datetime` functions. +If you are sure you are working with dates within the range supported by Python's `datetime` module, you can get these more convenient objects using the `edtf.struct_time_to_date` and `edtf.struct_time_to_datetime` functions. NOTE: This library previously did return `date` and `datetime` objects from methods by default before we switched to `struct_time`. See ticket https://github.com/ixc/python-edtf/issues/26. 
@@ -291,7 +291,7 @@ In an ascending sort (most recent last), sort by `lower_strict` to get a natural >>> e.lower_strict()[:3] # Show only interesting parts of struct_time (1912, 4, 01) - >>> from edtf2 import struct_time_to_date + >>> from edtf import struct_time_to_date >>> struct_time_to_date(e.lower_strict()) # Convert to date datetime.date(1912, 4, 01) @@ -336,7 +336,7 @@ Two EDTF dates are considered equal if their unicode() representations are the s ## Django ORM field -The `edtf2.fields.EDTFField` implements a simple Django field that stores an EDTF object in the database. +The `edtf.fields.EDTFField` implements a simple Django field that stores an EDTF object in the database. To store a natural language value on your model, define another field, and set the `natural_text_field` parameter of your `EDTFField`. @@ -348,7 +348,7 @@ When your model is saved, the `natural_text_field` value will be parsed to set t Example usage: from django.db import models - from edtf2.fields import EDTFField + from edtf.fields import EDTFField class MyModel(models.Model): date_display = models.CharField( diff --git a/changelog.rst b/changelog.rst index c844427..690f2ed 100644 --- a/changelog.rst +++ b/changelog.rst @@ -4,6 +4,10 @@ Changelog In development -------------- +5.0.0.develop0 (2024-05-05) +-------------------------- + +* Breaking Changes: Rename project back to edtf from edtf2, after the merge of work form https://github.com/saw-leipzig/python-edtf/ 5.0.0 (2023-10-04) ------------------ @@ -22,6 +26,7 @@ In development - the significant digit indicator 'p' is now 'S' (uppercase). 
* Renaming of the project to edtf2: As this project seems to have no longer support from the creator `The Interaction Consortium` we decided to fork it and release it under a new name by our own +* Author: https://github.com/muellersSAW 4.0 (2018-05-31) diff --git a/edtf/__init__.py b/edtf/__init__.py new file mode 100644 index 0000000..291cccc --- /dev/null +++ b/edtf/__init__.py @@ -0,0 +1,6 @@ +from edtf.parser.grammar import parse_edtf +from edtf.natlang import text_to_edtf +from edtf.parser.parser_classes import * +from edtf.convert import dt_to_struct_time, struct_time_to_date, \ + struct_time_to_datetime, trim_struct_time, struct_time_to_jd, \ + jd_to_struct_time, old_specs_to_new_specs_expression diff --git a/edtf2/appsettings.py b/edtf/appsettings.py similarity index 100% rename from edtf2/appsettings.py rename to edtf/appsettings.py diff --git a/edtf2/convert.py b/edtf/convert.py similarity index 99% rename from edtf2/convert.py rename to edtf/convert.py index f87b301..f8d070f 100644 --- a/edtf2/convert.py +++ b/edtf/convert.py @@ -1,7 +1,7 @@ from time import struct_time from datetime import date, datetime -from edtf2 import jdutil +from edtf import jdutil TIME_EMPTY_TIME = [0, 0, 0] # tm_hour, tm_min, tm_sec diff --git a/edtf2/fields.py b/edtf/fields.py similarity index 97% rename from edtf2/fields.py rename to edtf/fields.py index 49d8e4b..52b9171 100644 --- a/edtf2/fields.py +++ b/edtf/fields.py @@ -6,9 +6,9 @@ from django.db import models from django.core.exceptions import FieldDoesNotExist -from edtf2 import parse_edtf, EDTFObject -from edtf2.natlang import text_to_edtf -from edtf2.convert import struct_time_to_date, struct_time_to_jd +from edtf import parse_edtf, EDTFObject +from edtf.natlang import text_to_edtf +from edtf.convert import struct_time_to_date, struct_time_to_jd DATE_ATTRS = ( 'lower_strict', diff --git a/edtf2/jdutil.py b/edtf/jdutil.py similarity index 100% rename from edtf2/jdutil.py rename to edtf/jdutil.py diff --git 
a/edtf2/natlang/__init__.py b/edtf/natlang/__init__.py similarity index 100% rename from edtf2/natlang/__init__.py rename to edtf/natlang/__init__.py diff --git a/edtf2/natlang/en.py b/edtf/natlang/en.py similarity index 99% rename from edtf2/natlang/en.py rename to edtf/natlang/en.py index 394588b..ff83034 100644 --- a/edtf2/natlang/en.py +++ b/edtf/natlang/en.py @@ -2,7 +2,7 @@ from datetime import datetime from dateutil.parser import parse, ParserError import re -from edtf2 import appsettings +from edtf import appsettings from six.moves import xrange diff --git a/edtf2/natlang/tests.py b/edtf/natlang/tests.py similarity index 99% rename from edtf2/natlang/tests.py rename to edtf/natlang/tests.py index 54c025f..645a373 100644 --- a/edtf2/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -1,5 +1,5 @@ import unittest -from edtf2.natlang.en import text_to_edtf +from edtf.natlang.en import text_to_edtf # where examples are tuples, the second item is the normalised output EXAMPLES = ( @@ -203,7 +203,7 @@ def test_natlang(self): For each of the examples, establish that: - the unicode of the parsed object is acceptably equal to the EDTF string - the parsed object is a subclass of EDTFObject - :return: + :return: """ for i, o in EXAMPLES: e = text_to_edtf(i) diff --git a/edtf/parser/__init__.py b/edtf/parser/__init__.py new file mode 100644 index 0000000..e5a0e5f --- /dev/null +++ b/edtf/parser/__init__.py @@ -0,0 +1,2 @@ +from edtf.parser.grammar import parse_edtf +from edtf.parser.parser_classes import * diff --git a/edtf2/parser/edtf_exceptions.py b/edtf/parser/edtf_exceptions.py similarity index 100% rename from edtf2/parser/edtf_exceptions.py rename to edtf/parser/edtf_exceptions.py diff --git a/edtf2/parser/grammar.py b/edtf/parser/grammar.py similarity index 97% rename from edtf2/parser/grammar.py rename to edtf/parser/grammar.py index 047b4a6..d69e719 100644 --- a/edtf2/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -2,13 +2,13 @@ ZeroOrMore, oneOf, Regex, 
Combine, Word, NotAny, nums, Group # (* ************************** Level 0 *************************** *) -from edtf2.parser.parser_classes import Date, DateAndTime, Interval, Unspecified, \ +from edtf.parser.parser_classes import Date, DateAndTime, Interval, Unspecified, \ UncertainOrApproximate, Level1Interval, LongYear, Season, \ PartialUncertainOrApproximate, UA, PartialUnspecified, OneOfASet, \ Consecutives, EarlierConsecutives, LaterConsecutives, MultipleDates, \ MaskedPrecision, Level2Interval, ExponentialYear, Level2Season -from edtf2.parser.edtf_exceptions import EDTFParseException +from edtf.parser.edtf_exceptions import EDTFParseException oneThru12 = oneOf(['%.2d' % i for i in range(1, 13)]) oneThru13 = oneOf(['%.2d' % i for i in range(1, 14)]) @@ -189,11 +189,11 @@ def f(toks): # group qualification # qualifier right of a component(date, month, day) applies to all components to the left group_qual = yearMonth + UASymbol("year_month_ua") + "-" + day \ - ^ year + UASymbol("year_ua") + "-" + month + Opt("-" + day) + ^ year + UASymbol("year_ua") + "-" + month + Opt("-" + day) # component qualification # qualifier immediate left of a component (date, month, day) applies to that component only -qual_year = year ^ UASymbol("year_ua_b") + year ^ year + UASymbol("year_ua") +qual_year = year ^ UASymbol("year_ua_b") + year ^ year + UASymbol("year_ua") qual_month = month ^ UASymbol("month_ua") + month qual_day = day ^ UASymbol("day_ua") + day diff --git a/edtf2/parser/grammar_test.py b/edtf/parser/grammar_test.py similarity index 97% rename from edtf2/parser/grammar_test.py rename to edtf/parser/grammar_test.py index e43c8fa..9482c9e 100644 --- a/edtf2/parser/grammar_test.py +++ b/edtf/parser/grammar_test.py @@ -2,13 +2,13 @@ ZeroOrMore, oneOf, Regex, Combine, Word, NotAny, nums, FollowedBy # (* ************************** Level 0 *************************** *) -from edtf2.parser.parser_classes import Date, DateAndTime, Interval, Unspecified, \ +from 
edtf.parser.parser_classes import Date, DateAndTime, Interval, Unspecified, \ UncertainOrApproximate, Level1Interval, LongYear, Season, \ PartialUncertainOrApproximate, UA, PartialUnspecified, OneOfASet, \ Consecutives, EarlierConsecutives, LaterConsecutives, MultipleDates, \ MaskedPrecision, Level2Interval, ExponentialYear, UnspecifiedIntervalSection, Testi -from edtf2.parser.edtf_exceptions import EDTFParseException +from edtf.parser.edtf_exceptions import EDTFParseException oneThru12 = oneOf(['%.2d' % i for i in range(1, 13)]) oneThru13 = oneOf(['%.2d' % i for i in range(1, 14)]) @@ -114,7 +114,7 @@ def f(toks): l1End = uaDateOrSeason ^ '..' l1End.addParseAction(f) -#level1Interval = l1Start("lower") + "/" + l1End("upper") +#level1Interval = l1Start("lower") + "/" + l1End("upper") level1Interval = Optional(l1Start)("lower") + "/" + l1End("upper") \ ^ l1Start("lower") + "/" + Optional(l1End("upper")) Level1Interval.set_parser(level1Interval) diff --git a/edtf2/parser/parser_classes.py b/edtf/parser/parser_classes.py similarity index 99% rename from edtf2/parser/parser_classes.py rename to edtf/parser/parser_classes.py index 36aca5a..2d6c0bf 100644 --- a/edtf2/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -6,8 +6,8 @@ import math from dateutil.relativedelta import relativedelta -from edtf2 import appsettings -from edtf2.convert import dt_to_struct_time, trim_struct_time, \ +from edtf import appsettings +from edtf.convert import dt_to_struct_time, trim_struct_time, \ TIME_EMPTY_TIME, TIME_EMPTY_EXTRAS EARLIEST = 'earliest' @@ -573,7 +573,7 @@ def __str__(self): else: if self.year_ua_b: y = "%s%s" % (self.year_ua_b, self.year) - else: + else: y = str(self.year) if self.month_ua: diff --git a/edtf2/parser/parser_classes_tests.py b/edtf/parser/parser_classes_tests.py similarity index 99% rename from edtf2/parser/parser_classes_tests.py rename to edtf/parser/parser_classes_tests.py index 4c916c0..2cf330e 100644 --- 
a/edtf2/parser/parser_classes_tests.py +++ b/edtf/parser/parser_classes_tests.py @@ -6,8 +6,8 @@ from dateutil.relativedelta import relativedelta -from edtf2 import appsettings -from edtf2.convert import dt_to_struct_time, trim_struct_time, \ +from edtf import appsettings +from edtf.convert import dt_to_struct_time, trim_struct_time, \ TIME_EMPTY_TIME, TIME_EMPTY_EXTRAS EARLIEST = 'earliest' diff --git a/edtf2/parser/tests.py b/edtf/parser/tests.py similarity index 98% rename from edtf2/parser/tests.py rename to edtf/parser/tests.py index 4c376d9..4043988 100644 --- a/edtf2/parser/tests.py +++ b/edtf/parser/tests.py @@ -3,10 +3,10 @@ from datetime import date from time import struct_time -from edtf2.parser.grammar import parse_edtf as parse -from edtf2.parser.parser_classes import EDTFObject, TIME_EMPTY_TIME, \ +from edtf.parser.grammar import parse_edtf as parse +from edtf.parser.parser_classes import EDTFObject, TIME_EMPTY_TIME, \ TIME_EMPTY_EXTRAS -from edtf2.parser.edtf_exceptions import EDTFParseException +from edtf.parser.edtf_exceptions import EDTFParseException # Example object types and attributes. # the first item in each tuple is the input EDTF string, and expected parse result. @@ -122,8 +122,8 @@ # uncertain year; month, day known ('2004?-06-11', '2004-06-11', '2003-06-11', '2005-06-11'), # year and month are approximate; day known - ('2004-06~-11', '2004-06-11', '2003-05-11', '2005-07-11'), - # uncertain month, year and day known + ('2004-06~-11', '2004-06-11', '2003-05-11', '2005-07-11'), + # uncertain month, year and day known ('2004-?06-11', '2004-06-11', '2004-05-11', '2004-07-11'), # day is approximate; year, month known ('2004-06-~11', '2004-06-11', '2004-06-10', '2004-06-12'), @@ -179,7 +179,7 @@ # A date during the 1900s #('19xx', '1900-01-01', '1999-12-31'), # L2 Extended Interval - + ('2004-06-~01/2004-06-~20', '2004-06-01', '2004-06-20', '2004-05-31', '2004-06-21'), # The interval began on an unspecified day in June 2004. 
('2004-06-XX/2004-07-03', '2004-06-01', '2004-07-03'), @@ -255,7 +255,7 @@ def iso_to_struct_time(iso_date): print(str(f.lower_strict()) + '/' + str(f.upper_strict())) self.assertEqual(f.lower_strict(), expected_lower_strict) self.assertEqual(f.upper_strict(), expected_upper_strict) - + def test_date_values(self): """ diff --git a/edtf2/tests.py b/edtf/tests.py similarity index 99% rename from edtf2/tests.py rename to edtf/tests.py index f48abbd..0e49e67 100644 --- a/edtf2/tests.py +++ b/edtf/tests.py @@ -3,7 +3,7 @@ from time import struct_time from datetime import datetime, date -from edtf2 import convert +from edtf import convert class TestConversions(unittest.TestCase): diff --git a/edtf2/__init__.py b/edtf2/__init__.py deleted file mode 100644 index d85a291..0000000 --- a/edtf2/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from edtf2.parser.grammar import parse_edtf -from edtf2.natlang import text_to_edtf -from edtf2.parser.parser_classes import * -from edtf2.convert import dt_to_struct_time, struct_time_to_date, \ - struct_time_to_datetime, trim_struct_time, struct_time_to_jd, \ - jd_to_struct_time, old_specs_to_new_specs_expression diff --git a/edtf2/parser/__init__.py b/edtf2/parser/__init__.py deleted file mode 100644 index bd24299..0000000 --- a/edtf2/parser/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from edtf2.parser.grammar import parse_edtf -from edtf2.parser.parser_classes import * diff --git a/setup.py b/setup.py index 641711d..8fcc6aa 100644 --- a/setup.py +++ b/setup.py @@ -10,12 +10,12 @@ def readme(): return f.read() setuptools.setup( - name='edtf2', + name='edtf', version=version, #use_scm_version={'version_scheme': 'post-release'}, - url='https://github.com/saw-leipzig/python-edtf', - author='Sabine Müller', - author_email='muellers@saw-leipzig.de', + url='https://github.com/ixc/python-edtf', + author='The Interaction Consortium', + author_email='studio@interaction.net.au', description='Python implementation of Library of Congress EDTF (Extended ' 
'Date Time Format) specification', long_description=readme(), From 41f3050ede743d4de9aa37ead37a684aaf455d59 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Sun, 5 May 2024 13:41:24 +1000 Subject: [PATCH 021/102] Prevent ImportError during test --- edtf/parser/grammar_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/edtf/parser/grammar_test.py b/edtf/parser/grammar_test.py index 9482c9e..81b2d5d 100644 --- a/edtf/parser/grammar_test.py +++ b/edtf/parser/grammar_test.py @@ -6,7 +6,7 @@ UncertainOrApproximate, Level1Interval, LongYear, Season, \ PartialUncertainOrApproximate, UA, PartialUnspecified, OneOfASet, \ Consecutives, EarlierConsecutives, LaterConsecutives, MultipleDates, \ - MaskedPrecision, Level2Interval, ExponentialYear, UnspecifiedIntervalSection, Testi + MaskedPrecision, Level2Interval, ExponentialYear, UnspecifiedIntervalSection# , Testi from edtf.parser.edtf_exceptions import EDTFParseException From a7091f6446baa0fc7c2ca30755229e8b906d0f2c Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 6 May 2024 16:45:21 -0400 Subject: [PATCH 022/102] WIP moving CI to Github Actions - Tests are failing due to testInterval(), but are being run correctly. On Python 3.12, `nose` is failing. We should upgrade to `nose2`, or more likely, switch from unittest + nose to pytest. 
- Move tox config to setup.cfg for now - TODO move to pyproject.toml instead of setup.cfg - TODO remove .travis.yml --- .github/workflows/ci.yml | 36 ++++++++++++++++++++++++++++++++++++ setup.cfg | 13 +++++++++++++ setup.py | 7 +++++-- tox.ini | 8 -------- 4 files changed, 54 insertions(+), 10 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 tox.ini diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..6efba4d --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,36 @@ +name: CI + +on: + workflow_dispatch: + pull_request: + +jobs: + python-unit: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + defaults: + run: + working-directory: . + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + cache-dependency-path: '**/setup.cfg' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Install test dependencies + run: pip install .[test] + + - name: Run unit tests + run: tox diff --git a/setup.cfg b/setup.cfg index 082465a..f17fb34 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,3 +3,16 @@ formats = bdist_wheel [wheel] universal = 1 + +[testenv] +deps = + nose + django +commands = + nosetests + +[tox:tox] +minversion = 4.0 +env_list = + py{38,39,310,311,312} +isolated_build = True diff --git a/setup.py b/setup.py index 8fcc6aa..5046357 100644 --- a/setup.py +++ b/setup.py @@ -48,7 +48,10 @@ def readme(): 'Operating System :: MacOS :: MacOS X', 'Operating System :: POSIX :: Linux', 'Programming Language :: Python', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 
'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', ], ) diff --git a/tox.ini b/tox.ini deleted file mode 100644 index f70761f..0000000 --- a/tox.ini +++ /dev/null @@ -1,8 +0,0 @@ -[tox] -envlist = py27,py36 - -[testenv] -deps= - nose - django -commands=nosetests From 782d1a1fe08c97712a493a79ea1a30b1d02dd103 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 6 May 2024 17:49:26 -0400 Subject: [PATCH 023/102] Implement pyproject.toml Remove setup.cfg and setup.py in favor of pyproject.toml Nose and Tox are next to go. Nose seems to be failing on Python 3.10, no need for it any more as pytest should cover its features. Likely no need for tox either as we are using the matrix strategy in Github Actions, though it could be useful for local dev --- pyproject.toml | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++ setup.cfg | 18 ------------- setup.py | 57 ---------------------------------------- 3 files changed, 70 insertions(+), 75 deletions(-) create mode 100644 pyproject.toml delete mode 100644 setup.cfg delete mode 100644 setup.py diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..1ca1138 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,70 @@ +[project] +name = "edtf" +version = "5.0.0" +dependencies = [ + "python-dateutil", + "pyparsing", + "six" +] +description = "Python implementation of Library of Congress EDTF (Extended Date Time Format) specification" +requires-python = ">=3.8" +readme = {file = "README.txt", content-type = "text/markdown"} +authors = [ + { name = "The Interaction Consortium", email = "studio@interaction.net.au"}, + { name = "Alastair Weakley"}, + { name = "James Murty"}, + { name = "Mark Finger" }, + { name = "Sabine Müller" }, + { name = "Cole Crawford" } +] +maintainers = [ + { name = "The Interaction Consortium", email = "studio@interaction.net.au" } +] +classifiers = [ + "Intended Audience :: Developers", + "Intended 
Audience :: End Users/Desktop", + "License :: OSI Approved :: MIT License", + "Operating System :: MacOS :: MacOS X", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] + +[project.optional-dependencies] +test = [ + "django", + "nose", + "tox" +] + +[project.urls] +homepage = "https://github.com/ixc/python-edtf" +issues = "https://github.com/ixc/python-edtf/issues" +repository = "https://github.com/ixc/python-edtf.git" +changelog = "https://github.com/ixc/python-edtf/blob/main/changelog.rst" + +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" + +[tool.wheel] +universal = false + +[tool.tox] +legacy_tox_ini = """ + [tox] + min_version = 4.0 + env_list = py{38,39,310,311,312} + isolated_build = true + skip_missing_interpreters = True + + [testenv] + deps = + nose + django + commands = nosetests +""" \ No newline at end of file diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index f17fb34..0000000 --- a/setup.cfg +++ /dev/null @@ -1,18 +0,0 @@ -[devpi:upload] -formats = bdist_wheel - -[wheel] -universal = 1 - -[testenv] -deps = - nose - django -commands = - nosetests - -[tox:tox] -minversion = 4.0 -env_list = - py{38,39,310,311,312} -isolated_build = True diff --git a/setup.py b/setup.py deleted file mode 100644 index 5046357..0000000 --- a/setup.py +++ /dev/null @@ -1,57 +0,0 @@ -from __future__ import print_function - -import setuptools -import sys - -version = '5.0.0' - -def readme(): - with open('README.md') as f: - return f.read() - -setuptools.setup( - name='edtf', - version=version, - #use_scm_version={'version_scheme': 'post-release'}, - url='https://github.com/ixc/python-edtf', - author='The Interaction Consortium', - author_email='studio@interaction.net.au', 
- description='Python implementation of Library of Congress EDTF (Extended ' - 'Date Time Format) specification', - long_description=readme(), - long_description_content_type="text/markdown", - license='MIT', - packages=setuptools.find_packages(), - include_package_data=True, - install_requires=[ - 'python-dateutil', - 'pyparsing', - 'six' - ], - extras_require={ - 'test': [ - 'django', - 'nose', - 'tox', - ], - }, - setup_requires=[ - 'setuptools_scm >=5.0.2, <6.0.0', - ], - keywords=[ - 'edtf', - ], - classifiers=[ - 'Intended Audience :: Developers', - 'Intended Audience :: End Users/Desktop', - 'License :: OSI Approved :: MIT License', - 'Operating System :: MacOS :: MacOS X', - 'Operating System :: POSIX :: Linux', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11', - 'Programming Language :: Python :: 3.12', - ], -) From f88c88ee033daa76bf07b21cc0f65a08e82f9461 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 6 May 2024 22:31:54 -0400 Subject: [PATCH 024/102] Switch to Pytest --- .github/workflows/ci.yml | 12 ++++++------ pyproject.toml | 7 +++---- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6efba4d..6106282 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,15 +22,15 @@ jobs: with: python-version: ${{ matrix.python-version }} cache: 'pip' - cache-dependency-path: '**/setup.cfg' + cache-dependency-path: '**/pyproject.toml' - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt - - - name: Install test dependencies - run: pip install .[test] + pip install .[test] - name: Run unit tests - run: tox + run: | + pytest edtf/tests.py + pytest edtf/natlang/tests.py + pytest edtf/parser/tests.py diff --git 
a/pyproject.toml b/pyproject.toml index 1ca1138..9f755fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,8 +37,7 @@ classifiers = [ [project.optional-dependencies] test = [ "django", - "nose", - "tox" + "pytest" ] [project.urls] @@ -64,7 +63,7 @@ legacy_tox_ini = """ [testenv] deps = - nose + pytest django - commands = nosetests + commands = pytest """ \ No newline at end of file From acebbff11cdc5197ace70d189dbdfc3cd5cb8063 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Tue, 7 May 2024 10:52:55 -0400 Subject: [PATCH 025/102] Pytest config --- .github/workflows/ci.yml | 4 +--- .gitignore | 1 + pyproject.toml | 7 ++++++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6106282..34cbabc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,6 +31,4 @@ jobs: - name: Run unit tests run: | - pytest edtf/tests.py - pytest edtf/natlang/tests.py - pytest edtf/parser/tests.py + pytest diff --git a/.gitignore b/.gitignore index ba74660..ab3165a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ __pycache__/ # Distribution / packaging .Python env/ +venv/ build/ develop-eggs/ dist/ diff --git a/pyproject.toml b/pyproject.toml index 9f755fb..444298e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,4 +66,9 @@ legacy_tox_ini = """ pytest django commands = pytest -""" \ No newline at end of file +""" + +[tool.pytest.ini_options] +python_files = ["tests.py", "test_*.py", "*_test.py", "*_tests.py"] +python_classes = ["Test*", "*Tests"] +python_functions = ["test_*"] From f37831fb2d7dafea7867b9ab253aa04b4d2c620f Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Tue, 7 May 2024 12:29:35 -0400 Subject: [PATCH 026/102] Remove TravisCI config, update changelog --- .travis.yml | 12 ------------ changelog.rst | 3 +++ 2 files changed, 3 insertions(+), 12 deletions(-) delete 
mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index e3377a3..0000000 --- a/.travis.yml +++ /dev/null @@ -1,12 +0,0 @@ -language: python -sudo: false -cache: pip -python: -- '3.6' -- '2.7' -before_install: -- pip install nose coverage 'django<2' -script: -- nosetests --verbose --with-coverage --cover-package=edtf -after_success: -- coverage report diff --git a/changelog.rst b/changelog.rst index 690f2ed..6a302ae 100644 --- a/changelog.rst +++ b/changelog.rst @@ -8,6 +8,9 @@ In development -------------------------- * Breaking Changes: Rename project back to edtf from edtf2, after the merge of work form https://github.com/saw-leipzig/python-edtf/ +* Breaking Changes: Drop support for Python 2 and Python 3 versions below 3.8. `v5` will support Python 3.8 to 3.12 at release. +* Switch from `tox` and `nose` to `pytest` for testing. +* Consolidate config and packaging from `setup.py` and `setup.cfg` to `pyproject.toml`. 5.0.0 (2023-10-04) ------------------ From 7c41b8c4470770d2a4c224229192d96a4a26f7df Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Wed, 8 May 2024 08:28:34 -0400 Subject: [PATCH 027/102] Convert tests to pytest format - Use parameterization in pytest to simplify many test cases by wrapping the input and expected values in tuples - No need for unittest or class wrappers - There is a legitimately failing test in parser/tests.py - code change needed in parser_classes to handle this I think - natlang/tests.py will need to be updated to match the new spec, as will the whole text_to_edtf() function --- edtf/natlang/tests.py | 115 ++++++------ edtf/parser/tests.py | 396 ++++++++++++++++++------------------------ edtf/tests.py | 208 +++++++++------------- 3 files changed, 297 insertions(+), 422 deletions(-) diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py index 645a373..eaa9af6 100644 --- a/edtf/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -1,26 +1,30 @@ 
-import unittest +import pytest from edtf.natlang.en import text_to_edtf +# TODO update the tests and code to test and output the new spec + # where examples are tuples, the second item is the normalised output -EXAMPLES = ( - ('active late 17th-19th centuries', '16xx/18xx'), # ignoring 'late' for now - ('active 17-19th Centuries', '16xx/18xx'), # ignoring 'late' for now +@pytest.mark.parametrize("input_text,expected_output", [ + # Ignoring 'late' for simplicity in these examples + ('active late 17th-19th centuries', '16xx/18xx'), + ('active 17-19th Centuries', '16xx/18xx'), # Unrecognised values ('', None), ('this isn\'t a date', None), - # Explicity rejected values that would otherwise be badly converted + # Explicitly rejected values that would otherwise be badly converted ('23rd Dynasty', None), - ('90', '1990'), # implied century + # Implied century and specific years + ('90', '1990'), # Implied century ('1860', '1860'), ('the year 1800', '1800'), ('the year 1897', '1897'), ('January 2008', '2008-01'), ('January 12, 1940', '1940-01-12'), - # uncertain/approximate + # Uncertain or approximate dates ('1860?', '1860?'), ('1862 (uncertain)', '1862?'), ('maybe 1862', '1862?'), @@ -31,11 +35,11 @@ ('~ Feb 1812', '1812-02~'), ('circa Feb 1812', '1812-02~'), ('Feb 1812 approx', '1812-02~'), - ('c1860', '1860~'), # different abbreviations - ('c.1860', '1860~'), # with or without . + ('c1860', '1860~'), # Different abbreviations + ('c.1860', '1860~'), # With or without . ('ca1860', '1860~'), ('ca.1860', '1860~'), - ('c 1860', '1860~'), # with or without space + ('c 1860', '1860~'), # With or without space ('c. 1860', '1860~'), ('ca. 1860', '1860~'), ('approx 1860', '1860~'), @@ -44,15 +48,14 @@ ('approximately 1860', '1860~'), ('about 1860', '1860~'), ('about Spring 1849', '1849-21~'), - ('notcirca 1860', '1860'), # avoid words containing circa - ('attica 1802', '1802'), - # avoid false positive circa at the end of preceding word - ('attic. 
1802', '1802'), # avoid false positive circa + ('notcirca 1860', '1860'), # Avoid words containing 'circa' + ('attica 1802', '1802'), # Avoid false positive 'circa' at the end of preceding word + ('attic. 1802', '1802'), # Avoid false positive 'circa' - # masked precision - ('1860s', '186x'), # 186x has decade precision, 186u has year precision. + # Masked precision + ('1860s', '186x'), # 186x has decade precision, 186u has year precision. - # masked precision + uncertainty + # Masked precision + uncertainty ('ca. 1860s', '186x~'), ('c. 1860s', '186x~'), ('Circa 1840s', '184x~'), @@ -60,26 +63,26 @@ ('ca. 1860s?', '186x?~'), ('uncertain: approx 1862', '1862?~'), - # masked precision with first decade (ambiguous) - ('1800s', '18xx'), # without additional uncertainty, use the century - ('2000s', '20xx'), # without additional uncertainty, use the century - ('c1900s', '190x~'), # if there's additional uncertainty, use the decade - ('c1800s?', '180x?~'), # if there's additional uncertainty, use the decade + # Ambiguous masked precision for centuries and decades + ('1800s', '18xx'), # Without additional uncertainty, use the century + ('2000s', '20xx'), # Without additional uncertainty, use the century + ('c1900s', '190x~'), # If there's additional uncertainty, use the decade + ('c1800s?', '180x?~'), # If there's additional uncertainty, use the decade - # unspecified + # Unspecified dates ('January 12', 'uuuu-01-12'), ('January', 'uuuu-01'), ('10/7/2008', '2008-10-07'), ('7/2008', '2008-07'), - # seasons + # Seasons mapped to specific codes ('Spring 1872', '1872-21'), ('Summer 1872', '1872-22'), ('Autumn 1872', '1872-23'), ('Fall 1872', '1872-23'), ('Winter 1872', '1872-24'), - # before/after + # Dates relative to known events (before/after) ('earlier than 1928', 'unknown/1928'), ('before 1928', 'unknown/1928'), ('after 1928', '1928/unknown'), @@ -87,32 +90,30 @@ ('before January 1928', 'unknown/1928-01'), ('before 18 January 1928', 'unknown/1928-01-18'), - # before/after 
approx + # Approximations combined with before/after ('before approx January 18 1928', 'unknown/1928-01-18~'), ('before approx January 1928', 'unknown/1928-01~'), ('after approx January 1928', '1928-01~/unknown'), ('after approx Summer 1928', '1928-22~/unknown'), - # before/after and uncertain/unspecificed + # Before and after with uncertain / unspecified components ('after about the 1920s', '192x~/unknown'), ('before about the 1900s', 'unknown/190x~'), ('before the 1900s', 'unknown/19xx'), - # unspecified + # Specifying unspecified components within a date # ('decade in 1800s', '18ux'), #too esoteric # ('decade somewhere during the 1800s', '18ux'), #lengthier. Keywords are 'in' or 'during' - ('year in the 1860s', '186u'), - # 186x has decade precision, 186u has year precision. - ('year in the 1800s', '18xu'), + ('year in the 1860s', '186u'), # 186x has decade precision + ('year in the 1800s', '18xu'), # 186u has year precision ('year in about the 1800s', '180u~'), ('month in 1872', '1872-uu'), ('day in Spring 1849', '1849-21-uu'), ('day in January 1872', '1872-01-uu'), ('day in 1872', '1872-uu-uu'), ('birthday in 1872', '1872'), - # avoid false positive at end of preceding word - # centuries + # Handling centuries with approximation and uncertainty ('1st century', '00xx'), ('10c', '09xx'), ('19th century', '18xx'), @@ -126,7 +127,7 @@ ('19c?', '18xx?'), ('c.19c?', '18xx?~'), - # BC/AD + # BC/AD dating ('1 AD', '0001'), ('17 CE', '0017'), ('127 CE', '0127'), @@ -136,18 +137,17 @@ ('c127 CE', '0127~'), ('c1270 CE', '1270~'), ('c64 BCE', '-0064~'), - ('2nd century bc', '-01xx'), # -200 to -101 + ('2nd century bc', '-01xx'), # -200 to -101 ('2nd century bce', '-01xx'), ('2nd century ad', '01xx'), ('2nd century ce', '01xx'), - # c-c-c-combo - # just showing off now... + # Combining uncertainties and approximations in creative ways ('a day in about Spring 1849?', '1849-21-uu?~'), - # simple ranges. 
Not all of these results are correct EDTF, but - # this is as good as the EDTF implementation and simple natural - # language parser we have. + # Simple date ranges, showcasing both the limitations and capabilities of the parser + # Not all of these results are correct EDTF, but this is as good as the EDTF implementation + # and simple natural language parser we have. ('1851-1852', '1851/1852'), ('1851-1852; printed 1853-1854', '1851/1852'), ('1851-52', '1851/1852'), @@ -156,7 +156,6 @@ ('1857-mid 1860s', '1857/186x'), ('1858/1860', '[1858, 1860]'), ('1860s-1870s', '186x/187x'), - ('1861, printed 1869', '1861'), ('1910-30', '1910/1930'), ('active 1910-30', '1910/1930'), ('1861-67', '1861/1867'), @@ -174,16 +173,13 @@ ('1900; 1973', '1900'), ('1900; printed 1912', '1900'), ('1915 late - autumn 1916', '1915/1916-23'), - - ('1915, from Camerawork, October 1916', '1915'), # should be {1915, 1916-10} + ('1915, from Camerawork, October 1916', '1915'), # should be {1915, 1916-10} ('1920s -early 1930s', '192x/193x'), ('1930s, printed early 1960s', '193x'), # should be something like {193x, 196x}, - # though those forms aren't explicitly supported in the spec. ('1932, printed 1976 by Gunther Sander', '1932'), # should be {1932, 1976} - ('1938, printed 1940s-1950s', '1938'), # should be something like {1938, 194x-195x} - - + ('1938, printed 1940s-1950s', '1938') # should be something like {1938, 194x-195x} + # Uncertain and approximate on different parts of the date # for these to work we need to recast is_uncertain and is_approximate # such that they work on different parts. Probably worth rolling our own # dateparser at this point. @@ -194,22 +190,13 @@ # ('a day in about Spring in about 1849', '1849~-21~-uu'), # ('maybe January in some year in about the 1830s', '183u~-01?'), # ('about July? 
in about 1849', '1849~-07?~'), -) - - -class TestLevel0(unittest.TestCase): - def test_natlang(self): - """ - For each of the examples, establish that: - - the unicode of the parsed object is acceptably equal to the EDTF string - - the parsed object is a subclass of EDTFObject - :return: - """ - for i, o in EXAMPLES: - e = text_to_edtf(i) - print("%s => %s" % (i, e)) - self.assertEqual(e, o) +]) +def test_natlang(input_text, expected_output): + """ + Test natural language conversion to EDTF format: + Verify that the conversion from text to EDTF format matches the expected output. + """ + result = text_to_edtf(input_text) + assert result == expected_output, f"Failed for input: {input_text}" -if __name__ == '__main__': - unittest.main() diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 4043988..877fd0b 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -1,19 +1,16 @@ -import unittest -import sys +import pytest from datetime import date from time import struct_time from edtf.parser.grammar import parse_edtf as parse -from edtf.parser.parser_classes import EDTFObject, TIME_EMPTY_TIME, \ - TIME_EMPTY_EXTRAS +from edtf.parser.parser_classes import EDTFObject, TIME_EMPTY_TIME, TIME_EMPTY_EXTRAS from edtf.parser.edtf_exceptions import EDTFParseException -# Example object types and attributes. -# the first item in each tuple is the input EDTF string, and expected parse result. -# where the first value is a tuple, the second item is the normalised parse result. +# Example object types and attributes represented as tuples. +# The first item in each tuple is the input EDTF string, and expected parse result. +# where the first value is a tuple, the second item is a tuple of the normalised parse result. # -# The rest of the values in each tuple indicate the iso versions of the derived -# Python ``date``s. +# The values in the second tuple indicate the iso versions of the derived Python `date`s. 
# - If there's one other value, all the derived dates should be the same. # - If there're two other values, then all the lower values should be the same # and all the upper values should be the same. @@ -26,176 +23,171 @@ EXAMPLES = ( # ******************************* LEVEL 0 ********************************* # year, month, day - ('2001-02-03', '2001-02-03'), + ('2001-02-03', ('2001-02-03',)), # year, month - ('2008-12', '2008-12-01', '2008-12-31'), + ('2008-12', ('2008-12-01', '2008-12-31')), # year - ('2008', '2008-01-01', '2008-12-31'), + ('2008', ('2008-01-01', '2008-12-31')), # a negative year - ('-0999', '-0999-01-01', '-0999-12-31'), + ('-0999', ('-0999-01-01', '-0999-12-31')), # year zero - ('0000', '0000-01-01', '0000-12-31'), + ('0000', ('0000-01-01', '0000-12-31')), # DateTimes - ('2001-02-03T09:30:01', '2001-02-03'), - ('2004-01-01T10:10:10Z', '2004-01-01'), - ('2004-01-01T10:10:10+05:00', '2004-01-01'), - ('1985-04-12T23:20:30', '1985-04-12'), + ('2001-02-03T09:30:01', ('2001-02-03',)), + ('2004-01-01T10:10:10Z', ('2004-01-01',)), + ('2004-01-01T10:10:10+05:00', ('2004-01-01',)), + ('1985-04-12T23:20:30', ('1985-04-12',)), + # Intervals # An interval beginning sometime in 1964 and ending sometime in 2008. Year precision. - ('1964/2008', '1964-01-01', '2008-12-31'), + ('1964/2008', ('1964-01-01', '2008-12-31')), # An interval beginning sometime in June 2004 and ending sometime in August of 2006. Month precision. - ('2004-06/2006-08', '2004-06-01', '2006-08-31'), + ('2004-06/2006-08', ('2004-06-01', '2006-08-31')), # An interval beginning sometime on February 1, 2004 and ending sometime on February 8, 2005. Day precision. - ('2004-02-01/2005-02-08', '2004-02-01', '2005-02-08'), - # An interval beginning sometime on February 1, 2004 and ending sometime in February 2005. The precision of the interval is not defined; the start endpoint has day precision and the end endpoint has month precision. 
- ('2004-02-01/2005-02', '2004-02-01', '2005-02-28'), - # An interval beginning sometime on February 1, 2004 and ending sometime in 2005. The start endpoint has day precision and the end endpoint has year precision. - ('2004-02-01/2005', '2004-02-01', '2005-12-31'), + ('2004-02-01/2005-02-08', ('2004-02-01', '2005-02-08')), + # An interval beginning sometime on February 1, 2004 and ending sometime in February 2005. + # The precision of the interval is not defined; the start endpoint has day precision and the end endpoint has month precision. + ('2004-02-01/2005-02', ('2004-02-01', '2005-02-28')), + # An interval beginning sometime on February 1, 2004 and ending sometime in 2005. + # The start endpoint has day precision and the end endpoint has year precision. + ('2004-02-01/2005', ('2004-02-01', '2005-12-31')), # An interval beginning sometime in 2005 and ending sometime in February 2006. - ('2005/2006-02', '2005-01-01', '2006-02-28'), + ('2005/2006-02', ('2005-01-01', '2006-02-28')), # An interval beginning sometime in -2005 and ending sometime in February -2004. 
- ('-2005/-1999-02', '-2005-01-01', '-1999-02-28'), + ('-2005/-1999-02', ('-2005-01-01', '-1999-02-28')), # ******************************* LEVEL 1 ********************************* - # Uncertain/Approximate + # Uncertain/Approximate # uncertain: possibly the year 1984, but not definitely - ('1984?', '1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31'), - ('2004-06-11?', '2004-06-11', '2004-06-11', '2004-06-10', '2004-06-12'), - ('2004-06?', '2004-06-01', '2004-06-30', '2004-05-01', '2004-07-30'), + ('1984?', ('1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31')), + ('2004-06-11?', ('2004-06-11', '2004-06-11', '2004-06-10', '2004-06-12')), + ('2004-06?', ('2004-06-01', '2004-06-30', '2004-05-01', '2004-07-30')), # "approximately" the year 1984 - ('1984~', '1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31'), + ('1984~', ('1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31')), # the year is approximately 1984 and even that is uncertain - ('1984%', '1984-01-01', '1984-12-31', '1982-01-01', '1986-12-31'), + ('1984%', ('1984-01-01', '1984-12-31', '1982-01-01', '1986-12-31')), # Unspecified # some unspecified year in the 1990s. - ('199X', '1990-01-01', '1999-12-31'), + ('199X', ('1990-01-01', '1999-12-31')), # some unspecified year in the 1900s. 
- ('19XX', '1900-01-01', '1999-12-31'), + ('19XX', ('1900-01-01', '1999-12-31')), # some month in 1999 - ('1999-XX', '1999-01-01', '1999-12-31'), + ('1999-XX', ('1999-01-01', '1999-12-31')), # some day in January 1999 - ('1999-01-XX', '1999-01-01', '1999-01-31'), + ('1999-01-XX', ('1999-01-01', '1999-01-31')), # some day in 1999 - ('1999-XX-XX', '1999-01-01', '1999-12-31'), + ('1999-XX-XX', ('1999-01-01', '1999-12-31')), # Uncertain/Approximate lower boundary dates (BCE) - ('-0275~', '-0275-01-01', '-0275-12-31', '-0276-01-01', '-0274-12-31'), - ('-0001~', '-0001-01-01', '-0001-12-31', '-0002-01-01', '0000-12-31'), - ('0000~', '0000-01-01', '0000-12-31', '-0001-01-01', '0001-12-31'), + ('-0275~', ('-0275-01-01', '-0275-12-31', '-0276-01-01', '-0274-12-31')), + ('-0001~', ('-0001-01-01', '-0001-12-31', '-0002-01-01', '0000-12-31')), + ('0000~', ('0000-01-01', '0000-12-31', '-0001-01-01', '0001-12-31')), # L1 Extended Interval # beginning unknown, end 2006 - ('/2006', '1996-12-31', '2006-12-31'), + ('/2006', ('1996-12-31', '2006-12-31')), # beginning June 1, 2004, end unknown - ('2004-06-01/', '2004-06-01', '2014-06-01'), + ('2004-06-01/', ('2004-06-01', '2014-06-01')), # beginning open, end 2006 - ('../2006', '-20000000-01-01', '2006-12-31'), - # beginning January 1 2004 with no end date - ('2004-01-01/..', '2004-01-01', '20000000-12-31'), + ('../2006', ('-inf', '2006-12-31')), + # beginning January 1, 2004 with no end date + ('2004-01-01/..', ('2004-01-01', 'inf')), # interval beginning approximately 1984 and ending June 2004 - ('1984~/2004-06', '1984-01-01', '2004-06-30', '1983-01-01', '2004-06-30'), + ('1984~/2004-06', ('1984-01-01', '2004-06-30', '1983-01-01', '2004-06-30')), # interval beginning 1984 and ending approximately June 2004 - ('1984/2004-06~', '1984-01-01', '2004-06-30', '1984-01-01', '2004-07-30'), - ('1984?/2004%', '1984-01-01', '2004-12-31', '1983-01-01', '2006-12-31'), - ('1984~/2004~', '1984-01-01', '2004-12-31', '1983-01-01', '2005-12-31'), + 
('1984/2004-06~', ('1984-01-01', '2004-06-30', '1984-01-01', '2004-07-30')), + ('1984?/2004%', ('1984-01-01', '2004-12-31', '1983-01-01', '2006-12-31')), + ('1984~/2004~', ('1984-01-01', '2004-12-31', '1983-01-01', '2005-12-31')), # interval whose beginning is uncertain but thought to be 1984, and whose end is uncertain and approximate but thought to be 2004 - ('1984-06?/2004-08?', '1984-06-01', '2004-08-31', '1984-05-01', '2004-09-30'), - ('1984-06-02?/2004-08-08~', '1984-06-02', '2004-08-08', '1984-06-01', '2004-08-09'), - ('1984-06-02?/', '1984-06-02', '1994-06-02', '1984-06-01', '1994-06-02'), + ('1984-06?/2004-08?', ('1984-06-01', '2004-08-31', '1984-05-01', '2004-09-30')), + ('1984-06-02?/2004-08-08~', ('1984-06-02', '2004-08-08', '1984-06-01', '2004-08-09')), + ('1984-06-02?/', ('1984-06-02', '1994-06-02', '1984-06-01', '1994-06-02')), # Year exceeding 4 digits - # the year 170000002 - ('Y170000002', '170000002-01-01', '170000002-12-31'), - # the year -170000002 - ('Y-170000002', '-170000002-01-01', '-170000002-12-31'), + ('Y170000002', ('170000002-01-01', '170000002-12-31')), + ('Y-170000002', ('-170000002-01-01', '-170000002-12-31')), # Seasons - # Spring, 2001 - ('2001-21', '2001-03-01', '2001-05-31'), - # Summer, 2003 - ('2003-22', '2003-06-01', '2003-08-31'), - # Autumn, 2000 - ('2000-23', '2000-09-01', '2000-11-30'), - # Winter, 2010 - ('2010-24', '2010-12-01', '2010-12-31'), + ('2001-21', ('2001-03-01', '2001-05-31')), + ('2003-22', ('2003-06-01', '2003-08-31')), + ('2000-23', ('2000-09-01', '2000-11-30')), + ('2010-24', ('2010-12-01', '2010-12-31')), # ******************************* LEVEL 2 ********************************* - - # Partial Uncertain/ Approximate + # Partial Uncertain/Approximate # uncertain year; month, day known - ('2004?-06-11', '2004-06-11', '2003-06-11', '2005-06-11'), + ('2004?-06-11', ('2004-06-11', '2003-06-11', '2005-06-11')), # year and month are approximate; day known - ('2004-06~-11', '2004-06-11', '2003-05-11', 
'2005-07-11'), + ('2004-06~-11', ('2004-06-11', '2003-05-11', '2005-07-11')), # uncertain month, year and day known - ('2004-?06-11', '2004-06-11', '2004-05-11', '2004-07-11'), + ('2004-?06-11', ('2004-06-11', '2004-05-11', '2004-07-11')), # day is approximate; year, month known - ('2004-06-~11', '2004-06-11', '2004-06-10', '2004-06-12'), + ('2004-06-~11', ('2004-06-11', '2004-06-10', '2004-06-12')), # Year known, month within year is approximate and uncertain - NEW SPEC - ('2004-%06', '2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30'), + ('2004-%06', ('2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30')), # Year known, month and day uncertain - NEW SPEC - ('2004-?06-?11', '2004-06-11', '2004-05-10', '2004-07-12'), + ('2004-?06-?11', ('2004-06-11', '2004-05-10', '2004-07-12')), # Year uncertain, month known, day approximate - NEW SPEC - ('2004?-06-~11', '2004-06-11', '2003-06-10', '2005-06-12'), + ('2004?-06-~11', ('2004-06-11', '2003-06-10', '2005-06-12')), # Year uncertain and month is both uncertain and approximate - NEW SPEC - ('?2004-%06', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), + ('?2004-%06', ('2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30')), # This has the same meaning as the previous example.- NEW SPEC - ('2004?-%06', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), + ('2004?-%06', ('2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30')), # Year uncertain, month and day approximate. - NEW SPEC - ('2004?-~06-~04','2004-06-04', '2003-05-03', '2005-07-05'), - # what about that? - #('2004?-06-04~','2004-06-04', '2003-05-03', '2005-07-05'), + ('2004?-~06-~04', ('2004-06-04', '2003-05-03', '2005-07-05')), # Year known, month and day approximate. 
- NEW SPEC - ('2011-~06-~04', '2011-06-04', '2011-05-03', '2011-07-05'), - # Approximate season (around Autumn 2011) - #('2011-23~', '2011-09-01', '2011-11-30', '2011-06-09', '2012-02-22'), - # Years wrapping - #('2011-24~', '2011-12-01', '2011-12-31', '2011-09-08', '2012-03-24'), + ('2011-~06-~04', ('2011-06-04', '2011-05-03', '2011-07-05')), # Partial unspecified # December 25 sometime during the 1560s - ('156X-12-25', '1560-12-25', '1569-12-25'), + ('156X-12-25', ('1560-12-25', '1569-12-25')), # December 25 sometime during the 1500s - ('15XX-12-25', '1500-12-25', '1599-12-25'), + ('15XX-12-25', ('1500-12-25', '1599-12-25')), # Year and day of month specified, month unspecified - ('1560-XX-25', '1560-01-25', '1560-12-25'), - ('15XX-12-XX', '1500-12-01', '1599-12-31'), + ('1560-XX-25', ('1560-01-25', '1560-12-25')), + ('15XX-12-XX', ('1500-12-01', '1599-12-31')), # Day specified, year and month unspecified - ('XXXX-XX-23', '0000-01-23', '9999-12-23'), + ('XXXX-XX-23', ('0000-01-23', '9999-12-23')), + # One of a Set # One of the years 1667, 1668, 1670, 1671, 1672 - (('[1667,1668, 1670..1672]', '[1667, 1668, 1670..1672]'), '1667-01-01', '1672-12-31'), + ('[1667, 1668, 1670..1672]', ('1667-01-01', '1672-12-31')), # December 3, 1760 or some earlier date - ('[..1760-12-03]', '-20000000-01-01', '1760-12-03'), + ('[..1760-12-03]', ('-inf', '1760-12-03')), # December 1760 or some later month - ('[1760-12..]', '1760-12-01', '20000000-12-31'), + ('[1760-12..]', ('1760-12-01', 'inf')), # January or February of 1760 or December 1760 or some later month - ('[1760-01, 1760-02, 1760-12..]', '1760-01-01', '20000000-12-31'), + # This test is failing due to a code issue: + # TypeError: '>' not supported between instances of 'float' and 'time.struct_time' + ('[1760-01, 1760-02, 1760-12..]', ('1760-01-01', 'inf')), #TODO fix in parser_classes # Either the year 1667 or the month December of 1760. 
- ('[1667, 1760-12]', '1667-01-01', '1760-12-31'), + ('[1667, 1760-12]', ('1667-01-01', '1760-12-31')), # Multiple Dates # All of the years 1667, 1668, 1670, 1671, 1672 - (('{1667,1668, 1670..1672}', '{1667, 1668, 1670..1672}'), '1667-01-01', '1672-12-31'), + ('{1667,1668, 1670..1672}', ('1667-01-01', '1672-12-31')), # The year 1960 and the month December of 1961. - ('{1960, 1961-12}', '1960-01-01', '1961-12-31'), + ('{1960, 1961-12}', ('1960-01-01', '1961-12-31')), + # Masked Precision --> eliminated # A date during the 1960s #('196x', '1960-01-01', '1969-12-31'), # A date during the 1900s #('19xx', '1900-01-01', '1999-12-31'), - # L2 Extended Interval - ('2004-06-~01/2004-06-~20', '2004-06-01', '2004-06-20', '2004-05-31', '2004-06-21'), + # L2 Extended Interval + # Interval with fuzzy day endpoints in June 2004 + ('2004-06-~01/2004-06-~20', ('2004-06-01', '2004-06-20', '2004-05-31', '2004-06-21')), # The interval began on an unspecified day in June 2004. - ('2004-06-XX/2004-07-03', '2004-06-01', '2004-07-03'), + ('2004-06-XX/2004-07-03', ('2004-06-01', '2004-07-03')), # Year Requiring More than Four Digits - Exponential Form # the year 170000000 - ('Y17E7', '170000000-01-01', '170000000-12-31'), + ('Y17E7', ('170000000-01-01', '170000000-12-31')), # the year -170000000 - ('Y-17E7', '-170000000-01-01', '-170000000-12-31'), + ('Y-17E7', ('-170000000-01-01', '-170000000-12-31')), # Some year between 171010000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.) 
# TODO Not yet implemented, see https://github.com/ixc/python-edtf/issues/12 - # ('Y17101E4S3', '171010000-01-01', '171999999-12-31'), + # ('Y17101E4S3', ('171010000-01-01', '171999999-12-31')), # L2 Seasons - # Spring southern, 2001 - ('2001-29', '2001-09-01', '2001-11-30'), + # Spring southern hemisphere, 2001 + ('2001-29', ('2001-09-01', '2001-11-30')), # second quarter of 2001 - ('2001-34', '2001-04-01', '2001-06-30'), + ('2001-34', ('2001-04-01', '2001-06-30')), ) BAD_EXAMPLES = ( @@ -218,137 +210,83 @@ '2004-06-(01)~/2004-06-(20)~', # An interval in June 2004 beginning approximately the first and ending approximately the 20th - OLD SPEC ) +def iso_to_struct_time(iso_date): + """ Convert YYYY-mm-dd date strings or infinities to time structs or float infinities. """ + if iso_date == 'inf': + return float('inf') + elif iso_date == '-inf': + return float('-inf') -class TestParsing(unittest.TestCase): - def test_non_parsing(self): - for i in BAD_EXAMPLES: - self.assertRaises(EDTFParseException, parse, i) - - def testInterval(self): - #expression = ('1984~/2004-06', '1984-01-01', '2004-06-30', '1983-01-01', '2004-06-30') - #expression = ('/2006', '1996-01-01', '2006-12-31') - #expression = ('../2006', '0001-01-01', '2006-12-31') - expression = ('../-2006', '-20000000-01-01', '-2006-12-31') - #expression = ('2006/', '2006-01-01', '9999-12-31') - i = expression[0] - expected_lower_strict = expression[1] - expected_upper_strict = expression[2] - - def iso_to_struct_time(iso_date): - """ Convert YYYY-mm-dd date strings to time structs """ - if iso_date[0] == '-': - is_negative = True - iso_date = iso_date[1:] - else: - is_negative = False - y, mo, d = [int(i) for i in iso_date.split('-')] - if is_negative: - y *= -1 - return struct_time( - [y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - - # Convert string date representations into `struct_time`s - expected_lower_strict = iso_to_struct_time(expected_lower_strict) - expected_upper_strict = 
iso_to_struct_time(expected_upper_strict) - - f = parse(i) - print(str(f.lower_strict()) + '/' + str(f.upper_strict())) - self.assertEqual(f.lower_strict(), expected_lower_strict) - self.assertEqual(f.upper_strict(), expected_upper_strict) - - - def test_date_values(self): - """ - Test that everY EDTFObject can tell you its lower and upper - fuzzy and strict dates, and that they're what we think they should be. - """ - - for e in EXAMPLES: - i = e[0] - if isinstance(i, tuple): - i, o = i - else: - o = i - - sys.stdout.write("parsing '%s'" % i) - f = parse(i) - sys.stdout.write(" => %s()\n" % type(f).__name__) - self.assertIsInstance(f, EDTFObject) - self.assertEqual(str(f), o) + if iso_date[0] == '-': + is_negative = True + iso_date = iso_date[1:] + else: + is_negative = False + y, mo, d = [int(i) for i in iso_date.split('-')] + if is_negative: + y *= -1 + return struct_time([y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - if len(e) == 5: - expected_lower_strict = e[1] - expected_upper_strict = e[2] - expected_lower_fuzzy = e[3] - expected_upper_fuzzy = e[4] - elif len(e) == 4: - expected_lower_strict = e[1] - expected_upper_strict = e[1] - expected_lower_fuzzy = e[2] - expected_upper_fuzzy = e[3] - elif len(e) == 3: - expected_lower_strict = e[1] - expected_upper_strict = e[2] - expected_lower_fuzzy = e[1] - expected_upper_fuzzy = e[2] - elif len(e) == 2: - expected_lower_strict = e[1] - expected_upper_strict = e[1] - expected_lower_fuzzy = e[1] - expected_upper_fuzzy = e[1] - if len(e) == 1: - continue - def iso_to_struct_time(iso_date): - """ Convert YYYY-mm-dd date strings to time structs """ - if iso_date[0] == '-': - is_negative = True - iso_date = iso_date[1:] - else: - is_negative = False - y, mo, d = [int(i) for i in iso_date.split('-')] - if is_negative: - y *= -1 - return struct_time( - [y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) +@pytest.mark.parametrize("test_input,expected_tuple", EXAMPLES) +def test_edtf_examples(test_input, expected_tuple): 
+ """ Test parsing of EDTF strings with expected outputs. """ + result = parse(test_input) + assert isinstance(result, EDTFObject), "Result should be an instance of EDTFObject" - # Convert string date representations into `struct_time`s - expected_lower_strict = iso_to_struct_time(expected_lower_strict) - expected_upper_strict = iso_to_struct_time(expected_upper_strict) - expected_lower_fuzzy = iso_to_struct_time(expected_lower_fuzzy) - expected_upper_fuzzy = iso_to_struct_time(expected_upper_fuzzy) + # Extract only the date part if the result includes a time. + result_date = str(result) + if 'T' in result_date: + result_date = result_date.split('T')[0] - try: - self.assertEqual(f.lower_strict(), expected_lower_strict) - self.assertEqual(f.upper_strict(), expected_upper_strict) - self.assertEqual(f.lower_fuzzy(), expected_lower_fuzzy) - self.assertEqual(f.upper_fuzzy(), expected_upper_fuzzy) - except Exception as x: - # Write to stdout for manual debugging, I guess - sys.stdout.write(str(x)) - # Re-raise exception so unit tests work for non-manual usage - raise + # Unpack expected results based on their count + if len(expected_tuple) == 1: + assert result_date == expected_tuple[0], f"Expected {expected_tuple[0]}, got {result_date}" + elif len(expected_tuple) == 2: + lower_strict = iso_to_struct_time(expected_tuple[0]) + upper_strict = iso_to_struct_time(expected_tuple[1]) + assert result.lower_strict() == lower_strict, "Lower strict date does not match" + assert result.upper_strict() == upper_strict, "Upper strict date does not match" + elif len(expected_tuple) == 3: + strict_date = iso_to_struct_time(expected_tuple[0]) + lower_fuzzy = iso_to_struct_time(expected_tuple[1]) + upper_fuzzy = iso_to_struct_time(expected_tuple[2]) + assert result.lower_strict() == strict_date, "Lower strict date does not match" + assert result.upper_strict() == strict_date, "Upper strict date does not match" + assert result.lower_fuzzy() == lower_fuzzy, "Lower fuzzy date does not match" 
+ assert result.upper_fuzzy() == upper_fuzzy, "Upper fuzzy date does not match" + elif len(expected_tuple) == 4: + lower_strict = iso_to_struct_time(expected_tuple[0]) + upper_strict = iso_to_struct_time(expected_tuple[1]) + lower_fuzzy = iso_to_struct_time(expected_tuple[2]) + upper_fuzzy = iso_to_struct_time(expected_tuple[3]) + assert result.lower_strict() == lower_strict, "Lower strict date does not match" + assert result.upper_strict() == upper_strict, "Upper strict date does not match" + assert result.lower_fuzzy() == lower_fuzzy, "Lower fuzzy date does not match" + assert result.upper_fuzzy() == upper_fuzzy, "Upper fuzzy date does not match" - def test_comparisons(self): - d1 = parse("1979-08~") - d2 = parse("1979-08~") - d3 = parse("1979-09-16") - d4 = parse("1979-08-16") - d5 = date(1979, 8, 16) - d6 = date(1970, 9, 16) - self.assertEqual(d1, d2) - self.assertNotEqual(d1, d3) - self.assertTrue(d1 >= d2) - self.assertTrue(d2 >= d1) - self.assertTrue(d3 > d1) - self.assertTrue(d1 < d4) +@pytest.mark.parametrize("bad_input", BAD_EXAMPLES) +def test_non_parsing(bad_input): + """ Test that non-parsing inputs correctly raise an exception. """ + with pytest.raises(EDTFParseException): + parse(bad_input) - # with python dates (EDTFFormat must be first operand) - self.assertEqual(d4, d5) - self.assertTrue(d1 < d5) - self.assertTrue(d1 > d6) +def test_comparisons(): + """ Test comparisons between parsed EDTF objects and standard dates. 
""" + d1 = parse("1979-08~") + d2 = parse("1979-08~") + d3 = parse("1979-09-16") + d4 = parse("1979-08-16") + d5 = date(1979, 8, 16) + d6 = date(1970, 9, 16) -if __name__ == '__main__': - unittest.main() + assert d1 == d2 + assert d1 != d3 + assert d1 >= d2 + assert d3 > d1 + assert d1 < d4 + assert d4 == d5 + assert d1 < d5 + assert d1 > d6 diff --git a/edtf/tests.py b/edtf/tests.py index 0e49e67..f5ef655 100644 --- a/edtf/tests.py +++ b/edtf/tests.py @@ -1,134 +1,84 @@ -import unittest - from time import struct_time from datetime import datetime, date from edtf import convert - -class TestConversions(unittest.TestCase): - - def test_dt_to_struct_time_for_datetime(self): - now = datetime.now() - st = convert.dt_to_struct_time(now) - # Check equal year, month, day, hours, minutes, seconds - self.assertEqual(st[:6], now.timetuple()[:6]) - # Confirm 'extra' fields are set to defaults - self.assertEqual(st[6:], (0, 0, -1)) - - def test_dt_to_struct_time_for_date(self): - today = date.today() - st = convert.dt_to_struct_time(today) - # Check equal year, month, day - self.assertEqual(st[:3], today.timetuple()[:3]) - # Confirm time fields are zeroed - self.assertEqual(st[3:6], (0, 0, 0)) - # Confirm 'extra' fields are set to defaults - self.assertEqual(st[6:], (0, 0, -1)) - - def test_struct_time_to_date(self): - st = struct_time( - [2018, 4, 19] + convert.TIME_EMPTY_TIME + convert.TIME_EMPTY_EXTRAS) - d = date(*st[:3]) - self.assertEqual(d, convert.struct_time_to_date(st)) - - def test_struct_time_to_datetime(self): - st = struct_time( - [2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) - dt = datetime(*st[:6]) - converted_dt = convert.struct_time_to_datetime(st) - self.assertEqual(dt, converted_dt) - # Note that 'extra' fields are auto-populated by `datetime` module - self.assertEqual(converted_dt.timetuple()[6:], (3, 109, -1)) - - def test_trim_struct_time(self): - now = datetime.now() - st = now.timetuple() - trimmed_st = convert.trim_struct_time(st) - # 
Confirm trimmed `struct_time` has expected date/time values - self.assertEqual( - trimmed_st[:6], - (now.year, now.month, now.day, now.hour, now.minute, now.second) - ) - # Confirm 'extra' fields are set to defaults - self.assertEqual(trimmed_st[6:], (0, 0, -1)) - # Confirm 'extra' fields in untrimmed `struct_time` has real values - self.assertNotEqual(st[6:], (0, 0, -1)) - - def test_struct_time_to_jd(self): - # Check conversion of AD date & time to Julian Date number - st_ad = struct_time( - [2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) - jd_ad = 2458227.9263194446 - self.assertEqual(jd_ad, convert.struct_time_to_jd(st_ad)) - # Check conversion of BC date & time to Julian Date number - st_bc = struct_time( - [-2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) - jd_bc = 984091.9263194444 - self.assertEqual(jd_bc, convert.struct_time_to_jd(st_bc)) - - def test_jd_to_struct_time(self): - # Check conversion of Julian Date number to AD date & time - jd_ad = 2458227.9263194446 # As in `test_struct_time_to_jd` - st_ad = struct_time( - [2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) - self.assertEqual(st_ad, convert.jd_to_struct_time(jd_ad)) - # Check conversion of Julian Date number to BC date & time - # WARNING: Converted time is off by 1 second, 53 not 54 - jd_bc = 984091.9263194444 # As in `test_struct_time_to_jd` - st_bc = struct_time( - [-2018, 4, 19] + [10, 13, 54 - 1] + convert.TIME_EMPTY_EXTRAS) - self.assertEqual(st_bc, convert.jd_to_struct_time(jd_bc)) - - def test_jd_round_trip_for_extreme_future(self): - original_st = struct_time( - [999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) - jd = convert.struct_time_to_jd(original_st) - converted_st = convert.jd_to_struct_time(jd) - # Confirm that year, month, day, hour, minute are correct (not second) - self.assertEqual(original_st[:5], converted_st[:5]) - # WARNING: Seconds are off by 1, should be 3 but is 2 - self.assertEqual(3 - 1, converted_st[5]) - - def 
test_jd_round_trip_for_extreme_past(self): - original_st = struct_time( - [-999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) - converted_st = convert.jd_to_struct_time( - convert.struct_time_to_jd(original_st)) - # WARNING: We have lost a year of accuracy - self.assertEqual( - (-999999 + 1, # Year off by 1 - 8, 4, 21, 15, 3, 0, 0, -1), - tuple(converted_st)) - - def test_jd_round_trip_for_zero_year_aka_1_bc(self): - original_st = struct_time( - [0, 9, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) - converted_st = convert.jd_to_struct_time( - convert.struct_time_to_jd(original_st)) - self.assertEqual( - (0, 9, 5, 4, 58, 59, 0, 0, -1), - tuple(converted_st)) - - def test_jd_round_trip_for_2_bc(self): - original_st = struct_time( - [-1, 12, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) - converted_st = convert.jd_to_struct_time( - convert.struct_time_to_jd(original_st)) - self.assertEqual( - (-1, 12, 5, 4, 58, 59, 0, 0, -1), - tuple(converted_st)) - - def test_roll_negative_time_fields(self): - # Confirm time value is adjusted as expected - year = -100 - month = -17 # More than 1 year - day = -34 # More than 1 month - hour = -25 # More than 1 day - minute = -74 # More than 1 hour - second = -253 # More than 1 minute - self.assertEqual( - (-102, 5, 24, 21, 41, 47), - convert._roll_negative_time_fields( - year, month, day, hour, minute, second) - ) +def test_dt_to_struct_time_for_datetime(): + now = datetime.now() + st = convert.dt_to_struct_time(now) + assert st[:6] == now.timetuple()[:6] + assert st[6:] == (0, 0, -1) + +def test_dt_to_struct_time_for_date(): + today = date.today() + st = convert.dt_to_struct_time(today) + assert st[:3] == today.timetuple()[:3] + assert st[3:6] == (0, 0, 0) + assert st[6:] == (0, 0, -1) + +def test_struct_time_to_date(): + st = struct_time([2018, 4, 19] + convert.TIME_EMPTY_TIME + convert.TIME_EMPTY_EXTRAS) + d = date(*st[:3]) + assert d == convert.struct_time_to_date(st) + +def test_struct_time_to_datetime(): + st = 
struct_time([2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) + dt = datetime(*st[:6]) + converted_dt = convert.struct_time_to_datetime(st) + assert dt == converted_dt + assert converted_dt.timetuple()[6:] == (3, 109, -1) + +def test_trim_struct_time(): + now = datetime.now() + st = now.timetuple() + trimmed_st = convert.trim_struct_time(st) + assert trimmed_st[:6] == (now.year, now.month, now.day, now.hour, now.minute, now.second) + assert trimmed_st[6:] == (0, 0, -1) + assert st[6:] != (0, 0, -1) + +def test_struct_time_to_jd(): + st_ad = struct_time([2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) + jd_ad = 2458227.9263194446 + assert jd_ad == convert.struct_time_to_jd(st_ad) + st_bc = struct_time([-2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) + jd_bc = 984091.9263194444 + assert jd_bc == convert.struct_time_to_jd(st_bc) + +def test_jd_to_struct_time(): + jd_ad = 2458227.9263194446 + st_ad = struct_time([2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) + assert st_ad == convert.jd_to_struct_time(jd_ad) + jd_bc = 984091.9263194444 + st_bc = struct_time([-2018, 4, 19] + [10, 13, 54 - 1] + convert.TIME_EMPTY_EXTRAS) + assert st_bc == convert.jd_to_struct_time(jd_bc) + +def test_jd_round_trip_for_extreme_future(): + original_st = struct_time([999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) + jd = convert.struct_time_to_jd(original_st) + converted_st = convert.jd_to_struct_time(jd) + assert original_st[:5] == converted_st[:5] + assert 3 - 1 == converted_st[5] + +def test_jd_round_trip_for_extreme_past(): + original_st = struct_time([-999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) + converted_st = convert.jd_to_struct_time(convert.struct_time_to_jd(original_st)) + assert (-999999 + 1, 8, 4, 21, 15, 3, 0, 0, -1) == tuple(converted_st) + +def test_jd_round_trip_for_zero_year_aka_1_bc(): + original_st = struct_time([0, 9, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) + converted_st = 
convert.jd_to_struct_time(convert.struct_time_to_jd(original_st)) + assert (0, 9, 5, 4, 58, 59, 0, 0, -1) == tuple(converted_st) + +def test_jd_round_trip_for_2_bc(): + original_st = struct_time([-1, 12, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) + converted_st = convert.jd_to_struct_time(convert.struct_time_to_jd(original_st)) + assert (-1, 12, 5, 4, 58, 59, 0, 0, -1) == tuple(converted_st) + +def test_roll_negative_time_fields(): + year = -100 + month = -17 + day = -34 + hour = -25 + minute = -74 + second = -253 + assert (-102, 5, 24, 21, 41, 47) == convert._roll_negative_time_fields(year, month, day, hour, minute, second) From cf341bbb25785eca9033a592a039fc7149212340 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Wed, 8 May 2024 09:07:26 -0400 Subject: [PATCH 028/102] Fix infinite comparison for OneOfASet The max and min functions now use a generator expression to filter out infinite values unless they are directly relevant to the calculation; if inf or -inf are found, they are returned instead of doing a comparison. 
--- edtf/parser/parser_classes.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 2d6c0bf..3b5ac6e 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -713,10 +713,18 @@ def __str__(self): return "[%s]" % (", ".join([str(o) for o in self.objects])) def _strict_date(self, lean): + strict_dates = [x._strict_date(lean) for x in self.objects] + # Accounting for possible 'inf' and '-inf' values if lean == LATEST: - return max([x._strict_date(lean) for x in self.objects]) + if any(isinstance(d, float) and d == float('inf') for d in strict_dates): + return float('inf') + else: + return max((d for d in strict_dates if not isinstance(d, float)), default=float('inf')) else: - return min([x._strict_date(lean) for x in self.objects]) + if any(isinstance(d, float) and d == float('-inf') for d in strict_dates): + return float('-inf') + else: + return min((d for d in strict_dates if not isinstance(d, float)), default=float('-inf')) class MultipleDates(EDTFObject): From 46b7b8f2be84b7862222db221d704b2c8c4daca2 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Wed, 8 May 2024 12:33:59 -0400 Subject: [PATCH 029/102] Upgrade actions/setup-python Use v5, which uses node v20 rather than deprecated v16 --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 34cbabc..670183a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,7 @@ jobs: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} cache: 'pip' From 0cc4bdf4f5de33921a56d6bafe6d4e3c456ed84f Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: 
Wed, 8 May 2024 12:35:59 -0400 Subject: [PATCH 030/102] Remove context from from_db_value() In Django 3.0, "support for the context argument of Field.from_db_value() and Expression.convert_value() is removed": https://github.com/django/django/blob/91a4b9a8ec2237434f06866f39c7977e889aeae6/docs/releases/3.0.txt#L641-L642 --- edtf/fields.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/edtf/fields.py b/edtf/fields.py index 52b9171..b6f0843 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -53,7 +53,7 @@ def deconstruct(self): del kwargs["max_length"] return name, path, args, kwargs - def from_db_value(self, value, expression, connection, context=None): + def from_db_value(self, value, expression, connection): # Converting values to Python objects if not value: return None From dfd32927ad9746df2d2a9d2327af748d1eac1df5 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 9 May 2024 14:01:19 -0400 Subject: [PATCH 031/102] Allow for setting EDTF directly in Django field Previously, the Django field could not directly take an EDTF string, only a natural language string that was then parsed and turned into EDTF. 
--- edtf/fields.py | 87 +++++++++++++++++++++++++++++--------------------- pyproject.toml | 4 +++ 2 files changed, 55 insertions(+), 36 deletions(-) diff --git a/edtf/fields.py b/edtf/fields.py index b6f0843..bbccbcf 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -24,6 +24,7 @@ def __init__( self, verbose_name=None, name=None, natural_text_field=None, + direct_input_field=None, lower_strict_field=None, upper_strict_field=None, lower_fuzzy_field=None, @@ -31,13 +32,14 @@ def __init__( **kwargs ): kwargs['max_length'] = 2000 - self.natural_text_field, self.lower_strict_field, \ - self.upper_strict_field, self.lower_fuzzy_field, \ - self.upper_fuzzy_field = natural_text_field, lower_strict_field, \ - upper_strict_field, lower_fuzzy_field, upper_fuzzy_field + self.natural_text_field, self.direct_input_field, \ + self.lower_strict_field, self.upper_strict_field, \ + self.lower_fuzzy_field, self.upper_fuzzy_field = \ + natural_text_field, direct_input_field, lower_strict_field, \ + upper_strict_field, lower_fuzzy_field, upper_fuzzy_field super(EDTFField, self).__init__(verbose_name, name, **kwargs) - description = "An field for storing complex/fuzzy date specifications in EDTF format." + description = "A field for storing complex/fuzzy date specifications in EDTF format." 
def deconstruct(self): name, path, args, kwargs = super(EDTFField, self).deconstruct() @@ -54,14 +56,16 @@ def deconstruct(self): return name, path, args, kwargs def from_db_value(self, value, expression, connection): - # Converting values to Python objects - if not value: - return None + # Converting values from the database to Python objects + if value is None: + return value + try: - return pickle.loads(str(value)) - except: - pass - return parse_edtf(value, fail_silently=True) + # Try to unpickle if the value was pickled + return pickle.loads(value) + except (pickle.PickleError, TypeError): + # If it fails because it's not pickled data, try parsing as EDTF + return parse_edtf(value, fail_silently=True) def to_python(self, value): if isinstance(value, EDTFObject): @@ -86,35 +90,46 @@ def get_prep_value(self, value): def pre_save(self, instance, add): """ - Updates the edtf value from the value of the display_field. - If there's a valid edtf, then set the date values. + Updates the EDTF value from either the natural_text_field, which is parsed + with text_to_edtf() and is used for display, or falling back to the direct_input_field, + which allows directly providing an EDTF string. If one of these provides a valid EDTF object, + then set the date values accordingly. 
""" - if not self.natural_text_field or self.attname not in instance.__dict__: - return - - edtf = getattr(instance, self.attname) - - # Update EDTF field based on latest natural text value, if any - natural_text = getattr(instance, self.natural_text_field) - if natural_text: - edtf = text_to_edtf(natural_text) + + # Get existing value to determine if update is needed + existing_value = getattr(instance, self.attname, None) + direct_input = getattr(instance, self.direct_input_field, None) + natural_text = getattr(instance, self.natural_text_field, None) + + # if direct_input is provided and is different from the existing value, update the EDTF field + if direct_input and (existing_value is None or str(existing_value) != direct_input): + edtf = parse_edtf(direct_input, fail_silently=True) # ParseException if invalid; should this be raised? + # TODO pyparsing.ParseExceptions are very noisy and dumps the whole grammar (see https://github.com/ixc/python-edtf/issues/46) + + # set the natural_text (display) field to the direct_input if it is not provided + if natural_text is None: + setattr(instance, self.natural_text_field, direct_input) + + elif natural_text: + edtf_string = text_to_edtf(natural_text) + if edtf_string and (existing_value is None or str(existing_value) != edtf_string): + edtf = parse_edtf(edtf_string, fail_silently=True) # potetial ParseException if invalid; should this be raised? + else: + edtf = existing_value else: - edtf = None - - # TODO If `natural_text_field` becomes cleared the derived EDTF field - # value should also be cleared, rather than left at original value? + if not existing_value: + # No inputs provided and no existing value; TODO log this? + return + # TODO: if both direct_input and natural_text are cleared, should we throw an error? 
+ edtf = existing_value - # TODO Handle case where EDTF field is set to a string directly, not - # via `natural_text_field` (this is a slightly unexpected use-case, but - # is a very efficient way to set EDTF values in situations like for API - # imports so we probably want to continue to support it?) - if edtf and not isinstance(edtf, EDTFObject): - edtf = parse_edtf(edtf, fail_silently=True) + # Update the actual EDTF field in the model if there is a change + if edtf != existing_value: + setattr(instance, self.attname, edtf) - setattr(instance, self.attname, edtf) - # set or clear related date fields on the instance + # Process and update related date fields based on the EDTF object for attr in DATE_ATTRS: - field_attr = "%s_field" % attr + field_attr = f"{attr}_field" g = getattr(self, field_attr, None) if g: if edtf: diff --git a/pyproject.toml b/pyproject.toml index 444298e..4ee273d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,9 @@ changelog = "https://github.com/ixc/python-edtf/blob/main/changelog.rst" requires = ["setuptools", "wheel"] build-backend = "setuptools.build_meta" +[tool.setuptools] +packages.find = { where = ["."], exclude = ["edtf_django_tests", "edtf_django_tests.*"] } + [tool.wheel] universal = false @@ -72,3 +75,4 @@ legacy_tox_ini = """ python_files = ["tests.py", "test_*.py", "*_test.py", "*_tests.py"] python_classes = ["Test*", "*Tests"] python_functions = ["test_*"] +addopts = "--ignore=edtf_django_tests/" From 4f0604099d741bd8f385ee5ec2b7b991b11b7f35 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 9 May 2024 14:05:22 -0400 Subject: [PATCH 032/102] Set up Django test project Django 4 test project for now --- .../edtf_django_tests/__init__.py | 0 edtf_django_tests/edtf_django_tests/asgi.py | 16 +++ .../edtf_django_tests/settings.py | 124 ++++++++++++++++++ edtf_django_tests/edtf_django_tests/urls.py | 22 ++++ edtf_django_tests/edtf_django_tests/wsgi.py | 16 +++ 
.../edtf_integration/__init__.py | 0 edtf_django_tests/edtf_integration/admin.py | 3 + edtf_django_tests/edtf_integration/apps.py | 6 + .../edtf_integration/migrations/__init__.py | 0 edtf_django_tests/edtf_integration/views.py | 3 + edtf_django_tests/manage.py | 22 ++++ pyproject.toml | 2 +- 12 files changed, 213 insertions(+), 1 deletion(-) create mode 100644 edtf_django_tests/edtf_django_tests/__init__.py create mode 100644 edtf_django_tests/edtf_django_tests/asgi.py create mode 100644 edtf_django_tests/edtf_django_tests/settings.py create mode 100644 edtf_django_tests/edtf_django_tests/urls.py create mode 100644 edtf_django_tests/edtf_django_tests/wsgi.py create mode 100644 edtf_django_tests/edtf_integration/__init__.py create mode 100644 edtf_django_tests/edtf_integration/admin.py create mode 100644 edtf_django_tests/edtf_integration/apps.py create mode 100644 edtf_django_tests/edtf_integration/migrations/__init__.py create mode 100644 edtf_django_tests/edtf_integration/views.py create mode 100755 edtf_django_tests/manage.py diff --git a/edtf_django_tests/edtf_django_tests/__init__.py b/edtf_django_tests/edtf_django_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/edtf_django_tests/edtf_django_tests/asgi.py b/edtf_django_tests/edtf_django_tests/asgi.py new file mode 100644 index 0000000..b62c5f5 --- /dev/null +++ b/edtf_django_tests/edtf_django_tests/asgi.py @@ -0,0 +1,16 @@ +""" +ASGI config for edtf_django_tests project. + +It exposes the ASGI callable as a module-level variable named ``application``. 
+ +For more information on this file, see +https://docs.djangoproject.com/en/4.2/howto/deployment/asgi/ +""" + +import os + +from django.core.asgi import get_asgi_application + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "edtf_django_tests.settings") + +application = get_asgi_application() diff --git a/edtf_django_tests/edtf_django_tests/settings.py b/edtf_django_tests/edtf_django_tests/settings.py new file mode 100644 index 0000000..a8121e3 --- /dev/null +++ b/edtf_django_tests/edtf_django_tests/settings.py @@ -0,0 +1,124 @@ +""" +Django settings for edtf_django_tests project. + +Generated by 'django-admin startproject' using Django 4.2.7. + +For more information on this file, see +https://docs.djangoproject.com/en/4.2/topics/settings/ + +For the full list of settings and their values, see +https://docs.djangoproject.com/en/4.2/ref/settings/ +""" + +from pathlib import Path + +# Build paths inside the project like this: BASE_DIR / 'subdir'. +BASE_DIR = Path(__file__).resolve().parent.parent + + +# Quick-start development settings - unsuitable for production +# See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/ + +# SECURITY WARNING: keep the secret key used in production secret! +SECRET_KEY = "django-insecure-zkd&%e=di9d(p@wq7vnstn+4dx7cxbxkve�*+57sks0q$=0a" + +# SECURITY WARNING: don't run with debug turned on in production! 
+DEBUG = True + +ALLOWED_HOSTS = [] + + +# Application definition + +INSTALLED_APPS = [ + "django.contrib.admin", + "django.contrib.auth", + "django.contrib.contenttypes", + "django.contrib.sessions", + "django.contrib.messages", + "django.contrib.staticfiles", + "edtf_integration", +] + +MIDDLEWARE = [ + "django.middleware.security.SecurityMiddleware", + "django.contrib.sessions.middleware.SessionMiddleware", + "django.middleware.common.CommonMiddleware", + "django.middleware.csrf.CsrfViewMiddleware", + "django.contrib.auth.middleware.AuthenticationMiddleware", + "django.contrib.messages.middleware.MessageMiddleware", + "django.middleware.clickjacking.XFrameOptionsMiddleware", +] + +ROOT_URLCONF = "edtf_django_tests.urls" + +TEMPLATES = [ + { + "BACKEND": "django.template.backends.django.DjangoTemplates", + "DIRS": [], + "APP_DIRS": True, + "OPTIONS": { + "context_processors": [ + "django.template.context_processors.debug", + "django.template.context_processors.request", + "django.contrib.auth.context_processors.auth", + "django.contrib.messages.context_processors.messages", + ], + }, + }, +] + +WSGI_APPLICATION = "edtf_django_tests.wsgi.application" + + +# Database +# https://docs.djangoproject.com/en/4.2/ref/settings/#databases + +DATABASES = { + "default": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": BASE_DIR / "db.sqlite3", + } +} + + +# Password validation +# https://docs.djangoproject.com/en/4.2/ref/settings/#auth-password-validators + +AUTH_PASSWORD_VALIDATORS = [ + { + "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator", + }, +] + + +# Internationalization +# https://docs.djangoproject.com/en/4.2/topics/i18n/ + +LANGUAGE_CODE = "en-us" + +TIME_ZONE = "UTC" + 
+USE_I18N = True + +USE_TZ = True + + +# Static files (CSS, JavaScript, Images) +# https://docs.djangoproject.com/en/4.2/howto/static-files/ + +STATIC_URL = "static/" + +# Default primary key field type +# https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field + +DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" diff --git a/edtf_django_tests/edtf_django_tests/urls.py b/edtf_django_tests/edtf_django_tests/urls.py new file mode 100644 index 0000000..ceca78b --- /dev/null +++ b/edtf_django_tests/edtf_django_tests/urls.py @@ -0,0 +1,22 @@ +""" +URL configuration for edtf_django_tests project. + +The `urlpatterns` list routes URLs to views. For more information please see: + https://docs.djangoproject.com/en/4.2/topics/http/urls/ +Examples: +Function views + 1. Add an import: from my_app import views + 2. Add a URL to urlpatterns: path('', views.home, name='home') +Class-based views + 1. Add an import: from other_app.views import Home + 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') +Including another URLconf + 1. Import the include() function: from django.urls import include, path + 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) +""" +from django.contrib import admin +from django.urls import path + +urlpatterns = [ + path("admin/", admin.site.urls), +] diff --git a/edtf_django_tests/edtf_django_tests/wsgi.py b/edtf_django_tests/edtf_django_tests/wsgi.py new file mode 100644 index 0000000..20450c1 --- /dev/null +++ b/edtf_django_tests/edtf_django_tests/wsgi.py @@ -0,0 +1,16 @@ +""" +WSGI config for edtf_django_tests project. + +It exposes the WSGI callable as a module-level variable named ``application``. 
+ +For more information on this file, see +https://docs.djangoproject.com/en/4.2/howto/deployment/wsgi/ +""" + +import os + +from django.core.wsgi import get_wsgi_application + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "edtf_django_tests.settings") + +application = get_wsgi_application() diff --git a/edtf_django_tests/edtf_integration/__init__.py b/edtf_django_tests/edtf_integration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/edtf_django_tests/edtf_integration/admin.py b/edtf_django_tests/edtf_integration/admin.py new file mode 100644 index 0000000..8c38f3f --- /dev/null +++ b/edtf_django_tests/edtf_integration/admin.py @@ -0,0 +1,3 @@ +from django.contrib import admin + +# Register your models here. diff --git a/edtf_django_tests/edtf_integration/apps.py b/edtf_django_tests/edtf_integration/apps.py new file mode 100644 index 0000000..23bc09d --- /dev/null +++ b/edtf_django_tests/edtf_integration/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class EdtfIntegrationConfig(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "edtf_integration" diff --git a/edtf_django_tests/edtf_integration/migrations/__init__.py b/edtf_django_tests/edtf_integration/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/edtf_django_tests/edtf_integration/views.py b/edtf_django_tests/edtf_integration/views.py new file mode 100644 index 0000000..91ea44a --- /dev/null +++ b/edtf_django_tests/edtf_integration/views.py @@ -0,0 +1,3 @@ +from django.shortcuts import render + +# Create your views here. 
diff --git a/edtf_django_tests/manage.py b/edtf_django_tests/manage.py new file mode 100755 index 0000000..b2d2a20 --- /dev/null +++ b/edtf_django_tests/manage.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +"""Django's command-line utility for administrative tasks.""" +import os +import sys + + +def main(): + """Run administrative tasks.""" + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "edtf_django_tests.settings") + try: + from django.core.management import execute_from_command_line + except ImportError as exc: + raise ImportError( + "Couldn't import Django. Are you sure it's installed and " + "available on your PYTHONPATH environment variable? Did you " + "forget to activate a virtual environment?" + ) from exc + execute_from_command_line(sys.argv) + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 4ee273d..0b7a0ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ classifiers = [ [project.optional-dependencies] test = [ - "django", + "django>=4.2,<5.0", "pytest" ] From 25ee5074e3157076f345ed84c56604cd79250dcc Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 9 May 2024 14:14:03 -0400 Subject: [PATCH 033/102] Add TestEvent model implementing EDTFField Ignore SQLlite local database --- .gitignore | 1 + .../migrations/0001_initial.py | 64 +++++++++++++++++++ edtf_django_tests/edtf_integration/models.py | 45 +++++++++++++ 3 files changed, 110 insertions(+) create mode 100644 edtf_django_tests/edtf_integration/migrations/0001_initial.py create mode 100644 edtf_django_tests/edtf_integration/models.py diff --git a/.gitignore b/.gitignore index ab3165a..7c23190 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,7 @@ coverage.xml # Django stuff: *.log +db.sqlite3 # Sphinx documentation docs/_build/ diff --git a/edtf_django_tests/edtf_integration/migrations/0001_initial.py b/edtf_django_tests/edtf_integration/migrations/0001_initial.py new file mode 100644 
index 0000000..286a9de --- /dev/null +++ b/edtf_django_tests/edtf_integration/migrations/0001_initial.py @@ -0,0 +1,64 @@ +# Generated by Django 4.2.13 on 2024-05-09 18:13 + +from django.db import migrations, models +import edtf.fields + + +class Migration(migrations.Migration): + initial = True + + dependencies = [] + + operations = [ + migrations.CreateModel( + name="TestEvent", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "date_display", + models.CharField( + blank=True, + help_text="Enter the date in natural language format (e.g., 'Approximately June 2004').", + max_length=255, + null=True, + verbose_name="Date of creation (display)", + ), + ), + ( + "date_edtf_direct", + models.CharField( + blank=True, + help_text="Enter the date in EDTF format (e.g., '2004-06~').", + max_length=255, + null=True, + verbose_name="Date of creation (EDTF format)", + ), + ), + ( + "date_edtf", + edtf.fields.EDTFField( + blank=True, + lower_fuzzy_field="date_earliest", + lower_strict_field="date_sort_ascending", + natural_text_field="date_display", + null=True, + upper_fuzzy_field="date_latest", + upper_strict_field="date_sort_descending", + verbose_name="Date of creation (EDTF)", + ), + ), + ("date_earliest", models.FloatField(blank=True, null=True)), + ("date_latest", models.FloatField(blank=True, null=True)), + ("date_sort_ascending", models.FloatField(blank=True, null=True)), + ("date_sort_descending", models.FloatField(blank=True, null=True)), + ], + ), + ] diff --git a/edtf_django_tests/edtf_integration/models.py b/edtf_django_tests/edtf_integration/models.py new file mode 100644 index 0000000..0274d5f --- /dev/null +++ b/edtf_django_tests/edtf_integration/models.py @@ -0,0 +1,45 @@ +from django.db import models +from edtf.fields import EDTFField + + +class TestEvent(models.Model): + date_display = models.CharField( + "Date of creation (display)", + blank=True, + null=True, + 
max_length=255, + help_text="Enter the date in natural language format (e.g., 'Approximately June 2004')." + ) + + date_edtf_direct = models.CharField( + "Date of creation (EDTF format)", + max_length=255, + blank=True, + null=True, + help_text="Enter the date in EDTF format (e.g., '2004-06~')." + ) + + # EDTF field that parses the input from either natural language or direct EDTF string + # natural_text_field is the field that stores the natural language input and is used for display + # direct_input_field stores an EDTF string + # TODO is there a need for both a natural text input and a label? + # TODO could consolidate the direct_input_field and natural_text_field into a single field, but would need + # a flag to indicate whether the input is natural language or EDTF as the natural language parser sometimes + # misparses an EDTF string as a natural language string (e.g. `2020-03-15/2020-04-15` -> `2020-03-15`) + date_edtf = EDTFField( + "Date of creation (EDTF)", + natural_text_field='date_display', + direct_input_field='date_edtf_direct', + lower_fuzzy_field='date_earliest', + upper_fuzzy_field='date_latest', + lower_strict_field='date_sort_ascending', + upper_strict_field='date_sort_descending', + blank=True, + null=True, + ) + # Computed fields for filtering + date_earliest = models.FloatField(blank=True, null=True) + date_latest = models.FloatField(blank=True, null=True) + # Computed fields for sorting + date_sort_ascending = models.FloatField(blank=True, null=True) + date_sort_descending = models.FloatField(blank=True, null=True) From fbf4262b35f0760c7fb8944d906092d54fea0c15 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 9 May 2024 14:18:41 -0400 Subject: [PATCH 034/102] Add Django integration tests This basic Django app shows how a user could create a model using the EDTFField and store data in it. 
The integration tests check that the EDTFField and associated fields (date_edtf_direct and date_display, in this case) work correctly. There is a weird issue in test_date_display() where if we use an instance variable (self.event1, self.event2) the event.date_display property is available, but if we retrieve the object from the database it is not. I tried using TestEvent.objects.create() as well as the current method (make and then save an instance to no effect). CI is set up to run the Django integration tests after Pytest. We could move to using pytest/django-pytest for these tests as well --- .github/workflows/ci.yml | 5 ++ edtf_django_tests/edtf_integration/tests.py | 96 +++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 edtf_django_tests/edtf_integration/tests.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 34cbabc..627dd03 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,3 +32,8 @@ jobs: - name: Run unit tests run: | pytest + + - name: Run Django integration tests + working-directory: ./edtf_django_tests + run: | + python manage.py test edtf_integration diff --git a/edtf_django_tests/edtf_integration/tests.py b/edtf_django_tests/edtf_integration/tests.py new file mode 100644 index 0000000..de54d64 --- /dev/null +++ b/edtf_django_tests/edtf_integration/tests.py @@ -0,0 +1,96 @@ +from django.test import TestCase +from .models import TestEvent +from edtf.parser.grammar import parse_edtf as parse +from edtf.parser import EDTFObject +from edtf.convert import struct_time_to_jd + +class TestEventModelTests(TestCase): + def setUp(self): + # Create instances and assign them to instance variables + # date_edtf_direct is a valid EDTF string, date_display is a date + # to be parsed from natural language + self.event1 = TestEvent(date_edtf_direct="2020-03-15/2020-04-15") + self.event2 = TestEvent(date_edtf_direct="2021-05-06") + self.event3 = TestEvent(date_edtf_direct="2019-11") + self.event4 = 
TestEvent(date_display="Approximately August 2018") + self.event5 = TestEvent(date_edtf_direct="2021-05-06") + self.event1.save() + self.event2.save() + self.event3.save() + self.event4.save() + self.event5.save() + + + def test_edtf_object_returned(self): + for event in TestEvent.objects.all(): + self.assertIsInstance(event.date_edtf, EDTFObject) + + + def test_sorting(self): + events = list(TestEvent.objects.order_by('date_sort_ascending')) + self.assertEqual(events[0].date_display, "Approximately August 2018") + self.assertEqual(events[1].date_edtf_direct, "2019-11") + self.assertEqual(events[2].date_edtf_direct, "2020-03-15/2020-04-15") + self.assertEqual(events[3].date_edtf_direct, "2021-05-06") + self.assertEqual(events[4].date_edtf_direct, "2021-05-06") + + events_desc = list(TestEvent.objects.order_by('-date_sort_descending')) + self.assertEqual(events_desc[0].date_edtf_direct, "2021-05-06") + self.assertEqual(events_desc[1].date_edtf_direct, "2021-05-06") + self.assertEqual(events_desc[2].date_edtf_direct, "2020-03-15/2020-04-15") + self.assertEqual(events_desc[3].date_edtf_direct, "2019-11") + self.assertEqual(events_desc[4].date_display, "Approximately August 2018") + + + def test_date_boundaries(self): + event = TestEvent.objects.get(date_edtf_direct="2020-03-15/2020-04-15") + expected_earliest_jd = struct_time_to_jd(parse("2020-03-15").lower_strict()) + expected_latest_jd = struct_time_to_jd(parse("2020-04-15").upper_strict()) + self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) + + event = self.event2 + expected_earliest_jd = struct_time_to_jd(parse("2021-05-06").lower_strict()) + expected_latest_jd = struct_time_to_jd(parse("2021-05-06").upper_strict()) + self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) + + event = 
TestEvent.objects.get(date_edtf_direct="2019-11") + expected_earliest_jd = struct_time_to_jd(parse("2019-11").lower_strict()) + expected_latest_jd = struct_time_to_jd(parse("2019-11").upper_strict()) + self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) + + event = TestEvent.objects.get(date_display="Approximately August 2018") + expected_earliest_jd = struct_time_to_jd(parse("2018-08~").lower_fuzzy()) + expected_latest_jd = struct_time_to_jd(parse("2018-08~").upper_fuzzy()) + self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) + + def test_date_display(self): + """ + Test that the date_display field is correctly populated based on the EDTF input. + In the future, a more sophisticated natural language parser could be used to generate + a human readable date from the EDTF input. + """ + # why does this fail?? 
+ # event = TestEvent.objects.get(date_edtf_direct="2020-03-15/2020-04-15") + # self.assertEqual(event.date_display, "2020-03-15/2020-04-15") + + self.assertEqual(self.event1.date_display, "2020-03-15/2020-04-15") + self.assertEqual(self.event2.date_display, "2021-05-06") + self.assertEqual(self.event3.date_display, "2019-11") + self.assertEqual(self.event4.date_display, "Approximately August 2018") + + def test_comparison(self): + # test equality of the same dates + self.assertEqual(self.event2.date_edtf, self.event5.date_edtf, "Events with the same date should be equal") + + # test inequality of different dates + self.assertNotEqual(self.event1.date_edtf, self.event2.date_edtf, "Events with different dates should not be equal") + + # greater than + self.assertGreater(self.event2.date_edtf, self.event3.date_edtf, "2021-05-06 is greater than 2019-11") + + # less than + self.assertLess(self.event3.date_edtf, self.event2.date_edtf, "2019-11 is less than 2021-05-06") \ No newline at end of file From ee4b21e00f884ee8adea122128e89018da8d6491 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 9 May 2024 18:39:28 -0400 Subject: [PATCH 035/102] Update the natural language parser to work with 2018 spec In the parser: - Update regular expressions for SHORT_YEAR_RE and LONG_YEAR_RE to use X instead of x and u and Y instead of y - Replaced`unknown` with null as per the 2018 spec. It does not look like python-edtf currently has open intervals (`open` before, `..` now)? - Replaced `?~` with `%` In the tests: - eliminate masked precision - no u/x just X for unknown regardless of why the data is missing - replace unknown with null - replace ~? 
with % --- edtf/natlang/en.py | 34 +++++----- edtf/natlang/tests.py | 143 +++++++++++++++++++----------------------- 2 files changed, 84 insertions(+), 93 deletions(-) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index ff83034..213d17f 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -12,8 +12,8 @@ DEFAULT_DATE_1 = datetime(1234, 1, 1, 0, 0) DEFAULT_DATE_2 = datetime(5678, 10, 10, 0, 0) -SHORT_YEAR_RE = r'(-?)([\du])([\dxu])([\dxu])([\dxu])' -LONG_YEAR_RE = r'y(-?)([1-9]\d\d\d\d+)' +SHORT_YEAR_RE = r'(-?)([\dX])([\dX])([\dX])([\dX])' +LONG_YEAR_RE = r'Y(-?)([1-9]\d\d\d\d+)' CENTURY_RE = r'(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?' CE_RE = r'(\d{1,4}) (ad|ce|bc|bce)' @@ -29,7 +29,7 @@ def text_to_edtf(text): Generate EDTF string equivalent of a given natural language date string. """ if not text: - return + return None t = text.lower() @@ -95,9 +95,9 @@ def text_to_edtf(text): is_after = is_after or re.findall(r'\blater\b', t) if is_before: - result = u"unknown/%s" % result + result = f"/{result}" # unknown is replaced with null for intervals elif is_after: - result = u"%s/unknown" % result + result = f"{result}/" # unknown is replaced with null for intervals return result @@ -151,7 +151,7 @@ def text_to_edtf_date(text): # detect CE/BCE year form is_ce = re.findall(CE_RE, t) if is_century: - result = "%02dxx" % (int(is_century[0][0]) - 1,) + result = "%02dXX" % (int(is_century[0][0]) - 1,) is_approximate = is_approximate or \ re.findall(r'\b(ca?\.?) ?' + CENTURY_RE, t) is_uncertain = is_uncertain or re.findall(CENTURY_RE + r'\?', t) @@ -222,12 +222,12 @@ def text_to_edtf_date(text): # a century or a decade. 
if i == 2 and could_be_century and \ not (is_approximate or is_uncertain): - result += 'x' + result += 'X' elif i == 3 and is_decade > 0: if mentions_year: - result += 'u' # year precision + result += 'X' # year precision else: - result += 'x' # decade precision + result += 'X' # decade precision elif date1[i] == date2[i]: # since both attempts at parsing produced the same result # it must be parsed value, not a default @@ -235,12 +235,12 @@ def text_to_edtf_date(text): else: # different values were produced, meaning that it's likely # a default. Use 'unspecified' - result += "u" + result += 'X' # strip off unknown chars from end of string - except the first 4 for i in reversed(xrange(len(result))): - if result[i] not in ('u', 'x', '-'): + if result[i] not in ('X', '-'): smallest_length = 4 if mentions_month: @@ -264,11 +264,13 @@ def text_to_edtf_date(text): # end dateutil post-parsing - if is_uncertain: - result += "?" - - if is_approximate: - result += "~" + if is_uncertain and is_approximate: + result += "%" + else: + if is_uncertain: + result += "?" + if is_approximate: + result += "~" # weed out bad parses if result.startswith("uu-uu"): diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py index eaa9af6..5bfb052 100644 --- a/edtf/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -6,8 +6,8 @@ # where examples are tuples, the second item is the normalised output @pytest.mark.parametrize("input_text,expected_output", [ # Ignoring 'late' for simplicity in these examples - ('active late 17th-19th centuries', '16xx/18xx'), - ('active 17-19th Centuries', '16xx/18xx'), + ('active late 17th-19th centuries', '16XX/18XX'), + ('active 17-19th Centuries', '16XX/18XX'), # Unrecognised values ('', None), @@ -52,26 +52,26 @@ ('attica 1802', '1802'), # Avoid false positive 'circa' at the end of preceding word ('attic. 1802', '1802'), # Avoid false positive 'circa' - # Masked precision - ('1860s', '186x'), # 186x has decade precision, 186u has year precision. 
+ # # Masked precision + # ('1860s', '186x'), # 186x has decade precision, 186u has year precision. - # Masked precision + uncertainty - ('ca. 1860s', '186x~'), - ('c. 1860s', '186x~'), - ('Circa 1840s', '184x~'), - ('circa 1840s', '184x~'), - ('ca. 1860s?', '186x?~'), - ('uncertain: approx 1862', '1862?~'), + # # Masked precision + uncertainty + # ('ca. 1860s', '186x~'), + # ('c. 1860s', '186x~'), + # ('Circa 1840s', '184x~'), + # ('circa 1840s', '184x~'), + # ('ca. 1860s?', '186x?~'), + # ('uncertain: approx 1862', '1862?~'), - # Ambiguous masked precision for centuries and decades - ('1800s', '18xx'), # Without additional uncertainty, use the century - ('2000s', '20xx'), # Without additional uncertainty, use the century - ('c1900s', '190x~'), # If there's additional uncertainty, use the decade - ('c1800s?', '180x?~'), # If there's additional uncertainty, use the decade + # # Ambiguous masked precision for centuries and decades + ('1800s', '18XX'), # Without additional uncertainty, use the century + ('2000s', '20XX'), # Without additional uncertainty, use the century + ('c1900s', '190X~'), # If there's additional uncertainty, use the decade + ('c1800s?', '180X%'), # If there's additional uncertainty, use the decade # Unspecified dates - ('January 12', 'uuuu-01-12'), - ('January', 'uuuu-01'), + ('January 12', 'XXXX-01-12'), + ('January', 'XXXX-01'), ('10/7/2008', '2008-10-07'), ('7/2008', '2008-07'), @@ -83,49 +83,50 @@ ('Winter 1872', '1872-24'), # Dates relative to known events (before/after) - ('earlier than 1928', 'unknown/1928'), - ('before 1928', 'unknown/1928'), - ('after 1928', '1928/unknown'), - ('later than 1928', '1928/unknown'), - ('before January 1928', 'unknown/1928-01'), - ('before 18 January 1928', 'unknown/1928-01-18'), + ('earlier than 1928', '/1928'), + ('before 1928', '/1928'), + ('after 1928', '1928/'), + ('later than 1928', '1928/'), + ('before January 1928', '/1928-01'), + ('before 18 January 1928', '/1928-01-18'), # Approximations combined 
with before/after - ('before approx January 18 1928', 'unknown/1928-01-18~'), - ('before approx January 1928', 'unknown/1928-01~'), - ('after approx January 1928', '1928-01~/unknown'), - ('after approx Summer 1928', '1928-22~/unknown'), + ('before approx January 18 1928', '/1928-01-18~'), + ('before approx January 1928', '/1928-01~'), + ('after approx January 1928', '1928-01~/'), + ('after approx Summer 1928', '1928-22~/'), # Before and after with uncertain / unspecified components - ('after about the 1920s', '192x~/unknown'), - ('before about the 1900s', 'unknown/190x~'), - ('before the 1900s', 'unknown/19xx'), - - # Specifying unspecified components within a date - # ('decade in 1800s', '18ux'), #too esoteric - # ('decade somewhere during the 1800s', '18ux'), #lengthier. Keywords are 'in' or 'during' - ('year in the 1860s', '186u'), # 186x has decade precision - ('year in the 1800s', '18xu'), # 186u has year precision - ('year in about the 1800s', '180u~'), - ('month in 1872', '1872-uu'), - ('day in Spring 1849', '1849-21-uu'), - ('day in January 1872', '1872-01-uu'), - ('day in 1872', '1872-uu-uu'), + ('after about the 1920s', '192X~/'), + ('before about the 1900s', '/190X~'), + ('before the 1900s', '/19XX'), + + # previous examples for masked precision, now removed from the EDTF spec + # use `X` for unknown regardless of precision or why the data is unknown + ('decade in 1800s', '18XX'), + ('decade somewhere during the 1800s', '18XX'), + ('year in the 1860s', '186X'), + ('year in the 1800s', '18XX'), + ('year in about the 1800s', '180X~'), + ('month in 1872', '1872-XX'), + ('day in Spring 1849', '1849-21-XX'), + ('day in January 1872', '1872-01-XX'), + ('day in 1872', '1872-XX-XX'), ('birthday in 1872', '1872'), # Handling centuries with approximation and uncertainty - ('1st century', '00xx'), - ('10c', '09xx'), - ('19th century', '18xx'), - ('19th century?', '18xx?'), - ('before 19th century', 'unknown/18xx'), - ('19c', '18xx'), - ('15c.', '14xx'), - ('ca. 
19c', '18xx~'), - ('~19c', '18xx~'), - ('about 19c', '18xx~'), - ('19c?', '18xx?'), - ('c.19c?', '18xx?~'), + ('1st century', '00XX'), + ('10c', '09XX'), + ('19th century', '18XX'), + ('19th century?', '18XX?'), + ('before 19th century', '/18XX'), + ('19c', '18XX'), + ('15c.', '14XX'), + ('ca. 19c', '18XX~'), + ('~19c', '18XX~'), + ('about 19c', '18XX~'), + ('19c?', '18XX?'), + ('c.19c?', '18XX%'), # BC/AD dating ('1 AD', '0001'), @@ -137,13 +138,13 @@ ('c127 CE', '0127~'), ('c1270 CE', '1270~'), ('c64 BCE', '-0064~'), - ('2nd century bc', '-01xx'), # -200 to -101 - ('2nd century bce', '-01xx'), - ('2nd century ad', '01xx'), - ('2nd century ce', '01xx'), + ('2nd century bc', '-01XX'), # -200 to -101 + ('2nd century bce', '-01XX'), + ('2nd century ad', '01XX'), + ('2nd century ce', '01XX'), # Combining uncertainties and approximations in creative ways - ('a day in about Spring 1849?', '1849-21-uu?~'), + ('a day in about Spring 1849?', '1849-21-XX%'), # Simple date ranges, showcasing both the limitations and capabilities of the parser # Not all of these results are correct EDTF, but this is as good as the EDTF implementation @@ -153,9 +154,9 @@ ('1851-52', '1851/1852'), ('1852 - 1860', '1852/1860'), ('1856-ca. 1865', '1856/1865~'), - ('1857-mid 1860s', '1857/186x'), + ('1857-mid 1860s', '1857/186X'), ('1858/1860', '[1858, 1860]'), - ('1860s-1870s', '186x/187x'), + ('1860s-1870s', '186X/187X'), ('1910-30', '1910/1930'), ('active 1910-30', '1910/1930'), ('1861-67', '1861/1867'), @@ -168,28 +169,16 @@ ('1864-1872, printed 1870s', '1864/1872'), ('1868-1871?', '1868/1871?'), ('1869-70', '1869/1870'), - ('1870s, printed ca. 1880s', '187x'), + ('1870s, printed ca. 
1880s', '187X'), ('1900-1903, cast before 1929', '1900/1903'), ('1900; 1973', '1900'), ('1900; printed 1912', '1900'), ('1915 late - autumn 1916', '1915/1916-23'), ('1915, from Camerawork, October 1916', '1915'), # should be {1915, 1916-10} - ('1920s -early 1930s', '192x/193x'), - ('1930s, printed early 1960s', '193x'), # should be something like {193x, 196x}, + ('1920s -early 1930s', '192X/193X'), + ('1930s, printed early 1960s', '193X'), # should be something like {193x, 196x}, ('1932, printed 1976 by Gunther Sander', '1932'), # should be {1932, 1976} ('1938, printed 1940s-1950s', '1938') # should be something like {1938, 194x-195x} - - # Uncertain and approximate on different parts of the date - # for these to work we need to recast is_uncertain and is_approximate - # such that they work on different parts. Probably worth rolling our own - # dateparser at this point. - # ('July in about 1849', '1849~-07'), - # ('a day in July in about 1849', '1849~-07-uu'), - # ('a day in Spring in about 1849', '1849~-21-uu'), - # ('a day in about July? in about 1849', '1849~-07?~-uu'), - # ('a day in about Spring in about 1849', '1849~-21~-uu'), - # ('maybe January in some year in about the 1830s', '183u~-01?'), - # ('about July? in about 1849', '1849~-07?~'), ]) def test_natlang(input_text, expected_output): @@ -198,5 +187,5 @@ def test_natlang(input_text, expected_output): Verify that the conversion from text to EDTF format matches the expected output. 
""" result = text_to_edtf(input_text) - assert result == expected_output, f"Failed for input: {input_text}" + assert result == expected_output, f"Failed for input: {input_text} - expected {expected_output}, got {result}" From e2a79ddd35d17871f408096ae3c6dc4fa04d5b44 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Sun, 12 May 2024 19:01:17 -0400 Subject: [PATCH 036/102] Improve handling of field updates Ensure that `EDTFField` properly updates related fields whenever it changes, inspired by ImageField. - Use EDTFFieldDescriptor as the descriptor class for EDTFField. This inherits from DeferredAttribute and handles getting, setting, and updating values. Whenever the field value is set, additional logic is processed to potentially update the field again based on other fields. - update_values() replaces pre_save() to better handle updates/dependencies when EDTFField value changes - contribute_to_class() attaches update_values() to the `post_init` signal These changes should make the field updates more stable and not reliant on definition order in models using EDTFField.
Thanks for the suggestion @aweakley https://github.com/ixc/python-edtf/pull/47#issuecomment-2105500069 Co-Authored-By: aweakley <224316+aweakley@users.noreply.github.com> --- edtf/fields.py | 36 ++++++++++++++---- edtf_django_tests/edtf_integration/tests.py | 41 ++++++++------------- 2 files changed, 45 insertions(+), 32 deletions(-) diff --git a/edtf/fields.py b/edtf/fields.py index bbccbcf..d568375 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -3,12 +3,14 @@ except: import pickle -from django.db import models from django.core.exceptions import FieldDoesNotExist +from django.db import models +from django.db.models import signals +from django.db.models.query_utils import DeferredAttribute from edtf import parse_edtf, EDTFObject -from edtf.natlang import text_to_edtf from edtf.convert import struct_time_to_date, struct_time_to_jd +from edtf.natlang import text_to_edtf DATE_ATTRS = ( 'lower_strict', @@ -17,6 +19,20 @@ 'upper_fuzzy', ) +class EDTFFieldDescriptor(DeferredAttribute): + """ + Descriptor for the EDTFField's attribute on the model instance. + This updates the dependent fields each time this value is set. + """ + + def __set__(self, instance, value): + # First set the value we are given + instance.__dict__[self.field.attname] = value + # `update_values` may provide us with a new value to set + edtf = self.field.update_values(instance, value) + if edtf != value: + instance.__dict__[self.field.attname] = edtf + class EDTFField(models.CharField): @@ -40,6 +56,7 @@ def __init__( super(EDTFField, self).__init__(verbose_name, name, **kwargs) description = "A field for storing complex/fuzzy date specifications in EDTF format." 
+ descriptor_class = EDTFFieldDescriptor def deconstruct(self): name, path, args, kwargs = super(EDTFField, self).deconstruct() @@ -88,7 +105,7 @@ def get_prep_value(self, value): return pickle.dumps(value) return value - def pre_save(self, instance, add): + def update_values(self, instance, *args, **kwargs): """ Updates the EDTF value from either the natural_text_field, which is parsed with text_to_edtf() and is used for display, or falling back to the direct_input_field, @@ -123,10 +140,6 @@ def pre_save(self, instance, add): # TODO: if both direct_input and natural_text are cleared, should we throw an error? edtf = existing_value - # Update the actual EDTF field in the model if there is a change - if edtf != existing_value: - setattr(instance, self.attname, edtf) - # Process and update related date fields based on the EDTF object for attr in DATE_ATTRS: field_attr = f"{attr}_field" @@ -151,3 +164,12 @@ def pre_save(self, instance, add): else: setattr(instance, g, None) return edtf + + def contribute_to_class(self, cls, name, **kwargs): + super().contribute_to_class(cls, name, **kwargs) + # Attach update_values so that dependent fields declared + # after their corresponding edtf field don't stay cleared by + # Model.__init__, see Django bug #11196. 
+ # Only run post-initialization values update on non-abstract models + if not cls._meta.abstract: + signals.post_init.connect(self.update_values, sender=cls) \ No newline at end of file diff --git a/edtf_django_tests/edtf_integration/tests.py b/edtf_django_tests/edtf_integration/tests.py index de54d64..9385733 100644 --- a/edtf_django_tests/edtf_integration/tests.py +++ b/edtf_django_tests/edtf_integration/tests.py @@ -9,16 +9,11 @@ def setUp(self): # Create instances and assign them to instance variables # date_edtf_direct is a valid EDTF string, date_display is a date # to be parsed from natural language - self.event1 = TestEvent(date_edtf_direct="2020-03-15/2020-04-15") - self.event2 = TestEvent(date_edtf_direct="2021-05-06") - self.event3 = TestEvent(date_edtf_direct="2019-11") - self.event4 = TestEvent(date_display="Approximately August 2018") - self.event5 = TestEvent(date_edtf_direct="2021-05-06") - self.event1.save() - self.event2.save() - self.event3.save() - self.event4.save() - self.event5.save() + self.event1 = TestEvent.objects.create(date_edtf_direct="2020-03-15/2020-04-15") + self.event2 = TestEvent.objects.create(date_edtf_direct="2021-05-06") + self.event3 = TestEvent.objects.create(date_edtf_direct="2019-11") + self.event4 = TestEvent.objects.create(date_display="Approximately August 2018") + self.event5 = TestEvent.objects.create(date_edtf_direct="2021-05-06") def test_edtf_object_returned(self): @@ -49,23 +44,22 @@ def test_date_boundaries(self): self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, places=1) self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) - event = self.event2 expected_earliest_jd = struct_time_to_jd(parse("2021-05-06").lower_strict()) expected_latest_jd = struct_time_to_jd(parse("2021-05-06").upper_strict()) - self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, places=1) - self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) - - event = 
TestEvent.objects.get(date_edtf_direct="2019-11") + self.assertAlmostEqual(self.event2.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual(self.event2.date_latest, expected_latest_jd, places=1) + + event3 = TestEvent.objects.get(date_edtf_direct="2019-11") expected_earliest_jd = struct_time_to_jd(parse("2019-11").lower_strict()) expected_latest_jd = struct_time_to_jd(parse("2019-11").upper_strict()) - self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, places=1) - self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) + self.assertAlmostEqual(event3.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual(event3.date_latest, expected_latest_jd, places=1) - event = TestEvent.objects.get(date_display="Approximately August 2018") + event4 = TestEvent.objects.get(date_display="Approximately August 2018") expected_earliest_jd = struct_time_to_jd(parse("2018-08~").lower_fuzzy()) expected_latest_jd = struct_time_to_jd(parse("2018-08~").upper_fuzzy()) - self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, places=1) - self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) + self.assertAlmostEqual(event4.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual(event4.date_latest, expected_latest_jd, places=1) def test_date_display(self): """ @@ -73,11 +67,8 @@ def test_date_display(self): In the future, a more sophisticated natural language parser could be used to generate a human readable date from the EDTF input. """ - # why does this fail?? 
- # event = TestEvent.objects.get(date_edtf_direct="2020-03-15/2020-04-15") - # self.assertEqual(event.date_display, "2020-03-15/2020-04-15") - - self.assertEqual(self.event1.date_display, "2020-03-15/2020-04-15") + event = TestEvent.objects.get(date_edtf_direct="2020-03-15/2020-04-15") + self.assertEqual(event.date_display, "2020-03-15/2020-04-15") self.assertEqual(self.event2.date_display, "2021-05-06") self.assertEqual(self.event3.date_display, "2019-11") self.assertEqual(self.event4.date_display, "Approximately August 2018") From 4fd07820a71defb8d4a6e61434de13b178bcbdd3 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Sun, 12 May 2024 19:05:31 -0400 Subject: [PATCH 037/102] More updates of tests and English parser --- edtf/natlang/en.py | 8 ++++---- edtf/natlang/tests.py | 28 ++++++++++++---------------- edtf/parser/tests.py | 32 +++++++++++++++----------------- 3 files changed, 31 insertions(+), 37 deletions(-) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index 213d17f..1f46c37 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -225,16 +225,16 @@ def text_to_edtf_date(text): result += 'X' elif i == 3 and is_decade > 0: if mentions_year: - result += 'X' # year precision + result += 'X' # previously year precision - now just X else: - result += 'X' # decade precision + result += 'X' # previously decade precision - now just X elif date1[i] == date2[i]: # since both attempts at parsing produced the same result # it must be parsed value, not a default result += date1[i] else: # different values were produced, meaning that it's likely - # a default. Use 'unspecified' + # a default. 
Use 'X' result += 'X' # strip off unknown chars from end of string - except the first 4 @@ -273,7 +273,7 @@ def text_to_edtf_date(text): result += "~" # weed out bad parses - if result.startswith("uu-uu"): + if result.startswith("XX-XX"): return None return result diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py index 5bfb052..911fc13 100644 --- a/edtf/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -52,22 +52,18 @@ ('attica 1802', '1802'), # Avoid false positive 'circa' at the end of preceding word ('attic. 1802', '1802'), # Avoid false positive 'circa' - # # Masked precision - # ('1860s', '186x'), # 186x has decade precision, 186u has year precision. - - # # Masked precision + uncertainty - # ('ca. 1860s', '186x~'), - # ('c. 1860s', '186x~'), - # ('Circa 1840s', '184x~'), - # ('circa 1840s', '184x~'), - # ('ca. 1860s?', '186x?~'), - # ('uncertain: approx 1862', '1862?~'), - - # # Ambiguous masked precision for centuries and decades - ('1800s', '18XX'), # Without additional uncertainty, use the century - ('2000s', '20XX'), # Without additional uncertainty, use the century - ('c1900s', '190X~'), # If there's additional uncertainty, use the decade - ('c1800s?', '180X%'), # If there's additional uncertainty, use the decade + # Previously tested masked precision, uncertain or ambiguous masked precision + ('1860s', '186X'), + ('ca. 1860s', '186X~'), + ('c. 1860s', '186X~'), + ('Circa 1840s', '184X~'), + ('circa 1840s', '184X~'), + ('ca. 
1860s?', '186X%'), + ('uncertain: approx 1862', '1862%'), + ('1800s', '18XX'), + ('2000s', '20XX'), + ('c1900s', '190X~'), + ('c1800s?', '180X%'), # Unspecified dates ('January 12', 'XXXX-01-12'), diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 877fd0b..026622c 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -153,9 +153,7 @@ # December 1760 or some later month ('[1760-12..]', ('1760-12-01', 'inf')), # January or February of 1760 or December 1760 or some later month - # This test is failing due to a code issue: - # TypeError: '>' not supported between instances of 'float' and 'time.struct_time' - ('[1760-01, 1760-02, 1760-12..]', ('1760-01-01', 'inf')), #TODO fix in parser_classes + ('[1760-01, 1760-02, 1760-12..]', ('1760-01-01', 'inf')), # Either the year 1667 or the month December of 1760. ('[1667, 1760-12]', ('1667-01-01', '1760-12-31')), # Multiple Dates @@ -164,11 +162,11 @@ # The year 1960 and the month December of 1961. ('{1960, 1961-12}', ('1960-01-01', '1961-12-31')), - # Masked Precision --> eliminated + # Previously tested masked precision, now eliminated from the spec # A date during the 1960s - #('196x', '1960-01-01', '1969-12-31'), + ('196X', ('1960-01-01', '1969-12-31')), # A date during the 1900s - #('19xx', '1900-01-01', '1999-12-31'), + ('19XX', ('1900-01-01', '1999-12-31')), # L2 Extended Interval # Interval with fuzzy day endpoints in June 2004 @@ -180,6 +178,7 @@ ('Y17E7', ('170000000-01-01', '170000000-12-31')), # the year -170000000 ('Y-17E7', ('-170000000-01-01', '-170000000-12-31')), + # L2 significant digits # Some year between 171010000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.) 
# TODO Not yet implemented, see https://github.com/ixc/python-edtf/issues/12 # ('Y17101E4S3', ('171010000-01-01', '171999999-12-31')), @@ -227,7 +226,6 @@ def iso_to_struct_time(iso_date): y *= -1 return struct_time([y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - @pytest.mark.parametrize("test_input,expected_tuple", EXAMPLES) def test_edtf_examples(test_input, expected_tuple): """ Test parsing of EDTF strings with expected outputs. """ @@ -245,25 +243,25 @@ def test_edtf_examples(test_input, expected_tuple): elif len(expected_tuple) == 2: lower_strict = iso_to_struct_time(expected_tuple[0]) upper_strict = iso_to_struct_time(expected_tuple[1]) - assert result.lower_strict() == lower_strict, "Lower strict date does not match" - assert result.upper_strict() == upper_strict, "Upper strict date does not match" + assert result.lower_strict() == lower_strict, f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" + assert result.upper_strict() == upper_strict, f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" elif len(expected_tuple) == 3: strict_date = iso_to_struct_time(expected_tuple[0]) lower_fuzzy = iso_to_struct_time(expected_tuple[1]) upper_fuzzy = iso_to_struct_time(expected_tuple[2]) - assert result.lower_strict() == strict_date, "Lower strict date does not match" - assert result.upper_strict() == strict_date, "Upper strict date does not match" - assert result.lower_fuzzy() == lower_fuzzy, "Lower fuzzy date does not match" - assert result.upper_fuzzy() == upper_fuzzy, "Upper fuzzy date does not match" + assert result.lower_strict() == strict_date, f"Lower strict date does not match. Expected {strict_date}, got {result.lower_strict()}" + assert result.upper_strict() == strict_date, f"Upper strict date does not match. Expected {strict_date}, got {result.upper_strict()}" + assert result.lower_fuzzy() == lower_fuzzy, f"Lower fuzzy date does not match. 
Expected {lower_fuzzy}, got {result.lower_fuzzy()}" + assert result.upper_fuzzy() == upper_fuzzy, f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" elif len(expected_tuple) == 4: lower_strict = iso_to_struct_time(expected_tuple[0]) upper_strict = iso_to_struct_time(expected_tuple[1]) lower_fuzzy = iso_to_struct_time(expected_tuple[2]) upper_fuzzy = iso_to_struct_time(expected_tuple[3]) - assert result.lower_strict() == lower_strict, "Lower strict date does not match" - assert result.upper_strict() == upper_strict, "Upper strict date does not match" - assert result.lower_fuzzy() == lower_fuzzy, "Lower fuzzy date does not match" - assert result.upper_fuzzy() == upper_fuzzy, "Upper fuzzy date does not match" + assert result.lower_strict() == lower_strict, f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" + assert result.upper_strict() == upper_strict, f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" + assert result.lower_fuzzy() == lower_fuzzy, f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" + assert result.upper_fuzzy() == upper_fuzzy, f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" @pytest.mark.parametrize("bad_input", BAD_EXAMPLES) From d23ff7b6932313c2d42f14ffeac2a3ffe9d32afd Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 13 May 2024 09:01:39 -0400 Subject: [PATCH 038/102] Remove masked precision and unspecified from README --- README.md | 42 ++++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 76aec1a..bf3155b 100644 --- a/README.md +++ b/README.md @@ -196,43 +196,33 @@ The parser can parse strings such as: 'c.1860' => '1860~' #with or without . 
'ca1860' => '1860~' 'approx 1860' => '1860~' - - # masked precision - '1860s' => '186x' #186x has decade precision, 186u has year precision. - '1800s' => '18xx' # without uncertainty indicators, assume century - - # masked precision + uncertainty - 'ca. 1860s' => '186x~' - 'circa 1840s' => '184x~' - 'ca. 1860s?' => '186x?~' - 'c1800s?' => '180x?~' # with uncertainty indicators, use the decade + 'ca. 1860s' => '186X~' + 'circa 1840s' => '184X~' + 'ca. 1860s?' => '186X?~' + 'c1800s?' => '180X?~' # with uncertainty indicators, use the decade # unspecified parts 'January 12' => 'XXXX-01-12' 'January' => 'XXXX-01' '7/2008' => '2008-07' + 'month in 1872' => '1872-XX' + 'day in January 1872' => '1872-01-XX' + 'day in 1872' => '1872-XX-XX' #seasons 'Autumn 1872' => '1872-23' 'Fall 1872' => '1872-23' # before/after - 'earlier than 1928' => 'unknown/1928' - 'later than 1928' => '1928/unknown' - 'before January 1928' => 'unknown/1928-01' - 'after about the 1920s' => '192x~/unknown' - - # unspecified - 'year in the 1860s' => '186u' #186x has decade precision, 186u has year precision. - ('year in the 1800s', '18xu') - 'month in 1872' => '1872-XX' - 'day in January 1872' => '1872-01-XX' - 'day in 1872' => '1872-XX-XX' + 'earlier than 1928' => '/1928' + 'later than 1928' => '1928/' + 'before January 1928' => '/1928-01' + 'after about the 1920s' => '192X~/' #centuries - '1st century' => '00xx' - '10c' => '09xx' - '19th century?' => '18xx?' + '1st century' => '00XX' + '10c' => '09XX' + '19th century?' => '18XX?' # just showing off now... 'a day in about Spring 1849?' => '1849-21-XX?~' @@ -243,8 +233,8 @@ The parser can parse strings such as: '1851-1852; printed 1853-1854' => '1851/1852' '1851-52' => '1851/1852' '1856-ca. 
1865' => '1856/1865~' - '1860s-1870s' => '186x/187x' - '1920s -early 1930s' => '192x/193x' + '1860s-1870s' => '186X/187X' + '1920s - early 1930s' => '192X/193X' '1938, printed 1940s-1950s' => '1938' From f1cd472916438b9c034959b7b7c7cfc420938d12 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 13 May 2024 15:08:50 -0400 Subject: [PATCH 039/102] Better grouping of group qualification tests --- edtf/parser/tests.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 026622c..817354a 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -111,29 +111,35 @@ ('2010-24', ('2010-12-01', '2010-12-31')), # ******************************* LEVEL 2 ********************************* - # Partial Uncertain/Approximate + # Qualification + # Group qualification: a qualification character to the immediate right of a component applies + # to that component as well as to all components to the left. 
+ # year, month, and day are uncertain and approximate + ('2004-06-11%', ('2004-06-11', '2004-06-09', '2004-06-13')), # uncertain year; month, day known ('2004?-06-11', ('2004-06-11', '2003-06-11', '2005-06-11')), # year and month are approximate; day known ('2004-06~-11', ('2004-06-11', '2003-05-11', '2005-07-11')), - # uncertain month, year and day known - ('2004-?06-11', ('2004-06-11', '2004-05-11', '2004-07-11')), + + # Qualification of individual component: a qualification character to the immediate left + # of the component applies to that component only # day is approximate; year, month known ('2004-06-~11', ('2004-06-11', '2004-06-10', '2004-06-12')), - # Year known, month within year is approximate and uncertain - NEW SPEC + # Year known, month within year is approximate and uncertain ('2004-%06', ('2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30')), - # Year known, month and day uncertain - NEW SPEC + # Year known, month and day uncertain ('2004-?06-?11', ('2004-06-11', '2004-05-10', '2004-07-12')), - # Year uncertain, month known, day approximate - NEW SPEC + # Year uncertain, month known, day approximate ('2004?-06-~11', ('2004-06-11', '2003-06-10', '2005-06-12')), - # Year uncertain and month is both uncertain and approximate - NEW SPEC + # Year uncertain and month is both uncertain and approximate ('?2004-%06', ('2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30')), # This has the same meaning as the previous example.- NEW SPEC ('2004?-%06', ('2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30')), - # Year uncertain, month and day approximate. - NEW SPEC + # Year uncertain, month and day approximate ('2004?-~06-~04', ('2004-06-04', '2003-05-03', '2005-07-05')), - # Year known, month and day approximate. 
- NEW SPEC + # Year known, month and day approximate ('2011-~06-~04', ('2011-06-04', '2011-05-03', '2011-07-05')), + # Partial unspecified # December 25 sometime during the 1560s ('156X-12-25', ('1560-12-25', '1569-12-25')), @@ -180,7 +186,6 @@ ('Y-17E7', ('-170000000-01-01', '-170000000-12-31')), # L2 significant digits # Some year between 171010000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.) - # TODO Not yet implemented, see https://github.com/ixc/python-edtf/issues/12 # ('Y17101E4S3', ('171010000-01-01', '171999999-12-31')), # L2 Seasons # Spring southern hemisphere, 2001 @@ -190,6 +195,7 @@ ) BAD_EXAMPLES = ( + # parentheses are not used for group qualification in the 2018 spec None, '', 'not a edtf string', From c0243606f05f79b6205e57e6d3b2ee50565e88c1 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Tue, 14 May 2024 13:30:41 +1000 Subject: [PATCH 040/102] First pass with `ruff` #49 --- .github/workflows/ci.yml | 10 +- edtf/__init__.py | 14 +- edtf/appsettings.py | 53 +-- edtf/convert.py | 18 +- edtf/fields.py | 88 +++-- edtf/jdutil.py | 102 +++-- edtf/natlang/en.py | 118 +++--- edtf/natlang/tests.py | 374 +++++++++--------- edtf/parser/grammar.py | 237 ++++++----- edtf/parser/grammar_test.py | 276 ++++++++----- edtf/parser/parser_classes.py | 274 +++++++------ edtf/parser/parser_classes_tests.py | 269 +++++++------ edtf/parser/tests.py | 242 ++++++------ edtf/tests.py | 41 +- .../edtf_django_tests/settings.py | 2 +- edtf_django_tests/edtf_django_tests/urls.py | 1 + edtf_django_tests/edtf_integration/admin.py | 2 - edtf_django_tests/edtf_integration/models.py | 28 +- edtf_django_tests/edtf_integration/tests.py | 46 ++- edtf_django_tests/edtf_integration/views.py | 2 - edtf_django_tests/manage.py | 1 + pyproject.toml | 44 ++- 22 files changed, 1289 insertions(+), 953 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e5daf0e..b41c764 100644 --- a/.github/workflows/ci.yml 
+++ b/.github/workflows/ci.yml @@ -28,11 +28,17 @@ jobs: run: | python -m pip install --upgrade pip pip install .[test] - + + - name: Check Python linting (Ruff) + run: ruff check --output-format=github + + - name: Check Python formatting (Ruff) + run: ruff format --check + - name: Run unit tests run: | pytest - + - name: Run Django integration tests working-directory: ./edtf_django_tests run: | diff --git a/edtf/__init__.py b/edtf/__init__.py index 291cccc..4d423fa 100644 --- a/edtf/__init__.py +++ b/edtf/__init__.py @@ -1,6 +1,12 @@ -from edtf.parser.grammar import parse_edtf +from edtf.convert import ( + dt_to_struct_time, + jd_to_struct_time, + old_specs_to_new_specs_expression, + struct_time_to_date, + struct_time_to_datetime, + struct_time_to_jd, + trim_struct_time, +) from edtf.natlang import text_to_edtf +from edtf.parser.grammar import parse_edtf from edtf.parser.parser_classes import * -from edtf.convert import dt_to_struct_time, struct_time_to_date, \ - struct_time_to_datetime, trim_struct_time, struct_time_to_jd, \ - jd_to_struct_time, old_specs_to_new_specs_expression diff --git a/edtf/appsettings.py b/edtf/appsettings.py index 8904c58..e1bc821 100644 --- a/edtf/appsettings.py +++ b/edtf/appsettings.py @@ -2,15 +2,19 @@ try: from django.core.exceptions import ImproperlyConfigured + try: from django.conf import settings - EDTF = getattr(settings, 'EDTF', {}) + + EDTF = getattr(settings, "EDTF", {}) except ImproperlyConfigured: EDTF = {} except ImportError: EDTF = {} -SEASON_MONTHS_RANGE = EDTF.get('SEASON_MONTHS_RANGE', { +SEASON_MONTHS_RANGE = EDTF.get( + "SEASON_MONTHS_RANGE", + { # season id: [earliest_month, last_month] 21: [3, 5], 22: [6, 8], @@ -20,10 +24,12 @@ # For simplicity, we assume it falls at the end of the year, esp since the # spec says that sort order goes spring > summer > autumn > winter 24: [12, 12], - } + }, ) -SEASON_L2_MONTHS_RANGE = EDTF.get('SEASON_L2_MONTHS_RANGE', { +SEASON_L2_MONTHS_RANGE = EDTF.get( + 
"SEASON_L2_MONTHS_RANGE", + { # season id: [earliest_month, last_month] 21: [3, 5], 22: [6, 8], @@ -57,28 +63,31 @@ 38: [5, 8], 39: [9, 12], 40: [1, 6], - 41: [7, 12] - } + 41: [7, 12], + }, ) -DAY_FIRST = EDTF.get('DAY_FIRST', False) # Americans! +DAY_FIRST = EDTF.get("DAY_FIRST", False) # Americans! -SEASONS = EDTF.get('SEASONS', { - 21: "spring", - 22: "summer", - 23: "autumn", - 24: "winter", -}) -INVERSE_SEASONS = EDTF.get('INVERSE_SEASONS', {v: k for k, v in SEASONS.items()}) +SEASONS = EDTF.get( + "SEASONS", + { + 21: "spring", + 22: "summer", + 23: "autumn", + 24: "winter", + }, +) +INVERSE_SEASONS = EDTF.get("INVERSE_SEASONS", {v: k for k, v in SEASONS.items()}) # also need to interpret `fall` -INVERSE_SEASONS['fall'] = 23 +INVERSE_SEASONS["fall"] = 23 # changing these will break tests -PADDING_DAY_PRECISION = EDTF.get('PADDING_DAY_PRECISION', relativedelta(days=1)) -PADDING_MONTH_PRECISION = EDTF.get('PADDING_MONTH_PRECISION', relativedelta(months=1)) -PADDING_YEAR_PRECISION = EDTF.get('PADDING_YEAR_PRECISION', relativedelta(years=1)) -PADDING_SEASON_PRECISION = EDTF.get('PADDING_SEASON_PRECISION', relativedelta(weeks=12)) -MULTIPLIER_IF_UNCERTAIN = EDTF.get('MULTIPLIER_IF_UNCERTAIN', 1.0) -MULTIPLIER_IF_APPROXIMATE = EDTF.get('MULTIPLIER_IF_APPROXIMATE', 1.0) -MULTIPLIER_IF_BOTH = EDTF.get('MULTIPLIER_IF_BOTH', 2.0) +PADDING_DAY_PRECISION = EDTF.get("PADDING_DAY_PRECISION", relativedelta(days=1)) +PADDING_MONTH_PRECISION = EDTF.get("PADDING_MONTH_PRECISION", relativedelta(months=1)) +PADDING_YEAR_PRECISION = EDTF.get("PADDING_YEAR_PRECISION", relativedelta(years=1)) +PADDING_SEASON_PRECISION = EDTF.get("PADDING_SEASON_PRECISION", relativedelta(weeks=12)) +MULTIPLIER_IF_UNCERTAIN = EDTF.get("MULTIPLIER_IF_UNCERTAIN", 1.0) +MULTIPLIER_IF_APPROXIMATE = EDTF.get("MULTIPLIER_IF_APPROXIMATE", 1.0) +MULTIPLIER_IF_BOTH = EDTF.get("MULTIPLIER_IF_BOTH", 2.0) DELTA_IF_UNKNOWN = EDTF.get("DELTA_IF_UNKNOWN", relativedelta(years=10)) diff --git a/edtf/convert.py 
b/edtf/convert.py index f8d070f..a294462 100644 --- a/edtf/convert.py +++ b/edtf/convert.py @@ -1,12 +1,12 @@ -from time import struct_time from datetime import date, datetime +from time import struct_time from edtf import jdutil - TIME_EMPTY_TIME = [0, 0, 0] # tm_hour, tm_min, tm_sec TIME_EMPTY_EXTRAS = [0, 0, -1] # tm_wday, tm_yday, tm_isdst + def old_specs_to_new_specs_expression(expression): expression = expression.replace("unknown", "") expression = expression.replace("open", "..") @@ -32,16 +32,15 @@ def dt_to_struct_time(dt): """ if isinstance(dt, datetime): return struct_time( - [dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second] + - TIME_EMPTY_EXTRAS + [dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second] + + TIME_EMPTY_EXTRAS ) elif isinstance(dt, date): return struct_time( [dt.year, dt.month, dt.day] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS ) else: - raise NotImplementedError( - "Cannot convert %s to `struct_time`" % type(dt)) + raise NotImplementedError(f"Cannot convert {type(dt)} to `struct_time`") def struct_time_to_date(st): @@ -112,12 +111,11 @@ def jd_to_struct_time(jd): # This conversion can return negative values for items we do not want to be # negative: month, day, hour, minute, second. 
year, month, day, hour, minute, second = _roll_negative_time_fields( - year, month, day, hour, minute, second) - - return struct_time( - [year, month, day, hour, minute, second] + TIME_EMPTY_EXTRAS + year, month, day, hour, minute, second ) + return struct_time([year, month, day, hour, minute, second] + TIME_EMPTY_EXTRAS) + def _roll_negative_time_fields(year, month, day, hour, minute, second): """ diff --git a/edtf/fields.py b/edtf/fields.py index d568375..b38873b 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -1,24 +1,22 @@ -try: - import cPickle as pickle -except: - import pickle +import pickle from django.core.exceptions import FieldDoesNotExist from django.db import models from django.db.models import signals from django.db.models.query_utils import DeferredAttribute -from edtf import parse_edtf, EDTFObject +from edtf import EDTFObject, parse_edtf from edtf.convert import struct_time_to_date, struct_time_to_jd from edtf.natlang import text_to_edtf DATE_ATTRS = ( - 'lower_strict', - 'upper_strict', - 'lower_fuzzy', - 'upper_fuzzy', + "lower_strict", + "upper_strict", + "lower_fuzzy", + "upper_fuzzy", ) + class EDTFFieldDescriptor(DeferredAttribute): """ Descriptor for the EDTFField's attribute on the model instance. 
@@ -35,36 +33,48 @@ def __set__(self, instance, value): class EDTFField(models.CharField): - def __init__( self, - verbose_name=None, name=None, + verbose_name=None, + name=None, natural_text_field=None, direct_input_field=None, lower_strict_field=None, upper_strict_field=None, lower_fuzzy_field=None, upper_fuzzy_field=None, - **kwargs + **kwargs, ): - kwargs['max_length'] = 2000 - self.natural_text_field, self.direct_input_field, \ - self.lower_strict_field, self.upper_strict_field, \ - self.lower_fuzzy_field, self.upper_fuzzy_field = \ - natural_text_field, direct_input_field, lower_strict_field, \ - upper_strict_field, lower_fuzzy_field, upper_fuzzy_field - super(EDTFField, self).__init__(verbose_name, name, **kwargs) - - description = "A field for storing complex/fuzzy date specifications in EDTF format." + kwargs["max_length"] = 2000 + ( + self.natural_text_field, + self.direct_input_field, + self.lower_strict_field, + self.upper_strict_field, + self.lower_fuzzy_field, + self.upper_fuzzy_field, + ) = ( + natural_text_field, + direct_input_field, + lower_strict_field, + upper_strict_field, + lower_fuzzy_field, + upper_fuzzy_field, + ) + super().__init__(verbose_name, name, **kwargs) + + description = ( + "A field for storing complex/fuzzy date specifications in EDTF format." 
+ ) descriptor_class = EDTFFieldDescriptor def deconstruct(self): - name, path, args, kwargs = super(EDTFField, self).deconstruct() + name, path, args, kwargs = super().deconstruct() if self.natural_text_field: - kwargs['natural_text_field'] = self.natural_text_field + kwargs["natural_text_field"] = self.natural_text_field for attr in DATE_ATTRS: - field = "%s_field" % attr + field = f"{attr}_field" f = getattr(self, field, None) if f: kwargs[field] = f @@ -96,11 +106,11 @@ def to_python(self, value): def get_db_prep_save(self, value, connection): if value: return pickle.dumps(value) - return super(EDTFField, self).get_db_prep_save(value, connection) + return super().get_db_prep_save(value, connection) def get_prep_value(self, value): # convert python objects to query values - value = super(EDTFField, self).get_prep_value(value) + value = super().get_prep_value(value) if isinstance(value, EDTFObject): return pickle.dumps(value) return value @@ -112,15 +122,19 @@ def update_values(self, instance, *args, **kwargs): which allows directly providing an EDTF string. If one of these provides a valid EDTF object, then set the date values accordingly. """ - + # Get existing value to determine if update is needed existing_value = getattr(instance, self.attname, None) direct_input = getattr(instance, self.direct_input_field, None) natural_text = getattr(instance, self.natural_text_field, None) # if direct_input is provided and is different from the existing value, update the EDTF field - if direct_input and (existing_value is None or str(existing_value) != direct_input): - edtf = parse_edtf(direct_input, fail_silently=True) # ParseException if invalid; should this be raised? + if direct_input and ( + existing_value is None or str(existing_value) != direct_input + ): + edtf = parse_edtf( + direct_input, fail_silently=True + ) # ParseException if invalid; should this be raised? 
# TODO pyparsing.ParseExceptions are very noisy and dumps the whole grammar (see https://github.com/ixc/python-edtf/issues/46) # set the natural_text (display) field to the direct_input if it is not provided @@ -129,8 +143,12 @@ def update_values(self, instance, *args, **kwargs): elif natural_text: edtf_string = text_to_edtf(natural_text) - if edtf_string and (existing_value is None or str(existing_value) != edtf_string): - edtf = parse_edtf(edtf_string, fail_silently=True) # potetial ParseException if invalid; should this be raised? + if edtf_string and ( + existing_value is None or str(existing_value) != edtf_string + ): + edtf = parse_edtf( + edtf_string, fail_silently=True + ) # potetial ParseException if invalid; should this be raised? else: edtf = existing_value else: @@ -157,9 +175,9 @@ def update_values(self, instance, *args, **kwargs): value = struct_time_to_date(value) else: raise NotImplementedError( - u"EDTFField does not support %s as a derived data" - u" field, only FloatField or DateField" - % type(target_field)) + f"EDTFField does not support {type(target_field)} as a derived data" + " field, only FloatField or DateField" + ) setattr(instance, g, value) else: setattr(instance, g, None) @@ -172,4 +190,4 @@ def contribute_to_class(self, cls, name, **kwargs): # Model.__init__, see Django bug #11196. # Only run post-initialization values update on non-abstract models if not cls._meta.abstract: - signals.post_init.connect(self.update_values, sender=cls) \ No newline at end of file + signals.post_init.connect(self.update_values, sender=cls) diff --git a/edtf/jdutil.py b/edtf/jdutil.py index 9fabdd1..16cd312 100644 --- a/edtf/jdutil.py +++ b/edtf/jdutil.py @@ -9,14 +9,15 @@ """ -import math import datetime as dt +import math # Note: The Python datetime module assumes an infinitely valid Gregorian calendar. # The Gregorian calendar took effect after 10-15-1582 and the dates 10-05 through # 10-14-1582 never occurred. 
Python datetime objects will produce incorrect # time deltas if one date is from before 10-15-1582. + def mjd_to_jd(mjd): """ Convert Modified Julian Day to Julian Day. @@ -54,11 +55,11 @@ def jd_to_mjd(jd): return jd - 2400000.5 -def date_to_jd(year,month,day): +def date_to_jd(year, month, day): """ Convert a date to Julian Day. - Algorithm from 'Practical Astronomy with your Calculator or Spreadsheet', + Algorithm from 'Practical Astronomy with your Calculator or Spreadsheet', 4th ed., Duffet-Smith and Zwart, 2011. Parameters @@ -95,20 +96,19 @@ def date_to_jd(year,month,day): # this checks where we are in relation to October 15, 1582, the beginning # of the Gregorian calendar. - if ((year < 1582) or - (year == 1582 and month < 10) or - (year == 1582 and month == 10 and day < 15)): + if ( + (year < 1582) + or (year == 1582 and month < 10) + or (year == 1582 and month == 10 and day < 15) + ): # before start of Gregorian calendar B = 0 else: # after start of Gregorian calendar - A = math.trunc(yearp / 100.) - B = 2 - A + math.trunc(A / 4.) + A = math.trunc(yearp / 100.0) + B = 2 - A + math.trunc(A / 4.0) - if yearp < 0: - C = math.trunc((365.25 * yearp) - 0.75) - else: - C = math.trunc(365.25 * yearp) + C = math.trunc(365.25 * yearp - 0.75) if yearp < 0 else math.trunc(365.25 * yearp) D = math.trunc(30.6001 * (monthp + 1)) @@ -121,7 +121,7 @@ def jd_to_date(jd): """ Convert Julian Day to date. - Algorithm from 'Practical Astronomy with your Calculator or Spreadsheet', + Algorithm from 'Practical Astronomy with your Calculator or Spreadsheet', 4th ed., Duffet-Smith and Zwart, 2011. Parameters @@ -151,15 +151,12 @@ def jd_to_date(jd): """ jd = jd + 0.5 - F, I = math.modf(jd) - I = int(I) + F, I = math.modf(jd) # noqa: E741 + I = int(I) # noqa: E741 - A = math.trunc((I - 1867216.25)/36524.25) + A = math.trunc((I - 1867216.25) / 36524.25) - if I > 2299160: - B = I + 1 + A - math.trunc(A / 4.) 
- else: - B = I + B = I + 1 + A - math.trunc(A / 4.0) if I > 2299160 else I C = B + 1524 @@ -171,20 +168,14 @@ def jd_to_date(jd): day = C - E + F - math.trunc(30.6001 * G) - if G < 13.5: - month = G - 1 - else: - month = G - 13 + month = G - 1 if G < 13.5 else G - 13 - if month > 2.5: - year = D - 4716 - else: - year = D - 4715 + year = D - 4716 if month > 2.5 else D - 4715 return year, month, day -def hmsm_to_days(hour=0,min=0,sec=0,micro=0): +def hmsm_to_days(hour=0, min=0, sec=0, micro=0): """ Convert hours, minutes, seconds, and microseconds to fractional days. @@ -213,13 +204,13 @@ def hmsm_to_days(hour=0,min=0,sec=0,micro=0): 0.25 """ - days = sec + (micro / 1.e6) + days = sec + (micro / 1.0e6) - days = min + (days / 60.) + days = min + (days / 60.0) - days = hour + (days / 60.) + days = hour + (days / 60.0) - return days / 24. + return days / 24.0 def days_to_hmsm(days): @@ -257,16 +248,16 @@ def days_to_hmsm(days): (2, 24, 0, 0) """ - hours = days * 24. + hours = days * 24.0 hours, hour = math.modf(hours) - mins = hours * 60. + mins = hours * 60.0 mins, min = math.modf(mins) - secs = mins * 60. 
+ secs = mins * 60.0 secs, sec = math.modf(secs) - micro = round(secs * 1.e6) + micro = round(secs * 1.0e6) return int(hour), int(min), int(sec), int(micro) @@ -286,16 +277,18 @@ def datetime_to_jd(date): Examples -------- - >>> d = datetime.datetime(1985,2,17,6) + >>> d = datetime.datetime(1985,2,17,6) >>> d datetime.datetime(1985, 2, 17, 6, 0) >>> jdutil.datetime_to_jd(d) 2446113.75 """ - days = date.day + hmsm_to_days(date.hour,date.minute,date.second,date.microsecond) + days = date.day + hmsm_to_days( + date.hour, date.minute, date.second, date.microsecond + ) - return date_to_jd(date.year,date.month,days) + return date_to_jd(date.year, date.month, days) def jd_to_datetime(jd): @@ -320,12 +313,12 @@ def jd_to_datetime(jd): """ year, month, day = jd_to_date(jd) - frac_days,day = math.modf(day) + frac_days, day = math.modf(day) day = int(day) - hour,min,sec,micro = days_to_hmsm(frac_days) + hour, min, sec, micro = days_to_hmsm(frac_days) - return datetime(year,month,day,hour,min,sec,micro) + return datetime(year, month, day, hour, min, sec, micro) def timedelta_to_days(td): @@ -350,9 +343,9 @@ def timedelta_to_days(td): 4.5 """ - seconds_in_day = 24. * 3600. + seconds_in_day = 24.0 * 3600.0 - days = td.days + (td.seconds + (td.microseconds * 10.e6)) / seconds_in_day + days = td.days + (td.seconds + (td.microseconds * 10.0e6)) / seconds_in_day return days @@ -372,8 +365,9 @@ class datetime(dt.datetime): datetime.datetime : Parent class. 
""" - def __add__(self,other): - if not isinstance(other,dt.timedelta): + + def __add__(self, other): + if not isinstance(other, dt.timedelta): s = "jdutil.datetime supports '+' only with datetime.timedelta" raise TypeError(s) @@ -383,8 +377,8 @@ def __add__(self,other): return jd_to_datetime(combined) - def __radd__(self,other): - if not isinstance(other,dt.timedelta): + def __radd__(self, other): + if not isinstance(other, dt.timedelta): s = "jdutil.datetime supports '+' only with datetime.timedelta" raise TypeError(s) @@ -394,15 +388,15 @@ def __radd__(self,other): return jd_to_datetime(combined) - def __sub__(self,other): - if isinstance(other,dt.timedelta): + def __sub__(self, other): + if isinstance(other, dt.timedelta): days = timedelta_to_days(other) combined = datetime_to_jd(self) - days return jd_to_datetime(combined) - elif isinstance(other, (datetime,dt.datetime)): + elif isinstance(other, (datetime, dt.datetime)): diff = datetime_to_jd(self) - datetime_to_jd(other) return dt.timedelta(diff) @@ -412,8 +406,8 @@ def __sub__(self,other): s += "datetime.timedelta, jdutil.datetime and datetime.datetime" raise TypeError(s) - def __rsub__(self,other): - if not isinstance(other, (datetime,dt.datetime)): + def __rsub__(self, other): + if not isinstance(other, (datetime, dt.datetime)): s = "jdutil.datetime supports '-' with: " s += "jdutil.datetime and datetime.datetime" raise TypeError(s) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index ff83034..5fb2fea 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -1,10 +1,12 @@ """Utilities to derive an EDTF string from an (English) natural language string.""" -from datetime import datetime -from dateutil.parser import parse, ParserError + import re -from edtf import appsettings +from datetime import datetime + +from dateutil.parser import ParserError, parse from six.moves import xrange +from edtf import appsettings # two dates where every digit of an ISO date representation is different, # and one is 
in the past and one is in the future. @@ -12,15 +14,15 @@ DEFAULT_DATE_1 = datetime(1234, 1, 1, 0, 0) DEFAULT_DATE_2 = datetime(5678, 10, 10, 0, 0) -SHORT_YEAR_RE = r'(-?)([\du])([\dxu])([\dxu])([\dxu])' -LONG_YEAR_RE = r'y(-?)([1-9]\d\d\d\d+)' -CENTURY_RE = r'(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?' -CE_RE = r'(\d{1,4}) (ad|ce|bc|bce)' +SHORT_YEAR_RE = r"(-?)([\du])([\dxu])([\dxu])([\dxu])" +LONG_YEAR_RE = r"y(-?)([1-9]\d\d\d\d+)" +CENTURY_RE = r"(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?" +CE_RE = r"(\d{1,4}) (ad|ce|bc|bce)" # Set of RE rules that will cause us to abort text processing, since we know # the results will be wrong. REJECT_RULES = ( - r'.*dynasty.*', # Don't parse '23rd Dynasty' to 'uuuu-uu-23' + r".*dynasty.*", # Don't parse '23rd Dynasty' to 'uuuu-uu-23' ) @@ -41,7 +43,6 @@ def text_to_edtf(text): # TODO: assemble multiple dates into a {} or [] structure. for split in [",", ";", "or"]: for list_item in t.split(split): - # try parsing as an interval - split by '-' toks = list_item.split("-") if len(toks) == 2: @@ -51,18 +52,23 @@ def text_to_edtf(text): # match looks from the beginning of the string, search # looks anywhere. - if re.match(r'\d\D\b', d2): # 1-digit year partial e.g. 1868-9 - if re.search(r'\b\d\d\d\d$', d1): # TODO: evaluate it and see if it's a year + if re.match(r"\d\D\b", d2): # 1-digit year partial e.g. 1868-9 + if re.search( + r"\b\d\d\d\d$", d1 + ): # TODO: evaluate it and see if it's a year d2 = d1[-4:-1] + d2 - elif re.match(r'\d\d\b', d2): # 2-digit year partial e.g. 1809-10 - if re.search(r'\b\d\d\d\d$', d1): + elif re.match(r"\d\d\b", d2): # 2-digit year partial e.g. 
1809-10 + if re.search(r"\b\d\d\d\d$", d1): d2 = d1[-4:-2] + d2 else: - century_range_match = re.search(r'\b(\d\d)(th|st|nd|rd|)-(\d\d)(th|st|nd|rd) [cC]', "%s-%s" % (d1,d2)) + century_range_match = re.search( + r"\b(\d\d)(th|st|nd|rd|)-(\d\d)(th|st|nd|rd) [cC]", + f"{d1}-{d2}", + ) if century_range_match: g = century_range_match.groups() - d1 = "%sC" % g[0] - d2 = "%sC" % g[2] + d1 = f"{g[0]}C" + d2 = f"{g[2]}C" r1 = text_to_edtf_date(d1) r2 = text_to_edtf_date(d2) @@ -79,7 +85,7 @@ def text_to_edtf(text): else: int_match = re.search(r"(\d\d\d\d)\/(\d\d\d\d)", list_item) if int_match: - return "[%s, %s]" % (int_match.group(1), int_match.group(2)) + return f"[{int_match.group(1)}, {int_match.group(2)}]" result = text_to_edtf_date(list_item) if result: @@ -87,17 +93,17 @@ def text_to_edtf(text): if result: break - is_before = re.findall(r'\bbefore\b', t) - is_before = is_before or re.findall(r'\bearlier\b', t) + is_before = re.findall(r"\bbefore\b", t) + is_before = is_before or re.findall(r"\bearlier\b", t) - is_after = re.findall(r'\bafter\b', t) - is_after = is_after or re.findall(r'\bsince\b', t) - is_after = is_after or re.findall(r'\blater\b', t) + is_after = re.findall(r"\bafter\b", t) + is_after = is_after or re.findall(r"\bsince\b", t) + is_after = is_after or re.findall(r"\blater\b", t) if is_before: - result = u"unknown/%s" % result + result = f"unknown/{result}" elif is_after: - result = u"%s/unknown" % result + result = f"{result}/unknown" return result @@ -114,36 +120,34 @@ def text_to_edtf_date(text): return t = text.lower() - result = '' + result = "" for reject_re in REJECT_RULES: if re.match(reject_re, t): return # matches on '1800s'. Needs to happen before is_decade. - could_be_century = re.findall(r'(\d{2}00)s', t) + could_be_century = re.findall(r"(\d{2}00)s", t) # matches on '1800s' and '1910s'. Removes the 's'. # Needs to happen before is_uncertain because e.g. "1860s?" 
- t, is_decade = re.subn(r'(\d{3}0)s', r'\1', t) + t, is_decade = re.subn(r"(\d{3}0)s", r"\1", t) # detect approximation signifiers # a few 'circa' abbreviations just before the year - is_approximate = re.findall(r'\b(ca?\.?) ?\d{4}', t) + is_approximate = re.findall(r"\b(ca?\.?) ?\d{4}", t) # the word 'circa' anywhere - is_approximate = is_approximate or re.findall(r'\bcirca\b', t) + is_approximate = is_approximate or re.findall(r"\bcirca\b", t) # the word 'approx'/'around'/'about' anywhere - is_approximate = is_approximate or \ - re.findall(r'\b(approx|around|about)', t) + is_approximate = is_approximate or re.findall(r"\b(approx|around|about)", t) # a ~ before a year-ish number - is_approximate = is_approximate or re.findall(r'\b~\d{4}', t) + is_approximate = is_approximate or re.findall(r"\b~\d{4}", t) # a ~ at the beginning - is_approximate = is_approximate or re.findall(r'^~', t) + is_approximate = is_approximate or re.findall(r"^~", t) # detect uncertainty signifiers - t, is_uncertain = re.subn(r'(\d{4})\?', r'\1', t) + t, is_uncertain = re.subn(r"(\d{4})\?", r"\1", t) # the words uncertain/maybe/guess anywhere - is_uncertain = is_uncertain or re.findall( - r'\b(uncertain|possibly|maybe|guess)', t) + is_uncertain = is_uncertain or re.findall(r"\b(uncertain|possibly|maybe|guess)", t) # detect century forms is_century = re.findall(CENTURY_RE, t) @@ -152,27 +156,25 @@ def text_to_edtf_date(text): is_ce = re.findall(CE_RE, t) if is_century: result = "%02dxx" % (int(is_century[0][0]) - 1,) - is_approximate = is_approximate or \ - re.findall(r'\b(ca?\.?) ?' + CENTURY_RE, t) - is_uncertain = is_uncertain or re.findall(CENTURY_RE + r'\?', t) + is_approximate = is_approximate or re.findall(r"\b(ca?\.?) ?" 
+ CENTURY_RE, t) + is_uncertain = is_uncertain or re.findall(CENTURY_RE + r"\?", t) try: is_bc = is_century[0][-1] in ("bc", "bce") if is_bc: - result = "-%s" % result + result = f"-{result}" except IndexError: pass elif is_ce: result = "%04d" % (int(is_ce[0][0])) - is_approximate = is_approximate or \ - re.findall(r'\b(ca?\.?) ?' + CE_RE, t) - is_uncertain = is_uncertain or re.findall(CE_RE + r'\?', t) + is_approximate = is_approximate or re.findall(r"\b(ca?\.?) ?" + CE_RE, t) + is_uncertain = is_uncertain or re.findall(CE_RE + r"\?", t) try: is_bc = is_ce[0][-1] in ("bc", "bce") if is_bc: - result = "-%s" % result + result = f"-{result}" except IndexError: pass @@ -187,7 +189,7 @@ def text_to_edtf_date(text): dayfirst=appsettings.DAY_FIRST, yearfirst=False, fuzzy=True, # force a match, even if it's default date - default=DEFAULT_DATE_1 + default=DEFAULT_DATE_1, ) dt2 = parse( @@ -195,16 +197,15 @@ def text_to_edtf_date(text): dayfirst=appsettings.DAY_FIRST, yearfirst=False, fuzzy=True, # force a match, even if it's default date - default=DEFAULT_DATE_2 + default=DEFAULT_DATE_2, ) - except ParserError as pe: + except ParserError: return - except Exception as e: + except Exception: return - if dt1.date() == DEFAULT_DATE_1.date() and \ - dt2.date() == DEFAULT_DATE_2.date(): + if dt1.date() == DEFAULT_DATE_1.date() and dt2.date() == DEFAULT_DATE_2.date(): # couldn't parse anything - defaults are untouched. 
return @@ -212,22 +213,21 @@ def text_to_edtf_date(text): date2 = dt2.isoformat()[:10] # guess precision of 'unspecified' characters to use - mentions_year = re.findall(r'\byear\b.+(in|during)\b', t) - mentions_month = re.findall(r'\bmonth\b.+(in|during)\b', t) - mentions_day = re.findall(r'\bday\b.+(in|during)\b', t) + mentions_year = re.findall(r"\byear\b.+(in|during)\b", t) + mentions_month = re.findall(r"\bmonth\b.+(in|during)\b", t) + mentions_day = re.findall(r"\bday\b.+(in|during)\b", t) for i in xrange(len(date1)): # if the given year could be a century (e.g. '1800s') then use # approximate/uncertain markers to decide whether we treat it as # a century or a decade. - if i == 2 and could_be_century and \ - not (is_approximate or is_uncertain): - result += 'x' + if i == 2 and could_be_century and not (is_approximate or is_uncertain): + result += "x" elif i == 3 and is_decade > 0: if mentions_year: - result += 'u' # year precision + result += "u" # year precision else: - result += 'x' # decade precision + result += "x" # decade precision elif date1[i] == date2[i]: # since both attempts at parsing produced the same result # it must be parsed value, not a default @@ -240,7 +240,7 @@ def text_to_edtf_date(text): # strip off unknown chars from end of string - except the first 4 for i in reversed(xrange(len(result))): - if result[i] not in ('u', 'x', '-'): + if result[i] not in ("u", "x", "-"): smallest_length = 4 if mentions_month: diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py index eaa9af6..3602775 100644 --- a/edtf/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -1,197 +1,194 @@ +# ruff: noqa: S101 # Asserts are ok in tests + import pytest + from edtf.natlang.en import text_to_edtf # TODO update the tests and code to test and output the new spec -# where examples are tuples, the second item is the normalised output -@pytest.mark.parametrize("input_text,expected_output", [ - # Ignoring 'late' for simplicity in these examples - ('active late 17th-19th 
centuries', '16xx/18xx'), - ('active 17-19th Centuries', '16xx/18xx'), - - # Unrecognised values - ('', None), - ('this isn\'t a date', None), - - # Explicitly rejected values that would otherwise be badly converted - ('23rd Dynasty', None), - - # Implied century and specific years - ('90', '1990'), # Implied century - ('1860', '1860'), - ('the year 1800', '1800'), - ('the year 1897', '1897'), - ('January 2008', '2008-01'), - ('January 12, 1940', '1940-01-12'), - - # Uncertain or approximate dates - ('1860?', '1860?'), - ('1862 (uncertain)', '1862?'), - ('maybe 1862', '1862?'), - ('1862 maybe', '1862?'), - ('1862 guess', '1862?'), - ('uncertain: 1862', '1862?'), - ('uncertain: Jan 18 1862', '1862-01-18?'), - ('~ Feb 1812', '1812-02~'), - ('circa Feb 1812', '1812-02~'), - ('Feb 1812 approx', '1812-02~'), - ('c1860', '1860~'), # Different abbreviations - ('c.1860', '1860~'), # With or without . - ('ca1860', '1860~'), - ('ca.1860', '1860~'), - ('c 1860', '1860~'), # With or without space - ('c. 1860', '1860~'), - ('ca. 1860', '1860~'), - ('approx 1860', '1860~'), - ('1860 approx', '1860~'), - ('1860 approximately', '1860~'), - ('approximately 1860', '1860~'), - ('about 1860', '1860~'), - ('about Spring 1849', '1849-21~'), - ('notcirca 1860', '1860'), # Avoid words containing 'circa' - ('attica 1802', '1802'), # Avoid false positive 'circa' at the end of preceding word - ('attic. 1802', '1802'), # Avoid false positive 'circa' - - # Masked precision - ('1860s', '186x'), # 186x has decade precision, 186u has year precision. - - # Masked precision + uncertainty - ('ca. 1860s', '186x~'), - ('c. 1860s', '186x~'), - ('Circa 1840s', '184x~'), - ('circa 1840s', '184x~'), - ('ca. 
1860s?', '186x?~'), - ('uncertain: approx 1862', '1862?~'), - - # Ambiguous masked precision for centuries and decades - ('1800s', '18xx'), # Without additional uncertainty, use the century - ('2000s', '20xx'), # Without additional uncertainty, use the century - ('c1900s', '190x~'), # If there's additional uncertainty, use the decade - ('c1800s?', '180x?~'), # If there's additional uncertainty, use the decade - - # Unspecified dates - ('January 12', 'uuuu-01-12'), - ('January', 'uuuu-01'), - ('10/7/2008', '2008-10-07'), - ('7/2008', '2008-07'), - - # Seasons mapped to specific codes - ('Spring 1872', '1872-21'), - ('Summer 1872', '1872-22'), - ('Autumn 1872', '1872-23'), - ('Fall 1872', '1872-23'), - ('Winter 1872', '1872-24'), - - # Dates relative to known events (before/after) - ('earlier than 1928', 'unknown/1928'), - ('before 1928', 'unknown/1928'), - ('after 1928', '1928/unknown'), - ('later than 1928', '1928/unknown'), - ('before January 1928', 'unknown/1928-01'), - ('before 18 January 1928', 'unknown/1928-01-18'), - - # Approximations combined with before/after - ('before approx January 18 1928', 'unknown/1928-01-18~'), - ('before approx January 1928', 'unknown/1928-01~'), - ('after approx January 1928', '1928-01~/unknown'), - ('after approx Summer 1928', '1928-22~/unknown'), - - # Before and after with uncertain / unspecified components - ('after about the 1920s', '192x~/unknown'), - ('before about the 1900s', 'unknown/190x~'), - ('before the 1900s', 'unknown/19xx'), - - # Specifying unspecified components within a date - # ('decade in 1800s', '18ux'), #too esoteric - # ('decade somewhere during the 1800s', '18ux'), #lengthier. 
Keywords are 'in' or 'during' - ('year in the 1860s', '186u'), # 186x has decade precision - ('year in the 1800s', '18xu'), # 186u has year precision - ('year in about the 1800s', '180u~'), - ('month in 1872', '1872-uu'), - ('day in Spring 1849', '1849-21-uu'), - ('day in January 1872', '1872-01-uu'), - ('day in 1872', '1872-uu-uu'), - ('birthday in 1872', '1872'), - - # Handling centuries with approximation and uncertainty - ('1st century', '00xx'), - ('10c', '09xx'), - ('19th century', '18xx'), - ('19th century?', '18xx?'), - ('before 19th century', 'unknown/18xx'), - ('19c', '18xx'), - ('15c.', '14xx'), - ('ca. 19c', '18xx~'), - ('~19c', '18xx~'), - ('about 19c', '18xx~'), - ('19c?', '18xx?'), - ('c.19c?', '18xx?~'), - - # BC/AD dating - ('1 AD', '0001'), - ('17 CE', '0017'), - ('127 CE', '0127'), - ('1270 CE', '1270'), - ('c1 AD', '0001~'), - ('c17 CE', '0017~'), - ('c127 CE', '0127~'), - ('c1270 CE', '1270~'), - ('c64 BCE', '-0064~'), - ('2nd century bc', '-01xx'), # -200 to -101 - ('2nd century bce', '-01xx'), - ('2nd century ad', '01xx'), - ('2nd century ce', '01xx'), - - # Combining uncertainties and approximations in creative ways - ('a day in about Spring 1849?', '1849-21-uu?~'), - - # Simple date ranges, showcasing both the limitations and capabilities of the parser - # Not all of these results are correct EDTF, but this is as good as the EDTF implementation - # and simple natural language parser we have. - ('1851-1852', '1851/1852'), - ('1851-1852; printed 1853-1854', '1851/1852'), - ('1851-52', '1851/1852'), - ('1852 - 1860', '1852/1860'), - ('1856-ca. 1865', '1856/1865~'), - ('1857-mid 1860s', '1857/186x'), - ('1858/1860', '[1858, 1860]'), - ('1860s-1870s', '186x/187x'), - ('1910-30', '1910/1930'), - ('active 1910-30', '1910/1930'), - ('1861-67', '1861/1867'), - ('1861-67 (later print)', '1861/1867'), - ('1863 or 1864', '1863'), - ('1863, printed 1870', '1863'), - ('1863, printed ca. 1866', '1863'), - ('1864 or 1866', '1864'), - ('1864, printed ca. 
1864', '1864'), - ('1864-1872, printed 1870s', '1864/1872'), - ('1868-1871?', '1868/1871?'), - ('1869-70', '1869/1870'), - ('1870s, printed ca. 1880s', '187x'), - ('1900-1903, cast before 1929', '1900/1903'), - ('1900; 1973', '1900'), - ('1900; printed 1912', '1900'), - ('1915 late - autumn 1916', '1915/1916-23'), - ('1915, from Camerawork, October 1916', '1915'), # should be {1915, 1916-10} - ('1920s -early 1930s', '192x/193x'), - ('1930s, printed early 1960s', '193x'), # should be something like {193x, 196x}, - ('1932, printed 1976 by Gunther Sander', '1932'), # should be {1932, 1976} - ('1938, printed 1940s-1950s', '1938') # should be something like {1938, 194x-195x} - - # Uncertain and approximate on different parts of the date - # for these to work we need to recast is_uncertain and is_approximate - # such that they work on different parts. Probably worth rolling our own - # dateparser at this point. - # ('July in about 1849', '1849~-07'), - # ('a day in July in about 1849', '1849~-07-uu'), - # ('a day in Spring in about 1849', '1849~-21-uu'), - # ('a day in about July? in about 1849', '1849~-07?~-uu'), - # ('a day in about Spring in about 1849', '1849~-21~-uu'), - # ('maybe January in some year in about the 1830s', '183u~-01?'), - # ('about July? 
in about 1849', '1849~-07?~'), -]) +# where examples are tuples, the second item is the normalised output +@pytest.mark.parametrize( + "input_text,expected_output", + [ + # Ignoring 'late' for simplicity in these examples + ("active late 17th-19th centuries", "16xx/18xx"), + ("active 17-19th Centuries", "16xx/18xx"), + # Unrecognised values + ("", None), + ("this isn't a date", None), + # Explicitly rejected values that would otherwise be badly converted + ("23rd Dynasty", None), + # Implied century and specific years + ("90", "1990"), # Implied century + ("1860", "1860"), + ("the year 1800", "1800"), + ("the year 1897", "1897"), + ("January 2008", "2008-01"), + ("January 12, 1940", "1940-01-12"), + # Uncertain or approximate dates + ("1860?", "1860?"), + ("1862 (uncertain)", "1862?"), + ("maybe 1862", "1862?"), + ("1862 maybe", "1862?"), + ("1862 guess", "1862?"), + ("uncertain: 1862", "1862?"), + ("uncertain: Jan 18 1862", "1862-01-18?"), + ("~ Feb 1812", "1812-02~"), + ("circa Feb 1812", "1812-02~"), + ("Feb 1812 approx", "1812-02~"), + ("c1860", "1860~"), # Different abbreviations + ("c.1860", "1860~"), # With or without . + ("ca1860", "1860~"), + ("ca.1860", "1860~"), + ("c 1860", "1860~"), # With or without space + ("c. 1860", "1860~"), + ("ca. 1860", "1860~"), + ("approx 1860", "1860~"), + ("1860 approx", "1860~"), + ("1860 approximately", "1860~"), + ("approximately 1860", "1860~"), + ("about 1860", "1860~"), + ("about Spring 1849", "1849-21~"), + ("notcirca 1860", "1860"), # Avoid words containing 'circa' + ( + "attica 1802", + "1802", + ), # Avoid false positive 'circa' at the end of preceding word + ("attic. 1802", "1802"), # Avoid false positive 'circa' + # Masked precision + ("1860s", "186x"), # 186x has decade precision, 186u has year precision. + # Masked precision + uncertainty + ("ca. 1860s", "186x~"), + ("c. 1860s", "186x~"), + ("Circa 1840s", "184x~"), + ("circa 1840s", "184x~"), + ("ca. 
1860s?", "186x?~"), + ("uncertain: approx 1862", "1862?~"), + # Ambiguous masked precision for centuries and decades + ("1800s", "18xx"), # Without additional uncertainty, use the century + ("2000s", "20xx"), # Without additional uncertainty, use the century + ("c1900s", "190x~"), # If there's additional uncertainty, use the decade + ("c1800s?", "180x?~"), # If there's additional uncertainty, use the decade + # Unspecified dates + ("January 12", "uuuu-01-12"), + ("January", "uuuu-01"), + ("10/7/2008", "2008-10-07"), + ("7/2008", "2008-07"), + # Seasons mapped to specific codes + ("Spring 1872", "1872-21"), + ("Summer 1872", "1872-22"), + ("Autumn 1872", "1872-23"), + ("Fall 1872", "1872-23"), + ("Winter 1872", "1872-24"), + # Dates relative to known events (before/after) + ("earlier than 1928", "unknown/1928"), + ("before 1928", "unknown/1928"), + ("after 1928", "1928/unknown"), + ("later than 1928", "1928/unknown"), + ("before January 1928", "unknown/1928-01"), + ("before 18 January 1928", "unknown/1928-01-18"), + # Approximations combined with before/after + ("before approx January 18 1928", "unknown/1928-01-18~"), + ("before approx January 1928", "unknown/1928-01~"), + ("after approx January 1928", "1928-01~/unknown"), + ("after approx Summer 1928", "1928-22~/unknown"), + # Before and after with uncertain / unspecified components + ("after about the 1920s", "192x~/unknown"), + ("before about the 1900s", "unknown/190x~"), + ("before the 1900s", "unknown/19xx"), + # Specifying unspecified components within a date + # ('decade in 1800s', '18ux'), #too esoteric + # ('decade somewhere during the 1800s', '18ux'), #lengthier. 
Keywords are 'in' or 'during' + ("year in the 1860s", "186u"), # 186x has decade precision + ("year in the 1800s", "18xu"), # 186u has year precision + ("year in about the 1800s", "180u~"), + ("month in 1872", "1872-uu"), + ("day in Spring 1849", "1849-21-uu"), + ("day in January 1872", "1872-01-uu"), + ("day in 1872", "1872-uu-uu"), + ("birthday in 1872", "1872"), + # Handling centuries with approximation and uncertainty + ("1st century", "00xx"), + ("10c", "09xx"), + ("19th century", "18xx"), + ("19th century?", "18xx?"), + ("before 19th century", "unknown/18xx"), + ("19c", "18xx"), + ("15c.", "14xx"), + ("ca. 19c", "18xx~"), + ("~19c", "18xx~"), + ("about 19c", "18xx~"), + ("19c?", "18xx?"), + ("c.19c?", "18xx?~"), + # BC/AD dating + ("1 AD", "0001"), + ("17 CE", "0017"), + ("127 CE", "0127"), + ("1270 CE", "1270"), + ("c1 AD", "0001~"), + ("c17 CE", "0017~"), + ("c127 CE", "0127~"), + ("c1270 CE", "1270~"), + ("c64 BCE", "-0064~"), + ("2nd century bc", "-01xx"), # -200 to -101 + ("2nd century bce", "-01xx"), + ("2nd century ad", "01xx"), + ("2nd century ce", "01xx"), + # Combining uncertainties and approximations in creative ways + ("a day in about Spring 1849?", "1849-21-uu?~"), + # Simple date ranges, showcasing both the limitations and capabilities of the parser + # Not all of these results are correct EDTF, but this is as good as the EDTF implementation + # and simple natural language parser we have. + ("1851-1852", "1851/1852"), + ("1851-1852; printed 1853-1854", "1851/1852"), + ("1851-52", "1851/1852"), + ("1852 - 1860", "1852/1860"), + ("1856-ca. 1865", "1856/1865~"), + ("1857-mid 1860s", "1857/186x"), + ("1858/1860", "[1858, 1860]"), + ("1860s-1870s", "186x/187x"), + ("1910-30", "1910/1930"), + ("active 1910-30", "1910/1930"), + ("1861-67", "1861/1867"), + ("1861-67 (later print)", "1861/1867"), + ("1863 or 1864", "1863"), + ("1863, printed 1870", "1863"), + ("1863, printed ca. 1866", "1863"), + ("1864 or 1866", "1864"), + ("1864, printed ca. 
1864", "1864"), + ("1864-1872, printed 1870s", "1864/1872"), + ("1868-1871?", "1868/1871?"), + ("1869-70", "1869/1870"), + ("1870s, printed ca. 1880s", "187x"), + ("1900-1903, cast before 1929", "1900/1903"), + ("1900; 1973", "1900"), + ("1900; printed 1912", "1900"), + ("1915 late - autumn 1916", "1915/1916-23"), + ("1915, from Camerawork, October 1916", "1915"), # should be {1915, 1916-10} + ("1920s -early 1930s", "192x/193x"), + ( + "1930s, printed early 1960s", + "193x", + ), # should be something like {193x, 196x}, + ("1932, printed 1976 by Gunther Sander", "1932"), # should be {1932, 1976} + ( + "1938, printed 1940s-1950s", + "1938", + ), # should be something like {1938, 194x-195x} + # Uncertain and approximate on different parts of the date + # for these to work we need to recast is_uncertain and is_approximate + # such that they work on different parts. Probably worth rolling our own + # dateparser at this point. + # ('July in about 1849', '1849~-07'), + # ('a day in July in about 1849', '1849~-07-uu'), + # ('a day in Spring in about 1849', '1849~-21-uu'), + # ('a day in about July? in about 1849', '1849~-07?~-uu'), + # ('a day in about Spring in about 1849', '1849~-21~-uu'), + # ('maybe January in some year in about the 1830s', '183u~-01?'), + # ('about July? 
in about 1849', '1849~-07?~'), + ], +) def test_natlang(input_text, expected_output): """ Test natural language conversion to EDTF format: @@ -199,4 +196,3 @@ def test_natlang(input_text, expected_output): """ result = text_to_edtf(input_text) assert result == expected_output, f"Failed for input: {input_text}" - diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index d69e719..730f47d 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -1,26 +1,54 @@ -from pyparsing import Literal as L, ParseException, Opt, Optional, OneOrMore, \ - ZeroOrMore, oneOf, Regex, Combine, Word, NotAny, nums, Group - -# (* ************************** Level 0 *************************** *) -from edtf.parser.parser_classes import Date, DateAndTime, Interval, Unspecified, \ - UncertainOrApproximate, Level1Interval, LongYear, Season, \ - PartialUncertainOrApproximate, UA, PartialUnspecified, OneOfASet, \ - Consecutives, EarlierConsecutives, LaterConsecutives, MultipleDates, \ - MaskedPrecision, Level2Interval, ExponentialYear, Level2Season +from pyparsing import ( + Combine, + NotAny, + OneOrMore, + Opt, + Optional, + ParseException, + Regex, + Word, + ZeroOrMore, + nums, + oneOf, +) +from pyparsing import Literal as L from edtf.parser.edtf_exceptions import EDTFParseException -oneThru12 = oneOf(['%.2d' % i for i in range(1, 13)]) -oneThru13 = oneOf(['%.2d' % i for i in range(1, 14)]) -oneThru23 = oneOf(['%.2d' % i for i in range(1, 24)]) -zeroThru23 = oneOf(['%.2d' % i for i in range(0, 24)]) -oneThru29 = oneOf(['%.2d' % i for i in range(1, 30)]) -oneThru30 = oneOf(['%.2d' % i for i in range(1, 31)]) -oneThru31 = oneOf(['%.2d' % i for i in range(1, 32)]) -oneThru59 = oneOf(['%.2d' % i for i in range(1, 60)]) -zeroThru59 = oneOf(['%.2d' % i for i in range(0, 60)]) - -positiveDigit = Word(nums, exact=1, excludeChars='0') +# (* ************************** Level 0 *************************** *) +from edtf.parser.parser_classes import ( + UA, + Consecutives, + Date, + 
DateAndTime, + EarlierConsecutives, + ExponentialYear, + Interval, + LaterConsecutives, + Level1Interval, + Level2Interval, + Level2Season, + LongYear, + MultipleDates, + OneOfASet, + PartialUncertainOrApproximate, + PartialUnspecified, + Season, + UncertainOrApproximate, + Unspecified, +) + +oneThru12 = oneOf(["%.2d" % i for i in range(1, 13)]) +oneThru13 = oneOf(["%.2d" % i for i in range(1, 14)]) +oneThru23 = oneOf(["%.2d" % i for i in range(1, 24)]) +zeroThru23 = oneOf(["%.2d" % i for i in range(0, 24)]) +oneThru29 = oneOf(["%.2d" % i for i in range(1, 30)]) +oneThru30 = oneOf(["%.2d" % i for i in range(1, 31)]) +oneThru31 = oneOf(["%.2d" % i for i in range(1, 32)]) +oneThru59 = oneOf(["%.2d" % i for i in range(1, 60)]) +zeroThru59 = oneOf(["%.2d" % i for i in range(0, 60)]) + +positiveDigit = Word(nums, exact=1, excludeChars="0") digit = Word(nums, exact=1) second = zeroThru59 @@ -50,13 +78,10 @@ Date.set_parser(date) zoneOffsetHour = oneThru13 -zoneOffset = L("Z") \ - ^ (Regex("[+-]") - + (zoneOffsetHour + Optional(":" + minute) - ^ L("14:00") - ^ ("00:" + oneThru59) - ) - ) +zoneOffset = L("Z") ^ ( + Regex("[+-]") + + (zoneOffsetHour + Optional(":" + minute) ^ L("14:00") ^ ("00:" + oneThru59)) +) baseTime = Combine(hour + ":" + minute + ":" + second ^ "24:00:00") @@ -100,83 +125,80 @@ # cleanly otherwise the parameter names are overlapped. def f(toks): try: - return {'date': toks[0], 'ua': toks[1]} + return {"date": toks[0], "ua": toks[1]} except IndexError: - return {'date': toks[0], 'ua': None} + return {"date": toks[0], "ua": None} -l1Start = '..' ^ uaDateOrSeason +l1Start = ".." ^ uaDateOrSeason l1Start.addParseAction(f) -l1End = uaDateOrSeason ^ '..' +l1End = uaDateOrSeason ^ ".." 
l1End.addParseAction(f) -level1Interval = Optional(l1Start)("lower") + "/" + l1End("upper") \ - ^ l1Start("lower") + "/" + Optional(l1End("upper")) +level1Interval = Optional(l1Start)("lower") + "/" + l1End("upper") ^ l1Start( + "lower" +) + "/" + Optional(l1End("upper")) Level1Interval.set_parser(level1Interval) # (* *** unspecified *** *) -yearWithOneOrTwoUnspecifedDigits = Combine( - digit + digit + (digit ^ 'X') + 'X' -)("year") +yearWithOneOrTwoUnspecifedDigits = Combine(digit + digit + (digit ^ "X") + "X")("year") monthUnspecified = year + "-" + L("XX")("month") dayUnspecified = yearMonth + "-" + L("XX")("day") dayAndMonthUnspecified = year + "-" + L("XX")("month") + "-" + L("XX")("day") -unspecified = yearWithOneOrTwoUnspecifedDigits \ - ^ monthUnspecified \ - ^ dayUnspecified \ +unspecified = ( + yearWithOneOrTwoUnspecifedDigits + ^ monthUnspecified + ^ dayUnspecified ^ dayAndMonthUnspecified +) Unspecified.set_parser(unspecified) # (* *** uncertainOrApproxDate *** *) -uncertainOrApproxDate = date('date') + UASymbol("ua") +uncertainOrApproxDate = date("date") + UASymbol("ua") UncertainOrApproximate.set_parser(uncertainOrApproxDate) -level1Expression = uncertainOrApproxDate \ - ^ unspecified \ - ^ level1Interval \ - ^ longYearSimple \ - ^ season +level1Expression = ( + uncertainOrApproxDate ^ unspecified ^ level1Interval ^ longYearSimple ^ season +) # (* ************************** Level 2 *************************** *) # (* ** Internal Unspecified** *) -digitOrX = Word(nums + 'X', exact=1) +digitOrX = Word(nums + "X", exact=1) # 2-digit day with at least one 'X' present -dayWithX = Combine( - ("X" + digitOrX) - ^ (digitOrX + 'X') -)("day") +dayWithX = Combine(("X" + digitOrX) ^ (digitOrX + "X"))("day") # 2-digit month with at least one 'X' present -monthWithX = Combine( - oneOf("0X 1X") - ^ ("X" + digitOrX) -)("month") +monthWithX = Combine(oneOf("0X 1X") ^ ("X" + digitOrX))("month") # 4-digit year with at least one 'X' present yearWithX = Combine( - ('X' + 
digitOrX + digitOrX + digitOrX) - ^ (digitOrX + 'X' + digitOrX + digitOrX) - ^ (digitOrX + digitOrX + 'X' + digitOrX) - ^ (digitOrX + digitOrX + digitOrX + 'X') + ("X" + digitOrX + digitOrX + digitOrX) + ^ (digitOrX + "X" + digitOrX + digitOrX) + ^ (digitOrX + digitOrX + "X" + digitOrX) + ^ (digitOrX + digitOrX + digitOrX + "X") )("year") -yearMonthWithX = ( - (Combine(year("") ^ yearWithX(""))("year") + "-" + monthWithX) - ^ (yearWithX + "-" + month) +yearMonthWithX = (Combine(year("") ^ yearWithX(""))("year") + "-" + monthWithX) ^ ( + yearWithX + "-" + month ) -monthDayWithX = ( - (Combine(month("") ^ monthWithX(""))("month") + "-" + dayWithX) - ^ (monthWithX + "-" + day) +monthDayWithX = (Combine(month("") ^ monthWithX(""))("month") + "-" + dayWithX) ^ ( + monthWithX + "-" + day ) yearMonthDayWithX = ( - (yearWithX + "-" + Combine(month("") ^ monthWithX(""))("month") + "-" + Combine(day("") ^ dayWithX(""))("day")) + ( + yearWithX + + "-" + + Combine(month("") ^ monthWithX(""))("month") + + "-" + + Combine(day("") ^ dayWithX(""))("day") + ) ^ (year + "-" + monthWithX + "-" + Combine(day("") ^ dayWithX(""))("day")) ^ (year + "-" + month + "-" + dayWithX) ) @@ -188,8 +210,9 @@ def f(toks): # group qualification # qualifier right of a component(date, month, day) applies to all components to the left -group_qual = yearMonth + UASymbol("year_month_ua") + "-" + day \ - ^ year + UASymbol("year_ua") + "-" + month + Opt("-" + day) +group_qual = yearMonth + UASymbol("year_month_ua") + "-" + day ^ year + UASymbol( + "year_ua" +) + "-" + month + Opt("-" + day) # component qualification # qualifier immediate left of a component (date, month, day) applies to that component only @@ -197,17 +220,18 @@ def f(toks): qual_month = month ^ UASymbol("month_ua") + month qual_day = day ^ UASymbol("day_ua") + day -indi_qual = UASymbol("year_ua_b") + year + Opt("-" + qual_month + Opt("-" + qual_day)) \ - ^ qual_year + "-" + UASymbol("month_ua") + month + Opt("-" + qual_day) \ +indi_qual = 
( + UASymbol("year_ua_b") + year + Opt("-" + qual_month + Opt("-" + qual_day)) + ^ qual_year + "-" + UASymbol("month_ua") + month + Opt("-" + qual_day) ^ qual_year + "-" + qual_month + "-" + UASymbol("day_ua") + day +) partialUncertainOrApproximate = group_qual ^ indi_qual PartialUncertainOrApproximate.set_parser(partialUncertainOrApproximate) -dateWithInternalUncertainty = partialUncertainOrApproximate \ - ^ partialUnspecified +dateWithInternalUncertainty = partialUncertainOrApproximate ^ partialUnspecified -qualifyingString = Regex(r'\S') # any nonwhitespace char +qualifyingString = Regex(r"\S") # any nonwhitespace char # (* ** SeasonQualified ** *) seasonQualifier = qualifyingString @@ -215,14 +239,25 @@ def f(toks): # (* ** Long Year - Scientific Form ** *) positiveInteger = Combine(positiveDigit + ZeroOrMore(digit)) -longYearScientific = "Y" + Combine(Optional("-") + positiveInteger)("base") + "E" + \ - positiveInteger("exponent") + Optional("S" + positiveInteger("precision")) +longYearScientific = ( + "Y" + + Combine(Optional("-") + positiveInteger)("base") + + "E" + + positiveInteger("exponent") + + Optional("S" + positiveInteger("precision")) +) ExponentialYear.set_parser(longYearScientific) # (* ** level2Interval ** *) -level2Interval = (dateOrSeason("lower") + "/" + dateWithInternalUncertainty("upper")) \ - ^ (dateWithInternalUncertainty("lower") + "/" + dateOrSeason("upper")) \ - ^ (dateWithInternalUncertainty("lower") + "/" + dateWithInternalUncertainty("upper")) +level2Interval = ( + (dateOrSeason("lower") + "/" + dateWithInternalUncertainty("upper")) + ^ (dateWithInternalUncertainty("lower") + "/" + dateOrSeason("upper")) + ^ ( + dateWithInternalUncertainty("lower") + + "/" + + dateWithInternalUncertainty("upper") + ) +) Level2Interval.set_parser(level2Interval) # (* ** Masked precision ** *) eliminated in latest specs @@ -230,16 +265,20 @@ def f(toks): # MaskedPrecision.set_parser(maskedPrecision) # (* ** Inclusive list and choice list** *) 
-consecutives = (yearMonthDay("lower") + ".." + yearMonthDay("upper")) \ - ^ (yearMonth("lower") + ".." + yearMonth("upper")) \ +consecutives = ( + (yearMonthDay("lower") + ".." + yearMonthDay("upper")) + ^ (yearMonth("lower") + ".." + yearMonth("upper")) ^ (year("lower") + ".." + year("upper")) +) Consecutives.set_parser(consecutives) -listElement = date \ - ^ dateWithInternalUncertainty \ - ^ uncertainOrApproxDate \ - ^ unspecified \ +listElement = ( + date + ^ dateWithInternalUncertainty + ^ uncertainOrApproxDate + ^ unspecified ^ consecutives +) earlier = L("..").addParseAction(f)("lower") + date("upper").addParseAction(f) later = date("lower").addParseAction(f) + L("..").addParseAction(f)("upper") @@ -248,10 +287,12 @@ def f(toks): LaterConsecutives.set_parser(later) -listContent = (earlier + ZeroOrMore("," + listElement)) \ - ^ (Optional(earlier + ",") + ZeroOrMore(listElement + ",") + later) \ - ^ (listElement + OneOrMore("," + listElement)) \ +listContent = ( + (earlier + ZeroOrMore("," + listElement)) + ^ (Optional(earlier + ",") + ZeroOrMore(listElement + ",") + later) + ^ (listElement + OneOrMore("," + listElement)) ^ consecutives +) choiceList = "[" + listContent + "]" OneOfASet.set_parser(choiceList) @@ -265,17 +306,21 @@ def f(toks): l2season = year + "-" + seasonL2Number("season") Level2Season.set_parser(l2season) -level2Expression = partialUncertainOrApproximate \ - ^ partialUnspecified \ - ^ choiceList \ - ^ inclusiveList \ - ^ level2Interval \ - ^ longYearScientific \ - ^ l2season \ +level2Expression = ( + partialUncertainOrApproximate + ^ partialUnspecified + ^ choiceList + ^ inclusiveList + ^ level2Interval + ^ longYearScientific + ^ l2season ^ seasonQualified +) # putting it all together -edtfParser = level0Expression("level0") ^ level1Expression("level1") ^ level2Expression("level2") +edtfParser = ( + level0Expression("level0") ^ level1Expression("level1") ^ level2Expression("level2") +) def parse_edtf(str, parseAll=True, fail_silently=False): 
@@ -285,7 +330,7 @@ def parse_edtf(str, parseAll=True, fail_silently=False): p = edtfParser.parseString(str.strip(), parseAll) if p: return p[0] - except ParseException as e: + except ParseException as err: if fail_silently: return None - raise EDTFParseException(e) + raise EDTFParseException(err) from err diff --git a/edtf/parser/grammar_test.py b/edtf/parser/grammar_test.py index 81b2d5d..c8ff727 100644 --- a/edtf/parser/grammar_test.py +++ b/edtf/parser/grammar_test.py @@ -1,26 +1,52 @@ -from pyparsing import Literal as L, ParseException, Optional, Opt, OneOrMore, \ - ZeroOrMore, oneOf, Regex, Combine, Word, NotAny, nums, FollowedBy - -# (* ************************** Level 0 *************************** *) -from edtf.parser.parser_classes import Date, DateAndTime, Interval, Unspecified, \ - UncertainOrApproximate, Level1Interval, LongYear, Season, \ - PartialUncertainOrApproximate, UA, PartialUnspecified, OneOfASet, \ - Consecutives, EarlierConsecutives, LaterConsecutives, MultipleDates, \ - MaskedPrecision, Level2Interval, ExponentialYear, UnspecifiedIntervalSection# , Testi +from pyparsing import ( + Combine, + NotAny, + OneOrMore, + Optional, + ParseException, + Regex, + Word, + ZeroOrMore, + nums, + oneOf, +) +from pyparsing import Literal as L from edtf.parser.edtf_exceptions import EDTFParseException -oneThru12 = oneOf(['%.2d' % i for i in range(1, 13)]) -oneThru13 = oneOf(['%.2d' % i for i in range(1, 14)]) -oneThru23 = oneOf(['%.2d' % i for i in range(1, 24)]) -zeroThru23 = oneOf(['%.2d' % i for i in range(0, 24)]) -oneThru29 = oneOf(['%.2d' % i for i in range(1, 30)]) -oneThru30 = oneOf(['%.2d' % i for i in range(1, 31)]) -oneThru31 = oneOf(['%.2d' % i for i in range(1, 32)]) -oneThru59 = oneOf(['%.2d' % i for i in range(1, 60)]) -zeroThru59 = oneOf(['%.2d' % i for i in range(0, 60)]) - -positiveDigit = Word(nums, exact=1, excludeChars='0') +# (* ************************** Level 0 *************************** *) +from edtf.parser.parser_classes import ( + 
UA, + Consecutives, + Date, + DateAndTime, + EarlierConsecutives, + ExponentialYear, + Interval, + LaterConsecutives, + Level1Interval, + Level2Interval, # , Testi + LongYear, + MultipleDates, + OneOfASet, + PartialUncertainOrApproximate, + PartialUnspecified, + Season, + UncertainOrApproximate, + Unspecified, +) + +oneThru12 = oneOf(["%.2d" % i for i in range(1, 13)]) +oneThru13 = oneOf(["%.2d" % i for i in range(1, 14)]) +oneThru23 = oneOf(["%.2d" % i for i in range(1, 24)]) +zeroThru23 = oneOf(["%.2d" % i for i in range(0, 24)]) +oneThru29 = oneOf(["%.2d" % i for i in range(1, 30)]) +oneThru30 = oneOf(["%.2d" % i for i in range(1, 31)]) +oneThru31 = oneOf(["%.2d" % i for i in range(1, 32)]) +oneThru59 = oneOf(["%.2d" % i for i in range(1, 60)]) +zeroThru59 = oneOf(["%.2d" % i for i in range(0, 60)]) + +positiveDigit = Word(nums, exact=1, excludeChars="0") digit = Word(nums, exact=1) second = zeroThru59 @@ -50,13 +76,10 @@ Date.set_parser(date) zoneOffsetHour = oneThru13 -zoneOffset = L("Z") \ - ^ (Regex("[+-]") - + (zoneOffsetHour + Optional(":" + minute) - ^ L("14:00") - ^ ("00:" + oneThru59) - ) - ) +zoneOffset = L("Z") ^ ( + Regex("[+-]") + + (zoneOffsetHour + Optional(":" + minute) ^ L("14:00") ^ ("00:" + oneThru59)) +) baseTime = Combine(hour + ":" + minute + ":" + second ^ "24:00:00") @@ -96,92 +119,90 @@ uaDateOrSeason = dateOrSeason + Optional(UASymbol) -#unspecifiedIntervalSec = L('..')('unknownOrOpen') + FollowedBy(L("/") + uaDateOrSeason)('other_section_element') -#Testi.set_parser(unspecifiedIntervalSec) +# unspecifiedIntervalSec = L('..')('unknownOrOpen') + FollowedBy(L("/") + uaDateOrSeason)('other_section_element') +# Testi.set_parser(unspecifiedIntervalSec) + # bit of a kludge here to get the all the relevant tokens into the parse action # cleanly otherwise the parameter names are overlapped. 
def f(toks): try: - return {'date': toks[0], 'ua': toks[1]} + return {"date": toks[0], "ua": toks[1]} except IndexError: - return {'date': toks[0], 'ua': None} + return {"date": toks[0], "ua": None} -l1Start = '..' ^ uaDateOrSeason -#l1Start = unspecifiedIntervalSec ^ uaDateOrSeason +l1Start = ".." ^ uaDateOrSeason +# l1Start = unspecifiedIntervalSec ^ uaDateOrSeason l1Start.addParseAction(f) -l1End = uaDateOrSeason ^ '..' +l1End = uaDateOrSeason ^ ".." l1End.addParseAction(f) -#level1Interval = l1Start("lower") + "/" + l1End("upper") -level1Interval = Optional(l1Start)("lower") + "/" + l1End("upper") \ - ^ l1Start("lower") + "/" + Optional(l1End("upper")) +# level1Interval = l1Start("lower") + "/" + l1End("upper") +level1Interval = Optional(l1Start)("lower") + "/" + l1End("upper") ^ l1Start( + "lower" +) + "/" + Optional(l1End("upper")) Level1Interval.set_parser(level1Interval) # (* *** unspecified *** *) -yearWithOneOrTwoUnspecifedDigits = Combine( - digit + digit + (digit ^ 'X') + 'X' -)("year") +yearWithOneOrTwoUnspecifedDigits = Combine(digit + digit + (digit ^ "X") + "X")("year") monthUnspecified = year + "-" + L("XX")("month") dayUnspecified = yearMonth + "-" + L("XX")("day") dayAndMonthUnspecified = year + "-" + L("XX")("month") + "-" + L("XX")("day") -unspecified = yearWithOneOrTwoUnspecifedDigits \ - ^ monthUnspecified \ - ^ dayUnspecified \ +unspecified = ( + yearWithOneOrTwoUnspecifedDigits + ^ monthUnspecified + ^ dayUnspecified ^ dayAndMonthUnspecified +) Unspecified.set_parser(unspecified) # (* *** uncertainOrApproxDate *** *) -uncertainOrApproxDate = date('date') + UASymbol("ua") +uncertainOrApproxDate = date("date") + UASymbol("ua") UncertainOrApproximate.set_parser(uncertainOrApproxDate) -level1Expression = uncertainOrApproxDate \ - ^ unspecified \ - ^ level1Interval \ - ^ longYearSimple \ - ^ season +level1Expression = ( + uncertainOrApproxDate ^ unspecified ^ level1Interval ^ longYearSimple ^ season +) # (* ************************** Level 2 
*************************** *) # (* ** Internal Unspecified** *) -digitOrU = Word(nums + 'X', exact=1) +digitOrU = Word(nums + "X", exact=1) # 2-digit day with at least one 'X' present -dayWithU = Combine( - ("X" + digitOrU) - ^ (digitOrU + 'X') -)("day") +dayWithU = Combine(("X" + digitOrU) ^ (digitOrU + "X"))("day") # 2-digit month with at least one 'X' present -monthWithU = Combine( - oneOf("0X 1X") - ^ ("X" + digitOrU) -)("month") +monthWithU = Combine(oneOf("0X 1X") ^ ("X" + digitOrU))("month") # 4-digit year with at least one 'X' present yearWithU = Combine( - ('X' + digitOrU + digitOrU + digitOrU) - ^ (digitOrU + 'X' + digitOrU + digitOrU) - ^ (digitOrU + digitOrU + 'X' + digitOrU) - ^ (digitOrU + digitOrU + digitOrU + 'X') + ("X" + digitOrU + digitOrU + digitOrU) + ^ (digitOrU + "X" + digitOrU + digitOrU) + ^ (digitOrU + digitOrU + "X" + digitOrU) + ^ (digitOrU + digitOrU + digitOrU + "X") )("year") -yearMonthWithU = ( - (Combine(year("") ^ yearWithU(""))("year") + "-" + monthWithU) - ^ (yearWithU + "-" + month) +yearMonthWithU = (Combine(year("") ^ yearWithU(""))("year") + "-" + monthWithU) ^ ( + yearWithU + "-" + month ) -monthDayWithU = ( - (Combine(month("") ^ monthWithU(""))("month") + "-" + dayWithU) - ^ (monthWithU + "-" + day) +monthDayWithU = (Combine(month("") ^ monthWithU(""))("month") + "-" + dayWithU) ^ ( + monthWithU + "-" + day ) yearMonthDayWithU = ( - (yearWithU + "-" + Combine(month("") ^ monthWithU(""))("month") + "-" + Combine(day("") ^ dayWithU(""))("day")) + ( + yearWithU + + "-" + + Combine(month("") ^ monthWithU(""))("month") + + "-" + + Combine(day("") ^ dayWithU(""))("day") + ) ^ (year + "-" + monthWithU + "-" + Combine(day("") ^ dayWithU(""))("day")) ^ (year + "-" + month + "-" + dayWithU) ) @@ -198,30 +219,52 @@ def f(toks): # second clause below needed Optional() around the "year_ua" UASymbol, for dates # like '(2011)-06-04~' to work. 
-IUABase = \ - (year_with_brackets + UASymbol("year_ua") + "-" + month + Optional("-(" + day + ")" + UASymbol("day_ua"))) \ - ^ (year_with_brackets + Optional(UASymbol)("year_ua") + "-" + monthDay + Optional(UASymbol)("month_day_ua")) \ +IUABase = ( + ( + year_with_brackets + + UASymbol("year_ua") + + "-" + + month + + Optional("-(" + day + ")" + UASymbol("day_ua")) + ) ^ ( - year_with_brackets + Optional(UASymbol)("year_ua") + "-(" + month + ")" + UASymbol("month_ua") + year_with_brackets + + Optional(UASymbol)("year_ua") + + "-" + + monthDay + + Optional(UASymbol)("month_day_ua") + ) + ^ ( + year_with_brackets + + Optional(UASymbol)("year_ua") + + "-(" + + month + + ")" + + UASymbol("month_ua") + Optional("-(" + day + ")" + UASymbol("day_ua")) - ) \ + ) ^ ( - year_with_brackets + Optional(UASymbol)("year_ua") + "-(" + month + ")" + UASymbol("month_ua") + year_with_brackets + + Optional(UASymbol)("year_ua") + + "-(" + + month + + ")" + + UASymbol("month_ua") + Optional("-" + day) - ) \ - ^ (yearMonth + UASymbol("year_month_ua") + "-(" + day + ")" + UASymbol("day_ua")) \ - ^ (yearMonth + UASymbol("year_month_ua") + "-" + day) \ - ^ (yearMonth + "-(" + day + ")" + UASymbol("day_ua")) \ - ^ (year + "-(" + monthDay + ")" + UASymbol("month_day_ua")) \ + ) + ^ (yearMonth + UASymbol("year_month_ua") + "-(" + day + ")" + UASymbol("day_ua")) + ^ (yearMonth + UASymbol("year_month_ua") + "-" + day) + ^ (yearMonth + "-(" + day + ")" + UASymbol("day_ua")) + ^ (year + "-(" + monthDay + ")" + UASymbol("month_day_ua")) ^ (season("ssn") + UASymbol("season_ua")) +) partialUncertainOrApproximate = IUABase ^ ("(" + IUABase + ")" + UASymbol("all_ua")) PartialUncertainOrApproximate.set_parser(partialUncertainOrApproximate) -dateWithInternalUncertainty = partialUncertainOrApproximate \ - ^ partialUnspecified +dateWithInternalUncertainty = partialUncertainOrApproximate ^ partialUnspecified -qualifyingString = Regex(r'\S') # any nonwhitespace char +qualifyingString = Regex(r"\S") # any 
nonwhitespace char # (* ** SeasonQualified ** *) seasonQualifier = qualifyingString @@ -229,14 +272,25 @@ def f(toks): # (* ** Long Year - Scientific Form ** *) positiveInteger = Combine(positiveDigit + ZeroOrMore(digit)) -longYearScientific = "Y" + Combine(Optional("-") + positiveInteger)("base") + "E" + \ - positiveInteger("exponent") + Optional("S" + positiveInteger("precision")) +longYearScientific = ( + "Y" + + Combine(Optional("-") + positiveInteger)("base") + + "E" + + positiveInteger("exponent") + + Optional("S" + positiveInteger("precision")) +) ExponentialYear.set_parser(longYearScientific) # (* ** level2Interval ** *) -level2Interval = (dateOrSeason("lower") + "/" + dateWithInternalUncertainty("upper")) \ - ^ (dateWithInternalUncertainty("lower") + "/" + dateOrSeason("upper")) \ - ^ (dateWithInternalUncertainty("lower") + "/" + dateWithInternalUncertainty("upper")) +level2Interval = ( + (dateOrSeason("lower") + "/" + dateWithInternalUncertainty("upper")) + ^ (dateWithInternalUncertainty("lower") + "/" + dateOrSeason("upper")) + ^ ( + dateWithInternalUncertainty("lower") + + "/" + + dateWithInternalUncertainty("upper") + ) +) Level2Interval.set_parser(level2Interval) # (* ** Masked precision ** *) eliminated in latest specs @@ -244,26 +298,32 @@ def f(toks): # MaskedPrecision.set_parser(maskedPrecision) # (* ** Inclusive list and choice list** *) -consecutives = (yearMonthDay("lower") + ".." + yearMonthDay("upper")) \ - ^ (yearMonth("lower") + ".." + yearMonth("upper")) \ +consecutives = ( + (yearMonthDay("lower") + ".." + yearMonthDay("upper")) + ^ (yearMonth("lower") + ".." + yearMonth("upper")) ^ (year("lower") + ".." + year("upper")) +) Consecutives.set_parser(consecutives) -listElement = date \ - ^ dateWithInternalUncertainty \ - ^ uncertainOrApproxDate \ - ^ unspecified \ +listElement = ( + date + ^ dateWithInternalUncertainty + ^ uncertainOrApproxDate + ^ unspecified ^ consecutives +) earlier = ".." 
+ date("upper") EarlierConsecutives.set_parser(earlier) later = date("lower") + ".." LaterConsecutives.set_parser(later) -listContent = (earlier + ZeroOrMore("," + listElement)) \ - ^ (Optional(earlier + ",") + ZeroOrMore(listElement + ",") + later) \ - ^ (listElement + OneOrMore("," + listElement)) \ +listContent = ( + (earlier + ZeroOrMore("," + listElement)) + ^ (Optional(earlier + ",") + ZeroOrMore(listElement + ",") + later) + ^ (listElement + OneOrMore("," + listElement)) ^ consecutives +) choiceList = "[" + listContent + "]" OneOfASet.set_parser(choiceList) @@ -271,16 +331,20 @@ def f(toks): inclusiveList = "{" + listContent + "}" MultipleDates.set_parser(inclusiveList) -level2Expression = partialUncertainOrApproximate \ - ^ partialUnspecified \ - ^ choiceList \ - ^ inclusiveList \ - ^ level2Interval \ - ^ longYearScientific \ +level2Expression = ( + partialUncertainOrApproximate + ^ partialUnspecified + ^ choiceList + ^ inclusiveList + ^ level2Interval + ^ longYearScientific ^ seasonQualified +) # putting it all together -edtfParser = level0Expression("level0") ^ level1Expression("level1") ^ level2Expression("level2") +edtfParser = ( + level0Expression("level0") ^ level1Expression("level1") ^ level2Expression("level2") +) def parse_edtf(str, parseAll=True, fail_silently=False): @@ -290,7 +354,7 @@ def parse_edtf(str, parseAll=True, fail_silently=False): p = edtfParser.parseString(str.strip(), parseAll) if p: return p[0] - except ParseException as e: + except ParseException as err: if fail_silently: return None - raise EDTFParseException(e) + raise EDTFParseException(err) from err diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 3b5ac6e..df19d67 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -1,17 +1,22 @@ import calendar +import math import re -from time import struct_time from datetime import date, datetime from operator import add, sub -import math +from time import struct_time + from 
dateutil.relativedelta import relativedelta from edtf import appsettings -from edtf.convert import dt_to_struct_time, trim_struct_time, \ - TIME_EMPTY_TIME, TIME_EMPTY_EXTRAS +from edtf.convert import ( + TIME_EMPTY_EXTRAS, + TIME_EMPTY_TIME, + dt_to_struct_time, + trim_struct_time, +) -EARLIEST = 'earliest' -LATEST = 'latest' +EARLIEST = "earliest" +LATEST = "latest" PRECISION_MILLENIUM = "millenium" PRECISION_CENTURY = "century" @@ -80,14 +85,16 @@ def apply_delta(op, time_struct, delta): # Convert result year back to its original millenium final_year = dt_result.year - millenium_diff return struct_time( - (final_year,) + dt_result.timetuple()[1:6] + tuple(TIME_EMPTY_EXTRAS)) + (final_year,) + dt_result.timetuple()[1:6] + tuple(TIME_EMPTY_EXTRAS) + ) -class EDTFObject(object): +class EDTFObject: """ Object to attact to a parser to become instantiated when the parser completes. """ + parser = None @classmethod @@ -99,9 +106,9 @@ def set_parser(cls, p): def parse_action(cls, toks): kwargs = toks.asDict() try: - return cls(**kwargs) # replace the token list with the class + return cls(**kwargs) # replace the token list with the class except Exception as e: - print("trying to %s.__init__(**%s)" % (cls.__name__, kwargs)) + print(f"trying to {cls.__name__}.__init__(**{kwargs})") raise e @classmethod @@ -109,14 +116,11 @@ def parse(cls, s): return cls.parser.parseString(s)[0] def __repr__(self): - return "%s: '%s'" % (type(self).__name__, str(self)) + return f"{type(self).__name__}: '{str(self)}'" def __init__(self, *args, **kwargs): - str = "%s.__init__(*%s, **%s)" % ( - type(self).__name__, - args, kwargs, - ) - raise NotImplementedError("%s is not implemented." 
% str) + str = f"{type(self).__name__}.__init__(*{args}, **{kwargs})" + raise NotImplementedError(f"{str} is not implemented.") def __str__(self): raise NotImplementedError @@ -137,25 +141,30 @@ def _get_fuzzy_padding(self, lean): return relativedelta(0) def get_is_approximate(self): - return getattr(self, '_is_approximate', False) + return getattr(self, "_is_approximate", False) def set_is_approximate(self, val): self._is_approximate = val + is_approximate = property(get_is_approximate, set_is_approximate) def get_is_uncertain(self): - return getattr(self, '_is_uncertain', False) + return getattr(self, "_is_uncertain", False) def set_is_uncertain(self, val): self._is_uncertain = val + is_uncertain = property(get_is_uncertain, set_is_uncertain) def get_is_uncertain_and_approximate(self): - return getattr(self, '_uncertain_and_approximate', False) + return getattr(self, "_uncertain_and_approximate", False) def set_is_uncertain_and_approximate(self, val): self._uncertain_and_approximate = val - is_uncertain_and_approximate = property(get_is_uncertain_and_approximate, set_is_uncertain_and_approximate) + + is_uncertain_and_approximate = property( + get_is_uncertain_and_approximate, set_is_uncertain_and_approximate + ) def lower_fuzzy(self): strict_val = self.lower_strict() @@ -190,7 +199,9 @@ def __gt__(self, other): return self.lower_strict() > dt_to_struct_time(other) elif isinstance(other, struct_time): return self.lower_strict() > trim_struct_time(other) - raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__)) + raise TypeError( + f"can't compare {type(self).__name__} with {type(other).__name__}" + ) def __ge__(self, other): if isinstance(other, EDTFObject): @@ -199,7 +210,9 @@ def __ge__(self, other): return self.lower_strict() >= dt_to_struct_time(other) elif isinstance(other, struct_time): return self.lower_strict() >= trim_struct_time(other) - raise TypeError("can't compare %s with %s" % (type(self).__name__, 
type(other).__name__)) + raise TypeError( + f"can't compare {type(self).__name__} with {type(other).__name__}" + ) def __lt__(self, other): if isinstance(other, EDTFObject): @@ -208,7 +221,9 @@ def __lt__(self, other): return self.lower_strict() < dt_to_struct_time(other) elif isinstance(other, struct_time): return self.lower_strict() < trim_struct_time(other) - raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__)) + raise TypeError( + f"can't compare {type(self).__name__} with {type(other).__name__}" + ) def __le__(self, other): if isinstance(other, EDTFObject): @@ -217,13 +232,15 @@ def __le__(self, other): return self.lower_strict() <= dt_to_struct_time(other) elif isinstance(other, struct_time): return self.lower_strict() <= trim_struct_time(other) - raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__)) + raise TypeError( + f"can't compare {type(self).__name__} with {type(other).__name__}" + ) # (* ************************** Level 0 *************************** *) -class Date(EDTFObject): +class Date(EDTFObject): def set_year(self, y): if y is None: raise AttributeError("Year must not be None") @@ -231,33 +248,35 @@ def set_year(self, y): def get_year(self): return self._year + year = property(get_year, set_year) def set_month(self, m): self._month = m - if m == None: + if m is None: self.day = None def get_month(self): return self._month + month = property(get_month, set_month) def __init__(self, year=None, month=None, day=None, **kwargs): - for param in ('date', 'lower', 'upper'): + for param in ("date", "lower", "upper"): if param in kwargs: self.__init__(**kwargs[param]) return - self.year = year # Year is required, but sometimes passed in as a 'date' dict. + self.year = year # Year is required, but sometimes passed in as a 'date' dict. 
self.month = month self.day = day def __str__(self): r = self.year if self.month: - r += "-%s" % self.month + r += f"-{self.month}" if self.day: - r += "-%s" % self.day + r += f"-{self.day}" return r def isoformat(self, default=date.max): @@ -270,16 +289,18 @@ def isoformat(self, default=date.max): def _precise_year(self, lean): # Replace any ambiguous characters in the year string with 0s or 9s if lean == EARLIEST: - return int(re.sub(r'X', r'0', self.year)) + return int(re.sub(r"X", r"0", self.year)) else: - return int(re.sub(r'X', r'9', self.year)) + return int(re.sub(r"X", r"9", self.year)) def _precise_month(self, lean): if self.month and self.month != "XX": try: return int(self.month) - except ValueError as e: - raise ValueError("Couldn't convert %s to int (in %s)" % (self.month, self)) + except ValueError as err: + raise ValueError( + f"Couldn't convert {self.month} to int (in {self})" + ) from err else: return 1 if lean == EARLIEST else 12 @@ -303,7 +324,9 @@ def _strict_date(self, lean): self._precise_year(lean), self._precise_month(lean), self._precise_day(lean), - ) + tuple(TIME_EMPTY_TIME) + tuple(TIME_EMPTY_EXTRAS) + ) + + tuple(TIME_EMPTY_TIME) + + tuple(TIME_EMPTY_EXTRAS) ) @property @@ -334,14 +357,14 @@ def __eq__(self, other): return self.isoformat() == other.isoformat() elif isinstance(other, struct_time): return self._strict_date() == trim_struct_time(other) - return super(DateAndTime, self).__eq__(other) + return super().__eq__(other) def __ne__(self, other): if isinstance(other, datetime): return self.isoformat() != other.isoformat() elif isinstance(other, struct_time): return self._strict_date() != trim_struct_time(other) - return super(DateAndTime, self).__ne__(other) + return super().__ne__(other) class Interval(EDTFObject): @@ -350,7 +373,7 @@ def __init__(self, lower, upper): self.upper = upper def __str__(self): - return "%s/%s" % (self.lower, self.upper) + return f"{self.lower}/{self.upper}" def _strict_date(self, lean): if lean == 
EARLIEST: @@ -365,6 +388,7 @@ def precision(self): return self.lower.precision return None + # (* ************************** Level 1 *************************** *) @@ -375,7 +399,8 @@ def parse_action(cls, toks): return cls(*args) def __init__(self, *args): - assert len(args) == 1 + if len(args) != 1: + raise AssertionError("UA must have exactly one argument") ua = args[0] self.is_uncertain = "?" in ua @@ -408,7 +433,7 @@ def __init__(self, date, ua): def __str__(self): if self.ua: - return "%s%s" % (self.date, self.ua) + return f"{self.date}{self.ua}" else: return str(self.date) @@ -429,7 +454,6 @@ def _get_fuzzy_padding(self, lean): class UnspecifiedIntervalSection(EDTFObject): - def __init__(self, sectionOpen=False, other_section_element=None): if sectionOpen: self.is_open = True @@ -471,19 +495,27 @@ class Unspecified(Date): class Level1Interval(Interval): def __init__(self, lower=None, upper=None): if lower: - if lower['date'] == '..': - self.lower = UnspecifiedIntervalSection(True, UncertainOrApproximate(**upper)) + if lower["date"] == "..": + self.lower = UnspecifiedIntervalSection( + True, UncertainOrApproximate(**upper) + ) else: self.lower = UncertainOrApproximate(**lower) else: - self.lower = UnspecifiedIntervalSection(False, UncertainOrApproximate(**upper)) + self.lower = UnspecifiedIntervalSection( + False, UncertainOrApproximate(**upper) + ) if upper: - if upper['date'] == '..': - self.upper = UnspecifiedIntervalSection(True, UncertainOrApproximate(**lower)) + if upper["date"] == "..": + self.upper = UnspecifiedIntervalSection( + True, UncertainOrApproximate(**lower) + ) else: self.upper = UncertainOrApproximate(**upper) else: - self.upper = UnspecifiedIntervalSection(False, UncertainOrApproximate(**lower)) + self.upper = UnspecifiedIntervalSection( + False, UncertainOrApproximate(**lower) + ) def _get_fuzzy_padding(self, lean): if lean == EARLIEST: @@ -497,7 +529,7 @@ def __init__(self, year): self.year = year def __str__(self): - return "Y%s" % 
self.year + return f"Y{self.year}" def _precise_year(self): return int(self.year) @@ -505,23 +537,21 @@ def _precise_year(self): def _strict_date(self, lean): py = self._precise_year() if lean == EARLIEST: - return struct_time( - [py, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + return struct_time([py, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) else: - return struct_time( - [py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + return struct_time([py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) class Season(Date): def __init__(self, year, season, **kwargs): self.year = year - self.season = season # use season to look up month + self.season = season # use season to look up month # day isn't part of the 'season' spec, but it helps the inherited # `Date` methods do their thing. self.day = None def __str__(self): - return "%s-%s" % (self.year, self.season) + return f"{self.year}-{self.season}" def _precise_month(self, lean): rng = appsettings.SEASON_L2_MONTHS_RANGE[int(self.season)] @@ -535,16 +565,25 @@ def _precise_month(self, lean): class PartialUncertainOrApproximate(Date): - - def set_year(self, y): # Year can be None. + def set_year(self, y): # Year can be None. 
self._year = y + year = property(Date.get_year, set_year) def __init__( - self, year=None, month=None, day=None, - year_ua=False, month_ua = False, day_ua = False, - year_month_ua = False, month_day_ua = False, - ssn=None, season_ua=False, all_ua=False, year_ua_b = False + self, + year=None, + month=None, + day=None, + year_ua=False, + month_ua=False, + day_ua=False, + year_month_ua=False, + month_day_ua=False, + ssn=None, + season_ua=False, + all_ua=False, + year_ua_b=False, ): self.year = year self.month = month @@ -564,67 +603,51 @@ def __init__( self.all_ua = all_ua def __str__(self): - if self.season_ua: - return "%s%s" % (self.season, self.season_ua) + return f"{self.season}{self.season_ua}" if self.year_ua: - y = "%s%s" % (self.year, self.year_ua) + y = f"{self.year}{self.year_ua}" else: - if self.year_ua_b: - y = "%s%s" % (self.year_ua_b, self.year) - else: - y = str(self.year) + y = f"{self.year_ua_b}{self.year}" if self.year_ua_b else str(self.year) - if self.month_ua: - m = "%s%s" % (self.month_ua, self.month) - else: - m = str(self.month) + m = f"{self.month_ua}{self.month}" if self.month_ua else str(self.month) if self.day: - if self.day_ua: - d = "%s%s" % (self.day_ua, self.day) - else: - d = str(self.day) + d = f"{self.day_ua}{self.day}" if self.day_ua else str(self.day) else: d = None - if self.year_month_ua: # year/month approximate. No brackets needed. - ym = "%s-%s%s" % (y, m, self.year_month_ua) - if d: - result = "%s-%s" % (ym, d) - else: - result = ym + if self.year_month_ua: # year/month approximate. No brackets needed. 
+ ym = f"{y}-{m}{self.year_month_ua}" + result = f"{ym}-{d}" if d else ym elif self.month_day_ua: - if self.year_ua: # we don't need the brackets round month and day - result = "%s-%s-%s%s" % (y, m, d, self.month_day_ua) + if self.year_ua: # we don't need the brackets round month and day + result = f"{y}-{m}-{d}{self.month_day_ua}" else: - result = "%s-(%s-%s)%s" % (y, m, d, self.month_day_ua) + result = f"{y}-({m}-{d}){self.month_day_ua}" else: - if d: - result = "%s-%s-%s" % (y, m, d) - else: - result = "%s-%s" % (y, m) + result = f"{y}-{m}-{d}" if d else f"{y}-{m}" if self.all_ua: - result = "(%s)%s" % (result, self.all_ua) + result = f"({result}){self.all_ua}" return result def _precise_year(self, lean): if self.season: return self.season._precise_year(lean) - return super(PartialUncertainOrApproximate, self)._precise_year(lean) + return super()._precise_year(lean) def _precise_month(self, lean): if self.season: return self.season._precise_month(lean) - return super(PartialUncertainOrApproximate, self)._precise_month(lean) + return super()._precise_month(lean) def _precise_day(self, lean): if self.season: return self.season._precise_day(lean) - return super(PartialUncertainOrApproximate, self)._precise_day(lean) + return super()._precise_day(lean) def _get_fuzzy_padding(self, lean): """ @@ -635,23 +658,42 @@ def _get_fuzzy_padding(self, lean): result = relativedelta(0) if self.year_ua: - result += appsettings.PADDING_YEAR_PRECISION * self.year_ua._get_multiplier() + result += ( + appsettings.PADDING_YEAR_PRECISION * self.year_ua._get_multiplier() + ) if self.year_ua_b: - result += appsettings.PADDING_YEAR_PRECISION * self.year_ua_b._get_multiplier() + result += ( + appsettings.PADDING_YEAR_PRECISION * self.year_ua_b._get_multiplier() + ) if self.month_ua: - result += appsettings.PADDING_MONTH_PRECISION * self.month_ua._get_multiplier() + result += ( + appsettings.PADDING_MONTH_PRECISION * self.month_ua._get_multiplier() + ) if self.day_ua: result += 
appsettings.PADDING_DAY_PRECISION * self.day_ua._get_multiplier() if self.year_month_ua: - result += appsettings.PADDING_YEAR_PRECISION * self.year_month_ua._get_multiplier() - result += appsettings.PADDING_MONTH_PRECISION * self.year_month_ua._get_multiplier() + result += ( + appsettings.PADDING_YEAR_PRECISION + * self.year_month_ua._get_multiplier() + ) + result += ( + appsettings.PADDING_MONTH_PRECISION + * self.year_month_ua._get_multiplier() + ) if self.month_day_ua: - result += appsettings.PADDING_DAY_PRECISION * self.month_day_ua._get_multiplier() - result += appsettings.PADDING_MONTH_PRECISION * self.month_day_ua._get_multiplier() + result += ( + appsettings.PADDING_DAY_PRECISION * self.month_day_ua._get_multiplier() + ) + result += ( + appsettings.PADDING_MONTH_PRECISION + * self.month_day_ua._get_multiplier() + ) if self.season_ua: - result += appsettings.PADDING_SEASON_PRECISION * self.season_ua._get_multiplier() + result += ( + appsettings.PADDING_SEASON_PRECISION * self.season_ua._get_multiplier() + ) if self.all_ua: multiplier = self.all_ua._get_multiplier() @@ -687,17 +729,17 @@ def __init__(self, lower=None, upper=None): self.upper = upper def __str__(self): - return "%s..%s" % (self.lower or '', self.upper or '') + return "{}..{}".format(self.lower or "", self.upper or "") class EarlierConsecutives(Level1Interval): def __str__(self): - return "%s%s" % (self.lower, self.upper) + return f"{self.lower}{self.upper}" class LaterConsecutives(Level1Interval): def __str__(self): - return "%s%s" % (self.lower, self.upper) + return f"{self.lower}{self.upper}" class OneOfASet(EDTFObject): @@ -710,21 +752,27 @@ def __init__(self, *args): self.objects = args def __str__(self): - return "[%s]" % (", ".join([str(o) for o in self.objects])) + return "[{}]".format(", ".join([str(o) for o in self.objects])) def _strict_date(self, lean): strict_dates = [x._strict_date(lean) for x in self.objects] # Accounting for possible 'inf' and '-inf' values if lean == LATEST: - 
if any(isinstance(d, float) and d == float('inf') for d in strict_dates): - return float('inf') + if any(isinstance(d, float) and d == float("inf") for d in strict_dates): + return float("inf") else: - return max((d for d in strict_dates if not isinstance(d, float)), default=float('inf')) + return max( + (d for d in strict_dates if not isinstance(d, float)), + default=float("inf"), + ) else: - if any(isinstance(d, float) and d == float('-inf') for d in strict_dates): - return float('-inf') + if any(isinstance(d, float) and d == float("-inf") for d in strict_dates): + return float("-inf") else: - return min((d for d in strict_dates if not isinstance(d, float)), default=float('-inf')) + return min( + (d for d in strict_dates if not isinstance(d, float)), + default=float("-inf"), + ) class MultipleDates(EDTFObject): @@ -769,6 +817,7 @@ def __init__(self, lower, upper): class Level2Season(Season): pass + class ExponentialYear(LongYear): def __init__(self, base, exponent, precision=None): self.base = base @@ -780,7 +829,8 @@ def _precise_year(self): def get_year(self): if self.precision: - return '%sE%sS%s' % (self.base, self.exponent, self.precision) + return f"{self.base}E{self.exponent}S{self.precision}" else: - return '%sE%s' % (self.base, self.exponent) + return f"{self.base}E{self.exponent}" + year = property(get_year) diff --git a/edtf/parser/parser_classes_tests.py b/edtf/parser/parser_classes_tests.py index 2cf330e..e9d7733 100644 --- a/edtf/parser/parser_classes_tests.py +++ b/edtf/parser/parser_classes_tests.py @@ -1,17 +1,23 @@ +# ruff: noqa: S101 # Asserts are ok in tests + import calendar import re -from time import struct_time from datetime import date, datetime from operator import add, sub +from time import struct_time from dateutil.relativedelta import relativedelta from edtf import appsettings -from edtf.convert import dt_to_struct_time, trim_struct_time, \ - TIME_EMPTY_TIME, TIME_EMPTY_EXTRAS +from edtf.convert import ( + TIME_EMPTY_EXTRAS, + 
TIME_EMPTY_TIME, + dt_to_struct_time, + trim_struct_time, +) -EARLIEST = 'earliest' -LATEST = 'latest' +EARLIEST = "earliest" +LATEST = "latest" PRECISION_MILLENIUM = "millenium" PRECISION_CENTURY = "century" @@ -80,14 +86,16 @@ def apply_delta(op, time_struct, delta): # Convert result year back to its original millenium final_year = dt_result.year - millenium_diff return struct_time( - (final_year,) + dt_result.timetuple()[1:6] + tuple(TIME_EMPTY_EXTRAS)) + (final_year,) + dt_result.timetuple()[1:6] + tuple(TIME_EMPTY_EXTRAS) + ) -class EDTFObject(object): +class EDTFObject: """ Object to attact to a parser to become instantiated when the parser completes. """ + parser = None @classmethod @@ -99,9 +107,9 @@ def set_parser(cls, p): def parse_action(cls, toks): kwargs = toks.asDict() try: - return cls(**kwargs) # replace the token list with the class + return cls(**kwargs) # replace the token list with the class except Exception as e: - print("trying to %s.__init__(**%s)" % (cls.__name__, kwargs)) + print(f"trying to {cls.__name__}.__init__(**{kwargs})") raise e @classmethod @@ -109,14 +117,11 @@ def parse(cls, s): return cls.parser.parseString(s)[0] def __repr__(self): - return "%s: '%s'" % (type(self).__name__, str(self)) + return f"{type(self).__name__}: '{str(self)}'" def __init__(self, *args, **kwargs): - str = "%s.__init__(*%s, **%s)" % ( - type(self).__name__, - args, kwargs, - ) - raise NotImplementedError("%s is not implemented." 
% str) + str = f"{type(self).__name__}.__init__(*{args}, **{kwargs})" + raise NotImplementedError(f"{str} is not implemented.") def __str__(self): raise NotImplementedError @@ -137,25 +142,30 @@ def _get_fuzzy_padding(self, lean): return relativedelta(0) def get_is_approximate(self): - return getattr(self, '_is_approximate', False) + return getattr(self, "_is_approximate", False) def set_is_approximate(self, val): self._is_approximate = val + is_approximate = property(get_is_approximate, set_is_approximate) def get_is_uncertain(self): - return getattr(self, '_is_uncertain', False) + return getattr(self, "_is_uncertain", False) def set_is_uncertain(self, val): self._is_uncertain = val + is_uncertain = property(get_is_uncertain, set_is_uncertain) def get_is_uncertain_and_approximate(self): - return getattr(self, '_uncertain_and_approximate', False) + return getattr(self, "_uncertain_and_approximate", False) def set_is_uncertain_and_approximate(self, val): self._uncertain_and_approximate = val - is_uncertain_and_approximate = property(get_is_uncertain_and_approximate, set_is_uncertain_and_approximate) + + is_uncertain_and_approximate = property( + get_is_uncertain_and_approximate, set_is_uncertain_and_approximate + ) def lower_fuzzy(self): strict_val = self.lower_strict() @@ -190,7 +200,9 @@ def __gt__(self, other): return self.lower_strict() > dt_to_struct_time(other) elif isinstance(other, struct_time): return self.lower_strict() > trim_struct_time(other) - raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__)) + raise TypeError( + f"can't compare {type(self).__name__} with {type(other).__name__}" + ) def __ge__(self, other): if isinstance(other, EDTFObject): @@ -199,7 +211,9 @@ def __ge__(self, other): return self.lower_strict() >= dt_to_struct_time(other) elif isinstance(other, struct_time): return self.lower_strict() >= trim_struct_time(other) - raise TypeError("can't compare %s with %s" % (type(self).__name__, 
type(other).__name__)) + raise TypeError( + f"can't compare {type(self).__name__} with {type(other).__name__}" + ) def __lt__(self, other): if isinstance(other, EDTFObject): @@ -208,7 +222,9 @@ def __lt__(self, other): return self.lower_strict() < dt_to_struct_time(other) elif isinstance(other, struct_time): return self.lower_strict() < trim_struct_time(other) - raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__)) + raise TypeError( + f"can't compare {type(self).__name__} with {type(other).__name__}" + ) def __le__(self, other): if isinstance(other, EDTFObject): @@ -217,13 +233,15 @@ def __le__(self, other): return self.lower_strict() <= dt_to_struct_time(other) elif isinstance(other, struct_time): return self.lower_strict() <= trim_struct_time(other) - raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__)) + raise TypeError( + f"can't compare {type(self).__name__} with {type(other).__name__}" + ) # (* ************************** Level 0 *************************** *) -class Date(EDTFObject): +class Date(EDTFObject): def set_year(self, y): if y is None: raise AttributeError("Year must not be None") @@ -231,33 +249,35 @@ def set_year(self, y): def get_year(self): return self._year + year = property(get_year, set_year) def set_month(self, m): self._month = m - if m == None: + if m is None: self.day = None def get_month(self): return self._month + month = property(get_month, set_month) def __init__(self, year=None, month=None, day=None, **kwargs): - for param in ('date', 'lower', 'upper'): + for param in ("date", "lower", "upper"): if param in kwargs: self.__init__(**kwargs[param]) return - self.year = year # Year is required, but sometimes passed in as a 'date' dict. + self.year = year # Year is required, but sometimes passed in as a 'date' dict. 
self.month = month self.day = day def __str__(self): r = self.year if self.month: - r += "-%s" % self.month + r += f"-{self.month}" if self.day: - r += "-%s" % self.day + r += f"-{self.day}" return r def isoformat(self, default=date.max): @@ -270,16 +290,18 @@ def isoformat(self, default=date.max): def _precise_year(self, lean): # Replace any ambiguous characters in the year string with 0s or 9s if lean == EARLIEST: - return int(re.sub(r'X', r'0', self.year)) + return int(re.sub(r"X", r"0", self.year)) else: - return int(re.sub(r'X', r'9', self.year)) + return int(re.sub(r"X", r"9", self.year)) def _precise_month(self, lean): if self.month and self.month != "XX": try: return int(self.month) - except ValueError as e: - raise ValueError("Couldn't convert %s to int (in %s)" % (self.month, self)) + except ValueError as err: + raise ValueError( + f"Couldn't convert {self.month} to int (in {self})" + ) from err else: return 1 if lean == EARLIEST else 12 @@ -303,7 +325,9 @@ def _strict_date(self, lean): self._precise_year(lean), self._precise_month(lean), self._precise_day(lean), - ) + tuple(TIME_EMPTY_TIME) + tuple(TIME_EMPTY_EXTRAS) + ) + + tuple(TIME_EMPTY_TIME) + + tuple(TIME_EMPTY_EXTRAS) ) @property @@ -334,14 +358,14 @@ def __eq__(self, other): return self.isoformat() == other.isoformat() elif isinstance(other, struct_time): return self._strict_date() == trim_struct_time(other) - return super(DateAndTime, self).__eq__(other) + return super().__eq__(other) def __ne__(self, other): if isinstance(other, datetime): return self.isoformat() != other.isoformat() elif isinstance(other, struct_time): return self._strict_date() != trim_struct_time(other) - return super(DateAndTime, self).__ne__(other) + return super().__ne__(other) class Interval(EDTFObject): @@ -350,7 +374,7 @@ def __init__(self, lower, upper): self.upper = upper def __str__(self): - return "%s/%s" % (self.lower, self.upper) + return f"{self.lower}/{self.upper}" def _strict_date(self, lean): if lean == 
EARLIEST: @@ -359,7 +383,9 @@ def _strict_date(self, lean): if r is None: raise AttributeError return r - except AttributeError: # it's a string, or no date. Result depends on the upper date + except ( + AttributeError + ): # it's a string, or no date. Result depends on the upper date upper = self.upper._strict_date(LATEST) return apply_delta(sub, upper, appsettings.DELTA_IF_UNKNOWN) else: @@ -368,8 +394,12 @@ def _strict_date(self, lean): if r is None: raise AttributeError return r - except AttributeError: # an 'unknown' or 'open' string - depends on the lower date - import pdb; pdb.set_trace() + except ( + AttributeError + ): # an 'unknown' or 'open' string - depends on the lower date + # import pdb + + # pdb.set_trace() if self.upper and (self.upper == "open" or self.upper.date == "open"): return dt_to_struct_time(date.today()) # it's still happening else: @@ -420,16 +450,16 @@ def __init__(self, date, ua): def __str__(self): if self.ua: - return "%s%s" % (self.date, self.ua) + return f"{self.date}{self.ua}" else: return str(self.date) def _strict_date(self, lean): if self.date == "open": - return None # depends on the other date + return None # depends on the other date return dt_to_struct_time(date.today()) - if self.date =="unknown": - return None # depends on the other date + if self.date == "unknown": + return None # depends on the other date return self.date._strict_date(lean) def _get_fuzzy_padding(self, lean): @@ -445,7 +475,6 @@ def _get_fuzzy_padding(self, lean): return multiplier * appsettings.PADDING_YEAR_PRECISION - class Testi(EDTFObject): # @classmethod # def parse_action(cls, toks): @@ -455,8 +484,8 @@ class Testi(EDTFObject): def __init__(self, **args): print(args) -class UnspecifiedIntervalSection(EDTFObject): +class UnspecifiedIntervalSection(EDTFObject): def __init__(self, sectionOpen=False, other_section_element=None): if sectionOpen: self.is_open = True @@ -473,13 +502,15 @@ def __str__(self): return ".." 
def _strict_date(self, lean): - #import pdb; pdb.set_trace() + # import pdb; pdb.set_trace() if lean == EARLIEST: if self.is_unknown: upper = self.other._strict_date(LATEST) return apply_delta(sub, upper, appsettings.DELTA_IF_UNKNOWN) else: - return dt_to_struct_time(date.min) # from the beginning of time; *ahem, i mean python datetime + return dt_to_struct_time( + date.min + ) # from the beginning of time; *ahem, i mean python datetime else: if self.is_unknown: lower = self.other._strict_date(EARLIEST) @@ -494,21 +525,29 @@ class Unspecified(Date): class Level1Interval(Interval): def __init__(self, lower=None, upper=None): - #import pdb; pdb.set_trace() + # import pdb; pdb.set_trace() if lower: - if lower['date'] == '..': - self.lower = UnspecifiedIntervalSection(True, UncertainOrApproximate(**upper)) + if lower["date"] == "..": + self.lower = UnspecifiedIntervalSection( + True, UncertainOrApproximate(**upper) + ) else: self.lower = UncertainOrApproximate(**lower) else: - self.lower = UnspecifiedIntervalSection(False, UncertainOrApproximate(**upper)) + self.lower = UnspecifiedIntervalSection( + False, UncertainOrApproximate(**upper) + ) if upper: - if upper['date'] == '..': - self.upper = UnspecifiedIntervalSection(True, UncertainOrApproximate(**lower)) + if upper["date"] == "..": + self.upper = UnspecifiedIntervalSection( + True, UncertainOrApproximate(**lower) + ) else: self.upper = UncertainOrApproximate(**upper) else: - self.upper = UnspecifiedIntervalSection(False, UncertainOrApproximate(**lower)) + self.upper = UnspecifiedIntervalSection( + False, UncertainOrApproximate(**lower) + ) def _get_fuzzy_padding(self, lean): if lean == EARLIEST: @@ -522,7 +561,7 @@ def __init__(self, year): self.year = year def __str__(self): - return "Y%s" % self.year + return f"Y{self.year}" def _precise_year(self): return int(self.year) @@ -530,23 +569,21 @@ def _precise_year(self): def _strict_date(self, lean): py = self._precise_year() if lean == EARLIEST: - return 
struct_time( - [py, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + return struct_time([py, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) else: - return struct_time( - [py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + return struct_time([py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) class Season(Date): def __init__(self, year, season, **kwargs): self.year = year - self.season = season # use season to look up month + self.season = season # use season to look up month # day isn't part of the 'season' spec, but it helps the inherited # `Date` methods do their thing. self.day = None def __str__(self): - return "%s-%s" % (self.year, self.season) + return f"{self.year}-{self.season}" def _precise_month(self, lean): rng = appsettings.SEASON_MONTHS_RANGE[int(self.season)] @@ -560,16 +597,24 @@ def _precise_month(self, lean): class PartialUncertainOrApproximate(Date): - - def set_year(self, y): # Year can be None. + def set_year(self, y): # Year can be None. self._year = y + year = property(Date.get_year, set_year) def __init__( - self, year=None, month=None, day=None, - year_ua=False, month_ua = False, day_ua = False, - year_month_ua = False, month_day_ua = False, - ssn=None, season_ua=False, all_ua=False + self, + year=None, + month=None, + day=None, + year_ua=False, + month_ua=False, + day_ua=False, + year_month_ua=False, + month_day_ua=False, + ssn=None, + season_ua=False, + all_ua=False, ): self.year = year self.month = month @@ -588,64 +633,48 @@ def __init__( self.all_ua = all_ua def __str__(self): - if self.season_ua: - return "%s%s" % (self.season, self.season_ua) + return f"{self.season}{self.season_ua}" - if self.year_ua: - y = "%s%s" % (self.year, self.year_ua) - else: - y = str(self.year) + y = f"{self.year}{self.year_ua}" if self.year_ua else str(self.year) - if self.month_ua: - m = "(%s)%s" % (self.month, self.month_ua) - else: - m = str(self.month) + m = f"({self.month}){self.month_ua}" if self.month_ua else str(self.month) if self.day: - if 
self.day_ua: - d = "(%s)%s" % (self.day, self.day_ua) - else: - d = str(self.day) + d = f"({self.day}){self.day_ua}" if self.day_ua else str(self.day) else: d = None - if self.year_month_ua: # year/month approximate. No brackets needed. - ym = "%s-%s%s" % (y, m, self.year_month_ua) - if d: - result = "%s-%s" % (ym, d) - else: - result = ym + if self.year_month_ua: # year/month approximate. No brackets needed. + ym = f"{y}-{m}{self.year_month_ua}" + result = f"{ym}-{d}" if d else ym elif self.month_day_ua: - if self.year_ua: # we don't need the brackets round month and day - result = "%s-%s-%s%s" % (y, m, d, self.month_day_ua) + if self.year_ua: # we don't need the brackets round month and day + result = f"{y}-{m}-{d}{self.month_day_ua}" else: - result = "%s-(%s-%s)%s" % (y, m, d, self.month_day_ua) + result = f"{y}-({m}-{d}){self.month_day_ua}" else: - if d: - result = "%s-%s-%s" % (y, m, d) - else: - result = "%s-%s" % (y, m) + result = f"{y}-{m}-{d}" if d else f"{y}-{m}" if self.all_ua: - result = "(%s)%s" % (result, self.all_ua) + result = f"({result}){self.all_ua}" return result def _precise_year(self, lean): if self.season: return self.season._precise_year(lean) - return super(PartialUncertainOrApproximate, self)._precise_year(lean) + return super()._precise_year(lean) def _precise_month(self, lean): if self.season: return self.season._precise_month(lean) - return super(PartialUncertainOrApproximate, self)._precise_month(lean) + return super()._precise_month(lean) def _precise_day(self, lean): if self.season: return self.season._precise_day(lean) - return super(PartialUncertainOrApproximate, self)._precise_day(lean) + return super()._precise_day(lean) def _get_fuzzy_padding(self, lean): """ @@ -656,21 +685,38 @@ def _get_fuzzy_padding(self, lean): result = relativedelta(0) if self.year_ua: - result += appsettings.PADDING_YEAR_PRECISION * self.year_ua._get_multiplier() + result += ( + appsettings.PADDING_YEAR_PRECISION * self.year_ua._get_multiplier() + ) if 
self.month_ua: - result += appsettings.PADDING_MONTH_PRECISION * self.month_ua._get_multiplier() + result += ( + appsettings.PADDING_MONTH_PRECISION * self.month_ua._get_multiplier() + ) if self.day_ua: result += appsettings.PADDING_DAY_PRECISION * self.day_ua._get_multiplier() if self.year_month_ua: - result += appsettings.PADDING_YEAR_PRECISION * self.year_month_ua._get_multiplier() - result += appsettings.PADDING_MONTH_PRECISION * self.year_month_ua._get_multiplier() + result += ( + appsettings.PADDING_YEAR_PRECISION + * self.year_month_ua._get_multiplier() + ) + result += ( + appsettings.PADDING_MONTH_PRECISION + * self.year_month_ua._get_multiplier() + ) if self.month_day_ua: - result += appsettings.PADDING_DAY_PRECISION * self.month_day_ua._get_multiplier() - result += appsettings.PADDING_MONTH_PRECISION * self.month_day_ua._get_multiplier() + result += ( + appsettings.PADDING_DAY_PRECISION * self.month_day_ua._get_multiplier() + ) + result += ( + appsettings.PADDING_MONTH_PRECISION + * self.month_day_ua._get_multiplier() + ) if self.season_ua: - result += appsettings.PADDING_SEASON_PRECISION * self.season_ua._get_multiplier() + result += ( + appsettings.PADDING_SEASON_PRECISION * self.season_ua._get_multiplier() + ) if self.all_ua: multiplier = self.all_ua._get_multiplier() @@ -706,7 +752,7 @@ def __init__(self, lower=None, upper=None): self.upper = upper def __str__(self): - return "%s..%s" % (self.lower or '', self.upper or '') + return "{}..{}".format(self.lower or "", self.upper or "") class EarlierConsecutives(Consecutives): @@ -727,7 +773,7 @@ def __init__(self, *args): self.objects = args def __str__(self): - return "[%s]" % (", ".join([str(o) for o in self.objects])) + return "[{}]".format(", ".join([str(o) for o in self.objects])) def _strict_date(self, lean): if lean == LATEST: @@ -746,7 +792,7 @@ def __init__(self, *args): self.objects = args def __str__(self): - return "{%s}" % (", ".join([str(o) for o in self.objects])) + return 
"{{{}}}".format(", ".join([str(o) for o in self.objects])) def _strict_date(self, lean): if lean == LATEST: @@ -786,7 +832,8 @@ def _precise_year(self): def get_year(self): if self.precision: - return '%sE%sS%s' % (self.base, self.exponent, self.precision) + return f"{self.base}E{self.exponent}S{self.precision}" else: - return '%sE%s' % (self.base, self.exponent) + return f"{self.base}E{self.exponent}" + year = property(get_year) diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 877fd0b..52248f0 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -1,10 +1,13 @@ -import pytest +# ruff: noqa: S101 # Asserts are ok in tests + from datetime import date from time import struct_time -from edtf.parser.grammar import parse_edtf as parse -from edtf.parser.parser_classes import EDTFObject, TIME_EMPTY_TIME, TIME_EMPTY_EXTRAS +import pytest + from edtf.parser.edtf_exceptions import EDTFParseException +from edtf.parser.grammar import parse_edtf as parse +from edtf.parser.parser_classes import TIME_EMPTY_EXTRAS, TIME_EMPTY_TIME, EDTFObject # Example object types and attributes represented as tuples. # The first item in each tuple is the input EDTF string, and expected parse result. 
@@ -23,206 +26,209 @@ EXAMPLES = ( # ******************************* LEVEL 0 ********************************* # year, month, day - ('2001-02-03', ('2001-02-03',)), + ("2001-02-03", ("2001-02-03",)), # year, month - ('2008-12', ('2008-12-01', '2008-12-31')), + ("2008-12", ("2008-12-01", "2008-12-31")), # year - ('2008', ('2008-01-01', '2008-12-31')), + ("2008", ("2008-01-01", "2008-12-31")), # a negative year - ('-0999', ('-0999-01-01', '-0999-12-31')), + ("-0999", ("-0999-01-01", "-0999-12-31")), # year zero - ('0000', ('0000-01-01', '0000-12-31')), + ("0000", ("0000-01-01", "0000-12-31")), # DateTimes - ('2001-02-03T09:30:01', ('2001-02-03',)), - ('2004-01-01T10:10:10Z', ('2004-01-01',)), - ('2004-01-01T10:10:10+05:00', ('2004-01-01',)), - ('1985-04-12T23:20:30', ('1985-04-12',)), + ("2001-02-03T09:30:01", ("2001-02-03",)), + ("2004-01-01T10:10:10Z", ("2004-01-01",)), + ("2004-01-01T10:10:10+05:00", ("2004-01-01",)), + ("1985-04-12T23:20:30", ("1985-04-12",)), # Intervals # An interval beginning sometime in 1964 and ending sometime in 2008. Year precision. - ('1964/2008', ('1964-01-01', '2008-12-31')), + ("1964/2008", ("1964-01-01", "2008-12-31")), # An interval beginning sometime in June 2004 and ending sometime in August of 2006. Month precision. - ('2004-06/2006-08', ('2004-06-01', '2006-08-31')), + ("2004-06/2006-08", ("2004-06-01", "2006-08-31")), # An interval beginning sometime on February 1, 2004 and ending sometime on February 8, 2005. Day precision. - ('2004-02-01/2005-02-08', ('2004-02-01', '2005-02-08')), + ("2004-02-01/2005-02-08", ("2004-02-01", "2005-02-08")), # An interval beginning sometime on February 1, 2004 and ending sometime in February 2005. # The precision of the interval is not defined; the start endpoint has day precision and the end endpoint has month precision. 
- ('2004-02-01/2005-02', ('2004-02-01', '2005-02-28')), + ("2004-02-01/2005-02", ("2004-02-01", "2005-02-28")), # An interval beginning sometime on February 1, 2004 and ending sometime in 2005. # The start endpoint has day precision and the end endpoint has year precision. - ('2004-02-01/2005', ('2004-02-01', '2005-12-31')), + ("2004-02-01/2005", ("2004-02-01", "2005-12-31")), # An interval beginning sometime in 2005 and ending sometime in February 2006. - ('2005/2006-02', ('2005-01-01', '2006-02-28')), + ("2005/2006-02", ("2005-01-01", "2006-02-28")), # An interval beginning sometime in -2005 and ending sometime in February -2004. - ('-2005/-1999-02', ('-2005-01-01', '-1999-02-28')), - + ("-2005/-1999-02", ("-2005-01-01", "-1999-02-28")), # ******************************* LEVEL 1 ********************************* - # Uncertain/Approximate + # Uncertain/Approximate # uncertain: possibly the year 1984, but not definitely - ('1984?', ('1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31')), - ('2004-06-11?', ('2004-06-11', '2004-06-11', '2004-06-10', '2004-06-12')), - ('2004-06?', ('2004-06-01', '2004-06-30', '2004-05-01', '2004-07-30')), + ("1984?", ("1984-01-01", "1984-12-31", "1983-01-01", "1985-12-31")), + ("2004-06-11?", ("2004-06-11", "2004-06-11", "2004-06-10", "2004-06-12")), + ("2004-06?", ("2004-06-01", "2004-06-30", "2004-05-01", "2004-07-30")), # "approximately" the year 1984 - ('1984~', ('1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31')), + ("1984~", ("1984-01-01", "1984-12-31", "1983-01-01", "1985-12-31")), # the year is approximately 1984 and even that is uncertain - ('1984%', ('1984-01-01', '1984-12-31', '1982-01-01', '1986-12-31')), + ("1984%", ("1984-01-01", "1984-12-31", "1982-01-01", "1986-12-31")), # Unspecified # some unspecified year in the 1990s. - ('199X', ('1990-01-01', '1999-12-31')), + ("199X", ("1990-01-01", "1999-12-31")), # some unspecified year in the 1900s. 
- ('19XX', ('1900-01-01', '1999-12-31')), + ("19XX", ("1900-01-01", "1999-12-31")), # some month in 1999 - ('1999-XX', ('1999-01-01', '1999-12-31')), + ("1999-XX", ("1999-01-01", "1999-12-31")), # some day in January 1999 - ('1999-01-XX', ('1999-01-01', '1999-01-31')), + ("1999-01-XX", ("1999-01-01", "1999-01-31")), # some day in 1999 - ('1999-XX-XX', ('1999-01-01', '1999-12-31')), - + ("1999-XX-XX", ("1999-01-01", "1999-12-31")), # Uncertain/Approximate lower boundary dates (BCE) - ('-0275~', ('-0275-01-01', '-0275-12-31', '-0276-01-01', '-0274-12-31')), - ('-0001~', ('-0001-01-01', '-0001-12-31', '-0002-01-01', '0000-12-31')), - ('0000~', ('0000-01-01', '0000-12-31', '-0001-01-01', '0001-12-31')), - + ("-0275~", ("-0275-01-01", "-0275-12-31", "-0276-01-01", "-0274-12-31")), + ("-0001~", ("-0001-01-01", "-0001-12-31", "-0002-01-01", "0000-12-31")), + ("0000~", ("0000-01-01", "0000-12-31", "-0001-01-01", "0001-12-31")), # L1 Extended Interval # beginning unknown, end 2006 - ('/2006', ('1996-12-31', '2006-12-31')), + ("/2006", ("1996-12-31", "2006-12-31")), # beginning June 1, 2004, end unknown - ('2004-06-01/', ('2004-06-01', '2014-06-01')), + ("2004-06-01/", ("2004-06-01", "2014-06-01")), # beginning open, end 2006 - ('../2006', ('-inf', '2006-12-31')), + ("../2006", ("-inf", "2006-12-31")), # beginning January 1, 2004 with no end date - ('2004-01-01/..', ('2004-01-01', 'inf')), + ("2004-01-01/..", ("2004-01-01", "inf")), # interval beginning approximately 1984 and ending June 2004 - ('1984~/2004-06', ('1984-01-01', '2004-06-30', '1983-01-01', '2004-06-30')), + ("1984~/2004-06", ("1984-01-01", "2004-06-30", "1983-01-01", "2004-06-30")), # interval beginning 1984 and ending approximately June 2004 - ('1984/2004-06~', ('1984-01-01', '2004-06-30', '1984-01-01', '2004-07-30')), - ('1984?/2004%', ('1984-01-01', '2004-12-31', '1983-01-01', '2006-12-31')), - ('1984~/2004~', ('1984-01-01', '2004-12-31', '1983-01-01', '2005-12-31')), + ("1984/2004-06~", ("1984-01-01", 
"2004-06-30", "1984-01-01", "2004-07-30")), + ("1984?/2004%", ("1984-01-01", "2004-12-31", "1983-01-01", "2006-12-31")), + ("1984~/2004~", ("1984-01-01", "2004-12-31", "1983-01-01", "2005-12-31")), # interval whose beginning is uncertain but thought to be 1984, and whose end is uncertain and approximate but thought to be 2004 - ('1984-06?/2004-08?', ('1984-06-01', '2004-08-31', '1984-05-01', '2004-09-30')), - ('1984-06-02?/2004-08-08~', ('1984-06-02', '2004-08-08', '1984-06-01', '2004-08-09')), - ('1984-06-02?/', ('1984-06-02', '1994-06-02', '1984-06-01', '1994-06-02')), + ("1984-06?/2004-08?", ("1984-06-01", "2004-08-31", "1984-05-01", "2004-09-30")), + ( + "1984-06-02?/2004-08-08~", + ("1984-06-02", "2004-08-08", "1984-06-01", "2004-08-09"), + ), + ("1984-06-02?/", ("1984-06-02", "1994-06-02", "1984-06-01", "1994-06-02")), # Year exceeding 4 digits - ('Y170000002', ('170000002-01-01', '170000002-12-31')), - ('Y-170000002', ('-170000002-01-01', '-170000002-12-31')), + ("Y170000002", ("170000002-01-01", "170000002-12-31")), + ("Y-170000002", ("-170000002-01-01", "-170000002-12-31")), # Seasons - ('2001-21', ('2001-03-01', '2001-05-31')), - ('2003-22', ('2003-06-01', '2003-08-31')), - ('2000-23', ('2000-09-01', '2000-11-30')), - ('2010-24', ('2010-12-01', '2010-12-31')), - + ("2001-21", ("2001-03-01", "2001-05-31")), + ("2003-22", ("2003-06-01", "2003-08-31")), + ("2000-23", ("2000-09-01", "2000-11-30")), + ("2010-24", ("2010-12-01", "2010-12-31")), # ******************************* LEVEL 2 ********************************* # Partial Uncertain/Approximate # uncertain year; month, day known - ('2004?-06-11', ('2004-06-11', '2003-06-11', '2005-06-11')), + ("2004?-06-11", ("2004-06-11", "2003-06-11", "2005-06-11")), # year and month are approximate; day known - ('2004-06~-11', ('2004-06-11', '2003-05-11', '2005-07-11')), + ("2004-06~-11", ("2004-06-11", "2003-05-11", "2005-07-11")), # uncertain month, year and day known - ('2004-?06-11', ('2004-06-11', '2004-05-11', 
'2004-07-11')), + ("2004-?06-11", ("2004-06-11", "2004-05-11", "2004-07-11")), # day is approximate; year, month known - ('2004-06-~11', ('2004-06-11', '2004-06-10', '2004-06-12')), + ("2004-06-~11", ("2004-06-11", "2004-06-10", "2004-06-12")), # Year known, month within year is approximate and uncertain - NEW SPEC - ('2004-%06', ('2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30')), + ("2004-%06", ("2004-06-01", "2004-06-30", "2004-04-01", "2004-08-30")), # Year known, month and day uncertain - NEW SPEC - ('2004-?06-?11', ('2004-06-11', '2004-05-10', '2004-07-12')), + ("2004-?06-?11", ("2004-06-11", "2004-05-10", "2004-07-12")), # Year uncertain, month known, day approximate - NEW SPEC - ('2004?-06-~11', ('2004-06-11', '2003-06-10', '2005-06-12')), + ("2004?-06-~11", ("2004-06-11", "2003-06-10", "2005-06-12")), # Year uncertain and month is both uncertain and approximate - NEW SPEC - ('?2004-%06', ('2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30')), + ("?2004-%06", ("2004-06-01", "2004-06-30", "2003-04-01", "2005-08-30")), # This has the same meaning as the previous example.- NEW SPEC - ('2004?-%06', ('2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30')), + ("2004?-%06", ("2004-06-01", "2004-06-30", "2003-04-01", "2005-08-30")), # Year uncertain, month and day approximate. - NEW SPEC - ('2004?-~06-~04', ('2004-06-04', '2003-05-03', '2005-07-05')), + ("2004?-~06-~04", ("2004-06-04", "2003-05-03", "2005-07-05")), # Year known, month and day approximate. 
- NEW SPEC - ('2011-~06-~04', ('2011-06-04', '2011-05-03', '2011-07-05')), + ("2011-~06-~04", ("2011-06-04", "2011-05-03", "2011-07-05")), # Partial unspecified # December 25 sometime during the 1560s - ('156X-12-25', ('1560-12-25', '1569-12-25')), + ("156X-12-25", ("1560-12-25", "1569-12-25")), # December 25 sometime during the 1500s - ('15XX-12-25', ('1500-12-25', '1599-12-25')), + ("15XX-12-25", ("1500-12-25", "1599-12-25")), # Year and day of month specified, month unspecified - ('1560-XX-25', ('1560-01-25', '1560-12-25')), - ('15XX-12-XX', ('1500-12-01', '1599-12-31')), + ("1560-XX-25", ("1560-01-25", "1560-12-25")), + ("15XX-12-XX", ("1500-12-01", "1599-12-31")), # Day specified, year and month unspecified - ('XXXX-XX-23', ('0000-01-23', '9999-12-23')), - + ("XXXX-XX-23", ("0000-01-23", "9999-12-23")), # One of a Set # One of the years 1667, 1668, 1670, 1671, 1672 - ('[1667, 1668, 1670..1672]', ('1667-01-01', '1672-12-31')), + ("[1667, 1668, 1670..1672]", ("1667-01-01", "1672-12-31")), # December 3, 1760 or some earlier date - ('[..1760-12-03]', ('-inf', '1760-12-03')), + ("[..1760-12-03]", ("-inf", "1760-12-03")), # December 1760 or some later month - ('[1760-12..]', ('1760-12-01', 'inf')), + ("[1760-12..]", ("1760-12-01", "inf")), # January or February of 1760 or December 1760 or some later month # This test is failing due to a code issue: # TypeError: '>' not supported between instances of 'float' and 'time.struct_time' - ('[1760-01, 1760-02, 1760-12..]', ('1760-01-01', 'inf')), #TODO fix in parser_classes + ( + "[1760-01, 1760-02, 1760-12..]", + ("1760-01-01", "inf"), + ), # TODO fix in parser_classes # Either the year 1667 or the month December of 1760. 
- ('[1667, 1760-12]', ('1667-01-01', '1760-12-31')), + ("[1667, 1760-12]", ("1667-01-01", "1760-12-31")), # Multiple Dates # All of the years 1667, 1668, 1670, 1671, 1672 - ('{1667,1668, 1670..1672}', ('1667-01-01', '1672-12-31')), + ("{1667,1668, 1670..1672}", ("1667-01-01", "1672-12-31")), # The year 1960 and the month December of 1961. - ('{1960, 1961-12}', ('1960-01-01', '1961-12-31')), - + ("{1960, 1961-12}", ("1960-01-01", "1961-12-31")), # Masked Precision --> eliminated # A date during the 1960s - #('196x', '1960-01-01', '1969-12-31'), + # ('196x', '1960-01-01', '1969-12-31'), # A date during the 1900s - #('19xx', '1900-01-01', '1999-12-31'), - + # ('19xx', '1900-01-01', '1999-12-31'), # L2 Extended Interval # Interval with fuzzy day endpoints in June 2004 - ('2004-06-~01/2004-06-~20', ('2004-06-01', '2004-06-20', '2004-05-31', '2004-06-21')), + ( + "2004-06-~01/2004-06-~20", + ("2004-06-01", "2004-06-20", "2004-05-31", "2004-06-21"), + ), # The interval began on an unspecified day in June 2004. - ('2004-06-XX/2004-07-03', ('2004-06-01', '2004-07-03')), + ("2004-06-XX/2004-07-03", ("2004-06-01", "2004-07-03")), # Year Requiring More than Four Digits - Exponential Form # the year 170000000 - ('Y17E7', ('170000000-01-01', '170000000-12-31')), + ("Y17E7", ("170000000-01-01", "170000000-12-31")), # the year -170000000 - ('Y-17E7', ('-170000000-01-01', '-170000000-12-31')), + ("Y-17E7", ("-170000000-01-01", "-170000000-12-31")), # Some year between 171010000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.) 
# TODO Not yet implemented, see https://github.com/ixc/python-edtf/issues/12 # ('Y17101E4S3', ('171010000-01-01', '171999999-12-31')), # L2 Seasons # Spring southern hemisphere, 2001 - ('2001-29', ('2001-09-01', '2001-11-30')), + ("2001-29", ("2001-09-01", "2001-11-30")), # second quarter of 2001 - ('2001-34', ('2001-04-01', '2001-06-30')), + ("2001-34", ("2001-04-01", "2001-06-30")), ) BAD_EXAMPLES = ( None, - '', - 'not a edtf string', - 'Y17E7-12-26', # Y indicates that the date is year only - '2016-13-08', # wrong day order - '2016-02-39', # out of range - '-0000-01-01', # negative zero year - '2004-(06)?-11', # uncertain month, year and day known - OLD SPEC - '2004-06-(11)~', # day is approximate; year, month known - OLD SPEC - '2004-(06)%', # Year known, month within year is approximate and uncertain - OLD SPEC - '2004-(06-11)?', # Year known, month and day uncertain - OLD SPEC - '2004?-06-(11)~', # Year uncertain, month known, day approximate - OLD SPEC - '(2004-(06)~)?', # Year uncertain and month is both uncertain and approximate - OLD SPEC - '(2004)?-06-04~', # Year uncertain, month and day approximate.- OLD SPEC - '(2011)-06-04~', # Year known, month and day approximate. 
Note that this has the same meaning as the following.- OLD SPEC - '2011-(06-04)~', # Year known, month and day approximate.- OLD SPEC - '2004-06-(01)~/2004-06-(20)~', # An interval in June 2004 beginning approximately the first and ending approximately the 20th - OLD SPEC + "", + "not a edtf string", + "Y17E7-12-26", # Y indicates that the date is year only + "2016-13-08", # wrong day order + "2016-02-39", # out of range + "-0000-01-01", # negative zero year + "2004-(06)?-11", # uncertain month, year and day known - OLD SPEC + "2004-06-(11)~", # day is approximate; year, month known - OLD SPEC + "2004-(06)%", # Year known, month within year is approximate and uncertain - OLD SPEC + "2004-(06-11)?", # Year known, month and day uncertain - OLD SPEC + "2004?-06-(11)~", # Year uncertain, month known, day approximate - OLD SPEC + "(2004-(06)~)?", # Year uncertain and month is both uncertain and approximate - OLD SPEC + "(2004)?-06-04~", # Year uncertain, month and day approximate.- OLD SPEC + "(2011)-06-04~", # Year known, month and day approximate. Note that this has the same meaning as the following.- OLD SPEC + "2011-(06-04)~", # Year known, month and day approximate.- OLD SPEC + "2004-06-(01)~/2004-06-(20)~", # An interval in June 2004 beginning approximately the first and ending approximately the 20th - OLD SPEC ) + def iso_to_struct_time(iso_date): - """ Convert YYYY-mm-dd date strings or infinities to time structs or float infinities. 
""" - if iso_date == 'inf': - return float('inf') - elif iso_date == '-inf': - return float('-inf') + """Convert YYYY-mm-dd date strings or infinities to time structs or float infinities.""" + if iso_date == "inf": + return float("inf") + elif iso_date == "-inf": + return float("-inf") - if iso_date[0] == '-': + if iso_date[0] == "-": is_negative = True iso_date = iso_date[1:] else: is_negative = False - y, mo, d = [int(i) for i in iso_date.split('-')] + y, mo, d = (int(i) for i in iso_date.split("-")) if is_negative: y *= -1 return struct_time([y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) @@ -230,18 +236,20 @@ def iso_to_struct_time(iso_date): @pytest.mark.parametrize("test_input,expected_tuple", EXAMPLES) def test_edtf_examples(test_input, expected_tuple): - """ Test parsing of EDTF strings with expected outputs. """ + """Test parsing of EDTF strings with expected outputs.""" result = parse(test_input) assert isinstance(result, EDTFObject), "Result should be an instance of EDTFObject" # Extract only the date part if the result includes a time. result_date = str(result) - if 'T' in result_date: - result_date = result_date.split('T')[0] + if "T" in result_date: + result_date = result_date.split("T")[0] # Unpack expected results based on their count if len(expected_tuple) == 1: - assert result_date == expected_tuple[0], f"Expected {expected_tuple[0]}, got {result_date}" + assert ( + result_date == expected_tuple[0] + ), f"Expected {expected_tuple[0]}, got {result_date}" elif len(expected_tuple) == 2: lower_strict = iso_to_struct_time(expected_tuple[0]) upper_strict = iso_to_struct_time(expected_tuple[1]) @@ -268,13 +276,13 @@ def test_edtf_examples(test_input, expected_tuple): @pytest.mark.parametrize("bad_input", BAD_EXAMPLES) def test_non_parsing(bad_input): - """ Test that non-parsing inputs correctly raise an exception. 
""" + """Test that non-parsing inputs correctly raise an exception.""" with pytest.raises(EDTFParseException): parse(bad_input) def test_comparisons(): - """ Test comparisons between parsed EDTF objects and standard dates. """ + """Test comparisons between parsed EDTF objects and standard dates.""" d1 = parse("1979-08~") d2 = parse("1979-08~") d3 = parse("1979-09-16") diff --git a/edtf/tests.py b/edtf/tests.py index f5ef655..9812b65 100644 --- a/edtf/tests.py +++ b/edtf/tests.py @@ -1,14 +1,18 @@ +# ruff: noqa: S101 # Asserts are ok in tests + +from datetime import date, datetime from time import struct_time -from datetime import datetime, date from edtf import convert + def test_dt_to_struct_time_for_datetime(): now = datetime.now() st = convert.dt_to_struct_time(now) assert st[:6] == now.timetuple()[:6] assert st[6:] == (0, 0, -1) + def test_dt_to_struct_time_for_date(): today = date.today() st = convert.dt_to_struct_time(today) @@ -16,11 +20,15 @@ def test_dt_to_struct_time_for_date(): assert st[3:6] == (0, 0, 0) assert st[6:] == (0, 0, -1) + def test_struct_time_to_date(): - st = struct_time([2018, 4, 19] + convert.TIME_EMPTY_TIME + convert.TIME_EMPTY_EXTRAS) + st = struct_time( + [2018, 4, 19] + convert.TIME_EMPTY_TIME + convert.TIME_EMPTY_EXTRAS + ) d = date(*st[:3]) assert d == convert.struct_time_to_date(st) + def test_struct_time_to_datetime(): st = struct_time([2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) dt = datetime(*st[:6]) @@ -28,14 +36,23 @@ def test_struct_time_to_datetime(): assert dt == converted_dt assert converted_dt.timetuple()[6:] == (3, 109, -1) + def test_trim_struct_time(): now = datetime.now() st = now.timetuple() trimmed_st = convert.trim_struct_time(st) - assert trimmed_st[:6] == (now.year, now.month, now.day, now.hour, now.minute, now.second) + assert trimmed_st[:6] == ( + now.year, + now.month, + now.day, + now.hour, + now.minute, + now.second, + ) assert trimmed_st[6:] == (0, 0, -1) assert st[6:] != (0, 0, -1) + def 
test_struct_time_to_jd(): st_ad = struct_time([2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) jd_ad = 2458227.9263194446 @@ -44,6 +61,7 @@ def test_struct_time_to_jd(): jd_bc = 984091.9263194444 assert jd_bc == convert.struct_time_to_jd(st_bc) + def test_jd_to_struct_time(): jd_ad = 2458227.9263194446 st_ad = struct_time([2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) @@ -52,27 +70,32 @@ def test_jd_to_struct_time(): st_bc = struct_time([-2018, 4, 19] + [10, 13, 54 - 1] + convert.TIME_EMPTY_EXTRAS) assert st_bc == convert.jd_to_struct_time(jd_bc) + def test_jd_round_trip_for_extreme_future(): original_st = struct_time([999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) jd = convert.struct_time_to_jd(original_st) converted_st = convert.jd_to_struct_time(jd) assert original_st[:5] == converted_st[:5] - assert 3 - 1 == converted_st[5] + assert converted_st[5] == 3 - 1 + def test_jd_round_trip_for_extreme_past(): original_st = struct_time([-999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) converted_st = convert.jd_to_struct_time(convert.struct_time_to_jd(original_st)) - assert (-999999 + 1, 8, 4, 21, 15, 3, 0, 0, -1) == tuple(converted_st) + assert tuple(converted_st) == (-999999 + 1, 8, 4, 21, 15, 3, 0, 0, -1) + def test_jd_round_trip_for_zero_year_aka_1_bc(): original_st = struct_time([0, 9, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) converted_st = convert.jd_to_struct_time(convert.struct_time_to_jd(original_st)) - assert (0, 9, 5, 4, 58, 59, 0, 0, -1) == tuple(converted_st) + assert tuple(converted_st) == (0, 9, 5, 4, 58, 59, 0, 0, -1) + def test_jd_round_trip_for_2_bc(): original_st = struct_time([-1, 12, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) converted_st = convert.jd_to_struct_time(convert.struct_time_to_jd(original_st)) - assert (-1, 12, 5, 4, 58, 59, 0, 0, -1) == tuple(converted_st) + assert tuple(converted_st) == (-1, 12, 5, 4, 58, 59, 0, 0, -1) + def test_roll_negative_time_fields(): year = -100 @@ -81,4 +104,6 @@ 
def test_roll_negative_time_fields(): hour = -25 minute = -74 second = -253 - assert (-102, 5, 24, 21, 41, 47) == convert._roll_negative_time_fields(year, month, day, hour, minute, second) + assert convert._roll_negative_time_fields( + year, month, day, hour, minute, second + ) == (-102, 5, 24, 21, 41, 47) diff --git a/edtf_django_tests/edtf_django_tests/settings.py b/edtf_django_tests/edtf_django_tests/settings.py index a8121e3..bad4f60 100644 --- a/edtf_django_tests/edtf_django_tests/settings.py +++ b/edtf_django_tests/edtf_django_tests/settings.py @@ -20,7 +20,7 @@ # See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/ # SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = "django-insecure-zkd&%e=di9d(p@wq7vnstn+4dx7cxbxkve�*+57sks0q$=0a" +SECRET_KEY = "django-insecure-zkd&%e=di9d(p@wq7vnstn+4dx7cxbxkve�*+57sks0q$=0a" # noqa: S105 (only for testing) # SECURITY WARNING: don't run with debug turned on in production! DEBUG = True diff --git a/edtf_django_tests/edtf_django_tests/urls.py b/edtf_django_tests/edtf_django_tests/urls.py index ceca78b..0b30a1b 100644 --- a/edtf_django_tests/edtf_django_tests/urls.py +++ b/edtf_django_tests/edtf_django_tests/urls.py @@ -14,6 +14,7 @@ 1. Import the include() function: from django.urls import include, path 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) """ + from django.contrib import admin from django.urls import path diff --git a/edtf_django_tests/edtf_integration/admin.py b/edtf_django_tests/edtf_integration/admin.py index 8c38f3f..846f6b4 100644 --- a/edtf_django_tests/edtf_integration/admin.py +++ b/edtf_django_tests/edtf_integration/admin.py @@ -1,3 +1 @@ -from django.contrib import admin - # Register your models here. 
diff --git a/edtf_django_tests/edtf_integration/models.py b/edtf_django_tests/edtf_integration/models.py index 0274d5f..f5dbcc0 100644 --- a/edtf_django_tests/edtf_integration/models.py +++ b/edtf_django_tests/edtf_integration/models.py @@ -1,4 +1,5 @@ from django.db import models + from edtf.fields import EDTFField @@ -8,7 +9,7 @@ class TestEvent(models.Model): blank=True, null=True, max_length=255, - help_text="Enter the date in natural language format (e.g., 'Approximately June 2004')." + help_text="Enter the date in natural language format (e.g., 'Approximately June 2004').", ) date_edtf_direct = models.CharField( @@ -16,7 +17,7 @@ class TestEvent(models.Model): max_length=255, blank=True, null=True, - help_text="Enter the date in EDTF format (e.g., '2004-06~')." + help_text="Enter the date in EDTF format (e.g., '2004-06~').", ) # EDTF field that parses the input from either natural language or direct EDTF string @@ -28,12 +29,12 @@ class TestEvent(models.Model): # misparses an EDTF string as a natural language string (e.g. 
`2020-03-15/2020-04-15` -> `2020-03-15`) date_edtf = EDTFField( "Date of creation (EDTF)", - natural_text_field='date_display', - direct_input_field='date_edtf_direct', - lower_fuzzy_field='date_earliest', - upper_fuzzy_field='date_latest', - lower_strict_field='date_sort_ascending', - upper_strict_field='date_sort_descending', + natural_text_field="date_display", + direct_input_field="date_edtf_direct", + lower_fuzzy_field="date_earliest", + upper_fuzzy_field="date_latest", + lower_strict_field="date_sort_ascending", + upper_strict_field="date_sort_descending", blank=True, null=True, ) @@ -43,3 +44,14 @@ class TestEvent(models.Model): # Computed fields for sorting date_sort_ascending = models.FloatField(blank=True, null=True) date_sort_descending = models.FloatField(blank=True, null=True) + + def __str__(self) -> str: + return ( + f"Test Event: {self.date_display=}, " + f"{self.date_edtf_direct=}, " + f"{self.date_earliest=}, " + f"{self.date_latest=}, " + f"{self.date_sort_ascending=}, " + f"{self.date_sort_descending=}, " + f"{self.date_edtf=}" + ) diff --git a/edtf_django_tests/edtf_integration/tests.py b/edtf_django_tests/edtf_integration/tests.py index 9385733..fbea7f6 100644 --- a/edtf_django_tests/edtf_integration/tests.py +++ b/edtf_django_tests/edtf_integration/tests.py @@ -1,8 +1,11 @@ from django.test import TestCase -from .models import TestEvent -from edtf.parser.grammar import parse_edtf as parse -from edtf.parser import EDTFObject + from edtf.convert import struct_time_to_jd +from edtf.parser import EDTFObject +from edtf.parser.grammar import parse_edtf as parse + +from .models import TestEvent + class TestEventModelTests(TestCase): def setUp(self): @@ -15,28 +18,25 @@ def setUp(self): self.event4 = TestEvent.objects.create(date_display="Approximately August 2018") self.event5 = TestEvent.objects.create(date_edtf_direct="2021-05-06") - def test_edtf_object_returned(self): for event in TestEvent.objects.all(): self.assertIsInstance(event.date_edtf, 
EDTFObject) - def test_sorting(self): - events = list(TestEvent.objects.order_by('date_sort_ascending')) + events = list(TestEvent.objects.order_by("date_sort_ascending")) self.assertEqual(events[0].date_display, "Approximately August 2018") self.assertEqual(events[1].date_edtf_direct, "2019-11") self.assertEqual(events[2].date_edtf_direct, "2020-03-15/2020-04-15") self.assertEqual(events[3].date_edtf_direct, "2021-05-06") self.assertEqual(events[4].date_edtf_direct, "2021-05-06") - events_desc = list(TestEvent.objects.order_by('-date_sort_descending')) + events_desc = list(TestEvent.objects.order_by("-date_sort_descending")) self.assertEqual(events_desc[0].date_edtf_direct, "2021-05-06") self.assertEqual(events_desc[1].date_edtf_direct, "2021-05-06") self.assertEqual(events_desc[2].date_edtf_direct, "2020-03-15/2020-04-15") self.assertEqual(events_desc[3].date_edtf_direct, "2019-11") self.assertEqual(events_desc[4].date_display, "Approximately August 2018") - def test_date_boundaries(self): event = TestEvent.objects.get(date_edtf_direct="2020-03-15/2020-04-15") expected_earliest_jd = struct_time_to_jd(parse("2020-03-15").lower_strict()) @@ -46,9 +46,11 @@ def test_date_boundaries(self): expected_earliest_jd = struct_time_to_jd(parse("2021-05-06").lower_strict()) expected_latest_jd = struct_time_to_jd(parse("2021-05-06").upper_strict()) - self.assertAlmostEqual(self.event2.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual( + self.event2.date_earliest, expected_earliest_jd, places=1 + ) self.assertAlmostEqual(self.event2.date_latest, expected_latest_jd, places=1) - + event3 = TestEvent.objects.get(date_edtf_direct="2019-11") expected_earliest_jd = struct_time_to_jd(parse("2019-11").lower_strict()) expected_latest_jd = struct_time_to_jd(parse("2019-11").upper_strict()) @@ -75,13 +77,29 @@ def test_date_display(self): def test_comparison(self): # test equality of the same dates - self.assertEqual(self.event2.date_edtf, self.event5.date_edtf, 
"Events with the same date should be equal") + self.assertEqual( + self.event2.date_edtf, + self.event5.date_edtf, + "Events with the same date should be equal", + ) # test inequality of different dates - self.assertNotEqual(self.event1.date_edtf, self.event2.date_edtf, "Events with different dates should not be equal") + self.assertNotEqual( + self.event1.date_edtf, + self.event2.date_edtf, + "Events with different dates should not be equal", + ) # greater than - self.assertGreater(self.event2.date_edtf, self.event3.date_edtf, "2021-05-06 is greater than 2019-11") + self.assertGreater( + self.event2.date_edtf, + self.event3.date_edtf, + "2021-05-06 is greater than 2019-11", + ) # less than - self.assertLess(self.event3.date_edtf, self.event2.date_edtf, "2019-11 is less than 2021-05-06") \ No newline at end of file + self.assertLess( + self.event3.date_edtf, + self.event2.date_edtf, + "2019-11 is less than 2021-05-06", + ) diff --git a/edtf_django_tests/edtf_integration/views.py b/edtf_django_tests/edtf_integration/views.py index 91ea44a..60f00ef 100644 --- a/edtf_django_tests/edtf_integration/views.py +++ b/edtf_django_tests/edtf_integration/views.py @@ -1,3 +1 @@ -from django.shortcuts import render - # Create your views here. 
diff --git a/edtf_django_tests/manage.py b/edtf_django_tests/manage.py index b2d2a20..ffd375b 100755 --- a/edtf_django_tests/manage.py +++ b/edtf_django_tests/manage.py @@ -1,5 +1,6 @@ #!/usr/bin/env python """Django's command-line utility for administrative tasks.""" + import os import sys diff --git a/pyproject.toml b/pyproject.toml index 0b7a0ae..7cebc56 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,8 @@ classifiers = [ [project.optional-dependencies] test = [ "django>=4.2,<5.0", - "pytest" + "pytest", + "ruff" ] [project.urls] @@ -76,3 +77,44 @@ python_files = ["tests.py", "test_*.py", "*_test.py", "*_tests.py"] python_classes = ["Test*", "*Tests"] python_functions = ["test_*"] addopts = "--ignore=edtf_django_tests/" + +[tool.ruff] +# Python 3.8 +target-version = "py38" + +extend-exclude = [ + '**/migrations/*', +] + +[tool.ruff.lint] +select = [ + # pycodestyle + "E", + # pycodestyle warnings + "W", + # Pyflakes + "F", + # pyupgrade + ## Flake8 plugins + "UP", + # flake8-bugbear + "B", + # flake8-comprehensions + "C", + # flake8-django + "DJ", + # flake8-bandit + "S", + # flake8-simplify + "SIM", + # isort + "I", +] + +ignore = [ + # Ignore Pycodestyle line-length warnings, (mainly long comments). + "E501", + # Ignore McCabe complexity (for now). 
+ "C901", +] + From 61c792e7e122ad6301a171b82c36d30969938ca2 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Tue, 14 May 2024 13:31:16 +1000 Subject: [PATCH 041/102] Remove pdbs --- edtf/parser/parser_classes_tests.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/edtf/parser/parser_classes_tests.py b/edtf/parser/parser_classes_tests.py index e9d7733..857d0f6 100644 --- a/edtf/parser/parser_classes_tests.py +++ b/edtf/parser/parser_classes_tests.py @@ -397,9 +397,6 @@ def _strict_date(self, lean): except ( AttributeError ): # an 'unknown' or 'open' string - depends on the lower date - # import pdb - - # pdb.set_trace() if self.upper and (self.upper == "open" or self.upper.date == "open"): return dt_to_struct_time(date.today()) # it's still happening else: @@ -502,7 +499,6 @@ def __str__(self): return ".." def _strict_date(self, lean): - # import pdb; pdb.set_trace() if lean == EARLIEST: if self.is_unknown: upper = self.other._strict_date(LATEST) @@ -525,7 +521,6 @@ class Unspecified(Date): class Level1Interval(Interval): def __init__(self, lower=None, upper=None): - # import pdb; pdb.set_trace() if lower: if lower["date"] == "..": self.lower = UnspecifiedIntervalSection( From 17f7960b371cc2b9efb807e9cadb1e44dfb98b2a Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Tue, 14 May 2024 13:44:27 +1000 Subject: [PATCH 042/102] Don't require null=True for the CharFields #49 --- edtf/fields.py | 6 +++--- .../edtf_integration/migrations/0001_initial.py | 4 ++-- edtf_django_tests/edtf_integration/models.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/edtf/fields.py b/edtf/fields.py index b38873b..525fef6 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -125,8 +125,8 @@ def update_values(self, instance, *args, **kwargs): # Get existing value to determine if update is needed existing_value = getattr(instance, self.attname, None) - direct_input = getattr(instance, self.direct_input_field, None) - natural_text = getattr(instance, 
self.natural_text_field, None) + direct_input = getattr(instance, self.direct_input_field, "") + natural_text = getattr(instance, self.natural_text_field, "") # if direct_input is provided and is different from the existing value, update the EDTF field if direct_input and ( @@ -138,7 +138,7 @@ def update_values(self, instance, *args, **kwargs): # TODO pyparsing.ParseExceptions are very noisy and dumps the whole grammar (see https://github.com/ixc/python-edtf/issues/46) # set the natural_text (display) field to the direct_input if it is not provided - if natural_text is None: + if natural_text == "": setattr(instance, self.natural_text_field, direct_input) elif natural_text: diff --git a/edtf_django_tests/edtf_integration/migrations/0001_initial.py b/edtf_django_tests/edtf_integration/migrations/0001_initial.py index 286a9de..0311290 100644 --- a/edtf_django_tests/edtf_integration/migrations/0001_initial.py +++ b/edtf_django_tests/edtf_integration/migrations/0001_initial.py @@ -28,7 +28,7 @@ class Migration(migrations.Migration): blank=True, help_text="Enter the date in natural language format (e.g., 'Approximately June 2004').", max_length=255, - null=True, + null=False, verbose_name="Date of creation (display)", ), ), @@ -38,7 +38,7 @@ class Migration(migrations.Migration): blank=True, help_text="Enter the date in EDTF format (e.g., '2004-06~').", max_length=255, - null=True, + null=False, verbose_name="Date of creation (EDTF format)", ), ), diff --git a/edtf_django_tests/edtf_integration/models.py b/edtf_django_tests/edtf_integration/models.py index f5dbcc0..5120889 100644 --- a/edtf_django_tests/edtf_integration/models.py +++ b/edtf_django_tests/edtf_integration/models.py @@ -7,7 +7,7 @@ class TestEvent(models.Model): date_display = models.CharField( "Date of creation (display)", blank=True, - null=True, + null=False, max_length=255, help_text="Enter the date in natural language format (e.g., 'Approximately June 2004').", ) @@ -16,7 +16,7 @@ class 
TestEvent(models.Model): "Date of creation (EDTF format)", max_length=255, blank=True, - null=True, + null=False, help_text="Enter the date in EDTF format (e.g., '2004-06~').", ) From 85bac2a019ed1d0ff4f8d68561b4a734f8ab475c Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Tue, 14 May 2024 13:44:46 +1000 Subject: [PATCH 043/102] Ruff fix 349 --- edtf/parser/parser_classes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index df19d67..2b4368a 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -785,7 +785,7 @@ def __init__(self, *args): self.objects = args def __str__(self): - return "{%s}" % (", ".join([str(o) for o in self.objects])) + return "{{{}}}".format(", ".join([str(o) for o in self.objects])) def _strict_date(self, lean): if lean == LATEST: From e8b643357c55d51677959feb0785776b50541425 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 16 May 2024 09:46:07 -0400 Subject: [PATCH 044/102] Update year prefix in docs --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index bf3155b..9456dfa 100644 --- a/README.md +++ b/README.md @@ -124,8 +124,8 @@ Test coverage includes every example given in the spec table of features. * Years exceeding four digits: - >>> parse_edtf('y-12000') # 12000 years BCE - LongYear: 'y-12000' + >>> parse_edtf('Y-12000') # 12000 years BCE + LongYear: 'Y-12000' * Season: @@ -167,8 +167,8 @@ Test coverage includes every example given in the spec table of features. 
* Year requiring more than 4 digits - exponential form: - >>> parse_edtf('y-17e7') - ExponentialYear: 'y-17e7' + >>> parse_edtf('Y-17e7') + ExponentialYear: 'Y-17e7' ### Natural language representation From 77c1b6a07c4d2cc1b592f8eacb7742a43bb7ba13 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 16 May 2024 13:20:42 -0400 Subject: [PATCH 045/102] Update module imports Use immediate modules within the package, and import from the package level for integration tests. For now, ignore `pickle` security warning. --- edtf/__init__.py | 69 +++++++++++++++++++-- edtf/fields.py | 2 +- edtf/natlang/__init__.py | 2 + edtf/parser/__init__.py | 53 +++++++++++++++- edtf_django_tests/edtf_integration/tests.py | 5 +- 5 files changed, 121 insertions(+), 10 deletions(-) diff --git a/edtf/__init__.py b/edtf/__init__.py index 4d423fa..7bb2885 100644 --- a/edtf/__init__.py +++ b/edtf/__init__.py @@ -1,4 +1,31 @@ -from edtf.convert import ( +from edtf.natlang import text_to_edtf +from edtf.parser import ( + UA, + Consecutives, + Date, + DateAndTime, + EarlierConsecutives, + EDTFObject, + EDTFParseException, + ExponentialYear, + Interval, + LaterConsecutives, + Level1Interval, + Level2Interval, + Level2Season, + LongYear, + MultipleDates, + OneOfASet, + PartialUncertainOrApproximate, + PartialUnspecified, + Season, + UncertainOrApproximate, + Unspecified, + UnspecifiedIntervalSection, + parse_edtf, +) + +from .convert import ( dt_to_struct_time, jd_to_struct_time, old_specs_to_new_specs_expression, @@ -7,6 +34,40 @@ struct_time_to_jd, trim_struct_time, ) -from edtf.natlang import text_to_edtf -from edtf.parser.grammar import parse_edtf -from edtf.parser.parser_classes import * + +# public +__all__ = [ + "dt_to_struct_time", + "jd_to_struct_time", + "old_specs_to_new_specs_expression", + "struct_time_to_date", + "struct_time_to_datetime", + "struct_time_to_jd", + "trim_struct_time", + "text_to_edtf", + "parse_edtf", + # 
parser_exceptions + "EDTFParseException", + # parser_classes + "EDTFObject", + "Date", + "DateAndTime", + "Interval", + "UA", + "UncertainOrApproximate", + "UnspecifiedIntervalSection", + "Unspecified", + "Level1Interval", + "LongYear", + "Season", + "PartialUncertainOrApproximate", + "PartialUnspecified", + "Consecutives", + "EarlierConsecutives", + "LaterConsecutives", + "OneOfASet", + "MultipleDates", + "Level2Interval", + "Level2Season", + "ExponentialYear", +] diff --git a/edtf/fields.py b/edtf/fields.py index 525fef6..f717592 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -89,7 +89,7 @@ def from_db_value(self, value, expression, connection): try: # Try to unpickle if the value was pickled - return pickle.loads(value) + return pickle.loads(value) # noqa S301 except (pickle.PickleError, TypeError): # If it fails because it's not pickled data, try parsing as EDTF return parse_edtf(value, fail_silently=True) diff --git a/edtf/natlang/__init__.py b/edtf/natlang/__init__.py index 325672f..463863c 100644 --- a/edtf/natlang/__init__.py +++ b/edtf/natlang/__init__.py @@ -1 +1,3 @@ from .en import text_to_edtf + +__all__ = ["text_to_edtf"] diff --git a/edtf/parser/__init__.py b/edtf/parser/__init__.py index e5a0e5f..43197d5 100644 --- a/edtf/parser/__init__.py +++ b/edtf/parser/__init__.py @@ -1,2 +1,51 @@ -from edtf.parser.grammar import parse_edtf -from edtf.parser.parser_classes import * +from .edtf_exceptions import EDTFParseException +from .grammar import parse_edtf +from .parser_classes import ( + UA, + Consecutives, + Date, + DateAndTime, + EarlierConsecutives, + EDTFObject, + ExponentialYear, + Interval, + LaterConsecutives, + Level1Interval, + Level2Interval, + Level2Season, + LongYear, + MultipleDates, + OneOfASet, + PartialUncertainOrApproximate, + PartialUnspecified, + Season, + UncertainOrApproximate, + Unspecified, + UnspecifiedIntervalSection, +) + +__all__ = [ + "parse_edtf", + "EDTFParseException", + "EDTFObject", + "Date", + "DateAndTime", + 
"Interval", + "UA", + "UncertainOrApproximate", + "Unspecified", + "UnspecifiedIntervalSection", + "Level1Interval", + "LongYear", + "Season", + "PartialUncertainOrApproximate", + "PartialUnspecified", + "Consecutives", + "EarlierConsecutives", + "LaterConsecutives", + "OneOfASet", + "MultipleDates", + "Level2Interval", + "Level2Season", + "ExponentialYear", +] diff --git a/edtf_django_tests/edtf_integration/tests.py b/edtf_django_tests/edtf_integration/tests.py index fbea7f6..88fdca8 100644 --- a/edtf_django_tests/edtf_integration/tests.py +++ b/edtf_django_tests/edtf_integration/tests.py @@ -1,8 +1,7 @@ from django.test import TestCase -from edtf.convert import struct_time_to_jd -from edtf.parser import EDTFObject -from edtf.parser.grammar import parse_edtf as parse +from edtf import EDTFObject, struct_time_to_jd +from edtf import parse_edtf as parse from .models import TestEvent From 8d6322586ca8e32b45eab64b98d8c7af64e6804c Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 16 May 2024 13:41:10 -0400 Subject: [PATCH 046/102] Add pre-commit hook Split requirements into primary and dev if still installing from requirements.txt --- .pre-commit-config.yaml | 22 ++++++++++++++++++++++ README.md | 17 +++++++++++++++++ dev-requirements.txt | 5 +++++ pyproject.toml | 4 ++-- 4 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 dev-requirements.txt diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..ff6df15 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,22 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: end-of-file-fixer + exclude: "business-facing/layer" + - id: trailing-whitespace + exclude: "business-facing/layer" + - id: check-yaml + exclude: "business-facing/layer" + - id: check-json + exclude: "business-facing/layer" + + - repo: 
https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.4.4 + hooks: + # Run the linter, and enable lint fixes + - id: ruff + args: [ --fix ] + # Run the formatter. + - id: ruff-format diff --git a/README.md b/README.md index 76aec1a..98e33b7 100644 --- a/README.md +++ b/README.md @@ -375,3 +375,20 @@ Example usage: Since the `EDTFField` and the `_earliest` and `_latest` field values are set automatically, you may want to make them readonly, or not visible in your model admin. + +## To develop +### Setup +- Clone the repository: `git clone https://github.com/ixc/python-edtf.git` +- Set up a virtual environment: `python3 -m venv venv` +- Install the dependencies: `pip install -r dev-requirements.txt` +- Install precommit hooks: `pre-commit install` + +### Running tests +- From `python-edtf`, run the unit tests: `pytest` +- From `python-edtf/edtf_django_tests`, run the integration tests: `python manage.py test edtf_integration` + +### Linting and formatting +- Check linting: `ruff check --output-format=github --config pyproject.toml` +- Check formatting: `ruff format --check --config pyproject.toml` +- Fix formatting: `ruff format --config pyproject.toml` +- Linting and formatting checks and attempted fixes are also run as precommit hooks if you installed them. diff --git a/dev-requirements.txt b/dev-requirements.txt new file mode 100644 index 0000000..1e37df5 --- /dev/null +++ b/dev-requirements.txt @@ -0,0 +1,5 @@ +-r requirements.txt # Include all main requirements +django>=4.2,<5.0 +pytest +ruff +pre-commit diff --git a/pyproject.toml b/pyproject.toml index 7cebc56..869daf6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,8 @@ classifiers = [ test = [ "django>=4.2,<5.0", "pytest", - "ruff" + "ruff", + "pre-commit", ] [project.urls] @@ -117,4 +118,3 @@ ignore = [ # Ignore McCabe complexity (for now). 
"C901", ] - From f74ae803d879c8e3f280b974772130711a4cdaa7 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 20 May 2024 22:28:09 -0400 Subject: [PATCH 047/102] Linting fixes --- edtf/natlang/en.py | 30 ++-- edtf/natlang/tests.py | 347 +++++++++++++++++++++--------------------- edtf/parser/tests.py | 74 +++++---- 3 files changed, 233 insertions(+), 218 deletions(-) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index 6ecb190..f6eef54 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -14,10 +14,10 @@ DEFAULT_DATE_1 = datetime(1234, 1, 1, 0, 0) DEFAULT_DATE_2 = datetime(5678, 10, 10, 0, 0) -SHORT_YEAR_RE = r'(-?)([\dX])([\dX])([\dX])([\dX])' -LONG_YEAR_RE = r'Y(-?)([1-9]\d\d\d\d+)' -CENTURY_RE = r'(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?' -CE_RE = r'(\d{1,4}) (ad|ce|bc|bce)' +SHORT_YEAR_RE = r"(-?)([\dX])([\dX])([\dX])([\dX])" +LONG_YEAR_RE = r"Y(-?)([1-9]\d\d\d\d+)" +CENTURY_RE = r"(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?" +CE_RE = r"(\d{1,4}) (ad|ce|bc|bce)" # Set of RE rules that will cause us to abort text processing, since we know # the results will be wrong. @@ -101,9 +101,9 @@ def text_to_edtf(text): is_after = is_after or re.findall(r"\blater\b", t) if is_before: - result = f"/{result}" # unknown is replaced with null for intervals + result = f"/{result}" # unknown is replaced with null for intervals elif is_after: - result = f"{result}/" # unknown is replaced with null for intervals + result = f"{result}/" # unknown is replaced with null for intervals return result @@ -155,9 +155,8 @@ def text_to_edtf_date(text): is_ce = re.findall(CE_RE, t) if is_century: result = "%02dXX" % (int(is_century[0][0]) - 1,) - is_approximate = is_approximate or \ - re.findall(r'\b(ca?\.?) ?' + CENTURY_RE, t) - is_uncertain = is_uncertain or re.findall(CENTURY_RE + r'\?', t) + is_approximate = is_approximate or re.findall(r"\b(ca?\.?) ?" 
+ CENTURY_RE, t) + is_uncertain = is_uncertain or re.findall(CENTURY_RE + r"\?", t) try: is_bc = is_century[0][-1] in ("bc", "bce") @@ -221,14 +220,13 @@ def text_to_edtf_date(text): # if the given year could be a century (e.g. '1800s') then use # approximate/uncertain markers to decide whether we treat it as # a century or a decade. - if i == 2 and could_be_century and \ - not (is_approximate or is_uncertain): - result += 'X' + if i == 2 and could_be_century and not (is_approximate or is_uncertain): + result += "X" elif i == 3 and is_decade > 0: if mentions_year: - result += 'X' # previously year precision - now just X + result += "X" # previously year precision - now just X else: - result += 'X' # previously decade precision - now just X + result += "X" # previously decade precision - now just X elif date1[i] == date2[i]: # since both attempts at parsing produced the same result # it must be parsed value, not a default @@ -236,12 +234,12 @@ def text_to_edtf_date(text): else: # different values were produced, meaning that it's likely # a default. 
Use 'X' - result += 'X' + result += "X" # strip off unknown chars from end of string - except the first 4 for i in reversed(xrange(len(result))): - if result[i] not in ('X', '-'): + if result[i] not in ("X", "-"): smallest_length = 4 if mentions_month: diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py index 290fead..78ecbc9 100644 --- a/edtf/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -4,185 +4,184 @@ from edtf.natlang.en import text_to_edtf + # TODO update the tests and code to test and output the new spec # where examples are tuples, the second item is the normalised output -@pytest.mark.parametrize("input_text,expected_output", [ - # Ignoring 'late' for simplicity in these examples - ('active late 17th-19th centuries', '16XX/18XX'), - ('active 17-19th Centuries', '16XX/18XX'), - - # Unrecognised values - ('', None), - ('this isn\'t a date', None), - - # Explicitly rejected values that would otherwise be badly converted - ('23rd Dynasty', None), - - # Implied century and specific years - ('90', '1990'), # Implied century - ('1860', '1860'), - ('the year 1800', '1800'), - ('the year 1897', '1897'), - ('January 2008', '2008-01'), - ('January 12, 1940', '1940-01-12'), - - # Uncertain or approximate dates - ('1860?', '1860?'), - ('1862 (uncertain)', '1862?'), - ('maybe 1862', '1862?'), - ('1862 maybe', '1862?'), - ('1862 guess', '1862?'), - ('uncertain: 1862', '1862?'), - ('uncertain: Jan 18 1862', '1862-01-18?'), - ('~ Feb 1812', '1812-02~'), - ('circa Feb 1812', '1812-02~'), - ('Feb 1812 approx', '1812-02~'), - ('c1860', '1860~'), # Different abbreviations - ('c.1860', '1860~'), # With or without . - ('ca1860', '1860~'), - ('ca.1860', '1860~'), - ('c 1860', '1860~'), # With or without space - ('c. 1860', '1860~'), - ('ca. 
1860', '1860~'), - ('approx 1860', '1860~'), - ('1860 approx', '1860~'), - ('1860 approximately', '1860~'), - ('approximately 1860', '1860~'), - ('about 1860', '1860~'), - ('about Spring 1849', '1849-21~'), - ('notcirca 1860', '1860'), # Avoid words containing 'circa' - ('attica 1802', '1802'), # Avoid false positive 'circa' at the end of preceding word - ('attic. 1802', '1802'), # Avoid false positive 'circa' - - # Previously tested masked precision, uncertain or ambiguous masked precision - ('1860s', '186X'), - ('ca. 1860s', '186X~'), - ('c. 1860s', '186X~'), - ('Circa 1840s', '184X~'), - ('circa 1840s', '184X~'), - ('ca. 1860s?', '186X%'), - ('uncertain: approx 1862', '1862%'), - ('1800s', '18XX'), - ('2000s', '20XX'), - ('c1900s', '190X~'), - ('c1800s?', '180X%'), - - # Unspecified dates - ('January 12', 'XXXX-01-12'), - ('January', 'XXXX-01'), - ('10/7/2008', '2008-10-07'), - ('7/2008', '2008-07'), - - # Seasons mapped to specific codes - ('Spring 1872', '1872-21'), - ('Summer 1872', '1872-22'), - ('Autumn 1872', '1872-23'), - ('Fall 1872', '1872-23'), - ('Winter 1872', '1872-24'), - - # Dates relative to known events (before/after) - ('earlier than 1928', '/1928'), - ('before 1928', '/1928'), - ('after 1928', '1928/'), - ('later than 1928', '1928/'), - ('before January 1928', '/1928-01'), - ('before 18 January 1928', '/1928-01-18'), - - # Approximations combined with before/after - ('before approx January 18 1928', '/1928-01-18~'), - ('before approx January 1928', '/1928-01~'), - ('after approx January 1928', '1928-01~/'), - ('after approx Summer 1928', '1928-22~/'), - - # Before and after with uncertain / unspecified components - ('after about the 1920s', '192X~/'), - ('before about the 1900s', '/190X~'), - ('before the 1900s', '/19XX'), - - # previous examples for masked precision, now removed from the EDTF spec - # use `X` for unknown regardless of precision or why the data is unknown - ('decade in 1800s', '18XX'), - ('decade somewhere during the 1800s', 
'18XX'), - ('year in the 1860s', '186X'), - ('year in the 1800s', '18XX'), - ('year in about the 1800s', '180X~'), - ('month in 1872', '1872-XX'), - ('day in Spring 1849', '1849-21-XX'), - ('day in January 1872', '1872-01-XX'), - ('day in 1872', '1872-XX-XX'), - ('birthday in 1872', '1872'), - - # Handling centuries with approximation and uncertainty - ('1st century', '00XX'), - ('10c', '09XX'), - ('19th century', '18XX'), - ('19th century?', '18XX?'), - ('before 19th century', '/18XX'), - ('19c', '18XX'), - ('15c.', '14XX'), - ('ca. 19c', '18XX~'), - ('~19c', '18XX~'), - ('about 19c', '18XX~'), - ('19c?', '18XX?'), - ('c.19c?', '18XX%'), - - # BC/AD dating - ('1 AD', '0001'), - ('17 CE', '0017'), - ('127 CE', '0127'), - ('1270 CE', '1270'), - ('c1 AD', '0001~'), - ('c17 CE', '0017~'), - ('c127 CE', '0127~'), - ('c1270 CE', '1270~'), - ('c64 BCE', '-0064~'), - ('2nd century bc', '-01XX'), # -200 to -101 - ('2nd century bce', '-01XX'), - ('2nd century ad', '01XX'), - ('2nd century ce', '01XX'), - - # Combining uncertainties and approximations in creative ways - ('a day in about Spring 1849?', '1849-21-XX%'), - - # Simple date ranges, showcasing both the limitations and capabilities of the parser - # Not all of these results are correct EDTF, but this is as good as the EDTF implementation - # and simple natural language parser we have. - ('1851-1852', '1851/1852'), - ('1851-1852; printed 1853-1854', '1851/1852'), - ('1851-52', '1851/1852'), - ('1852 - 1860', '1852/1860'), - ('1856-ca. 1865', '1856/1865~'), - ('1857-mid 1860s', '1857/186X'), - ('1858/1860', '[1858, 1860]'), - ('1860s-1870s', '186X/187X'), - ('1910-30', '1910/1930'), - ('active 1910-30', '1910/1930'), - ('1861-67', '1861/1867'), - ('1861-67 (later print)', '1861/1867'), - ('1863 or 1864', '1863'), - ('1863, printed 1870', '1863'), - ('1863, printed ca. 1866', '1863'), - ('1864 or 1866', '1864'), - ('1864, printed ca. 
1864', '1864'), - ('1864-1872, printed 1870s', '1864/1872'), - ('1868-1871?', '1868/1871?'), - ('1869-70', '1869/1870'), - ('1870s, printed ca. 1880s', '187X'), - ('1900-1903, cast before 1929', '1900/1903'), - ('1900; 1973', '1900'), - ('1900; printed 1912', '1900'), - ('1915 late - autumn 1916', '1915/1916-23'), - ('1915, from Camerawork, October 1916', '1915'), # should be {1915, 1916-10} - ('1920s -early 1930s', '192X/193X'), - ('1930s, printed early 1960s', '193X'), # should be something like {193x, 196x}, - ('1932, printed 1976 by Gunther Sander', '1932'), # should be {1932, 1976} - ('1938, printed 1940s-1950s', '1938') # should be something like {1938, 194x-195x} -]) - +@pytest.mark.parametrize( + "input_text,expected_output", + [ + # Ignoring 'late' for simplicity in these examples + ("active late 17th-19th centuries", "16XX/18XX"), + ("active 17-19th Centuries", "16XX/18XX"), + # Unrecognised values + ("", None), + ("this isn't a date", None), + # Explicitly rejected values that would otherwise be badly converted + ("23rd Dynasty", None), + # Implied century and specific years + ("90", "1990"), # Implied century + ("1860", "1860"), + ("the year 1800", "1800"), + ("the year 1897", "1897"), + ("January 2008", "2008-01"), + ("January 12, 1940", "1940-01-12"), + # Uncertain or approximate dates + ("1860?", "1860?"), + ("1862 (uncertain)", "1862?"), + ("maybe 1862", "1862?"), + ("1862 maybe", "1862?"), + ("1862 guess", "1862?"), + ("uncertain: 1862", "1862?"), + ("uncertain: Jan 18 1862", "1862-01-18?"), + ("~ Feb 1812", "1812-02~"), + ("circa Feb 1812", "1812-02~"), + ("Feb 1812 approx", "1812-02~"), + ("c1860", "1860~"), # Different abbreviations + ("c.1860", "1860~"), # With or without . + ("ca1860", "1860~"), + ("ca.1860", "1860~"), + ("c 1860", "1860~"), # With or without space + ("c. 1860", "1860~"), + ("ca. 
1860", "1860~"), + ("approx 1860", "1860~"), + ("1860 approx", "1860~"), + ("1860 approximately", "1860~"), + ("approximately 1860", "1860~"), + ("about 1860", "1860~"), + ("about Spring 1849", "1849-21~"), + ("notcirca 1860", "1860"), # Avoid words containing 'circa' + ( + "attica 1802", + "1802", + ), # Avoid false positive 'circa' at the end of preceding word + ("attic. 1802", "1802"), # Avoid false positive 'circa' + # Previously tested masked precision, uncertain or ambiguous masked precision + ("1860s", "186X"), + ("ca. 1860s", "186X~"), + ("c. 1860s", "186X~"), + ("Circa 1840s", "184X~"), + ("circa 1840s", "184X~"), + ("ca. 1860s?", "186X%"), + ("uncertain: approx 1862", "1862%"), + ("1800s", "18XX"), + ("2000s", "20XX"), + ("c1900s", "190X~"), + ("c1800s?", "180X%"), + # Unspecified dates + ("January 12", "XXXX-01-12"), + ("January", "XXXX-01"), + ("10/7/2008", "2008-10-07"), + ("7/2008", "2008-07"), + # Seasons mapped to specific codes + ("Spring 1872", "1872-21"), + ("Summer 1872", "1872-22"), + ("Autumn 1872", "1872-23"), + ("Fall 1872", "1872-23"), + ("Winter 1872", "1872-24"), + # Dates relative to known events (before/after) + ("earlier than 1928", "/1928"), + ("before 1928", "/1928"), + ("after 1928", "1928/"), + ("later than 1928", "1928/"), + ("before January 1928", "/1928-01"), + ("before 18 January 1928", "/1928-01-18"), + # Approximations combined with before/after + ("before approx January 18 1928", "/1928-01-18~"), + ("before approx January 1928", "/1928-01~"), + ("after approx January 1928", "1928-01~/"), + ("after approx Summer 1928", "1928-22~/"), + # Before and after with uncertain / unspecified components + ("after about the 1920s", "192X~/"), + ("before about the 1900s", "/190X~"), + ("before the 1900s", "/19XX"), + # previous examples for masked precision, now removed from the EDTF spec + # use `X` for unknown regardless of precision or why the data is unknown + ("decade in 1800s", "18XX"), + ("decade somewhere during the 1800s", 
"18XX"), + ("year in the 1860s", "186X"), + ("year in the 1800s", "18XX"), + ("year in about the 1800s", "180X~"), + ("month in 1872", "1872-XX"), + ("day in Spring 1849", "1849-21-XX"), + ("day in January 1872", "1872-01-XX"), + ("day in 1872", "1872-XX-XX"), + ("birthday in 1872", "1872"), + # Handling centuries with approximation and uncertainty + ("1st century", "00XX"), + ("10c", "09XX"), + ("19th century", "18XX"), + ("19th century?", "18XX?"), + ("before 19th century", "/18XX"), + ("19c", "18XX"), + ("15c.", "14XX"), + ("ca. 19c", "18XX~"), + ("~19c", "18XX~"), + ("about 19c", "18XX~"), + ("19c?", "18XX?"), + ("c.19c?", "18XX%"), + # BC/AD dating + ("1 AD", "0001"), + ("17 CE", "0017"), + ("127 CE", "0127"), + ("1270 CE", "1270"), + ("c1 AD", "0001~"), + ("c17 CE", "0017~"), + ("c127 CE", "0127~"), + ("c1270 CE", "1270~"), + ("c64 BCE", "-0064~"), + ("2nd century bc", "-01XX"), # -200 to -101 + ("2nd century bce", "-01XX"), + ("2nd century ad", "01XX"), + ("2nd century ce", "01XX"), + # Combining uncertainties and approximations in creative ways + ("a day in about Spring 1849?", "1849-21-XX%"), + # Simple date ranges, showcasing both the limitations and capabilities of the parser + # Not all of these results are correct EDTF, but this is as good as the EDTF implementation + # and simple natural language parser we have. + ("1851-1852", "1851/1852"), + ("1851-1852; printed 1853-1854", "1851/1852"), + ("1851-52", "1851/1852"), + ("1852 - 1860", "1852/1860"), + ("1856-ca. 1865", "1856/1865~"), + ("1857-mid 1860s", "1857/186X"), + ("1858/1860", "[1858, 1860]"), + ("1860s-1870s", "186X/187X"), + ("1910-30", "1910/1930"), + ("active 1910-30", "1910/1930"), + ("1861-67", "1861/1867"), + ("1861-67 (later print)", "1861/1867"), + ("1863 or 1864", "1863"), + ("1863, printed 1870", "1863"), + ("1863, printed ca. 1866", "1863"), + ("1864 or 1866", "1864"), + ("1864, printed ca. 
1864", "1864"), + ("1864-1872, printed 1870s", "1864/1872"), + ("1868-1871?", "1868/1871?"), + ("1869-70", "1869/1870"), + ("1870s, printed ca. 1880s", "187X"), + ("1900-1903, cast before 1929", "1900/1903"), + ("1900; 1973", "1900"), + ("1900; printed 1912", "1900"), + ("1915 late - autumn 1916", "1915/1916-23"), + ("1915, from Camerawork, October 1916", "1915"), # should be {1915, 1916-10} + ("1920s -early 1930s", "192X/193X"), + ( + "1930s, printed early 1960s", + "193X", + ), # should be something like {193x, 196x}, + ("1932, printed 1976 by Gunther Sander", "1932"), # should be {1932, 1976} + ( + "1938, printed 1940s-1950s", + "1938", + ), # should be something like {1938, 194x-195x} + ], +) def test_natlang(input_text, expected_output): """ Test natural language conversion to EDTF format: Verify that the conversion from text to EDTF format matches the expected output. """ result = text_to_edtf(input_text) - assert result == expected_output, f"Failed for input: {input_text} - expected {expected_output}, got {result}" + assert ( + result == expected_output + ), f"Failed for input: {input_text} - expected {expected_output}, got {result}" diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index ae82057..69891b0 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -117,30 +117,29 @@ # Group qualification: a qualification character to the immediate right of a component applies # to that component as well as to all components to the left. 
# year, month, and day are uncertain and approximate - ('2004-06-11%', ('2004-06-11', '2004-06-09', '2004-06-13')), + ("2004-06-11%", ("2004-06-11", "2004-06-09", "2004-06-13")), # uncertain year; month, day known ("2004?-06-11", ("2004-06-11", "2003-06-11", "2005-06-11")), # year and month are approximate; day known - ('2004-06~-11', ('2004-06-11', '2003-05-11', '2005-07-11')), - + ("2004-06~-11", ("2004-06-11", "2003-05-11", "2005-07-11")), # Qualification of individual component: a qualification character to the immediate left # of the component applies to that component only # day is approximate; year, month known - ('2004-06-~11', ('2004-06-11', '2004-06-10', '2004-06-12')), + ("2004-06-~11", ("2004-06-11", "2004-06-10", "2004-06-12")), # Year known, month within year is approximate and uncertain - ('2004-%06', ('2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30')), + ("2004-%06", ("2004-06-01", "2004-06-30", "2004-04-01", "2004-08-30")), # Year known, month and day uncertain - ('2004-?06-?11', ('2004-06-11', '2004-05-10', '2004-07-12')), + ("2004-?06-?11", ("2004-06-11", "2004-05-10", "2004-07-12")), # Year uncertain, month known, day approximate - ('2004?-06-~11', ('2004-06-11', '2003-06-10', '2005-06-12')), + ("2004?-06-~11", ("2004-06-11", "2003-06-10", "2005-06-12")), # Year uncertain and month is both uncertain and approximate - ('?2004-%06', ('2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30')), + ("?2004-%06", ("2004-06-01", "2004-06-30", "2003-04-01", "2005-08-30")), # This has the same meaning as the previous example.- NEW SPEC - ('2004?-%06', ('2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30')), + ("2004?-%06", ("2004-06-01", "2004-06-30", "2003-04-01", "2005-08-30")), # Year uncertain, month and day approximate - ('2004?-~06-~04', ('2004-06-04', '2003-05-03', '2005-07-05')), + ("2004?-~06-~04", ("2004-06-04", "2003-05-03", "2005-07-05")), # Year known, month and day approximate - ('2011-~06-~04', ('2011-06-04', '2011-05-03', 
'2011-07-05')), + ("2011-~06-~04", ("2011-06-04", "2011-05-03", "2011-07-05")), # Partial unspecified # December 25 sometime during the 1560s ("156X-12-25", ("1560-12-25", "1569-12-25")), @@ -159,21 +158,19 @@ # December 1760 or some later month ("[1760-12..]", ("1760-12-01", "inf")), # January or February of 1760 or December 1760 or some later month - ('[1760-01, 1760-02, 1760-12..]', ('1760-01-01', 'inf')), + ("[1760-01, 1760-02, 1760-12..]", ("1760-01-01", "inf")), # Either the year 1667 or the month December of 1760. ("[1667, 1760-12]", ("1667-01-01", "1760-12-31")), # Multiple Dates # All of the years 1667, 1668, 1670, 1671, 1672 ("{1667,1668, 1670..1672}", ("1667-01-01", "1672-12-31")), # The year 1960 and the month December of 1961. - ('{1960, 1961-12}', ('1960-01-01', '1961-12-31')), - + ("{1960, 1961-12}", ("1960-01-01", "1961-12-31")), # Previously tested masked precision, now eliminated from the spec # A date during the 1960s - ('196X', ('1960-01-01', '1969-12-31')), + ("196X", ("1960-01-01", "1969-12-31")), # A date during the 1900s - ('19XX', ('1900-01-01', '1999-12-31')), - + ("19XX", ("1900-01-01", "1999-12-31")), # L2 Extended Interval # Interval with fuzzy day endpoints in June 2004 ( @@ -186,7 +183,7 @@ # the year 170000000 ("Y17E7", ("170000000-01-01", "170000000-12-31")), # the year -170000000 - ('Y-17E7', ('-170000000-01-01', '-170000000-12-31')), + ("Y-17E7", ("-170000000-01-01", "-170000000-12-31")), # L2 significant digits # Some year between 171010000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.) 
# ('Y17101E4S3', ('171010000-01-01', '171999999-12-31')), @@ -236,6 +233,7 @@ def iso_to_struct_time(iso_date): y *= -1 return struct_time([y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + @pytest.mark.parametrize("test_input,expected_tuple", EXAMPLES) def test_edtf_examples(test_input, expected_tuple): """Test parsing of EDTF strings with expected outputs.""" @@ -255,25 +253,45 @@ def test_edtf_examples(test_input, expected_tuple): elif len(expected_tuple) == 2: lower_strict = iso_to_struct_time(expected_tuple[0]) upper_strict = iso_to_struct_time(expected_tuple[1]) - assert result.lower_strict() == lower_strict, f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" - assert result.upper_strict() == upper_strict, f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" + assert ( + result.lower_strict() == lower_strict + ), f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" + assert ( + result.upper_strict() == upper_strict + ), f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" elif len(expected_tuple) == 3: strict_date = iso_to_struct_time(expected_tuple[0]) lower_fuzzy = iso_to_struct_time(expected_tuple[1]) upper_fuzzy = iso_to_struct_time(expected_tuple[2]) - assert result.lower_strict() == strict_date, f"Lower strict date does not match. Expected {strict_date}, got {result.lower_strict()}" - assert result.upper_strict() == strict_date, f"Upper strict date does not match. Expected {strict_date}, got {result.upper_strict()}" - assert result.lower_fuzzy() == lower_fuzzy, f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" - assert result.upper_fuzzy() == upper_fuzzy, f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" + assert ( + result.lower_strict() == strict_date + ), f"Lower strict date does not match. 
Expected {strict_date}, got {result.lower_strict()}" + assert ( + result.upper_strict() == strict_date + ), f"Upper strict date does not match. Expected {strict_date}, got {result.upper_strict()}" + assert ( + result.lower_fuzzy() == lower_fuzzy + ), f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" + assert ( + result.upper_fuzzy() == upper_fuzzy + ), f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" elif len(expected_tuple) == 4: lower_strict = iso_to_struct_time(expected_tuple[0]) upper_strict = iso_to_struct_time(expected_tuple[1]) lower_fuzzy = iso_to_struct_time(expected_tuple[2]) upper_fuzzy = iso_to_struct_time(expected_tuple[3]) - assert result.lower_strict() == lower_strict, f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" - assert result.upper_strict() == upper_strict, f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" - assert result.lower_fuzzy() == lower_fuzzy, f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" - assert result.upper_fuzzy() == upper_fuzzy, f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" + assert ( + result.lower_strict() == lower_strict + ), f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" + assert ( + result.upper_strict() == upper_strict + ), f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" + assert ( + result.lower_fuzzy() == lower_fuzzy + ), f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" + assert ( + result.upper_fuzzy() == upper_fuzzy + ), f"Upper fuzzy date does not match. 
Expected {upper_fuzzy}, got {result.upper_fuzzy()}" @pytest.mark.parametrize("bad_input", BAD_EXAMPLES) From 26b0afb312115ac691e06ef9b03561ad283a90f2 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Tue, 21 May 2024 09:30:34 -0400 Subject: [PATCH 048/102] Fix qualification (complete) for L1 qualification Apply it to the entire date when a date is parsed as UncertainOrApproximate (L1 qualified) --- edtf/parser/parser_classes.py | 26 +++++++++++++++++++------- edtf/parser/tests.py | 22 +++++++++++++++------- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 2b4368a..bb9a213 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -442,15 +442,27 @@ def _strict_date(self, lean): def _get_fuzzy_padding(self, lean): if not self.ua: - return relativedelta(0) + return relativedelta() multiplier = self.ua._get_multiplier() + padding = relativedelta() + + # Check the presence of uncertainty on each component + # self.precision not helpful here: + # L1 qualified EDTF dates apply qualification across all parts of the date + if self.date.year: + padding += relativedelta( + years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years) + ) + if self.date.month: + padding += relativedelta( + months=int(multiplier * appsettings.PADDING_MONTH_PRECISION.months) + ) + if self.date.day: + padding += relativedelta( + days=int(multiplier * appsettings.PADDING_DAY_PRECISION.days) + ) - if self.date.precision == PRECISION_DAY: - return multiplier * appsettings.PADDING_DAY_PRECISION - elif self.date.precision == PRECISION_MONTH: - return multiplier * appsettings.PADDING_MONTH_PRECISION - elif self.date.precision == PRECISION_YEAR: - return multiplier * appsettings.PADDING_YEAR_PRECISION + return padding class UnspecifiedIntervalSection(EDTFObject): diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 69891b0..8d9a770 
100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -61,8 +61,11 @@ # Uncertain/Approximate # uncertain: possibly the year 1984, but not definitely ("1984?", ("1984-01-01", "1984-12-31", "1983-01-01", "1985-12-31")), - ("2004-06-11?", ("2004-06-11", "2004-06-11", "2004-06-10", "2004-06-12")), - ("2004-06?", ("2004-06-01", "2004-06-30", "2004-05-01", "2004-07-30")), + ( + "2004-06-11?", + ("2004-06-11", "2003-05-10", "2005-07-12"), + ), # everything is fuzzy by 100% for "qualification of a date (complete)" (L1) + ("2004-06?", ("2004-06-01", "2004-06-30", "2003-05-01", "2005-07-30")), # "approximately" the year 1984 ("1984~", ("1984-01-01", "1984-12-31", "1983-01-01", "1985-12-31")), # the year is approximately 1984 and even that is uncertain @@ -84,6 +87,7 @@ ("0000~", ("0000-01-01", "0000-12-31", "-0001-01-01", "0001-12-31")), # L1 Extended Interval # beginning unknown, end 2006 + # for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years) ("/2006", ("1996-12-31", "2006-12-31")), # beginning June 1, 2004, end unknown ("2004-06-01/", ("2004-06-01", "2014-06-01")), @@ -94,16 +98,16 @@ # interval beginning approximately 1984 and ending June 2004 ("1984~/2004-06", ("1984-01-01", "2004-06-30", "1983-01-01", "2004-06-30")), # interval beginning 1984 and ending approximately June 2004 - ("1984/2004-06~", ("1984-01-01", "2004-06-30", "1984-01-01", "2004-07-30")), + ("1984/2004-06~", ("1984-01-01", "2004-06-30", "1984-01-01", "2005-07-30")), ("1984?/2004%", ("1984-01-01", "2004-12-31", "1983-01-01", "2006-12-31")), ("1984~/2004~", ("1984-01-01", "2004-12-31", "1983-01-01", "2005-12-31")), # interval whose beginning is uncertain but thought to be 1984, and whose end is uncertain and approximate but thought to be 2004 - ("1984-06?/2004-08?", ("1984-06-01", "2004-08-31", "1984-05-01", "2004-09-30")), + ("1984-06?/2004-08?", ("1984-06-01", "2004-08-31", "1983-05-01", "2005-09-30")), ( 
"1984-06-02?/2004-08-08~", - ("1984-06-02", "2004-08-08", "1984-06-01", "2004-08-09"), + ("1984-06-02", "2004-08-08", "1983-05-01", "2005-09-09"), ), - ("1984-06-02?/", ("1984-06-02", "1994-06-02", "1984-06-01", "1994-06-02")), + ("1984-06-02?/", ("1984-06-02", "1994-06-02", "1983-05-01", "1994-06-02")), # Year exceeding 4 digits ("Y170000002", ("170000002-01-01", "170000002-12-31")), ("Y-170000002", ("-170000002-01-01", "-170000002-12-31")), @@ -117,7 +121,11 @@ # Group qualification: a qualification character to the immediate right of a component applies # to that component as well as to all components to the left. # year, month, and day are uncertain and approximate - ("2004-06-11%", ("2004-06-11", "2004-06-09", "2004-06-13")), + # this example appears under "group qualification" but actually parses as L1 UncertainOrApproximate + ( + "2004-06-11%", + ("2004-06-11", "2002-04-09", "2006-08-13"), + ), # all parts to the left are fuzzy by 200% # uncertain year; month, day known ("2004?-06-11", ("2004-06-11", "2003-06-11", "2005-06-11")), # year and month are approximate; day known From 48a9b02749076c6ae29b468724ef1ad65439b35e Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Wed, 22 May 2024 18:28:44 -0400 Subject: [PATCH 049/102] Add code coverage The ci.yml updates add a commit to PRs. I tested this locally using `act` as best I could, but ran into an issue that I think will resolve when running on the real runner (undefined head). We'll see how it works when the workflows actually run ... 
--- .github/workflows/ci.yml | 34 ++++++++++++++++++++++++++++++++-- .gitignore | 3 +++ README.md | 4 ++++ pyproject.toml | 22 +++++++++++++++++++++- 4 files changed, 60 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b41c764..be0326d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,7 +9,8 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.12"] + # python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] defaults: run: working-directory: . @@ -38,8 +39,37 @@ jobs: - name: Run unit tests run: | pytest + mv .coverage .coverage_main - name: Run Django integration tests working-directory: ./edtf_django_tests run: | - python manage.py test edtf_integration + coverage run manage.py test edtf_integration + mv .coverage ../.coverage_django + + - name: Combine coverage reports + run: | + coverage combine .coverage_main .coverage_django + coverage report --omit="edtf_django_tests/*" + coverage xml -o coverage_combined.xml --omit="edtf_django_tests/*" + + - name: Pytest coverage comment + uses: MishaKav/pytest-coverage-comment@main + with: + pytest-xml-coverage-path: ./coverage_combined.xml + unique-id-for-comment: ${{ matrix.python-version }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Check the output coverage + run: | + echo "Coverage Percentage - ${{ steps.coverageComment.outputs.coverage }}" + echo "Coverage Color - ${{ steps.coverageComment.outputs.color }}" + echo "Coverage Html - ${{ steps.coverageComment.outputs.coverageHtml }}" + echo "Summary Report - ${{ steps.coverageComment.outputs.summaryReport }}" + echo "Coverage Warnings - ${{ steps.coverageComment.outputs.warnings }}" + echo "Coverage Errors - ${{ steps.coverageComment.outputs.errors }}" + echo "Coverage Failures - ${{ steps.coverageComment.outputs.failures }}" + echo "Coverage Skipped - ${{ steps.coverageComment.outputs.skipped }}" + echo 
"Coverage Tests - ${{ steps.coverageComment.outputs.tests }}" + echo "Coverage Time - ${{ steps.coverageComment.outputs.time }}" + echo "Not Success Test Info - ${{ steps.coverageComment.outputs.notSuccessTestInfo }}" diff --git a/.gitignore b/.gitignore index 7c23190..182cf8b 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,9 @@ htmlcov/ .cache nosetests.xml coverage.xml +coverage_combined.xml +.coverage_main +.coverage_django *,cover # Translations diff --git a/README.md b/README.md index 82a9b7d..c4f172e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,9 @@ edtf ===== +<!-- Pytest Coverage Comment:Begin --> +<!-- Pytest Coverage Comment:End --> + An implementation of EDTF format in Python, together with utility functions for parsing natural language date texts, and converting EDTF dates to related Python `date` or `struct_time` objects. See http://www.loc.gov/standards/datetime/ for the current draft specification. @@ -376,6 +379,7 @@ Since the `EDTFField` and the `_earliest` and `_latest` field values are set aut ### Running tests - From `python-edtf`, run the unit tests: `pytest` - From `python-edtf/edtf_django_tests`, run the integration tests: `python manage.py test edtf_integration` +- To run CI locally, use `act`, e.g. `act pull_request` or `act --pull=false --container-architecture linux/amd64`.
Some steps may require a Github PAT: `act pull_request --container-architecture linux/amd64 --pull=false -s GITHUB_TOKEN=` ### Linting and formatting - Check linting: `ruff check --output-format=github --config pyproject.toml` diff --git a/pyproject.toml b/pyproject.toml index 869daf6..8dea9fd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,8 @@ test = [ "pytest", "ruff", "pre-commit", + "coverage", + "pytest-cov" ] [project.urls] @@ -77,7 +79,25 @@ legacy_tox_ini = """ python_files = ["tests.py", "test_*.py", "*_test.py", "*_tests.py"] python_classes = ["Test*", "*Tests"] python_functions = ["test_*"] -addopts = "--ignore=edtf_django_tests/" +addopts = "--ignore=edtf_django_tests/ --cov=edtf --cov-report=xml" +plugins = ["pytest_cov"] + +[tool.coverage.run] +# we run the edtf_integration tests but only care about them testing fields.py in the main package +omit = [ + "edtf_django_tests/*" +] + +[tool.coverage.report] +exclude_lines = [ + # Don't complain about missing debug-only code: + "if __name__ == .__main__.:", + # Don't complain if tests don't hit defensive assertion code: + "raise AssertionError", + "raise NotImplementedError", + "raise NotImplemented", + "raise NotImplemented" +] [tool.ruff] # Python 3.8 From 09b10d8ca6e196558523f37afa15cffa2c78b2d0 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Wed, 22 May 2024 18:28:57 -0400 Subject: [PATCH 050/102] Create coverage_readme.yml The new workflow adds a badge to the readme based on coverage for Python 3.12. 
--- .github/workflows/coverage_readme.yml | 68 +++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 .github/workflows/coverage_readme.yml diff --git a/.github/workflows/coverage_readme.yml b/.github/workflows/coverage_readme.yml new file mode 100644 index 0000000..86309de --- /dev/null +++ b/.github/workflows/coverage_readme.yml @@ -0,0 +1,68 @@ +name: Update Coverage on Readme +on: + push: + branches: + - main + +# https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs +# `contents` is for permission to the contents of the repository. +# `pull-requests` is for permission to pull request +permissions: + contents: write + checks: write + pull-requests: write + +# see: https://github.com/MishaKav/pytest-coverage-comment +jobs: + update-coverage-on-readme: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + fetch-depth: 0 + + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: 3.12 + cache: 'pip' + cache-dependency-path: '**/pyproject.toml' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install .[test] + + - name: Run tests and generate coverage + run: | + pytest + mv .coverage .coverage_main + cd edtf_django_tests + coverage run manage.py test edtf_integration + mv .coverage ../.coverage_django + cd .. 
+ coverage combine .coverage_main .coverage_django + coverage report --omit="edtf_django_tests/*" + coverage xml -o coverage_combined.xml --omit="edtf_django_tests/*" + + - name: Pytest coverage comment + if: ${{ github.ref == 'refs/heads/main' }} + id: coverageComment + uses: MishaKav/pytest-coverage-comment@main + with: + pytest-xml-coverage-path: ./coverage_combined.xml + hide-comment: true + + - name: Update Readme with Coverage Html + if: ${{ github.ref == 'refs/heads/main' }} + run: | + sed -i '/<!-- Pytest Coverage Comment:Begin -->/,/<!-- Pytest Coverage Comment:End -->/c\<!-- Pytest Coverage Comment:Begin -->\n\${{ steps.coverageComment.outputs.coverageHtml }}\n<!-- Pytest Coverage Comment:End -->' ./README.md + + - name: Commit & Push changes to README + run: | + git config --global user.name 'github-actions[bot]' + git config --global user.email 'github-actions[bot]@users.noreply.github.com' + git add README.md + git commit -m 'Update coverage badge in README' + git push From ac4705f5bd7b41f49f45458f053f7f55d468a29a Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Wed, 22 May 2024 18:29:16 -0400 Subject: [PATCH 051/102] Remove unnecessary files --- edtf/parser/grammar_test.py | 360 ------------ edtf/parser/parser_classes_tests.py | 834 ---------------------------- vagrant wheel install problems.txt | 5 - 3 files changed, 1199 deletions(-) delete mode 100644 edtf/parser/grammar_test.py delete mode 100644 edtf/parser/parser_classes_tests.py delete mode 100644 vagrant wheel install problems.txt diff --git a/edtf/parser/grammar_test.py b/edtf/parser/grammar_test.py deleted file mode 100644 index c8ff727..0000000 --- a/edtf/parser/grammar_test.py +++ /dev/null @@ -1,360 +0,0 @@ -from pyparsing import ( - Combine, - NotAny, - OneOrMore, - Optional, - ParseException, - Regex, - Word, - ZeroOrMore, - nums, - oneOf, -) -from pyparsing import Literal as L - -from edtf.parser.edtf_exceptions import EDTFParseException - -# (* ************************** Level 0 *************************** *) -from edtf.parser.parser_classes import ( - UA, - Consecutives, -
Date, - DateAndTime, - EarlierConsecutives, - ExponentialYear, - Interval, - LaterConsecutives, - Level1Interval, - Level2Interval, # , Testi - LongYear, - MultipleDates, - OneOfASet, - PartialUncertainOrApproximate, - PartialUnspecified, - Season, - UncertainOrApproximate, - Unspecified, -) - -oneThru12 = oneOf(["%.2d" % i for i in range(1, 13)]) -oneThru13 = oneOf(["%.2d" % i for i in range(1, 14)]) -oneThru23 = oneOf(["%.2d" % i for i in range(1, 24)]) -zeroThru23 = oneOf(["%.2d" % i for i in range(0, 24)]) -oneThru29 = oneOf(["%.2d" % i for i in range(1, 30)]) -oneThru30 = oneOf(["%.2d" % i for i in range(1, 31)]) -oneThru31 = oneOf(["%.2d" % i for i in range(1, 32)]) -oneThru59 = oneOf(["%.2d" % i for i in range(1, 60)]) -zeroThru59 = oneOf(["%.2d" % i for i in range(0, 60)]) - -positiveDigit = Word(nums, exact=1, excludeChars="0") -digit = Word(nums, exact=1) - -second = zeroThru59 -minute = zeroThru59 -hour = zeroThru23 -day = oneThru31("day") - -month = oneThru12("month") -monthDay = ( - (oneOf("01 03 05 07 08 10 12")("month") + "-" + oneThru31("day")) - ^ (oneOf("04 06 09 11")("month") + "-" + oneThru30("day")) - ^ (L("02")("month") + "-" + oneThru29("day")) -) - -# 4 digits, 0 to 9 -positiveYear = Word(nums, exact=4) - -# Negative version of positive year, but "-0000" is illegal -negativeYear = NotAny(L("-0000")) + ("-" + positiveYear) - -year = Combine(positiveYear ^ negativeYear)("year") - -yearMonth = year + "-" + month -yearMonthDay = year + "-" + monthDay # o hai iso date - -date = Combine(year ^ yearMonth ^ yearMonthDay)("date") -Date.set_parser(date) - -zoneOffsetHour = oneThru13 -zoneOffset = L("Z") ^ ( - Regex("[+-]") - + (zoneOffsetHour + Optional(":" + minute) ^ L("14:00") ^ ("00:" + oneThru59)) -) - -baseTime = Combine(hour + ":" + minute + ":" + second ^ "24:00:00") - -time = Combine(baseTime + Optional(zoneOffset))("time") - -dateAndTime = date + "T" + time -DateAndTime.set_parser(dateAndTime) - -l0Interval = date("lower") + "/" + 
date("upper") -Interval.set_parser(l0Interval) - -level0Expression = date ^ dateAndTime ^ l0Interval - - -# (* ************************** Level 1 *************************** *) - -# (* ** Auxiliary Assignments for Level 1 ** *) -UASymbol = Combine(oneOf("? ~ %")) -UA.set_parser(UASymbol) - -seasonNumber = oneOf("21 22 23 24") - -# (* *** Season (unqualified) *** *) -season = year + "-" + seasonNumber("season") -Season.set_parser(season) - -dateOrSeason = date("") ^ season - -# (* *** Long Year - Simple Form *** *) - -longYearSimple = "Y" + Combine( - Optional("-") + positiveDigit + digit + digit + digit + OneOrMore(digit) -)("year") -LongYear.set_parser(longYearSimple) - -# (* *** L1Interval *** *) -uaDateOrSeason = dateOrSeason + Optional(UASymbol) - - -# unspecifiedIntervalSec = L('..')('unknownOrOpen') + FollowedBy(L("/") + uaDateOrSeason)('other_section_element') -# Testi.set_parser(unspecifiedIntervalSec) - - -# bit of a kludge here to get the all the relevant tokens into the parse action -# cleanly otherwise the parameter names are overlapped. -def f(toks): - try: - return {"date": toks[0], "ua": toks[1]} - except IndexError: - return {"date": toks[0], "ua": None} - - -l1Start = ".." ^ uaDateOrSeason -# l1Start = unspecifiedIntervalSec ^ uaDateOrSeason -l1Start.addParseAction(f) -l1End = uaDateOrSeason ^ ".." 
-l1End.addParseAction(f) - -# level1Interval = l1Start("lower") + "/" + l1End("upper") -level1Interval = Optional(l1Start)("lower") + "/" + l1End("upper") ^ l1Start( - "lower" -) + "/" + Optional(l1End("upper")) -Level1Interval.set_parser(level1Interval) - -# (* *** unspecified *** *) -yearWithOneOrTwoUnspecifedDigits = Combine(digit + digit + (digit ^ "X") + "X")("year") -monthUnspecified = year + "-" + L("XX")("month") -dayUnspecified = yearMonth + "-" + L("XX")("day") -dayAndMonthUnspecified = year + "-" + L("XX")("month") + "-" + L("XX")("day") - -unspecified = ( - yearWithOneOrTwoUnspecifedDigits - ^ monthUnspecified - ^ dayUnspecified - ^ dayAndMonthUnspecified -) -Unspecified.set_parser(unspecified) - -# (* *** uncertainOrApproxDate *** *) - -uncertainOrApproxDate = date("date") + UASymbol("ua") -UncertainOrApproximate.set_parser(uncertainOrApproxDate) - -level1Expression = ( - uncertainOrApproxDate ^ unspecified ^ level1Interval ^ longYearSimple ^ season -) - -# (* ************************** Level 2 *************************** *) - -# (* ** Internal Unspecified** *) - -digitOrU = Word(nums + "X", exact=1) - -# 2-digit day with at least one 'X' present -dayWithU = Combine(("X" + digitOrU) ^ (digitOrU + "X"))("day") - -# 2-digit month with at least one 'X' present -monthWithU = Combine(oneOf("0X 1X") ^ ("X" + digitOrU))("month") - -# 4-digit year with at least one 'X' present -yearWithU = Combine( - ("X" + digitOrU + digitOrU + digitOrU) - ^ (digitOrU + "X" + digitOrU + digitOrU) - ^ (digitOrU + digitOrU + "X" + digitOrU) - ^ (digitOrU + digitOrU + digitOrU + "X") -)("year") - -yearMonthWithU = (Combine(year("") ^ yearWithU(""))("year") + "-" + monthWithU) ^ ( - yearWithU + "-" + month -) - -monthDayWithU = (Combine(month("") ^ monthWithU(""))("month") + "-" + dayWithU) ^ ( - monthWithU + "-" + day -) - -yearMonthDayWithU = ( - ( - yearWithU - + "-" - + Combine(month("") ^ monthWithU(""))("month") - + "-" - + Combine(day("") ^ dayWithU(""))("day") - ) - ^ 
(year + "-" + monthWithU + "-" + Combine(day("") ^ dayWithU(""))("day")) - ^ (year + "-" + month + "-" + dayWithU) -) - -partialUnspecified = yearWithU ^ yearMonthWithU ^ yearMonthDayWithU -PartialUnspecified.set_parser(partialUnspecified) - -# (* ** Internal Uncertain or Approximate** *) - -# this line is out of spec, but the given examples (e.g. '(2004)?-06-04~') -# appear to require it. -year_with_brackets = year ^ ("(" + year + ")") - -# second clause below needed Optional() around the "year_ua" UASymbol, for dates -# like '(2011)-06-04~' to work. - -IUABase = ( - ( - year_with_brackets - + UASymbol("year_ua") - + "-" - + month - + Optional("-(" + day + ")" + UASymbol("day_ua")) - ) - ^ ( - year_with_brackets - + Optional(UASymbol)("year_ua") - + "-" - + monthDay - + Optional(UASymbol)("month_day_ua") - ) - ^ ( - year_with_brackets - + Optional(UASymbol)("year_ua") - + "-(" - + month - + ")" - + UASymbol("month_ua") - + Optional("-(" + day + ")" + UASymbol("day_ua")) - ) - ^ ( - year_with_brackets - + Optional(UASymbol)("year_ua") - + "-(" - + month - + ")" - + UASymbol("month_ua") - + Optional("-" + day) - ) - ^ (yearMonth + UASymbol("year_month_ua") + "-(" + day + ")" + UASymbol("day_ua")) - ^ (yearMonth + UASymbol("year_month_ua") + "-" + day) - ^ (yearMonth + "-(" + day + ")" + UASymbol("day_ua")) - ^ (year + "-(" + monthDay + ")" + UASymbol("month_day_ua")) - ^ (season("ssn") + UASymbol("season_ua")) -) - -partialUncertainOrApproximate = IUABase ^ ("(" + IUABase + ")" + UASymbol("all_ua")) -PartialUncertainOrApproximate.set_parser(partialUncertainOrApproximate) - -dateWithInternalUncertainty = partialUncertainOrApproximate ^ partialUnspecified - -qualifyingString = Regex(r"\S") # any nonwhitespace char - -# (* ** SeasonQualified ** *) -seasonQualifier = qualifyingString -seasonQualified = season + "^" + seasonQualifier - -# (* ** Long Year - Scientific Form ** *) -positiveInteger = Combine(positiveDigit + ZeroOrMore(digit)) -longYearScientific = ( - "Y" - 
+ Combine(Optional("-") + positiveInteger)("base") - + "E" - + positiveInteger("exponent") - + Optional("S" + positiveInteger("precision")) -) -ExponentialYear.set_parser(longYearScientific) - -# (* ** level2Interval ** *) -level2Interval = ( - (dateOrSeason("lower") + "/" + dateWithInternalUncertainty("upper")) - ^ (dateWithInternalUncertainty("lower") + "/" + dateOrSeason("upper")) - ^ ( - dateWithInternalUncertainty("lower") - + "/" - + dateWithInternalUncertainty("upper") - ) -) -Level2Interval.set_parser(level2Interval) - -# (* ** Masked precision ** *) eliminated in latest specs -# maskedPrecision = Combine(digit + digit + ((digit + "x") ^ "xx"))("year") -# MaskedPrecision.set_parser(maskedPrecision) - -# (* ** Inclusive list and choice list** *) -consecutives = ( - (yearMonthDay("lower") + ".." + yearMonthDay("upper")) - ^ (yearMonth("lower") + ".." + yearMonth("upper")) - ^ (year("lower") + ".." + year("upper")) -) -Consecutives.set_parser(consecutives) - -listElement = ( - date - ^ dateWithInternalUncertainty - ^ uncertainOrApproxDate - ^ unspecified - ^ consecutives -) - -earlier = ".." + date("upper") -EarlierConsecutives.set_parser(earlier) -later = date("lower") + ".." 
-LaterConsecutives.set_parser(later) - -listContent = ( - (earlier + ZeroOrMore("," + listElement)) - ^ (Optional(earlier + ",") + ZeroOrMore(listElement + ",") + later) - ^ (listElement + OneOrMore("," + listElement)) - ^ consecutives -) - -choiceList = "[" + listContent + "]" -OneOfASet.set_parser(choiceList) - -inclusiveList = "{" + listContent + "}" -MultipleDates.set_parser(inclusiveList) - -level2Expression = ( - partialUncertainOrApproximate - ^ partialUnspecified - ^ choiceList - ^ inclusiveList - ^ level2Interval - ^ longYearScientific - ^ seasonQualified -) - -# putting it all together -edtfParser = ( - level0Expression("level0") ^ level1Expression("level1") ^ level2Expression("level2") -) - - -def parse_edtf(str, parseAll=True, fail_silently=False): - try: - if not str: - raise ParseException("You must supply some input text") - p = edtfParser.parseString(str.strip(), parseAll) - if p: - return p[0] - except ParseException as err: - if fail_silently: - return None - raise EDTFParseException(err) from err diff --git a/edtf/parser/parser_classes_tests.py b/edtf/parser/parser_classes_tests.py deleted file mode 100644 index 857d0f6..0000000 --- a/edtf/parser/parser_classes_tests.py +++ /dev/null @@ -1,834 +0,0 @@ -# ruff: noqa: S101 # Asserts are ok in tests - -import calendar -import re -from datetime import date, datetime -from operator import add, sub -from time import struct_time - -from dateutil.relativedelta import relativedelta - -from edtf import appsettings -from edtf.convert import ( - TIME_EMPTY_EXTRAS, - TIME_EMPTY_TIME, - dt_to_struct_time, - trim_struct_time, -) - -EARLIEST = "earliest" -LATEST = "latest" - -PRECISION_MILLENIUM = "millenium" -PRECISION_CENTURY = "century" -PRECISION_DECADE = "decade" -PRECISION_YEAR = "year" -PRECISION_MONTH = "month" -PRECISION_SEASON = "season" -PRECISION_DAY = "day" - - -def days_in_month(year, month): - """ - Return the number of days in the given year and month, where month is - 1=January to 12=December, 
and respecting leap years as identified by - `calendar.isleap()` - """ - return { - 1: 31, - 2: 29 if calendar.isleap(year) else 28, - 3: 31, - 4: 30, - 5: 31, - 6: 30, - 7: 31, - 8: 31, - 9: 30, - 10: 31, - 11: 30, - 12: 31, - }[month] - - -def apply_delta(op, time_struct, delta): - """ - Apply a `relativedelta` to a `struct_time` data structure. - - `op` is an operator function, probably always `add` or `sub`tract to - correspond to `a_date + a_delta` and `a_date - a_delta`. - - This function is required because we cannot use standard `datetime` module - objects for conversion when the date/time is, or will become, outside the - boundary years 1 AD to 9999 AD. - """ - if not delta: - return time_struct # No work to do - - try: - dt_result = op(datetime(*time_struct[:6]), delta) - return dt_to_struct_time(dt_result) - except (OverflowError, ValueError): - # Year is not within supported 1 to 9999 AD range - pass - - # Here we fake the year to one in the acceptable range to avoid having to - # write our own date rolling logic - - # Adjust the year to be close to the 2000 millenium in 1,000 year - # increments to try and retain accurate relative leap years - actual_year = time_struct.tm_year - millenium = int(float(actual_year) / 1000) - millenium_diff = (2 - millenium) * 1000 - adjusted_year = actual_year + millenium_diff - # Apply delta to the date/time with adjusted year - dt = datetime(*(adjusted_year,) + time_struct[1:6]) - dt_result = op(dt, delta) - # Convert result year back to its original millenium - final_year = dt_result.year - millenium_diff - return struct_time( - (final_year,) + dt_result.timetuple()[1:6] + tuple(TIME_EMPTY_EXTRAS) - ) - - -class EDTFObject: - """ - Object to attact to a parser to become instantiated when the parser - completes. 
- """ - - parser = None - - @classmethod - def set_parser(cls, p): - cls.parser = p - p.addParseAction(cls.parse_action) - - @classmethod - def parse_action(cls, toks): - kwargs = toks.asDict() - try: - return cls(**kwargs) # replace the token list with the class - except Exception as e: - print(f"trying to {cls.__name__}.__init__(**{kwargs})") - raise e - - @classmethod - def parse(cls, s): - return cls.parser.parseString(s)[0] - - def __repr__(self): - return f"{type(self).__name__}: '{str(self)}'" - - def __init__(self, *args, **kwargs): - str = f"{type(self).__name__}.__init__(*{args}, **{kwargs})" - raise NotImplementedError(f"{str} is not implemented.") - - def __str__(self): - raise NotImplementedError - - def _strict_date(self, lean): - raise NotImplementedError - - def lower_strict(self): - return self._strict_date(lean=EARLIEST) - - def upper_strict(self): - return self._strict_date(lean=LATEST) - - def _get_fuzzy_padding(self, lean): - """ - Subclasses should override this to pad based on how precise they are. 
- """ - return relativedelta(0) - - def get_is_approximate(self): - return getattr(self, "_is_approximate", False) - - def set_is_approximate(self, val): - self._is_approximate = val - - is_approximate = property(get_is_approximate, set_is_approximate) - - def get_is_uncertain(self): - return getattr(self, "_is_uncertain", False) - - def set_is_uncertain(self, val): - self._is_uncertain = val - - is_uncertain = property(get_is_uncertain, set_is_uncertain) - - def get_is_uncertain_and_approximate(self): - return getattr(self, "_uncertain_and_approximate", False) - - def set_is_uncertain_and_approximate(self, val): - self._uncertain_and_approximate = val - - is_uncertain_and_approximate = property( - get_is_uncertain_and_approximate, set_is_uncertain_and_approximate - ) - - def lower_fuzzy(self): - strict_val = self.lower_strict() - return apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) - - def upper_fuzzy(self): - strict_val = self.upper_strict() - return apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) - - def __eq__(self, other): - if isinstance(other, EDTFObject): - return str(self) == str(other) - elif isinstance(other, date): - return str(self) == other.isoformat() - elif isinstance(other, struct_time): - return self._strict_date() == trim_struct_time(other) - return False - - def __ne__(self, other): - if isinstance(other, EDTFObject): - return str(self) != str(other) - elif isinstance(other, date): - return str(self) != other.isoformat() - elif isinstance(other, struct_time): - return self._strict_date() != trim_struct_time(other) - return True - - def __gt__(self, other): - if isinstance(other, EDTFObject): - return self.lower_strict() > other.lower_strict() - elif isinstance(other, date): - return self.lower_strict() > dt_to_struct_time(other) - elif isinstance(other, struct_time): - return self.lower_strict() > trim_struct_time(other) - raise TypeError( - f"can't compare {type(self).__name__} with {type(other).__name__}" - ) - 
- def __ge__(self, other): - if isinstance(other, EDTFObject): - return self.lower_strict() >= other.lower_strict() - elif isinstance(other, date): - return self.lower_strict() >= dt_to_struct_time(other) - elif isinstance(other, struct_time): - return self.lower_strict() >= trim_struct_time(other) - raise TypeError( - f"can't compare {type(self).__name__} with {type(other).__name__}" - ) - - def __lt__(self, other): - if isinstance(other, EDTFObject): - return self.lower_strict() < other.lower_strict() - elif isinstance(other, date): - return self.lower_strict() < dt_to_struct_time(other) - elif isinstance(other, struct_time): - return self.lower_strict() < trim_struct_time(other) - raise TypeError( - f"can't compare {type(self).__name__} with {type(other).__name__}" - ) - - def __le__(self, other): - if isinstance(other, EDTFObject): - return self.lower_strict() <= other.lower_strict() - elif isinstance(other, date): - return self.lower_strict() <= dt_to_struct_time(other) - elif isinstance(other, struct_time): - return self.lower_strict() <= trim_struct_time(other) - raise TypeError( - f"can't compare {type(self).__name__} with {type(other).__name__}" - ) - - -# (* ************************** Level 0 *************************** *) - - -class Date(EDTFObject): - def set_year(self, y): - if y is None: - raise AttributeError("Year must not be None") - self._year = y - - def get_year(self): - return self._year - - year = property(get_year, set_year) - - def set_month(self, m): - self._month = m - if m is None: - self.day = None - - def get_month(self): - return self._month - - month = property(get_month, set_month) - - def __init__(self, year=None, month=None, day=None, **kwargs): - for param in ("date", "lower", "upper"): - if param in kwargs: - self.__init__(**kwargs[param]) - return - - self.year = year # Year is required, but sometimes passed in as a 'date' dict. 
- self.month = month - self.day = day - - def __str__(self): - r = self.year - if self.month: - r += f"-{self.month}" - if self.day: - r += f"-{self.day}" - return r - - def isoformat(self, default=date.max): - return "%s-%02d-%02d" % ( - self.year, - int(self.month or default.month), - int(self.day or default.day), - ) - - def _precise_year(self, lean): - # Replace any ambiguous characters in the year string with 0s or 9s - if lean == EARLIEST: - return int(re.sub(r"X", r"0", self.year)) - else: - return int(re.sub(r"X", r"9", self.year)) - - def _precise_month(self, lean): - if self.month and self.month != "XX": - try: - return int(self.month) - except ValueError as err: - raise ValueError( - f"Couldn't convert {self.month} to int (in {self})" - ) from err - else: - return 1 if lean == EARLIEST else 12 - - def _precise_day(self, lean): - if not self.day or self.day == "XX": - if lean == EARLIEST: - return 1 - else: - return days_in_month( - self._precise_year(LATEST), self._precise_month(LATEST) - ) - else: - return int(self.day) - - def _strict_date(self, lean): - """ - Return a `time.struct_time` representation of the date. 
- """ - return struct_time( - ( - self._precise_year(lean), - self._precise_month(lean), - self._precise_day(lean), - ) - + tuple(TIME_EMPTY_TIME) - + tuple(TIME_EMPTY_EXTRAS) - ) - - @property - def precision(self): - if self.day: - return PRECISION_DAY - if self.month: - return PRECISION_MONTH - return PRECISION_YEAR - - -class DateAndTime(EDTFObject): - def __init__(self, date, time): - self.date = date - self.time = time - - def __str__(self): - return self.isoformat() - - def isoformat(self): - return self.date.isoformat() + "T" + self.time - - def _strict_date(self, lean): - return self.date._strict_date(lean) - - def __eq__(self, other): - if isinstance(other, datetime): - return self.isoformat() == other.isoformat() - elif isinstance(other, struct_time): - return self._strict_date() == trim_struct_time(other) - return super().__eq__(other) - - def __ne__(self, other): - if isinstance(other, datetime): - return self.isoformat() != other.isoformat() - elif isinstance(other, struct_time): - return self._strict_date() != trim_struct_time(other) - return super().__ne__(other) - - -class Interval(EDTFObject): - def __init__(self, lower, upper): - self.lower = lower - self.upper = upper - - def __str__(self): - return f"{self.lower}/{self.upper}" - - def _strict_date(self, lean): - if lean == EARLIEST: - try: - r = self.lower._strict_date(lean) - if r is None: - raise AttributeError - return r - except ( - AttributeError - ): # it's a string, or no date. 
Result depends on the upper date - upper = self.upper._strict_date(LATEST) - return apply_delta(sub, upper, appsettings.DELTA_IF_UNKNOWN) - else: - try: - r = self.upper._strict_date(lean) - if r is None: - raise AttributeError - return r - except ( - AttributeError - ): # an 'unknown' or 'open' string - depends on the lower date - if self.upper and (self.upper == "open" or self.upper.date == "open"): - return dt_to_struct_time(date.today()) # it's still happening - else: - lower = self.lower._strict_date(EARLIEST) - return apply_delta(add, lower, appsettings.DELTA_IF_UNKNOWN) - - -# (* ************************** Level 1 *************************** *) - - -class UA(EDTFObject): - @classmethod - def parse_action(cls, toks): - args = toks.asList() - return cls(*args) - - def __init__(self, *args): - assert len(args) == 1 - ua = args[0] - - self.is_uncertain = "?" in ua - self.is_approximate = "~" in ua - self.is_uncertain_and_approximate = "%" in ua - - def __str__(self): - d = "" - if self.is_uncertain: - d += "?" 
- if self.is_approximate: - d += "~" - if self.is_uncertain_and_approximate: - d += "%" - return d - - def _get_multiplier(self): - if self.is_uncertain_and_approximate: - return appsettings.MULTIPLIER_IF_BOTH - elif self.is_uncertain: - return appsettings.MULTIPLIER_IF_UNCERTAIN - elif self.is_approximate: - return appsettings.MULTIPLIER_IF_APPROXIMATE - - -class UncertainOrApproximate(EDTFObject): - def __init__(self, date, ua): - self.date = date - self.ua = ua - - def __str__(self): - if self.ua: - return f"{self.date}{self.ua}" - else: - return str(self.date) - - def _strict_date(self, lean): - if self.date == "open": - return None # depends on the other date - return dt_to_struct_time(date.today()) - if self.date == "unknown": - return None # depends on the other date - return self.date._strict_date(lean) - - def _get_fuzzy_padding(self, lean): - if not self.ua: - return relativedelta(0) - multiplier = self.ua._get_multiplier() - - if self.date.precision == PRECISION_DAY: - return multiplier * appsettings.PADDING_DAY_PRECISION - elif self.date.precision == PRECISION_MONTH: - return multiplier * appsettings.PADDING_MONTH_PRECISION - elif self.date.precision == PRECISION_YEAR: - return multiplier * appsettings.PADDING_YEAR_PRECISION - - -class Testi(EDTFObject): - # @classmethod - # def parse_action(cls, toks): - # args = toks.asList() - # return cls(*args) - - def __init__(self, **args): - print(args) - - -class UnspecifiedIntervalSection(EDTFObject): - def __init__(self, sectionOpen=False, other_section_element=None): - if sectionOpen: - self.is_open = True - self.is_unknown = False - else: - self.is_open = False - self.is_unknown = True - self.other = other_section_element - - def __str__(self): - if self.is_unknown: - return "" - else: - return ".." 
- - def _strict_date(self, lean): - if lean == EARLIEST: - if self.is_unknown: - upper = self.other._strict_date(LATEST) - return apply_delta(sub, upper, appsettings.DELTA_IF_UNKNOWN) - else: - return dt_to_struct_time( - date.min - ) # from the beginning of time; *ahem, i mean python datetime - else: - if self.is_unknown: - lower = self.other._strict_date(EARLIEST) - return apply_delta(add, lower, appsettings.DELTA_IF_UNKNOWN) - else: - return dt_to_struct_time(date.max) # to then end of python datetime - - -class Unspecified(Date): - pass - - -class Level1Interval(Interval): - def __init__(self, lower=None, upper=None): - if lower: - if lower["date"] == "..": - self.lower = UnspecifiedIntervalSection( - True, UncertainOrApproximate(**upper) - ) - else: - self.lower = UncertainOrApproximate(**lower) - else: - self.lower = UnspecifiedIntervalSection( - False, UncertainOrApproximate(**upper) - ) - if upper: - if upper["date"] == "..": - self.upper = UnspecifiedIntervalSection( - True, UncertainOrApproximate(**lower) - ) - else: - self.upper = UncertainOrApproximate(**upper) - else: - self.upper = UnspecifiedIntervalSection( - False, UncertainOrApproximate(**lower) - ) - - def _get_fuzzy_padding(self, lean): - if lean == EARLIEST: - return self.lower._get_fuzzy_padding(lean) - elif lean == LATEST: - return self.upper._get_fuzzy_padding(lean) - - -class LongYear(EDTFObject): - def __init__(self, year): - self.year = year - - def __str__(self): - return f"Y{self.year}" - - def _precise_year(self): - return int(self.year) - - def _strict_date(self, lean): - py = self._precise_year() - if lean == EARLIEST: - return struct_time([py, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - else: - return struct_time([py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - - -class Season(Date): - def __init__(self, year, season, **kwargs): - self.year = year - self.season = season # use season to look up month - # day isn't part of the 'season' spec, but it helps the inherited - # 
`Date` methods do their thing. - self.day = None - - def __str__(self): - return f"{self.year}-{self.season}" - - def _precise_month(self, lean): - rng = appsettings.SEASON_MONTHS_RANGE[int(self.season)] - if lean == EARLIEST: - return rng[0] - else: - return rng[1] - - -# (* ************************** Level 2 *************************** *) - - -class PartialUncertainOrApproximate(Date): - def set_year(self, y): # Year can be None. - self._year = y - - year = property(Date.get_year, set_year) - - def __init__( - self, - year=None, - month=None, - day=None, - year_ua=False, - month_ua=False, - day_ua=False, - year_month_ua=False, - month_day_ua=False, - ssn=None, - season_ua=False, - all_ua=False, - ): - self.year = year - self.month = month - self.day = day - - self.year_ua = year_ua - self.month_ua = month_ua - self.day_ua = day_ua - - self.year_month_ua = year_month_ua - self.month_day_ua = month_day_ua - - self.season = ssn - self.season_ua = season_ua - - self.all_ua = all_ua - - def __str__(self): - if self.season_ua: - return f"{self.season}{self.season_ua}" - - y = f"{self.year}{self.year_ua}" if self.year_ua else str(self.year) - - m = f"({self.month}){self.month_ua}" if self.month_ua else str(self.month) - - if self.day: - d = f"({self.day}){self.day_ua}" if self.day_ua else str(self.day) - else: - d = None - - if self.year_month_ua: # year/month approximate. No brackets needed. 
- ym = f"{y}-{m}{self.year_month_ua}" - result = f"{ym}-{d}" if d else ym - elif self.month_day_ua: - if self.year_ua: # we don't need the brackets round month and day - result = f"{y}-{m}-{d}{self.month_day_ua}" - else: - result = f"{y}-({m}-{d}){self.month_day_ua}" - else: - result = f"{y}-{m}-{d}" if d else f"{y}-{m}" - - if self.all_ua: - result = f"({result}){self.all_ua}" - - return result - - def _precise_year(self, lean): - if self.season: - return self.season._precise_year(lean) - return super()._precise_year(lean) - - def _precise_month(self, lean): - if self.season: - return self.season._precise_month(lean) - return super()._precise_month(lean) - - def _precise_day(self, lean): - if self.season: - return self.season._precise_day(lean) - return super()._precise_day(lean) - - def _get_fuzzy_padding(self, lean): - """ - This is not a perfect interpretation as fuzziness is introduced for - redundant uncertainly modifiers e.g. (2006~)~ will get two sets of - fuzziness. - """ - result = relativedelta(0) - - if self.year_ua: - result += ( - appsettings.PADDING_YEAR_PRECISION * self.year_ua._get_multiplier() - ) - if self.month_ua: - result += ( - appsettings.PADDING_MONTH_PRECISION * self.month_ua._get_multiplier() - ) - if self.day_ua: - result += appsettings.PADDING_DAY_PRECISION * self.day_ua._get_multiplier() - - if self.year_month_ua: - result += ( - appsettings.PADDING_YEAR_PRECISION - * self.year_month_ua._get_multiplier() - ) - result += ( - appsettings.PADDING_MONTH_PRECISION - * self.year_month_ua._get_multiplier() - ) - if self.month_day_ua: - result += ( - appsettings.PADDING_DAY_PRECISION * self.month_day_ua._get_multiplier() - ) - result += ( - appsettings.PADDING_MONTH_PRECISION - * self.month_day_ua._get_multiplier() - ) - - if self.season_ua: - result += ( - appsettings.PADDING_SEASON_PRECISION * self.season_ua._get_multiplier() - ) - - if self.all_ua: - multiplier = self.all_ua._get_multiplier() - - if self.precision == PRECISION_DAY: - result 
+= multiplier * appsettings.PADDING_DAY_PRECISION - result += multiplier * appsettings.PADDING_MONTH_PRECISION - result += multiplier * appsettings.PADDING_YEAR_PRECISION - elif self.precision == PRECISION_MONTH: - result += multiplier * appsettings.PADDING_MONTH_PRECISION - result += multiplier * appsettings.PADDING_YEAR_PRECISION - elif self.precision == PRECISION_YEAR: - result += multiplier * appsettings.PADDING_YEAR_PRECISION - - return result - - -class PartialUnspecified(Unspecified): - pass - - -class Consecutives(Interval): - # Treating Consecutive ranges as intervals where one bound is optional - def __init__(self, lower=None, upper=None): - if lower and not isinstance(lower, EDTFObject): - self.lower = Date.parse(lower) - else: - self.lower = lower - - if upper and not isinstance(upper, EDTFObject): - self.upper = Date.parse(upper) - else: - self.upper = upper - - def __str__(self): - return "{}..{}".format(self.lower or "", self.upper or "") - - -class EarlierConsecutives(Consecutives): - pass - - -class LaterConsecutives(Consecutives): - pass - - -class OneOfASet(EDTFObject): - @classmethod - def parse_action(cls, toks): - args = [t for t in toks.asList() if isinstance(t, EDTFObject)] - return cls(*args) - - def __init__(self, *args): - self.objects = args - - def __str__(self): - return "[{}]".format(", ".join([str(o) for o in self.objects])) - - def _strict_date(self, lean): - if lean == LATEST: - return max([x._strict_date(lean) for x in self.objects]) - else: - return min([x._strict_date(lean) for x in self.objects]) - - -class MultipleDates(EDTFObject): - @classmethod - def parse_action(cls, toks): - args = [t for t in toks.asList() if isinstance(t, EDTFObject)] - return cls(*args) - - def __init__(self, *args): - self.objects = args - - def __str__(self): - return "{{{}}}".format(", ".join([str(o) for o in self.objects])) - - def _strict_date(self, lean): - if lean == LATEST: - return max([x._strict_date(lean) for x in self.objects]) - else: - 
return min([x._strict_date(lean) for x in self.objects]) - - -class MaskedPrecision(Date): - pass - - -class Level2Interval(Level1Interval): - def __init__(self, lower, upper): - # Check whether incoming lower/upper values are single-item lists, and - # if so take just the first item. This works around what I *think* is a - # bug in the grammer that provides us with single-item lists of - # `PartialUncertainOrApproximate` items for lower/upper values. - if isinstance(lower, (tuple, list)) and len(lower) == 1: - self.lower = lower[0] - else: - self.lower = lower - if isinstance(lower, (tuple, list)) and len(upper) == 1: - self.upper = upper[0] - else: - self.upper = upper - - -class ExponentialYear(LongYear): - def __init__(self, base, exponent, precision=None): - self.base = base - self.exponent = exponent - self.precision = precision - - def _precise_year(self): - return int(self.base) * 10 ** int(self.exponent) - - def get_year(self): - if self.precision: - return f"{self.base}E{self.exponent}S{self.precision}" - else: - return f"{self.base}E{self.exponent}" - - year = property(get_year) diff --git a/vagrant wheel install problems.txt b/vagrant wheel install problems.txt deleted file mode 100644 index 174f67e..0000000 --- a/vagrant wheel install problems.txt +++ /dev/null @@ -1,5 +0,0 @@ -vagrant wheel install problems -https://stackoverflow.com/questions/56851961/how-to-fix-no-such-file-or-directory-error-in-setuptools-wheel-py157-convert - -from that link: -So it turns out that this problem was being caused by lag in Vagrant/Virtualbox's synced folders. I was trying to build the Python project inside a Vagrant VM shared from the host file system using a synced folder. Copying the project out of the synced folder into another folder in the VM allows it to build. Another dirty hack that worked was to add a time.sleep(1) in the setuptools/wheel.py source file on line 157 before the os.rename that was causing the OS Exception to be raised. 
This gives the file system a chance to sync, and therefore works around the issue. \ No newline at end of file From 9f2b55066beb18de154f6ca5d62b7f0474e37740 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Wed, 22 May 2024 18:37:48 -0400 Subject: [PATCH 052/102] Fix matrix --- .github/workflows/ci.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index be0326d..34a2001 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,8 +9,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.12"] - # python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] defaults: run: working-directory: . From 0eeb9bdce49f6f50c4c968a60627c8d50d1e174c Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Wed, 22 May 2024 18:52:47 -0400 Subject: [PATCH 053/102] Try adding permissions --- .github/workflows/ci.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 34a2001..fb06083 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,6 +4,11 @@ on: workflow_dispatch: pull_request: +permissions: + contents: write + pull-requests: write + + jobs: python-unit: runs-on: ubuntu-latest From 7a02fcd8403277f31ea7a0e3e742a68b0733e704 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Thu, 23 May 2024 09:43:40 +1000 Subject: [PATCH 054/102] Try adding checks: permission too --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fb06083..22590c3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,6 +5,7 @@ on: pull_request: permissions: + checks: write contents: write pull-requests: write From 54d5ec7cee810cc87ef42d94a94c269abe11a157 Mon Sep 17 00:00:00 
2001 From: Alastair Weakley Date: Thu, 23 May 2024 09:46:38 +1000 Subject: [PATCH 055/102] Run CI on push too --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 22590c3..9cbb841 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,8 +1,9 @@ name: CI on: - workflow_dispatch: pull_request: + push: + workflow_dispatch: permissions: checks: write From 0b92096dacc2d2047346ad6e59e5fc829fd1250a Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Thu, 23 May 2024 09:54:48 +1000 Subject: [PATCH 056/102] Add id to Coverage Comment step --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9cbb841..07420a0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,6 +60,7 @@ jobs: coverage xml -o coverage_combined.xml --omit="edtf_django_tests/*" - name: Pytest coverage comment + id: coverageComment uses: MishaKav/pytest-coverage-comment@main with: pytest-xml-coverage-path: ./coverage_combined.xml From 642819638506ae1c1ad06527c94706a1ccbfebe5 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Thu, 23 May 2024 10:14:01 +1000 Subject: [PATCH 057/102] Remove html from printed output --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 07420a0..1618350 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -71,7 +71,6 @@ jobs: run: | echo "Coverage Percentage - ${{ steps.coverageComment.outputs.coverage }}" echo "Coverage Color - ${{ steps.coverageComment.outputs.color }}" - echo "Coverage Html - ${{ steps.coverageComment.outputs.coverageHtml }}" echo "Summary Report - ${{ steps.coverageComment.outputs.summaryReport }}" echo "Coverage Warnings - ${{ steps.coverageComment.outputs.warnings }}" echo "Coverage Errors - ${{ steps.coverageComment.outputs.errors 
}}" From b8fdbef35aac415ee35251e561afdf33f3dc2a80 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Thu, 23 May 2024 10:22:20 +1000 Subject: [PATCH 058/102] Remove not-success from printed output --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1618350..767ebf0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -71,6 +71,7 @@ jobs: run: | echo "Coverage Percentage - ${{ steps.coverageComment.outputs.coverage }}" echo "Coverage Color - ${{ steps.coverageComment.outputs.color }}" + # echo "Coverage Html - ${{ steps.coverageComment.outputs.coverageHtml }}" echo "Summary Report - ${{ steps.coverageComment.outputs.summaryReport }}" echo "Coverage Warnings - ${{ steps.coverageComment.outputs.warnings }}" echo "Coverage Errors - ${{ steps.coverageComment.outputs.errors }}" @@ -78,4 +79,4 @@ jobs: echo "Coverage Skipped - ${{ steps.coverageComment.outputs.skipped }}" echo "Coverage Tests - ${{ steps.coverageComment.outputs.tests }}" echo "Coverage Time - ${{ steps.coverageComment.outputs.time }}" - echo "Not Success Test Info - ${{ steps.coverageComment.outputs.notSuccessTestInfo }}" + # echo "Not Success Test Info - ${{ steps.coverageComment.outputs.notSuccessTestInfo }}" From f262199f120b49d13c48e4110c4d56929b9d99fb Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Thu, 23 May 2024 10:28:44 +1000 Subject: [PATCH 059/102] Remove all junit-xml items from printed report (for now at least..) 
--- .github/workflows/ci.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 767ebf0..d5416ed 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -71,12 +71,4 @@ jobs: run: | echo "Coverage Percentage - ${{ steps.coverageComment.outputs.coverage }}" echo "Coverage Color - ${{ steps.coverageComment.outputs.color }}" - # echo "Coverage Html - ${{ steps.coverageComment.outputs.coverageHtml }}" - echo "Summary Report - ${{ steps.coverageComment.outputs.summaryReport }}" echo "Coverage Warnings - ${{ steps.coverageComment.outputs.warnings }}" - echo "Coverage Errors - ${{ steps.coverageComment.outputs.errors }}" - echo "Coverage Failures - ${{ steps.coverageComment.outputs.failures }}" - echo "Coverage Skipped - ${{ steps.coverageComment.outputs.skipped }}" - echo "Coverage Tests - ${{ steps.coverageComment.outputs.tests }}" - echo "Coverage Time - ${{ steps.coverageComment.outputs.time }}" - # echo "Not Success Test Info - ${{ steps.coverageComment.outputs.notSuccessTestInfo }}" From 5d3d80c1c7c5c550808557c8e7dfb4b7682351b0 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 23 May 2024 11:25:54 -0400 Subject: [PATCH 060/102] Add back JUnit reporting stats - Switch to using pytest-django to run the Django tests, as that has JUnit support. Add Django settings as a flag rather than in pyproject.toml because defining it there makes the normal pytest run fail since it can't find the module. - Adds a simple script using junitparser to combine the two JUnit XML files. 
--- .github/workflows/ci.yml | 17 +++++++++++++++-- .gitignore | 4 ++++ combine_junit.py | 23 +++++++++++++++++++++++ pyproject.toml | 6 ++++-- 4 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 combine_junit.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d5416ed..cb9d7f3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,13 +44,13 @@ jobs: - name: Run unit tests run: | - pytest + pytest --junitxml=junit_pytest_main.xml mv .coverage .coverage_main - name: Run Django integration tests working-directory: ./edtf_django_tests run: | - coverage run manage.py test edtf_integration + pytest edtf_integration/tests.py --ds=edtf_django_tests.settings --junitxml=../junit_pytest_django.xml mv .coverage ../.coverage_django - name: Combine coverage reports @@ -59,11 +59,16 @@ jobs: coverage report --omit="edtf_django_tests/*" coverage xml -o coverage_combined.xml --omit="edtf_django_tests/*" + - name: Combine JUnit XML reports + run: | + python combine_junit.py combined_junit_pytest.xml junit_pytest_main.xml junit_pytest_django.xml + - name: Pytest coverage comment id: coverageComment uses: MishaKav/pytest-coverage-comment@main with: pytest-xml-coverage-path: ./coverage_combined.xml + junitxml-path: ./combined_junit_pytest.xml unique-id-for-comment: ${{ matrix.python-version }} github-token: ${{ secrets.GITHUB_TOKEN }} @@ -71,4 +76,12 @@ jobs: run: | echo "Coverage Percentage - ${{ steps.coverageComment.outputs.coverage }}" echo "Coverage Color - ${{ steps.coverageComment.outputs.color }}" + echo "Coverage Html - ${{ steps.coverageComment.outputs.coverageHtml }}" + echo "Summary Report - ${{ steps.coverageComment.outputs.summaryReport }}" echo "Coverage Warnings - ${{ steps.coverageComment.outputs.warnings }}" + echo "Coverage Errors - ${{ steps.coverageComment.outputs.errors }}" + echo "Coverage Failures - ${{ steps.coverageComment.outputs.failures }}" + echo "Coverage Skipped - ${{ 
steps.coverageComment.outputs.skipped }}" + echo "Coverage Tests - ${{ steps.coverageComment.outputs.tests }}" + echo "Coverage Time - ${{ steps.coverageComment.outputs.time }}" + echo "Not Success Test Info - ${{ steps.coverageComment.outputs.notSuccessTestInfo }}" diff --git a/.gitignore b/.gitignore index 182cf8b..36df893 100644 --- a/.gitignore +++ b/.gitignore @@ -46,6 +46,10 @@ coverage_combined.xml .coverage_main .coverage_django *,cover +combined_junit_pytest.xml +pytest.xml +junit_pytest_main.xml +junit_pytest_django.xml # Translations *.mo diff --git a/combine_junit.py b/combine_junit.py new file mode 100644 index 0000000..5e3a05b --- /dev/null +++ b/combine_junit.py @@ -0,0 +1,23 @@ +import sys + +from junitparser import JUnitXml + + +def combine_junit_xml(output_file, *input_files): + combined_xml = JUnitXml() + for input_file in input_files: + xml = JUnitXml.fromfile(input_file) + combined_xml.extend(xml) + combined_xml.write(output_file) + + +if __name__ == "__main__": + if len(sys.argv) < 3: + print( + "Usage: python combine_junit_xml.py ... 
" + ) + sys.exit(1) + + output_file = sys.argv[1] + input_files = sys.argv[2:] + combine_junit_xml(output_file, *input_files) diff --git a/pyproject.toml b/pyproject.toml index 8dea9fd..64579ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,10 +38,12 @@ classifiers = [ test = [ "django>=4.2,<5.0", "pytest", + "pytest-django", "ruff", "pre-commit", "coverage", - "pytest-cov" + "pytest-cov", + "junitparser", ] [project.urls] @@ -79,7 +81,7 @@ legacy_tox_ini = """ python_files = ["tests.py", "test_*.py", "*_test.py", "*_tests.py"] python_classes = ["Test*", "*Tests"] python_functions = ["test_*"] -addopts = "--ignore=edtf_django_tests/ --cov=edtf --cov-report=xml" +addopts = "--ignore=edtf_django_tests/ --cov=edtf" plugins = ["pytest_cov"] [tool.coverage.run] From 6771172126ca4e9fb84beb7b4d7f60724bf3434b Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 23 May 2024 11:48:38 -0400 Subject: [PATCH 061/102] Skip covered due to long comments ``` File read successfully "/home/runner/work/python-edtf/python-edtf/./combined_junit_pytest.xml" Warning: Your comment is too long (maximum is 65536 characters), coverage report will not be added. 
Warning: Try add: "--cov-report=term-missing:skip-covered", or add "hide-report: true", or add "report-only-changed-files: true", or switch to "multiple-files" mode ``` --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cb9d7f3..09b9c65 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,13 +44,13 @@ jobs: - name: Run unit tests run: | - pytest --junitxml=junit_pytest_main.xml + pytest --junitxml=junit_pytest_main.xml --cov-report=term-missing:skip-covered mv .coverage .coverage_main - name: Run Django integration tests working-directory: ./edtf_django_tests run: | - pytest edtf_integration/tests.py --ds=edtf_django_tests.settings --junitxml=../junit_pytest_django.xml + pytest edtf_integration/tests.py --ds=edtf_django_tests.settings --junitxml=../junit_pytest_django.xml --cov-report=term-missing:skip-covered mv .coverage ../.coverage_django - name: Combine coverage reports From 7e15e8909b528b5e5979a52f47c9ba692d041030 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Fri, 24 May 2024 11:28:07 +1000 Subject: [PATCH 062/102] Summary report comes with its own quotes #53 --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 09b9c65..95c29c6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -77,7 +77,7 @@ jobs: echo "Coverage Percentage - ${{ steps.coverageComment.outputs.coverage }}" echo "Coverage Color - ${{ steps.coverageComment.outputs.color }}" echo "Coverage Html - ${{ steps.coverageComment.outputs.coverageHtml }}" - echo "Summary Report - ${{ steps.coverageComment.outputs.summaryReport }}" + echo "Summary Report -" ${{ steps.coverageComment.outputs.summaryReport }} echo "Coverage Warnings - ${{ steps.coverageComment.outputs.warnings }}" echo "Coverage Errors - ${{ steps.coverageComment.outputs.errors }}" echo 
"Coverage Failures - ${{ steps.coverageComment.outputs.failures }}" From 7556479586ab46ea8641ea74d827ffeba0f0c063 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 23 May 2024 12:03:48 -0400 Subject: [PATCH 063/102] Finish removing masked precision --- README.md | 6 ------ edtf/parser/grammar.py | 4 ---- edtf/parser/parser_classes.py | 4 ---- 3 files changed, 14 deletions(-) diff --git a/README.md b/README.md index c4f172e..fc9fe75 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,6 @@ The object returned by `parse_edtf()` is an instance of an `edtf.parser.parser_c PartialUnspecified OneOfASet MultipleDates - MaskedPrecision Level2Interval Level2Season ExponentialYear @@ -158,11 +157,6 @@ Test coverage includes every example given in the spec table of features. >>> parse_edtf('{1667,1668, 1670..1672}') MultipleDates: '{1667, 1668, 1670..1672}' -* Masked precision: - - >>> parse_edtf('197x') # A date in the 1970s. - MaskedPrecision: '197x' - * Level 2 Extended intervals: >>> parse_edtf('2004-06-(01)~/2004-06-(20)~') diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 730f47d..0eb2e9c 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -260,10 +260,6 @@ def f(toks): ) Level2Interval.set_parser(level2Interval) -# (* ** Masked precision ** *) eliminated in latest specs -# maskedPrecision = Combine(digit + digit + ((digit + "x") ^ "xx"))("year") -# MaskedPrecision.set_parser(maskedPrecision) - # (* ** Inclusive list and choice list** *) consecutives = ( (yearMonthDay("lower") + ".." 
+ yearMonthDay("upper")) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index bb9a213..b31ffeb 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -806,10 +806,6 @@ def _strict_date(self, lean): return min([x._strict_date(lean) for x in self.objects]) -class MaskedPrecision(Date): - pass - - class Level2Interval(Level1Interval): def __init__(self, lower, upper): # Check whether incoming lower/upper values are single-item lists, and From 3ce6e875f79d496e61f9627d5e6887853d668db6 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 23 May 2024 12:28:04 -0400 Subject: [PATCH 064/102] Add all required tests for significant digits Significant digits should work on a year in any format: "four-digit, 'Y' prefix, or exponential." These correspond to the python-edtf classes of Date, LongYear, and ExponentialYear. --- edtf/parser/tests.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 8d9a770..6e0a8a1 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -193,8 +193,14 @@ # the year -170000000 ("Y-17E7", ("-170000000-01-01", "-170000000-12-31")), # L2 significant digits + # Some year between 1900 and 1999, estimated to be 1950 + ("1950S2", ("1900-01-01", "1999-12-31")), # Some year between 171010000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.) 
- # ('Y17101E4S3', ('171010000-01-01', '171999999-12-31')), + ("Y17101E4S3", ("171000000-01-01", "171999999-12-31")), + # Some year between 338000 and 338999, estimated to be 338800 + ("Y3388E2S3", ("338000-01-01", "338999-12-31")), + # some year between 171000000 and 171999999 estimated to be 171010000 + ("Y171010000S3", ("171010000-01-01", "171999999-12-31")), # L2 Seasons # Spring southern hemisphere, 2001 ("2001-29", ("2001-09-01", "2001-11-30")), From 7545b6a56127bea3459ead8dbc5e28f502014dd9 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 23 May 2024 13:37:49 -0400 Subject: [PATCH 065/102] Parse significant digits for year, Y-prefixed, exponential - Add significant digit parsing for Date (year) and LongYear (y-prefixed) - Standardize grammar for significant digits - Use significant_digits rather than precision. Precision is used throughout for other functionality. - Add estimated() public functions for the above EDTF classes --- edtf/parser/grammar.py | 21 ++++++++++++++------- edtf/parser/parser_classes.py | 32 +++++++++++++++++++++++++------- 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 0eb2e9c..15947d0 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -48,8 +48,9 @@ oneThru59 = oneOf(["%.2d" % i for i in range(1, 60)]) zeroThru59 = oneOf(["%.2d" % i for i in range(0, 60)]) -positiveDigit = Word(nums, exact=1, excludeChars="0") digit = Word(nums, exact=1) +positiveDigit = Word(nums, exact=1, excludeChars="0") +positiveInteger = Combine(positiveDigit + ZeroOrMore(digit)) second = zeroThru59 minute = zeroThru59 @@ -63,13 +64,16 @@ ^ (L("02")("month") + "-" + oneThru29("day")) ) +# Significant digits suffix +significantDigits = "S" + Word(nums)("significant_digits") + # 4 digits, 0 to 9 positiveYear = Word(nums, exact=4) # Negative version of positive year, but "-0000" is illegal negativeYear = NotAny(L("-0000")) + 
("-" + positiveYear) -year = Combine(positiveYear ^ negativeYear)("year") +year = Combine(positiveYear ^ negativeYear)("year") + Optional(significantDigits) yearMonth = year + "-" + month yearMonthDay = year + "-" + monthDay # o hai iso date @@ -112,9 +116,13 @@ # (* *** Long Year - Simple Form *** *) -longYearSimple = "Y" + Combine( - Optional("-") + positiveDigit + digit + digit + digit + OneOrMore(digit) -)("year") +longYearSimple = ( + "Y" + + Combine(Optional("-") + positiveDigit + digit + digit + digit + OneOrMore(digit))( + "year" + ) + + Optional(significantDigits) +) LongYear.set_parser(longYearSimple) # (* *** L1Interval *** *) @@ -238,13 +246,12 @@ def f(toks): seasonQualified = season + "^" + seasonQualifier # (* ** Long Year - Scientific Form ** *) -positiveInteger = Combine(positiveDigit + ZeroOrMore(digit)) longYearScientific = ( "Y" + Combine(Optional("-") + positiveInteger)("base") + "E" + positiveInteger("exponent") - + Optional("S" + positiveInteger("precision")) + + Optional(significantDigits) ) ExponentialYear.set_parser(longYearScientific) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index b31ffeb..09140b6 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -261,7 +261,9 @@ def get_month(self): month = property(get_month, set_month) - def __init__(self, year=None, month=None, day=None, **kwargs): + def __init__( + self, year=None, month=None, day=None, significant_digits=None, **kwargs + ): for param in ("date", "lower", "upper"): if param in kwargs: self.__init__(**kwargs[param]) @@ -270,6 +272,7 @@ def __init__(self, year=None, month=None, day=None, **kwargs): self.year = year # Year is required, but sometimes passed in as a 'date' dict. 
self.month = month self.day = day + self.significant_digits = significant_digits def __str__(self): r = self.year @@ -277,6 +280,8 @@ def __str__(self): r += f"-{self.month}" if self.day: r += f"-{self.day}" + if self.significant_digits: + r += f"S{self.significant_digits}" return r def isoformat(self, default=date.max): @@ -337,6 +342,9 @@ def precision(self): return PRECISION_MONTH return PRECISION_YEAR + def estimated(self): + return self._precise_year(EARLIEST) + class DateAndTime(EDTFObject): def __init__(self, date, time): @@ -537,11 +545,15 @@ def _get_fuzzy_padding(self, lean): class LongYear(EDTFObject): - def __init__(self, year): + def __init__(self, year, significant_digits=None): self.year = year + self.significant_digits = significant_digits def __str__(self): - return f"Y{self.year}" + if self.significant_digits: + return f"Y{self.year}S{self.significant_digits}" + else: + return f"Y{self.year}" def _precise_year(self): return int(self.year) @@ -553,6 +565,9 @@ def _strict_date(self, lean): else: return struct_time([py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + def estimated(self): + return self._precise_year() + class Season(Date): def __init__(self, year, season, **kwargs): @@ -827,18 +842,21 @@ class Level2Season(Season): class ExponentialYear(LongYear): - def __init__(self, base, exponent, precision=None): + def __init__(self, base, exponent, significant_digits=None): self.base = base self.exponent = exponent - self.precision = precision + self.significant_digits = significant_digits def _precise_year(self): return int(self.base) * 10 ** int(self.exponent) def get_year(self): - if self.precision: - return f"{self.base}E{self.exponent}S{self.precision}" + if self.significant_digits: + return f"{self.base}E{self.exponent}S{self.significant_digits}" else: return f"{self.base}E{self.exponent}" year = property(get_year) + + def estimated(self): + return self._precise_year() From 6b3a9d46d10fddd1941b900c610fdcee579b97fe Mon Sep 17 00:00:00 2001 
From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 23 May 2024 15:03:32 -0400 Subject: [PATCH 066/102] Fix a regression with Consecutives / OneOfASet Two tests were failing: ``` FAILED edtf/parser/tests.py::test_edtf_examples[[1667, 1668, 1670..1672]-expected_tuple62] - AttributeError: 'list' object has no attribute 'expandtabs' FAILED edtf/parser/tests.py::test_edtf_examples[{1667,1668, 1670..1672}-expected_tuple67] - AttributeError: 'list' object has no attribute 'expandtabs' ``` pyparsing.parse_string() was being passed a list by year somehow. Added year_basic for this use case (4 digit year without significant digits). If we need to support Consecutives with significant digits then this isn't a sufficient workaround. --- edtf/parser/grammar.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 15947d0..e6232c4 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -74,6 +74,8 @@ negativeYear = NotAny(L("-0000")) + ("-" + positiveYear) year = Combine(positiveYear ^ negativeYear)("year") + Optional(significantDigits) +# simple version for Consecutives +year_basic = Combine(positiveYear ^ negativeYear)("year") yearMonth = year + "-" + month yearMonthDay = year + "-" + monthDay # o hai iso date @@ -271,7 +273,9 @@ def f(toks): consecutives = ( (yearMonthDay("lower") + ".." + yearMonthDay("upper")) ^ (yearMonth("lower") + ".." + yearMonth("upper")) - ^ (year("lower") + ".." + year("upper")) + ^ ( + year_basic("lower") + ".." 
+ year_basic("upper") + ) # using year_basic because some tests were throwing `'list' object has no attribute 'expandtabs'` - somewhere, pyparsing.parse_string() was being passed a list ) Consecutives.set_parser(consecutives) From 5883f539e7523fbabc7eb5075a1a4d279c2a8333 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Fri, 24 May 2024 15:58:57 -0400 Subject: [PATCH 067/102] Significant digits updates - Adds functionality for significant digits to Date, LongYear, and ExponentialYear - Updates the tests for significant digits - Updates the docs for significant digits and a few other references to old syntax (lowercase e, grouping) - ExponentialYear inherits from LongYear so only need to add it there; LongYear does not inherit from Date, so a bit of code duplication in the _fuzzy() overrides --- README.md | 42 +++++++++++++++---- edtf/parser/parser_classes.py | 78 +++++++++++++++++++++++++++++++++-- edtf/parser/tests.py | 20 ++++++--- 3 files changed, 124 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index fc9fe75..449912c 100644 --- a/README.md +++ b/README.md @@ -138,9 +138,8 @@ Test coverage includes every example given in the spec table of features. * Partial uncertain/approximate: - >>> parse_edtf('(2011)-06-04~') # year certain, month/day approximate. - # Note that the result text is normalized - PartialUncertainOrApproximate: '2011-(06-04)~' + >>> parse_edtf('2004-06~-11') # year certain, month/day approximate. + PartialUncertainOrApproximate: '2004-06~-11' * Partial unspecified: @@ -159,13 +158,42 @@ Test coverage includes every example given in the spec table of features. 
* Level 2 Extended intervals: - >>> parse_edtf('2004-06-(01)~/2004-06-(20)~') - Level2Interval: '2004-06-(01)~/2004-06-(20)~' + >>> parse_edtf('2004-06-~01/2004-06-~20') + Level2Interval: '2004-06-~01/2004-06-~20' * Year requiring more than 4 digits - exponential form: - >>> parse_edtf('Y-17e7') - ExponentialYear: 'Y-17e7' + >>> e = parse_edtf('Y-17E7') + ExponentialYear: 'Y-17E7' + >>> e.estimated() + -170000000 + +* Significant digits: + # '1950S2': some year between 1900 and 1999, estimated to be 1950 + >>> d = parse_edtf('1950S2') + Date: '1950S2' + >>> d.lower_fuzzy()[:3] + (1900, 1, 1) + >>> d.upper_fuzzy()[:3] + (1999, 12, 31) + # 'Y171010000S3': some year between some year between 171000000 and 171999999 estimated to be 171010000, with 3 significant digits. + >>> l = parse_edtf('Y171010000S3') + LongYear: 'Y171010000S3' + >>> l.estimated() + 171010000 + >>> l.lower_fuzzy()[:3] + (171000000, 1, 1) + >>> l.upper_fuzzy()[:3] + (171999999, 12, 31) + # 'Y3388E2S3': some year in exponential notation between 338000 and 338999, estimated to be 338800 + >>> e = parse_edtf('Y3388E2S3') + ExponentialYear: 'Y3388E2S3S3' + >>> e.estimated() + 338800 + >>> e.lower_fuzzy()[:3] + (338000, 1, 1) + >>> e.upper_fuzzy()[:3] + (338999, 12, 31) ### Natural language representation diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 09140b6..e12ecbd 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -272,7 +272,9 @@ def __init__( self.year = year # Year is required, but sometimes passed in as a 'date' dict. 
self.month = month self.day = day - self.significant_digits = significant_digits + self.significant_digits = ( + int(significant_digits) if significant_digits else None + ) def __str__(self): r = self.year @@ -291,6 +293,36 @@ def isoformat(self, default=date.max): int(self.day or default.day), ) + def lower_fuzzy(self): + if not hasattr(self, "significant_digits") or not self.significant_digits: + return apply_delta( + sub, self.lower_strict(), self._get_fuzzy_padding(EARLIEST) + ) + else: + total_digits = len(self.year) + insignificant_digits = total_digits - self.significant_digits + lower_year = ( + int(self.year) + // (10**insignificant_digits) + * (10**insignificant_digits) + ) + return struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + + def upper_fuzzy(self): + if not hasattr(self, "significant_digits") or not self.significant_digits: + return apply_delta( + add, self.upper_strict(), self._get_fuzzy_padding(LATEST) + ) + else: + total_digits = len(self.year) + insignificant_digits = total_digits - self.significant_digits + upper_year = (int(self.year) // (10**insignificant_digits) + 1) * ( + 10**insignificant_digits + ) - 1 + return struct_time( + [upper_year, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS + ) + def _precise_year(self, lean): # Replace any ambiguous characters in the year string with 0s or 9s if lean == EARLIEST: @@ -547,7 +579,9 @@ def _get_fuzzy_padding(self, lean): class LongYear(EDTFObject): def __init__(self, year, significant_digits=None): self.year = year - self.significant_digits = significant_digits + self.significant_digits = ( + int(significant_digits) if significant_digits else None + ) def __str__(self): if self.significant_digits: @@ -568,6 +602,42 @@ def _strict_date(self, lean): def estimated(self): return self._precise_year() + def lower_fuzzy(self): + full_year = self._precise_year() + strict_val = self.lower_strict() + if not self.significant_digits: + return apply_delta(sub, strict_val, 
self._get_fuzzy_padding(EARLIEST)) + else: + insignificant_digits = len(str(full_year)) - int(self.significant_digits) + if insignificant_digits <= 0: + return apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) + padding_value = 10**insignificant_digits + sig_digits = full_year // padding_value + lower_year = sig_digits * padding_value + return apply_delta( + sub, + struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS), + self._get_fuzzy_padding(EARLIEST), + ) + + def upper_fuzzy(self): + full_year = self._precise_year() + strict_val = self.upper_strict() + if not self.significant_digits: + return apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) + else: + insignificant_digits = len(str(full_year)) - self.significant_digits + if insignificant_digits <= 0: + return apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) + padding_value = 10**insignificant_digits + sig_digits = full_year // padding_value + upper_year = (sig_digits + 1) * padding_value - 1 + return apply_delta( + add, + struct_time([upper_year, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS), + self._get_fuzzy_padding(LATEST), + ) + class Season(Date): def __init__(self, year, season, **kwargs): @@ -845,7 +915,9 @@ class ExponentialYear(LongYear): def __init__(self, base, exponent, significant_digits=None): self.base = base self.exponent = exponent - self.significant_digits = significant_digits + self.significant_digits = ( + int(significant_digits) if significant_digits else None + ) def _precise_year(self): return int(self.base) * 10 ** int(self.exponent) diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 6e0a8a1..1ec7452 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -14,8 +14,8 @@ # where the first value is a tuple, the second item is a tuple of the normalised parse result. # # The values in the second tuple indicate the iso versions of the derived Python `date`s. 
-# - If there's one other value, all the derived dates should be the same. -# - If there're two other values, then all the lower values should be the same +# - If there is one other value, all the derived dates should be the same. +# - If there are two other values, then all the lower values should be the same # and all the upper values should be the same. # - If there are three other values, then the upper and lower ``_strict`` values # should be the first value, and the upper and lower ``_fuzzy`` values should be @@ -194,13 +194,21 @@ ("Y-17E7", ("-170000000-01-01", "-170000000-12-31")), # L2 significant digits # Some year between 1900 and 1999, estimated to be 1950 - ("1950S2", ("1900-01-01", "1999-12-31")), + ("1950S2", ("1950-01-01", "1950-12-31", "1900-01-01", "1999-12-31")), + ("1953S2", ("1953-01-01", "1953-12-31", "1900-01-01", "1999-12-31")), + ("1953S3", ("1953-01-01", "1953-12-31", "1950-01-01", "1959-12-31")), # Some year between 171010000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.) 
- ("Y17101E4S3", ("171000000-01-01", "171999999-12-31")), + ( + "Y17101E4S3", + ("171010000-01-01", "171010000-12-31", "171000000-01-01", "171999999-12-31"), + ), # Some year between 338000 and 338999, estimated to be 338800 - ("Y3388E2S3", ("338000-01-01", "338999-12-31")), + ("Y3388E2S3", ("338800-01-01", "338800-12-31", "338000-01-01", "338999-12-31")), # some year between 171000000 and 171999999 estimated to be 171010000 - ("Y171010000S3", ("171010000-01-01", "171999999-12-31")), + ( + "Y171010000S3", + ("171010000-01-01", "171010000-12-31", "171000000-01-01", "171999999-12-31"), + ), # L2 Seasons # Spring southern hemisphere, 2001 ("2001-29", ("2001-09-01", "2001-11-30")), From a6c869e5f32a7ae93fdcae5fd87172d2d4a8f28d Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 11:36:37 +1000 Subject: [PATCH 068/102] Minor fix to README --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 449912c..a571813 100644 --- a/README.md +++ b/README.md @@ -169,6 +169,7 @@ Test coverage includes every example given in the spec table of features. -170000000 * Significant digits: + # '1950S2': some year between 1900 and 1999, estimated to be 1950 >>> d = parse_edtf('1950S2') Date: '1950S2' @@ -176,7 +177,7 @@ Test coverage includes every example given in the spec table of features. (1900, 1, 1) >>> d.upper_fuzzy()[:3] (1999, 12, 31) - # 'Y171010000S3': some year between some year between 171000000 and 171999999 estimated to be 171010000, with 3 significant digits. + # 'Y171010000S3': some year between 171000000 and 171999999 estimated to be 171010000, with 3 significant digits. >>> l = parse_edtf('Y171010000S3') LongYear: 'Y171010000S3' >>> l.estimated() From 3a1f4368635c16f8929473db2f75b2071692a00a Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 11:44:24 +1000 Subject: [PATCH 069/102] Add limited benchmark tests No CI yet. 
#50 --- README.md | 1 + edtf/natlang/tests.py | 24 ++++++++++++++++++++++++ edtf/parser/grammar.py | 9 +++++++++ edtf/parser/tests.py | 21 +++++++++++++++++++++ pyproject.toml | 8 ++++++-- 5 files changed, 61 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a571813..76476c5 100644 --- a/README.md +++ b/README.md @@ -401,6 +401,7 @@ Since the `EDTFField` and the `_earliest` and `_latest` field values are set aut ### Running tests - From `python-edtf`, run the unit tests: `pytest` +- From `python-edtf`, run `pytest -m benchmark` to run the benchmarks - From `python-edtf/edtf_django_tests`, run the integration tests: `python manage.py test edtf_integration` - To run CI locally, use `act`, e.g. `act pull_request` or `act --pull=false --container-architecture linux/amd64`. Some steps may require a Github PAT: `act pull_request --container-architecture linux/amd64 --pull=false -s GITHUB_TOKEN=` diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py index 78ecbc9..d2c43a5 100644 --- a/edtf/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -185,3 +185,27 @@ def test_natlang(input_text, expected_output): assert ( result == expected_output ), f"Failed for input: {input_text} - expected {expected_output}, got {result}" + + +@pytest.mark.benchmark +@pytest.mark.parametrize( + "input_text,expected_output", + [ + ("23rd Dynasty", None), + ("January 2008", "2008-01"), + ("ca1860", "1860~"), + ("uncertain: approx 1862", "1862%"), + ("January", "XXXX-01"), + ("Winter 1872", "1872-24"), + ("before approx January 18 1928", "/1928-01-18~"), + ("birthday in 1872", "1872"), + ("1270 CE", "1270"), + ("2nd century bce", "-01XX"), + ("1858/1860", "[1858, 1860]"), + ], +) +def test_benchmark_natlang(benchmark, input_text, expected_output): + """ + Benchmark selected natural language conversions + """ + benchmark(text_to_edtf, input_text) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index e6232c4..1747341 100644 --- a/edtf/parser/grammar.py +++ 
b/edtf/parser/grammar.py @@ -1,3 +1,11 @@ +# ruff: noqa: E402 I001 + +# It's recommended to `enablePackrat()` immediately after importing pyparsing +# https://github.com/pyparsing/pyparsing/wiki/Performance-Tips +import pyparsing + +pyparsing.ParserElement.enablePackrat() + from pyparsing import ( Combine, NotAny, @@ -13,6 +21,7 @@ ) from pyparsing import Literal as L + from edtf.parser.edtf_exceptions import EDTFParseException # (* ************************** Level 0 *************************** *) diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 1ec7452..4932e95 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -216,6 +216,20 @@ ("2001-34", ("2001-04-01", "2001-06-30")), ) +BENCHMARK_EXAMPLES = ( + "2001-02-03", + "2008-12", + "2008", + "-0999", + "2004-01-01T10:10:10+05:00", + "-2005/-1999-02", + "/2006", + "?2004-%06", + "[1667, 1760-12]", + "Y3388E2S3", + "2001-29", +) + BAD_EXAMPLES = ( # parentheses are not used for group qualification in the 2018 spec None, @@ -340,3 +354,10 @@ def test_comparisons(): assert d4 == d5 assert d1 < d5 assert d1 > d6 + + +@pytest.mark.benchmark +@pytest.mark.parametrize("test_input", BENCHMARK_EXAMPLES) +def test_benchmark_parser(benchmark, test_input): + """Benchmark parsing of selected EDTF strings.""" + benchmark(parse, test_input) diff --git a/pyproject.toml b/pyproject.toml index 64579ae..56978fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ test = [ "django>=4.2,<5.0", "pytest", "pytest-django", + "pytest-benchmark", "ruff", "pre-commit", "coverage", @@ -81,8 +82,11 @@ legacy_tox_ini = """ python_files = ["tests.py", "test_*.py", "*_test.py", "*_tests.py"] python_classes = ["Test*", "*Tests"] python_functions = ["test_*"] -addopts = "--ignore=edtf_django_tests/ --cov=edtf" -plugins = ["pytest_cov"] +markers = [ + "benchmark: mark a test as a benchmark", +] +addopts = "--ignore=edtf_django_tests/ --cov=edtf -m 'not benchmark'" +plugins = ["pytest_cov", "pytest_benchmark"] 
[tool.coverage.run] # we run the edtf_integration tests but only care about them testing fields.py in the main package From 6e7b1093a43cd70906a7402a01621a0f1a195b3b Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 13:00:34 +1000 Subject: [PATCH 070/102] Add benchmark to CI #50 --- .github/workflows/ci.yml | 18 ++++++++++++++++++ dev-requirements.txt | 2 ++ edtf/parser/grammar.py | 5 +++-- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 95c29c6..39d0f4e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,6 +8,8 @@ on: permissions: checks: write contents: write + # deployments permission to deploy GitHub pages website + deployments: write pull-requests: write @@ -85,3 +87,19 @@ jobs: echo "Coverage Tests - ${{ steps.coverageComment.outputs.tests }}" echo "Coverage Time - ${{ steps.coverageComment.outputs.time }}" echo "Not Success Test Info - ${{ steps.coverageComment.outputs.notSuccessTestInfo }}" + + - name: Run benchmarks + run: | + pytest -m benchmark --benchmark-json=./output.json + + - name: Publish benchmark results + uses: benchmark-action/github-action-benchmark@v1 + with: + tool: 'pytest' + auto-push: false + output-file-path: output.json + github-token: ${{ secrets.GITHUB_TOKEN }} + comment-on-alert: true + save-data-file: false + skip-fetch-gh-pages: true + summary-always: true diff --git a/dev-requirements.txt b/dev-requirements.txt index 1e37df5..19242af 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,5 +1,7 @@ -r requirements.txt # Include all main requirements django>=4.2,<5.0 pytest +pytest-benchmark +pytest-django ruff pre-commit diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 1747341..9840bde 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -2,9 +2,10 @@ # It's recommended to `enablePackrat()` immediately after importing pyparsing # 
https://github.com/pyparsing/pyparsing/wiki/Performance-Tips -import pyparsing -pyparsing.ParserElement.enablePackrat() +# TODO: uncomment this once benchmark testing has run once in CI +# import pyparsing +# pyparsing.ParserElement.enablePackrat() from pyparsing import ( Combine, From 0ab80edfc0d0016490765b27f145e87332a22b42 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 14:12:38 +1000 Subject: [PATCH 071/102] Prevent gh-pages push --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 39d0f4e..f30ea57 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -97,9 +97,9 @@ jobs: with: tool: 'pytest' auto-push: false + comment-always: true output-file-path: output.json github-token: ${{ secrets.GITHUB_TOKEN }} comment-on-alert: true save-data-file: false - skip-fetch-gh-pages: true summary-always: true From 34363577027222d6ce94a92e0dc10a8935f01d44 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 14:45:34 +1000 Subject: [PATCH 072/102] Add gh-pages push --- .github/workflows/ci.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f30ea57..ec93df0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -94,12 +94,13 @@ jobs: - name: Publish benchmark results uses: benchmark-action/github-action-benchmark@v1 + if: github.event_name != 'pull_request' with: tool: 'pytest' - auto-push: false + auto-push: true comment-always: true output-file-path: output.json github-token: ${{ secrets.GITHUB_TOKEN }} comment-on-alert: true - save-data-file: false + save-data-file: true summary-always: true From 23a3d7e1de070bb0156e06d5ac7a91cf081d00e6 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 15:34:42 +1000 Subject: [PATCH 073/102] Make 2 CI paths #50 --- .github/workflows/ci.yml | 14 +++++++++++++- 1 file 
changed, 13 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ec93df0..370258a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -101,6 +101,18 @@ jobs: comment-always: true output-file-path: output.json github-token: ${{ secrets.GITHUB_TOKEN }} - comment-on-alert: true + comment-on-alert: false + save-data-file: true + summary-always: true + + - name: Publish benchmark results + uses: benchmark-action/github-action-benchmark@v1 + if: github.event_name == 'pull_request' + with: + tool: 'pytest' + auto-push: false + comment-always: true + output-file-path: output.json + comment-on-alert: false save-data-file: true summary-always: true From bb6e64052487511a23e256db10ca74308dd5c11b Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 15:39:25 +1000 Subject: [PATCH 074/102] Store/retrieve previous results --- .github/workflows/ci.yml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 370258a..fefb0c2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,6 +92,12 @@ jobs: run: | pytest -m benchmark --benchmark-json=./output.json + - name: Download previous benchmark data + uses: actions/cache@v4 + with: + path: ./cache + key: ${{ runner.os }}-benchmark + - name: Publish benchmark results uses: benchmark-action/github-action-benchmark@v1 if: github.event_name != 'pull_request' @@ -101,11 +107,13 @@ jobs: comment-always: true output-file-path: output.json github-token: ${{ secrets.GITHUB_TOKEN }} - comment-on-alert: false + comment-on-alert: true save-data-file: true summary-always: true + # Where the previous data file is stored + external-data-json-path: ./cache/benchmark-data.json - - name: Publish benchmark results + - name: Comment on benchmark results without publishing uses: benchmark-action/github-action-benchmark@v1 if: github.event_name == 'pull_request' with: @@ 
-116,3 +124,4 @@ jobs: comment-on-alert: false save-data-file: true summary-always: true + external-data-json-path: ./cache/benchmark-data.json From 13a8315234dae048461e8b2bd53b840f0bea8e12 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 15:44:12 +1000 Subject: [PATCH 075/102] Do not auto-push when using external-data file --- .github/workflows/ci.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fefb0c2..a13671e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -110,12 +110,9 @@ jobs: comment-on-alert: true save-data-file: true summary-always: true - # Where the previous data file is stored - external-data-json-path: ./cache/benchmark-data.json - name: Comment on benchmark results without publishing uses: benchmark-action/github-action-benchmark@v1 - if: github.event_name == 'pull_request' with: tool: 'pytest' auto-push: false From 57af55917d8baba8c334ab2bf7c0bce0d465d0ed Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 15:47:14 +1000 Subject: [PATCH 076/102] GH token required for comment-always --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a13671e..0f97b3c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -116,6 +116,7 @@ jobs: with: tool: 'pytest' auto-push: false + github-token: ${{ secrets.GITHUB_TOKEN }} comment-always: true output-file-path: output.json comment-on-alert: false From 90558b6bede78d310755e303328745ad4c70c087 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 16:16:32 +1000 Subject: [PATCH 077/102] Activate packrat #50 --- edtf/parser/grammar.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 9840bde..dc0f66d 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -3,9 +3,9 @@ # It's recommended to 
`enablePackrat()` immediately after importing pyparsing # https://github.com/pyparsing/pyparsing/wiki/Performance-Tips -# TODO: uncomment this once benchmark testing has run once in CI -# import pyparsing -# pyparsing.ParserElement.enablePackrat() +import pyparsing + +pyparsing.ParserElement.enablePackrat() from pyparsing import ( Combine, From 6c0e23990a259e2bd66f41781d950940e015e379 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 16:30:08 +1000 Subject: [PATCH 078/102] Include benchmark url --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 76476c5..9fc6ede 100644 --- a/README.md +++ b/README.md @@ -401,7 +401,7 @@ Since the `EDTFField` and the `_earliest` and `_latest` field values are set aut ### Running tests - From `python-edtf`, run the unit tests: `pytest` -- From `python-edtf`, run `pytest -m benchmark` to run the benchmarks +- From `python-edtf`, run `pytest -m benchmark` to run the benchmarks (published [here]( https://ixc.github.io/python-edtf/dev/bench/)) - From `python-edtf/edtf_django_tests`, run the integration tests: `python manage.py test edtf_integration` - To run CI locally, use `act`, e.g. `act pull_request` or `act --pull=false --container-architecture linux/amd64`. Some steps may require a Github PAT: `act pull_request --container-architecture linux/amd64 --pull=false -s GITHUB_TOKEN=` From ef24bc71dbd5d9d8edae57f0cc1aea182c88f12a Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 27 May 2024 21:32:43 -0400 Subject: [PATCH 079/102] Handle unspecified and qualified ("16XX~") Unspecified dates previously could not handle qualification. Unspecified dates also couldn't handle dates with 3 unspecified digits ("1XXX"). This commit adds both those features and tests for those use cases. 
--- edtf/appsettings.py | 7 +++ edtf/parser/grammar.py | 8 ++-- edtf/parser/parser_classes.py | 84 ++++++++++++++++++++++++++++++++++- edtf/parser/tests.py | 7 +++ 4 files changed, 102 insertions(+), 4 deletions(-) diff --git a/edtf/appsettings.py b/edtf/appsettings.py index e1bc821..e00a223 100644 --- a/edtf/appsettings.py +++ b/edtf/appsettings.py @@ -87,6 +87,13 @@ PADDING_MONTH_PRECISION = EDTF.get("PADDING_MONTH_PRECISION", relativedelta(months=1)) PADDING_YEAR_PRECISION = EDTF.get("PADDING_YEAR_PRECISION", relativedelta(years=1)) PADDING_SEASON_PRECISION = EDTF.get("PADDING_SEASON_PRECISION", relativedelta(weeks=12)) +PADDING_DECADE_PRECISION = EDTF.get("PADDING_DECADE_PRECISION", relativedelta(years=10)) +PADDING_CENTURY_PRECISION = EDTF.get( + "PADDING_CENTURY_PRECISION", relativedelta(years=100) +) +PADDING_MILLENNIUM_PRECISION = EDTF.get( + "PADDING_MILLENNIUM_PRECISION", relativedelta(years=1000) +) MULTIPLIER_IF_UNCERTAIN = EDTF.get("MULTIPLIER_IF_UNCERTAIN", 1.0) MULTIPLIER_IF_APPROXIMATE = EDTF.get("MULTIPLIER_IF_APPROXIMATE", 1.0) MULTIPLIER_IF_BOTH = EDTF.get("MULTIPLIER_IF_BOTH", 2.0) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index dc0f66d..ae03251 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -161,17 +161,19 @@ def f(toks): Level1Interval.set_parser(level1Interval) # (* *** unspecified *** *) -yearWithOneOrTwoUnspecifedDigits = Combine(digit + digit + (digit ^ "X") + "X")("year") +yearWithOneOrTwoOrThreeUnspecifedDigits = Combine( + digit + (digit ^ "X") + (digit ^ "X") + "X" +)("year") monthUnspecified = year + "-" + L("XX")("month") dayUnspecified = yearMonth + "-" + L("XX")("day") dayAndMonthUnspecified = year + "-" + L("XX")("month") + "-" + L("XX")("day") unspecified = ( - yearWithOneOrTwoUnspecifedDigits + yearWithOneOrTwoOrThreeUnspecifedDigits ^ monthUnspecified ^ dayUnspecified ^ dayAndMonthUnspecified -) +) + Optional(UASymbol)("ua") Unspecified.set_parser(unspecified) # (* *** 
uncertainOrApproxDate *** *) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index e12ecbd..0bbf855 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -541,7 +541,89 @@ def precision(self): class Unspecified(Date): - pass + def __init__( + self, + year=None, + month=None, + day=None, + significant_digits=None, + ua=None, + **kwargs, + ): + for param in ("date", "lower", "upper"): + if param in kwargs: + self.__init__(**kwargs[param]) + return + self.year = year # Year is required, but sometimes passed in as a 'date' dict. + self.month = month + self.day = day + self.significant_digits = ( + int(significant_digits) if significant_digits else None + ) + self.ua = ua if ua else None + + def __str__(self): + r = self.year + if self.month: + r += f"-{self.month}" + if self.day: + r += f"-{self.day}" + if self.ua: + r += str(self.ua) + return r + + def _get_fuzzy_padding(self, lean): + if not self.ua: + return relativedelta() + multiplier = self.ua._get_multiplier() + padding = relativedelta() + + if self.year: + if self.precision == PRECISION_MILLENIUM: + padding += relativedelta( + years=int( + multiplier * appsettings.PADDING_MILLENNIUM_PRECISION.years + ) + ) + elif self.precision == PRECISION_CENTURY: + padding += relativedelta( + years=int(multiplier * appsettings.PADDING_CENTURY_PRECISION.years) + ) + elif self.precision == PRECISION_DECADE: + padding += relativedelta( + years=int(multiplier * appsettings.PADDING_DECADE_PRECISION.years) + ) + else: + padding += relativedelta( + years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years) + ) + if self.month: + padding += relativedelta( + months=int(multiplier * appsettings.PADDING_MONTH_PRECISION.months) + ) + if self.day: + padding += relativedelta( + days=int(multiplier * appsettings.PADDING_DAY_PRECISION.days) + ) + + return padding + + @property + def precision(self): + if self.day: + return PRECISION_DAY + if self.month: + return PRECISION_MONTH + 
if self.year: + if self.year.isdigit(): + return PRECISION_YEAR + if len(self.year) == 4 and self.year.endswith("XXX"): + return PRECISION_MILLENIUM + if len(self.year) == 4 and self.year.endswith("XX"): + return PRECISION_CENTURY + if len(self.year) == 4 and self.year.endswith("X"): + return PRECISION_DECADE + raise ValueError(f"Unspecified date {self} has no precision") class Level1Interval(Interval): diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 4932e95..464aca3 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -85,6 +85,13 @@ ("-0275~", ("-0275-01-01", "-0275-12-31", "-0276-01-01", "-0274-12-31")), ("-0001~", ("-0001-01-01", "-0001-12-31", "-0002-01-01", "0000-12-31")), ("0000~", ("0000-01-01", "0000-12-31", "-0001-01-01", "0001-12-31")), + # Unspecified and qualified + # "circa 17th century" + ("16XX~", ("1600-01-01", "1699-12-31", "1500-01-01", "1799-12-31")), + ("16XX%", ("1600-01-01", "1699-12-31", "1400-01-01", "1899-12-31")), + ("1XXX", ("1000-01-01", "1999-12-31")), + ("1XXX~", ("1000-01-01", "1999-12-31", "0000-01-01", "2999-12-31")), + ("156X~", ("1560-01-01", "1569-12-31", "1550-01-01", "1579-12-31")), # L1 Extended Interval # beginning unknown, end 2006 # for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years) From b53df4a599fef6d25ecef43da0601f352505b48c Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Tue, 28 May 2024 13:08:57 -0400 Subject: [PATCH 080/102] Handle negative unspecified and negative unspecified + qualified Requires quite a few overrides of lower_ and upper_ range methods to properly handle dates due to padding working in the opposite direction for negative dates, esp when combined with month/day padding. 
--- edtf/parser/grammar.py | 2 +- edtf/parser/parser_classes.py | 226 +++++++++++++++++++++++++++++----- edtf/parser/tests.py | 3 + 3 files changed, 201 insertions(+), 30 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index ae03251..f458b2b 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -162,7 +162,7 @@ def f(toks): # (* *** unspecified *** *) yearWithOneOrTwoOrThreeUnspecifedDigits = Combine( - digit + (digit ^ "X") + (digit ^ "X") + "X" + Optional("-") + digit + (digit ^ "X") + (digit ^ "X") + "X" )("year") monthUnspecified = year + "-" + L("XX")("month") dayUnspecified = yearMonth + "-" + L("XX")("day") diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 0bbf855..43f4a9c 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -561,16 +561,13 @@ def __init__( int(significant_digits) if significant_digits else None ) self.ua = ua if ua else None + self.negative = self.year.startswith("-") def __str__(self): - r = self.year - if self.month: - r += f"-{self.month}" - if self.day: - r += f"-{self.day}" + base = super().__str__() if self.ua: - r += str(self.ua) - return r + base += str(self.ua) + return base def _get_fuzzy_padding(self, lean): if not self.ua: @@ -579,24 +576,16 @@ def _get_fuzzy_padding(self, lean): padding = relativedelta() if self.year: - if self.precision == PRECISION_MILLENIUM: - padding += relativedelta( - years=int( - multiplier * appsettings.PADDING_MILLENNIUM_PRECISION.years - ) - ) - elif self.precision == PRECISION_CENTURY: - padding += relativedelta( - years=int(multiplier * appsettings.PADDING_CENTURY_PRECISION.years) - ) - elif self.precision == PRECISION_DECADE: - padding += relativedelta( - years=int(multiplier * appsettings.PADDING_DECADE_PRECISION.years) - ) - else: - padding += relativedelta( - years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years) - ) + year_no_symbol = self.year.lstrip("-") + years_padding = 
self._calculate_years_padding(multiplier, year_no_symbol) + # Reverse the padding for negative years and earliest calculations + # if self.negative: + # years_padding = -years_padding if lean == EARLIEST else years_padding + # else: + # years_padding = years_padding if lean == EARLIEST else -years_padding + + padding += years_padding + if self.month: padding += relativedelta( months=int(multiplier * appsettings.PADDING_MONTH_PRECISION.months) @@ -608,6 +597,184 @@ def _get_fuzzy_padding(self, lean): return padding + def _calculate_years_padding(self, multiplier, year_no_symbol): + if self.precision == PRECISION_MILLENIUM: + return relativedelta( + years=int(multiplier * appsettings.PADDING_MILLENNIUM_PRECISION.years) + ) + elif self.precision == PRECISION_CENTURY: + return relativedelta( + years=int(multiplier * appsettings.PADDING_CENTURY_PRECISION.years) + ) + elif self.precision == PRECISION_DECADE: + return relativedelta( + years=int(multiplier * appsettings.PADDING_DECADE_PRECISION.years) + ) + else: + return relativedelta( + years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years) + ) + + def lower_fuzzy(self): + time_empty_time_tuple = tuple(TIME_EMPTY_TIME) + time_empty_extras_tuple = tuple(TIME_EMPTY_EXTRAS) + strict_val = ( + self.lower_strict() + ) # negative handled in the lower_strict() override + + if self.negative: + adjusted = apply_delta(sub, strict_val, self._get_fuzzy_padding(LATEST)) + if ( + self.precision == PRECISION_YEAR + or self.precision == PRECISION_DECADE + or self.precision == PRECISION_CENTURY + or self.precision == PRECISION_MILLENIUM + ): + adjusted = struct_time( + (adjusted.tm_year, 1, 1) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + elif self.precision == PRECISION_MONTH: + adjusted = struct_time( + (adjusted.tm_year, adjusted.tm_mon, 1) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + else: + adjusted = apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) + if ( + self.precision == 
PRECISION_YEAR + or self.precision == PRECISION_DECADE + or self.precision == PRECISION_CENTURY + or self.precision == PRECISION_MILLENIUM + ): + adjusted = struct_time( + (adjusted.tm_year, 1, 1) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + elif self.precision == PRECISION_MONTH: + days_in_month = calendar.monthrange(adjusted.tm_year, adjusted.tm_mon)[ + 1 + ] + adjusted = struct_time( + (adjusted.tm_year, adjusted.tm_mon, days_in_month) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + + return adjusted + + def upper_fuzzy(self): + time_empty_time_tuple = tuple(TIME_EMPTY_TIME) + time_empty_extras_tuple = tuple(TIME_EMPTY_EXTRAS) + strict_val = ( + self.upper_strict() + ) # negative handled in the upper_strict() override + + if self.negative: + adjusted = apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) + if ( + self.precision == PRECISION_YEAR + or self.precision == PRECISION_DECADE + or self.precision == PRECISION_CENTURY + or self.precision == PRECISION_MILLENIUM + ): + adjusted = struct_time( + (adjusted.tm_year, 12, 31) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + elif self.precision == PRECISION_MONTH: + days_in_month = calendar.monthrange(adjusted.tm_year, adjusted.tm_mon)[ + 1 + ] + adjusted = struct_time( + (adjusted.tm_year, adjusted.tm_mon, days_in_month) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + else: + adjusted = apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) + if ( + self.precision == PRECISION_YEAR + or self.precision == PRECISION_DECADE + or self.precision == PRECISION_CENTURY + or self.precision == PRECISION_MILLENIUM + ): + adjusted = struct_time( + (adjusted.tm_year, 12, 31) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + elif self.precision == PRECISION_MONTH: + adjusted = struct_time( + (adjusted.tm_year, adjusted.tm_mon, 1) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + + return adjusted + + def lower_strict(self): + if 
self.negative: + strict_val = self._strict_date( + lean=LATEST + ) # gets the year right, but need to adjust day and month + if ( + self.precision == PRECISION_YEAR + or self.precision == PRECISION_DECADE + or self.precision == PRECISION_CENTURY + or self.precision == PRECISION_MILLENIUM + ): + return struct_time( + (strict_val.tm_year, 1, 1) + + tuple(TIME_EMPTY_TIME) + + tuple(TIME_EMPTY_EXTRAS) + ) + elif self.precision == PRECISION_MONTH: + days_in_month = calendar.monthrange( + strict_val.tm_year, strict_val.tm_mon + )[1] + return struct_time( + (strict_val.tm_year, strict_val.tm_mon, days_in_month) + + tuple(TIME_EMPTY_TIME) + + tuple(TIME_EMPTY_EXTRAS) + ) + else: + return strict_val + else: + return self._strict_date(lean=EARLIEST) + + def upper_strict(self): + if self.negative: + strict_val = self._strict_date(lean=EARLIEST) + if ( + self.precision == PRECISION_YEAR + or self.precision == PRECISION_DECADE + or self.precision == PRECISION_CENTURY + or self.precision == PRECISION_MILLENIUM + ): + return struct_time( + (strict_val.tm_year, 12, 31) + + tuple(TIME_EMPTY_TIME) + + tuple(TIME_EMPTY_EXTRAS) + ) + elif self.precision == PRECISION_MONTH: + days_in_month = calendar.monthrange( + strict_val.tm_year, strict_val.tm_mon + )[1] + return struct_time( + (strict_val.tm_year, strict_val.tm_mon, days_in_month) + + tuple(TIME_EMPTY_TIME) + + tuple(TIME_EMPTY_EXTRAS) + ) + else: + return strict_val + else: + return self._strict_date(lean=LATEST) + @property def precision(self): if self.day: @@ -615,13 +782,14 @@ def precision(self): if self.month: return PRECISION_MONTH if self.year: - if self.year.isdigit(): + year_no_symbol = self.year.lstrip("-") + if year_no_symbol.isdigit(): return PRECISION_YEAR - if len(self.year) == 4 and self.year.endswith("XXX"): + if len(year_no_symbol) == 4 and year_no_symbol.endswith("XXX"): return PRECISION_MILLENIUM - if len(self.year) == 4 and self.year.endswith("XX"): + if len(year_no_symbol) == 4 and 
year_no_symbol.endswith("XX"): return PRECISION_CENTURY - if len(self.year) == 4 and self.year.endswith("X"): + if len(year_no_symbol) == 4 and year_no_symbol.endswith("X"): return PRECISION_DECADE raise ValueError(f"Unspecified date {self} has no precision") diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 464aca3..c89b3b8 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -81,6 +81,8 @@ ("1999-01-XX", ("1999-01-01", "1999-01-31")), # some day in 1999 ("1999-XX-XX", ("1999-01-01", "1999-12-31")), + # negative unspecified year + ("-01XX", ("-0199-01-01", "-0100-12-31")), # Uncertain/Approximate lower boundary dates (BCE) ("-0275~", ("-0275-01-01", "-0275-12-31", "-0276-01-01", "-0274-12-31")), ("-0001~", ("-0001-01-01", "-0001-12-31", "-0002-01-01", "0000-12-31")), @@ -92,6 +94,7 @@ ("1XXX", ("1000-01-01", "1999-12-31")), ("1XXX~", ("1000-01-01", "1999-12-31", "0000-01-01", "2999-12-31")), ("156X~", ("1560-01-01", "1569-12-31", "1550-01-01", "1579-12-31")), + ("-01XX~", ("-0199-01-01", "-0100-12-31", "-0299-01-01", "-0000-12-31")), # L1 Extended Interval # beginning unknown, end 2006 # for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years) From c14a57b63846c5b94a00ae87c7ad16c37717ba6b Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Tue, 28 May 2024 13:51:47 -0400 Subject: [PATCH 081/102] Cleanup --- edtf/parser/parser_classes.py | 171 ++++++---------------------------- edtf/parser/tests.py | 2 +- 2 files changed, 32 insertions(+), 141 deletions(-) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 43f4a9c..a15cbf1 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -550,17 +550,14 @@ def __init__( ua=None, **kwargs, ): - for param in ("date", "lower", "upper"): - if param in kwargs: - self.__init__(**kwargs[param]) - return - self.year = year # Year is 
required, but sometimes passed in as a 'date' dict. - self.month = month - self.day = day - self.significant_digits = ( - int(significant_digits) if significant_digits else None + super().__init__( + year=year, + month=month, + day=day, + significant_digits=significant_digits, + **kwargs, ) - self.ua = ua if ua else None + self.ua = ua self.negative = self.year.startswith("-") def __str__(self): @@ -576,16 +573,8 @@ def _get_fuzzy_padding(self, lean): padding = relativedelta() if self.year: - year_no_symbol = self.year.lstrip("-") - years_padding = self._calculate_years_padding(multiplier, year_no_symbol) - # Reverse the padding for negative years and earliest calculations - # if self.negative: - # years_padding = -years_padding if lean == EARLIEST else years_padding - # else: - # years_padding = years_padding if lean == EARLIEST else -years_padding - + years_padding = self._years_padding(multiplier) padding += years_padding - if self.month: padding += relativedelta( months=int(multiplier * appsettings.PADDING_MONTH_PRECISION.months) @@ -594,127 +583,32 @@ def _get_fuzzy_padding(self, lean): padding += relativedelta( days=int(multiplier * appsettings.PADDING_DAY_PRECISION.days) ) - return padding - def _calculate_years_padding(self, multiplier, year_no_symbol): - if self.precision == PRECISION_MILLENIUM: - return relativedelta( - years=int(multiplier * appsettings.PADDING_MILLENNIUM_PRECISION.years) - ) - elif self.precision == PRECISION_CENTURY: - return relativedelta( - years=int(multiplier * appsettings.PADDING_CENTURY_PRECISION.years) - ) - elif self.precision == PRECISION_DECADE: - return relativedelta( - years=int(multiplier * appsettings.PADDING_DECADE_PRECISION.years) - ) - else: - return relativedelta( - years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years) - ) + def _years_padding(self, multiplier): + """Calculate year padding based on the precision.""" + precision_settings = { + PRECISION_MILLENIUM: 
appsettings.PADDING_MILLENNIUM_PRECISION.years, + PRECISION_CENTURY: appsettings.PADDING_CENTURY_PRECISION.years, + PRECISION_DECADE: appsettings.PADDING_DECADE_PRECISION.years, + PRECISION_YEAR: appsettings.PADDING_YEAR_PRECISION.years, + } + years = precision_settings.get(self.precision, 0) + return relativedelta(years=int(multiplier * years)) def lower_fuzzy(self): - time_empty_time_tuple = tuple(TIME_EMPTY_TIME) - time_empty_extras_tuple = tuple(TIME_EMPTY_EXTRAS) strict_val = ( self.lower_strict() ) # negative handled in the lower_strict() override - - if self.negative: - adjusted = apply_delta(sub, strict_val, self._get_fuzzy_padding(LATEST)) - if ( - self.precision == PRECISION_YEAR - or self.precision == PRECISION_DECADE - or self.precision == PRECISION_CENTURY - or self.precision == PRECISION_MILLENIUM - ): - adjusted = struct_time( - (adjusted.tm_year, 1, 1) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - elif self.precision == PRECISION_MONTH: - adjusted = struct_time( - (adjusted.tm_year, adjusted.tm_mon, 1) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - else: - adjusted = apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) - if ( - self.precision == PRECISION_YEAR - or self.precision == PRECISION_DECADE - or self.precision == PRECISION_CENTURY - or self.precision == PRECISION_MILLENIUM - ): - adjusted = struct_time( - (adjusted.tm_year, 1, 1) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - elif self.precision == PRECISION_MONTH: - days_in_month = calendar.monthrange(adjusted.tm_year, adjusted.tm_mon)[ - 1 - ] - adjusted = struct_time( - (adjusted.tm_year, adjusted.tm_mon, days_in_month) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - + adjusted = apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) return adjusted def upper_fuzzy(self): - time_empty_time_tuple = tuple(TIME_EMPTY_TIME) - time_empty_extras_tuple = tuple(TIME_EMPTY_EXTRAS) strict_val = ( self.upper_strict() ) # 
negative handled in the upper_strict() override - if self.negative: - adjusted = apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) - if ( - self.precision == PRECISION_YEAR - or self.precision == PRECISION_DECADE - or self.precision == PRECISION_CENTURY - or self.precision == PRECISION_MILLENIUM - ): - adjusted = struct_time( - (adjusted.tm_year, 12, 31) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - elif self.precision == PRECISION_MONTH: - days_in_month = calendar.monthrange(adjusted.tm_year, adjusted.tm_mon)[ - 1 - ] - adjusted = struct_time( - (adjusted.tm_year, adjusted.tm_mon, days_in_month) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - else: - adjusted = apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) - if ( - self.precision == PRECISION_YEAR - or self.precision == PRECISION_DECADE - or self.precision == PRECISION_CENTURY - or self.precision == PRECISION_MILLENIUM - ): - adjusted = struct_time( - (adjusted.tm_year, 12, 31) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - elif self.precision == PRECISION_MONTH: - adjusted = struct_time( - (adjusted.tm_year, adjusted.tm_mon, 1) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - + adjusted = apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) return adjusted def lower_strict(self): @@ -722,11 +616,11 @@ def lower_strict(self): strict_val = self._strict_date( lean=LATEST ) # gets the year right, but need to adjust day and month - if ( - self.precision == PRECISION_YEAR - or self.precision == PRECISION_DECADE - or self.precision == PRECISION_CENTURY - or self.precision == PRECISION_MILLENIUM + if self.precision in ( + PRECISION_YEAR, + PRECISION_DECADE, + PRECISION_CENTURY, + PRECISION_MILLENIUM, ): return struct_time( (strict_val.tm_year, 1, 1) @@ -734,11 +628,8 @@ def lower_strict(self): + tuple(TIME_EMPTY_EXTRAS) ) elif self.precision == PRECISION_MONTH: - days_in_month = calendar.monthrange( - strict_val.tm_year, 
strict_val.tm_mon - )[1] return struct_time( - (strict_val.tm_year, strict_val.tm_mon, days_in_month) + (strict_val.tm_year, strict_val.tm_mon, 1) + tuple(TIME_EMPTY_TIME) + tuple(TIME_EMPTY_EXTRAS) ) @@ -750,11 +641,11 @@ def lower_strict(self): def upper_strict(self): if self.negative: strict_val = self._strict_date(lean=EARLIEST) - if ( - self.precision == PRECISION_YEAR - or self.precision == PRECISION_DECADE - or self.precision == PRECISION_CENTURY - or self.precision == PRECISION_MILLENIUM + if self.precision in ( + PRECISION_YEAR, + PRECISION_DECADE, + PRECISION_CENTURY, + PRECISION_MILLENIUM, ): return struct_time( (strict_val.tm_year, 12, 31) diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index c89b3b8..199f245 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -94,7 +94,7 @@ ("1XXX", ("1000-01-01", "1999-12-31")), ("1XXX~", ("1000-01-01", "1999-12-31", "0000-01-01", "2999-12-31")), ("156X~", ("1560-01-01", "1569-12-31", "1550-01-01", "1579-12-31")), - ("-01XX~", ("-0199-01-01", "-0100-12-31", "-0299-01-01", "-0000-12-31")), + ("-01XX~", ("-0199-01-01", "-0100-12-31", "-0299-01-01", "0000-12-31")), # L1 Extended Interval # beginning unknown, end 2006 # for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years) From 53d3a32c9fe0b18fb7aa550de4478cc18550bc2f Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Tue, 28 May 2024 15:10:46 -0400 Subject: [PATCH 082/102] Add a global debug setting If not in debug mode, use a simpler EDTFParseException rather than returning the full pyparsing error --- edtf/appsettings.py | 2 ++ edtf/parser/grammar.py | 11 +++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/edtf/appsettings.py b/edtf/appsettings.py index e00a223..8e15846 100644 --- a/edtf/appsettings.py +++ b/edtf/appsettings.py @@ -98,3 +98,5 @@ MULTIPLIER_IF_APPROXIMATE = 
EDTF.get("MULTIPLIER_IF_APPROXIMATE", 1.0) MULTIPLIER_IF_BOTH = EDTF.get("MULTIPLIER_IF_BOTH", 2.0) DELTA_IF_UNKNOWN = EDTF.get("DELTA_IF_UNKNOWN", relativedelta(years=10)) + +DEBUG_PYPARSING = False diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index f458b2b..1e624fc 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -4,6 +4,7 @@ # https://github.com/pyparsing/pyparsing/wiki/Performance-Tips import pyparsing +from edtf.appsettings import DEBUG_PYPARSING pyparsing.ParserElement.enablePackrat() @@ -342,7 +343,9 @@ def f(toks): ) -def parse_edtf(str, parseAll=True, fail_silently=False): +def parse_edtf(str, parseAll=True, fail_silently=False, debug=None): + if debug is None: + debug = DEBUG_PYPARSING try: if not str: raise ParseException("You must supply some input text") @@ -352,4 +355,8 @@ def parse_edtf(str, parseAll=True, fail_silently=False): except ParseException as err: if fail_silently: return None - raise EDTFParseException(err) from err + if debug: + raise + near_text = str[max(err.loc - 10, 0) : err.loc + 10] + full_msg = f"Error at position {err.loc}: Invalid input or format near '{near_text}'. Please provide a valid EDTF string." 
+ raise EDTFParseException(full_msg) from None From ab6c41320eb2354bbf68b78ec5d121a0709dd777 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Tue, 28 May 2024 15:39:42 -0400 Subject: [PATCH 083/102] Handle empty string --- edtf/parser/grammar.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 1e624fc..773f806 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -343,13 +343,13 @@ def f(toks): ) -def parse_edtf(str, parseAll=True, fail_silently=False, debug=None): +def parse_edtf(input_string, parseAll=True, fail_silently=False, debug=None): if debug is None: debug = DEBUG_PYPARSING try: - if not str: + if not input_string: raise ParseException("You must supply some input text") - p = edtfParser.parseString(str.strip(), parseAll) + p = edtfParser.parseString(input_string.strip(), parseAll) if p: return p[0] except ParseException as err: @@ -357,6 +357,8 @@ def parse_edtf(str, parseAll=True, fail_silently=False, debug=None): return None if debug: raise - near_text = str[max(err.loc - 10, 0) : err.loc + 10] + near_text = "" + if input_string: + near_text = input_string[max(err.loc - 10, 0) : err.loc + 10] full_msg = f"Error at position {err.loc}: Invalid input or format near '{near_text}'. Please provide a valid EDTF string." 
raise EDTFParseException(full_msg) from None From 55b0723754b7eb606820b11ccc7bb04d5a6232b3 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 3 Jun 2024 15:58:07 -0400 Subject: [PATCH 084/102] Add targeted failure and tests for empty and null inputs --- edtf/parser/grammar.py | 4 ++-- edtf/parser/tests.py | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 773f806..651b4b3 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -346,9 +346,9 @@ def f(toks): def parse_edtf(input_string, parseAll=True, fail_silently=False, debug=None): if debug is None: debug = DEBUG_PYPARSING + if not input_string: + raise EDTFParseException("You must supply some input text") try: - if not input_string: - raise ParseException("You must supply some input text") p = edtfParser.parseString(input_string.strip(), parseAll) if p: return p[0] diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 199f245..15875b9 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -347,6 +347,14 @@ def test_non_parsing(bad_input): parse(bad_input) +@pytest.mark.parametrize("bad_input", [None, ""]) +def test_empty_input(bad_input): + """Test that empty input raises a specific exception.""" + with pytest.raises(EDTFParseException) as exc_info: + parse(bad_input) + assert "You must supply some input text" in str(exc_info.value) + + def test_comparisons(): """Test comparisons between parsed EDTF objects and standard dates.""" d1 = parse("1979-08~") From d5ad27b37916ebe333642de1cc5b20ea5986465a Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 3 Jun 2024 18:00:32 -0400 Subject: [PATCH 085/102] Improve EDTFParseException handling Includes handling for empty or null input strings and null errs passed to the constructor Co-Authored-By: aweakley <224316+aweakley@users.noreply.github.com> --- 
edtf/fields.py | 12 ++++++++---- edtf/parser/edtf_exceptions.py | 26 +++++++++++++++++++++++++- edtf/parser/grammar.py | 8 ++------ 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/edtf/fields.py b/edtf/fields.py index f717592..2f25c94 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -4,10 +4,12 @@ from django.db import models from django.db.models import signals from django.db.models.query_utils import DeferredAttribute +from pyparsing import ParseException from edtf import EDTFObject, parse_edtf from edtf.convert import struct_time_to_date, struct_time_to_jd from edtf.natlang import text_to_edtf +from edtf.parser.edtf_exceptions import EDTFParseException DATE_ATTRS = ( "lower_strict", @@ -132,10 +134,12 @@ def update_values(self, instance, *args, **kwargs): if direct_input and ( existing_value is None or str(existing_value) != direct_input ): - edtf = parse_edtf( - direct_input, fail_silently=True - ) # ParseException if invalid; should this be raised? - # TODO pyparsing.ParseExceptions are very noisy and dumps the whole grammar (see https://github.com/ixc/python-edtf/issues/46) + try: + edtf = parse_edtf( + direct_input, fail_silently=True + ) # ParseException if invalid; should this be raised? + except ParseException as err: + raise EDTFParseException(direct_input, err) from None # set the natural_text (display) field to the direct_input if it is not provided if natural_text == "": diff --git a/edtf/parser/edtf_exceptions.py b/edtf/parser/edtf_exceptions.py index 9530602..d906d58 100644 --- a/edtf/parser/edtf_exceptions.py +++ b/edtf/parser/edtf_exceptions.py @@ -2,4 +2,28 @@ class EDTFParseException(ParseException): - pass + """Raised when an input cannot be parsed as an EDTF string. 
+ + Attributes: + input_string - the input string that could not be parsed + err -- the original ParseException that caused this one + """ + + def __init__(self, input_string, err=None): + if input_string is None: + input_string = "" + self.input_string = input_string + if err is None: + err = ParseException(input_string, 0, "Invalid input or format.") + self.err = err + super().__init__(str(err), err.loc if err.loc else 0, self.input_string) + + def __str__(self): + if not self.input_string: + return "You must supply some input text" + near_text = ( + self.input_string[max(self.err.loc - 10, 0) : self.err.loc + 10] + if hasattr(self.err, "loc") + else "" + ) + return f"Error at position {self.err.loc}: Invalid input or format near '{near_text}'. Please provide a valid EDTF string." diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 651b4b3..beabf52 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -347,7 +347,7 @@ def parse_edtf(input_string, parseAll=True, fail_silently=False, debug=None): if debug is None: debug = DEBUG_PYPARSING if not input_string: - raise EDTFParseException("You must supply some input text") + raise EDTFParseException(input_string) try: p = edtfParser.parseString(input_string.strip(), parseAll) if p: @@ -357,8 +357,4 @@ def parse_edtf(input_string, parseAll=True, fail_silently=False, debug=None): return None if debug: raise - near_text = "" - if input_string: - near_text = input_string[max(err.loc - 10, 0) : err.loc + 10] - full_msg = f"Error at position {err.loc}: Invalid input or format near '{near_text}'. Please provide a valid EDTF string." 
- raise EDTFParseException(full_msg) from None + raise EDTFParseException(input_string, err) from None From daf0d041dc739975e822f35813dfd82ca75eacea Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 3 Jun 2024 18:40:15 -0400 Subject: [PATCH 086/102] Add the TestEvent model to Django admin Make the string representation of TestEvent simpler --- edtf_django_tests/edtf_integration/admin.py | 44 +++++++++++++++++++- edtf_django_tests/edtf_integration/models.py | 4 -- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/edtf_django_tests/edtf_integration/admin.py b/edtf_django_tests/edtf_integration/admin.py index 846f6b4..3051891 100644 --- a/edtf_django_tests/edtf_integration/admin.py +++ b/edtf_django_tests/edtf_integration/admin.py @@ -1 +1,43 @@ -# Register your models here. +from django.contrib import admin + +from .models import TestEvent + + +class TestEventAdmin(admin.ModelAdmin): + list_display = ( + "date_display", + "date_edtf_direct", + "date_earliest", + "date_latest", + "date_sort_ascending", + "date_sort_descending", + "date_edtf", + ) + search_fields = ("date_display", "date_edtf_direct") + list_filter = ("date_earliest", "date_latest") + readonly_fields = ( + "date_earliest", + "date_latest", + "date_sort_ascending", + "date_sort_descending", + "date_edtf", + ) + + fieldsets = ( + (None, {"fields": ("date_display", "date_edtf_direct", "date_edtf")}), + ( + "Computed Dates", + { + "classes": ("collapse",), + "fields": ( + "date_earliest", + "date_latest", + "date_sort_ascending", + "date_sort_descending", + ), + }, + ), + ) + + +admin.site.register(TestEvent, TestEventAdmin) diff --git a/edtf_django_tests/edtf_integration/models.py b/edtf_django_tests/edtf_integration/models.py index 5120889..5e66592 100644 --- a/edtf_django_tests/edtf_integration/models.py +++ b/edtf_django_tests/edtf_integration/models.py @@ -49,9 +49,5 @@ def __str__(self) -> str: return ( f"Test Event: 
{self.date_display=}, " f"{self.date_edtf_direct=}, " - f"{self.date_earliest=}, " - f"{self.date_latest=}, " - f"{self.date_sort_ascending=}, " - f"{self.date_sort_descending=}, " f"{self.date_edtf=}" ) From 581855784dd428a51fd17ac71e92301030a48624 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Wed, 5 Jun 2024 13:48:07 -0400 Subject: [PATCH 087/102] Update qualification properties Properly set qualification properties on - UncertainOrApproximate - Unspecified - Level1Interval - PartialUncertainOrApproximate - Level2Interval Adds tests to check that each EDTF object is parsed and that .is_approximate, .is_uncertain, and .is_uncertain_and_approximate are set to what we expect them to be --- edtf/parser/parser_classes.py | 45 ++++++++++++++++++++++++++++++++++- edtf/parser/tests.py | 32 +++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index a15cbf1..b2dbadd 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -91,7 +91,7 @@ def apply_delta(op, time_struct, delta): class EDTFObject: """ - Object to attact to a parser to become instantiated when the parser + Object to attach to a parser to become instantiated when the parser completes. 
""" @@ -470,6 +470,11 @@ class UncertainOrApproximate(EDTFObject): def __init__(self, date, ua): self.date = date self.ua = ua + self.is_uncertain = ua.is_uncertain if ua else False + self.is_approximate = ua.is_approximate if ua else False + self.is_uncertain_and_approximate = ( + ua.is_uncertain_and_approximate if ua else False + ) def __str__(self): if self.ua: @@ -558,6 +563,11 @@ def __init__( **kwargs, ) self.ua = ua + self.is_uncertain = ua.is_uncertain if ua else False + self.is_approximate = ua.is_approximate if ua else False + self.is_uncertain_and_approximate = ( + ua.is_uncertain_and_approximate if ua else False + ) self.negative = self.year.startswith("-") def __str__(self): @@ -709,6 +719,12 @@ def __init__(self, lower=None, upper=None): self.upper = UnspecifiedIntervalSection( False, UncertainOrApproximate(**lower) ) + self.is_approximate = self.lower.is_approximate or self.upper.is_approximate + self.is_uncertain = self.lower.is_uncertain or self.upper.is_uncertain + self.is_uncertain_and_approximate = ( + self.lower.is_uncertain_and_approximate + or self.upper.is_uncertain_and_approximate + ) def _get_fuzzy_padding(self, lean): if lean == EARLIEST: @@ -840,6 +856,27 @@ def __init__( self.all_ua = all_ua + uas = [ + year_ua, + month_ua, + day_ua, + year_month_ua, + month_day_ua, + season_ua, + all_ua, + ] + self.is_uncertain = any( + item.is_uncertain for item in uas if hasattr(item, "is_approximate") + ) + self.is_approximate = any( + item.is_approximate for item in uas if hasattr(item, "is_approximate") + ) + self.is_uncertain_and_approximate = any( + item.is_uncertain_and_approximate + for item in uas + if hasattr(item, "is_uncertain_and_approximate") + ) + def __str__(self): if self.season_ua: return f"{self.season}{self.season_ua}" @@ -1046,6 +1083,12 @@ def __init__(self, lower, upper): self.upper = upper[0] else: self.upper = upper + self.is_approximate = self.lower.is_approximate or self.upper.is_approximate + self.is_uncertain = 
self.lower.is_uncertain or self.upper.is_uncertain + self.is_uncertain_and_approximate = ( + self.lower.is_uncertain_and_approximate + or self.upper.is_uncertain_and_approximate + ) class Level2Season(Season): diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 15875b9..e7f2953 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -240,6 +240,24 @@ "2001-29", ) +APPROXIMATE_UNCERTAIN_EXAMPLES = ( + # first part of tuple is the input EDTF string, second part is a tuple of booleans: + # uncertain ?, approximate ~, both uncertain and approximate % + ("2004", (False, False, False)), + ("2006-06-11", (False, False, False)), + ("-0999", (False, False, False)), + ("1984?", (True, False, False)), + ("2004-06-11?", (True, False, False)), + ("1984~", (False, True, False)), + ("1984%", (False, False, True)), + ("1984~/2004-06", (False, True, False)), + ("2004-%06", (False, False, True)), + ("2004?-~06-~04", (True, True, False)), + ("2011-~06-~04", (False, True, False)), + ("2004-06-~01/2004-06-~20", (False, True, False)), + ("156X~", (False, True, False)), +) + BAD_EXAMPLES = ( # parentheses are not used for group qualification in the 2018 spec None, @@ -379,3 +397,17 @@ def test_comparisons(): def test_benchmark_parser(benchmark, test_input): """Benchmark parsing of selected EDTF strings.""" benchmark(parse, test_input) + + +@pytest.mark.parametrize("test_input,expected_tuple", APPROXIMATE_UNCERTAIN_EXAMPLES) +def test_approximate_uncertain(test_input, expected_tuple): + """Test parsing of EDTF strings and check .is_uncertain, .is_approximate, + and .is_uncertain_and_approximate properties. 
The expected_tuple should have three + values, the first should be a boolean indicating if the date is uncertain, + the second should be a boolean indicating if the date is approximate, and the + third should be a boolean indicating if the date is both uncertain and approximate.""" + result = parse(test_input) + assert isinstance(result, EDTFObject), "Result should be an instance of EDTFObject" + assert result.is_uncertain == expected_tuple[0] + assert result.is_approximate == expected_tuple[1] + assert result.is_uncertain_and_approximate == expected_tuple[2] From 63a15736d8d389bd2a8d29fb21990e7fb16f2569 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Wed, 5 Jun 2024 15:00:22 -0400 Subject: [PATCH 088/102] Add docs about qualification properties --- README.md | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/README.md b/README.md index 9fc6ede..b001157 100644 --- a/README.md +++ b/README.md @@ -342,6 +342,50 @@ One can interpret uncertain or approximate dates as 'plus or minus a [level of p If a date is both uncertain __and__ approximate, the padding is applied twice, i.e. it gets 100% * 2 padding, or 'plus or minus two [levels of precision]'. +### Qualification properties +EDTF objects support properties that provide an overview of how the object is qualified: +- `.is_uncertain (?)` +- `.is_approximate (~)` +- `.is_uncertain_and_approximate (%)` +These properties represent whether any part of the date object is uncertain, approximate, or uncertain and approximate. For ranges, the properties are true if any part of the range (lower or upper section) is qualified as such. A date is not necessarily uncertain and approximate if it is separately both uncertain and approximate - it must have the "%" qualifier to be considered uncertain and approximate. 
+```python +>>> parse_edtf("2006-06-11") +Date: '2006-06-11' +>>> parse_edtf("2006-06-11").is_uncertain +False +>>> parse_edtf("2006-06-11").is_approximate +False + +>>> parse_edtf("1984?") +UncertainOrApproximate: '1984?' +>>> parse_edtf("1984?").is_approximate +False +>>> parse_edtf("1984?").is_uncertain +True +>>> parse_edtf("1984?").is_uncertain_and_approximate +False + +>>> parse_edtf("1984%").is_uncertain +False +>>> parse_edtf("1984%").is_uncertain_and_approximate +True + +>>> parse_edtf("1984~/2004-06") +Level1Interval: '1984~/2004-06' +>>> parse_edtf("1984~/2004-06").is_approximate +True +>>> parse_edtf("1984~/2004-06").is_uncertain +False + +>>> parse_edtf("2004?-~06-~04") +PartialUncertainOrApproximate: '2004?-~06-~04'>>> L2_PartialUncertainOrApproximate.is_approximate +True +>>> parse_edtf("2004?-~06-~04").is_uncertain +True +>>> parse_edtf("2004?-~06-~04").is_uncertain_and_approximate +False +``` + ### Seasons Seasons are interpreted as Northern Hemisphere by default. To change this, override the month mapping in `appsettings.py`. 
From b3205afe2d634527fe2c6d5f83670f2c1e6c49ba Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 10 Jun 2024 22:57:59 -0400 Subject: [PATCH 089/102] Fix typo, add more tests --- README.md | 3 ++- edtf/parser/parser_classes.py | 2 +- edtf/parser/tests.py | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b001157..6acb176 100644 --- a/README.md +++ b/README.md @@ -378,7 +378,8 @@ True False >>> parse_edtf("2004?-~06-~04") -PartialUncertainOrApproximate: '2004?-~06-~04'>>> L2_PartialUncertainOrApproximate.is_approximate +PartialUncertainOrApproximate: '2004?-~06-~04' +>>> parse_edtf("2004?-~06-~04").is_approximate True >>> parse_edtf("2004?-~06-~04").is_uncertain True diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index b2dbadd..ed03355 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -866,7 +866,7 @@ def __init__( all_ua, ] self.is_uncertain = any( - item.is_uncertain for item in uas if hasattr(item, "is_approximate") + item.is_uncertain for item in uas if hasattr(item, "is_uncertain") ) self.is_approximate = any( item.is_approximate for item in uas if hasattr(item, "is_approximate") diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index e7f2953..c2dd711 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -253,6 +253,7 @@ ("1984~/2004-06", (False, True, False)), ("2004-%06", (False, False, True)), ("2004?-~06-~04", (True, True, False)), + ("2004?-06-04", (True, False, False)), ("2011-~06-~04", (False, True, False)), ("2004-06-~01/2004-06-~20", (False, True, False)), ("156X~", (False, True, False)), From 7a99f1203aa675aa37fb01b9a8af527c6c40dfd5 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Tue, 11 Jun 2024 18:37:22 -0400 Subject: [PATCH 090/102] Simplify EDTFField init; add direct_input_field to deconstruct() --- edtf/fields.py | 25 
+++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/edtf/fields.py b/edtf/fields.py index 2f25c94..642b6bb 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -48,21 +48,12 @@ def __init__( **kwargs, ): kwargs["max_length"] = 2000 - ( - self.natural_text_field, - self.direct_input_field, - self.lower_strict_field, - self.upper_strict_field, - self.lower_fuzzy_field, - self.upper_fuzzy_field, - ) = ( - natural_text_field, - direct_input_field, - lower_strict_field, - upper_strict_field, - lower_fuzzy_field, - upper_fuzzy_field, - ) + self.natural_text_field = natural_text_field + self.direct_input_field = direct_input_field + self.lower_strict_field = lower_strict_field + self.upper_strict_field = upper_strict_field + self.lower_fuzzy_field = lower_fuzzy_field + self.upper_fuzzy_field = upper_fuzzy_field super().__init__(verbose_name, name, **kwargs) description = ( @@ -74,6 +65,8 @@ def deconstruct(self): name, path, args, kwargs = super().deconstruct() if self.natural_text_field: kwargs["natural_text_field"] = self.natural_text_field + if self.direct_input_field: + kwargs["direct_input_field"] = self.direct_input_field for attr in DATE_ATTRS: field = f"{attr}_field" @@ -152,7 +145,7 @@ def update_values(self, instance, *args, **kwargs): ): edtf = parse_edtf( edtf_string, fail_silently=True - ) # potetial ParseException if invalid; should this be raised? + ) # potential ParseException if invalid; should this be raised? 
else: edtf = existing_value else: From e99813cbdcf878111c946c9b8b5f142e38bfb833 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Tue, 11 Jun 2024 18:53:22 -0400 Subject: [PATCH 091/102] Only publish benchmark results on the upstream --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0f97b3c..4645d13 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -100,7 +100,7 @@ jobs: - name: Publish benchmark results uses: benchmark-action/github-action-benchmark@v1 - if: github.event_name != 'pull_request' + if: github.event_name == 'pull_request' && github.repository == 'ixc/python-edtf' with: tool: 'pytest' auto-push: true @@ -112,6 +112,7 @@ jobs: summary-always: true - name: Comment on benchmark results without publishing + if: github.event_name != 'pull_request' || github.repository != 'ixc/python-edtf' uses: benchmark-action/github-action-benchmark@v1 with: tool: 'pytest' From 952949156289cb1da8b1a9af59f32687cbdada8e Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Thu, 13 Jun 2024 21:12:55 +1000 Subject: [PATCH 092/102] Anticipate None for date_display #62 --- edtf/fields.py | 2 +- edtf_django_tests/edtf_integration/tests.py | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/edtf/fields.py b/edtf/fields.py index 642b6bb..7dba5d4 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -135,7 +135,7 @@ def update_values(self, instance, *args, **kwargs): raise EDTFParseException(direct_input, err) from None # set the natural_text (display) field to the direct_input if it is not provided - if natural_text == "": + if not natural_text: setattr(instance, self.natural_text_field, direct_input) elif natural_text: diff --git a/edtf_django_tests/edtf_integration/tests.py b/edtf_django_tests/edtf_integration/tests.py index 88fdca8..493d0d2 100644 --- 
a/edtf_django_tests/edtf_integration/tests.py +++ b/edtf_django_tests/edtf_integration/tests.py @@ -74,6 +74,26 @@ def test_date_display(self): self.assertEqual(self.event3.date_display, "2019-11") self.assertEqual(self.event4.date_display, "Approximately August 2018") + def test_date_display_with_none_or_empty_string(self): + """ + Test that the date_display field is correctly populated when the + `natural_date` field is set to empty string (for example, if it + were used with `null=False` in the model definition) or set to + None (if it were used with `null=True`). + """ + event = TestEvent(date_display="") + event.date_edtf_direct = "2020-03-15/2020-04-15" + # Trigger the descriptor to update the date_display field + event.date_edtf = "" + self.assertEqual(event.date_display, "2020-03-15/2020-04-15") + + event = TestEvent(date_display=None) + # Verify date_display is set to None even though the field is `null=False` + self.assertIsNone(event.date_display) + event.date_edtf_direct = "2020-03-15/2020-04-15" + event.date_edtf = "" + self.assertEqual(event.date_display, "2020-03-15/2020-04-15") + def test_comparison(self): # test equality of the same dates self.assertEqual( From 48b232fb70f3c0981fc26cca2f5ff4c965c83168 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Sun, 16 Jun 2024 16:34:54 +0200 Subject: [PATCH 093/102] style(readme): use project name for heading --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6acb176..074d2f1 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,11 @@ -edtf -===== +# python-edtf An implementation of EDTF format in Python, together with utility functions for parsing natural language date texts, and converting EDTF dates to related Python `date` or `struct_time` objects. -See http://www.loc.gov/standards/datetime/ for the current draft specification. +See for the final draft specification. 
This project is based on python-edtf and was developed to include the newest specification From c0dce8ad8519a5129ec02231221ee54a89e88934 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Sun, 16 Jun 2024 16:35:24 +0200 Subject: [PATCH 094/102] style(readme): unify code block style --- README.md | 549 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 305 insertions(+), 244 deletions(-) diff --git a/README.md b/README.md index 074d2f1..b5f5bbc 100644 --- a/README.md +++ b/README.md @@ -11,69 +11,87 @@ This project is based on python-edtf and was developed to include the newest spe ## To install - pip install edtf +```shell +pip install edtf +``` ## To use - >>> from edtf import parse_edtf - # Parse an EDTF string to an EDTFObject - >>> e = parse_edtf("1979-08~") # approx August 1979 - >>> e - UncertainOrApproximate: '1979-08~' - # normalised string representation (some different EDTF strings have identical meanings) - >>> unicode(e) - u'1979-08~' - - # Derive Python date objects - # lower and upper bounds that strictly adhere to the given range - >>> e.lower_strict()[:3], e.upper_strict()[:3] - ((1979, 8, 1), (1979, 8, 31)) - # lower and upper bounds that are padded if there's indicated uncertainty - >>> e.lower_fuzzy()[:3], e.upper_fuzzy()[:3] - ((1979, 7, 1), (1979, 9, 30)) - - # Date intervals - >>> interval = parse_edtf("1979-08~/..") - >>> interval - Level1Interval: '1979-08~/..' - # Intervals have lower and upper EDTF objects. - >>> interval.lower, interval.upper - (UncertainOrApproximate: '1979-08~', UnspecifiedIntervalSection: '..') - >>> interval.lower.lower_strict()[:3], interval.lower.upper_strict()[:3] - ((1979, 8, 1), (1979, 8, 31)) - >>> interval.upper.upper_strict() # '..' 
is interpreted to mean open interval and is returning -/+ math.inf - math.inf - - # Date collections - >>> coll = parse_edtf('{1667,1668, 1670..1672}') - >>> coll - MultipleDates: '{1667, 1668, 1670..1672}' - >>> coll.objects - (Date: '1667', Date: '1668', Consecutives: '1670..1672') +```python +>>> from edtf import parse_edtf + +# Parse an EDTF string to an EDTFObject +>>> +>>> e = parse_edtf("1979-08~") # approx August 1979 +>>> e +UncertainOrApproximate: '1979-08~' + +# normalised string representation (some different EDTF strings have identical meanings) +>>> +>>> unicode(e) +u'1979-08~' + +# Derive Python date objects + +# lower and upper bounds that strictly adhere to the given range +>>> +>>> e.lower_strict()[:3], e.upper_strict()[:3] +((1979, 8, 1), (1979, 8, 31)) + +# lower and upper bounds that are padded if there's indicated uncertainty +>>> +>>> e.lower_fuzzy()[:3], e.upper_fuzzy()[:3] +((1979, 7, 1), (1979, 9, 30)) + +# Date intervals +>>> +>>> interval = parse_edtf("1979-08~/..") +>>> interval +Level1Interval: '1979-08~/..' + +# Intervals have lower and upper EDTF objects +>>> +>>> interval.lower, interval.upper +(UncertainOrApproximate: '1979-08~', UnspecifiedIntervalSection: '..') +>>> interval.lower.lower_strict()[:3], interval.lower.upper_strict()[:3] +((1979, 8, 1), (1979, 8, 31)) +>>> interval.upper.upper_strict() # '..' is interpreted to mean open interval and is returning -/+ math.inf +math.inf + +# Date collections +>>> +>>> coll = parse_edtf('{1667,1668, 1670..1672}') +>>> coll +MultipleDates: '{1667, 1668, 1670..1672}' +>>> coll.objects +(Date: '1667', Date: '1668', Consecutives: '1670..1672') +``` The object returned by `parse_edtf()` is an instance of an `edtf.parser.parser_classes.EDTFObject` subclass, depending on the type of date that was parsed. 
These classes are: - # Level 0 - Date - DateAndTime - Interval - - # Level 1 - UncertainOrApproximate - Unspecified - Level1Interval - UnspecifiedIntervalSection - LongYear - Season - - # Level 2 - PartialUncertainOrApproximate - PartialUnspecified - OneOfASet - MultipleDates - Level2Interval - Level2Season - ExponentialYear +```text +# Level 0 +Date +DateAndTime +Interval + +# Level 1 +UncertainOrApproximate +Unspecified +Level1Interval +UnspecifiedIntervalSection +LongYear +Season + +# Level 2 +PartialUncertainOrApproximate +PartialUnspecified +OneOfASet +MultipleDates +Level2Interval +Level2Season +ExponentialYear +``` All of these implement `upper/lower_strict/fuzzy()` methods to derive `struct_time` objects, except of UnspecifiedIntervalSection, that can also return math.inf value @@ -91,177 +109,209 @@ Test coverage includes every example given in the spec table of features. * Date: - >>> parse_edtf('1979-08') # August 1979 - Date: '1979-08' +```python +>>> parse_edtf('1979-08') # August 1979 +Date: '1979-08' +``` * Date and Time: - >>> parse_edtf('2004-01-01T10:10:10+05:00') - DateAndTime: '2004-01-01T10:10:10+05:00' +```python +>>> parse_edtf('2004-01-01T10:10:10+05:00') +DateAndTime: '2004-01-01T10:10:10+05:00' +``` * Interval (start/end): - >>> parse_edtf('1979-08-28/1979-09-25') # From August 28 to September 25 1979 - Interval: '1979-08-28/1979-09-25' +```python +>>> parse_edtf('1979-08-28/1979-09-25') # From August 28 to September 25 1979 +Interval: '1979-08-28/1979-09-25' +``` ### Level 1 Extensions * Uncertain/Approximate dates: - >>> parse_edtf('1979-08-28~') # Approximately August 28th 1979 - UncertainOrApproximate: '1979-08-28~' +```python +>>> parse_edtf('1979-08-28~') # Approximately August 28th 1979 +UncertainOrApproximate: '1979-08-28~' +``` * Unspecified dates: - >>> parse_edtf('1979-08-XX') # An unknown day in August 1979 - Unspecified: '1979-08-XX' - >>> parse_edtf('1979-XX') # Some month in 1979 - Unspecified: '1979-XX' +```python +>>> 
parse_edtf('1979-08-XX') # An unknown day in August 1979 +Unspecified: '1979-08-XX' +>>> parse_edtf('1979-XX') # Some month in 1979 +Unspecified: '1979-XX' +``` * Extended intervals: - >>> parse_edtf('1984-06-02?/2004-08-08~') - Level1Interval: '1984-06-02?/2004-08-08~' +```python +>>> parse_edtf('1984-06-02?/2004-08-08~') +Level1Interval: '1984-06-02?/2004-08-08~' +``` * Years exceeding four digits: - >>> parse_edtf('Y-12000') # 12000 years BCE - LongYear: 'Y-12000' +```python +>>> parse_edtf('Y-12000') # 12000 years BCE +LongYear: 'Y-12000' +``` * Season: - >>> parse_edtf('1979-22') # Summer 1979 - Season: '1979-22' +```python +>>> parse_edtf('1979-22') # Summer 1979 +Season: '1979-22' +``` ### Level 2 Extensions * Partial uncertain/approximate: - >>> parse_edtf('2004-06~-11') # year certain, month/day approximate. - PartialUncertainOrApproximate: '2004-06~-11' +```python +>>> parse_edtf('2004-06~-11') # year certain, month/day approximate. +PartialUncertainOrApproximate: '2004-06~-11' +``` * Partial unspecified: - >>> parse_edtf('1979-XX-28') # The 28th day of an uncertain month in 1979 - PartialUnspecified: '1979-XX-28' +```python +>>> parse_edtf('1979-XX-28') # The 28th day of an uncertain month in 1979 +PartialUnspecified: '1979-XX-28' +``` * One of a set: - >>> parse_edtf("[..1760-12-03,1762]") - OneOfASet: '[..1760-12-03, 1762]' +```python +>>> parse_edtf("[..1760-12-03,1762]") +OneOfASet: '[..1760-12-03, 1762]' +``` * Multiple dates: - >>> parse_edtf('{1667,1668, 1670..1672}') - MultipleDates: '{1667, 1668, 1670..1672}' +```python +>>> parse_edtf('{1667,1668, 1670..1672}') +MultipleDates: '{1667, 1668, 1670..1672}' +``` * Level 2 Extended intervals: - >>> parse_edtf('2004-06-~01/2004-06-~20') - Level2Interval: '2004-06-~01/2004-06-~20' +```python +>>> parse_edtf('2004-06-~01/2004-06-~20') +Level2Interval: '2004-06-~01/2004-06-~20' +``` * Year requiring more than 4 digits - exponential form: - >>> e = parse_edtf('Y-17E7') - ExponentialYear: 'Y-17E7' - >>> 
e.estimated() - -170000000 +```python +>>> e = parse_edtf('Y-17E7') +ExponentialYear: 'Y-17E7' +>>> e.estimated() +-170000000 +``` * Significant digits: - # '1950S2': some year between 1900 and 1999, estimated to be 1950 - >>> d = parse_edtf('1950S2') - Date: '1950S2' - >>> d.lower_fuzzy()[:3] - (1900, 1, 1) - >>> d.upper_fuzzy()[:3] - (1999, 12, 31) - # 'Y171010000S3': some year between 171000000 and 171999999 estimated to be 171010000, with 3 significant digits. - >>> l = parse_edtf('Y171010000S3') - LongYear: 'Y171010000S3' - >>> l.estimated() - 171010000 - >>> l.lower_fuzzy()[:3] - (171000000, 1, 1) - >>> l.upper_fuzzy()[:3] - (171999999, 12, 31) - # 'Y3388E2S3': some year in exponential notation between 338000 and 338999, estimated to be 338800 - >>> e = parse_edtf('Y3388E2S3') - ExponentialYear: 'Y3388E2S3S3' - >>> e.estimated() - 338800 - >>> e.lower_fuzzy()[:3] - (338000, 1, 1) - >>> e.upper_fuzzy()[:3] - (338999, 12, 31) +```python +# '1950S2': some year between 1900 and 1999, estimated to be 1950 +>>> d = parse_edtf('1950S2') +Date: '1950S2' +>>> d.lower_fuzzy()[:3] +(1900, 1, 1) +>>> d.upper_fuzzy()[:3] +(1999, 12, 31) +# 'Y171010000S3': some year between 171000000 and 171999999 estimated to be 171010000, with 3 significant digits. 
+>>> l = parse_edtf('Y171010000S3') +LongYear: 'Y171010000S3' +>>> l.estimated() +171010000 +>>> l.lower_fuzzy()[:3] +(171000000, 1, 1) +>>> l.upper_fuzzy()[:3] +(171999999, 12, 31) +# 'Y3388E2S3': some year in exponential notation between 338000 and 338999, estimated to be 338800 +>>> e = parse_edtf('Y3388E2S3') +ExponentialYear: 'Y3388E2S3S3' +>>> e.estimated() +338800 +>>> e.lower_fuzzy()[:3] +(338000, 1, 1) +>>> e.upper_fuzzy()[:3] +(338999, 12, 31) +``` ### Natural language representation - The library includes a basic English natural language parser (it's not yet smart enough to work with occasions such as 'Easter', or in other languages): - >>> from edtf import text_to_edtf - >>> text_to_edtf("circa August 1979") - '1979-08~' +```python +>>> from edtf import text_to_edtf +>>> text_to_edtf("circa August 1979") +'1979-08~' +``` Note that the result is a string, not an `ETDFObject`. The parser can parse strings such as: - 'January 12, 1940' => '1940-01-12' - '90' => '1990' #implied century - 'January 2008' => '2008-01' - 'the year 1800' => '1800' - '10/7/2008' => '2008-10-07' # in a full-specced date, assume US ordering - - # uncertain/approximate - '1860?' => '1860?' - '1862 (uncertain)' => '1862?' - 'circa Feb 1812' => '1812-02~' - 'c.1860' => '1860~' #with or without . - 'ca1860' => '1860~' - 'approx 1860' => '1860~' - 'ca. 1860s' => '186X~' - 'circa 1840s' => '184X~' - 'ca. 1860s?' => '186X?~' - 'c1800s?' 
=> '180X?~' # with uncertainty indicators, use the decade - - # unspecified parts - 'January 12' => 'XXXX-01-12' - 'January' => 'XXXX-01' - '7/2008' => '2008-07' - 'month in 1872' => '1872-XX' - 'day in January 1872' => '1872-01-XX' - 'day in 1872' => '1872-XX-XX' - - #seasons - 'Autumn 1872' => '1872-23' - 'Fall 1872' => '1872-23' - - # before/after - 'earlier than 1928' => '/1928' - 'later than 1928' => '1928/' - 'before January 1928' => '/1928-01' - 'after about the 1920s' => '192X~/' - - #centuries - '1st century' => '00XX' - '10c' => '09XX' - '19th century?' => '18XX?' - - # just showing off now... - 'a day in about Spring 1849?' => '1849-21-XX?~' - - # simple ranges, which aren't as accurate as they could be. The parser is - limited to only picking the first year range it finds. - '1851-1852' => '1851/1852' - '1851-1852; printed 1853-1854' => '1851/1852' - '1851-52' => '1851/1852' - '1856-ca. 1865' => '1856/1865~' - '1860s-1870s' => '186X/187X' - '1920s - early 1930s' => '192X/193X' - '1938, printed 1940s-1950s' => '1938' - +```text +'January 12, 1940' => '1940-01-12' +'90' => '1990' #implied century +'January 2008' => '2008-01' +'the year 1800' => '1800' +'10/7/2008' => '2008-10-07' # in a full-specced date, assume US ordering + +# uncertain/approximate +'1860?' => '1860?' +'1862 (uncertain)' => '1862?' +'circa Feb 1812' => '1812-02~' +'c.1860' => '1860~' #with or without . +'ca1860' => '1860~' +'approx 1860' => '1860~' +'ca. 1860s' => '186X~' +'circa 1840s' => '184X~' +'ca. 1860s?' => '186X?~' +'c1800s?' 
=> '180X?~' # with uncertainty indicators, use the decade + +# unspecified parts +'January 12' => 'XXXX-01-12' +'January' => 'XXXX-01' +'7/2008' => '2008-07' +'month in 1872' => '1872-XX' +'day in January 1872' => '1872-01-XX' +'day in 1872' => '1872-XX-XX' + +#seasons +'Autumn 1872' => '1872-23' +'Fall 1872' => '1872-23' + +# before/after +'earlier than 1928' => '/1928' +'later than 1928' => '1928/' +'before January 1928' => '/1928-01' +'after about the 1920s' => '192X~/' + +#centuries +'1st century' => '00XX' +'10c' => '09XX' +'19th century?' => '18XX?' + +# just showing off now... +'a day in about Spring 1849?' => '1849-21-XX?~' + +# simple ranges, which aren't as accurate as they could be. The parser is +limited to only picking the first year range it finds. +'1851-1852' => '1851/1852' +'1851-1852; printed 1853-1854' => '1851/1852' +'1851-52' => '1851/1852' +'1856-ca. 1865' => '1856/1865~' +'1860s-1870s' => '186X/187X' +'1920s - early 1930s' => '192X/193X' +'1938, printed 1940s-1950s' => '1938' +``` Generating natural text from an EDTF representation is a future goal. @@ -275,13 +325,10 @@ Generating natural text from an EDTF representation is a future goal. * If a natural language groups dates with a '/', it's interpreted as "or" rather than "and". The resulting EDTF text is a list bracketed by `[]` ("one of these dates") rather than `{}` (all of these dates). - ## Converting to and from Python dates - Since EDTF dates are often regions, and often imprecise, we need to use a few different Python dates, depending on the circumstance. Generally, Python dates are used for sorting and filtering, and are not displayed directly to users. - ### `struct_time` date representation Because Python's `datetime` module does not support dates out side the range 1 AD to 9999 AD we return dates as `time.struct_time` objects by default instead of the `datetime.date` or `datetime.datetime` objects you might expect. 
@@ -290,7 +337,8 @@ The `struct_time` representation is more difficult to work with, but can be sort If you are sure you are working with dates within the range supported by Python's `datetime` module, you can get these more convenient objects using the `edtf.struct_time_to_date` and `edtf.struct_time_to_datetime` functions. -NOTE: This library previously did return `date` and `datetime` objects from methods by default before we switched to `struct_time`. See ticket https://github.com/ixc/python-edtf/issues/26. +[!NOTE] +This library previously did return `date` and `datetime` objects from methods by default before we switched to `struct_time`. See ticket . ### `lower_strict` and `upper_strict` @@ -298,26 +346,27 @@ These dates indicate the earliest and latest dates that are __strictly__ in the In an ascending sort (most recent last), sort by `lower_strict` to get a natural sort order. In a descending sort (most recent first), sort by `upper_strict`: - >>> e = parse_edtf('1912-04~') +```python +>>> e = parse_edtf('1912-04~') - >>> e.lower_strict() # Returns struct_time - >>> time.struct_time(tm_year=1912, tm_mon=4, tm_mday=1, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=0, tm_yday=0, tm_isdst=-1) +>>> e.lower_strict() # Returns struct_time +>>> time.struct_time(tm_year=1912, tm_mon=4, tm_mday=1, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=0, tm_yday=0, tm_isdst=-1) - >>> e.lower_strict()[:3] # Show only interesting parts of struct_time - (1912, 4, 01) +>>> e.lower_strict()[:3] # Show only interesting parts of struct_time +(1912, 4, 01) - >>> from edtf import struct_time_to_date - >>> struct_time_to_date(e.lower_strict()) # Convert to date - datetime.date(1912, 4, 01) +>>> from edtf import struct_time_to_date +>>> struct_time_to_date(e.lower_strict()) # Convert to date +datetime.date(1912, 4, 01) - >>> e.upper_strict()[:3] - (1912, 4, 30) +>>> e.upper_strict()[:3] +(1912, 4, 30) - >>> struct_time_to_date(e.upper_strict()) - datetime.date(1912, 4, 30) +>>> 
struct_time_to_date(e.upper_strict()) +datetime.date(1912, 4, 30) +``` ### `lower_fuzzy` and `upper_fuzzy` ------------------------------------ These dates indicate the earliest and latest dates that are __possible__ in the date range, for a fairly arbitrary definition of 'possibly'. @@ -325,28 +374,34 @@ These values are useful for filtering results - i.e. testing which EDTF dates mi The fuzzy dates are derived from the strict dates, plus or minus a level of padding that depends on how precise the date specfication is. For the case of approximate or uncertain dates, we (arbitrarily) pad the ostensible range by 100% of the uncertain timescale, or by a 12 weeks in the case of seasons. That is, if a date is approximate at the month scale, it is padded by a month. If it is approximate at the year scale, it is padded by a year: - >>> e = parse_edtf('1912-04~') - >>> e.lower_fuzzy()[:3] # padding is 100% of a month - (1912, 3, 1) - >>> e.upper_fuzzy()[:3] - (1912, 5, 30) - - >>> e = parse_edtf('1912~') - >>> e.lower_fuzzy()[:3] # padding is 100% of a year - (1911, 1, 1) - >>> e.upper_fuzzy()[:3] - (1913, 12, 31) +```python +>>> e = parse_edtf('1912-04~') +>>> e.lower_fuzzy()[:3] # padding is 100% of a month +(1912, 3, 1) +>>> e.upper_fuzzy()[:3] +(1912, 5, 30) + +>>> e = parse_edtf('1912~') +>>> e.lower_fuzzy()[:3] # padding is 100% of a year +(1911, 1, 1) +>>> e.upper_fuzzy()[:3] +(1913, 12, 31) +``` One can interpret uncertain or approximate dates as 'plus or minus a [level of precision]'. If a date is both uncertain __and__ approximate, the padding is applied twice, i.e. it gets 100% * 2 padding, or 'plus or minus two [levels of precision]'. 
### Qualification properties + EDTF objects support properties that provide an overview of how the object is qualified: -- `.is_uncertain (?)` -- `.is_approximate (~)` -- `.is_uncertain_and_approximate (%)` + +* `.is_uncertain (?)` +* `.is_approximate (~)` +* `.is_uncertain_and_approximate (%)` + These properties represent whether the any part of the date object is uncertain, approximate, or uncertain and approximate. For ranges, the properties are true if any part of the range (lower or upper section) is qualified as such. A date is not necessarily uncertain and approximate if it is separately both uncertain and approximate - it must have the "%" qualifier to be considered uncertain and aproximate. + ```python >>> parse_edtf("2006-06-11") Date: '2006-06-11' @@ -388,11 +443,12 @@ False ### Seasons +[!IMPORTANT] Seasons are interpreted as Northern Hemisphere by default. To change this, override the month mapping in `appsettings.py`. ### Comparisons -Two EDTF dates are considered equal if their unicode() representations are the same. An EDTF date is considered greater than another if its `lower_strict` value is later. +Two EDTF dates are considered equal if their `unicode()` representations are the same. An EDTF date is considered greater than another if its `lower_strict` value is later. ## Django ORM field @@ -402,55 +458,60 @@ To store a natural language value on your model, define another field, and set t When your model is saved, the `natural_text_field` value will be parsed to set the `date_edtf` value, and the underlying EDTF object will set the `_earliest` and `_latest` fields on the model to a float value representing the Julian Date. - -**WARNING**: The conversion to and from Julian Date numerical values can be inaccurate, especially for ancient dates back to thousands of years BC. Ideally Julian Date values should be used for range and ordering operations only where complete accuracy is not required. 
They should **not** be used for definitive storage or for display after roundtrip conversions. +[!WARNING] +The conversion to and from Julian Date numerical values can be inaccurate, especially for ancient dates back to thousands of years BC. Ideally Julian Date values should be used for range and ordering operations only where complete accuracy is not required. They should __not__ be used for definitive storage or for display after roundtrip conversions. Example usage: - from django.db import models - from edtf.fields import EDTFField - - class MyModel(models.Model): - date_display = models.CharField( - "Date of creation (display)", - blank=True, - max_length=255, - ) - date_edtf = EDTFField( - "Date of creation (EDTF)", - natural_text_field='date_display', - lower_fuzzy_field='date_earliest', - upper_fuzzy_field='date_latest', - lower_strict_field='date_sort_ascending', - upper_strict_field='date_sort_descending', - blank=True, - null=True, - ) - # use for filtering - date_earliest = models.FloatField(blank=True, null=True) - date_latest = models.FloatField(blank=True, null=True) - # use for sorting - date_sort_ascending = models.FloatField(blank=True, null=True) - date_sort_descending = models.FloatField(blank=True, null=True) - +```python +from django.db import models +from edtf.fields import EDTFField + +class MyModel(models.Model): + date_display = models.CharField( + "Date of creation (display)", + blank=True, + max_length=255, + ) + date_edtf = EDTFField( + "Date of creation (EDTF)", + natural_text_field='date_display', + lower_fuzzy_field='date_earliest', + upper_fuzzy_field='date_latest', + lower_strict_field='date_sort_ascending', + upper_strict_field='date_sort_descending', + blank=True, + null=True, + ) + # use for filtering + date_earliest = models.FloatField(blank=True, null=True) + date_latest = models.FloatField(blank=True, null=True) + # use for sorting + date_sort_ascending = models.FloatField(blank=True, null=True) + date_sort_descending = 
models.FloatField(blank=True, null=True) +``` Since the `EDTFField` and the `_earliest` and `_latest` field values are set automatically, you may want to make them readonly, or not visible in your model admin. ## To develop + ### Setup -- Clone the repository: `git clone https://github.com/ixc/python-edtf.git` -- Set up a virtual environment: `python3 -m venv venv` -- Install the dependencies: `pip install -r dev-requirements.txt` -- Install precommit hooks: `pre-commit install` + +* Clone the repository: `git clone https://github.com/ixc/python-edtf.git` +* Set up a virtual environment: `python3 -m venv venv` +* Install the dependencies: `pip install -r dev-requirements.txt` +* Install precommit hooks: `pre-commit install` ### Running tests -- From `python-edtf`, run the unit tests: `pytest` -- From `python-edtf`, run `pytest -m benchmark` to run the benchmarks (published [here]( https://ixc.github.io/python-edtf/dev/bench/)) -- From `python-edtf/edtf_django_tests`, run the integration tests: `python manage.py test edtf_integration` -- To run CI locally, use `act`, e.g. `act pull_request` or `act --pull=false --container-architecture linux/amd64`. Some steps may require a Github PAT: `act pull_request --container-architecture linux/amd64 --pull=false -s GITHUB_TOKEN=` + +* From `python-edtf`, run the unit tests: `pytest` +* From `python-edtf`, run `pytest -m benchmark` to run the benchmarks (published [here]( https://ixc.github.io/python-edtf/dev/bench/)) +* From `python-edtf/edtf_django_tests`, run the integration tests: `python manage.py test edtf_integration` +* To run CI locally, use `act`, e.g. `act pull_request` or `act --pull=false --container-architecture linux/amd64`. 
Some steps may require a GitHub PAT: `act pull_request --container-architecture linux/amd64 --pull=false -s GITHUB_TOKEN=` ### Linting and formatting -- Check linting: `ruff check --output-format=github --config pyproject.toml` -- Check formatting: `ruff format --check --config pyproject.toml` -- Fix formatting: `ruff format --config pyproject.toml` -- Linting and formatting checks and attempted fixes are also run as precommit hooks if you installed them. + +* Check linting: `ruff check --output-format=github --config pyproject.toml` +* Check formatting: `ruff format --check --config pyproject.toml` +* Fix formatting: `ruff format --config pyproject.toml` +* Linting and formatting checks and attempted fixes are also run as precommit hooks if you installed them. From e90db901d2bbd61260d93ec9afb33a30ca0bd432 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Sun, 16 Jun 2024 16:37:44 +0200 Subject: [PATCH 095/102] update authors --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 56978fb..ef2c639 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ authors = [ { name = "Mark Finger" }, { name = "Sabine Müller" }, { name = "Cole Crawford" } + { name = "Klaus Rettinghaus" } ] maintainers = [ { name = "The Interaction Consortium", email = "studio@interaction.net.au" } From 336e8bfb6463015fa2333d1a424134e39eb7de84 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Sun, 16 Jun 2024 16:49:39 +0200 Subject: [PATCH 096/102] style(readme): add relative link to file --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b5f5bbc..4ef2a69 100644 --- a/README.md +++ b/README.md @@ -444,7 +444,7 @@ False ### Seasons [!IMPORTANT] -Seasons are interpreted as Northern Hemisphere by default. To change this, override the month mapping in `appsettings.py`. +Seasons are interpreted as Northern Hemisphere by default. 
To change this, override the month mapping in [`appsettings.py`](edtf/appsettings.py). ### Comparisons From ea74dafb4298697ca01cd128db7f4b34ba8e5c3c Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 17 Jun 2024 09:52:17 +1000 Subject: [PATCH 097/102] Update contributors --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ef2c639..f533477 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,10 +12,11 @@ readme = {file = "README.txt", content-type = "text/markdown"} authors = [ { name = "The Interaction Consortium", email = "studio@interaction.net.au"}, { name = "Alastair Weakley"}, + { name = "Greg Turner"}, { name = "James Murty"}, { name = "Mark Finger" }, { name = "Sabine Müller" }, - { name = "Cole Crawford" } + { name = "Cole Crawford" }, { name = "Klaus Rettinghaus" } ] maintainers = [ From 5f09bdf7dea2739f00f1c4de6a995fc61edb5966 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Mon, 17 Jun 2024 08:59:06 +0200 Subject: [PATCH 098/102] fix alerts --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 4ef2a69..2aaef05 100644 --- a/README.md +++ b/README.md @@ -337,8 +337,8 @@ The `struct_time` representation is more difficult to work with, but can be sort If you are sure you are working with dates within the range supported by Python's `datetime` module, you can get these more convenient objects using the `edtf.struct_time_to_date` and `edtf.struct_time_to_datetime` functions. -[!NOTE] -This library previously did return `date` and `datetime` objects from methods by default before we switched to `struct_time`. See ticket . +> [!NOTE] +> This library previously did return `date` and `datetime` objects from methods by default before we switched to `struct_time`. See ticket . 
### `lower_strict` and `upper_strict` @@ -443,8 +443,8 @@ False ### Seasons -[!IMPORTANT] -Seasons are interpreted as Northern Hemisphere by default. To change this, override the month mapping in [`appsettings.py`](edtf/appsettings.py). +> [!IMPORTANT] +> Seasons are interpreted as Northern Hemisphere by default. To change this, override the month mapping in [`appsettings.py`](edtf/appsettings.py). ### Comparisons @@ -458,8 +458,8 @@ To store a natural language value on your model, define another field, and set t When your model is saved, the `natural_text_field` value will be parsed to set the `date_edtf` value, and the underlying EDTF object will set the `_earliest` and `_latest` fields on the model to a float value representing the Julian Date. -[!WARNING] -The conversion to and from Julian Date numerical values can be inaccurate, especially for ancient dates back to thousands of years BC. Ideally Julian Date values should be used for range and ordering operations only where complete accuracy is not required. They should __not__ be used for definitive storage or for display after roundtrip conversions. +> [!WARNING] +> The conversion to and from Julian Date numerical values can be inaccurate, especially for ancient dates back to thousands of years BC. Ideally Julian Date values should be used for range and ordering operations only where complete accuracy is not required. They should __not__ be used for definitive storage or for display after roundtrip conversions. 
Example usage: From 4592e9ba50e68a6fbd87939a8f0da5c711ddccb4 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Mon, 17 Jun 2024 10:56:46 +0200 Subject: [PATCH 099/102] drop six dependency --- edtf/natlang/en.py | 5 ++--- pyproject.toml | 1 - requirements.txt | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index f6eef54..f28e685 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -4,7 +4,6 @@ from datetime import datetime from dateutil.parser import ParserError, parse -from six.moves import xrange from edtf import appsettings @@ -216,7 +215,7 @@ def text_to_edtf_date(text): mentions_month = re.findall(r"\bmonth\b.+(in|during)\b", t) mentions_day = re.findall(r"\bday\b.+(in|during)\b", t) - for i in xrange(len(date1)): + for i in range(len(date1)): # if the given year could be a century (e.g. '1800s') then use # approximate/uncertain markers to decide whether we treat it as # a century or a decade. @@ -238,7 +237,7 @@ def text_to_edtf_date(text): # strip off unknown chars from end of string - except the first 4 - for i in reversed(xrange(len(result))): + for i in reversed(range(len(result))): if result[i] not in ("X", "-"): smallest_length = 4 diff --git a/pyproject.toml b/pyproject.toml index f533477..860741e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,6 @@ version = "5.0.0" dependencies = [ "python-dateutil", "pyparsing", - "six" ] description = "Python implementation of Library of Congress EDTF (Extended Date Time Format) specification" requires-python = ">=3.8" diff --git a/requirements.txt b/requirements.txt index 0ab3a7d..1656e27 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ python-dateutil pyparsing -six From 2dee3d08e41e02fc1c1f6d9c777f7b1800c7457c Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Thu, 27 Jun 2024 16:52:22 +1000 Subject: [PATCH 100/102] WIP adding checks for edtf field aliases #62 --- edtf/fields.py | 42 +++++++++++++++++++++ 
edtf_django_tests/edtf_integration/tests.py | 35 +++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/edtf/fields.py b/edtf/fields.py index 7dba5d4..9cf6b27 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -1,5 +1,6 @@ import pickle +from django.core import checks from django.core.exceptions import FieldDoesNotExist from django.db import models from django.db.models import signals @@ -188,3 +189,44 @@ def contribute_to_class(self, cls, name, **kwargs): # Only run post-initialization values update on non-abstract models if not cls._meta.abstract: signals.post_init.connect(self.update_values, sender=cls) + + def check(self, **kwargs): + errors = super().check(**kwargs) + + for field_alias in [ + "direct_input_field", + "lower_fuzzy_field", + "lower_strict_field", + "natural_text_field", + "upper_fuzzy_field", + "upper_strict_field", + ]: + errors.extend(self._check_field(field_alias)) + + return errors + + def _check_field(self, field_alias): + field_name = getattr(self, field_alias, None) + + # Check if the alias value has been provided in the field definition + if not field_name: + return [ + checks.Error( + f"You must specify a '{field_alias}' for EDTFField", + hint=None, + obj=self, + ) + ] + + # Check if the field that is referenced actually exists + try: + self.model._meta.get_field(field_name) + except FieldDoesNotExist: + return [ + checks.Error( + f"'{self.name}' refers to a non-existent '{field_alias}' field: '{field_name}'", + hint=None, + obj=self, + ) + ] + return [] diff --git a/edtf_django_tests/edtf_integration/tests.py b/edtf_django_tests/edtf_integration/tests.py index 493d0d2..da5bb83 100644 --- a/edtf_django_tests/edtf_integration/tests.py +++ b/edtf_django_tests/edtf_integration/tests.py @@ -122,3 +122,38 @@ def test_comparison(self): self.event2.date_edtf, "2019-11 is less than 2021-05-06", ) + + def test_field_related_field_specification(self): + edtf_field_on_model = TestEvent._meta.get_field("date_edtf") + required_fields = ( + 
"direct_input_field", + "lower_fuzzy_field", + "lower_strict_field", + "natural_text_field", + "upper_fuzzy_field", + "upper_strict_field", + ) + for field_alias in required_fields: + # Remove the alias from the edtf_field + orig_value = getattr(edtf_field_on_model, field_alias) + setattr(edtf_field_on_model, field_alias, None) + errors = edtf_field_on_model.check() + self.assertEqual(len(errors), 1) + self.assertTrue(field_alias in errors[0].msg) + # Replace the field so later tests can still work + setattr(edtf_field_on_model, field_alias, orig_value) + + # TODO: this is not working yet + # # Remove the field from the model + # referenced_field_name = getattr(edtf_field_on_model, field_alias) + # orig_fields = TestEvent._meta.local_fields + # TestEvent._meta.local_fields = [ # type: ignore + # field + # for field in TestEvent._meta.local_fields + # if field.name != referenced_field_name + # ] + # errors = TestEvent._meta.get_field("date_edtf").check() + # self.assertEqual(len(errors), 1) + # self.assertTrue(referenced_field_name in errors[0].msg) + # # Replace the field so later tests can still work + # TestEvent._meta.local_fields = orig_fields From 1a5ebd53e0f78c17f4e037569a089a082cf6b8fb Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Thu, 27 Jun 2024 21:35:53 +1000 Subject: [PATCH 101/102] Tests for aliases that point to non-existent fields #62 --- edtf/fields.py | 2 ++ edtf_django_tests/edtf_integration/tests.py | 28 +++++++++------------ 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/edtf/fields.py b/edtf/fields.py index 9cf6b27..07a9744 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -215,6 +215,7 @@ def _check_field(self, field_alias): f"You must specify a '{field_alias}' for EDTFField", hint=None, obj=self, + id="python-edtf.EDTF01", ) ] @@ -227,6 +228,7 @@ def _check_field(self, field_alias): f"'{self.name}' refers to a non-existent '{field_alias}' field: '{field_name}'", hint=None, obj=self, + id="python-edtf.EDTF02", ) ] 
return [] diff --git a/edtf_django_tests/edtf_integration/tests.py b/edtf_django_tests/edtf_integration/tests.py index da5bb83..aa1bf34 100644 --- a/edtf_django_tests/edtf_integration/tests.py +++ b/edtf_django_tests/edtf_integration/tests.py @@ -140,20 +140,16 @@ def test_field_related_field_specification(self): errors = edtf_field_on_model.check() self.assertEqual(len(errors), 1) self.assertTrue(field_alias in errors[0].msg) - # Replace the field so later tests can still work - setattr(edtf_field_on_model, field_alias, orig_value) + # Should be an 'alias not specified' error + self.assertEqual(errors[0].id, "python-edtf.EDTF01") + + # Point the alias to a non-existent field + setattr(edtf_field_on_model, field_alias, "fake") + errors = edtf_field_on_model.check() + self.assertEqual(len(errors), 1) + self.assertTrue(field_alias in errors[0].msg) + # Should be a 'non-existent field' error + self.assertEqual(errors[0].id, "python-edtf.EDTF02") - # TODO: this is not working yet - # # Remove the field from the model - # referenced_field_name = getattr(edtf_field_on_model, field_alias) - # orig_fields = TestEvent._meta.local_fields - # TestEvent._meta.local_fields = [ # type: ignore - # field - # for field in TestEvent._meta.local_fields - # if field.name != referenced_field_name - # ] - # errors = TestEvent._meta.get_field("date_edtf").check() - # self.assertEqual(len(errors), 1) - # self.assertTrue(referenced_field_name in errors[0].msg) - # # Replace the field so later tests can still work - # TestEvent._meta.local_fields = orig_fields + # Repair the field so later tests can still work + setattr(edtf_field_on_model, field_alias, orig_value) From d313a9d2ed57e5f88871da51f1b48ea875f244f8 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Sun, 7 Jul 2024 17:19:23 +1000 Subject: [PATCH 102/102] Note on benchmarks location --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 2aaef05..7542c36 100644 --- a/README.md +++
b/README.md @@ -515,3 +515,7 @@ Since the `EDTFField` and the `_earliest` and `_latest` field values are set aut * Check formatting: `ruff format --check --config pyproject.toml` * Fix formatting: `ruff format --config pyproject.toml` * Linting and formatting checks and attempted fixes are also run as precommit hooks if you installed them. + +### Coverage and benchmarks + +Coverage reports are generated and added as comments to commits, and also visible in the actions log. Benchmarks are run on pull requests and are published [here]( https://ixc.github.io/python-edtf/dev/bench/) and also visible in the actions log.