From 01f7319229e771b8c938647b06840b1f6ddeaa3c Mon Sep 17 00:00:00 2001 From: eszakharova <1583253@gmail.com> Date: Sun, 28 May 2017 23:05:47 +0300 Subject: [PATCH 01/14] tried to add tests for validation.py --- tests/test_languages.py | 197 +++++++++++++++++++++++++++++++++++----- 1 file changed, 175 insertions(+), 22 deletions(-) diff --git a/tests/test_languages.py b/tests/test_languages.py index 4345c0865..998ce409e 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -7,6 +7,7 @@ from dateparser.languages.detection import AutoDetectLanguage, ExactLanguages from dateparser.conf import settings from dateparser.utils import normalize_unicode +from dateparser.languages.validation import LanguageValidator from tests import BaseTestCase @@ -47,7 +48,7 @@ def setUp(self): # German param('de', "29. Juni 2007", "29. june 2007"), param('de', "Montag 5 Januar, 2015", "monday 5 january 2015"), - #Hungarian + # Hungarian param('hu', '2016 augusztus 11.', '2016 august 11.'), param('hu', '2016-08-13 szombat 10:21', '2016-08-13 saturday 10:21'), param('hu', '2016. augusztus 14. vasárnap 10:21', '2016. august 14. sunday 10:21'), @@ -175,10 +176,10 @@ def setUp(self): param('bn', "সেপ্টেম্বর 03 2014", "september 03 2014"), param('bn', "শুক্রবার, 03 সেপ্টেম্বর 2014", "friday 03 september 2014"), - #Hindi - param('hi', 'सोमवार 13 जून 1998','monday 13 june 1998'), - param('hi', 'मंगल 16 1786 12:18','tuesday 16 1786 12:18'), - param('hi','शनि 11 अप्रैल 2002 03:09','saturday 11 april 2002 03:09'), + # Hindi + param('hi', 'सोमवार 13 जून 1998', 'monday 13 june 1998'), + param('hi', 'मंगल 16 1786 12:18', 'tuesday 16 1786 12:18'), + param('hi', 'शनि 11 अप्रैल 2002 03:09', 'saturday 11 april 2002 03:09'), # Swedish param('sv', "Sept 03 2014", "september 03 2014"), @@ -406,11 +407,11 @@ def test_translation(self, shortname, datetime_string, expected_translation): # Hindi param('hi', "१ सप्ताह", "1 week"), param('hi', "२४ मिनट पहले", "24 minute ago"), - param('hi', "पांच वर्ष","5 year"), - param('hi', "५३ सप्ताह बाद","53 week in"), - param('hi', "सन् १९२०"," 1920"), - param('hi',"आठ पूर्वाह्न","8 am"), - param('hi',"बारह सेकंड पूर्व","12 second ago"), + param('hi', "पांच वर्ष", "5 year"), + param('hi', "५३ सप्ताह बाद", "53 week in"), + param('hi', "सन् १९२०", " 1920"), + param('hi', "आठ पूर्वाह्न", "8 am"), + param('hi', "बारह सेकंड पूर्व", "12 second ago"), # Swedish param('sv', "igår", "1 day"), param('sv', "idag", "0 day"), @@ -447,21 +448,29 @@ def test_freshness_translation(self, shortname, datetime_string, expected_transl param('tr', "2 saat önce", ["2", " ", "saat", " ", "önce"]), param('fr', "il ya environ 23 heures'", ["il ya", " ", "environ", " ", "23", " ", "heures"]), param('de', "Gestern um 04:41", ['Gestern ', 'um', ' ', '04', ':', '41']), - param('de', "Donnerstag, 8. Januar 2015 um 07:17", ['Donnerstag', ' ', '8', '.', ' ', 'Januar', ' ', '2015', ' ', 'um', ' ', '07', ':', '17']), - param('ru', "8 января 2015 г. в 9:10", ['8', ' ', 'января', ' ', '2015', ' ', 'г.', ' ', 'в', ' ', '9', ':', '10']), + param('de', "Donnerstag, 8. Januar 2015 um 07:17", + ['Donnerstag', ' ', '8', '.', ' ', 'Januar', ' ', '2015', ' ', 'um', ' ', '07', ':', '17']), + param('ru', "8 января 2015 г. в 9:10", + ['8', ' ', 'января', ' ', '2015', ' ', 'г.', ' ', 'в', ' ', '9', ':', '10']), param('cs', "6. leden 2015 v 22:29", ['6', '.', ' ', 'leden', ' ', '2015', ' ', 'v', ' ', '22', ':', '29']), - param('nl', "woensdag 7 januari 2015 om 21:32", ['woensdag', ' ', '7', ' ', 'januari', ' ', '2015', ' ', 'om', ' ', '21', ':', '32']), + param('nl', "woensdag 7 januari 2015 om 21:32", + ['woensdag', ' ', '7', ' ', 'januari', ' ', '2015', ' ', 'om', ' ', '21', ':', '32']), param('ro', "8 Ianuarie 2015 la 13:33", ['8', ' ', 'Ianuarie', ' ', '2015', ' ', 'la', ' ', '13', ':', '33']), - param('ar', "8 يناير، 2015، الساعة 10:01 صباحاً", ['8', ' ', 'يناير', ' ', '2015', 'الساعة', ' ', '10', ':', '01', ' صباحاً']), - param('th', "8 มกราคม 2015 เวลา 12:22 น.", ['8', ' ', 'มกราคม', ' ', '2015', ' ', 'เวลา', ' ', '12', ':', '22', ' ', 'น.']), + param('ar', "8 يناير، 2015، الساعة 10:01 صباحاً", + ['8', ' ', 'يناير', ' ', '2015', 'الساعة', ' ', '10', ':', '01', ' صباحاً']), + param('th', "8 มกราคม 2015 เวลา 12:22 น.", + ['8', ' ', 'มกราคม', ' ', '2015', ' ', 'เวลา', ' ', '12', ':', '22', ' ', 'น.']), param('pl', "8 stycznia 2015 o 10:19", ['8', ' ', 'stycznia', ' ', '2015', ' ', 'o', ' ', '10', ':', '19']), - param('vi', "Thứ Năm, ngày 8 tháng 1 năm 2015", ["Thứ Năm", " ", "ngày", " ", "8", " tháng ", "1", " ", "năm", " ", "2015"]), + param('vi', "Thứ Năm, ngày 8 tháng 1 năm 2015", + ["Thứ Năm", " ", "ngày", " ", "8", " tháng ", "1", " ", "năm", " ", "2015"]), param('tl', "Biyernes Hulyo 3 2015", ["Biyernes", " ", "Hulyo", " ", "3", " ", "2015"]), - param('be', "3 верасня 2015 г. у 11:10", ['3', ' ', 'верасня', ' ', '2015', ' ', 'г.', ' ', 'у', ' ', '11', ':', '10']), + param('be', "3 верасня 2015 г. у 11:10", + ['3', ' ', 'верасня', ' ', '2015', ' ', 'г.', ' ', 'у', ' ', '11', ':', '10']), param('id', "3 Juni 2015 13:05:46", ['3', ' ', 'Juni', ' ', '2015', ' ', '13', ':', '05', ':', '46']), - param('he', "ה-21 לאוקטובר 2016 ב-15:00", ['ה-', '21', ' ', 'לאוקטובר', ' ', '2016', ' ', 'ב-', '15', ':', '00']), + param('he', "ה-21 לאוקטובר 2016 ב-15:00", + ['ה-', '21', ' ', 'לאוקטובר', ' ', '2016', ' ', 'ב-', '15', ':', '00']), param('bn', "3 জুন 2015 13:05:46", ['3', ' ', 'জুন', ' ', '2015', ' ', '13', ':', '05', ':', '46']), - param('hi', "13 मार्च 2013 11:15:09",['13',' ','मार्च',' ','2013',' ','11',':','15',':','09']), + param('hi', "13 मार्च 2013 11:15:09", ['13', ' ', 'मार्च', ' ', '2013', ' ', '11', ':', '15', ':', '09']), ]) def test_split(self, shortname, datetime_string, expected_tokens): self.given_bundled_language(shortname) @@ -553,7 +562,7 @@ def setUp(self): @parameterized.expand([ param("1 january 2015", 'en'), - ]) + ]) def test_valid_dates_detected(self, datetime_string, expected_language): self.given_languages(expected_language) self.given_detector() @@ -602,7 +611,8 @@ def given_detector(self): raise NotImplementedError def when_searching_for_first_applicable_language(self): - for language in self.detector.iterate_applicable_languages(self.datetime_string, modify=True, settings=settings): + for language in self.detector.iterate_applicable_languages(self.datetime_string, modify=True, + settings=settings): self.detected_language = language break else: @@ -642,7 +652,8 @@ def given_detector(self): self.detector = ExactLanguages(languages=self.known_languages) def when_using_exact_languages(self): - self.exact_languages = self.detector.iterate_applicable_languages(self.datetime_string, modify=True, settings=settings) + self.exact_languages = self.detector.iterate_applicable_languages(self.datetime_string, modify=True, + settings=settings) def then_exact_languages_were_filtered(self, shortnames): self.assertEqual(set(shortnames), set([lang.shortname for lang in self.exact_languages])) @@ -663,3 +674,145 @@ class TestAutoDetectLanguageDetectorWithoutRedetection(BaseAutoDetectLanguageDet class TestAutoDetectLanguageDetectorWithRedetection(BaseAutoDetectLanguageDetectorTestCase): __test__ = True allow_redetection = True + + +class TestLanguageValidatorWhenInvalid(BaseTestCase): + def setUp(self): + super(TestLanguageValidatorWhenInvalid, self).setUp() + self.validator = LanguageValidator + + @parameterized.expand([ + param('en', 'string instead of dict'), + ]) + def test_validate_info_when_invalid_tipe(self, lang_id, lang_info): + result = self.validator.validate_info(lang_id, lang_info) + self.assertFalse(result) + + @parameterized.expand([ + param('en', {}), + param('en', {'name': 22}), + param('en', {'name': ''}), + ]) + def test_validate_name_when_invalid(self, lang_id, lang_info): + result = self.validator._validate_name(lang_id, lang_info) + self.assertFalse(result) + + @parameterized.expand([ + param('en', {'no_word_spacing': 'string instead of bool'}), + ]) + def test_validate_word_spacing_when_invalid(self, lang_id, lang_info): + result = self.validator._validate_word_spacing(lang_id, lang_info) + self.assertFalse(result) + + @parameterized.expand([ + param('en', {'skip': 'string instead of list'}), + param('en', {'skip': ['']}), + ]) + def test_validate_skip_list_when_invalid(self, lang_id, lang_info): + result = self.validator._validate_skip_list(lang_id, lang_info) + self.assertFalse(result) + + @parameterized.expand([ + param('en', {}), + + ]) + def test_validate_skip_list_when_abscent(self, lang_id, lang_info): + result = self.validator._validate_skip_list(lang_id, lang_info) + self.assertTrue(result) + + @parameterized.expand([ + param('en', {'pertain': 'string instead of list'}), + param('en', {'pertain': ['']}), + ]) + def test_validate_pertain_list_when_invalid(self, lang_id, lang_info): + result = self.validator._validate_pertain_list(lang_id, lang_info) + self.assertFalse(result) + + @parameterized.expand([ + param('en', {}), + param('en', + {'monday': 1, 'tuesday': 2, 'wednesday': 3, 'thursday': 4, 'friday': 5, 'saturday': 6, 'sunday': 7}), + param('en', {'monday': [1], 'tuesday': [2], 'wednesday': [3], 'thursday': [4], 'friday': [5], + 'saturday': [6], 'sunday': [7]}), + ]) + def test_validate_weekdays_when_invalid(self, lang_id, lang_info): + result = self.validator._validate_weekdays(lang_id, lang_info) + self.assertFalse(result) + + @parameterized.expand([ + param('en', {}), + param('en', + {'january': 1, 'february': 2, 'march': 3, 'april': 4, 'may': 5, 'june': 6, 'july': 7, + 'august': 8, 'september': 9, 'october': 10, 'november': 11, 'december': 12}), + param('en', + {'january': [1], 'february': [2], 'march': [3], 'april': [4], 'may': [5], 'june': [6], 'july': [7], + 'august': [8], 'september': [9], 'october': [10], 'november': [11], 'december': [12]}), + ]) + def test_validate_months_when_invalid(self, lang_id, lang_info): + result = self.validator._validate_months(lang_id, lang_info) + self.assertFalse(result) + + @parameterized.expand([ + param('en', {}), + param('en', + {'year': 1, 'month': 2, 'week': 3, 'day': 4, 'hour': 5, 'minute': 6, 'second': 7}), + param('en', + {'year': [1], 'month': [2], 'week': [3], 'day': [4], 'hour': [5], 'minute': [6], 'second': [7]}), + ]) + def test_validate_units_when_invalid(self, lang_id, lang_info): + result = self.validator._validate_units(lang_id, lang_info) + self.assertFalse(result) + + @parameterized.expand([ + param('en', {}), + param('en', + {'ago': 1}), + param('en', + {'ago': []}), + param('en', + {'ago': ['']}), + ]) + def test_validate_other_words_when_invalid(self, lang_id, lang_info): + result = self.validator._validate_other_words(lang_id, lang_info) + self.assertFalse(result) + + @parameterized.expand([ + param('en', {}), + + ]) + def test_validate_simplifications_when_abscent(self, lang_id, lang_info): + result = self.validator._validate_simplifications(lang_id, lang_info) + self.assertTrue(result) + + @parameterized.expand([ + param('en', + {'simplifications': 'string instead of list'}), + param('en', + {'simplifications': [{}]}), + param('en', + {'simplifications': [{28: []}]}), + param('en', + {'simplifications': [{'simplification': []}]}), + # groups were not used + param('en', + {'simplifications': [{'(\d+)\s*hr(s?)\g<(.+?)>': r'\1 hour\2'}]}), + # + param('en', + {'simplifications': [{'(one)(two)(three)': r'\1\3\2\4'}]}), + param('en', + {'simplifications': [{r'(?P\w+)(?P\w+)': '\\g'}]}), + param('en', + {'simplifications': [{r'(?P\w+)': '\\g(.*?)'}]}), + + ]) + def test_validate_simplifications_when_invalid(self, lang_id, lang_info): + result = self.validator._validate_simplifications(lang_id, lang_info) + self.assertFalse(result) + + @parameterized.expand([ + param('en', {'invalid_key': ''}), + + ]) + def test_validate_extra_keys_when_invalid(self, lang_id, lang_info): + result = self.validator._validate_extra_keys(lang_id, lang_info) + self.assertFalse(result) From 3170fe77706218fb825346a4a2d84c6652f76a59 Mon Sep 17 00:00:00 2001 From: eszakharova <1583253@gmail.com> Date: Sun, 28 May 2017 23:39:49 +0300 Subject: [PATCH 02/14] changes to stop failing tests --- tests/test_languages.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_languages.py b/tests/test_languages.py index 998ce409e..76c3606c0 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -720,13 +720,13 @@ def test_validate_skip_list_when_abscent(self, lang_id, lang_info): result = self.validator._validate_skip_list(lang_id, lang_info) self.assertTrue(result) - @parameterized.expand([ - param('en', {'pertain': 'string instead of list'}), - param('en', {'pertain': ['']}), - ]) - def test_validate_pertain_list_when_invalid(self, lang_id, lang_info): - result = self.validator._validate_pertain_list(lang_id, lang_info) - self.assertFalse(result) + # @parameterized.expand([ + # param('en', {'pertain': 'string instead of list'}), + # param('en', {'pertain': ['']}), + # ]) + # def test_validate_pertain_list_when_invalid(self, lang_id, lang_info): + # result = self.validator._validate_pertain_list(lang_id, lang_info) + # self.assertFalse(result) @parameterized.expand([ param('en', {}), From 0759f6027a6773c633289157310806e01d60f44d Mon Sep 17 00:00:00 2001 From: eszakharova <1583253@gmail.com> Date: Wed, 31 May 2017 19:49:22 +0300 Subject: [PATCH 03/14] some changes in test for validation.py --- tests/test_languages.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/tests/test_languages.py b/tests/test_languages.py index e1f94df4d..57ceea1d4 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -718,17 +718,25 @@ def test_validate_skip_list_when_invalid(self, lang_id, lang_info): param('en', {}), ]) - def test_validate_skip_list_when_abscent(self, lang_id, lang_info): + def test_validate_skip_list_when_absent(self, lang_id, lang_info): result = self.validator._validate_skip_list(lang_id, lang_info) self.assertTrue(result) - # @parameterized.expand([ - # param('en', {'pertain': 'string instead of list'}), - # param('en', {'pertain': ['']}), - # ]) - # def test_validate_pertain_list_when_invalid(self, lang_id, lang_info): - # result = self.validator._validate_pertain_list(lang_id, lang_info) - # self.assertFalse(result) + @parameterized.expand([ + param('en', {'pertain': 'it is a string', 'skip': ['']}), + param('en', {'pertain': [''], 'skip': 'it is a string'}), + ]) + def test_validate_pertain_list_when_invalid(self, lang_id, lang_info): + result = self.validator._validate_pertain_list(lang_id, lang_info) + self.assertFalse(result) + + @parameterized.expand([ + param('en', {}), + + ]) + def test_validate_pertain_list_when_absent(self, lang_id, lang_info): + result = self.validator._validate_pertain_list(lang_id, lang_info) + self.assertTrue(result) @parameterized.expand([ param('en', {}), @@ -782,7 +790,7 @@ def test_validate_other_words_when_invalid(self, lang_id, lang_info): param('en', {}), ]) - def test_validate_simplifications_when_abscent(self, lang_id, lang_info): + def test_validate_simplifications_when_absent(self, lang_id, lang_info): result = self.validator._validate_simplifications(lang_id, lang_info) self.assertTrue(result) @@ -795,10 +803,8 @@ def test_validate_simplifications_when_abscent(self, lang_id, lang_info): {'simplifications': [{28: []}]}), param('en', {'simplifications': [{'simplification': []}]}), - # groups were not used param('en', {'simplifications': [{'(\d+)\s*hr(s?)\g<(.+?)>': r'\1 hour\2'}]}), - # param('en', {'simplifications': [{'(one)(two)(three)': r'\1\3\2\4'}]}), param('en', From 392381c8c923e9a715265e1c667040b85fd051fd Mon Sep 17 00:00:00 2001 From: eszakharova <1583253@gmail.com> Date: Thu, 1 Jun 2017 01:15:31 +0300 Subject: [PATCH 04/14] some tests for utils/__init__.py --- tests/test_utils.py | 48 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index a60ece5fd..09ce96e5f 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -5,9 +5,9 @@ from nose_parameterized import parameterized, param from dateparser.utils import ( find_date_separator, localize_timezone, apply_timezone, - apply_timezone_from_settings + apply_timezone_from_settings, registry, increase_regex_replacements_group_positions ) - +from pytz import UnknownTimeZoneError, utc from dateparser.conf import settings @@ -26,6 +26,14 @@ def when_date_seperator_is_parsed(self): def then_date_seperator_is(self, sep): self.assertEqual(self.result, sep) + @staticmethod + def make_class_without_get_keys(): + class SomeClass: + def __init__(self): + pass + some_class = SomeClass + return some_class + @parameterized.expand([ param(date_format=fmt.format(sep=sep), expected_sep=sep) for (fmt, sep) in itertools.product( @@ -40,11 +48,26 @@ def test_separator_extraction(self, date_format, expected_sep): @parameterized.expand([ param(datetime(2015, 12, 12), timezone='UTC', zone='UTC'), param(datetime(2015, 12, 12), timezone='Asia/Karachi', zone='Asia/Karachi'), + param(datetime(2015, 12, 12, tzinfo=utc), timezone='UTC', zone='UTC'), ]) def test_localize_timezone_function(self, date, timezone, zone): tzaware_dt = localize_timezone(date, timezone) self.assertEqual(tzaware_dt.tzinfo.zone, zone) + @parameterized.expand([ + param(datetime(2015, 12, 12), timezone='UTB'), + param(datetime(2015, 12, 12), timezone='Asia/Karach'), + ]) + def test_localize_timezone_function_raise_error(self, date, timezone): + self.assertRaises(UnknownTimeZoneError, localize_timezone, date, timezone) + + @parameterized.expand([ + param(datetime(2015, 12, 12), timezone='UTC+3', zone='UTC\+03:00'), + ]) + def test_localize_timezone_function_exception(self, date, timezone, zone): + tzaware_dt = localize_timezone(date, timezone) + self.assertEqual(tzaware_dt.tzinfo._StaticTzInfo__name, zone) + @parameterized.expand([ param(datetime(2015, 12, 12, 10, 12), timezone='Asia/Karachi', expected=datetime(2015, 12, 12, 15, 12)), param(datetime(2015, 12, 12, 10, 12), timezone='-0500', expected=datetime(2015, 12, 12, 5, 12)), @@ -62,6 +85,15 @@ def test_apply_timezone_from_settings_function(self, date, timezone, expected): result = apply_timezone_from_settings(date, settings.replace(**{'TO_TIMEZONE': timezone, 'TIMEZONE': 'UTC'})) self.assertEqual(expected, result) + @parameterized.expand([ + param(datetime(2015, 12, 12, 10, 12), + expected=datetime(2015, 12, 12, 10, 12)), + + ]) + def test_apply_timezone_from_settings_function_none_settings(self, date, expected): + result = apply_timezone_from_settings(date, None) + self.assertEqual(expected, result) + @parameterized.expand([ param(datetime(2015, 12, 12, 10, 12),), param(datetime(2015, 12, 12, 10, 12),), @@ -69,3 +101,15 @@ def test_apply_timezone_from_settings_function(self, date, timezone, expected): def test_apply_timezone_from_settings_function_should_return_tz(self, date): result = apply_timezone_from_settings(date, settings.replace(**{'RETURN_AS_TIMEZONE_AWARE': True})) self.assertTrue(bool(result.tzinfo)) + + def test_registry_when_get_keys_not_implemented(self): + cl = self.make_class_without_get_keys() + self.assertRaises(NotImplementedError, registry, cl) + + @parameterized.expand([ + param(r'\1\g<2>text', 1, r'\2\g<3>text'), + ]) + def test_increase_regex_replacements_group_positions(self, repl, incr, expected): + result = increase_regex_replacements_group_positions(repl, incr) + self.assertTrue(result, expected) + From d98eab08d60aaba31b12fcbd7b52bb4249fb0ede Mon Sep 17 00:00:00 2001 From: eszakharova <1583253@gmail.com> Date: Thu, 1 Jun 2017 03:07:13 +0300 Subject: [PATCH 05/14] test for utils/strptime.py --- tests/test_utils_strptime.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_utils_strptime.py b/tests/test_utils_strptime.py index 6ce377c1a..e119bbb83 100644 --- a/tests/test_utils_strptime.py +++ b/tests/test_utils_strptime.py @@ -73,7 +73,15 @@ def test_parsing_date_should_fail_using_datetime_strptime_if_locale_is_non_engli param('12 Dec 10 10:30:55.000011', '%d %b %y %H:%M:%S.%f', expected=datetime(2010, 12, 12, 10, 30, 55, 11)), param('12 Dec 10 10:30:55.000111', '%d %b %y %H:%M:%S.%f', expected=datetime(2010, 12, 12, 10, 30, 55, 111)), param('12 Feb 2016 11:41:23', '%d %b %Y %I:%M:%S', expected=datetime(2016, 2, 12, 11, 41, 23)), + ]) def test_microseconds_are_parsed_correctly(self, date_string, fmt, expected): self.when_date_string_is_parsed(date_string, fmt) self.then_date_object_is(expected) + + @parameterized.expand([ + param('11 Dec 10 10:30:5335.999999', '%y %b %S %H:%M:%Y.%f', expected=datetime(5335, 12, 1, 10, 30, 10)), + ]) + def test_not_parsing_microseconds_attribute_error(self, datestring, fmt, expected): + self.assertEqual(strptime(datestring, fmt), expected) + From 212a3edc5d874bbae46652f9d310d1af1b886d39 Mon Sep 17 00:00:00 2001 From: eszakharova <1583253@gmail.com> Date: Thu, 1 Jun 2017 03:16:20 +0300 Subject: [PATCH 06/14] change for better coverage --- tests/test_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 09ce96e5f..48e2452a5 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -29,8 +29,9 @@ def then_date_seperator_is(self, sep): @staticmethod def make_class_without_get_keys(): class SomeClass: - def __init__(self): - pass + pass + # def __init__(self): + # var = 1 == 1 some_class = SomeClass return some_class From 0ae6ac1811b8e8767d469cf5a3c5651bac80fabd Mon Sep 17 00:00:00 2001 From: eszakharova <1583253@gmail.com> Date: Thu, 1 Jun 2017 03:16:59 +0300 Subject: [PATCH 07/14] change for better coverage --- tests/test_utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 48e2452a5..0e56cf07e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -30,8 +30,6 @@ def then_date_seperator_is(self, sep): def make_class_without_get_keys(): class SomeClass: pass - # def __init__(self): - # var = 1 == 1 some_class = SomeClass return some_class From 9f52b0ed540b1e146e729fa2a0d1ad4be23cac95 Mon Sep 17 00:00:00 2001 From: eszakharova <1583253@gmail.com> Date: Thu, 1 Jun 2017 03:30:44 +0300 Subject: [PATCH 08/14] changes to pass tests --- tests/test_utils_strptime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_utils_strptime.py b/tests/test_utils_strptime.py index e119bbb83..67bbd9dcc 100644 --- a/tests/test_utils_strptime.py +++ b/tests/test_utils_strptime.py @@ -80,7 +80,7 @@ def test_microseconds_are_parsed_correctly(self, date_string, fmt, expected): self.then_date_object_is(expected) @parameterized.expand([ - param('11 Dec 10 10:30:5335.999999', '%y %b %S %H:%M:%Y.%f', expected=datetime(5335, 12, 1, 10, 30, 10)), + param('11 Dec 10 10:30:2011.999999', '%y %b %S %H:%M:%Y.%f', expected=datetime(2011, 12, 1, 10, 30, 10)), ]) def test_not_parsing_microseconds_attribute_error(self, datestring, fmt, expected): self.assertEqual(strptime(datestring, fmt), expected) From b3128398a36a25e7996410b93535d9c3f3db94e9 Mon Sep 17 00:00:00 2001 From: eszakharova <1583253@gmail.com> Date: Thu, 1 Jun 2017 22:48:20 +0300 Subject: [PATCH 09/14] added tests for logger messages --- tests/test_languages.py | 133 +++++++++++++++++++++++++++------------- 1 file changed, 91 insertions(+), 42 deletions(-) diff --git a/tests/test_languages.py b/tests/test_languages.py index 57ceea1d4..0625c270f 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -8,6 +8,8 @@ from dateparser.conf import settings from dateparser.utils import normalize_unicode from dateparser.languages.validation import LanguageValidator +from io import StringIO +import logging from tests import BaseTestCase @@ -682,36 +684,52 @@ class TestLanguageValidatorWhenInvalid(BaseTestCase): def setUp(self): super(TestLanguageValidatorWhenInvalid, self).setUp() self.validator = LanguageValidator + self.captured_logs = StringIO() + self.validator.get_logger() + self.sh = logging.StreamHandler(self.captured_logs) + self.validator.logger.addHandler(self.sh) + self.log_list = self.captured_logs.getvalue().split('\n')[0] + + def get_log_str(self): + return self.captured_logs.getvalue().split('\n')[0] @parameterized.expand([ - param('en', 'string instead of dict'), + param('en', 'string instead of dict', log_msg="Language 'en' info expected to be dict, " + "but have got str", ), ]) - def test_validate_info_when_invalid_tipe(self, lang_id, lang_info): + def test_validate_info_when_invalid_type(self, lang_id, lang_info, log_msg): result = self.validator.validate_info(lang_id, lang_info) + self.assertEqual(log_msg, self.get_log_str()) self.assertFalse(result) @parameterized.expand([ - param('en', {}), - param('en', {'name': 22}), - param('en', {'name': ''}), + param('en', {}, log_msg="Language 'en' does not have a name"), + param('en', {'name': 22}, log_msg="Language 'en' does not have a name"), + param('en', {'name': ''}, log_msg="Language 'en' does not have a name"), ]) - def test_validate_name_when_invalid(self, lang_id, lang_info): + def test_validate_name_when_invalid(self, lang_id, lang_info, log_msg): result = self.validator._validate_name(lang_id, lang_info) + self.assertEqual(log_msg, self.get_log_str()) self.assertFalse(result) @parameterized.expand([ - param('en', {'no_word_spacing': 'string instead of bool'}), + param('en', {'no_word_spacing': 'string instead of bool'}, + log_msg="Invalid 'no_word_spacing' value 'string instead of bool' for 'en' language: ""expected boolean"), ]) - def test_validate_word_spacing_when_invalid(self, lang_id, lang_info): + def test_validate_word_spacing_when_invalid(self, lang_id, lang_info, log_msg): result = self.validator._validate_word_spacing(lang_id, lang_info) + self.assertEqual(log_msg, self.get_log_str()) self.assertFalse(result) @parameterized.expand([ - param('en', {'skip': 'string instead of list'}), - param('en', {'skip': ['']}), + param('en', {'skip': 'string instead of list'}, log_msg="Invalid 'skip' list for 'en' language: " + "expected list type but have got str"), + param('en', {'skip': ['']}, log_msg="Invalid 'skip' token '' for 'en' language: " + "expected not empty string"), ]) - def test_validate_skip_list_when_invalid(self, lang_id, lang_info): + def test_validate_skip_list_when_invalid(self, lang_id, lang_info, log_msg): result = self.validator._validate_skip_list(lang_id, lang_info) + self.assertEqual(log_msg, self.get_log_str()) self.assertFalse(result) @parameterized.expand([ @@ -723,11 +741,14 @@ def test_validate_skip_list_when_absent(self, lang_id, lang_info): self.assertTrue(result) @parameterized.expand([ - param('en', {'pertain': 'it is a string', 'skip': ['']}), - param('en', {'pertain': [''], 'skip': 'it is a string'}), + param('en', {'pertain': 'it is a string', 'skip': ['']}, + log_msg="Invalid 'pertain' token '' for 'en' language: expected not empty string"), + param('en', {'pertain': [''], 'skip': 'it is a string'}, + log_msg="Invalid 'pertain' list for 'en' language: expected list type but have got str"), ]) - def test_validate_pertain_list_when_invalid(self, lang_id, lang_info): + def test_validate_pertain_list_when_invalid(self, lang_id, lang_info, log_msg): result = self.validator._validate_pertain_list(lang_id, lang_info) + self.assertEqual(log_msg, self.get_log_str()) self.assertFalse(result) @parameterized.expand([ @@ -739,51 +760,62 @@ def test_validate_pertain_list_when_absent(self, lang_id, lang_info): self.assertTrue(result) @parameterized.expand([ - param('en', {}), + param('en', {}, log_msg="No translations for 'monday' provided for 'en' language"), param('en', - {'monday': 1, 'tuesday': 2, 'wednesday': 3, 'thursday': 4, 'friday': 5, 'saturday': 6, 'sunday': 7}), + {'monday': 1, 'tuesday': 2, 'wednesday': 3, 'thursday': 4, 'friday': 5, 'saturday': 6, 'sunday': 7}, + log_msg="Invalid 'monday' translations list for 'en' language: expected list type but have got int"), param('en', {'monday': [1], 'tuesday': [2], 'wednesday': [3], 'thursday': [4], 'friday': [5], - 'saturday': [6], 'sunday': [7]}), + 'saturday': [6], 'sunday': [7]}, + log_msg="Invalid 'monday' translation 1 for 'en' language: expected not empty string"), ]) - def test_validate_weekdays_when_invalid(self, lang_id, lang_info): + def test_validate_weekdays_when_invalid(self, lang_id, lang_info, log_msg): result = self.validator._validate_weekdays(lang_id, lang_info) + self.assertEqual(log_msg, self.get_log_str()) self.assertFalse(result) @parameterized.expand([ - param('en', {}), + param('en', {}, log_msg="No translations for 'january' provided for 'en' language"), param('en', {'january': 1, 'february': 2, 'march': 3, 'april': 4, 'may': 5, 'june': 6, 'july': 7, - 'august': 8, 'september': 9, 'october': 10, 'november': 11, 'december': 12}), + 'august': 8, 'september': 9, 'october': 10, 'november': 11, 'december': 12}, + log_msg="Invalid 'january' translations list for 'en' language: expected list type but have got int"), param('en', {'january': [1], 'february': [2], 'march': [3], 'april': [4], 'may': [5], 'june': [6], 'july': [7], - 'august': [8], 'september': [9], 'october': [10], 'november': [11], 'december': [12]}), + 'august': [8], 'september': [9], 'october': [10], 'november': [11], 'december': [12]}, + log_msg="Invalid 'january' translation 1 for 'en' language: expected not empty string"), ]) - def test_validate_months_when_invalid(self, lang_id, lang_info): + def test_validate_months_when_invalid(self, lang_id, lang_info, log_msg): result = self.validator._validate_months(lang_id, lang_info) + self.assertEqual(log_msg, self.get_log_str()) self.assertFalse(result) @parameterized.expand([ - param('en', {}), + param('en', {}, log_msg="No translations for 'year' provided for 'en' language"), param('en', - {'year': 1, 'month': 2, 'week': 3, 'day': 4, 'hour': 5, 'minute': 6, 'second': 7}), + {'year': 1, 'month': 2, 'week': 3, 'day': 4, 'hour': 5, 'minute': 6, 'second': 7}, + log_msg="Invalid 'year' translations list for 'en' language: expected list type but have got int"), param('en', - {'year': [1], 'month': [2], 'week': [3], 'day': [4], 'hour': [5], 'minute': [6], 'second': [7]}), + {'year': [1], 'month': [2], 'week': [3], 'day': [4], 'hour': [5], 'minute': [6], 'second': [7]}, + log_msg="Invalid 'year' translation 1 for 'en' language: expected not empty string"), ]) - def test_validate_units_when_invalid(self, lang_id, lang_info): + def test_validate_units_when_invalid(self, lang_id, lang_info, log_msg): result = self.validator._validate_units(lang_id, lang_info) + self.assertEqual(log_msg, self.get_log_str()) self.assertFalse(result) @parameterized.expand([ - param('en', {}), + param('en', {}, log_msg="No translations for 'ago' provided for 'en' language"), param('en', - {'ago': 1}), + {'ago': 1}, log_msg="Invalid 'ago' translations list for 'en' language: " + "expected list type but have got int"), param('en', - {'ago': []}), + {'ago': []}, log_msg="No translations for 'ago' provided for 'en' language"), param('en', - {'ago': ['']}), + {'ago': ['']}, log_msg="Invalid 'ago' translation '' for 'en' language: expected not empty string"), ]) - def test_validate_other_words_when_invalid(self, lang_id, lang_info): + def test_validate_other_words_when_invalid(self, lang_id, lang_info, log_msg='na'): result = self.validator._validate_other_words(lang_id, lang_info) + self.assertEqual(log_msg, self.get_log_str()) self.assertFalse(result) @parameterized.expand([ @@ -796,31 +828,48 @@ def test_validate_simplifications_when_absent(self, lang_id, lang_info): @parameterized.expand([ param('en', - {'simplifications': 'string instead of list'}), + {'simplifications': 'string instead of list'}, + log_msg="Invalid 'simplifications' list for 'en' language: expected list type but have got str"), param('en', - {'simplifications': [{}]}), + {'simplifications': [{}]}, + log_msg="Invalid simplification {} for 'en' language: eash simplification suppose " + "to be one-to-one mapping"), param('en', - {'simplifications': [{28: []}]}), + {'simplifications': [{28: []}]}, + log_msg="Invalid simplification {28: []} for 'en' language: each simplification suppose " + "to be string-to-string-or-int mapping"), param('en', - {'simplifications': [{'simplification': []}]}), + {'simplifications': [{'simplification': []}]}, + log_msg="Invalid simplification {'simplification': []} for 'en' language: each simplification suppose " + "to be string-to-string-or-int mapping"), param('en', - {'simplifications': [{'(\d+)\s*hr(s?)\g<(.+?)>': r'\1 hour\2'}]}), + {'simplifications': [{'(\d+)\s*hr(s?)\g<(.+?)>': r'\1 hour\2'}]}, + log_msg="Invalid simplification {'(\\\\d+)\\\\s*hr(s?)\\\\g<(.+?)>': '\\\\1 hour\\\\2'} " + "for 'en' language: groups 3 were not used"), param('en', - {'simplifications': [{'(one)(two)(three)': r'\1\3\2\4'}]}), + {'simplifications': [{'(one)(two)(three)': r'\1\3\2\4'}]}, + log_msg="Invalid simplification {'(one)(two)(three)': '\\\\1\\\\3\\\\2\\\\4'} for 'en' language:" + " unknown groups 4"), param('en', - {'simplifications': [{r'(?P\w+)(?P\w+)': '\\g'}]}), + {'simplifications': [{r'(?P\w+)(?P\w+)': '\\g'}]}, + log_msg="Invalid simplification {'(?P\\\\w+)(?P\\\\w+)': '\\\\g'} for 'en' language:" + " groups 2 were not used"), param('en', - {'simplifications': [{r'(?P\w+)': '\\g(.*?)'}]}), + {'simplifications': [{r'(?P\w+)': '\\g(.*?)'}]}, + log_msg="Invalid simplification {'(?P\\\\w+)': '\\\\g(.*?)'} for 'en' language: unknown group B"), ]) - def test_validate_simplifications_when_invalid(self, lang_id, lang_info): + def test_validate_simplifications_when_invalid(self, lang_id, lang_info, log_msg): result = self.validator._validate_simplifications(lang_id, lang_info) + self.assertEqual(log_msg, self.get_log_str()) self.assertFalse(result) @parameterized.expand([ - param('en', {'invalid_key': ''}), + param('en', {'invalid_key': ''}, + log_msg="Extra keys found for 'en' language: 'invalid_key'"), ]) - def test_validate_extra_keys_when_invalid(self, lang_id, lang_info): + def test_validate_extra_keys_when_invalid(self, lang_id, lang_info, log_msg): result = self.validator._validate_extra_keys(lang_id, lang_info) + self.assertEqual(log_msg, self.get_log_str()) self.assertFalse(result) From 62d932c4a21516c9cca0a20cc54ad93e3e46e063 Mon Sep 17 00:00:00 2001 From: eszakharova <1583253@gmail.com> Date: Thu, 1 Jun 2017 22:54:19 +0300 Subject: [PATCH 10/14] added tests for logger messages --- tests/test_languages.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_languages.py b/tests/test_languages.py index 0625c270f..d1b221e7d 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -686,6 +686,7 @@ def setUp(self): self.validator = LanguageValidator self.captured_logs = StringIO() self.validator.get_logger() + self.validator.get_logger() self.sh = logging.StreamHandler(self.captured_logs) self.validator.logger.addHandler(self.sh) self.log_list = self.captured_logs.getvalue().split('\n')[0] From 7da64072da67a2e5809458ae8d316bb0430cfb9d Mon Sep 17 00:00:00 2001 From: eszakharova <1583253@gmail.com> Date: Thu, 1 Jun 2017 22:57:47 +0300 Subject: [PATCH 11/14] added tests for logger messages --- tests/test_languages.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_languages.py b/tests/test_languages.py index d1b221e7d..0625c270f 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -686,7 +686,6 @@ def setUp(self): self.validator = LanguageValidator self.captured_logs = StringIO() self.validator.get_logger() - self.validator.get_logger() self.sh = logging.StreamHandler(self.captured_logs) self.validator.logger.addHandler(self.sh) self.log_list = self.captured_logs.getvalue().split('\n')[0] From 2db529ea0515f156fe0e5c3da6e5ce9f9b7ad0c1 Mon Sep 17 00:00:00 2001 From: eszakharova <1583253@gmail.com> Date: Thu, 1 Jun 2017 23:58:29 +0300 Subject: [PATCH 12/14] changes to pass python2 tests --- tests/test_languages.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_languages.py b/tests/test_languages.py index 0625c270f..ed5d918d5 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -690,8 +690,15 @@ def setUp(self): self.validator.logger.addHandler(self.sh) self.log_list = self.captured_logs.getvalue().split('\n')[0] + @staticmethod + def make_python3_msg(text): + text = text.replace('unicode', 'str') + text = text.replace('u\'', '\'') + return text + + def get_log_str(self): - return self.captured_logs.getvalue().split('\n')[0] + return self.make_python3_msg(self.captured_logs.getvalue().split('\n')[0]) @parameterized.expand([ param('en', 'string instead of dict', log_msg="Language 'en' info expected to be dict, " From 1563a2c45f502302728833eef5b59192fc7a278f Mon Sep 17 00:00:00 2001 From: eszakharova <1583253@gmail.com> Date: Fri, 2 Jun 2017 23:53:38 +0300 Subject: [PATCH 13/14] tried to fix microseconds with exception --- dateparser/utils/strptime.py | 7 ++++++- tests/test_utils_strptime.py | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/dateparser/utils/strptime.py b/dateparser/utils/strptime.py index c078f68a9..709fc421f 100644 --- a/dateparser/utils/strptime.py +++ b/dateparser/utils/strptime.py @@ -11,6 +11,8 @@ '\.(?P[0-9]{1,6})' ) +MS_SEARCHER = re.compile(r'\.(?P[0-9]{1,6})') + def patch_strptime(): """Monkey patching _strptime to avoid problems related with non-english @@ -63,6 +65,9 @@ def strptime(date_string, format): ms = ms + ((6 - len(ms)) * '0') obj = obj.replace(microsecond=int(ms)) except AttributeError: - pass + match_groups = MS_SEARCHER.search(date_string).groupdict() + ms = match_groups['microsecond'] + ms = ms + ((6 - len(ms)) * '0') + obj = obj.replace(microsecond=int(ms)) return obj diff --git a/tests/test_utils_strptime.py b/tests/test_utils_strptime.py index 67bbd9dcc..33f6e1ffe 100644 --- a/tests/test_utils_strptime.py +++ b/tests/test_utils_strptime.py @@ -80,7 +80,8 @@ def test_microseconds_are_parsed_correctly(self, date_string, fmt, expected): self.then_date_object_is(expected) @parameterized.expand([ - param('11 Dec 10 10:30:2011.999999', '%y %b %S %H:%M:%Y.%f', expected=datetime(2011, 12, 1, 10, 30, 10)), + param('11 Dec 10 10:30:2011.999999', '%y %b %S %H:%M:%Y.%f', + expected=datetime(2011, 12, 1, 10, 30, 10, 999999)), ]) def test_not_parsing_microseconds_attribute_error(self, datestring, fmt, expected): self.assertEqual(strptime(datestring, fmt), expected) From 0a7eef5a01275c7d0a2d5a7af204139c33d9df01 Mon Sep 17 00:00:00 2001 From: Waqas Shabir Date: Wed, 7 Jun 2017 00:50:17 -0400 Subject: [PATCH 14/14] Move strptime test to relevant space --- tests/test_utils_strptime.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/tests/test_utils_strptime.py b/tests/test_utils_strptime.py index 33f6e1ffe..12ad4eb6c 100644 --- a/tests/test_utils_strptime.py +++ b/tests/test_utils_strptime.py @@ -73,16 +73,8 @@ def test_parsing_date_should_fail_using_datetime_strptime_if_locale_is_non_engli param('12 Dec 10 10:30:55.000011', '%d %b %y %H:%M:%S.%f', expected=datetime(2010, 12, 12, 10, 30, 55, 11)), param('12 Dec 10 10:30:55.000111', '%d %b %y %H:%M:%S.%f', expected=datetime(2010, 12, 12, 10, 30, 55, 111)), param('12 Feb 2016 11:41:23', '%d %b %Y %I:%M:%S', expected=datetime(2016, 2, 12, 11, 41, 23)), - + param('11 Dec 10 10:30:2011.999999', '%y %b %S %H:%M:%Y.%f', expected=datetime(2011, 12, 1, 10, 30, 10, 999999)), ]) def test_microseconds_are_parsed_correctly(self, date_string, fmt, expected): self.when_date_string_is_parsed(date_string, fmt) self.then_date_object_is(expected) - - @parameterized.expand([ - param('11 Dec 10 10:30:2011.999999', '%y %b %S %H:%M:%Y.%f', - expected=datetime(2011, 12, 1, 10, 30, 10, 999999)), - ]) - def test_not_parsing_microseconds_attribute_error(self, datestring, fmt, expected): - self.assertEqual(strptime(datestring, fmt), expected) -