Skip to content

Commit

Permalink
Merge branch 'eszakharova-add-missing-tests'
Browse files Browse the repository at this point in the history
  • Loading branch information
waqasshabbir committed Jun 7, 2017
2 parents 9f0487a + 0a7eef5 commit 827b68a
Show file tree
Hide file tree
Showing 4 changed files with 289 additions and 25 deletions.
7 changes: 6 additions & 1 deletion dateparser/utils/strptime.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
'\.(?P<microsecond>[0-9]{1,6})'
)

# Locates a fractional-seconds component anywhere in a string: a literal dot
# followed by 1-6 digits, captured as the named group ``microsecond``.
MS_SEARCHER = re.compile(r'\.(?P<microsecond>[0-9]{1,6})')


def patch_strptime():
"""Monkey patching _strptime to avoid problems related with non-english
Expand Down Expand Up @@ -63,6 +65,9 @@ def strptime(date_string, format):
ms = ms + ((6 - len(ms)) * '0')
obj = obj.replace(microsecond=int(ms))
except AttributeError:
pass
match_groups = MS_SEARCHER.search(date_string).groupdict()
ms = match_groups['microsecond']
ms = ms + ((6 - len(ms)) * '0')
obj = obj.replace(microsecond=int(ms))

return obj
259 changes: 237 additions & 22 deletions tests/test_languages.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
from dateparser.languages.detection import AutoDetectLanguage, ExactLanguages
from dateparser.conf import settings
from dateparser.utils import normalize_unicode
from dateparser.languages.validation import LanguageValidator
from io import StringIO
import logging

from tests import BaseTestCase

Expand Down Expand Up @@ -47,7 +50,7 @@ def setUp(self):
# German
param('de', "29. Juni 2007", "29. june 2007"),
param('de', "Montag 5 Januar, 2015", "monday 5 january 2015"),
#Hungarian
# Hungarian
param('hu', '2016 augusztus 11.', '2016 august 11.'),
param('hu', '2016-08-13 szombat 10:21', '2016-08-13 saturday 10:21'),
param('hu', '2016. augusztus 14. vasárnap 10:21', '2016. august 14. sunday 10:21'),
Expand Down Expand Up @@ -175,10 +178,10 @@ def setUp(self):
param('bn', "সেপ্টেম্বর 03 2014", "september 03 2014"),
param('bn', "শুক্রবার, 03 সেপ্টেম্বর 2014", "friday 03 september 2014"),
#Hindi
param('hi', 'सोमवार 13 जून 1998','monday 13 june 1998'),
param('hi', 'मंगल 16 1786 12:18','tuesday 16 1786 12:18'),
param('hi','शनि 11 अप्रैल 2002 03:09','saturday 11 april 2002 03:09'),
# Hindi
param('hi', 'सोमवार 13 जून 1998', 'monday 13 june 1998'),
param('hi', 'मंगल 16 1786 12:18', 'tuesday 16 1786 12:18'),
param('hi', 'शनि 11 अप्रैल 2002 03:09', 'saturday 11 april 2002 03:09'),
# Swedish
param('sv', "Sept 03 2014", "september 03 2014"),
Expand Down Expand Up @@ -408,11 +411,11 @@ def test_translation(self, shortname, datetime_string, expected_translation):
# Hindi
param('hi', "१ सप्ताह", "1 week"),
param('hi', "२४ मिनट पहले", "24 minute ago"),
param('hi', "पांच वर्ष","5 year"),
param('hi', "५३ सप्ताह बाद","53 week in"),
param('hi', "सन् १९२०"," 1920"),
param('hi',"आठ पूर्वाह्न","8 am"),
param('hi',"बारह सेकंड पूर्व","12 second ago"),
param('hi', "पांच वर्ष", "5 year"),
param('hi', "५३ सप्ताह बाद", "53 week in"),
param('hi', "सन् १९२०", " 1920"),
param('hi', "आठ पूर्वाह्न", "8 am"),
param('hi', "बारह सेकंड पूर्व", "12 second ago"),
# Swedish
param('sv', "igår", "1 day"),
param('sv', "idag", "0 day"),
Expand Down Expand Up @@ -449,21 +452,29 @@ def test_freshness_translation(self, shortname, datetime_string, expected_transl
param('tr', "2 saat önce", ["2", " ", "saat", " ", "önce"]),
param('fr', "il ya environ 23 heures'", ["il ya", " ", "environ", " ", "23", " ", "heures"]),
param('de', "Gestern um 04:41", ['Gestern ', 'um', ' ', '04', ':', '41']),
param('de', "Donnerstag, 8. Januar 2015 um 07:17", ['Donnerstag', ' ', '8', '.', ' ', 'Januar', ' ', '2015', ' ', 'um', ' ', '07', ':', '17']),
param('ru', "8 января 2015 г. в 9:10", ['8', ' ', 'января', ' ', '2015', ' ', 'г.', ' ', 'в', ' ', '9', ':', '10']),
param('de', "Donnerstag, 8. Januar 2015 um 07:17",
['Donnerstag', ' ', '8', '.', ' ', 'Januar', ' ', '2015', ' ', 'um', ' ', '07', ':', '17']),
param('ru', "8 января 2015 г. в 9:10",
['8', ' ', 'января', ' ', '2015', ' ', 'г.', ' ', 'в', ' ', '9', ':', '10']),
param('cs', "6. leden 2015 v 22:29", ['6', '.', ' ', 'leden', ' ', '2015', ' ', 'v', ' ', '22', ':', '29']),
param('nl', "woensdag 7 januari 2015 om 21:32", ['woensdag', ' ', '7', ' ', 'januari', ' ', '2015', ' ', 'om', ' ', '21', ':', '32']),
param('nl', "woensdag 7 januari 2015 om 21:32",
['woensdag', ' ', '7', ' ', 'januari', ' ', '2015', ' ', 'om', ' ', '21', ':', '32']),
param('ro', "8 Ianuarie 2015 la 13:33", ['8', ' ', 'Ianuarie', ' ', '2015', ' ', 'la', ' ', '13', ':', '33']),
param('ar', "8 يناير، 2015، الساعة 10:01 صباحاً", ['8', ' ', 'يناير', ' ', '2015', 'الساعة', ' ', '10', ':', '01', ' صباحاً']),
param('th', "8 มกราคม 2015 เวลา 12:22 น.", ['8', ' ', 'มกราคม', ' ', '2015', ' ', 'เวลา', ' ', '12', ':', '22', ' ', 'น.']),
param('ar', "8 يناير، 2015، الساعة 10:01 صباحاً",
['8', ' ', 'يناير', ' ', '2015', 'الساعة', ' ', '10', ':', '01', ' صباحاً']),
param('th', "8 มกราคม 2015 เวลา 12:22 น.",
['8', ' ', 'มกราคม', ' ', '2015', ' ', 'เวลา', ' ', '12', ':', '22', ' ', 'น.']),
param('pl', "8 stycznia 2015 o 10:19", ['8', ' ', 'stycznia', ' ', '2015', ' ', 'o', ' ', '10', ':', '19']),
param('vi', "Thứ Năm, ngày 8 tháng 1 năm 2015", ["Thứ Năm", " ", "ngày", " ", "8", " tháng ", "1", " ", "năm", " ", "2015"]),
param('vi', "Thứ Năm, ngày 8 tháng 1 năm 2015",
["Thứ Năm", " ", "ngày", " ", "8", " tháng ", "1", " ", "năm", " ", "2015"]),
param('tl', "Biyernes Hulyo 3 2015", ["Biyernes", " ", "Hulyo", " ", "3", " ", "2015"]),
param('be', "3 верасня 2015 г. у 11:10", ['3', ' ', 'верасня', ' ', '2015', ' ', 'г.', ' ', 'у', ' ', '11', ':', '10']),
param('be', "3 верасня 2015 г. у 11:10",
['3', ' ', 'верасня', ' ', '2015', ' ', 'г.', ' ', 'у', ' ', '11', ':', '10']),
param('id', "3 Juni 2015 13:05:46", ['3', ' ', 'Juni', ' ', '2015', ' ', '13', ':', '05', ':', '46']),
param('he', "ה-21 לאוקטובר 2016 ב-15:00", ['ה-', '21', ' ', 'לאוקטובר', ' ', '2016', ' ', 'ב-', '15', ':', '00']),
param('he', "ה-21 לאוקטובר 2016 ב-15:00",
['ה-', '21', ' ', 'לאוקטובר', ' ', '2016', ' ', 'ב-', '15', ':', '00']),
param('bn', "3 জুন 2015 13:05:46", ['3', ' ', 'জুন', ' ', '2015', ' ', '13', ':', '05', ':', '46']),
param('hi', "13 मार्च 2013 11:15:09",['13',' ','मार्च',' ','2013',' ','11',':','15',':','09']),
param('hi', "13 मार्च 2013 11:15:09", ['13', ' ', 'मार्च', ' ', '2013', ' ', '11', ':', '15', ':', '09']),
])
def test_split(self, shortname, datetime_string, expected_tokens):
self.given_bundled_language(shortname)
Expand Down Expand Up @@ -555,7 +566,7 @@ def setUp(self):

@parameterized.expand([
param("1 january 2015", 'en'),
])
])
def test_valid_dates_detected(self, datetime_string, expected_language):
self.given_languages(expected_language)
self.given_detector()
Expand Down Expand Up @@ -604,7 +615,8 @@ def given_detector(self):
raise NotImplementedError

def when_searching_for_first_applicable_language(self):
for language in self.detector.iterate_applicable_languages(self.datetime_string, modify=True, settings=settings):
for language in self.detector.iterate_applicable_languages(self.datetime_string, modify=True,
settings=settings):
self.detected_language = language
break
else:
Expand Down Expand Up @@ -644,7 +656,8 @@ def given_detector(self):
self.detector = ExactLanguages(languages=self.known_languages)

def when_using_exact_languages(self):
    """Query the detector for the languages applicable to the date string.

    Calls ``self.detector.iterate_applicable_languages`` once with
    ``self.datetime_string`` (``modify=True`` and the module-level
    ``settings``) and stores whatever it returns on ``self.exact_languages``
    for the ``then_*`` assertions to inspect.
    """
    # A single call is enough: the earlier duplicate of this statement only
    # produced a result that was immediately overwritten.
    self.exact_languages = self.detector.iterate_applicable_languages(
        self.datetime_string, modify=True, settings=settings)

def then_exact_languages_were_filtered(self, shortnames):
    """Assert that the stored languages are exactly those named in ``shortnames``.

    Both sides are compared as sets, so ordering and duplicates are ignored.

    :param shortnames: iterable of expected language short names.
    """
    expected = set(shortnames)
    found = {language.shortname for language in self.exact_languages}
    self.assertEqual(expected, found)
Expand All @@ -665,3 +678,205 @@ class TestAutoDetectLanguageDetectorWithoutRedetection(BaseAutoDetectLanguageDet
class TestAutoDetectLanguageDetectorWithRedetection(BaseAutoDetectLanguageDetectorTestCase):
    """Auto-detect language detector test case with ``allow_redetection`` enabled."""

    allow_redetection = True
    # NOTE(review): presumably opts this subclass back into test collection
    # that the shared base case opts out of -- confirm against the base class.
    __test__ = True


class TestLanguageValidatorWhenInvalid(BaseTestCase):
    """Check how ``LanguageValidator`` reacts to malformed language info.

    Each test feeds a deliberately broken (or empty) ``lang_info`` to a single
    validator method and checks the returned flag; the "invalid" tests also
    compare the first line written to the validator's logger with the
    expected message.
    """

    def setUp(self):
        """Attach a fresh in-memory log handler to the validator's logger."""
        super(TestLanguageValidatorWhenInvalid, self).setUp()
        self.validator = LanguageValidator
        self.captured_logs = StringIO()
        self.validator.get_logger()
        self.sh = logging.StreamHandler(self.captured_logs)
        self.validator.logger.addHandler(self.sh)
        # The logger hangs off the validator class itself, so it outlives each
        # test; detach our handler afterwards or handlers (and their buffers)
        # would pile up and every later log call would fan out to all of them.
        self.addCleanup(self.validator.logger.removeHandler, self.sh)

    @staticmethod
    def make_python3_msg(text):
        """Rewrite Python 2 flavoured log text into its Python 3 spelling.

        Replaces ``unicode`` with ``str`` and drops the ``u`` prefix in front
        of single-quoted reprs so one expected message fits both interpreters.
        """
        text = text.replace('unicode', 'str')
        text = text.replace('u\'', '\'')
        return text

    def get_log_str(self):
        """Return the first captured log line, normalised by ``make_python3_msg``."""
        return self.make_python3_msg(self.captured_logs.getvalue().split('\n')[0])

    # Language info that is not a dict at all.
    @parameterized.expand([
        param('en', 'string instead of dict',
              log_msg="Language 'en' info expected to be dict, "
                      "but have got str", ),
    ])
    def test_validate_info_when_invalid_type(self, lang_id, lang_info, log_msg):
        result = self.validator.validate_info(lang_id, lang_info)
        self.assertEqual(log_msg, self.get_log_str())
        self.assertFalse(result)

    # Missing, non-string and empty names all yield the same message.
    @parameterized.expand([
        param('en', {}, log_msg="Language 'en' does not have a name"),
        param('en', {'name': 22}, log_msg="Language 'en' does not have a name"),
        param('en', {'name': ''}, log_msg="Language 'en' does not have a name"),
    ])
    def test_validate_name_when_invalid(self, lang_id, lang_info, log_msg):
        result = self.validator._validate_name(lang_id, lang_info)
        self.assertEqual(log_msg, self.get_log_str())
        self.assertFalse(result)

    @parameterized.expand([
        param('en', {'no_word_spacing': 'string instead of bool'},
              log_msg="Invalid 'no_word_spacing' value 'string instead of bool' for 'en' language: "
                      "expected boolean"),
    ])
    def test_validate_word_spacing_when_invalid(self, lang_id, lang_info, log_msg):
        result = self.validator._validate_word_spacing(lang_id, lang_info)
        self.assertEqual(log_msg, self.get_log_str())
        self.assertFalse(result)

    @parameterized.expand([
        param('en', {'skip': 'string instead of list'},
              log_msg="Invalid 'skip' list for 'en' language: "
                      "expected list type but have got str"),
        param('en', {'skip': ['']},
              log_msg="Invalid 'skip' token '' for 'en' language: "
                      "expected not empty string"),
    ])
    def test_validate_skip_list_when_invalid(self, lang_id, lang_info, log_msg):
        result = self.validator._validate_skip_list(lang_id, lang_info)
        self.assertEqual(log_msg, self.get_log_str())
        self.assertFalse(result)

    # An absent 'skip' key is accepted.
    @parameterized.expand([
        param('en', {}),
    ])
    def test_validate_skip_list_when_absent(self, lang_id, lang_info):
        result = self.validator._validate_skip_list(lang_id, lang_info)
        self.assertTrue(result)

    # NOTE(review): the expected messages below follow the shape of the 'skip'
    # value rather than the 'pertain' value -- it looks as if the validator
    # reads the 'skip' key here; confirm against LanguageValidator.
    @parameterized.expand([
        param('en', {'pertain': 'it is a string', 'skip': ['']},
              log_msg="Invalid 'pertain' token '' for 'en' language: expected not empty string"),
        param('en', {'pertain': [''], 'skip': 'it is a string'},
              log_msg="Invalid 'pertain' list for 'en' language: expected list type but have got str"),
    ])
    def test_validate_pertain_list_when_invalid(self, lang_id, lang_info, log_msg):
        result = self.validator._validate_pertain_list(lang_id, lang_info)
        self.assertEqual(log_msg, self.get_log_str())
        self.assertFalse(result)

    # Empty language info is accepted by the pertain-list check.
    @parameterized.expand([
        param('en', {}),
    ])
    def test_validate_pertain_list_when_absent(self, lang_id, lang_info):
        result = self.validator._validate_pertain_list(lang_id, lang_info)
        self.assertTrue(result)

    # Only the first log line is compared, hence every message names 'monday'.
    @parameterized.expand([
        param('en', {}, log_msg="No translations for 'monday' provided for 'en' language"),
        param('en',
              {'monday': 1, 'tuesday': 2, 'wednesday': 3, 'thursday': 4,
               'friday': 5, 'saturday': 6, 'sunday': 7},
              log_msg="Invalid 'monday' translations list for 'en' language: expected list type but have got int"),
        param('en',
              {'monday': [1], 'tuesday': [2], 'wednesday': [3], 'thursday': [4],
               'friday': [5], 'saturday': [6], 'sunday': [7]},
              log_msg="Invalid 'monday' translation 1 for 'en' language: expected not empty string"),
    ])
    def test_validate_weekdays_when_invalid(self, lang_id, lang_info, log_msg):
        result = self.validator._validate_weekdays(lang_id, lang_info)
        self.assertEqual(log_msg, self.get_log_str())
        self.assertFalse(result)

    # Only the first log line is compared, hence every message names 'january'.
    @parameterized.expand([
        param('en', {}, log_msg="No translations for 'january' provided for 'en' language"),
        param('en',
              {'january': 1, 'february': 2, 'march': 3, 'april': 4, 'may': 5, 'june': 6, 'july': 7,
               'august': 8, 'september': 9, 'october': 10, 'november': 11, 'december': 12},
              log_msg="Invalid 'january' translations list for 'en' language: expected list type but have got int"),
        param('en',
              {'january': [1], 'february': [2], 'march': [3], 'april': [4], 'may': [5], 'june': [6],
               'july': [7], 'august': [8], 'september': [9], 'october': [10], 'november': [11],
               'december': [12]},
              log_msg="Invalid 'january' translation 1 for 'en' language: expected not empty string"),
    ])
    def test_validate_months_when_invalid(self, lang_id, lang_info, log_msg):
        result = self.validator._validate_months(lang_id, lang_info)
        self.assertEqual(log_msg, self.get_log_str())
        self.assertFalse(result)

    # Only the first log line is compared, hence every message names 'year'.
    @parameterized.expand([
        param('en', {}, log_msg="No translations for 'year' provided for 'en' language"),
        param('en',
              {'year': 1, 'month': 2, 'week': 3, 'day': 4, 'hour': 5, 'minute': 6, 'second': 7},
              log_msg="Invalid 'year' translations list for 'en' language: expected list type but have got int"),
        param('en',
              {'year': [1], 'month': [2], 'week': [3], 'day': [4], 'hour': [5], 'minute': [6],
               'second': [7]},
              log_msg="Invalid 'year' translation 1 for 'en' language: expected not empty string"),
    ])
    def test_validate_units_when_invalid(self, lang_id, lang_info, log_msg):
        result = self.validator._validate_units(lang_id, lang_info)
        self.assertEqual(log_msg, self.get_log_str())
        self.assertFalse(result)

    # Missing key and empty list both yield the "No translations" message.
    @parameterized.expand([
        param('en', {}, log_msg="No translations for 'ago' provided for 'en' language"),
        param('en', {'ago': 1},
              log_msg="Invalid 'ago' translations list for 'en' language: "
                      "expected list type but have got int"),
        param('en', {'ago': []},
              log_msg="No translations for 'ago' provided for 'en' language"),
        param('en', {'ago': ['']},
              log_msg="Invalid 'ago' translation '' for 'en' language: expected not empty string"),
    ])
    def test_validate_other_words_when_invalid(self, lang_id, lang_info, log_msg='na'):
        result = self.validator._validate_other_words(lang_id, lang_info)
        self.assertEqual(log_msg, self.get_log_str())
        self.assertFalse(result)

    # An absent 'simplifications' key is accepted.
    @parameterized.expand([
        param('en', {}),
    ])
    def test_validate_simplifications_when_absent(self, lang_id, lang_info):
        result = self.validator._validate_simplifications(lang_id, lang_info)
        self.assertTrue(result)

    # The expected messages embed the repr of the offending mapping, so every
    # backslash of a pattern shows up doubled in the logged text.
    # NOTE(review): "eash" in the one-to-one message is left as-is on purpose;
    # presumably it mirrors the validator's own wording -- confirm before fixing.
    @parameterized.expand([
        param('en',
              {'simplifications': 'string instead of list'},
              log_msg="Invalid 'simplifications' list for 'en' language: expected list type but have got str"),
        param('en',
              {'simplifications': [{}]},
              log_msg="Invalid simplification {} for 'en' language: eash simplification suppose "
                      "to be one-to-one mapping"),
        param('en',
              {'simplifications': [{28: []}]},
              log_msg="Invalid simplification {28: []} for 'en' language: each simplification suppose "
                      "to be string-to-string-or-int mapping"),
        param('en',
              {'simplifications': [{'simplification': []}]},
              log_msg="Invalid simplification {'simplification': []} for 'en' language: each simplification suppose "
                      "to be string-to-string-or-int mapping"),
        # Raw string: same value as before, without invalid-escape warnings.
        param('en',
              {'simplifications': [{r'(\d+)\s*hr(s?)\g<(.+?)>': r'\1 hour\2'}]},
              log_msg="Invalid simplification {'(\\\\d+)\\\\s*hr(s?)\\\\g<(.+?)>': '\\\\1 hour\\\\2'} "
                      "for 'en' language: groups 3 were not used"),
        param('en',
              {'simplifications': [{'(one)(two)(three)': r'\1\3\2\4'}]},
              log_msg="Invalid simplification {'(one)(two)(three)': '\\\\1\\\\3\\\\2\\\\4'} for 'en' language:"
                      " unknown groups 4"),
        param('en',
              {'simplifications': [{r'(?P<A>\w+)(?P<B>\w+)': '\\g<A>'}]},
              log_msg="Invalid simplification {'(?P<A>\\\\w+)(?P<B>\\\\w+)': '\\\\g<A>'} for 'en' language:"
                      " groups 2 were not used"),
        param('en',
              {'simplifications': [{r'(?P<A>\w+)': '\\g<B>(.*?)'}]},
              log_msg="Invalid simplification {'(?P<A>\\\\w+)': '\\\\g<B>(.*?)'} for 'en' language: unknown group B"),
    ])
    def test_validate_simplifications_when_invalid(self, lang_id, lang_info, log_msg):
        result = self.validator._validate_simplifications(lang_id, lang_info)
        self.assertEqual(log_msg, self.get_log_str())
        self.assertFalse(result)

    @parameterized.expand([
        param('en', {'invalid_key': ''},
              log_msg="Extra keys found for 'en' language: 'invalid_key'"),
    ])
    def test_validate_extra_keys_when_invalid(self, lang_id, lang_info, log_msg):
        result = self.validator._validate_extra_keys(lang_id, lang_info)
        self.assertEqual(log_msg, self.get_log_str())
        self.assertFalse(result)
Loading

0 comments on commit 827b68a

Please sign in to comment.