From 1f5662f864ef4ce094ea6644a4bf2047e130c039 Mon Sep 17 00:00:00 2001 From: umerhasan17 Date: Sun, 16 Feb 2020 21:35:22 +0000 Subject: [PATCH 1/4] Implement extra function to fix and enhance o'clock behaviour, 9 tests failing --- timefhuman/categorize.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/timefhuman/categorize.py b/timefhuman/categorize.py index 0d58138..6a04bdd 100644 --- a/timefhuman/categorize.py +++ b/timefhuman/categorize.py @@ -316,6 +316,33 @@ def extract_hour_minute(string, time_of_day=None): return TimeToken(relative_hour=hour, minute=minute, time_of_day=time_of_day) +def extract_hour_minute_token(tokens, time_of_day=None): + """Attempt to extract the exact token which contains the hour and minute and convert it into a number. + This will either be 1 before or 2 before the am/pm token. + 12:00 is the default token to prevent failure + + """ + number_words = ["zero", "one", "two", "three", "four", "five", "six", + "seven", "eight", "nine", "ten", "eleven", "twelve"] + + # look at previous n tokens + n = 2 + for i in range(1, n+1): + try: + current_token = tokens[-i] + if current_token.lower() in number_words: + current_token = str(number_words.index(current_token.lower())) + return tokens[:-i-1], extract_hour_minute(current_token, time_of_day) + # if nothing is returned from extract_hour_minute + except ValueError: + pass + # if the tokens list is only of length 1 + except IndexError: + pass + # default return value + return tokens, "12:00" + + def maybe_substitute_hour_minute(tokens): """Attempt to extract hour and minute. @@ -347,8 +374,8 @@ def maybe_substitute_hour_minute(tokens): for time_of_day in ('am', 'pm'): while time_of_day in temp_tokens: index = temp_tokens.index(time_of_day) - time_token = extract_hour_minute(temp_tokens[index-1], time_of_day) - tokens = tokens[:index-1] + [time_token] + tokens[index+1:] + (previous_tokens, time_token) = extract_hour_minute_token(temp_tokens[:index], time_of_day) + tokens = previous_tokens + [time_token] + tokens[index+1:] temp_tokens = clean_tokens(tokens, remove_dots) tokens = [extract_hour_minute(token, None) From aebb10d3d00b5b5c628117c37529c3e90d81889c Mon Sep 17 00:00:00 2001 From: umerhasan17 Date: Mon, 17 Feb 2020 23:02:11 +0000 Subject: [PATCH 2/4] Bug fix to solve 9 previous test failures. --- timefhuman/categorize.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/timefhuman/categorize.py b/timefhuman/categorize.py index 6a04bdd..c8d4e97 100644 --- a/timefhuman/categorize.py +++ b/timefhuman/categorize.py @@ -332,7 +332,7 @@ def extract_hour_minute_token(tokens, time_of_day=None): current_token = tokens[-i] if current_token.lower() in number_words: current_token = str(number_words.index(current_token.lower())) - return tokens[:-i-1], extract_hour_minute(current_token, time_of_day) + return -i, extract_hour_minute(current_token, time_of_day) # if nothing is returned from extract_hour_minute except ValueError: pass @@ -340,7 +340,7 @@ def extract_hour_minute_token(tokens, time_of_day=None): except IndexError: pass # default return value - return tokens, "12:00" + return -1, "12:00" def maybe_substitute_hour_minute(tokens): @@ -374,8 +374,8 @@ def maybe_substitute_hour_minute(tokens): for time_of_day in ('am', 'pm'): while time_of_day in temp_tokens: index = temp_tokens.index(time_of_day) - (previous_tokens, time_token) = extract_hour_minute_token(temp_tokens[:index], time_of_day) - tokens = previous_tokens + [time_token] + tokens[index+1:] + (unchanged_index, time_token) = extract_hour_minute_token(temp_tokens[:index], time_of_day) + tokens = tokens[:index+unchanged_index] + [time_token] + tokens[index+1:] temp_tokens = clean_tokens(tokens, remove_dots) tokens = [extract_hour_minute(token, None) From b1d5d01c49628b6b3a9a870b88d358a7071b1c98 Mon Sep 17 00:00:00 2001 From: umerhasan17 Date: Mon, 17 Feb 2020 23:39:12 +0000 Subject: [PATCH 3/4] Implement function to convert word numbers to digit numbers + add tests --- timefhuman/categorize.py | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/timefhuman/categorize.py b/timefhuman/categorize.py index c8d4e97..b43f73c 100644 --- a/timefhuman/categorize.py +++ b/timefhuman/categorize.py @@ -27,6 +27,7 @@ def categorize(tokens, now): [7/17/2018, 3 pm, '-', 7/19/2018, 2 pm] """ tokens = list(tokens) + tokens = convert_words_to_numbers(tokens) tokens = convert_day_of_week(tokens, now) tokens = convert_relative_days(tokens, now) tokens = convert_time_of_day(tokens) @@ -37,6 +38,24 @@ def categorize(tokens, now): return tokens +# TODO: add conversions for thirty, fourty-five +# TODO: maybe set default for seven o'clock to 7 pm not am? +def convert_words_to_numbers(tokens): + """ + Converts numbers in word format into number format + >>> convert_words_to_numbers(['five', "o'clock"]) + ['5', "o'clock"] + >>> convert_words_to_numbers(['seven', "o'clock"]) + ['7', "o'clock"] + """ + number_words = ["zero", "one", "two", "three", "four", "five", "six", + "seven", "eight", "nine", "ten", "eleven", "twelve"] + for index, token in enumerate(tokens): + if token.lower() in number_words: + tokens[index] = str(number_words.index(token.lower())) + return tokens + + # TODO: "monday next week" def convert_day_of_week(tokens, now=datetime.datetime.now()): """Convert day-of-week vernacular into date-like string. @@ -317,22 +336,18 @@ def extract_hour_minute(string, time_of_day=None): def extract_hour_minute_token(tokens, time_of_day=None): - """Attempt to extract the exact token which contains the hour and minute and convert it into a number. - This will either be 1 before or 2 before the am/pm token. - 12:00 is the default token to prevent failure - """ - number_words = ["zero", "one", "two", "three", "four", "five", "six", - "seven", "eight", "nine", "ten", "eleven", "twelve"] + Attempt to extract the exact token which contains the hour and minute and convert it into a number. + This will either be 1 before or 2 before the am/pm token. + 12:00 is the default token to prevent failure + Tests for this helper function are included in maybe_substitute_hour_minute + """ # look at previous n tokens n = 2 for i in range(1, n+1): try: - current_token = tokens[-i] - if current_token.lower() in number_words: - current_token = str(number_words.index(current_token.lower())) - return -i, extract_hour_minute(current_token, time_of_day) + return -i, extract_hour_minute(tokens[-i], time_of_day) # if nothing is returned from extract_hour_minute except ValueError: pass @@ -367,6 +382,10 @@ def maybe_substitute_hour_minute(tokens): ['7/17/18', 3 pm] >>> maybe_substitute_hour_minute(['3', 'p.m.', '-', '4', 'p.m.']) [3 pm, '-', 4 pm] + >>> maybe_substitute_hour_minute(['5', "o'clock", 'pm']) + [5 pm] + >>> maybe_substitute_hour_minute(['12', "o'clock", 'pm']) + [12 pm] """ remove_dots = lambda token: token.replace('.', '') temp_tokens = clean_tokens(tokens, remove_dots) From 9f5f8cd0675a0cc8c6c743223d52875ae8e879cd Mon Sep 17 00:00:00 2001 From: umerhasan17 Date: Wed, 19 Feb 2020 22:31:17 +0000 Subject: [PATCH 4/4] Modify default value + add tests to improve coverage --- timefhuman/categorize.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/timefhuman/categorize.py b/timefhuman/categorize.py index b43f73c..c6241ad 100644 --- a/timefhuman/categorize.py +++ b/timefhuman/categorize.py @@ -109,7 +109,6 @@ def convert_relative_days(tokens, now=datetime.datetime.now()): return tokens - # TODO: convert to new token-based system def extract_weeks_offset(tokens, end=None, key_tokens=( 'next', 'previous', 'last', 'upcoming', 'past', 'prev')): @@ -341,6 +340,12 @@ def extract_hour_minute_token(tokens, time_of_day=None): This will either be 1 before or 2 before the am/pm token. 12:00 is the default token to prevent failure Tests for this helper function are included in maybe_substitute_hour_minute + >>> extract_hour_minute_token(["3", "o'clock"]) + -2, 3 + >>> extract_hour_minute_token(["Gibberish", "twice"]) + -1, 12 + >>> extract_hour_minute_token(["only one value"]) + -1 12 """ # look at previous n tokens @@ -355,7 +360,7 @@ def extract_hour_minute_token(tokens, time_of_day=None): except IndexError: pass # default return value - return -1, "12:00" + return -1, 12 def maybe_substitute_hour_minute(tokens):