Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

o'clock fix #13

Merged
merged 4 commits into from
Feb 20, 2020
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 48 additions & 2 deletions timefhuman/categorize.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def categorize(tokens, now):
[7/17/2018, 3 pm, '-', 7/19/2018, 2 pm]
"""
tokens = list(tokens)
tokens = convert_words_to_numbers(tokens)
tokens = convert_day_of_week(tokens, now)
tokens = convert_relative_days(tokens, now)
tokens = convert_time_of_day(tokens)
Expand All @@ -37,6 +38,24 @@ def categorize(tokens, now):
return tokens


# TODO: add conversions for thirty, forty-five
Copy link
Owner

@alvinwan alvinwan Feb 19, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These conversions could actually be done by https://pypi.org/project/word2number/

Edit: adding word2number might be too much for a single PR. Just noting we don't have to build it ourselves

# TODO: maybe set default for seven o'clock to 7 pm not am?
def convert_words_to_numbers(tokens):
    """Convert spelled-out numbers (zero through twelve) into digit strings.

    Matching is case-insensitive. Tokens that are not strings, or that are
    not one of the recognised number words, are passed through unchanged.
    The input is not modified; a new list is always returned.

    :param tokens: iterable of tokens (typically strings)
    :return: new list in which each number word is replaced by its digits

    >>> convert_words_to_numbers(['five', "o'clock"])
    ['5', "o'clock"]
    >>> convert_words_to_numbers(['seven', "o'clock"])
    ['7', "o'clock"]
    >>> convert_words_to_numbers(['Twelve', 'pm'])
    ['12', 'pm']
    """
    number_words = ("zero", "one", "two", "three", "four", "five", "six",
                    "seven", "eight", "nine", "ten", "eleven", "twelve")
    # The position of each word in the tuple is the number it spells out.
    word_to_digits = {word: str(value)
                      for value, word in enumerate(number_words)}
    return [
        # Only strings can be number words; anything else is left untouched.
        word_to_digits.get(token.lower(), token)
        if isinstance(token, str) else token
        for token in tokens
    ]


# TODO: "monday next week"
def convert_day_of_week(tokens, now=datetime.datetime.now()):
"""Convert day-of-week vernacular into date-like string.
Expand Down Expand Up @@ -316,6 +335,29 @@ def extract_hour_minute(string, time_of_day=None):
return TimeToken(relative_hour=hour, minute=minute, time_of_day=time_of_day)


def extract_hour_minute_token(tokens, time_of_day=None):
    """Find the trailing token that parses as an hour/minute.

    Scans backwards from the end of ``tokens``, trying the last token first
    and then the second-to-last, and hands each candidate to
    ``extract_hour_minute`` together with ``time_of_day``.

    Returns a ``(offset, value)`` pair, where ``offset`` is the negative
    index (-1 or -2) of the token that parsed and ``value`` is whatever
    ``extract_hour_minute`` returned for it. If neither candidate parses, or
    ``tokens`` is too short, the pair ``(-1, "12:00")`` is returned so the
    caller never fails.

    NOTE(review): the fallback value is the plain string "12:00", not the
    result of ``extract_hour_minute`` -- confirm callers handle both.

    Tests for this helper are covered by maybe_substitute_hour_minute.
    """
    lookback = 2  # how many trailing tokens to consider
    for offset in range(-1, -lookback - 1, -1):
        try:
            parsed = extract_hour_minute(tokens[offset], time_of_day)
        except (ValueError, IndexError):
            # ValueError: the candidate did not parse as a time.
            # IndexError: fewer tokens are available than the offset needs.
            continue
        return offset, parsed
    # Nothing usable was found; fall back to the default.
    return -1, "12:00"


def maybe_substitute_hour_minute(tokens):
"""Attempt to extract hour and minute.

Expand All @@ -340,15 +382,19 @@ def maybe_substitute_hour_minute(tokens):
['7/17/18', 3 pm]
>>> maybe_substitute_hour_minute(['3', 'p.m.', '-', '4', 'p.m.'])
[3 pm, '-', 4 pm]
>>> maybe_substitute_hour_minute(['5', "o'clock", 'pm'])
[5 pm]
>>> maybe_substitute_hour_minute(['12', "o'clock", 'pm'])
[12 pm]
"""
remove_dots = lambda token: token.replace('.', '')
temp_tokens = clean_tokens(tokens, remove_dots)

for time_of_day in ('am', 'pm'):
while time_of_day in temp_tokens:
index = temp_tokens.index(time_of_day)
time_token = extract_hour_minute(temp_tokens[index-1], time_of_day)
tokens = tokens[:index-1] + [time_token] + tokens[index+1:]
(unchanged_index, time_token) = extract_hour_minute_token(temp_tokens[:index], time_of_day)
tokens = tokens[:index+unchanged_index] + [time_token] + tokens[index+1:]
temp_tokens = clean_tokens(tokens, remove_dots)

tokens = [extract_hour_minute(token, None)
Expand Down