Skip to content

Commit

Permalink
infer 'start_time' and 'end_time' from 'time_range' due to type issues (#691)
Browse files Browse the repository at this point in the history

* infer 'start_time' and 'end_time' from 'time_range' due to type issues

* add warning

* fix ci issue
  • Loading branch information
jtmims authored Sep 23, 2024
1 parent 25aee92 commit 9d054c5
Showing 1 changed file with 12 additions and 13 deletions.
25 changes: 12 additions & 13 deletions src/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -820,19 +820,18 @@ def check_group_daterange(self, group_df: pd.DataFrame, case_dr,
"""
date_col = "date_range"
delimiters = ",.!?/&-:;@_'\\s+"
if not hasattr(group_df, 'start_time') or not hasattr(group_df, 'end_time'):
if hasattr(group_df, 'time_range'):
start_times = []
end_times = []
for tr in group_df['time_range'].values:
tr = tr.split('-')
start_times.append(tr[0])
end_times.append(tr[1])
group_df['start_time'] = pd.Series(start_times)
group_df['end_time'] = pd.Series(end_times)
else:
raise AttributeError('Data catalog is missing attributes `start_time` and/or'
' `end_time` and can not infer from `time_range`')
if hasattr(group_df, 'time_range'):
start_times = []
end_times = []
for tr in group_df['time_range'].values:
tr = tr.replace(' ', '').replace('-', '').replace(':', '')
start_times.append(tr[0:len(tr)//2])
end_times.append(tr[len(tr)//2:])
group_df['start_time'] = pd.Series(start_times)
group_df['end_time'] = pd.Series(end_times)
else:
raise AttributeError('Data catalog is missing the attribute `time_range`;'
' this is a required entry.')
try:
start_time_vals = self.normalize_group_time_vals(group_df['start_time'].values.astype(str))
end_time_vals = self.normalize_group_time_vals(group_df['end_time'].values.astype(str))
Expand Down

0 comments on commit 9d054c5

Please sign in to comment.