Skip to content

Commit

Permalink
Date facets now return datetimes instead of strings.
Browse files Browse the repository at this point in the history
Also added one more test to date facets.
  • Loading branch information
jorgecarleitao committed Nov 11, 2015
1 parent a3ea182 commit 45191f8
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 49 deletions.
36 changes: 27 additions & 9 deletions tests/xapian_tests/tests/test_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ def test_raise_index_error_on_wrong_field(self):
"""
self.assertRaises(InvalidIndexError, self.backend.search, xapian.Query(''), facets=['dsdas'])

def test_date_facets(self):
def test_date_facets_month(self):
facets = {'datetime': {'start_date': datetime.datetime(2008, 10, 26),
'end_date': datetime.datetime(2009, 3, 26),
'gap_by': 'month'}}
Expand All @@ -394,23 +394,41 @@ def test_date_facets(self):
results = self.backend.search(xapian.Query('indexed'), date_facets=facets)
self.assertEqual(results['hits'], 3)
self.assertEqual(results['facets']['dates']['datetime'], [
(b'2009-02-26T00:00:00', 0),
(b'2009-01-26T00:00:00', 3),
(b'2008-12-26T00:00:00', 0),
(b'2008-11-26T00:00:00', 0),
(b'2008-10-26T00:00:00', 0),
(datetime.datetime(2009, 2, 26, 0, 0), 0),
(datetime.datetime(2009, 1, 26, 0, 0), 3),
(datetime.datetime(2008, 12, 26, 0, 0), 0),
(datetime.datetime(2008, 11, 26, 0, 0), 0),
(datetime.datetime(2008, 10, 26, 0, 0), 0),
])

def test_date_facets_seconds(self):
    """Date facets with a one-second gap produce one datetime bin per second."""
    facet_spec = {
        'datetime': {
            'start_date': datetime.datetime(2009, 2, 25, 1, 0, 57),
            'end_date': datetime.datetime(2009, 2, 25, 1, 1, 1),
            'gap_by': 'second',
        },
    }

    # An empty query is expected to match nothing and to carry no facets.
    empty = self.backend.search(xapian.Query(), date_facets=facet_spec)
    self.assertEqual(empty, {'hits': 0, 'results': []})

    found = self.backend.search(xapian.Query('indexed'), date_facets=facet_spec)
    self.assertEqual(found['hits'], 3)

    # Bins are expected newest first, each keyed by a datetime object.
    expected_bins = [
        (datetime.datetime(2009, 2, 25, 1, 1, 0), 0),
        (datetime.datetime(2009, 2, 25, 1, 0, 59), 1),
        (datetime.datetime(2009, 2, 25, 1, 0, 58), 1),
        (datetime.datetime(2009, 2, 25, 1, 0, 57), 1),
    ]
    self.assertEqual(found['facets']['dates']['datetime'], expected_bins)

def test_date_facets_days(self):
facets = {'date': {'start_date': datetime.datetime(2009, 2, 1),
'end_date': datetime.datetime(2009, 3, 15),
'gap_by': 'day',
'gap_amount': 15}}
results = self.backend.search(xapian.Query('indexed'), date_facets=facets)
self.assertEqual(results['hits'], 3)
self.assertEqual(results['facets']['dates']['date'], [
(b'2009-03-03T00:00:00', 0),
(b'2009-02-16T00:00:00', 3),
(b'2009-02-01T00:00:00', 0)
(datetime.datetime(2009, 3, 3, 0, 0), 0),
(datetime.datetime(2009, 2, 16, 0, 0), 3),
(datetime.datetime(2009, 2, 1, 0, 0), 0)
])

def test_query_facets(self):
Expand Down
93 changes: 53 additions & 40 deletions xapian_backend.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import unicode_literals

import time
import datetime
import pickle
import os
Expand Down Expand Up @@ -1010,61 +1009,75 @@ def _do_date_facets(results, date_facets):
eg. {
'pub_date': [
('2009-01-01T00:00:00Z', 5),
('2009-02-01T00:00:00Z', 0),
('2009-03-01T00:00:00Z', 0),
('2009-04-01T00:00:00Z', 1),
('2009-05-01T00:00:00Z', 2),
(datetime.datetime(2009, 1, 1, 0, 0), 5),
(datetime.datetime(2009, 2, 1, 0, 0), 0),
(datetime.datetime(2009, 3, 1, 0, 0), 0),
(datetime.datetime(2009, 4, 1, 0, 0), 1),
(datetime.datetime(2009, 5, 1, 0, 0), 2),
],
}
"""
def next_datetime(previous, gap_value, gap_type):
    """Return ``previous`` advanced by ``gap_value`` units of ``gap_type``.

    :param previous: the ``datetime.datetime`` to advance.
    :param gap_value: how many units to advance by; must be an integer
        for ``'year'`` and ``'month'``.
    :param gap_type: one of ``'second'``, ``'minute'``, ``'hour'``,
        ``'day'``, ``'month'`` or ``'year'``.
    :returns: a new ``datetime.datetime``; ``previous`` is not modified.
    :raises TypeError: if ``gap_type`` is not one of the supported units.
    :raises ValueError: if a year/month step lands on a day that does not
        exist in the target month (e.g. January 31st plus one month),
        because ``datetime.replace`` rejects such dates.
    """
    if gap_type == 'year':
        return previous.replace(year=previous.year + gap_value)

    if gap_type == 'month':
        # Work with zero-based months so that divmod splits the total into
        # whole years and a month index in 0..11.  This keeps December
        # reachable (a plain ``% 12`` would produce month 0) and uses
        # integer arithmetic, so it also works where ``/`` is true division.
        extra_years, month_index = divmod(previous.month - 1 + gap_value, 12)
        return previous.replace(year=previous.year + extra_years,
                                month=month_index + 1)

    if gap_type in ('day', 'hour', 'minute', 'second'):
        # timedelta takes the plural unit name as its keyword argument.
        return previous + datetime.timedelta(**{gap_type + 's': gap_value})

    raise TypeError('\'gap_by\' must be '
                    '{second, minute, hour, day, month, year}')

facet_dict = {}

for date_facet, facet_params in list(date_facets.items()):
gap_type = facet_params.get('gap_by')
gap_value = facet_params.get('gap_amount', 1)
date_range = facet_params['start_date']

# construct the bins of the histogram
facet_list = []
while date_range < facet_params['end_date']:
facet_list.append((date_range.isoformat(), 0))
if gap_type == 'year':
date_range = date_range.replace(
year=date_range.year + int(gap_value)
)
elif gap_type == 'month':
if date_range.month + int(gap_value) > 12:
date_range = date_range.replace(
month=((date_range.month + int(gap_value)) % 12),
year=(date_range.year + (date_range.month + int(gap_value)) / 12)
)
else:
date_range = date_range.replace(
month=date_range.month + int(gap_value)
)
elif gap_type == 'day':
date_range += datetime.timedelta(days=int(gap_value))
elif gap_type == 'hour':
date_range += datetime.timedelta(hours=int(gap_value))
elif gap_type == 'minute':
date_range += datetime.timedelta(minutes=int(gap_value))
elif gap_type == 'second':
date_range += datetime.timedelta(seconds=int(gap_value))
facet_list.append((date_range, 0))
date_range = next_datetime(date_range, gap_value, gap_type)

facet_list = sorted(facet_list, key=lambda x: x[0], reverse=True)

for result in results:
result_date = getattr(result, date_facet)
if result_date:
if not isinstance(result_date, datetime.datetime):
result_date = datetime.datetime(
year=result_date.year,
month=result_date.month,
day=result_date.day,
)
for n, facet_date in enumerate(facet_list):
if result_date > datetime.datetime(*(time.strptime(facet_date[0], '%Y-%m-%dT%H:%M:%S')[0:6])):
facet_list[n] = (facet_list[n][0], (facet_list[n][1] + 1))
break

# convert date to datetime
if not isinstance(result_date, datetime.datetime):
result_date = datetime.datetime(result_date.year,
result_date.month,
result_date.day)

# ignore results outside the boundaries.
if facet_list[0][0] < result_date < facet_list[-1][0]:
continue

# populate the histogram by putting the result on the right bin.
for n, facet_date in enumerate(facet_list):
if result_date > facet_date[0]:
# equal to facet_list[n][1] += 1, but for a tuple
facet_list[n] = (facet_list[n][0], (facet_list[n][1] + 1))
break # bin found; go to next result

facet_dict[date_facet] = facet_list

Expand Down

0 comments on commit 45191f8

Please sign in to comment.