From a56be265477e84b8a89ca5218b1a1963f372cfbc Mon Sep 17 00:00:00 2001 From: "Charles V. Dulac" Date: Sat, 19 Nov 2016 15:19:10 +0900 Subject: [PATCH] Accept string based primary keys Fixes #138 --- install_xapian.sh | 19 +++++++++--- tests/xapian_tests/models.py | 23 +++++++++++++++ tests/xapian_tests/search_indexes.py | 29 +++++++++++++++++++ tests/xapian_tests/tests/test_backend.py | 37 ++++++++++++++++++++++-- xapian_backend.py | 13 +++++++-- 5 files changed, 112 insertions(+), 9 deletions(-) diff --git a/install_xapian.sh b/install_xapian.sh index 0aee0cd..c4c9607 100755 --- a/install_xapian.sh +++ b/install_xapian.sh @@ -1,9 +1,9 @@ -#!/usr/bin/env bash +#!/usr/bin/env bash -e # first argument of the script is Xapian version (e.g. 1.2.19) VERSION=$1 # prepare -mkdir $VIRTUAL_ENV/packages && cd $VIRTUAL_ENV/packages +mkdir -p $VIRTUAL_ENV/packages && cd $VIRTUAL_ENV/packages CORE=xapian-core-$VERSION BINDINGS=xapian-bindings-$VERSION @@ -15,8 +15,19 @@ curl -O https://oligarchy.co.uk/xapian/$VERSION/${BINDINGS}.tar.xz # extract echo "Extracting source..." -tar xf ${CORE}.tar.xz -tar xf ${BINDINGS}.tar.xz +if [ $OSTYPE = 'darwin*' ]; then + xz -d ${CORE}.tar.xz + xz -d ${BINDINGS}.tar.xz + + tar xf ${CORE}.tar + tar xf ${BINDINGS}.tar +else + tar xf ${CORE}.tar.xz + tar xf ${BINDINGS}.tar.xz +fi + +test -e $VIRTUAL_ENV/packages/${BINDINGS} || exit 1 +test -e $VIRTUAL_ENV/packages/${CORE} || exit 1 # install echo "Installing Xapian-core..." diff --git a/tests/xapian_tests/models.py b/tests/xapian_tests/models.py index 3757eae..af80358 100644 --- a/tests/xapian_tests/models.py +++ b/tests/xapian_tests/models.py @@ -34,3 +34,26 @@ class BlogEntry(models.Model): number = models.IntegerField() float_number = models.FloatField() decimal_number = models.DecimalField(max_digits=4, decimal_places=2) + + +class UUIDBlogEntry(models.Model): + """ + A blog entry with string based primary key instead of an integer. Covers #138 + """ + uuid = models.CharField(primary_key=True, max_length=20) + + datetime = models.DateTimeField() + date = models.DateField() + + tags = models.ManyToManyField(MockTag) + + author = models.CharField(max_length=255) + text = models.TextField() + funny_text = models.TextField() + non_ascii = models.TextField() + url = models.URLField() + + boolean = models.BooleanField() + number = models.IntegerField() + float_number = models.FloatField() + decimal_number = models.DecimalField(max_digits=4, decimal_places=2) diff --git a/tests/xapian_tests/search_indexes.py b/tests/xapian_tests/search_indexes.py index 9d1c023..f41e03b 100644 --- a/tests/xapian_tests/search_indexes.py +++ b/tests/xapian_tests/search_indexes.py @@ -82,6 +82,35 @@ def prepare_empty(self, obj): return '' +class UUIDBlogSearchIndex(BlogSearchIndex): + + def get_model(self): + return models.UUIDBlogEntry + + def prepare_sites(self, obj): + return ['%d' % (i * int(obj.pk.split('-')[1])) for i in range(1, 4)] + + def prepare_tags(self, obj): + if obj.pk == 'uuid-1': + return ['a', 'b', 'c'] + elif obj.pk == 'uuid-2': + return ['ab', 'bc', 'cd'] + else: + return ['an', 'to', 'or'] + + def prepare_keys(self, obj): + return [i * int(obj.pk.split('-')[1]) for i in range(1, 4)] + + def prepare_titles(self, obj): + if obj.pk == 'uuid-1': + return ['object one title one', 'object one title two'] + elif obj.pk == 'uuid-2': + return ['object two title one', 'object two title two'] + else: + return ['object three title one', 'object three title two'] + + + class CompleteBlogEntryIndex(indexes.SearchIndex): text = indexes.CharField(model_attr='text', document=True) author = indexes.CharField(model_attr='author') diff --git a/tests/xapian_tests/tests/test_backend.py b/tests/xapian_tests/tests/test_backend.py index f3625f0..cad2fa9 100644 --- a/tests/xapian_tests/tests/test_backend.py +++ b/tests/xapian_tests/tests/test_backend.py @@ -17,9 +17,8 @@ from haystack.utils.loading import UnifiedIndex from ..search_indexes import XapianNGramIndex, XapianEdgeNGramIndex, \ - CompleteBlogEntryIndex, BlogSearchIndex -from ..models import BlogEntry, AnotherMockModel, MockTag - + CompleteBlogEntryIndex, BlogSearchIndex, UUIDBlogSearchIndex +from ..models import BlogEntry, AnotherMockModel, MockTag, UUIDBlogEntry XAPIAN_VERSION = [int(x) for x in xapian.__version__.split('.')] @@ -331,6 +330,38 @@ def test_duplicate_update(self): self.backend.update(self.index, self.sample_objs) self.assertEqual(self.backend.document_count(), 3) + def test_update_string_pk(self): + """ + Covers #138, Must not assume django_id is an int + """ + self.sample_objs = [] + + for i in range(1, 4): + entry = UUIDBlogEntry() + entry.uuid = 'uuid-%s' % i + entry.author = 'david%s' % i + entry.url = 'http://example.com/%d/' % i + entry.boolean = bool(i % 2) + entry.number = i*5 + entry.float_number = i*5.0 + entry.decimal_number = Decimal('22.34') + entry.datetime = ( + datetime.datetime(2009, 2, 25, 1, 1, 1) - datetime.timedelta(seconds=i) + ) + entry.date = datetime.date(2009, 2, 23) + datetime.timedelta(days=i) + self.sample_objs.append(entry) + + self.sample_objs[0].float_number = 834.0 + self.sample_objs[1].float_number = 35.5 + self.sample_objs[2].float_number = 972.0 + for obj in self.sample_objs: + obj.save() + + self.backend.update(UUIDBlogSearchIndex(), UUIDBlogEntry.objects.all()) + + self.assertEqual(pks(self.backend.search(xapian.Query(''))['results']), + [1, 2, 3]) + def test_remove(self): self.backend.remove(self.sample_objs[0]) self.assertEqual(pks(self.backend.search(xapian.Query(''))['results']), diff --git a/xapian_backend.py b/xapian_backend.py index b017505..7a24a8f 100755 --- a/xapian_backend.py +++ b/xapian_backend.py @@ -436,7 +436,11 @@ def add_datetime_to_document(termpos, prefix, term, weight): # `django_id` is an int and `django_ct` is text; # besides, they are indexed by their (unstemmed) value. if field['field_name'] == 'django_id': - value = int(value) + try: + value = int(value) + except ValueError: + # Django_id is a string + field['type'] = 'text' value = _term_to_xapian_value(value, field['type']) document.add_term(TERM_PREFIXES[field['field_name']] + value, weight) @@ -1499,7 +1503,12 @@ def _term_query(self, term, field_name, field_type, stemmed=True): if field_name in ('id', 'django_id', 'django_ct'): # to ensure the value is serialized correctly. if field_name == 'django_id': - term = int(term) + try: + term = int(term) + except ValueError: + # Django_id is a string + field_type = 'text' + term = _term_to_xapian_value(term, field_type) return xapian.Query('%s%s' % (TERM_PREFIXES[field_name], term))