Merge pull request #86 from Sparrho/issue-85

Version 1.1.0 changes
ChristopherRabotin · Jun 26, 2015 · 41d0de5 · 41d0de5
2 parents ce46a47 + ab177fe
commit 41d0de5
Show file tree

Hide file tree

Showing 9 changed files with 140 additions and 130 deletions.
diff --git a/README.md b/README.md
@@ -1,3 +1,4 @@
+[![Build Status](https://travis-ci.org/Sparrho/bungiesearch.svg?branch=master)](https://travis-ci.org/Sparrho/bungiesearch)
 # Purpose
 Bungiesearch is a Django wrapper for [elasticsearch-dsl-py](https://github.com/elasticsearch/elasticsearch-dsl-py).
 It inherits from elasticsearch-dsl-py's `Search` class, so all the fabulous features developed by the elasticsearch-dsl-py team are also available in Bungiesearch.
@@ -31,6 +32,11 @@ for item in lazy[5:10]:
 * Django signals
 	* Connect to post save and pre delete signals for the elasticsearch index to correctly reflect the database (almost) at all times.
 
+* Requirements
+	* Django >= 1.7
+	* Python 2.7 (**no Python 3 support yet**)
+
+
 ## Feature examples
 See the "Quick start example" section below for the code needed to run the following examples.
 ### Query a word (or list thereof) on a managed model.
@@ -93,6 +99,105 @@ for item in lazy.filter('range', effective_date={'lte': '2014-09-22'}):
     print item
 
 ```
+# Quick start example
+This example is taken from the `tests` folder. It may be partially outdated, so please refer to the `tests` folder for the latest version.
+
+## Procedure
+1. In your models.py file (or your managers.py), import bungiesearch and use it as a model manager.
+2. Define one or more ModelIndex subclasses which define the mapping between your Django model and elasticsearch.
+3. (Optional) Define SearchAlias subclasses which make it trivial to call complex elasticsearch-dsl-py functions.
+4. Add a BUNGIESEARCH variable in your Django settings, which must contain the elasticsearch URL(s), the modules for the indices, the modules for the search aliases and the signal definitions.
+
+## Example
+
+Here's the code which is applicable to the previous examples.
+### Django Model
+
+```python
+from django.db import models
+from bungiesearch.managers import BungiesearchManager
+
+class Article(models.Model):
+    title = models.TextField(db_index=True)
+    authors = models.TextField(blank=True)
+    description = models.TextField(blank=True)
+    link = models.URLField(max_length=510, unique=True, db_index=True)
+    published = models.DateTimeField(null=True)
+    created = models.DateTimeField(auto_now_add=True)
+    updated = models.DateTimeField(null=True)
+    tweet_count = models.IntegerField()
+    raw = models.BinaryField(null=True)
+    source_hash = models.BigIntegerField(null=True)
+    missing_data = models.CharField(blank=True, max_length=255)
+    positive_feedback = models.PositiveIntegerField(null=True, blank=True, default=0)
+    negative_feedback = models.PositiveIntegerField(null=True, blank=True, default=0)
+    popularity_index = models.IntegerField(default=0)
+
+    objects = BungiesearchManager()
+
+    class Meta:
+        app_label = 'core'
+```
+
+### ModelIndex
+
+The following ModelIndex will generate a mapping containing all fields from `Article`, minus those defined in `ArticleIndex.Meta.exclude`. When the mapping is generated, each field will be mapped to the most appropriate [elasticsearch core type](http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/mapping-core-types.html), with default attributes (as defined in bungiesearch.fields).
+
+These default attributes can be overwritten with `ArticleIndex.Meta.hotfixes`: each dictionary key must be a field defined either in the model or in the ModelIndex subclass (`ArticleIndex` in this case).
+
+```python
+from core.models import Article
+from bungiesearch.fields import DateField, StringField
+from bungiesearch.indices import ModelIndex
+
+
+class ArticleIndex(ModelIndex):
+    effectived_date = DateField(eval_as='obj.created if obj.created and obj.published > obj.created else obj.published')
+    meta_data = StringField(eval_as='" ".join([fld for fld in [obj.link, str(obj.tweet_count), obj.raw] if fld])')
+
+    class Meta:
+        model = Article
+        exclude = ('raw', 'missing_data', 'negative_feedback', 'positive_feedback', 'popularity_index', 'source_hash')
+        hotfixes = {'updated': {'null_value': '2013-07-01'},
+                    'title': {'boost': 1.75},
+                    'description': {'boost': 1.35},
+                    'full_text': {'boost': 1.125}}
+
+```
+
+### SearchAlias
+Defines a search alias for one or more models (in this case only for `core.models.Article`).
+```python
+from core.models import Article
+from bungiesearch.aliases import SearchAlias
+
+
+class SearchTitle(SearchAlias):
+    def alias_for(self, title):
+        return self.search_instance.query('match', title=title)
+
+    class Meta:
+        models = (Article,)
+        alias_name = 'title_search' # This is optional. If none is provided, the name will be the class name in lower case.
+
+class InvalidAlias(SearchAlias):
+    def alias_for_does_not_exist(self, title):
+        return title
+
+    class Meta:
+        models = (Article,)
+```
+
+### Django settings
+```python
+BUNGIESEARCH = {
+                'URLS': [os.getenv('ELASTIC_SEARCH_URL')],
+                'INDICES': {'bungiesearch_demo': 'core.search_indices'},
+                'ALIASES': {'bsearch': 'myproject.search_aliases'},
+                'SIGNALS': {'BUFFER_SIZE': 1}  # uses BungieSignalProcessor
+                }
+```
+
 # Documentation
 
 ## ModelIndex
@@ -304,100 +409,6 @@ Hence, a possibly better implementation is wrapping `post_save_connector` and `p
 ### TIMEOUT
 *Optional:* Elasticsearch connection timeout in seconds. Defaults to `5`.
 
-# Backend code example
-This example is from the `test` folder. It may be partially out-dated, so please refer to the `test` folder for the latest version.
-
-Here's the code which is applicable to the previous examples.
-### Django Model
-
-```python
-from django.db import models
-from bungiesearch.managers import BungiesearchManager
-
-class Article(models.Model):
-    title = models.TextField(db_index=True)
-    authors = models.TextField(blank=True)
-    description = models.TextField(blank=True)
-    link = models.URLField(max_length=510, unique=True, db_index=True)
-    published = models.DateTimeField(null=True)
-    created = models.DateTimeField(auto_now_add=True)
-    updated = models.DateTimeField(null=True)
-    tweet_count = models.IntegerField()
-    raw = models.BinaryField(null=True)
-    source_hash = models.BigIntegerField(null=True)
-    missing_data = models.CharField(blank=True, max_length=255)
-    positive_feedback = models.PositiveIntegerField(null=True, blank=True, default=0)
-    negative_feedback = models.PositiveIntegerField(null=True, blank=True, default=0)
-    popularity_index = models.IntegerField(default=0)
-
-    objects = BungiesearchManager()
-
-    class Meta:
-        app_label = 'core'
-```
-
-### ModelIndex
-
-The following ModelIndex will generate a mapping containing all fields from `Article`, minus those defined in `ArticleIndex.Meta.exclude`. When the mapping is generated, each field will the most appropriate [elasticsearch core type](http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/mapping-core-types.html), with default attributes (as defined in bungiesearch.fields).
-
-These default attributes can be overwritten with `ArticleIndex.Meta.hotfixes`: each dictionary key must be field defined either in the model or in the ModelIndex subclass (`ArticleIndex` in this case).
-
-```python
-from core.models import Article
-from bungiesearch.fields import DateField, StringField
-from bungiesearch.indices import ModelIndex
-
-
-class ArticleIndex(ModelIndex):
-    effectived_date = DateField(eval_as='obj.created if obj.created and obj.published > obj.created else obj.published')
-    meta_data = StringField(eval_as='" ".join([fld for fld in [obj.link, str(obj.tweet_count), obj.raw] if fld])')
-
-    class Meta:
-        model = Article
-        exclude = ('raw', 'missing_data', 'negative_feedback', 'positive_feedback', 'popularity_index', 'source_hash')
-        hotfixes = {'updated': {'null_value': '2013-07-01'},
-                    'title': {'boost': 1.75},
-                    'description': {'boost': 1.35},
-                    'full_text': {'boost': 1.125}}
-
-```
-
-### SearchAlias
-Defines a search alias for one or more models (in this case only for `core.models.Article`).
-```python
-from core.models import Article
-from bungiesearch.aliases import SearchAlias
-
-
-class SearchTitle(SearchAlias):
-    def alias_for(self, title):
-        return self.search_instance.query('match', title=title)
-
-    class Meta:
-        models = (Article,)
-        alias_name = 'title_search' # This is optional. If none is provided, the name will be the class name in lower case.
-
-class InvalidAlias(SearchAlias):
-    def alias_for_does_not_exist(self, title):
-        return title
-
-    class Meta:
-        models = (Article,)
-```
-
-### Django settings
-```python
-BUNGIESEARCH = {
-                'URLS': [os.getenv('ELASTIC_SEARCH_URL')],
-                'INDICES': {'bungiesearch_demo': 'core.search_indices'},
-                'ALIASES': {'bsearch': 'myproject.search_aliases'},
-                'SIGNALS': {'BUFFER_SIZE': 1}  # uses BungieSignalProcessor
-                }
-```
-
-# Build Status
-![Travis image](https://travis-ci.org/Sparrho/bungiesearch.svg)
-
 # Testing
 All Bungiesearch tests are in `tests/core/test_bungiesearch.py`.
 You can run the tests by creating a Python virtual environment, installing the requirements from `tests/requirements.txt`, installing the package (`pip install .`) and running `python tests/manage.py test`.

diff --git a/bungiesearch/__init__.py b/bungiesearch/__init__.py
@@ -8,7 +8,7 @@
 import bungiesearch.managers
 from django.conf import settings
 from elasticsearch.client import Elasticsearch
-from six import string_types
+from six import string_types, iteritems, itervalues
 
 
 class Bungiesearch(Search):
@@ -37,9 +37,9 @@ def __load_settings__(cls):
         cls.__loaded_indices__ = True
 
         # Loading indices.
-        for index_name, module_str in cls.BUNGIE['INDICES'].iteritems():
+        for index_name, module_str in iteritems(cls.BUNGIE['INDICES']):
             index_module = import_module(module_str)
-            for index_obj in index_module.__dict__.itervalues():
+            for index_obj in itervalues(index_module.__dict__):
                 try:
                     if issubclass(index_obj, ModelIndex) and index_obj != ModelIndex:
                         index_instance = index_obj()
@@ -55,19 +55,19 @@ def __load_settings__(cls):
                     pass # Oops, just attempted to get subclasses of a non-class.
 
         # Create reverse maps in order to have O(1) access.
-        for index_name, models in cls._index_to_model.iteritems():
+        for index_name, models in iteritems(cls._index_to_model):
             for model in models:
                 cls._model_to_index[model].append(index_name)
                 cls._model_name_to_index[model.__name__].append(index_name)
 
         # Loading aliases.
-        for alias_prefix, module_str in cls.BUNGIE.get('ALIASES', {}).iteritems():
+        for alias_prefix, module_str in iteritems(cls.BUNGIE.get('ALIASES', {})):
             if alias_prefix is None:
                 alias_prefix = 'bungie'
             if alias_prefix != '':
                 alias_prefix += '_'
             alias_module = import_module(module_str)
-            for alias_obj in alias_module.__dict__.itervalues():
+            for alias_obj in itervalues(alias_module.__dict__):
                 try:
                     if issubclass(alias_obj, SearchAlias) and alias_obj != SearchAlias:
                         alias_instance = alias_obj()
@@ -178,7 +178,7 @@ def map_raw_results(cls, raw_results, instance=None):
                 found_results['{1.meta.index}.{0}.{1.meta.id}'.format(model_name, result)] = (pos, result.meta)
 
         # Now that we have model ids per model name, let's fetch everything at once.
-        for ref_name, ids in model_results.iteritems():
+        for ref_name, ids in iteritems(model_results):
             index_name, model_name = ref_name.split('.')
             model_idx = Bungiesearch._idx_name_to_mdl_to_mdlidx[index_name][model_name]
             model_obj = model_idx.get_model()
@@ -224,7 +224,7 @@ def __init__(self, urls=None, timeout=None, force_new=False, raw_results=False,
 
         search_keys = ['using', 'index', 'doc_type', 'extra']
         search_settings, es_settings = {}, {}
-        for k, v in kwargs.iteritems():
+        for k, v in iteritems(kwargs):
             if k in search_keys:
                 search_settings[k] = v
             else:

diff --git a/bungiesearch/fields.py b/bungiesearch/fields.py
@@ -1,4 +1,5 @@
 from django.template.defaultfilters import striptags
+from six import iteritems
 
 
 class AbstractField(object):
@@ -52,12 +53,12 @@ def __init__(self, **args):
         if not self.model_attr and not self.eval_func:
             raise KeyError('{} gets its value via a model attribute or an eval function, but neither of `model_attr`, `eval_as` is provided. Args were {}.'.format(unicode(self), args))
 
-        for attr, value in args.iteritems():
+        for attr, value in iteritems(args):
             if attr not in self.fields and attr not in AbstractField.common_fields:
                 raise KeyError('Attribute `{}` is not allowed for core type {}.'.format(attr, self.coretype))
             setattr(self, attr, value)
 
-        for attr, value in self.defaults.iteritems():
+        for attr, value in iteritems(self.defaults):
             if not hasattr(self, attr):
                 setattr(self, attr, value)
 
@@ -77,7 +78,7 @@ def value(self, obj):
         return getattr(obj, self.model_attr)
 
     def json(self):
-        return dict((attr, val) for attr, val in self.__dict__.iteritems() if attr not in ['eval_func', 'model_attr'])
+        return dict((attr, val) for attr, val in iteritems(self.__dict__) if attr not in ['eval_func', 'model_attr'])
 
 # All the following definitions could probably be done with better polymorphism.
 

diff --git a/bungiesearch/indices.py b/bungiesearch/indices.py
@@ -1,6 +1,7 @@
 import logging
 
 from bungiesearch.fields import AbstractField, django_field_to_index
+from six import iteritems
 
 
 class ModelIndex(object):
@@ -46,7 +47,7 @@ def __init__(self):
         self.fields_to_fetch = list(set(self.fields.keys()).union(additional_fields))
 
         # Adding or updating the fields which are defined at class level.
-        for cls_attr, obj in self.__class__.__dict__.iteritems():
+        for cls_attr, obj in iteritems(self.__class__.__dict__):
             if not isinstance(obj, AbstractField):
                 continue
 
@@ -67,7 +68,7 @@ def get_mapping(self):
         '''
         :return: a dictionary which can be used to generate the elasticsearch index mapping for this doctype.
         '''
-        return {'properties': dict((name, field.json()) for name, field in self.fields.iteritems())}
+        return {'properties': dict((name, field.json()) for name, field in iteritems(self.fields))}
 
     def serialize_object(self, obj, obj_pk=None):
         '''
@@ -84,7 +85,7 @@ def serialize_object(self, obj, obj_pk=None):
             except Exception as e:
                 raise ValueError('Could not find object of primary key = {} in model {} (model index class {}). (Original exception: {}.)'.format(obj_pk, self.model, self.__class__.__name__, e))
 
-        return dict((name, field.value(obj)) for name, field in self.fields.iteritems())
+        return dict((name, field.value(obj)) for name, field in iteritems(self.fields))
 
     def _get_fields(self, fields, excludes, hotfixes):
         '''

diff --git a/bungiesearch/management/commands/search_index.py b/bungiesearch/management/commands/search_index.py
@@ -4,6 +4,7 @@
 
 from django.core.management.base import BaseCommand
 from elasticsearch.helpers import bulk_index
+from six import iteritems
 
 from ... import Bungiesearch
 from ...utils import update_index
@@ -119,7 +120,7 @@ def handle(self, *args, **options):
                         index_to_doctypes[index] = src.get_models(index)
                     logging.info('Deleting mapping for all models ({}) on all indices ({}).'.format(index_to_doctypes.values(), index_to_doctypes.keys()))
 
-                for index, doctype_list in index_to_doctypes.iteritems():
+                for index, doctype_list in iteritems(index_to_doctypes):
                     es.indices.delete_mapping(index, ','.join(doctype_list), params=None)
 
         elif options['action'] == 'create':
@@ -154,10 +155,10 @@ def handle(self, *args, **options):
                     try:
                         es.indices.put_mapping(model_name, src._idx_name_to_mdl_to_mdlidx[index][model_name].get_mapping(), index=index)
                     except Exception as e:
-                        print e
+                        print(e)
                         if raw_input('Something terrible happened! Type "abort" to stop updating the mappings: ') == 'abort':
                             raise e
-                        print 'Continuing.'
+                        print('Continuing.')
 
         else:
             if options['models']:

diff --git a/bungiesearch/utils.py b/bungiesearch/utils.py
@@ -48,7 +48,7 @@ def update_index(model_items, model_name, bulk_size=100, num_docs=-1, start_date
         logging.info('Indexing {} documents on index {}.'.format(num_docs, index_name))
         prev_step = 0
         max_docs = num_docs + bulk_size if num_docs > bulk_size else bulk_size + 1
-        for next_step in xrange(bulk_size, max_docs, bulk_size):
+        for next_step in range(bulk_size, max_docs, bulk_size):
             logging.info('Indexing documents {} to {} of {} total on index {}.'.format(prev_step, next_step, num_docs, index_name))
             bulk_index(src.get_es_instance(), [index_instance.serialize_object(doc) for doc in model_items[prev_step:next_step] if index_instance.matches_indexing_condition(doc)], index=index_name, doc_type=model.__name__, raise_on_error=True)
             prev_step = next_step