zoukankan      html  css  js  c++  java
  • django haystack

    # coding=utf-8
    from haystack import indexes
    from yw_asset.models import *
    
    
    class AssetIndex(indexes.SearchIndex, indexes.Indexable):
        """
        Haystack search index for the ``Asset`` model.

        ``text`` is the template-rendered document field; ``module`` and
        ``processport_set`` are multi-value fields populated from the related
        managers of the same name on each object.
        """
        text = indexes.CharField(document=True, use_template=True)
        module = indexes.MultiValueField(indexed=True, stored=True)
        processport_set = indexes.MultiValueField()

        def get_model(self):
            """Returns the model class this index covers."""
            return Asset

        def index_queryset(self, using=None):
            """Returns every object of the indexed model."""
            model = self.get_model()
            return model.objects.all()

        def prepare_module(self, obj):
            """Collects ``module_name`` from each object in ``obj.module``."""
            names = []
            for related in obj.module.all():
                names.append(related.module_name)
            return names

        def prepare_processport_set(self, obj):
            """Collects ``process_id`` from each object in ``obj.processport_set``."""
            process_ids = []
            for related in obj.processport_set.all():
                process_ids.append(related.process_id)
            return process_ids
    # encoding: utf-8
    
    from __future__ import absolute_import, division, print_function, unicode_literals
    
    import json
    import os
    import re
    import shutil
    import threading
    import warnings
    
    from django.conf import settings
    from django.core.exceptions import ImproperlyConfigured
    from django.utils import six
    from django.utils.datetime_safe import datetime
    from django.utils.encoding import force_text
    
    from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, EmptyResults, log_query
    from haystack.constants import DJANGO_CT, DJANGO_ID, ID
    from haystack.exceptions import MissingDependency, SearchBackendError, SkipDocument
    from haystack.inputs import Clean, Exact, PythonData, Raw
    from haystack.models import SearchResult
    from haystack.utils import log as logging
    from haystack.utils import get_identifier, get_model_ct
    from haystack.utils.app_loading import haystack_get_model
    from jieba.analyse import ChineseAnalyzer
    
    # Import Whoosh eagerly so a missing install surfaces as Haystack's own
    # MissingDependency error rather than a bare ImportError.
    try:
        import whoosh
    except ImportError:
        raise MissingDependency("The 'whoosh' backend requires the installation of 'Whoosh'. Please refer to the documentation.")

    # Handle minimum requirement.
    # The comparison against a tuple assumes ``whoosh.__version__`` is itself a
    # tuple of ints -- TODO confirm for the Whoosh release in use.
    if not hasattr(whoosh, '__version__') or whoosh.__version__ < (2, 5, 0):
        raise MissingDependency("The 'whoosh' backend requires version 2.5.0 or greater.")
    
    # Bubble up the correct error.
    from whoosh import index
    from whoosh.analysis import StemmingAnalyzer
    from whoosh.fields import ID as WHOOSH_ID
    from whoosh.fields import BOOLEAN, DATETIME, IDLIST, KEYWORD, NGRAM, NGRAMWORDS, NUMERIC, Schema, TEXT
    from whoosh.filedb.filestore import FileStorage, RamStorage
    from whoosh.highlight import highlight as whoosh_highlight
    from whoosh.highlight import ContextFragmenter, HtmlFormatter
    from whoosh.qparser import QueryParser
    from whoosh.searching import ResultsPage
    from whoosh.writing import AsyncWriter
    
    
    # Matches ISO-8601 style timestamps such as ``2015-03-04T05:06:07`` with an
    # optional fractional-second part (3-6 digits) and optional trailing ``Z``.
    # A raw string keeps the ``\d`` / ``\.`` escapes intact; without the
    # backslashes the pattern only matches literal ``d`` characters.
    DATETIME_REGEX = re.compile(r'^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})T(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(\.\d{3,6}Z?)?$')
    # Thread-local holder for the shared in-memory (RAM) storage.
    LOCALS = threading.local()
    LOCALS.RAM_STORE = None
    
    
    class WhooshHtmlFormatter(HtmlFormatter):
        """
        A pared-down variant of Whoosh's ``HtmlFormatter``.

        Matched terms are wrapped in a bare tag with no extra attributes, so
        highlighted output looks the same as what the Solr, Xapian and
        Elasticsearch backends produce.
        """
        template = "<%(tag)s>%(t)s</%(tag)s>"
    
    
    class WhooshSearchBackend(BaseSearchBackend):
        # Words reserved by Whoosh for special use.
        RESERVED_WORDS = (
            'AND',
            'NOT',
            'OR',
            'TO',
        )

        # Characters reserved by Whoosh for special use.
        # The backslash must come first, so as not to overwrite the other slash
        # replacements. It has to be written as an escaped '\\'; a lone
        # backslash before the closing quote escapes the quote and breaks the
        # whole tuple.
        RESERVED_CHARACTERS = (
            '\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}',
            '[', ']', '^', '"', '~', '*', '?', ':', '.',
        )
    
        def __init__(self, connection_alias, **connection_options):
            """
            Records the connection settings; the index itself is only opened
            later by ``setup()``.

            Keys read from ``connection_options``:

            * ``PATH`` -- directory of the on-disk index (required for file
              storage).
            * ``STORAGE`` -- ``'file'`` (the default) selects file storage; any
              other value switches file storage off.
            * ``POST_LIMIT`` -- stored on ``self.post_limit``; defaults to
              128 MiB.

            Raises ``ImproperlyConfigured`` when file storage is selected but no
            ``PATH`` was supplied.
            """
            super(WhooshSearchBackend, self).__init__(connection_alias, **connection_options)
            self.setup_complete = False
            self.use_file_storage = True
            # ``connection_options`` is a plain dict, so the setting has to be
            # read with ``.get()``; ``getattr()`` on a dict never finds the key
            # and would silently always return the default.
            self.post_limit = connection_options.get('POST_LIMIT', 128 * 1024 * 1024)
            self.path = connection_options.get('PATH')

            if connection_options.get('STORAGE', 'file') != 'file':
                self.use_file_storage = False

            if self.use_file_storage and not self.path:
                raise ImproperlyConfigured("You must specify a 'PATH' in your settings for connection '%s'." % connection_alias)

            self.log = logging.getLogger('haystack')
    
        def setup(self):
            """
            Lazily prepares the storage, schema, query parser and index.

            Called on first use by the other backend methods rather than at
            construction time.
            """
            from haystack import connections

            created_directory = False

            if self.use_file_storage:
                # Make sure the index directory is there.
                if not os.path.exists(self.path):
                    os.makedirs(self.path)
                    created_directory = True

                if not os.access(self.path, os.W_OK):
                    raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)

                self.storage = FileStorage(self.path)
            else:
                # One RAM store per thread, created the first time it is needed.
                if getattr(LOCALS, 'RAM_STORE', None) is None:
                    LOCALS.RAM_STORE = RamStorage()

                self.storage = LOCALS.RAM_STORE

            unified_index = connections[self.connection_alias].get_unified_index()
            self.content_field_name, self.schema = self.build_schema(unified_index.all_searchfields())
            self.parser = QueryParser(self.content_field_name, schema=self.schema)

            if created_directory:
                # A directory we just made cannot hold an index yet.
                self.index = self.storage.create_index(self.schema)
            else:
                try:
                    self.index = self.storage.open_index(schema=self.schema)
                except index.EmptyIndexError:
                    self.index = self.storage.create_index(self.schema)

            self.setup_complete = True
    
        def build_schema(self, fields):
            """
            Translates Haystack search fields into a Whoosh ``Schema``.

            ``fields`` maps field names to Haystack field objects. Returns a
            ``(content_field_name, schema)`` tuple, where ``content_field_name``
            is the index field name of the field flagged ``document=True``
            (an empty string if none is).

            Raises ``SearchBackendError`` when ``fields`` adds nothing beyond
            the three built-in Haystack keys.
            """
            schema_fields = {
                ID: WHOOSH_ID(stored=True, unique=True),
                DJANGO_CT: WHOOSH_ID(stored=True),
                DJANGO_ID: WHOOSH_ID(stored=True),
            }
            # Remember how many keys Haystack itself contributes so we can tell
            # afterwards whether any real field was added.
            builtin_count = len(schema_fields)
            content_field_name = ''

            for field_class in fields.values():
                name = field_class.index_fieldname

                if field_class.is_multivalued:
                    if field_class.indexed is False:
                        whoosh_field = IDLIST(stored=True, field_boost=field_class.boost)
                    else:
                        whoosh_field = KEYWORD(stored=True, commas=True, scorable=True, field_boost=field_class.boost)
                elif field_class.field_type in ('date', 'datetime'):
                    whoosh_field = DATETIME(stored=field_class.stored, sortable=True)
                elif field_class.field_type == 'integer':
                    whoosh_field = NUMERIC(stored=field_class.stored, numtype=int, field_boost=field_class.boost)
                elif field_class.field_type == 'float':
                    whoosh_field = NUMERIC(stored=field_class.stored, numtype=float, field_boost=field_class.boost)
                elif field_class.field_type == 'boolean':
                    # Field boost isn't supported on BOOLEAN as of 1.8.2.
                    whoosh_field = BOOLEAN(stored=field_class.stored)
                elif field_class.field_type == 'ngram':
                    whoosh_field = NGRAM(minsize=3, maxsize=15, stored=field_class.stored, field_boost=field_class.boost)
                elif field_class.field_type == 'edge_ngram':
                    whoosh_field = NGRAMWORDS(minsize=2, maxsize=15, at='start', stored=field_class.stored, field_boost=field_class.boost)
                else:
                    # Everything else is free text, tokenised with jieba's
                    # ChineseAnalyzer.
                    whoosh_field = TEXT(stored=True, analyzer=ChineseAnalyzer(), field_boost=field_class.boost, sortable=True)

                schema_fields[name] = whoosh_field

                if field_class.document is True:
                    content_field_name = name
                    whoosh_field.spelling = True

            # Fail with a clear message instead of letting the backend die
            # later when no search fields were declared.
            if len(schema_fields) <= builtin_count:
                raise SearchBackendError("No fields were found in any search_indexes. Please correct this before attempting to search.")

            return (content_field_name, Schema(**schema_fields))
    
        def update(self, index, iterable, commit=True):
            """
            Prepares every object in ``iterable`` with ``index`` and writes the
            resulting documents to Whoosh.

            ``commit`` is accepted for interface compatibility but not
            consulted: the writer is committed whenever ``iterable`` is
            non-empty.
            """
            if not self.setup_complete:
                self.setup()

            self.index = self.index.refresh()
            writer = AsyncWriter(self.index)

            for obj in iterable:
                try:
                    doc = index.full_prepare(obj)
                except SkipDocument:
                    self.log.debug(u"Indexing for object `%s` skipped", obj)
                    continue

                # Really make sure every value is unicode, because Whoosh won't
                # have it any other way.
                for key in doc:
                    doc[key] = self._from_python(doc[key])

                # Document boosts aren't supported in Whoosh 2.5.0+.
                doc.pop('boost', None)

                try:
                    writer.update_document(**doc)
                except Exception as e:
                    if not self.silently_fail:
                        raise

                    # Log only the object identifier, not the object itself, so
                    # that building the log message cannot trip over encoding
                    # errors.
                    log_extra = {"data": {"index": index,
                                          "object": get_identifier(obj)}}
                    self.log.error(u"%s while preparing object for update" % e.__class__.__name__,
                                   exc_info=True, extra=log_extra)

            if len(iterable) > 0:
                # For now, commit no matter what, as we run into locking issues otherwise.
                writer.commit()
    
        def remove(self, obj_or_string, commit=True):
            """
            Deletes the document identified by ``obj_or_string`` from the index.

            Failures are re-raised unless ``self.silently_fail`` is set, in
            which case they are logged. ``commit`` is not consulted.
            """
            if not self.setup_complete:
                self.setup()

            self.index = self.index.refresh()
            whoosh_id = get_identifier(obj_or_string)

            try:
                id_query = self.parser.parse(u'%s:"%s"' % (ID, whoosh_id))
                self.index.delete_by_query(q=id_query)
            except Exception as e:
                if self.silently_fail:
                    self.log.error("Failed to remove document '%s' from Whoosh: %s", whoosh_id, e, exc_info=True)
                else:
                    raise
    
        def clear(self, models=None, commit=True):
            """
            Empties the index, either completely (``models`` is ``None``) or
            only for the given list/tuple of models.

            Failures are re-raised unless ``self.silently_fail`` is set, in
            which case they are logged. ``commit`` is not consulted.
            """
            if not self.setup_complete:
                self.setup()

            self.index = self.index.refresh()
            wipe_everything = models is None

            if not wipe_everything:
                assert isinstance(models, (list, tuple))

            try:
                if wipe_everything:
                    self.delete_index()
                else:
                    # One "django_ct:<app.model>" clause per model, OR-ed
                    # together into a single delete query.
                    models_to_delete = []

                    for model in models:
                        models_to_delete.append(u"%s:%s" % (DJANGO_CT, get_model_ct(model)))

                    delete_query = self.parser.parse(u" OR ".join(models_to_delete))
                    self.index.delete_by_query(q=delete_query)
            except Exception as e:
                if not self.silently_fail:
                    raise

                if wipe_everything:
                    self.log.error("Failed to clear Whoosh index: %s", e, exc_info=True)
                else:
                    self.log.error("Failed to clear Whoosh index of models '%s': %s", ','.join(models_to_delete),
                                   e, exc_info=True)
    
        def delete_index(self):
            """
            Throws away the whole index and rebuilds an empty one via
            ``setup()``.
            """
            if not self.use_file_storage:
                self.storage.clean()
            elif os.path.exists(self.path):
                # Per the Whoosh mailing list, when wiping everything it is much
                # more efficient to simply delete the index files.
                shutil.rmtree(self.path)

            # Recreate everything.
            self.setup()
    
        def optimize(self):
            """Refreshes the index handle and asks Whoosh to optimize it."""
            if not self.setup_complete:
                self.setup()

            refreshed = self.index.refresh()
            self.index = refreshed
            refreshed.optimize()
    
        def calculate_page(self, start_offset=0, end_offset=None):
            """
            Converts a ``[start_offset, end_offset)`` slice into the
            ``(page_num, page_length)`` pair that Whoosh paginates with.

            A missing ``end_offset`` is treated as 1000000 and a non-positive
            one as 1; a missing ``start_offset`` is treated as 0. Page numbers
            are 1-based.
            """
            if end_offset is None:
                end_offset = 1000000
            elif end_offset <= 0:
                # Whoosh throws an error unless end_offset is greater than 0.
                end_offset = 1

            if start_offset is None:
                start_offset = 0

            page_length = end_offset - start_offset

            if page_length > 0:
                # +1 because Whoosh uses 1-based page numbers.
                page_num = int(start_offset / page_length) + 1
            else:
                page_num = 1

            return page_num, page_length
    
        @log_query
        def search(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                   fields='', highlight=False, facets=None, date_facets=None, query_facets=None,
                   narrow_queries=None, spelling_query=None, within=None,
                   dwithin=None, distance_point=None, models=None,
                   limit_to_registered_models=None, result_class=None, **kwargs):
            """
            Runs ``query_string`` against the index and returns a result dict.

            The dict always has ``results`` and ``hits``; depending on the exit
            path it may also carry ``facets`` and ``spelling_suggestion``.

            * ``sort_by`` -- list of field names, each optionally prefixed with
              ``-``. All entries must share one direction, and only the first
              entry is handed to Whoosh.
            * ``start_offset`` / ``end_offset`` -- slice bounds, converted to a
              page by ``calculate_page()``.
            * ``narrow_queries`` -- set of query strings; a document must match
              every one of them.
            * ``models`` / ``limit_to_registered_models`` -- restrict hits by
              content type.
            * ``facets``, ``date_facets``, ``query_facets`` -- unsupported; a
              warning is issued if given.

            Raises ``SearchBackendError`` for mixed sort directions, and
            re-raises Whoosh's ``ValueError`` from paging unless
            ``self.silently_fail`` is set.
            """
            if not self.setup_complete:
                self.setup()

            # A zero length query should return no results.
            if len(query_string) == 0:
                return {
                    'results': [],
                    'hits': 0,
                }

            query_string = force_text(query_string)

            # A one-character query (non-wildcard) gets nabbed by a stopwords
            # filter and should yield zero results.
            if len(query_string) <= 1 and query_string != u'*':
                return {
                    'results': [],
                    'hits': 0,
                }

            reverse = False

            if sort_by is not None:
                # Determine if we need to reverse the results and if Whoosh can
                # handle what it's being asked to sort by. Reversing is an
                # all-or-nothing action, unfortunately.
                sort_by_list = []
                reverse_counter = 0

                for order_by in sort_by:
                    if order_by.startswith('-'):
                        reverse_counter += 1

                if reverse_counter and reverse_counter != len(sort_by):
                    raise SearchBackendError("Whoosh requires all order_by fields"
                                             " to use the same sort direction")

                for order_by in sort_by:
                    if order_by.startswith('-'):
                        sort_by_list.append(order_by[1:])

                        if len(sort_by_list) == 1:
                            reverse = True
                    else:
                        sort_by_list.append(order_by)

                        if len(sort_by_list) == 1:
                            reverse = False

                sort_by = sort_by_list[0]

            if facets is not None:
                warnings.warn("Whoosh does not handle faceting.", Warning, stacklevel=2)

            if date_facets is not None:
                warnings.warn("Whoosh does not handle date faceting.", Warning, stacklevel=2)

            if query_facets is not None:
                warnings.warn("Whoosh does not handle query faceting.", Warning, stacklevel=2)

            narrowed_results = None
            self.index = self.index.refresh()

            if limit_to_registered_models is None:
                limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

            if models and len(models):
                model_choices = sorted(get_model_ct(model) for model in models)
            elif limit_to_registered_models:
                # Using narrow queries, limit the results to only models handled
                # with the current routers.
                model_choices = self.build_models_list()
            else:
                model_choices = []

            if len(model_choices) > 0:
                if narrow_queries is None:
                    narrow_queries = set()

                narrow_queries.add(' OR '.join(['%s:%s' % (DJANGO_CT, rm) for rm in model_choices]))

            narrow_searcher = None
            searcher = None

            # Everything that can hold an open searcher lives inside this
            # ``try`` so that the early returns and any exception still release
            # it in the ``finally`` below.
            try:
                if narrow_queries is not None:
                    # Potentially expensive? I don't see another way to do it in Whoosh...
                    narrow_searcher = self.index.searcher()

                    for nq in narrow_queries:
                        recent_narrowed_results = narrow_searcher.search(self.parser.parse(force_text(nq)),
                                                                         limit=None)

                        if len(recent_narrowed_results) <= 0:
                            return {
                                'results': [],
                                'hits': 0,
                            }

                        if narrowed_results:
                            narrowed_results.filter(recent_narrowed_results)
                        else:
                            narrowed_results = recent_narrowed_results

                self.index = self.index.refresh()

                if not self.index.doc_count():
                    # Empty index: nothing to search, but a spelling
                    # suggestion may still be wanted.
                    if self.include_spelling:
                        if spelling_query:
                            spelling_suggestion = self.create_spelling_suggestion(spelling_query)
                        else:
                            spelling_suggestion = self.create_spelling_suggestion(query_string)
                    else:
                        spelling_suggestion = None

                    return {
                        'results': [],
                        'hits': 0,
                        'spelling_suggestion': spelling_suggestion,
                    }

                searcher = self.index.searcher()
                parsed_query = self.parser.parse(query_string)

                # In the event of an invalid/stopworded query, recover gracefully.
                if parsed_query is None:
                    return {
                        'results': [],
                        'hits': 0,
                    }

                page_num, page_length = self.calculate_page(start_offset, end_offset)

                search_kwargs = {
                    'pagelen': page_length,
                    'sortedby': sort_by,
                    'reverse': reverse,
                }

                # Handle the case where the results have been narrowed.
                if narrowed_results is not None:
                    search_kwargs['filter'] = narrowed_results

                try:
                    raw_page = searcher.search_page(
                        parsed_query,
                        page_num,
                        **search_kwargs
                    )
                except ValueError:
                    if not self.silently_fail:
                        raise

                    return {
                        'results': [],
                        'hits': 0,
                        'spelling_suggestion': None,
                    }

                # Because as of Whoosh 2.5.1, it will return the wrong page of
                # results if you request something too high. :(
                if raw_page.pagenum < page_num:
                    return {
                        'results': [],
                        'hits': 0,
                        'spelling_suggestion': None,
                    }

                # The results are fully processed here, before the ``finally``
                # block closes the searchers they were read from.
                return self._process_results(raw_page, highlight=highlight, query_string=query_string, spelling_query=spelling_query, result_class=result_class)
            finally:
                if searcher is not None:
                    searcher.close()

                if narrow_searcher is not None:
                    narrow_searcher.close()
    
        def more_like_this(self, model_instance, additional_query_string=None,
                           start_offset=0, end_offset=None, models=None,
                           limit_to_registered_models=None, result_class=None, **kwargs):
            """
            Finds documents similar to ``model_instance`` based on the content
            field and returns a result dict with ``results`` and ``hits``
            (plus ``facets`` / ``spelling_suggestion`` on some exit paths).

            * ``additional_query_string`` -- extra narrowing query; ignored
              when empty or ``'*'``.
            * ``start_offset`` / ``end_offset`` -- slice bounds, converted to a
              page by ``calculate_page()``; ``end_offset`` is also passed to
              Whoosh as ``top``.
            * ``models`` / ``limit_to_registered_models`` -- restrict hits by
              content type.

            Re-raises Whoosh's ``ValueError`` from paging unless
            ``self.silently_fail`` is set.
            """
            if not self.setup_complete:
                self.setup()

            field_name = self.content_field_name
            narrow_queries = set()
            narrowed_results = None
            self.index = self.index.refresh()

            if limit_to_registered_models is None:
                limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

            if models and len(models):
                model_choices = sorted(get_model_ct(model) for model in models)
            elif limit_to_registered_models:
                # Using narrow queries, limit the results to only models handled
                # with the current routers.
                model_choices = self.build_models_list()
            else:
                model_choices = []

            if len(model_choices) > 0:
                narrow_queries.add(' OR '.join(['%s:%s' % (DJANGO_CT, rm) for rm in model_choices]))

            if additional_query_string and additional_query_string != '*':
                narrow_queries.add(additional_query_string)

            # Both start out as None so the cleanup below is safe on every
            # path, including an empty index where no main searcher is opened.
            narrow_searcher = None
            searcher = None

            try:
                if narrow_queries:
                    # Potentially expensive? I don't see another way to do it in Whoosh...
                    narrow_searcher = self.index.searcher()

                    for nq in narrow_queries:
                        recent_narrowed_results = narrow_searcher.search(self.parser.parse(force_text(nq)),
                                                                         limit=None)

                        if len(recent_narrowed_results) <= 0:
                            return {
                                'results': [],
                                'hits': 0,
                            }

                        if narrowed_results:
                            narrowed_results.filter(recent_narrowed_results)
                        else:
                            narrowed_results = recent_narrowed_results

                page_num, page_length = self.calculate_page(start_offset, end_offset)

                self.index = self.index.refresh()
                raw_results = EmptyResults()

                if self.index.doc_count():
                    query = "%s:%s" % (ID, get_identifier(model_instance))
                    searcher = self.index.searcher()
                    parsed_query = self.parser.parse(query)
                    results = searcher.search(parsed_query)

                    if len(results):
                        raw_results = results[0].more_like_this(field_name, top=end_offset)

                    # Handle the case where the results have been narrowed.
                    if narrowed_results is not None and hasattr(raw_results, 'filter'):
                        raw_results.filter(narrowed_results)

                try:
                    raw_page = ResultsPage(raw_results, page_num, page_length)
                except ValueError:
                    if not self.silently_fail:
                        raise

                    return {
                        'results': [],
                        'hits': 0,
                        'spelling_suggestion': None,
                    }

                # Because as of Whoosh 2.5.1, it will return the wrong page of
                # results if you request something too high. :(
                if raw_page.pagenum < page_num:
                    return {
                        'results': [],
                        'hits': 0,
                        'spelling_suggestion': None,
                    }

                # The results are fully processed here, before the ``finally``
                # block closes the searchers they were read from.
                return self._process_results(raw_page, result_class=result_class)
            finally:
                if searcher is not None:
                    searcher.close()

                if narrow_searcher is not None:
                    narrow_searcher.close()
    
        def _process_results(self, raw_page, highlight=False, query_string='', spelling_query=None, result_class=None):
            """
            Turns a Whoosh results page into Haystack's result dict.

            Each hit whose model is among the indexed models becomes a
            ``result_class`` instance (``SearchResult`` by default); every other
            hit is dropped and subtracted from ``hits``.

            Returns a dict with ``results``, ``hits``, ``facets`` (always
            empty) and ``spelling_suggestion`` (only computed when
            ``self.include_spelling`` is set).
            """
            from haystack import connections
            results = []

            # It's important to grab the hits first before slicing. Otherwise, this
            # can cause pagination failures.
            hits = len(raw_page)

            if result_class is None:
                result_class = SearchResult

            facets = {}
            spelling_suggestion = None
            unified_index = connections[self.connection_alias].get_unified_index()
            indexed_models = unified_index.get_indexed_models()

            if highlight:
                # The analyzer and the query terms do not depend on the hit, so
                # they are computed once instead of once per result.
                # NOTE(review): the schema indexes text with ChineseAnalyzer
                # while highlighting tokenises with StemmingAnalyzer -- confirm
                # this mismatch is intended for Chinese content.
                highlight_analyzer = StemmingAnalyzer()
                highlight_terms = [token.text for token in highlight_analyzer(query_string)]

            for doc_offset, raw_result in enumerate(raw_page):
                score = raw_page.score(doc_offset) or 0
                app_label, model_name = raw_result[DJANGO_CT].split('.')
                additional_fields = {}
                model = haystack_get_model(app_label, model_name)

                if model and model in indexed_models:
                    # Looked up once per hit; it is the same for every stored
                    # field of this document.
                    search_index = unified_index.get_index(model)

                    for key, value in raw_result.items():
                        string_key = str(key)
                        field = search_index.fields.get(string_key)

                        if field is not None and hasattr(field, 'convert'):
                            # Special-cased due to the nature of KEYWORD fields.
                            if field.is_multivalued:
                                if value is None or len(value) == 0:
                                    additional_fields[string_key] = []
                                else:
                                    additional_fields[string_key] = value.split(',')
                            else:
                                additional_fields[string_key] = field.convert(value)
                        else:
                            additional_fields[string_key] = self._to_python(value)

                    # These two are passed to the result class positionally.
                    del(additional_fields[DJANGO_CT])
                    del(additional_fields[DJANGO_ID])

                    if highlight:
                        formatter = WhooshHtmlFormatter('em')

                        whoosh_result = whoosh_highlight(
                            additional_fields.get(self.content_field_name),
                            highlight_terms,
                            highlight_analyzer,
                            ContextFragmenter(),
                            formatter
                        )
                        additional_fields['highlighted'] = {
                            self.content_field_name: [whoosh_result],
                        }

                    result = result_class(app_label, model_name, raw_result[DJANGO_ID], score, **additional_fields)
                    results.append(result)
                else:
                    hits -= 1

            if self.include_spelling:
                if spelling_query:
                    spelling_suggestion = self.create_spelling_suggestion(spelling_query)
                else:
                    spelling_suggestion = self.create_spelling_suggestion(query_string)

            return {
                'results': results,
                'hits': hits,
                'facets': facets,
                'spelling_suggestion': spelling_suggestion,
            }
    
        def create_spelling_suggestion(self, query_string):
            """
            Builds a spelling suggestion for ``query_string``.

            Reserved words and characters are stripped, the remainder is split
            on whitespace, and the top correction for each word (where one
            exists) is joined with spaces.

            Returns ``None`` for an empty ``query_string``; otherwise a string,
            which may be empty when no word has a suggestion.
            """
            # Bail out before touching the index: an empty query needs no
            # reader at all.
            if not query_string:
                return None

            cleaned_query = force_text(query_string)

            # Clean the string.
            for rev_word in self.RESERVED_WORDS:
                cleaned_query = cleaned_query.replace(rev_word, '')

            for rev_char in self.RESERVED_CHARACTERS:
                cleaned_query = cleaned_query.replace(rev_char, '')

            # Break it down.
            query_words = cleaned_query.split()
            suggested_words = []

            reader = self.index.reader()

            try:
                corrector = reader.corrector(self.content_field_name)

                for word in query_words:
                    suggestions = corrector.suggest(word, limit=1)

                    if len(suggestions) > 0:
                        suggested_words.append(suggestions[0])
            finally:
                # The reader was opened just for this call, so release it here
                # rather than leaving it open.
                reader.close()

            return ' '.join(suggested_words)
    
        def _from_python(self, value):
            """
            Coerces a Python value into something Whoosh will accept.

            Date-like values without an ``hour`` become midnight datetimes,
            booleans become ``'true'``/``'false'``, lists and tuples become a
            comma-joined string, numbers pass through untouched, and anything
            else is forced to text.

            Code courtesy of pysolr.
            """
            if hasattr(value, 'strftime'):
                # Datetimes are kept as they are; plain dates are promoted.
                if hasattr(value, 'hour'):
                    return value
                return datetime(value.year, value.month, value.day, 0, 0, 0)

            # bool must be tested before the numeric check below, since bool is
            # an int subclass.
            if isinstance(value, bool):
                return 'true' if value else 'false'

            if isinstance(value, (list, tuple)):
                return u','.join(force_text(item) for item in value)

            if isinstance(value, (six.integer_types, float)):
                return value

            return force_text(value)
    
        def _to_python(self, value):
            """
            Converts values from Whoosh to native Python values.

            ``'true'`` / ``'false'`` become booleans, strings matching
            ``DATETIME_REGEX`` become datetimes, and values that JSON-decode to
            a list, dict or number are returned decoded. Anything else is
            returned unchanged.

            A port of the same method in pysolr, as they deal with data the same way.
            """
            if value == 'true':
                return True
            elif value == 'false':
                return False

            if value and isinstance(value, six.string_types):
                possible_datetime = DATETIME_REGEX.search(value)

                if possible_datetime:
                    date_values = {
                        part: int(digits)
                        for part, digits in possible_datetime.groupdict().items()
                    }

                    return datetime(date_values['year'], date_values['month'], date_values['day'], date_values['hour'], date_values['minute'], date_values['second'])

            try:
                # Attempt to use json to load the values.
                converted_value = json.loads(value)
            except (TypeError, ValueError):
                # Not JSON (ValueError) or not a string at all (TypeError):
                # hand the value back untouched. Catching only these keeps
                # KeyboardInterrupt/SystemExit from being swallowed.
                return value

            # Only trust the decoded value for the common built-in types.
            if isinstance(converted_value, (list, tuple, set, dict, six.integer_types, float, complex)):
                return converted_value

            return value
    
    
    class WhooshSearchQuery(BaseSearchQuery):
        """
        Query builder that renders filters as Whoosh query-string text.
        """

        def _convert_datetime(self, date):
            """
            Formats ``date`` as ``YYYYmmddHHMMSS`` text.

            Objects without an ``hour`` attribute get ``000000`` as the time part.
            """
            if hasattr(date, 'hour'):
                layout = '%Y%m%d%H%M%S'
            else:
                layout = '%Y%m%d000000'

            return force_text(date.strftime(layout))

        def clean(self, query_fragment):
            """
            Sanitizes user input before it is handed to the backend.

            The fragment is split on whitespace. Words listed in the backend's
            ``RESERVED_WORDS`` are lower-cased, and any word containing one of
            the backend's ``RESERVED_CHARACTERS`` is wrapped in single quotes
            (Whoosh 1.X no longer accepts a backslash escape, so the whole word
            is quoted instead). The words are re-joined with single spaces.
            """
            sanitized = []

            for token in query_fragment.split():
                if token in self.backend.RESERVED_WORDS:
                    token = token.lower()

                if any(symbol in token for symbol in self.backend.RESERVED_CHARACTERS):
                    token = "'%s'" % token

                sanitized.append(token)

            return ' '.join(sanitized)

        def build_query_fragment(self, field, filter_type, value):
            """
            Renders one ``field`` / ``filter_type`` / ``value`` filter as query text.

            ``value`` is either an input-type object (anything exposing
            ``input_type_name``) or a plain Python value. Plain values are
            wrapped first: strings other than ``' '`` in ``Clean``, everything
            else in ``PythonData``.

            Returns the index field prefix (empty for the special ``'content'``
            field) followed by the fragment. The fragment is wrapped in
            parentheses unless it is empty, ``value`` is a ``Raw`` input, or it
            already starts with ``(`` or ends with ``)``.
            """
            from haystack import connections

            query_frag = ''
            is_datetime = False

            if not hasattr(value, 'input_type_name'):
                # Handle when we've got a ``ValuesListQuerySet``...
                if hasattr(value, 'values_list'):
                    value = list(value)

                is_datetime = hasattr(value, 'strftime')

                # It's not an ``InputType``. Assume ``Clean`` for text and
                # ``PythonData`` for everything else.
                treat_as_text = isinstance(value, six.string_types) and value != ' '
                value = Clean(value) if treat_as_text else PythonData(value)

            # Prepare the query using the InputType.
            prepared_value = value.prepare(self)

            # Collections are converted element by element further down.
            if not isinstance(prepared_value, (set, list, tuple)):
                prepared_value = self.backend._from_python(prepared_value)

            # 'content' is a special reserved word, much like 'pk' in
            # Django's ORM layer. It indicates 'no special field'.
            if field == 'content':
                index_fieldname = ''
            else:
                unified_index = connections[self._using].get_unified_index()
                index_fieldname = u'%s:' % unified_index.get_index_fieldname(field)

            # Format templates for the filter types that map onto one pattern;
            # 'in' and 'range' are assembled by hand below.
            templates = {
                'content': '%s',
                'contains': '*%s*',
                'endswith': "*%s",
                'startswith': "%s*",
                'exact': '%s',
                'gt': "{%s to}",
                'gte': "[%s to]",
                'lt': "{to %s}",
                'lte': "[to %s]",
                'fuzzy': u'%s~',
            }

            if value.post_process is False:
                query_frag = prepared_value
            elif filter_type in ('content', 'contains', 'startswith', 'endswith', 'fuzzy'):
                if value.input_type_name == 'exact':
                    query_frag = prepared_value
                else:
                    # Convert each term separately and AND the results together.
                    if isinstance(prepared_value, six.string_types):
                        pieces = prepared_value.split(' ')
                    else:
                        if is_datetime is True:
                            prepared_value = self._convert_datetime(prepared_value)

                        pieces = [prepared_value]

                    template = templates[filter_type]
                    terms = [template % self.backend._from_python(piece) for piece in pieces]

                    if len(terms) == 1:
                        query_frag = terms[0]
                    else:
                        query_frag = u"(%s)" % " AND ".join(terms)
            elif filter_type == 'in':
                in_options = []

                for candidate in prepared_value:
                    candidate_is_datetime = hasattr(candidate, 'strftime')
                    rendered = self.backend._from_python(candidate)

                    if candidate_is_datetime:
                        # Datetimes are emitted unquoted in their compact form.
                        in_options.append('%s' % self._convert_datetime(rendered))
                    elif isinstance(rendered, six.string_types):
                        in_options.append('"%s"' % rendered)
                    else:
                        in_options.append('%s' % rendered)

                query_frag = "(%s)" % " OR ".join(in_options)
            elif filter_type == 'range':
                lower_raw, upper_raw = prepared_value[0], prepared_value[1]
                lower = self.backend._from_python(lower_raw)
                upper = self.backend._from_python(upper_raw)

                if hasattr(lower_raw, 'strftime'):
                    lower = self._convert_datetime(lower)

                if hasattr(upper_raw, 'strftime'):
                    upper = self._convert_datetime(upper)

                query_frag = u"[%s to %s]" % (lower, upper)
            elif filter_type == 'exact':
                if value.input_type_name == 'exact':
                    query_frag = prepared_value
                else:
                    prepared_value = Exact(prepared_value).prepare(self)
                    query_frag = templates[filter_type] % prepared_value
            else:
                if is_datetime is True:
                    prepared_value = self._convert_datetime(prepared_value)

                query_frag = templates[filter_type] % prepared_value

            needs_parens = (
                len(query_frag)
                and not isinstance(value, Raw)
                and not (query_frag.startswith('(') or query_frag.endswith(')'))
            )

            if needs_parens:
                query_frag = "(%s)" % query_frag

            return u"%s%s" % (index_fieldname, query_frag)
    
    
            # if not filter_type in ('in', 'range'):
            #     # 'in' is a bit of a special case, as we don't want to
            #     # convert a valid list/tuple to string. Defer handling it
            #     # until later...
            #     value = self.backend._from_python(value)
    
    
    class WhooshEngine(BaseEngine):
        """
        Engine definition pairing the Whoosh search backend with its query class.
        """
        # Backend class associated with this engine.
        backend = WhooshSearchBackend
        # Query class associated with this engine.
        query = WhooshSearchQuery
  • 相关阅读:
    单例实现c++
    c++智能指针实现方式1
    c++中处理输入输出的方法
    makefile函数
    5. Longest Palindromic Substring
    go 语言中常用的包
    ubuntu14.04 boost动态库找不到 libboost_system.so.1.58.0
    boost-asio-cpp-network-programming阅读笔记
    链接-装载-库,读书笔记
    LeetCode第二百一十七题(存在重复元素)
  • 原文地址:https://www.cnblogs.com/muzinan110/p/7573925.html
Copyright © 2011-2022 走看看