123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347 |
- # Copyright 2007 Matt Chaput. All rights reserved.
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions are met:
- #
- # 1. Redistributions of source code must retain the above copyright notice,
- # this list of conditions and the following disclaimer.
- #
- # 2. Redistributions in binary form must reproduce the above copyright
- # notice, this list of conditions and the following disclaimer in the
- # documentation and/or other materials provided with the distribution.
- #
- # THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
- # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
- # EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
- # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
- # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- #
- # The views and conclusions contained in the software and documentation are
- # those of the authors and should not be interpreted as representing official
- # policies, either expressed or implied, of Matt Chaput.
- from __future__ import division
- from whoosh.compat import b, u
- from whoosh.query import qcore, terms, compound, wrappers
- from whoosh.util.times import datetime_to_long
class RangeMixin(object):
    """Methods shared by the range query classes (``TermRange`` and
    ``NumericRange``).

    The mixin keeps no state of its own. Its methods read the attributes
    ``fieldname``, ``start``, ``end``, ``startexcl``, ``endexcl``, ``boost``
    and ``constantscore`` from the instance they are mixed into. A ``start``
    or ``end`` of ``None`` means the range is open on that side.
    """

    def __repr__(self):
        return ('%s(%r, %r, %r, %s, %s, boost=%s, constantscore=%s)'
                % (self.__class__.__name__, self.fieldname, self.start,
                   self.end, self.startexcl, self.endexcl, self.boost,
                   self.constantscore))

    def __unicode__(self):
        """Return the range as ``field:[start TO end]``.

        Square brackets mark an inclusive bound and curly braces an
        exclusive one. An open (``None``) bound is rendered as an empty
        string.
        """
        startchar = "{" if self.startexcl else "["
        endchar = "}" if self.endexcl else "]"
        start = '' if self.start is None else self.start
        end = '' if self.end is None else self.end
        return u("%s:%s%s TO %s%s") % (self.fieldname, startchar, start, end,
                                       endchar)

    __str__ = __unicode__

    def __eq__(self, other):
        """Return ``True`` if ``other`` is a truthy object of exactly the
        same class whose field name, bounds, exclusivity flags, boost and
        ``constantscore`` all equal this query's.

        :rtype: bool
        """
        # Coerce to bool: the bare ``and`` chain would otherwise hand back
        # ``other`` itself (e.g. None, 0, "") when ``other`` is falsy.
        return bool(other
                    and self.__class__ is other.__class__
                    and self.fieldname == other.fieldname
                    and self.start == other.start
                    and self.end == other.end
                    and self.startexcl == other.startexcl
                    and self.endexcl == other.endexcl
                    and self.boost == other.boost
                    and self.constantscore == other.constantscore)

    def __hash__(self):
        # ``constantscore`` is compared in __eq__ but left out here; equal
        # objects still hash equal, so the contract holds.
        return (hash(self.fieldname) ^ hash(self.start) ^ hash(self.startexcl)
                ^ hash(self.end) ^ hash(self.endexcl) ^ hash(self.boost))

    def is_range(self):
        """Always returns ``True``."""
        return True

    def _comparable_start(self):
        """Return the lower bound as an orderable ``(value, tiebreak)`` pair.

        An open start becomes ``(qcore.Lowest, 0)``. Otherwise the tiebreak
        is 1 for an exclusive start and 0 for an inclusive one, so at the
        same value an exclusive start orders after an inclusive one.
        """
        if self.start is None:
            return (qcore.Lowest, 0)
        else:
            second = 1 if self.startexcl else 0
            return (self.start, second)

    def _comparable_end(self):
        """Return the upper bound as an orderable ``(value, tiebreak)`` pair.

        An open end becomes ``(qcore.Highest, 0)``. Otherwise the tiebreak
        is -1 for an exclusive end and 0 for an inclusive one, so at the
        same value an exclusive end orders before an inclusive one.
        """
        if self.end is None:
            return (qcore.Highest, 0)
        else:
            second = -1 if self.endexcl else 0
            return (self.end, second)

    def overlaps(self, other):
        """Return ``True`` if ``other`` is a ``TermRange`` on the same field
        and either range has an endpoint lying inside the other.
        """
        # NOTE(review): this checks for TermRange specifically, so any
        # other query type (including NumericRange) is reported as not
        # overlapping -- confirm that is intended.
        if not isinstance(other, TermRange):
            return False
        if self.fieldname != other.fieldname:
            return False

        start1 = self._comparable_start()
        start2 = other._comparable_start()
        end1 = self._comparable_end()
        end2 = other._comparable_end()

        return ((start1 >= start2 and start1 <= end2)
                or (end1 >= start2 and end1 <= end2)
                or (start2 >= start1 and start2 <= end1)
                or (end2 >= start1 and end2 <= end1))

    def merge(self, other, intersect=True):
        """Combine this range with ``other`` into one new range query.

        :param other: Another range query on the same field.
        :param intersect: For partially overlapping ranges, if True the
            result covers only the shared part (highest start, lowest end);
            if False it covers both ranges (lowest start, highest end).
        :returns: A new instance of ``self.__class__`` with the larger of
            the two boosts, and ``constantscore`` set if either input has
            it set.
        """
        assert self.fieldname == other.fieldname

        start1 = self._comparable_start()
        start2 = other._comparable_start()
        end1 = self._comparable_end()
        end2 = other._comparable_end()

        # NOTE(review): when one range lies entirely inside the other, the
        # enclosing range's bounds are used even if ``intersect`` is True
        # -- confirm that is intended.
        if start1 >= start2 and end1 <= end2:
            start = start2
            end = end2
        elif start2 >= start1 and end2 <= end1:
            start = start1
            end = end1
        elif intersect:
            start = max(start1, start2)
            end = min(end1, end2)
        else:
            start = min(start1, start2)
            end = max(end1, end2)

        # Turn the comparable pairs back into constructor arguments
        startval = None if start[0] is qcore.Lowest else start[0]
        startexcl = start[1] == 1
        endval = None if end[0] is qcore.Highest else end[0]
        endexcl = end[1] == -1

        boost = max(self.boost, other.boost)
        constantscore = self.constantscore or other.constantscore

        return self.__class__(self.fieldname, startval, endval, startexcl,
                              endexcl, boost=boost,
                              constantscore=constantscore)
class TermRange(RangeMixin, terms.MultiTerm):
    """Matches documents containing any terms in a given range.

    >>> # Match documents where the indexed "id" field is greater than or equal
    >>> # to 'apple' and less than or equal to 'pear'.
    >>> TermRange("id", u"apple", u"pear")
    """

    def __init__(self, fieldname, start, end, startexcl=False, endexcl=False,
                 boost=1.0, constantscore=True):
        """
        :param fieldname: The name of the field to search.
        :param start: Match terms equal to or greater than this.
        :param end: Match terms equal to or less than this.
        :param startexcl: If True, the range start is exclusive. If False, the
            range start is inclusive.
        :param endexcl: If True, the range end is exclusive. If False, the
            range end is inclusive.
        :param boost: Boost factor that should be applied to the raw score of
            results matched by this query.
        :param constantscore: Stored as ``self.constantscore``. How it
            affects scoring is decided by the base class, which is not
            defined in this module.
        """

        self.fieldname = fieldname
        self.start = start
        self.end = end
        self.startexcl = startexcl
        self.endexcl = endexcl
        self.boost = boost
        self.constantscore = constantscore

    def normalize(self):
        """Return an equivalent query in its simplest form.

        * A range open at both ends (start is ``''`` or ``None`` and end is
          ``u'\\uffff'`` or ``None``) becomes an ``Every`` query on the
          field.
        * A range whose start equals its end becomes a single ``Term``, or
          ``qcore.NullQuery`` if either bound is exclusive.
        * Anything else becomes a fresh ``TermRange`` with the same
          settings.
        """
        if self.start in ('', None) and self.end in (u('\uffff'), None):
            # Imported here rather than at module level (presumably to
            # avoid a circular import -- TODO confirm).
            from whoosh.query import Every

            return Every(self.fieldname, boost=self.boost)
        elif self.start == self.end:
            if self.startexcl or self.endexcl:
                return qcore.NullQuery
            return terms.Term(self.fieldname, self.start, boost=self.boost)
        else:
            # Pass constantscore through explicitly. Leaving it out would
            # reset it to the default (True), so the normalized query would
            # no longer compare equal to the original.
            return TermRange(self.fieldname, self.start, self.end,
                             self.startexcl, self.endexcl,
                             boost=self.boost,
                             constantscore=self.constantscore)

    def _btexts(self, ixreader):
        """Yield the terms of this query's field, as returned by
        ``ixreader.terms_from()``, that fall inside the range.

        If ``start`` or ``end`` cannot be converted by the field's
        ``to_bytes()`` (it raises ``ValueError``), nothing is yielded.

        :param ixreader: Index reader providing ``schema`` and
            ``terms_from()``.
        """
        fieldname = self.fieldname
        field = ixreader.schema[fieldname]
        startexcl = self.startexcl
        endexcl = self.endexcl

        if self.start is None:
            start = b("")
        else:
            try:
                start = field.to_bytes(self.start)
            except ValueError:
                return

        if self.end is None:
            # Stand-in upper bound for an open end (presumably chosen to
            # sort after any real term -- TODO confirm).
            end = b("\xFF\xFF\xFF\xFF")
        else:
            try:
                end = field.to_bytes(self.end)
            except ValueError:
                return

        for fname, t in ixreader.terms_from(fieldname, start):
            if fname != fieldname:
                # Ran past the last term of this field
                break
            if t == start and startexcl:
                continue
            if t == end and endexcl:
                break
            if t > end:
                break
            yield t
class NumericRange(RangeMixin, qcore.Query):
    """A range query for NUMERIC fields. Takes advantage of tiered indexing
    to speed up large ranges by matching at a high resolution at the edges of
    the range and a low resolution in the middle.

    >>> # Match numbers from 10 to 5925 in the "number" field.
    >>> nr = NumericRange("number", 10, 5925)
    """

    def __init__(self, fieldname, start, end, startexcl=False, endexcl=False,
                 boost=1.0, constantscore=True):
        """
        :param fieldname: The name of the field to search.
        :param start: Match terms equal to or greater than this number. This
            should be a number type, not a string.
        :param end: Match terms equal to or less than this number. This should
            be a number type, not a string.
        :param startexcl: If True, the range start is exclusive. If False, the
            range start is inclusive.
        :param endexcl: If True, the range end is exclusive. If False, the
            range end is inclusive.
        :param boost: Boost factor that should be applied to the raw score of
            results matched by this query.
        :param constantscore: If True, the compiled query returns a constant
            score (the value of the ``boost`` keyword argument) instead of
            actually scoring the matched terms. This gives a nice speed boost
            and won't affect the results in most cases since numeric ranges
            will almost always be used as a filter.
        """

        self.fieldname = fieldname
        self.start = start
        self.end = end
        self.startexcl = startexcl
        self.endexcl = endexcl
        self.boost = boost
        self.constantscore = constantscore

    # The public query methods below all compile this range into a
    # lower-level query (see _compile_query) and delegate to it.

    def simplify(self, ixreader):
        return self._compile_query(ixreader).simplify(ixreader)

    def estimate_size(self, ixreader):
        return self._compile_query(ixreader).estimate_size(ixreader)

    def estimate_min_size(self, ixreader):
        return self._compile_query(ixreader).estimate_min_size(ixreader)

    def docs(self, searcher):
        q = self._compile_query(searcher.reader())
        return q.docs(searcher)

    def _compile_query(self, ixreader):
        """Translate this numeric range into term-level queries.

        :param ixreader: Index reader whose schema contains this query's
            field.
        :returns: ``qcore.NullQuery`` if the range produces no subqueries;
            otherwise a single ``Term``/``TermRange``, or an ``Or`` of
            several, wrapped in a ``ConstantScoreQuery`` when
            ``self.constantscore`` is set.
        :raises Exception: If the field is not a NUMERIC field.
        """
        # Imported here rather than at module level (presumably to avoid
        # circular imports -- TODO confirm).
        from whoosh.fields import NUMERIC
        from whoosh.util.numeric import tiered_ranges

        field = ixreader.schema[self.fieldname]
        if not isinstance(field, NUMERIC):
            raise Exception("NumericRange: field %r is not numeric"
                            % self.fieldname)

        start = self.start
        if start is not None:
            start = field.prepare_number(start)
        end = self.end
        if end is not None:
            end = field.prepare_number(end)

        subqueries = []
        stb = field.sortable_to_bytes
        # Get the term ranges for the different resolutions
        ranges = tiered_ranges(field.numtype, field.bits, field.signed,
                               start, end, field.shift_step,
                               self.startexcl, self.endexcl)
        for startnum, endnum, shift in ranges:
            if startnum == endnum:
                subq = terms.Term(self.fieldname, stb(startnum, shift))
            else:
                startbytes = stb(startnum, shift)
                endbytes = stb(endnum, shift)
                subq = TermRange(self.fieldname, startbytes, endbytes)
            subqueries.append(subq)

        if not subqueries:
            return qcore.NullQuery

        if len(subqueries) == 1:
            q = subqueries[0]
            # Give the lone subquery the same boost an Or of several
            # subqueries gets below; otherwise the boost is lost when
            # constantscore is False. The subquery was created above, so
            # changing it in place affects nothing else.
            q.boost = self.boost
        else:
            q = compound.Or(subqueries, boost=self.boost)

        if self.constantscore:
            q = wrappers.ConstantScoreQuery(q, self.boost)
        return q

    def matcher(self, searcher, context=None):
        q = self._compile_query(searcher.reader())
        return q.matcher(searcher, context)
class DateRange(NumericRange):
    """This is a very thin subclass of :class:`NumericRange` that only
    overrides the initializer and ``__repr__()`` methods to work with datetime
    objects instead of numbers. Internally this object converts the datetime
    objects it's created with to numbers and otherwise acts like a
    ``NumericRange`` query.

    >>> DateRange("date", datetime(2010, 11, 3, 3, 0),
    ...           datetime(2010, 11, 3, 17, 59))
    """

    def __init__(self, fieldname, start, end, startexcl=False, endexcl=False,
                 boost=1.0, constantscore=True):
        """
        :param fieldname: The name of the field to search.
        :param start: Datetime for the start of the range, or ``None`` for
            an open start.
        :param end: Datetime for the end of the range, or ``None`` for an
            open end.
        :param startexcl: If True, the range start is exclusive.
        :param endexcl: If True, the range end is exclusive.
        :param boost: Passed through to ``NumericRange``.
        :param constantscore: Passed through to ``NumericRange``.
        """
        # Keep the original datetime objects for __repr__
        self.startdate = start
        self.enddate = end

        # Only None marks an open bound; test for it explicitly instead of
        # relying on the truthiness of the argument.
        if start is not None:
            start = datetime_to_long(start)
        if end is not None:
            end = datetime_to_long(end)

        super(DateRange, self).__init__(fieldname, start, end,
                                        startexcl=startexcl, endexcl=endexcl,
                                        boost=boost,
                                        constantscore=constantscore)

    def __repr__(self):
        # Show the datetimes rather than the converted numbers, and include
        # constantscore so that queries which compare unequal do not print
        # the same.
        return ('%s(%r, %r, %r, %s, %s, boost=%s, constantscore=%s)'
                % (self.__class__.__name__, self.fieldname,
                   self.startdate, self.enddate,
                   self.startexcl, self.endexcl,
                   self.boost, self.constantscore))
|