# dateparse.py (32 KB)
  1. # Copyright 2010 Matt Chaput. All rights reserved.
  2. #
  3. # Redistribution and use in source and binary forms, with or without
  4. # modification, are permitted provided that the following conditions are met:
  5. #
  6. # 1. Redistributions of source code must retain the above copyright notice,
  7. # this list of conditions and the following disclaimer.
  8. #
  9. # 2. Redistributions in binary form must reproduce the above copyright
  10. # notice, this list of conditions and the following disclaimer in the
  11. # documentation and/or other materials provided with the distribution.
  12. #
  13. # THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
  14. # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  15. # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
  16. # EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  17. # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  18. # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
  19. # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  20. # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  21. # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  22. # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  23. #
  24. # The views and conclusions contained in the software and documentation are
  25. # those of the authors and should not be interpreted as representing official
  26. # policies, either expressed or implied, of Matt Chaput.
  27. import re
  28. import sys
  29. from datetime import datetime, timedelta
  30. from whoosh.compat import string_type, iteritems
  31. from whoosh.qparser import plugins, syntax
  32. from whoosh.qparser.taggers import Tagger
  33. from whoosh.support.relativedelta import relativedelta
  34. from whoosh.util.text import rcompile
  35. from whoosh.util.times import adatetime, timespan
  36. from whoosh.util.times import fill_in, is_void, relative_days
  37. from whoosh.util.times import TimeError
  38. class DateParseError(Exception):
  39. "Represents an error in parsing date text."
  40. # Utility functions
  41. def print_debug(level, msg, *args):
  42. if level > 0:
  43. print((" " * (level - 1)) + (msg % args))
  44. # Parser element objects
  45. class Props(object):
  46. """A dumb little object that just puts copies a dictionary into attibutes
  47. so I can use dot syntax instead of square bracket string item lookup and
  48. save a little bit of typing. Used by :class:`Regex`.
  49. """
  50. def __init__(self, **args):
  51. self.__dict__ = args
  52. def __repr__(self):
  53. return repr(self.__dict__)
  54. def get(self, key, default=None):
  55. return self.__dict__.get(key, default)
  56. class ParserBase(object):
  57. """Base class for date parser elements.
  58. """
  59. def to_parser(self, e):
  60. if isinstance(e, string_type):
  61. return Regex(e)
  62. else:
  63. return e
  64. def parse(self, text, dt, pos=0, debug=-9999):
  65. raise NotImplementedError
  66. def date_from(self, text, dt=None, pos=0, debug=-9999):
  67. if dt is None:
  68. dt = datetime.now()
  69. d, pos = self.parse(text, dt, pos, debug + 1)
  70. return d
  71. class MultiBase(ParserBase):
  72. """Base class for date parser elements such as Sequence and Bag that
  73. have sub-elements.
  74. """
  75. def __init__(self, elements, name=None):
  76. """
  77. :param elements: the sub-elements to match.
  78. :param name: a name for this element (for debugging purposes only).
  79. """
  80. self.elements = [self.to_parser(e) for e in elements]
  81. self.name = name
  82. def __repr__(self):
  83. return "%s<%s>%r" % (self.__class__.__name__, self.name or '',
  84. self.elements)
  85. class Sequence(MultiBase):
  86. """Merges the dates parsed by a sequence of sub-elements.
  87. """
  88. def __init__(self, elements, sep="(\\s+|\\s*,\\s*)", name=None,
  89. progressive=False):
  90. """
  91. :param elements: the sequence of sub-elements to parse.
  92. :param sep: a separator regular expression to match between elements,
  93. or None to not have separators.
  94. :param name: a name for this element (for debugging purposes only).
  95. :param progressive: if True, elements after the first do not need to
  96. match. That is, for elements (a, b, c) and progressive=True, the
  97. sequence matches like ``a[b[c]]``.
  98. """
  99. super(Sequence, self).__init__(elements, name)
  100. self.sep_pattern = sep
  101. if sep:
  102. self.sep_expr = rcompile(sep, re.IGNORECASE)
  103. else:
  104. self.sep_expr = None
  105. self.progressive = progressive
  106. def parse(self, text, dt, pos=0, debug=-9999):
  107. d = adatetime()
  108. first = True
  109. foundall = False
  110. failed = False
  111. print_debug(debug, "Seq %s sep=%r text=%r", self.name,
  112. self.sep_pattern, text[pos:])
  113. for e in self.elements:
  114. print_debug(debug, "Seq %s text=%r", self.name, text[pos:])
  115. if self.sep_expr and not first:
  116. print_debug(debug, "Seq %s looking for sep", self.name)
  117. m = self.sep_expr.match(text, pos)
  118. if m:
  119. pos = m.end()
  120. else:
  121. print_debug(debug, "Seq %s didn't find sep", self.name)
  122. break
  123. print_debug(debug, "Seq %s trying=%r at=%s", self.name, e, pos)
  124. try:
  125. at, newpos = e.parse(text, dt, pos=pos, debug=debug + 1)
  126. except TimeError:
  127. failed = True
  128. break
  129. print_debug(debug, "Seq %s result=%r", self.name, at)
  130. if not at:
  131. break
  132. pos = newpos
  133. print_debug(debug, "Seq %s adding=%r to=%r", self.name, at, d)
  134. try:
  135. d = fill_in(d, at)
  136. except TimeError:
  137. print_debug(debug, "Seq %s Error in fill_in", self.name)
  138. failed = True
  139. break
  140. print_debug(debug, "Seq %s filled date=%r", self.name, d)
  141. first = False
  142. else:
  143. foundall = True
  144. if not failed and (foundall or (not first and self.progressive)):
  145. print_debug(debug, "Seq %s final=%r", self.name, d)
  146. return (d, pos)
  147. else:
  148. print_debug(debug, "Seq %s failed", self.name)
  149. return (None, None)
  150. class Combo(Sequence):
  151. """Parses a sequence of elements in order and combines the dates parsed
  152. by the sub-elements somehow. The default behavior is to accept two dates
  153. from the sub-elements and turn them into a range.
  154. """
  155. def __init__(self, elements, fn=None, sep="(\\s+|\\s*,\\s*)", min=2, max=2,
  156. name=None):
  157. """
  158. :param elements: the sequence of sub-elements to parse.
  159. :param fn: a function to run on all dates found. It should return a
  160. datetime, adatetime, or timespan object. If this argument is None,
  161. the default behavior accepts two dates and returns a timespan.
  162. :param sep: a separator regular expression to match between elements,
  163. or None to not have separators.
  164. :param min: the minimum number of dates required from the sub-elements.
  165. :param max: the maximum number of dates allowed from the sub-elements.
  166. :param name: a name for this element (for debugging purposes only).
  167. """
  168. super(Combo, self).__init__(elements, sep=sep, name=name)
  169. self.fn = fn
  170. self.min = min
  171. self.max = max
  172. def parse(self, text, dt, pos=0, debug=-9999):
  173. dates = []
  174. first = True
  175. print_debug(debug, "Combo %s sep=%r text=%r", self.name,
  176. self.sep_pattern, text[pos:])
  177. for e in self.elements:
  178. if self.sep_expr and not first:
  179. print_debug(debug, "Combo %s looking for sep at %r",
  180. self.name, text[pos:])
  181. m = self.sep_expr.match(text, pos)
  182. if m:
  183. pos = m.end()
  184. else:
  185. print_debug(debug, "Combo %s didn't find sep", self.name)
  186. return (None, None)
  187. print_debug(debug, "Combo %s trying=%r", self.name, e)
  188. try:
  189. at, pos = e.parse(text, dt, pos, debug + 1)
  190. except TimeError:
  191. at, pos = None, None
  192. print_debug(debug, "Combo %s result=%r", self.name, at)
  193. if at is None:
  194. return (None, None)
  195. first = False
  196. if is_void(at):
  197. continue
  198. if len(dates) == self.max:
  199. print_debug(debug, "Combo %s length > %s", self.name, self.max)
  200. return (None, None)
  201. dates.append(at)
  202. print_debug(debug, "Combo %s dates=%r", self.name, dates)
  203. if len(dates) < self.min:
  204. print_debug(debug, "Combo %s length < %s", self.name, self.min)
  205. return (None, None)
  206. return (self.dates_to_timespan(dates), pos)
  207. def dates_to_timespan(self, dates):
  208. if self.fn:
  209. return self.fn(dates)
  210. elif len(dates) == 2:
  211. return timespan(dates[0], dates[1])
  212. else:
  213. raise DateParseError("Don't know what to do with %r" % (dates,))
  214. class Choice(MultiBase):
  215. """Returns the date from the first of its sub-elements that matches.
  216. """
  217. def parse(self, text, dt, pos=0, debug=-9999):
  218. print_debug(debug, "Choice %s text=%r", self.name, text[pos:])
  219. for e in self.elements:
  220. print_debug(debug, "Choice %s trying=%r", self.name, e)
  221. try:
  222. d, newpos = e.parse(text, dt, pos, debug + 1)
  223. except TimeError:
  224. d, newpos = None, None
  225. if d:
  226. print_debug(debug, "Choice %s matched", self.name)
  227. return (d, newpos)
  228. print_debug(debug, "Choice %s no match", self.name)
  229. return (None, None)
  230. class Bag(MultiBase):
  231. """Parses its sub-elements in any order and merges the dates.
  232. """
  233. def __init__(self, elements, sep="(\\s+|\\s*,\\s*)", onceper=True,
  234. requireall=False, allof=None, anyof=None, name=None):
  235. """
  236. :param elements: the sub-elements to parse.
  237. :param sep: a separator regular expression to match between elements,
  238. or None to not have separators.
  239. :param onceper: only allow each element to match once.
  240. :param requireall: if True, the sub-elements can match in any order,
  241. but they must all match.
  242. :param allof: a list of indexes into the list of elements. When this
  243. argument is not None, this element matches only if all the
  244. indicated sub-elements match.
  245. :param allof: a list of indexes into the list of elements. When this
  246. argument is not None, this element matches only if any of the
  247. indicated sub-elements match.
  248. :param name: a name for this element (for debugging purposes only).
  249. """
  250. super(Bag, self).__init__(elements, name)
  251. self.sep_expr = rcompile(sep, re.IGNORECASE)
  252. self.onceper = onceper
  253. self.requireall = requireall
  254. self.allof = allof
  255. self.anyof = anyof
  256. def parse(self, text, dt, pos=0, debug=-9999):
  257. first = True
  258. d = adatetime()
  259. seen = [False] * len(self.elements)
  260. while True:
  261. newpos = pos
  262. print_debug(debug, "Bag %s text=%r", self.name, text[pos:])
  263. if not first:
  264. print_debug(debug, "Bag %s looking for sep", self.name)
  265. m = self.sep_expr.match(text, pos)
  266. if m:
  267. newpos = m.end()
  268. else:
  269. print_debug(debug, "Bag %s didn't find sep", self.name)
  270. break
  271. for i, e in enumerate(self.elements):
  272. print_debug(debug, "Bag %s trying=%r", self.name, e)
  273. try:
  274. at, xpos = e.parse(text, dt, newpos, debug + 1)
  275. except TimeError:
  276. at, xpos = None, None
  277. print_debug(debug, "Bag %s result=%r", self.name, at)
  278. if at:
  279. if self.onceper and seen[i]:
  280. return (None, None)
  281. d = fill_in(d, at)
  282. newpos = xpos
  283. seen[i] = True
  284. break
  285. else:
  286. break
  287. pos = newpos
  288. if self.onceper and all(seen):
  289. break
  290. first = False
  291. if (not any(seen)
  292. or (self.allof and not all(seen[pos] for pos in self.allof))
  293. or (self.anyof and not any(seen[pos] for pos in self.anyof))
  294. or (self.requireall and not all(seen))):
  295. return (None, None)
  296. print_debug(debug, "Bag %s final=%r", self.name, d)
  297. return (d, pos)
  298. class Optional(ParserBase):
  299. """Wraps a sub-element to indicate that the sub-element is optional.
  300. """
  301. def __init__(self, element):
  302. self.element = self.to_parser(element)
  303. def __repr__(self):
  304. return "%s(%r)" % (self.__class__.__name__, self.element)
  305. def parse(self, text, dt, pos=0, debug=-9999):
  306. try:
  307. d, pos = self.element.parse(text, dt, pos, debug + 1)
  308. except TimeError:
  309. d, pos = None, None
  310. if d:
  311. return (d, pos)
  312. else:
  313. return (adatetime(), pos)
  314. class ToEnd(ParserBase):
  315. """Wraps a sub-element and requires that the end of the sub-element's match
  316. be the end of the text.
  317. """
  318. def __init__(self, element):
  319. self.element = element
  320. def __repr__(self):
  321. return "%s(%r)" % (self.__class__.__name__, self.element)
  322. def parse(self, text, dt, pos=0, debug=-9999):
  323. try:
  324. d, pos = self.element.parse(text, dt, pos, debug + 1)
  325. except TimeError:
  326. d, pos = None, None
  327. if d and pos == len(text):
  328. return (d, pos)
  329. else:
  330. return (None, None)
  331. class Regex(ParserBase):
  332. """Matches a regular expression and maps named groups in the pattern to
  333. datetime attributes using a function or overridden method.
  334. There are two points at which you can customize the behavior of this class,
  335. either by supplying functions to the initializer or overriding methods.
  336. * The ``modify`` function or ``modify_props`` method takes a ``Props``
  337. object containing the named groups and modifies its values (in place).
  338. * The ``fn`` function or ``props_to_date`` method takes a ``Props`` object
  339. and the base datetime and returns an adatetime/datetime.
  340. """
  341. fn = None
  342. modify = None
  343. def __init__(self, pattern, fn=None, modify=None):
  344. self.pattern = pattern
  345. self.expr = rcompile(pattern, re.IGNORECASE)
  346. self.fn = fn
  347. self.modify = modify
  348. def __repr__(self):
  349. return "<%r>" % (self.pattern,)
  350. def parse(self, text, dt, pos=0, debug=-9999):
  351. m = self.expr.match(text, pos)
  352. if not m:
  353. return (None, None)
  354. props = self.extract(m)
  355. self.modify_props(props)
  356. try:
  357. d = self.props_to_date(props, dt)
  358. except TimeError:
  359. d = None
  360. if d:
  361. return (d, m.end())
  362. else:
  363. return (None, None)
  364. def extract(self, match):
  365. d = match.groupdict()
  366. for key, value in iteritems(d):
  367. try:
  368. value = int(value)
  369. d[key] = value
  370. except (ValueError, TypeError):
  371. pass
  372. return Props(**d)
  373. def modify_props(self, props):
  374. if self.modify:
  375. self.modify(props)
  376. def props_to_date(self, props, dt):
  377. if self.fn:
  378. return self.fn(props, dt)
  379. else:
  380. args = {}
  381. for key in adatetime.units:
  382. args[key] = props.get(key)
  383. return adatetime(**args)
  384. class Month(Regex):
  385. def __init__(self, *patterns):
  386. self.patterns = patterns
  387. self.exprs = [rcompile(pat, re.IGNORECASE) for pat in self.patterns]
  388. self.pattern = ("(?P<month>"
  389. + "|".join("(%s)" % pat for pat in self.patterns)
  390. + ")")
  391. self.expr = rcompile(self.pattern, re.IGNORECASE)
  392. def modify_props(self, p):
  393. text = p.month
  394. for i, expr in enumerate(self.exprs):
  395. m = expr.match(text)
  396. if m:
  397. p.month = i + 1
  398. break
  399. class PlusMinus(Regex):
  400. def __init__(self, years, months, weeks, days, hours, minutes, seconds):
  401. rel_years = "((?P<years>[0-9]+) *(%s))?" % years
  402. rel_months = "((?P<months>[0-9]+) *(%s))?" % months
  403. rel_weeks = "((?P<weeks>[0-9]+) *(%s))?" % weeks
  404. rel_days = "((?P<days>[0-9]+) *(%s))?" % days
  405. rel_hours = "((?P<hours>[0-9]+) *(%s))?" % hours
  406. rel_mins = "((?P<mins>[0-9]+) *(%s))?" % minutes
  407. rel_secs = "((?P<secs>[0-9]+) *(%s))?" % seconds
  408. self.pattern = ("(?P<dir>[+-]) *%s *%s *%s *%s *%s *%s *%s(?=(\\W|$))"
  409. % (rel_years, rel_months, rel_weeks, rel_days,
  410. rel_hours, rel_mins, rel_secs))
  411. self.expr = rcompile(self.pattern, re.IGNORECASE)
  412. def props_to_date(self, p, dt):
  413. if p.dir == "-":
  414. dir = -1
  415. else:
  416. dir = 1
  417. delta = relativedelta(years=(p.get("years") or 0) * dir,
  418. months=(p.get("months") or 0) * dir,
  419. weeks=(p.get("weeks") or 0) * dir,
  420. days=(p.get("days") or 0) * dir,
  421. hours=(p.get("hours") or 0) * dir,
  422. minutes=(p.get("mins") or 0) * dir,
  423. seconds=(p.get("secs") or 0) * dir)
  424. return dt + delta
  425. class Daynames(Regex):
  426. def __init__(self, next, last, daynames):
  427. self.next_pattern = next
  428. self.last_pattern = last
  429. self._dayname_exprs = tuple(rcompile(pat, re.IGNORECASE)
  430. for pat in daynames)
  431. dn_pattern = "|".join(daynames)
  432. self.pattern = ("(?P<dir>%s|%s) +(?P<day>%s)(?=(\\W|$))"
  433. % (next, last, dn_pattern))
  434. self.expr = rcompile(self.pattern, re.IGNORECASE)
  435. def props_to_date(self, p, dt):
  436. if re.match(p.dir, self.last_pattern):
  437. dir = -1
  438. else:
  439. dir = 1
  440. for daynum, expr in enumerate(self._dayname_exprs):
  441. m = expr.match(p.day)
  442. if m:
  443. break
  444. current_daynum = dt.weekday()
  445. days_delta = relative_days(current_daynum, daynum, dir)
  446. d = dt.date() + timedelta(days=days_delta)
  447. return adatetime(year=d.year, month=d.month, day=d.day)
  448. class Time12(Regex):
  449. def __init__(self):
  450. self.pattern = ("(?P<hour>[1-9]|10|11|12)(:(?P<mins>[0-5][0-9])"
  451. "(:(?P<secs>[0-5][0-9])(\\.(?P<usecs>[0-9]{1,5}))?)?)?"
  452. "\\s*(?P<ampm>am|pm)(?=(\\W|$))")
  453. self.expr = rcompile(self.pattern, re.IGNORECASE)
  454. def props_to_date(self, p, dt):
  455. isam = p.ampm.lower().startswith("a")
  456. if p.hour == 12:
  457. if isam:
  458. hr = 0
  459. else:
  460. hr = 12
  461. else:
  462. hr = p.hour
  463. if not isam:
  464. hr += 12
  465. return adatetime(hour=hr, minute=p.mins, second=p.secs, microsecond=p.usecs)
  466. # Top-level parser classes
  467. class DateParser(object):
  468. """Base class for locale-specific parser classes.
  469. """
  470. day = Regex("(?P<day>([123][0-9])|[1-9])(?=(\\W|$))(?!=:)",
  471. lambda p, dt: adatetime(day=p.day))
  472. year = Regex("(?P<year>[0-9]{4})(?=(\\W|$))",
  473. lambda p, dt: adatetime(year=p.year))
  474. time24 = Regex("(?P<hour>([0-1][0-9])|(2[0-3])):(?P<mins>[0-5][0-9])"
  475. "(:(?P<secs>[0-5][0-9])(\\.(?P<usecs>[0-9]{1,5}))?)?"
  476. "(?=(\\W|$))",
  477. lambda p, dt: adatetime(hour=p.hour, minute=p.mins,
  478. second=p.secs, microsecond=p.usecs))
  479. time12 = Time12()
  480. def __init__(self):
  481. simple_year = "(?P<year>[0-9]{4})"
  482. simple_month = "(?P<month>[0-1][0-9])"
  483. simple_day = "(?P<day>[0-3][0-9])"
  484. simple_hour = "(?P<hour>([0-1][0-9])|(2[0-3]))"
  485. simple_minute = "(?P<minute>[0-5][0-9])"
  486. simple_second = "(?P<second>[0-5][0-9])"
  487. simple_usec = "(?P<microsecond>[0-9]{6})"
  488. tup = (simple_year, simple_month, simple_day, simple_hour,
  489. simple_minute, simple_second, simple_usec)
  490. simple_seq = Sequence(tup, sep="[- .:/]*", name="simple",
  491. progressive=True)
  492. self.simple = Sequence((simple_seq, "(?=(\\s|$))"), sep='')
  493. self.setup()
  494. def setup(self):
  495. raise NotImplementedError
  496. #
  497. def get_parser(self):
  498. return self.all
  499. def parse(self, text, dt, pos=0, debug=-9999):
  500. parser = self.get_parser()
  501. d, newpos = parser.parse(text, dt, pos=pos, debug=debug)
  502. if isinstance(d, (adatetime, timespan)):
  503. d = d.disambiguated(dt)
  504. return (d, newpos)
  505. def date_from(self, text, basedate=None, pos=0, debug=-9999, toend=True):
  506. if basedate is None:
  507. basedate = datetime.utcnow()
  508. parser = self.get_parser()
  509. if toend:
  510. parser = ToEnd(parser)
  511. d = parser.date_from(text, basedate, pos=pos, debug=debug)
  512. if isinstance(d, (adatetime, timespan)):
  513. d = d.disambiguated(basedate)
  514. return d
  515. class English(DateParser):
  516. day = Regex("(?P<day>([123][0-9])|[1-9])(st|nd|rd|th)?(?=(\\W|$))",
  517. lambda p, dt: adatetime(day=p.day))
  518. def setup(self):
  519. self.plusdate = PlusMinus("years|year|yrs|yr|ys|y",
  520. "months|month|mons|mon|mos|mo",
  521. "weeks|week|wks|wk|ws|w",
  522. "days|day|dys|dy|ds|d",
  523. "hours|hour|hrs|hr|hs|h",
  524. "minutes|minute|mins|min|ms|m",
  525. "seconds|second|secs|sec|s")
  526. self.dayname = Daynames("next", "last",
  527. ("monday|mon|mo", "tuesday|tues|tue|tu",
  528. "wednesday|wed|we", "thursday|thur|thu|th",
  529. "friday|fri|fr", "saturday|sat|sa",
  530. "sunday|sun|su"))
  531. midnight_l = lambda p, dt: adatetime(hour=0, minute=0, second=0,
  532. microsecond=0)
  533. midnight = Regex("midnight", midnight_l)
  534. noon_l = lambda p, dt: adatetime(hour=12, minute=0, second=0,
  535. microsecond=0)
  536. noon = Regex("noon", noon_l)
  537. now = Regex("now", lambda p, dt: dt)
  538. self.time = Choice((self.time12, self.time24, midnight, noon, now),
  539. name="time")
  540. def tomorrow_to_date(p, dt):
  541. d = dt.date() + timedelta(days=+1)
  542. return adatetime(year=d.year, month=d.month, day=d.day)
  543. tomorrow = Regex("tomorrow", tomorrow_to_date)
  544. def yesterday_to_date(p, dt):
  545. d = dt.date() + timedelta(days=-1)
  546. return adatetime(year=d.year, month=d.month, day=d.day)
  547. yesterday = Regex("yesterday", yesterday_to_date)
  548. thisyear = Regex("this year", lambda p, dt: adatetime(year=dt.year))
  549. thismonth = Regex("this month",
  550. lambda p, dt: adatetime(year=dt.year,
  551. month=dt.month))
  552. today = Regex("today",
  553. lambda p, dt: adatetime(year=dt.year, month=dt.month,
  554. day=dt.day))
  555. self.month = Month("january|jan", "february|febuary|feb", "march|mar",
  556. "april|apr", "may", "june|jun", "july|jul",
  557. "august|aug", "september|sept|sep", "october|oct",
  558. "november|nov", "december|dec")
  559. # If you specify a day number you must also specify a month... this
  560. # Choice captures that constraint
  561. self.dmy = Choice((Sequence((self.day, self.month, self.year),
  562. name="dmy"),
  563. Sequence((self.month, self.day, self.year),
  564. name="mdy"),
  565. Sequence((self.year, self.month, self.day),
  566. name="ymd"),
  567. Sequence((self.year, self.day, self.month),
  568. name="ydm"),
  569. Sequence((self.day, self.month), name="dm"),
  570. Sequence((self.month, self.day), name="md"),
  571. Sequence((self.month, self.year), name="my"),
  572. self.month, self.year, self.dayname, tomorrow,
  573. yesterday, thisyear, thismonth, today, now,
  574. ), name="date")
  575. self.datetime = Bag((self.time, self.dmy), name="datetime")
  576. self.bundle = Choice((self.plusdate, self.datetime, self.simple),
  577. name="bundle")
  578. self.torange = Combo((self.bundle, "to", self.bundle), name="torange")
  579. self.all = Choice((self.torange, self.bundle), name="all")
  580. # QueryParser plugin
  581. class DateParserPlugin(plugins.Plugin):
  582. """Adds more powerful parsing of DATETIME fields.
  583. >>> parser.add_plugin(DateParserPlugin())
  584. >>> parser.parse(u"date:'last tuesday'")
  585. """
  586. def __init__(self, basedate=None, dateparser=None, callback=None,
  587. free=False, free_expr="([A-Za-z][A-Za-z_0-9]*):([^^]+)"):
  588. """
  589. :param basedate: a datetime object representing the current time
  590. against which to measure relative dates. If you do not supply this
  591. argument, the plugin uses ``datetime.utcnow()``.
  592. :param dateparser: an instance of
  593. :class:`whoosh.qparser.dateparse.DateParser`. If you do not supply
  594. this argument, the plugin automatically uses
  595. :class:`whoosh.qparser.dateparse.English`.
  596. :param callback: a callback function for parsing errors. This allows
  597. you to provide feedback to the user about problems parsing dates.
  598. :param remove: if True, unparseable dates are removed from the token
  599. stream instead of being replaced with ErrorToken.
  600. :param free: if True, this plugin will install a filter early in the
  601. parsing process and try to find undelimited dates such as
  602. ``date:last tuesday``. Note that allowing this could result in
  603. normal query words accidentally being parsed as dates sometimes.
  604. """
  605. self.basedate = basedate
  606. if dateparser is None:
  607. dateparser = English()
  608. self.dateparser = dateparser
  609. self.callback = callback
  610. self.free = free
  611. self.freeexpr = free_expr
  612. def taggers(self, parser):
  613. if self.free:
  614. # If we're tokenizing, we have to go before the FieldsPlugin
  615. return [(DateTagger(self, self.freeexpr), -1)]
  616. else:
  617. return ()
  618. def filters(self, parser):
  619. # Run the filter after the FieldsPlugin assigns field names
  620. return [(self.do_dates, 110)]
  621. def errorize(self, message, node):
  622. if self.callback:
  623. self.callback(message)
  624. return syntax.ErrorNode(message, node)
  625. def text_to_dt(self, node):
  626. text = node.text
  627. try:
  628. dt = self.dateparser.date_from(text, self.basedate)
  629. if dt is None:
  630. return self.errorize(text, node)
  631. else:
  632. n = DateTimeNode(node.fieldname, dt, node.boost)
  633. except DateParseError:
  634. e = sys.exc_info()[1]
  635. n = self.errorize(e, node)
  636. n.startchar = node.startchar
  637. n.endchar = node.endchar
  638. return n
  639. def range_to_dt(self, node):
  640. start = end = None
  641. dp = self.dateparser.get_parser()
  642. if node.start:
  643. start = dp.date_from(node.start, self.basedate)
  644. if start is None:
  645. return self.errorize(node.start, node)
  646. if node.end:
  647. end = dp.date_from(node.end, self.basedate)
  648. if end is None:
  649. return self.errorize(node.end, node)
  650. if start and end:
  651. ts = timespan(start, end).disambiguated(self.basedate)
  652. start, end = ts.start, ts.end
  653. elif start:
  654. start = start.disambiguated(self.basedate)
  655. if isinstance(start, timespan):
  656. start = start.start
  657. elif end:
  658. end = end.disambiguated(self.basedate)
  659. if isinstance(end, timespan):
  660. end = end.end
  661. drn = DateRangeNode(node.fieldname, start, end, boost=node.boost)
  662. drn.startchar = node.startchar
  663. drn.endchar = node.endchar
  664. return drn
  665. def do_dates(self, parser, group):
  666. schema = parser.schema
  667. if not schema:
  668. return group
  669. from whoosh.fields import DATETIME
  670. datefields = frozenset(fieldname for fieldname, field
  671. in parser.schema.items()
  672. if isinstance(field, DATETIME))
  673. for i, node in enumerate(group):
  674. if node.has_fieldname:
  675. fname = node.fieldname or parser.fieldname
  676. else:
  677. fname = None
  678. if isinstance(node, syntax.GroupNode):
  679. group[i] = self.do_dates(parser, node)
  680. elif fname in datefields:
  681. if node.has_text:
  682. group[i] = self.text_to_dt(node)
  683. elif isinstance(node, syntax.RangeNode):
  684. group[i] = self.range_to_dt(node)
  685. return group
  686. class DateTimeNode(syntax.SyntaxNode):
  687. has_fieldname = True
  688. has_boost = True
  689. def __init__(self, fieldname, dt, boost=1.0):
  690. self.fieldname = fieldname
  691. self.dt = dt
  692. self.boost = 1.0
  693. def r(self):
  694. return repr(self.dt)
  695. def query(self, parser):
  696. from whoosh import query
  697. fieldname = self.fieldname or parser.fieldname
  698. field = parser.schema[fieldname]
  699. dt = self.dt
  700. if isinstance(self.dt, datetime):
  701. btext = field.to_bytes(dt)
  702. return query.Term(fieldname, btext, boost=self.boost)
  703. elif isinstance(self.dt, timespan):
  704. return query.DateRange(fieldname, dt.start, dt.end,
  705. boost=self.boost)
  706. else:
  707. raise Exception("Unknown time object: %r" % dt)
  708. class DateRangeNode(syntax.SyntaxNode):
  709. has_fieldname = True
  710. has_boost = True
  711. def __init__(self, fieldname, start, end, boost=1.0):
  712. self.fieldname = fieldname
  713. self.start = start
  714. self.end = end
  715. self.boost = 1.0
  716. def r(self):
  717. return "%r-%r" % (self.start, self.end)
  718. def query(self, parser):
  719. from whoosh import query
  720. fieldname = self.fieldname or parser.fieldname
  721. return query.DateRange(fieldname, self.start, self.end,
  722. boost=self.boost)
  723. class DateTagger(Tagger):
  724. def __init__(self, plugin, expr):
  725. self.plugin = plugin
  726. self.expr = rcompile(expr, re.IGNORECASE)
  727. def match(self, parser, text, pos):
  728. from whoosh.fields import DATETIME
  729. match = self.expr.match(text, pos)
  730. if match:
  731. fieldname = match.group(1)
  732. dtext = match.group(2)
  733. if parser.schema and fieldname in parser.schema:
  734. field = parser.schema[fieldname]
  735. if isinstance(field, DATETIME):
  736. plugin = self.plugin
  737. dateparser = plugin.dateparser
  738. basedate = plugin.basedate
  739. d, newpos = dateparser.parse(dtext, basedate)
  740. if d:
  741. node = DateTimeNode(fieldname, d)
  742. node.startchar = match.start()
  743. node.endchar = newpos + match.start(2)
  744. return node