  1. # Copyright 2011 Matt Chaput. All rights reserved.
  2. #
  3. # Redistribution and use in source and binary forms, with or without
  4. # modification, are permitted provided that the following conditions are met:
  5. #
  6. # 1. Redistributions of source code must retain the above copyright notice,
  7. # this list of conditions and the following disclaimer.
  8. #
  9. # 2. Redistributions in binary form must reproduce the above copyright
  10. # notice, this list of conditions and the following disclaimer in the
  11. # documentation and/or other materials provided with the distribution.
  12. #
  13. # THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
  14. # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  15. # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
  16. # EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  17. # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  18. # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
  19. # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  20. # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  21. # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  22. # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  23. #
  24. # The views and conclusions contained in the software and documentation are
  25. # those of the authors and should not be interpreted as representing official
  26. # policies, either expressed or implied, of Matt Chaput.
  27. """
  28. This module contains base classes/interfaces for "codec" objects.
  29. """
  30. from bisect import bisect_right
  31. from whoosh import columns
  32. from whoosh.automata import lev
  33. from whoosh.compat import abstractmethod, izip, unichr, xrange
  34. from whoosh.filedb.compound import CompoundStorage
  35. from whoosh.system import emptybytes
  36. from whoosh.util import random_name
  37. # Exceptions
  38. class OutOfOrderError(Exception):
  39. pass
  40. # Base classes
class Codec(object):
    """Abstract base class for codec objects: factories for the writer and
    reader objects that determine how a segment's data is laid out in
    storage.
    """

    # Whether this codec records per-document field lengths (used by
    # length-based scoring); subclasses may set this to False.
    length_stats = True

    # Per document value writer

    @abstractmethod
    def per_document_writer(self, storage, segment):
        """Return a PerDocumentWriter for writing per-document values
        (stored fields, columns, lengths, vectors) to the given segment."""
        raise NotImplementedError

    # Inverted index writer

    @abstractmethod
    def field_writer(self, storage, segment):
        """Return a FieldWriter for writing the inverted index of the given
        segment."""
        raise NotImplementedError

    # Postings

    @abstractmethod
    def postings_writer(self, dbfile, byteids=False):
        """Return a PostingsWriter that writes postings to ``dbfile``."""
        raise NotImplementedError

    @abstractmethod
    def postings_reader(self, dbfile, terminfo, format_, term=None, scorer=None):
        """Return a reader (matcher) over the postings described by
        ``terminfo`` in ``dbfile``."""
        raise NotImplementedError

    # Index readers

    def automata(self, storage, segment):
        # Default: the generic automaton helpers (used e.g. for fuzzy term
        # matching); codecs may override with an optimized implementation.
        return Automata()

    @abstractmethod
    def terms_reader(self, storage, segment):
        """Return a TermsReader for the segment's term index."""
        raise NotImplementedError

    @abstractmethod
    def per_document_reader(self, storage, segment):
        """Return a PerDocumentReader for the segment's per-document data."""
        raise NotImplementedError

    # Segments and generations

    @abstractmethod
    def new_segment(self, storage, indexname):
        """Create and return a new, empty Segment object for this codec."""
        raise NotImplementedError
  71. class WrappingCodec(Codec):
  72. def __init__(self, child):
  73. self._child = child
  74. def per_document_writer(self, storage, segment):
  75. return self._child.per_document_writer(storage, segment)
  76. def field_writer(self, storage, segment):
  77. return self._child.field_writer(storage, segment)
  78. def postings_writer(self, dbfile, byteids=False):
  79. return self._child.postings_writer(dbfile, byteids=byteids)
  80. def postings_reader(self, dbfile, terminfo, format_, term=None, scorer=None):
  81. return self._child.postings_reader(dbfile, terminfo, format_, term=term,
  82. scorer=scorer)
  83. def automata(self, storage, segment):
  84. return self._child.automata(storage, segment)
  85. def terms_reader(self, storage, segment):
  86. return self._child.terms_reader(storage, segment)
  87. def per_document_reader(self, storage, segment):
  88. return self._child.per_document_reader(storage, segment)
  89. def new_segment(self, storage, indexname):
  90. return self._child.new_segment(storage, indexname)
  91. # Writer classes
class PerDocumentWriter(object):
    """Abstract interface for writing per-document values (stored fields,
    column values, field lengths, term vectors) for one segment.
    """

    @abstractmethod
    def start_doc(self, docnum):
        """Begin writing values for document number ``docnum``."""
        raise NotImplementedError

    @abstractmethod
    def add_field(self, fieldname, fieldobj, value, length):
        """Add the stored value and field length of one field of the current
        document."""
        raise NotImplementedError

    @abstractmethod
    def add_column_value(self, fieldname, columnobj, value):
        """Add a column value for the current document."""
        raise NotImplementedError("Codec does not implement writing columns")

    @abstractmethod
    def add_vector_items(self, fieldname, fieldobj, items):
        """Add a term vector from an iterable of (text, weight, valuestring)
        tuples."""
        raise NotImplementedError

    def add_vector_matcher(self, fieldname, fieldobj, vmatcher):
        """Convenience wrapper: add a term vector by draining ``vmatcher``
        and feeding the items lazily to add_vector_items()."""
        def readitems():
            # Yield (text, weight, valuestring) for each posting until the
            # matcher is exhausted.
            while vmatcher.is_active():
                text = vmatcher.id()
                weight = vmatcher.weight()
                valuestring = vmatcher.value()
                yield (text, weight, valuestring)
                vmatcher.next()
        self.add_vector_items(fieldname, fieldobj, readitems())

    def finish_doc(self):
        # Called when all values for the current document have been added.
        pass

    def close(self):
        # Subclasses should override to flush and close underlying files.
        pass
class FieldWriter(object):
    """Abstract base for writers that turn a sorted stream of postings into
    an on-disk inverted index, grouped by field and term.
    """

    def add_postings(self, schema, lengths, items):
        # This method translates a generator of (fieldname, btext, docnum, w, v)
        # postings into calls to start_field(), start_term(), add(),
        # finish_term(), finish_field(), etc.

        # Bind the event methods to locals (called once per posting).
        start_field = self.start_field
        start_term = self.start_term
        add = self.add
        finish_term = self.finish_term
        finish_field = self.finish_field

        # ``lengths`` provides doc_field_length(); without it all lengths
        # are reported as 0.
        if lengths:
            dfl = lengths.doc_field_length
        else:
            dfl = lambda docnum, fieldname: 0

        # The fieldname of the previous posting
        lastfn = None
        # The bytes text of the previous posting
        lasttext = None
        # The (fieldname, btext) of the previous spelling posting
        # NOTE(review): only referenced by the disabled spelling code below.
        lastspell = None
        # The field object for the current field
        fieldobj = None
        for fieldname, btext, docnum, weight, value in items:
            # Check for out-of-order postings. This is convoluted because Python
            # 3 removed the ability to compare a string to None
            if lastfn is not None and fieldname < lastfn:
                raise OutOfOrderError("Field %r .. %r" % (lastfn, fieldname))
            if fieldname == lastfn and lasttext and btext < lasttext:
                raise OutOfOrderError("Term %s:%r .. %s:%r"
                                      % (lastfn, lasttext, fieldname, btext))

            # If the fieldname of this posting is different from the last one,
            # tell the writer we're starting a new field
            if fieldname != lastfn:
                if lasttext is not None:
                    finish_term()
                if lastfn is not None and fieldname != lastfn:
                    finish_field()
                fieldobj = schema[fieldname]
                start_field(fieldname, fieldobj)
                lastfn = fieldname
                lasttext = None

            # HACK: items where docnum == -1 indicate words that should be added
            # to the spelling graph, not the postings
            if docnum == -1:
                # spellterm = (fieldname, btext)
                # # There can be duplicates of spelling terms, so only add a spell
                # # term if it's greater than the last one
                # if lastspell is None or spellterm > lastspell:
                #     spellword = fieldobj.from_bytes(btext)
                #     self.add_spell_word(fieldname, spellword)
                #     lastspell = spellterm
                continue

            # If this term is different from the term in the previous posting,
            # tell the writer to start a new term
            if btext != lasttext:
                if lasttext is not None:
                    finish_term()
                start_term(btext)
                lasttext = btext

            # Add this posting
            length = dfl(docnum, fieldname)
            if value is None:
                value = emptybytes
            add(docnum, weight, value, length)

        # Close out the final term and field, if any postings were seen.
        if lasttext is not None:
            finish_term()
        if lastfn is not None:
            finish_field()

    @abstractmethod
    def start_field(self, fieldname, fieldobj):
        """Called when postings for a new field begin."""
        raise NotImplementedError

    @abstractmethod
    def start_term(self, text):
        """Called when postings for a new term (bytes) begin."""
        raise NotImplementedError

    @abstractmethod
    def add(self, docnum, weight, vbytes, length):
        """Add one posting for the current term."""
        raise NotImplementedError

    def add_spell_word(self, fieldname, text):
        # Optional hook for codecs that maintain a separate spelling graph.
        raise NotImplementedError

    @abstractmethod
    def finish_term(self):
        """Called when the current term's postings are complete."""
        raise NotImplementedError

    def finish_field(self):
        # Called when the current field's terms are complete.
        pass

    def close(self):
        # Subclasses should override to flush and close underlying files.
        pass
  204. # Postings
class PostingsWriter(object):
    """Abstract interface for writing the postings list of a single term."""

    @abstractmethod
    def start_postings(self, format_, terminfo):
        """Begin a new postings list using ``format_``; statistics are
        accumulated onto ``terminfo``."""
        raise NotImplementedError

    @abstractmethod
    def add_posting(self, id_, weight, vbytes, length=None):
        """Add a single posting (document/term id, weight, encoded value
        bytes, optional field length)."""
        raise NotImplementedError

    def finish_postings(self):
        # Called when the current postings list is complete.
        pass

    @abstractmethod
    def written(self):
        """Returns True if this object has already written to disk.
        """
        raise NotImplementedError
  219. # Reader classes
  220. class FieldCursor(object):
  221. def first(self):
  222. raise NotImplementedError
  223. def find(self, string):
  224. raise NotImplementedError
  225. def next(self):
  226. raise NotImplementedError
  227. def term(self):
  228. raise NotImplementedError
class TermsReader(object):
    """Abstract interface for reading the term index of a segment."""

    @abstractmethod
    def __contains__(self, term):
        # ``term`` is a (fieldname, btext) pair.
        raise NotImplementedError

    @abstractmethod
    def cursor(self, fieldname, fieldobj):
        """Return a FieldCursor over the terms of the given field."""
        raise NotImplementedError

    @abstractmethod
    def terms(self):
        """Yield (fieldname, btext) pairs for every term in the segment."""
        raise NotImplementedError

    @abstractmethod
    def terms_from(self, fieldname, prefix):
        """Yield (fieldname, btext) pairs for the field's terms, starting
        at ``prefix``."""
        raise NotImplementedError

    @abstractmethod
    def items(self):
        """Yield ((fieldname, btext), terminfo) pairs for every term."""
        raise NotImplementedError

    @abstractmethod
    def items_from(self, fieldname, prefix):
        """Yield ((fieldname, btext), terminfo) pairs starting at
        ``prefix``."""
        raise NotImplementedError

    @abstractmethod
    def term_info(self, fieldname, text):
        """Return the term-info object (statistics) for the given term."""
        raise NotImplementedError

    # NOTE(review): the next two methods are marked @abstractmethod but also
    # carry working default bodies -- subclasses are forced to override them
    # but may delegate back to these implementations.
    @abstractmethod
    def frequency(self, fieldname, text):
        return self.term_info(fieldname, text).weight()

    @abstractmethod
    def doc_frequency(self, fieldname, text):
        return self.term_info(fieldname, text).doc_frequency()

    @abstractmethod
    def matcher(self, fieldname, text, format_, scorer=None):
        """Return a matcher over the postings of the given term."""
        raise NotImplementedError

    @abstractmethod
    def indexed_field_names(self):
        """Return an iterable of the names of fields that have terms in
        this segment."""
        raise NotImplementedError

    def close(self):
        # Subclasses should override to release underlying resources.
        pass
  265. class Automata(object):
  266. @staticmethod
  267. def levenshtein_dfa(uterm, maxdist, prefix=0):
  268. return lev.levenshtein_automaton(uterm, maxdist, prefix).to_dfa()
  269. @staticmethod
  270. def find_matches(dfa, cur):
  271. unull = unichr(0)
  272. term = cur.text()
  273. if term is None:
  274. return
  275. match = dfa.next_valid_string(term)
  276. while match:
  277. cur.find(match)
  278. term = cur.text()
  279. if term is None:
  280. return
  281. if match == term:
  282. yield match
  283. term += unull
  284. match = dfa.next_valid_string(term)
  285. def terms_within(self, fieldcur, uterm, maxdist, prefix=0):
  286. dfa = self.levenshtein_dfa(uterm, maxdist, prefix)
  287. return self.find_matches(dfa, fieldcur)
  288. # Per-doc value reader
class PerDocumentReader(object):
    """Abstract interface for reading per-document values (stored fields,
    columns, lengths, vectors, deletions) from one segment.
    """

    def close(self):
        # Subclasses should override to release underlying resources.
        pass

    @abstractmethod
    def doc_count(self):
        """Return the number of (undeleted) documents."""
        raise NotImplementedError

    @abstractmethod
    def doc_count_all(self):
        """Return the total number of documents, deleted or undeleted."""
        raise NotImplementedError

    # Deletions

    @abstractmethod
    def has_deletions(self):
        raise NotImplementedError

    @abstractmethod
    def is_deleted(self, docnum):
        raise NotImplementedError

    @abstractmethod
    def deleted_docs(self):
        raise NotImplementedError

    def all_doc_ids(self):
        """
        Returns an iterator of all (undeleted) document IDs in the reader.
        """
        is_deleted = self.is_deleted
        return (docnum for docnum in xrange(self.doc_count_all())
                if not is_deleted(docnum))

    def iter_docs(self):
        # Yield (docnum, stored fields) for every undeleted document.
        for docnum in self.all_doc_ids():
            yield docnum, self.stored_fields(docnum)

    # Columns

    def supports_columns(self):
        # Codecs that store column data should override to return True.
        return False

    def has_column(self, fieldname):
        return False

    def list_columns(self):
        raise NotImplementedError

    # Don't need to override this if supports_columns() returns False
    def column_reader(self, fieldname, column):
        raise NotImplementedError

    # Bitmaps

    def field_docs(self, fieldname):
        # Optional: return the set of docnums containing the field, or None
        # if the codec does not track this.
        return None

    # Lengths

    @abstractmethod
    def doc_field_length(self, docnum, fieldname, default=0):
        """Return the stored length of a field in a document, or ``default``
        if no length is stored."""
        raise NotImplementedError

    @abstractmethod
    def field_length(self, fieldname):
        """Return the total length of the field across all documents."""
        raise NotImplementedError

    @abstractmethod
    def min_field_length(self, fieldname):
        raise NotImplementedError

    @abstractmethod
    def max_field_length(self, fieldname):
        raise NotImplementedError

    # Vectors

    def has_vector(self, docnum, fieldname):
        return False

    # Don't need to override this if has_vector() always returns False
    def vector(self, docnum, fieldname, format_):
        raise NotImplementedError

    # Stored

    @abstractmethod
    def stored_fields(self, docnum):
        """Return the stored field values of the given document."""
        raise NotImplementedError

    def all_stored_fields(self):
        # Yield the stored fields of every undeleted document.
        for docnum in self.all_doc_ids():
            yield self.stored_fields(docnum)
  357. # Segment base class
class Segment(object):
    """Do not instantiate this object directly. It is used by the Index object
    to hold information about a segment. A list of objects of this class are
    pickled as part of the TOC file.

    The TOC file stores a minimal amount of information -- mostly a list of
    Segment objects. Segments are the real reverse indexes. Having multiple
    segments allows quick incremental indexing: just create a new segment for
    the new documents, and have the index overlay the new segment over previous
    ones for purposes of reading/search. "Optimizing" the index combines the
    contents of existing segments into one (removing any deleted documents
    along the way).
    """

    # Extension for compound segment files
    COMPOUND_EXT = ".seg"

    # Instance attributes (set in __init__; listed here because instances
    # are pickled into the TOC):
    # self.indexname
    # self.segid

    def __init__(self, indexname):
        self.indexname = indexname
        # Random ID so segment filenames are unique within the index
        self.segid = self._random_id()
        self.compound = False

    @classmethod
    def _random_id(cls, size=16):
        return random_name(size=size)

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, self.segment_id())

    def codec(self):
        # Subclasses must return the Codec that wrote this segment.
        raise NotImplementedError

    def index_name(self):
        return self.indexname

    def segment_id(self):
        if hasattr(self, "name"):
            # Old segment class
            return self.name
        else:
            return "%s_%s" % (self.index_name(), self.segid)

    def is_compound(self):
        # hasattr guard: segments unpickled from old indexes may predate the
        # ``compound`` attribute.
        if not hasattr(self, "compound"):
            return False
        return self.compound

    # File convenience methods

    def make_filename(self, ext):
        return "%s%s" % (self.segment_id(), ext)

    def list_files(self, storage):
        # All files in the storage whose names start with this segment's ID.
        prefix = "%s." % self.segment_id()
        return [name for name in storage.list() if name.startswith(prefix)]

    def create_file(self, storage, ext, **kwargs):
        """Convenience method to create a new file in the given storage named
        with this segment's ID and the given extension. Any keyword arguments
        are passed to the storage's create_file method.
        """
        fname = self.make_filename(ext)
        return storage.create_file(fname, **kwargs)

    def open_file(self, storage, ext, **kwargs):
        """Convenience method to open a file in the given storage named with
        this segment's ID and the given extension. Any keyword arguments are
        passed to the storage's open_file method.
        """
        fname = self.make_filename(ext)
        return storage.open_file(fname, **kwargs)

    def create_compound_file(self, storage):
        # Combine this segment's separate files into a single compound file
        # and delete the originals.
        segfiles = self.list_files(storage)
        assert not any(name.endswith(self.COMPOUND_EXT) for name in segfiles)
        cfile = self.create_file(storage, self.COMPOUND_EXT)
        CompoundStorage.assemble(cfile, storage, segfiles)
        for name in segfiles:
            storage.delete_file(name)
        self.compound = True

    def open_compound_file(self, storage):
        name = self.make_filename(self.COMPOUND_EXT)
        dbfile = storage.open_file(name)
        return CompoundStorage(dbfile, use_mmap=storage.supports_mmap)

    # Abstract methods

    @abstractmethod
    def doc_count_all(self):
        """
        Returns the total number of documents, DELETED OR UNDELETED, in this
        segment.
        """
        raise NotImplementedError

    def doc_count(self):
        """
        Returns the number of (undeleted) documents in this segment.
        """
        return self.doc_count_all() - self.deleted_count()

    def set_doc_count(self, doccount):
        raise NotImplementedError

    def has_deletions(self):
        """
        Returns True if any documents in this segment are deleted.
        """
        return self.deleted_count() > 0

    @abstractmethod
    def deleted_count(self):
        """
        Returns the total number of deleted documents in this segment.
        """
        raise NotImplementedError

    @abstractmethod
    def deleted_docs(self):
        raise NotImplementedError

    @abstractmethod
    def delete_document(self, docnum, delete=True):
        """Deletes the given document number. The document is not actually
        removed from the index until it is optimized.

        :param docnum: The document number to delete.
        :param delete: If False, this undeletes a deleted document.
        """
        raise NotImplementedError

    @abstractmethod
    def is_deleted(self, docnum):
        """
        Returns True if the given document number is deleted.
        """
        raise NotImplementedError

    def should_assemble(self):
        # Whether this segment's files should be combined into a compound
        # file; subclasses may override.
        return True
  474. # Wrapping Segment
  475. class WrappingSegment(Segment):
  476. def __init__(self, child):
  477. self._child = child
  478. def codec(self):
  479. return self._child.codec()
  480. def index_name(self):
  481. return self._child.index_name()
  482. def segment_id(self):
  483. return self._child.segment_id()
  484. def is_compound(self):
  485. return self._child.is_compound()
  486. def should_assemble(self):
  487. return self._child.should_assemble()
  488. def make_filename(self, ext):
  489. return self._child.make_filename(ext)
  490. def list_files(self, storage):
  491. return self._child.list_files(storage)
  492. def create_file(self, storage, ext, **kwargs):
  493. return self._child.create_file(storage, ext, **kwargs)
  494. def open_file(self, storage, ext, **kwargs):
  495. return self._child.open_file(storage, ext, **kwargs)
  496. def create_compound_file(self, storage):
  497. return self._child.create_compound_file(storage)
  498. def open_compound_file(self, storage):
  499. return self._child.open_compound_file(storage)
  500. def delete_document(self, docnum, delete=True):
  501. return self._child.delete_document(docnum, delete=delete)
  502. def has_deletions(self):
  503. return self._child.has_deletions()
  504. def deleted_count(self):
  505. return self._child.deleted_count()
  506. def deleted_docs(self):
  507. return self._child.deleted_docs()
  508. def is_deleted(self, docnum):
  509. return self._child.is_deleted(docnum)
  510. def set_doc_count(self, doccount):
  511. self._child.set_doc_count(doccount)
  512. def doc_count(self):
  513. return self._child.doc_count()
  514. def doc_count_all(self):
  515. return self._child.doc_count_all()
  516. # Multi per doc reader
  517. class MultiPerDocumentReader(PerDocumentReader):
  518. def __init__(self, readers, offset=0):
  519. self._readers = readers
  520. self._doc_offsets = []
  521. self._doccount = 0
  522. for pdr in readers:
  523. self._doc_offsets.append(self._doccount)
  524. self._doccount += pdr.doc_count_all()
  525. self.is_closed = False
  526. def close(self):
  527. for r in self._readers:
  528. r.close()
  529. self.is_closed = True
  530. def doc_count_all(self):
  531. return self._doccount
  532. def doc_count(self):
  533. total = 0
  534. for r in self._readers:
  535. total += r.doc_count()
  536. return total
  537. def _document_reader(self, docnum):
  538. return max(0, bisect_right(self._doc_offsets, docnum) - 1)
  539. def _reader_and_docnum(self, docnum):
  540. rnum = self._document_reader(docnum)
  541. offset = self._doc_offsets[rnum]
  542. return rnum, docnum - offset
  543. # Deletions
  544. def has_deletions(self):
  545. return any(r.has_deletions() for r in self._readers)
  546. def is_deleted(self, docnum):
  547. x, y = self._reader_and_docnum(docnum)
  548. return self._readers[x].is_deleted(y)
  549. def deleted_docs(self):
  550. for r, offset in izip(self._readers, self._doc_offsets):
  551. for docnum in r.deleted_docs():
  552. yield docnum + offset
  553. def all_doc_ids(self):
  554. for r, offset in izip(self._readers, self._doc_offsets):
  555. for docnum in r.all_doc_ids():
  556. yield docnum + offset
  557. # Columns
  558. def has_column(self, fieldname):
  559. return any(r.has_column(fieldname) for r in self._readers)
  560. def column_reader(self, fieldname, column):
  561. if not self.has_column(fieldname):
  562. raise ValueError("No column %r" % (fieldname,))
  563. default = column.default_value()
  564. colreaders = []
  565. for r in self._readers:
  566. if r.has_column(fieldname):
  567. cr = r.column_reader(fieldname, column)
  568. else:
  569. cr = columns.EmptyColumnReader(default, r.doc_count_all())
  570. colreaders.append(cr)
  571. if len(colreaders) == 1:
  572. return colreaders[0]
  573. else:
  574. return columns.MultiColumnReader(colreaders)
  575. # Lengths
  576. def doc_field_length(self, docnum, fieldname, default=0):
  577. x, y = self._reader_and_docnum(docnum)
  578. return self._readers[x].doc_field_length(y, fieldname, default)
  579. def field_length(self, fieldname):
  580. total = 0
  581. for r in self._readers:
  582. total += r.field_length(fieldname)
  583. return total
  584. def min_field_length(self):
  585. return min(r.min_field_length() for r in self._readers)
  586. def max_field_length(self):
  587. return max(r.max_field_length() for r in self._readers)
  588. # Extended base classes
  589. class PerDocWriterWithColumns(PerDocumentWriter):
  590. def __init__(self):
  591. PerDocumentWriter.__init__(self)
  592. # Implementations need to set these attributes
  593. self._storage = None
  594. self._segment = None
  595. self._docnum = None
  596. @abstractmethod
  597. def _has_column(self, fieldname):
  598. raise NotImplementedError
  599. @abstractmethod
  600. def _create_column(self, fieldname, column):
  601. raise NotImplementedError
  602. @abstractmethod
  603. def _get_column(self, fieldname):
  604. raise NotImplementedError
  605. def add_column_value(self, fieldname, column, value):
  606. if not self._has_column(fieldname):
  607. self._create_column(fieldname, column)
  608. self._get_column(fieldname).add(self._docnum, value)
  609. # FieldCursor implementations
  610. class EmptyCursor(FieldCursor):
  611. def first(self):
  612. return None
  613. def find(self, term):
  614. return None
  615. def next(self):
  616. return None
  617. def text(self):
  618. return None
  619. def term_info(self):
  620. return None
  621. def is_valid(self):
  622. return False