database.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625
  1. """
  2. Contains the :class:`database <tinydb.database.TinyDB>` and
  3. :class:`tables <tinydb.database.Table>` implementation.
  4. """
  5. import warnings
  6. from . import JSONStorage
  7. from .utils import LRUCache, iteritems, itervalues
  class Document(dict):
      """
      A stored record: a plain ``dict`` that also remembers its document ID.

      The mapping content is the record itself; the ID is exposed through
      the ``doc_id`` attribute (and through the deprecated ``eid`` property).
      """

      def __init__(self, value, doc_id, **kwargs):
          """
          :param value: the record's content, merged in via ``dict.update``
          :param doc_id: the ID made available as ``self.doc_id``
          :param kwargs: initial items; entries of ``value`` override them
          """
          super(Document, self).__init__(**kwargs)
          self.update(value)
          self.doc_id = doc_id

      @property
      def eid(self):
          """
          Deprecated alias of ``doc_id``; emits a ``DeprecationWarning``.
          """
          message = 'eid has been renamed to doc_id'
          warnings.warn(message, DeprecationWarning)
          return self.doc_id


  # Former name of the class, kept as an alias.
  Element = Document
  def _get_doc_id(doc_id, eid):
      """
      Backwards-compatibility shim for the ``eid`` -> ``doc_id`` rename.

      :param doc_id: value passed under the current parameter name
      :param eid: value passed under the deprecated parameter name
      :returns: ``eid`` if it was given (after emitting a
                ``DeprecationWarning``), otherwise ``doc_id``
      :raises TypeError: if both ``eid`` and ``doc_id`` are given
      """
      if eid is None:
          return doc_id

      if doc_id is not None:
          raise TypeError('cannot pass both eid and doc_id')

      # The singular parameter was renamed to ``doc_id`` (not ``doc_ids``)
      warnings.warn('eid has been renamed to doc_id', DeprecationWarning)
      return eid
  def _get_doc_ids(doc_ids, eids):
      """
      Backwards-compatibility shim for the ``eids`` -> ``doc_ids`` rename.

      :param doc_ids: value passed under the current parameter name
      :param eids: value passed under the deprecated parameter name
      :returns: ``eids`` if it was given (after emitting a
                ``DeprecationWarning``), otherwise ``doc_ids``
      :raises TypeError: if both ``eids`` and ``doc_ids`` are given
      """
      if eids is None:
          return doc_ids

      if doc_ids is not None:
          raise TypeError('cannot pass both eids and doc_ids')

      warnings.warn('eids has been renamed to doc_ids', DeprecationWarning)
      return eids
  class DataProxy(dict):
      """
      A table's documents as a ``dict`` that also keeps a reference to the
      storage's complete data dictionary in ``raw_data``.
      """

      def __init__(self, table, raw_data, **kwargs):
          """
          :param table: the table's content, merged in via ``dict.update``
          :param raw_data: the full data dictionary the table was taken from
          :param kwargs: initial items; entries of ``table`` override them
          """
          super(DataProxy, self).__init__(**kwargs)
          self.update(table)
          self.raw_data = raw_data
  class StorageProxy(object):
      """
      A proxy that restricts access to a storage to one single table.
      """

      def __init__(self, storage, table_name):
          """
          :param storage: the storage object to read from and write to
          :param table_name: key of the table inside the storage's data
          """
          self._storage = storage
          self._table_name = table_name

      def read(self):
          """
          Read the table from the storage.

          If the table does not exist yet, an empty table is created and
          written to the storage right away.

          :returns: the table's documents keyed by integer document ID
          :rtype: DataProxy
          """
          # An empty storage may return None
          raw_data = self._storage.read() or {}

          try:
              table = raw_data[self._table_name]
          except KeyError:
              raw_data[self._table_name] = {}
              self._storage.write(raw_data)
              return DataProxy({}, raw_data)

          docs = {}
          for key, val in iteritems(table):
              # Keys are converted to int before being used as document IDs
              doc_id = int(key)
              docs[doc_id] = Document(val, doc_id)

          return DataProxy(docs, raw_data)

      def write(self, data):
          """
          Store ``data`` as the new content of the table.

          :param data: the table content to write
          :type data: DataProxy | dict
          """
          try:
              # Try accessing the full data dict from the data proxy
              raw_data = data.raw_data
          except AttributeError:
              # Not a data proxy, fall back to regular reading. The storage
              # may return None when empty, same as in read()/purge_table().
              raw_data = self._storage.read() or {}

          raw_data[self._table_name] = dict(data)
          self._storage.write(raw_data)

      def purge_table(self):
          """
          Remove the table from the storage; do nothing if it doesn't exist.
          """
          data = self._storage.read() or {}

          # Only a missing table is ignored; errors raised by the storage
          # itself are not swallowed.
          try:
              del data[self._table_name]
          except KeyError:
              return

          self._storage.write(data)
  class TinyDB(object):
      """
      The main class of TinyDB.

      Gives access to the database, provides methods to insert/search/remove
      and getting tables. Attributes not found on the database object are
      looked up on the default table.
      """

      DEFAULT_TABLE = '_default'
      DEFAULT_STORAGE = JSONStorage

      def __init__(self, *args, **kwargs):
          """
          Create a new instance of TinyDB.

          All arguments and keyword arguments will be passed to the underlying
          storage class (default: :class:`~tinydb.storages.JSONStorage`).

          :param storage: The class of the storage to use. Will be initialized
                          with ``args`` and ``kwargs``.
          :param default_table: The name of the default table to populate.
          """
          storage = kwargs.pop('storage', self.DEFAULT_STORAGE)
          default_table = kwargs.pop('default_table', self.DEFAULT_TABLE)

          # Prepare the storage
          #: :type: Storage
          self._storage = storage(*args, **kwargs)

          self._opened = True

          # Prepare the default table
          self._table_cache = {}
          self._table = self.table(default_table)

      def table(self, name=DEFAULT_TABLE, **options):
          """
          Get access to a specific table.

          Creates a new table, if it hasn't been created before, otherwise it
          returns the cached :class:`~tinydb.Table` object. For an already
          cached table the given options are ignored.

          :param name: The name of the table.
          :type name: str
          :param cache_size: How many query results to cache.
          :param table_class: Which table class to use.
          """
          if name in self._table_cache:
              return self._table_cache[name]

          table_class = options.pop('table_class', self.table_class)
          table = table_class(StorageProxy(self._storage, name), name,
                              **options)

          self._table_cache[name] = table
          return table

      def tables(self):
          """
          Get the names of all tables in the database.

          :returns: a set of table names
          :rtype: set[str]
          """
          # An empty storage may return None instead of a dict
          return set(self._storage.read() or {})

      def purge_tables(self):
          """
          Purge all tables from the database. **CANNOT BE REVERSED!**
          """
          self._storage.write({})
          self._table_cache.clear()

      def purge_table(self, name):
          """
          Purge a specific table from the database. **CANNOT BE REVERSED!**

          :param name: The name of the table.
          :type name: str
          """
          if name in self._table_cache:
              del self._table_cache[name]

          proxy = StorageProxy(self._storage, name)
          proxy.purge_table()

      def close(self):
          """
          Close the database by closing the underlying storage.
          """
          self._opened = False
          self._storage.close()

      def __enter__(self):
          return self

      def __exit__(self, *args):
          # Don't close the storage a second time
          if self._opened:
              self.close()

      def __getattr__(self, name):
          """
          Forward all unknown attribute calls to the underlying standard table.

          :raises AttributeError: if the default table has not been set up
                                  yet or doesn't have the attribute either
          """
          # ``__getattr__`` only runs when normal lookup failed. If ``_table``
          # itself is missing (instance not fully initialised, e.g. created
          # by copy/pickle), reading ``self._table`` below would re-enter this
          # method forever, so fail with a regular AttributeError instead.
          if name == '_table':
              raise AttributeError(name)

          return getattr(self._table, name)

      # Methods that are executed on the default table
      # Because magic methods are not handled by __getattr__ we need to forward
      # them manually here

      def __len__(self):
          """
          Get the total number of documents in the default table.

          >>> db = TinyDB('db.json')
          >>> len(db)
          0
          """
          return len(self._table)

      def __iter__(self):
          """
          Iter over all documents from default table.
          """
          return self._table.__iter__()
  class Table(object):
      """
      Represents a single TinyDB Table.
      """

      def __init__(self, storage, name, cache_size=10):
          """
          Get access to a table.

          :param storage: Access to the storage
          :type storage: StorageProxy
          :param name: The table name
          :param cache_size: Maximum size of query cache.
          """
          self._storage = storage
          self._name = name
          self._query_cache = LRUCache(capacity=cache_size)

          data = self._read()
          # Continue numbering after the highest document ID already stored
          self._last_id = max(data) if data else 0

      @property
      def name(self):
          """
          Get the table name.
          """
          return self._name

      def process_elements(self, func, cond=None, doc_ids=None, eids=None):
          """
          Helper function for processing all documents specified by condition
          or IDs.

          A repeating pattern in TinyDB is to run some code on all documents
          that match a condition or are specified by their ID. This is
          implemented in this function.
          The function passed as ``func`` has to be a callable. Its first
          argument will be the data currently in the database. Its second
          argument is the document ID of the currently processed document.

          If neither ``doc_ids`` nor ``cond`` is given, every document of the
          table is processed. The data is written back once, after all
          documents have been processed.

          See: :meth:`~.update`, :meth:`.remove`

          :param func: the function to execute on every included document.
                       first argument: all data
                       second argument: the current document ID
          :param cond: query that matches documents to use, or
          :param doc_ids: list of document IDs to use
          :param eids: list of document IDs to use (deprecated)
          :returns: the document IDs that were affected during processing
          """
          doc_ids = _get_doc_ids(doc_ids, eids)
          data = self._read()

          if doc_ids is not None:
              # Processed document specified by id
              for doc_id in doc_ids:
                  func(data, doc_id)

          elif cond is not None:
              # Collect affected doc_ids
              doc_ids = []

              # Processed documents specified by condition. Iterate over a
              # copy of the keys because ``func`` may remove entries.
              for doc_id in list(data):
                  if cond(data[doc_id]):
                      func(data, doc_id)
                      doc_ids.append(doc_id)

          else:
              # Processed documents
              doc_ids = list(data)

              for doc_id in doc_ids:
                  func(data, doc_id)

          self._write(data)

          return doc_ids

      def clear_cache(self):
          """
          Clear the query cache.

          A simple helper that clears the internal query cache.
          """
          self._query_cache.clear()

      def _get_next_id(self):
          """
          Increment the ID used the last time and return it
          """
          self._last_id += 1
          return self._last_id

      def _read(self):
          """
          Reading access to the DB.

          :returns: all values
          :rtype: DataProxy
          """
          return self._storage.read()

      def _write(self, values):
          """
          Writing access to the DB. Also clears the query cache.

          :param values: the new values to write
          :type values: DataProxy | dict
          """
          self._query_cache.clear()
          self._storage.write(values)

      def __len__(self):
          """
          Get the total number of documents in the table.
          """
          return len(self._read())

      def all(self):
          """
          Get all documents stored in the table.

          :returns: a list with all documents.
          :rtype: list[Element]
          """
          return list(itervalues(self._read()))

      def __iter__(self):
          """
          Iter over all documents stored in the table.

          :returns: an iterator over all documents.
          :rtype: listiterator[Element]
          """
          for value in itervalues(self._read()):
              yield value

      def insert(self, document):
          """
          Insert a new document into the table.

          :param document: the document to insert
          :returns: the inserted document's ID
          :raises ValueError: if ``document`` is not a ``dict``
          """
          # Validate before allocating an ID so that a rejected document
          # doesn't leave a gap in the ID sequence
          if not isinstance(document, dict):
              raise ValueError('Document is not a dictionary')

          doc_id = self._get_next_id()

          data = self._read()
          data[doc_id] = document
          self._write(data)

          return doc_id

      def insert_multiple(self, documents):
          """
          Insert multiple documents into the table.

          All documents are stored with a single write.

          :param documents: a list of documents to insert
          :returns: a list containing the inserted documents' IDs
          """
          doc_ids = []
          data = self._read()

          for doc in documents:
              doc_id = self._get_next_id()
              doc_ids.append(doc_id)
              data[doc_id] = doc

          self._write(data)

          return doc_ids

      def remove(self, cond=None, doc_ids=None, eids=None):
          """
          Remove all matching documents.

          :param cond: the condition to check against
          :type cond: query
          :param doc_ids: a list of document IDs
          :type doc_ids: list
          :param eids: a list of document IDs (deprecated)
          :returns: a list containing the removed document's ID
          :raises RuntimeError: if neither a condition nor IDs are given
          """
          doc_ids = _get_doc_ids(doc_ids, eids)

          if cond is None and doc_ids is None:
              raise RuntimeError('Use purge() to remove all documents')

          return self.process_elements(
              lambda data, doc_id: data.pop(doc_id),
              cond, doc_ids
          )

      def update(self, fields, cond=None, doc_ids=None, eids=None):
          """
          Update all matching documents to have a given set of fields.

          :param fields: the fields that the matching documents will have
                         or a method that will update the documents
          :type fields: dict | dict -> None
          :param cond: which documents to update
          :type cond: query
          :param doc_ids: a list of document IDs
          :type doc_ids: list
          :param eids: a list of document IDs (deprecated)
          :returns: a list containing the updated document's ID
          """
          doc_ids = _get_doc_ids(doc_ids, eids)

          if callable(fields):
              # ``fields`` modifies the document in place
              return self.process_elements(
                  lambda data, doc_id: fields(data[doc_id]),
                  cond, doc_ids
              )
          else:
              return self.process_elements(
                  lambda data, doc_id: data[doc_id].update(fields),
                  cond, doc_ids
              )

      def write_back(self, documents, doc_ids=None, eids=None):
          """
          Write back documents by doc_id

          The passed ``documents`` list is left unmodified.

          :param documents: a list of document to write back
          :param doc_ids: a list of documents' ID which need to be wrote back;
                          defaults to the documents' own ``doc_id`` attributes
          :param eids: a list of document IDs (deprecated)
          :returns: a list of documents' ID that have been written
          :raises ValueError: if ``doc_ids`` is given and its length differs
                              from the number of documents
          :raises IndexError: if an ID is greater than the last ID handed out
                              by this table
          """
          doc_ids = _get_doc_ids(doc_ids, eids)

          if doc_ids is not None and not len(documents) == len(doc_ids):
              raise ValueError(
                  'The length of documents and doc_ids is not match.')

          if doc_ids is None:
              doc_ids = [doc.doc_id for doc in documents]

          # Since this function will write docs back like inserting, to ensure
          # here only process existing or removed instead of inserting new,
          # raise error if doc_id exceeded the last.
          if len(doc_ids) > 0 and max(doc_ids) > self._last_id:
              raise IndexError(
                  'ID exceeds table length, use existing or removed doc_id.')

          data = self._read()

          # Pair every ID with its document; both have the same length here.
          # Iterating instead of popping keeps the caller's list intact.
          for doc_id, doc in zip(doc_ids, documents):
              data[doc_id] = doc

          self._write(data)

          return doc_ids

      def upsert(self, document, cond):
          """
          Update a document, if it exist - insert it otherwise.

          Note: this will update *all* documents matching the query.

          :param document: the document to insert or the fields to update
          :param cond: which document to look for
          :returns: a list containing the updated documents' IDs if any
                    document matched; otherwise the value returned by
                    :meth:`insert`, i.e. the single ID of the new document
          """
          updated_docs = self.update(document, cond)

          if updated_docs:
              return updated_docs
          else:
              return self.insert(document)

      def purge(self):
          """
          Purge the table by removing all documents.

          Also resets the ID counter, so IDs start from 1 again.
          """
          self._write({})
          self._last_id = 0

      def search(self, cond):
          """
          Search for all documents matching a 'where' cond.

          Results are kept in the query cache; a copy of the result list is
          returned so that the cached list isn't modified by the caller.

          :param cond: the condition to check against
          :type cond: Query

          :returns: list of matching documents
          :rtype: list[Element]
          """
          if cond in self._query_cache:
              return self._query_cache[cond][:]

          docs = [doc for doc in self.all() if cond(doc)]
          self._query_cache[cond] = docs

          return docs[:]

      def get(self, cond=None, doc_id=None, eid=None):
          """
          Get exactly one document specified by a query or and ID.

          Returns ``None`` if the document doesn't exist. If ``doc_id`` is
          given, ``cond`` is not used; otherwise ``cond`` has to be callable.

          :param cond: the condition to check against
          :type cond: Query

          :param doc_id: the document's ID
          :param eid: the document's ID (deprecated)

          :returns: the document or None
          :rtype: Element | None
          """
          doc_id = _get_doc_id(doc_id, eid)

          # Cannot use process_elements here because we want to return a
          # specific document

          if doc_id is not None:
              # Document specified by ID
              return self._read().get(doc_id, None)

          # Document specified by condition
          for doc in self.all():
              if cond(doc):
                  return doc

      def count(self, cond):
          """
          Count the documents matching a condition.

          :param cond: the condition use
          :type cond: Query
          """
          return len(self.search(cond))

      def contains(self, cond=None, doc_ids=None, eids=None):
          """
          Check whether the database contains a document matching a condition
          or an ID.

          If ``doc_ids`` is set, it checks if the db contains a document with
          one of the specified IDs.

          :param cond: the condition use
          :type cond: Query
          :param doc_ids: the document IDs to look for
          :param eids: the document IDs to look for (deprecated)
          """
          doc_ids = _get_doc_ids(doc_ids, eids)

          if doc_ids is not None:
              # Documents specified by ID. Compare against None explicitly:
              # an existing but empty document is falsy.
              return any(self.get(doc_id=doc_id) is not None
                         for doc_id in doc_ids)

          # Document specified by condition
          return self.get(cond) is not None
  # Make Table the class that TinyDB.table() instantiates unless a
  # ``table_class`` option is passed. Assigned here, after both classes
  # have been defined.
  TinyDB.table_class = Table