gae.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. """
  2. This module contains EXPERIMENTAL support for storing a Whoosh index's files in
  3. the Google App Engine blobstore. This will use a lot of RAM since all files are
  4. loaded into RAM, but it is potentially useful as a workaround for the lack of file
  5. storage in Google App Engine.
  6. Use at your own risk, but please report any problems to me so I can fix them.
  7. To create a new index::
  8. from whoosh.filedb.gae import DatastoreStorage
  9. ix = DatastoreStorage().create_index(schema)
  10. To open an existing index::
  11. ix = DatastoreStorage().open_index()
  12. """
  13. import time
  14. from google.appengine.api import memcache # @UnresolvedImport
  15. from google.appengine.ext import db # @UnresolvedImport
  16. from whoosh.compat import BytesIO
  17. from whoosh.index import TOC, FileIndex, _DEF_INDEX_NAME
  18. from whoosh.filedb.filestore import ReadOnlyError, Storage
  19. from whoosh.filedb.structfile import StructFile
  20. class DatastoreFile(db.Model):
  21. """A file-like object that is backed by a BytesIO() object whose contents
  22. is loaded from a BlobProperty in the app engine datastore.
  23. """
  24. value = db.BlobProperty()
  25. mtime = db.IntegerProperty(default=0)
  26. def __init__(self, *args, **kwargs):
  27. super(DatastoreFile, self).__init__(*args, **kwargs)
  28. self.data = BytesIO()
  29. @classmethod
  30. def loadfile(cls, name):
  31. value = memcache.get(name, namespace="DatastoreFile")
  32. if value is None:
  33. file = cls.get_by_key_name(name)
  34. memcache.set(name, file.value, namespace="DatastoreFile")
  35. else:
  36. file = cls(value=value)
  37. file.data = BytesIO(file.value)
  38. return file
  39. def close(self):
  40. oldvalue = self.value
  41. self.value = self.getvalue()
  42. if oldvalue != self.value:
  43. self.mtime = int(time.time())
  44. self.put()
  45. memcache.set(self.key().id_or_name(), self.value,
  46. namespace="DatastoreFile")
  47. def tell(self):
  48. return self.data.tell()
  49. def write(self, data):
  50. return self.data.write(data)
  51. def read(self, length):
  52. return self.data.read(length)
  53. def seek(self, *args):
  54. return self.data.seek(*args)
  55. def readline(self):
  56. return self.data.readline()
  57. def getvalue(self):
  58. return self.data.getvalue()
  59. class MemcacheLock(object):
  60. def __init__(self, name):
  61. self.name = name
  62. def acquire(self, blocking=False):
  63. val = memcache.add(self.name, "L", 360, namespace="whooshlocks")
  64. if blocking and not val:
  65. # Simulate blocking by retrying the acquire over and over
  66. import time
  67. while not val:
  68. time.sleep(0.1)
  69. val = memcache.add(self.name, "", 360, namespace="whooshlocks")
  70. return val
  71. def release(self):
  72. memcache.delete(self.name, namespace="whooshlocks")
  73. class DatastoreStorage(Storage):
  74. """An implementation of :class:`whoosh.store.Storage` that stores files in
  75. the app engine datastore as blob properties.
  76. """
  77. def create_index(self, schema, indexname=_DEF_INDEX_NAME):
  78. if self.readonly:
  79. raise ReadOnlyError
  80. TOC.create(self, schema, indexname)
  81. return FileIndex(self, schema, indexname)
  82. def open_index(self, indexname=_DEF_INDEX_NAME, schema=None):
  83. return FileIndex(self, schema=schema, indexname=indexname)
  84. def list(self):
  85. query = DatastoreFile.all()
  86. keys = []
  87. for file in query:
  88. keys.append(file.key().id_or_name())
  89. return keys
  90. def clean(self):
  91. pass
  92. def total_size(self):
  93. return sum(self.file_length(f) for f in self.list())
  94. def file_exists(self, name):
  95. return DatastoreFile.get_by_key_name(name) is not None
  96. def file_modified(self, name):
  97. return DatastoreFile.get_by_key_name(name).mtime
  98. def file_length(self, name):
  99. return len(DatastoreFile.get_by_key_name(name).value)
  100. def delete_file(self, name):
  101. memcache.delete(name, namespace="DatastoreFile")
  102. return DatastoreFile.get_by_key_name(name).delete()
  103. def rename_file(self, name, newname, safe=False):
  104. file = DatastoreFile.get_by_key_name(name)
  105. newfile = DatastoreFile(key_name=newname)
  106. newfile.value = file.value
  107. newfile.mtime = file.mtime
  108. newfile.put()
  109. file.delete()
  110. def create_file(self, name, **kwargs):
  111. f = StructFile(DatastoreFile(key_name=name), name=name,
  112. onclose=lambda sfile: sfile.file.close())
  113. return f
  114. def open_file(self, name, *args, **kwargs):
  115. return StructFile(DatastoreFile.loadfile(name))
  116. def lock(self, name):
  117. return MemcacheLock(name)
  118. def temp_storage(self, name=None):
  119. tempstore = DatastoreStorage()
  120. return tempstore.create()