zippath.py 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. # -*- test-case-name: twisted.python.test.test_zippath -*-
  2. # Copyright (c) Twisted Matrix Laboratories.
  3. # See LICENSE for details.
  4. """
  5. This module contains implementations of L{IFilePath} for zip files.
  6. See the constructor of L{ZipArchive} for use.
  7. """
  8. from __future__ import absolute_import, division
  9. import os
  10. import time
  11. import errno
  12. from zipfile import ZipFile
  13. from twisted.python.compat import comparable, cmp
  14. from twisted.python.filepath import IFilePath, FilePath, AbstractFilePath
  15. from twisted.python.filepath import _coerceToFilesystemEncoding
  16. from twisted.python.filepath import UnlistableError
  17. from zope.interface import implementer
# Separator between path segments inside a zip archive.  In zipfiles, "/" is
# universally used as the path separator, regardless of platform.
ZIP_PATH_SEP = '/'
  20. @comparable
  21. @implementer(IFilePath)
  22. class ZipPath(AbstractFilePath):
  23. """
  24. I represent a file or directory contained within a zip file.
  25. """
  26. def __init__(self, archive, pathInArchive):
  27. """
  28. Don't construct me directly. Use C{ZipArchive.child()}.
  29. @param archive: a L{ZipArchive} instance.
  30. @param pathInArchive: a ZIP_PATH_SEP-separated string.
  31. """
  32. self.archive = archive
  33. self.pathInArchive = pathInArchive
  34. # self.path pretends to be os-specific because that's the way the
  35. # 'zipimport' module does it.
  36. sep = _coerceToFilesystemEncoding(pathInArchive, ZIP_PATH_SEP)
  37. archiveFilename = _coerceToFilesystemEncoding(
  38. pathInArchive, archive.zipfile.filename)
  39. self.path = os.path.join(archiveFilename,
  40. *(self.pathInArchive.split(sep)))
  41. def __cmp__(self, other):
  42. if not isinstance(other, ZipPath):
  43. return NotImplemented
  44. return cmp((self.archive, self.pathInArchive),
  45. (other.archive, other.pathInArchive))
  46. def __repr__(self):
  47. parts = [_coerceToFilesystemEncoding(
  48. self.sep, os.path.abspath(self.archive.path))]
  49. parts.extend(self.pathInArchive.split(self.sep))
  50. ossep = _coerceToFilesystemEncoding(self.sep, os.sep)
  51. return "ZipPath(%r)" % (ossep.join(parts),)
  52. @property
  53. def sep(self):
  54. """
  55. Return a zip directory separator.
  56. @return: The zip directory separator.
  57. @returntype: The same type as C{self.path}.
  58. """
  59. return _coerceToFilesystemEncoding(self.path, ZIP_PATH_SEP)
  60. def parent(self):
  61. splitup = self.pathInArchive.split(self.sep)
  62. if len(splitup) == 1:
  63. return self.archive
  64. return ZipPath(self.archive, self.sep.join(splitup[:-1]))
  65. def child(self, path):
  66. """
  67. Return a new ZipPath representing a path in C{self.archive} which is
  68. a child of this path.
  69. @note: Requesting the C{".."} (or other special name) child will not
  70. cause L{InsecurePath} to be raised since these names do not have
  71. any special meaning inside a zip archive. Be particularly
  72. careful with the C{path} attribute (if you absolutely must use
  73. it) as this means it may include special names with special
  74. meaning outside of the context of a zip archive.
  75. """
  76. joiner = _coerceToFilesystemEncoding(path, ZIP_PATH_SEP)
  77. pathInArchive = _coerceToFilesystemEncoding(path, self.pathInArchive)
  78. return ZipPath(self.archive, joiner.join([pathInArchive, path]))
  79. def sibling(self, path):
  80. return self.parent().child(path)
  81. def exists(self):
  82. return self.isdir() or self.isfile()
  83. def isdir(self):
  84. return self.pathInArchive in self.archive.childmap
  85. def isfile(self):
  86. return self.pathInArchive in self.archive.zipfile.NameToInfo
  87. def islink(self):
  88. return False
  89. def listdir(self):
  90. if self.exists():
  91. if self.isdir():
  92. return list(self.archive.childmap[self.pathInArchive].keys())
  93. else:
  94. raise UnlistableError(
  95. OSError(errno.ENOTDIR, "Leaf zip entry listed"))
  96. else:
  97. raise UnlistableError(
  98. OSError(errno.ENOENT, "Non-existent zip entry listed"))
  99. def splitext(self):
  100. """
  101. Return a value similar to that returned by C{os.path.splitext}.
  102. """
  103. # This happens to work out because of the fact that we use OS-specific
  104. # path separators in the constructor to construct our fake 'path'
  105. # attribute.
  106. return os.path.splitext(self.path)
  107. def basename(self):
  108. return self.pathInArchive.split(self.sep)[-1]
  109. def dirname(self):
  110. # XXX NOTE: This API isn't a very good idea on filepath, but it's even
  111. # less meaningful here.
  112. return self.parent().path
  113. def open(self, mode="r"):
  114. pathInArchive = _coerceToFilesystemEncoding('', self.pathInArchive)
  115. return self.archive.zipfile.open(pathInArchive, mode=mode)
  116. def changed(self):
  117. pass
  118. def getsize(self):
  119. """
  120. Retrieve this file's size.
  121. @return: file size, in bytes
  122. """
  123. pathInArchive = _coerceToFilesystemEncoding("", self.pathInArchive)
  124. return self.archive.zipfile.NameToInfo[pathInArchive].file_size
  125. def getAccessTime(self):
  126. """
  127. Retrieve this file's last access-time. This is the same as the last access
  128. time for the archive.
  129. @return: a number of seconds since the epoch
  130. """
  131. return self.archive.getAccessTime()
  132. def getModificationTime(self):
  133. """
  134. Retrieve this file's last modification time. This is the time of
  135. modification recorded in the zipfile.
  136. @return: a number of seconds since the epoch.
  137. """
  138. pathInArchive = _coerceToFilesystemEncoding("", self.pathInArchive)
  139. return time.mktime(
  140. self.archive.zipfile.NameToInfo[pathInArchive].date_time
  141. + (0, 0, 0))
  142. def getStatusChangeTime(self):
  143. """
  144. Retrieve this file's last modification time. This name is provided for
  145. compatibility, and returns the same value as getmtime.
  146. @return: a number of seconds since the epoch.
  147. """
  148. return self.getModificationTime()
  149. class ZipArchive(ZipPath):
  150. """
  151. I am a L{FilePath}-like object which can wrap a zip archive as if it were a
  152. directory.
  153. It works similarly to L{FilePath} in L{bytes} and L{unicode} handling --
  154. instantiating with a L{bytes} will return a "bytes mode" L{ZipArchive},
  155. and instantiating with a L{unicode} will return a "text mode"
  156. L{ZipArchive}. Methods that return new L{ZipArchive} or L{ZipPath}
  157. instances will be in the mode of the argument to the creator method,
  158. converting if required.
  159. """
  160. archive = property(lambda self: self)
  161. def __init__(self, archivePathname):
  162. """
  163. Create a ZipArchive, treating the archive at archivePathname as a zip
  164. file.
  165. @param archivePathname: a L{bytes} or L{unicode}, naming a path in the
  166. filesystem.
  167. """
  168. self.path = archivePathname
  169. self.zipfile = ZipFile(_coerceToFilesystemEncoding('',
  170. archivePathname))
  171. self.pathInArchive = _coerceToFilesystemEncoding(archivePathname, '')
  172. # zipfile is already wasting O(N) memory on cached ZipInfo instances,
  173. # so there's no sense in trying to do this lazily or intelligently
  174. self.childmap = {} # map parent: list of children
  175. for name in self.zipfile.namelist():
  176. name = _coerceToFilesystemEncoding(self.path, name).split(self.sep)
  177. for x in range(len(name)):
  178. child = name[-x]
  179. parent = self.sep.join(name[:-x])
  180. if parent not in self.childmap:
  181. self.childmap[parent] = {}
  182. self.childmap[parent][child] = 1
  183. parent = _coerceToFilesystemEncoding(archivePathname, '')
  184. def child(self, path):
  185. """
  186. Create a ZipPath pointing at a path within the archive.
  187. @param path: a L{bytes} or L{unicode} with no path separators in it
  188. (either '/' or the system path separator, if it's different).
  189. """
  190. return ZipPath(self, path)
  191. def exists(self):
  192. """
  193. Returns C{True} if the underlying archive exists.
  194. """
  195. return FilePath(self.zipfile.filename).exists()
  196. def getAccessTime(self):
  197. """
  198. Return the archive file's last access time.
  199. """
  200. return FilePath(self.zipfile.filename).getAccessTime()
  201. def getModificationTime(self):
  202. """
  203. Return the archive file's modification time.
  204. """
  205. return FilePath(self.zipfile.filename).getModificationTime()
  206. def getStatusChangeTime(self):
  207. """
  208. Return the archive file's status change time.
  209. """
  210. return FilePath(self.zipfile.filename).getStatusChangeTime()
  211. def __repr__(self):
  212. return 'ZipArchive(%r)' % (os.path.abspath(self.path),)
# The names this module exports.
__all__ = ['ZipArchive', 'ZipPath']