py7zlib.py 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903
  1. #!/usr/bin/python -u
  2. #
  3. # Python Bindings for LZMA
  4. #
  5. # Copyright (c) 2004-2015 by Joachim Bauch, mail@joachim-bauch.de
  6. # 7-Zip Copyright (C) 1999-2010 Igor Pavlov
  7. # LZMA SDK Copyright (C) 1999-2010 Igor Pavlov
  8. #
  9. # This library is free software; you can redistribute it and/or
  10. # modify it under the terms of the GNU Lesser General Public
  11. # License as published by the Free Software Foundation; either
  12. # version 2.1 of the License, or (at your option) any later version.
  13. #
  14. # This library is distributed in the hope that it will be useful,
  15. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. # Lesser General Public License for more details.
  18. #
  19. # You should have received a copy of the GNU Lesser General Public
  20. # License along with this library; if not, write to the Free Software
  21. # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  22. #
  23. # $Id$
  24. #
  25. """Read from and write to 7zip format archives.
  26. """
  27. from array import array
  28. from binascii import unhexlify
  29. from datetime import datetime
  30. import pylzma
  31. from struct import pack, unpack
  32. from zlib import crc32
  33. import zlib
  34. import bz2
  35. import os
  36. import sys
  37. try:
  38. from io import BytesIO
  39. except ImportError:
  40. from cStringIO import StringIO as BytesIO
  41. try:
  42. from functools import reduce
  43. except ImportError:
  44. # reduce is available in functools starting with Python 2.6
  45. pass
  46. try:
  47. from pytz import UTC
  48. except ImportError:
  49. # pytz is optional, define own "UTC" timestamp
  50. # reference implementation from Python documentation
  51. from datetime import timedelta, tzinfo
  52. ZERO = timedelta(0)
  53. class UTC(tzinfo):
  54. """UTC"""
  55. def utcoffset(self, dt):
  56. return ZERO
  57. def tzname(self, dt):
  58. return "UTC"
  59. def dst(self, dt):
  60. return ZERO
  61. def _call__(self):
  62. return self
  63. UTC = UTC()
  64. try:
  65. unicode
  66. except NameError:
  67. # Python 3.x
  68. def unicode(s, encoding):
  69. return s
  70. else:
  71. def bytes(s, encoding):
  72. return s
  73. try:
  74. long
  75. except NameError:
  76. # Python 3.x
  77. long = int
  78. try:
  79. xrange
  80. except NameError:
  81. # Python 3.x
  82. xrange = range
  83. IS_PYTHON3 = sys.version_info[0] == 3
  84. NEED_BYTESWAP = sys.byteorder != 'little'
  85. if array('L').itemsize == 4:
  86. ARRAY_TYPE_UINT32 = 'L'
  87. else:
  88. assert array('I').itemsize == 4
  89. ARRAY_TYPE_UINT32 = 'I'
  90. READ_BLOCKSIZE = 16384
  91. MAGIC_7Z = unhexlify('377abcaf271c') # '7z\xbc\xaf\x27\x1c'
  92. PROPERTY_END = unhexlify('00') # '\x00'
  93. PROPERTY_HEADER = unhexlify('01') # '\x01'
  94. PROPERTY_ARCHIVE_PROPERTIES = unhexlify('02') # '\x02'
  95. PROPERTY_ADDITIONAL_STREAMS_INFO = unhexlify('03') # '\x03'
  96. PROPERTY_MAIN_STREAMS_INFO = unhexlify('04') # '\x04'
  97. PROPERTY_FILES_INFO = unhexlify('05') # '\x05'
  98. PROPERTY_PACK_INFO = unhexlify('06') # '\x06'
  99. PROPERTY_UNPACK_INFO = unhexlify('07') # '\x07'
  100. PROPERTY_SUBSTREAMS_INFO = unhexlify('08') # '\x08'
  101. PROPERTY_SIZE = unhexlify('09') # '\x09'
  102. PROPERTY_CRC = unhexlify('0a') # '\x0a'
  103. PROPERTY_FOLDER = unhexlify('0b') # '\x0b'
  104. PROPERTY_CODERS_UNPACK_SIZE = unhexlify('0c') # '\x0c'
  105. PROPERTY_NUM_UNPACK_STREAM = unhexlify('0d') # '\x0d'
  106. PROPERTY_EMPTY_STREAM = unhexlify('0e') # '\x0e'
  107. PROPERTY_EMPTY_FILE = unhexlify('0f') # '\x0f'
  108. PROPERTY_ANTI = unhexlify('10') # '\x10'
  109. PROPERTY_NAME = unhexlify('11') # '\x11'
  110. PROPERTY_CREATION_TIME = unhexlify('12') # '\x12'
  111. PROPERTY_LAST_ACCESS_TIME = unhexlify('13') # '\x13'
  112. PROPERTY_LAST_WRITE_TIME = unhexlify('14') # '\x14'
  113. PROPERTY_ATTRIBUTES = unhexlify('15') # '\x15'
  114. PROPERTY_COMMENT = unhexlify('16') # '\x16'
  115. PROPERTY_ENCODED_HEADER = unhexlify('17') # '\x17'
  116. COMPRESSION_METHOD_COPY = unhexlify('00') # '\x00'
  117. COMPRESSION_METHOD_LZMA = unhexlify('03') # '\x03'
  118. COMPRESSION_METHOD_CRYPTO = unhexlify('06') # '\x06'
  119. COMPRESSION_METHOD_MISC = unhexlify('04') # '\x04'
  120. COMPRESSION_METHOD_MISC_ZIP = unhexlify('0401') # '\x04\x01'
  121. COMPRESSION_METHOD_MISC_BZIP = unhexlify('0402') # '\x04\x02'
  122. COMPRESSION_METHOD_7Z_AES256_SHA256 = unhexlify('06f10701') # '\x06\xf1\x07\x01'
  123. # number of seconds between 1601/01/01 and 1970/01/01 (UTC)
  124. # used to adjust 7z FILETIME to Python timestamp
  125. TIMESTAMP_ADJUST = -11644473600
  126. def toTimestamp(filetime):
  127. """Convert 7z FILETIME to Python timestamp."""
  128. # FILETIME is 100-nanosecond intervals since 1601/01/01 (UTC)
  129. return (filetime / 10000000.0) + TIMESTAMP_ADJUST
  130. def calculate_crc32(data, value=None, blocksize=1024*1024):
  131. """Calculate CRC32 of strings with arbitrary lengths."""
  132. length = len(data)
  133. pos = blocksize
  134. if value:
  135. value = crc32(data[:pos], value)
  136. else:
  137. value = crc32(data[:pos])
  138. while pos < length:
  139. value = crc32(data[pos:pos+blocksize], value)
  140. pos += blocksize
  141. return value & 0xffffffff
  142. class ArchiveError(Exception):
  143. pass
  144. class FormatError(ArchiveError):
  145. pass
  146. class EncryptedArchiveError(ArchiveError):
  147. pass
  148. class UnsupportedCompressionMethodError(ArchiveError):
  149. pass
  150. class DecryptionError(ArchiveError):
  151. pass
  152. class NoPasswordGivenError(DecryptionError):
  153. pass
  154. class WrongPasswordError(DecryptionError):
  155. pass
  156. class DecompressionError(ArchiveError):
  157. pass
  158. class ArchiveTimestamp(long):
  159. """Windows FILETIME timestamp."""
  160. def __repr__(self):
  161. return '%s(%d)' % (type(self).__name__, self)
  162. def as_datetime(self):
  163. """Convert FILETIME to Python datetime object."""
  164. return datetime.fromtimestamp(toTimestamp(self), UTC)
  165. class Base(object):
  166. """ base class with support for various basic read/write functions """
  167. def _readReal64Bit(self, file):
  168. res = file.read(8)
  169. a, b = unpack('<LL', res)
  170. return b << 32 | a, res
  171. def _read64Bit(self, file):
  172. b = ord(file.read(1))
  173. mask = 0x80
  174. for i in xrange(8):
  175. if b & mask == 0:
  176. bytes = array('B', file.read(i))
  177. bytes.reverse()
  178. value = (bytes and reduce(lambda x, y: x << 8 | y, bytes)) or 0
  179. highpart = b & (mask - 1)
  180. return value + (highpart << (i * 8))
  181. mask >>= 1
  182. def _readBoolean(self, file, count, checkall=0):
  183. if checkall:
  184. alldefined = file.read(1)
  185. if alldefined != unhexlify('00'):
  186. return [True] * count
  187. result = []
  188. b = 0
  189. mask = 0
  190. for i in xrange(count):
  191. if mask == 0:
  192. b = ord(file.read(1))
  193. mask = 0x80
  194. result.append(b & mask != 0)
  195. mask >>= 1
  196. return result
  197. def checkcrc(self, crc, data):
  198. check = calculate_crc32(data)
  199. return crc == check
  200. class PackInfo(Base):
  201. """ informations about packed streams """
  202. def __init__(self, file):
  203. self.packpos = self._read64Bit(file)
  204. self.numstreams = self._read64Bit(file)
  205. id = file.read(1)
  206. if id == PROPERTY_SIZE:
  207. self.packsizes = [self._read64Bit(file) for x in xrange(self.numstreams)]
  208. id = file.read(1)
  209. if id == PROPERTY_CRC:
  210. self.crcs = [self._read64Bit(file) for x in xrange(self.numstreams)]
  211. id = file.read(1)
  212. if id != PROPERTY_END:
  213. raise FormatError('end id expected but %s found' % repr(id))
  214. class Folder(Base):
  215. """ a "Folder" represents a stream of compressed data """
  216. def __init__(self, file):
  217. numcoders = self._read64Bit(file)
  218. self.coders = []
  219. self.digestdefined = False
  220. totalin = 0
  221. self.totalout = 0
  222. for i in xrange(numcoders):
  223. while True:
  224. b = ord(file.read(1))
  225. methodsize = b & 0xf
  226. issimple = b & 0x10 == 0
  227. noattributes = b & 0x20 == 0
  228. last_alternative = b & 0x80 == 0
  229. c = {}
  230. c['method'] = file.read(methodsize)
  231. if not issimple:
  232. c['numinstreams'] = self._read64Bit(file)
  233. c['numoutstreams'] = self._read64Bit(file)
  234. else:
  235. c['numinstreams'] = 1
  236. c['numoutstreams'] = 1
  237. totalin += c['numinstreams']
  238. self.totalout += c['numoutstreams']
  239. if not noattributes:
  240. c['properties'] = file.read(self._read64Bit(file))
  241. self.coders.append(c)
  242. if last_alternative:
  243. break
  244. numbindpairs = self.totalout - 1
  245. self.bindpairs = []
  246. for i in xrange(numbindpairs):
  247. self.bindpairs.append((self._read64Bit(file), self._read64Bit(file), ))
  248. numpackedstreams = totalin - numbindpairs
  249. self.packed_indexes = []
  250. if numpackedstreams == 1:
  251. for i in xrange(totalin):
  252. if self.findInBindPair(i) < 0:
  253. self.packed_indexes.append(i)
  254. elif numpackedstreams > 1:
  255. for i in xrange(numpackedstreams):
  256. self.packed_indexes.append(self._read64Bit(file))
  257. def getUnpackSize(self):
  258. if not self.unpacksizes:
  259. return 0
  260. for i in xrange(len(self.unpacksizes)-1, -1, -1):
  261. if self.findOutBindPair(i):
  262. return self.unpacksizes[i]
  263. raise TypeError('not found')
  264. def findInBindPair(self, index):
  265. for idx, (a, b) in enumerate(self.bindpairs):
  266. if a == index:
  267. return idx
  268. return -1
  269. def findOutBindPair(self, index):
  270. for idx, (a, b) in enumerate(self.bindpairs):
  271. if b == index:
  272. return idx
  273. return -1
  274. def isEncrypted(self):
  275. return COMPRESSION_METHOD_7Z_AES256_SHA256 in [x['method'] for x in self.coders]
  276. class Digests(Base):
  277. """ holds a list of checksums """
  278. def __init__(self, file, count):
  279. self.defined = self._readBoolean(file, count, checkall=1)
  280. self.crcs = array(ARRAY_TYPE_UINT32, file.read(4*count))
  281. if NEED_BYTESWAP:
  282. self.crcs.byteswap()
  283. UnpackDigests = Digests
  284. class UnpackInfo(Base):
  285. """ combines multiple folders """
  286. def __init__(self, file):
  287. id = file.read(1)
  288. if id != PROPERTY_FOLDER:
  289. raise FormatError('folder id expected but %s found' % repr(id))
  290. self.numfolders = self._read64Bit(file)
  291. self.folders = []
  292. external = file.read(1)
  293. if external == unhexlify('00'):
  294. self.folders = [Folder(file) for x in xrange(self.numfolders)]
  295. elif external == unhexlify('01'):
  296. self.datastreamidx = self._read64Bit(file)
  297. else:
  298. raise FormatError('0x00 or 0x01 expected but %s found' % repr(external))
  299. id = file.read(1)
  300. if id != PROPERTY_CODERS_UNPACK_SIZE:
  301. raise FormatError('coders unpack size id expected but %s found' % repr(id))
  302. for folder in self.folders:
  303. folder.unpacksizes = [self._read64Bit(file) for x in xrange(folder.totalout)]
  304. id = file.read(1)
  305. if id == PROPERTY_CRC:
  306. digests = UnpackDigests(file, self.numfolders)
  307. for idx, folder in enumerate(self.folders):
  308. folder.digestdefined = digests.defined[idx]
  309. folder.crc = digests.crcs[idx]
  310. id = file.read(1)
  311. if id != PROPERTY_END:
  312. raise FormatError('end id expected but %s found' % repr(id))
  313. class SubstreamsInfo(Base):
  314. """ defines the substreams of a folder """
  315. def __init__(self, file, numfolders, folders):
  316. self.digests = []
  317. self.digestsdefined = []
  318. id = file.read(1)
  319. if id == PROPERTY_NUM_UNPACK_STREAM:
  320. self.numunpackstreams = [self._read64Bit(file) for x in xrange(numfolders)]
  321. id = file.read(1)
  322. else:
  323. self.numunpackstreams = [1]*numfolders
  324. if id == PROPERTY_SIZE:
  325. self.unpacksizes = []
  326. for i in xrange(len(self.numunpackstreams)):
  327. sum = 0
  328. for j in xrange(1, self.numunpackstreams[i]):
  329. size = self._read64Bit(file)
  330. self.unpacksizes.append(size)
  331. sum += size
  332. self.unpacksizes.append(folders[i].getUnpackSize() - sum)
  333. id = file.read(1)
  334. numdigests = 0
  335. numdigeststotal = 0
  336. for i in xrange(numfolders):
  337. numsubstreams = self.numunpackstreams[i]
  338. if numsubstreams != 1 or not folders[i].digestdefined:
  339. numdigests += numsubstreams
  340. numdigeststotal += numsubstreams
  341. if id == PROPERTY_CRC:
  342. digests = Digests(file, numdigests)
  343. didx = 0
  344. for i in xrange(numfolders):
  345. folder = folders[i]
  346. numsubstreams = self.numunpackstreams[i]
  347. if numsubstreams == 1 and folder.digestdefined:
  348. self.digestsdefined.append(True)
  349. self.digests.append(folder.crc)
  350. else:
  351. for j in xrange(numsubstreams):
  352. self.digestsdefined.append(digests.defined[didx])
  353. self.digests.append(digests.crcs[didx])
  354. didx += 1
  355. id = file.read(1)
  356. if id != PROPERTY_END:
  357. raise FormatError('end id expected but %r found' % id)
  358. if not self.digestsdefined:
  359. self.digestsdefined = [False] * numdigeststotal
  360. self.digests = [0] * numdigeststotal
  361. class StreamsInfo(Base):
  362. """ informations about compressed streams """
  363. def __init__(self, file):
  364. id = file.read(1)
  365. if id == PROPERTY_PACK_INFO:
  366. self.packinfo = PackInfo(file)
  367. id = file.read(1)
  368. if id == PROPERTY_UNPACK_INFO:
  369. self.unpackinfo = UnpackInfo(file)
  370. id = file.read(1)
  371. if id == PROPERTY_SUBSTREAMS_INFO:
  372. self.substreamsinfo = SubstreamsInfo(file, self.unpackinfo.numfolders, self.unpackinfo.folders)
  373. id = file.read(1)
  374. if id != PROPERTY_END:
  375. raise FormatError('end id expected but %s found' % repr(id))
  376. class FilesInfo(Base):
  377. """ holds file properties """
  378. def _readTimes(self, file, files, name):
  379. defined = self._readBoolean(file, len(files), checkall=1)
  380. # NOTE: the "external" flag is currently ignored, should be 0x00
  381. external = file.read(1)
  382. for i in xrange(len(files)):
  383. if defined[i]:
  384. files[i][name] = ArchiveTimestamp(self._readReal64Bit(file)[0])
  385. else:
  386. files[i][name] = None
  387. def __init__(self, file):
  388. self.numfiles = self._read64Bit(file)
  389. self.files = [{'emptystream': False} for x in xrange(self.numfiles)]
  390. numemptystreams = 0
  391. while True:
  392. typ = self._read64Bit(file)
  393. if typ > 255:
  394. raise FormatError('invalid type, must be below 256, is %d' % typ)
  395. typ = pack('B', typ)
  396. if typ == PROPERTY_END:
  397. break
  398. size = self._read64Bit(file)
  399. buffer = BytesIO(file.read(size))
  400. if typ == PROPERTY_EMPTY_STREAM:
  401. isempty = self._readBoolean(buffer, self.numfiles)
  402. list(map(lambda x, y: x.update({'emptystream': y}), self.files, isempty))
  403. for x in isempty:
  404. if x: numemptystreams += 1
  405. emptyfiles = [False] * numemptystreams
  406. antifiles = [False] * numemptystreams
  407. elif typ == PROPERTY_EMPTY_FILE:
  408. emptyfiles = self._readBoolean(buffer, numemptystreams)
  409. elif typ == PROPERTY_ANTI:
  410. antifiles = self._readBoolean(buffer, numemptystreams)
  411. elif typ == PROPERTY_NAME:
  412. external = buffer.read(1)
  413. if external != unhexlify('00'):
  414. self.dataindex = self._read64Bit(buffer)
  415. # XXX: evaluate external
  416. raise NotImplementedError
  417. for f in self.files:
  418. name = ''
  419. while True:
  420. ch = buffer.read(2)
  421. if ch == unhexlify('0000'):
  422. f['filename'] = name
  423. break
  424. name += ch.decode('utf-16')
  425. elif typ == PROPERTY_CREATION_TIME:
  426. self._readTimes(buffer, self.files, 'creationtime')
  427. elif typ == PROPERTY_LAST_ACCESS_TIME:
  428. self._readTimes(buffer, self.files, 'lastaccesstime')
  429. elif typ == PROPERTY_LAST_WRITE_TIME:
  430. self._readTimes(buffer, self.files, 'lastwritetime')
  431. elif typ == PROPERTY_ATTRIBUTES:
  432. defined = self._readBoolean(buffer, self.numfiles, checkall=1)
  433. for idx, f in enumerate(self.files):
  434. if defined[idx]:
  435. f['attributes'] = unpack('<L', buffer.read(4))[0]
  436. else:
  437. f['attributes'] = None
  438. else:
  439. raise FormatError('invalid type %r' % (typ))
  440. class Header(Base):
  441. """ the archive header """
  442. def __init__(self, file):
  443. id = file.read(1)
  444. if id == PROPERTY_ARCHIVE_PROPERTIES:
  445. self.properties = ArchiveProperties(file)
  446. id = file.read(1)
  447. if id == PROPERTY_ADDITIONAL_STREAMS_INFO:
  448. self.additional_streams = StreamsInfo(file)
  449. id = file.read(1)
  450. if id == PROPERTY_MAIN_STREAMS_INFO:
  451. self.main_streams = StreamsInfo(file)
  452. id = file.read(1)
  453. if id == PROPERTY_FILES_INFO:
  454. self.files = FilesInfo(file)
  455. id = file.read(1)
  456. if id != PROPERTY_END:
  457. raise FormatError('end id expected but %s found' % (repr(id)))
  458. class ArchiveFile(Base):
  459. """ wrapper around a file in the archive """
  460. def __init__(self, info, start, src_start, folder, archive, maxsize=None):
  461. self.digest = None
  462. self._archive = archive
  463. self._file = archive._file
  464. self._start = start
  465. self._src_start = src_start
  466. self._folder = folder
  467. # maxsize is only valid for solid archives
  468. self._maxsize = maxsize
  469. for k, v in info.items():
  470. setattr(self, k, v)
  471. self.size = self.uncompressed = self._uncompressed[-1]
  472. if not hasattr(self, 'filename'):
  473. # compressed file is stored without a name, generate one
  474. try:
  475. basefilename = self._file.name
  476. except AttributeError:
  477. # 7z archive file doesn't have a name
  478. self.filename = 'contents'
  479. else:
  480. self.filename = os.path.splitext(os.path.basename(basefilename))[0]
  481. self.reset()
  482. self._decoders = {
  483. COMPRESSION_METHOD_COPY: '_read_copy',
  484. COMPRESSION_METHOD_LZMA: '_read_lzma',
  485. COMPRESSION_METHOD_MISC_ZIP: '_read_zip',
  486. COMPRESSION_METHOD_MISC_BZIP: '_read_bzip',
  487. COMPRESSION_METHOD_7Z_AES256_SHA256: '_read_7z_aes256_sha256',
  488. }
  489. def _is_encrypted(self):
  490. return self._folder.isEncrypted()
  491. def reset(self):
  492. self.pos = 0
  493. def read(self):
  494. if not self._folder.coders:
  495. raise TypeError("file has no coder informations")
  496. data = None
  497. level = 0
  498. for coder in self._folder.coders:
  499. method = coder['method']
  500. decoder = None
  501. while method and decoder is None:
  502. decoder = self._decoders.get(method, None)
  503. method = method[:-1]
  504. if decoder is None:
  505. raise UnsupportedCompressionMethodError(repr(coder['method']))
  506. data = getattr(self, decoder)(coder, data, level)
  507. level += 1
  508. return data
  509. def _read_copy(self, coder, input, level):
  510. size = self._uncompressed[level]
  511. if not input:
  512. self._file.seek(self._src_start)
  513. input = self._file.read(size)
  514. return input[self._start:self._start+size]
  515. def _read_from_decompressor(self, coder, decompressor, input, level, checkremaining=False, with_cache=False):
  516. size = self._uncompressed[level]
  517. data = ''
  518. idx = 0
  519. cnt = 0
  520. properties = coder.get('properties', None)
  521. if properties:
  522. decompressor.decompress(properties)
  523. total = self.compressed
  524. if not input and total is None:
  525. remaining = self._start+size
  526. out = BytesIO()
  527. cache = getattr(self._folder, '_decompress_cache', None)
  528. if cache is not None:
  529. data, pos, decompressor = cache
  530. out.write(data)
  531. remaining -= len(data)
  532. self._file.seek(pos)
  533. else:
  534. self._file.seek(self._src_start)
  535. checkremaining = checkremaining and not self._folder.solid
  536. while remaining > 0:
  537. data = self._file.read(READ_BLOCKSIZE)
  538. if checkremaining or (with_cache and len(data) < READ_BLOCKSIZE):
  539. tmp = decompressor.decompress(data, remaining)
  540. else:
  541. tmp = decompressor.decompress(data)
  542. if not tmp and not data:
  543. raise DecompressionError('end of stream while decompressing')
  544. out.write(tmp)
  545. remaining -= len(tmp)
  546. data = out.getvalue()
  547. if with_cache and self._folder.solid:
  548. # don't decompress start of solid archive for next file
  549. # TODO: limit size of cached data
  550. self._folder._decompress_cache = (data, self._file.tell(), decompressor)
  551. else:
  552. if not input:
  553. self._file.seek(self._src_start)
  554. input = self._file.read(total)
  555. if checkremaining:
  556. data = decompressor.decompress(input, self._start+size)
  557. else:
  558. data = decompressor.decompress(input)
  559. return data[self._start:self._start+size]
  560. def _read_lzma(self, coder, input, level):
  561. size = self._uncompressed[level]
  562. dec = pylzma.decompressobj(maxlength=self._start+size)
  563. try:
  564. return self._read_from_decompressor(coder, dec, input, level, checkremaining=True, with_cache=True)
  565. except ValueError:
  566. if self._is_encrypted():
  567. raise WrongPasswordError('invalid password')
  568. raise
  569. def _read_zip(self, coder, input, level):
  570. dec = zlib.decompressobj(-15)
  571. return self._read_from_decompressor(coder, dec, input, level, checkremaining=True)
  572. def _read_bzip(self, coder, input, level):
  573. dec = bz2.BZ2Decompressor()
  574. return self._read_from_decompressor(coder, dec, input, level)
  575. def _read_7z_aes256_sha256(self, coder, input, level):
  576. if not self._archive.password:
  577. raise NoPasswordGivenError()
  578. # TODO: this needs some sanity checks
  579. firstbyte = coder['properties'][0]
  580. if not IS_PYTHON3:
  581. firstbyte = ord(firstbyte)
  582. numcyclespower = firstbyte & 0x3f
  583. if firstbyte & 0xc0 != 0:
  584. saltsize = (firstbyte >> 7) & 1
  585. ivsize = (firstbyte >> 6) & 1
  586. secondbyte = coder['properties'][1]
  587. if not IS_PYTHON3:
  588. secondbyte = ord(secondbyte)
  589. saltsize += (secondbyte >> 4)
  590. ivsize += (secondbyte & 0x0f)
  591. assert len(coder['properties']) == 2+saltsize+ivsize
  592. salt = coder['properties'][2:2+saltsize]
  593. iv = coder['properties'][2+saltsize:2+saltsize+ivsize]
  594. assert len(salt) == saltsize
  595. assert len(iv) == ivsize
  596. assert numcyclespower <= 24
  597. if ivsize < 16:
  598. iv += bytes('\x00'*(16-ivsize), 'ascii')
  599. else:
  600. salt = iv = bytes('', 'ascii')
  601. password = self._archive.password.encode('utf-16-le')
  602. key = pylzma.calculate_key(password, numcyclespower, salt=salt)
  603. cipher = pylzma.AESDecrypt(key, iv=iv)
  604. if not input:
  605. self._file.seek(self._src_start)
  606. uncompressed_size = self._uncompressed[level]
  607. if uncompressed_size & 0x0f:
  608. # we need a multiple of 16 bytes
  609. uncompressed_size += 16 - (uncompressed_size & 0x0f)
  610. input = self._file.read(uncompressed_size)
  611. result = cipher.decrypt(input)
  612. return result
  613. def checkcrc(self):
  614. if self.digest is None:
  615. return True
  616. self.reset()
  617. data = self.read()
  618. return super(ArchiveFile, self).checkcrc(self.digest, data)
  619. class Archive7z(Base):
  620. """ the archive itself """
  621. def __init__(self, file, password=None):
  622. self._file = file
  623. self.password = password
  624. self.header = file.read(len(MAGIC_7Z))
  625. if self.header != MAGIC_7Z:
  626. raise FormatError('not a 7z file')
  627. self.version = unpack('BB', file.read(2))
  628. self.startheadercrc = unpack('<L', file.read(4))[0]
  629. self.nextheaderofs, data = self._readReal64Bit(file)
  630. crc = calculate_crc32(data)
  631. self.nextheadersize, data = self._readReal64Bit(file)
  632. crc = calculate_crc32(data, crc)
  633. data = file.read(4)
  634. self.nextheadercrc = unpack('<L', data)[0]
  635. crc = calculate_crc32(data, crc)
  636. if crc != self.startheadercrc:
  637. raise FormatError('invalid header data')
  638. self.afterheader = file.tell()
  639. file.seek(self.nextheaderofs, 1)
  640. buffer = BytesIO(file.read(self.nextheadersize))
  641. if not self.checkcrc(self.nextheadercrc, buffer.getvalue()):
  642. raise FormatError('invalid header data')
  643. while True:
  644. id = buffer.read(1)
  645. if not id or id == PROPERTY_HEADER:
  646. break
  647. if id != PROPERTY_ENCODED_HEADER:
  648. raise TypeError('Unknown field: %r' % (id))
  649. streams = StreamsInfo(buffer)
  650. file.seek(self.afterheader + 0)
  651. data = bytes('', 'ascii')
  652. src_start = self.afterheader
  653. for folder in streams.unpackinfo.folders:
  654. if folder.isEncrypted() and not password:
  655. raise NoPasswordGivenError()
  656. src_start += streams.packinfo.packpos
  657. uncompressed = folder.unpacksizes
  658. if not isinstance(uncompressed, (list, tuple)):
  659. uncompressed = [uncompressed] * len(folder.coders)
  660. info = {
  661. 'compressed': streams.packinfo.packsizes[0],
  662. '_uncompressed': uncompressed,
  663. }
  664. tmp = ArchiveFile(info, 0, src_start, folder, self)
  665. folderdata = tmp.read()
  666. src_start += uncompressed[-1]
  667. if folder.digestdefined:
  668. if not self.checkcrc(folder.crc, folderdata):
  669. raise FormatError('invalid block data')
  670. data += folderdata
  671. buffer = BytesIO(data)
  672. self.files = []
  673. self.files_map = {}
  674. if not id:
  675. # empty archive
  676. self.solid = False
  677. self.numfiles = 0
  678. self.filenames = []
  679. return
  680. self.header = Header(buffer)
  681. files = self.header.files
  682. folders = self.header.main_streams.unpackinfo.folders
  683. packinfo = self.header.main_streams.packinfo
  684. subinfo = self.header.main_streams.substreamsinfo
  685. packsizes = packinfo.packsizes
  686. self.solid = packinfo.numstreams == 1
  687. if hasattr(subinfo, 'unpacksizes'):
  688. unpacksizes = subinfo.unpacksizes
  689. else:
  690. unpacksizes = [x.unpacksizes for x in folders]
  691. fidx = 0
  692. obidx = 0
  693. streamidx = 0
  694. src_pos = self.afterheader
  695. pos = 0
  696. folder_pos = src_pos
  697. maxsize = (self.solid and packinfo.packsizes[0]) or None
  698. for info in files.files:
  699. if info['emptystream']:
  700. continue
  701. folder = folders[fidx]
  702. if streamidx == 0:
  703. folder.solid = subinfo.numunpackstreams[fidx] > 1
  704. maxsize = (folder.solid and packinfo.packsizes[fidx]) or None
  705. uncompressed = unpacksizes[obidx]
  706. if not isinstance(uncompressed, (list, tuple)):
  707. uncompressed = [uncompressed] * len(folder.coders)
  708. if pos > 0:
  709. # file is part of solid archive
  710. info['compressed'] = None
  711. elif fidx < len(packsizes):
  712. # file is compressed
  713. info['compressed'] = packsizes[fidx]
  714. else:
  715. # file is not compressed
  716. info['compressed'] = uncompressed
  717. info['_uncompressed'] = uncompressed
  718. file = ArchiveFile(info, pos, src_pos, folder, self, maxsize=maxsize)
  719. if subinfo.digestsdefined[obidx]:
  720. file.digest = subinfo.digests[obidx]
  721. self.files.append(file)
  722. if folder.solid:
  723. pos += unpacksizes[obidx]
  724. else:
  725. src_pos += info['compressed']
  726. obidx += 1
  727. streamidx += 1
  728. if streamidx >= subinfo.numunpackstreams[fidx]:
  729. pos = 0
  730. folder_pos += packinfo.packsizes[fidx]
  731. src_pos = folder_pos
  732. fidx += 1
  733. streamidx = 0
  734. self.numfiles = len(self.files)
  735. self.filenames = list(map(lambda x: x.filename, self.files))
  736. self.files_map.update([(x.filename, x) for x in self.files])
  737. # interface like TarFile
  738. def getmember(self, name):
  739. if isinstance(name, (int, long)):
  740. try:
  741. return self.files[name]
  742. except IndexError:
  743. return None
  744. return self.files_map.get(name, None)
  745. def getmembers(self):
  746. return self.files
  747. def getnames(self):
  748. return self.filenames
  749. def list(self, verbose=True, file=sys.stdout):
  750. file.write('total %d files in %sarchive\n' % (self.numfiles, (self.solid and 'solid ') or ''))
  751. if not verbose:
  752. file.write('\n'.join(self.filenames) + '\n')
  753. return
  754. for f in self.files:
  755. extra = (f.compressed and '%10d ' % (f.compressed)) or ' '
  756. file.write('%10d%s%.8x %s\n' % (f.size, extra, f.digest, f.filename))
  757. if __name__ == '__main__':
  758. f = Archive7z(open('test.7z', 'rb'))
  759. #f = Archive7z(open('pylzma.7z', 'rb'))
  760. f.list()