hashes.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. from __future__ import absolute_import
  2. import hashlib
  3. from pip._vendor.six import iteritems, iterkeys, itervalues
  4. from pip._internal.exceptions import HashMismatch, HashMissing, InstallationError
  5. from pip._internal.utils.misc import read_chunks
  6. from pip._internal.utils.typing import MYPY_CHECK_RUNNING
  7. if MYPY_CHECK_RUNNING:
  8. from typing import BinaryIO, Dict, Iterator, List, NoReturn
  9. from pip._vendor.six import PY3
  10. if PY3:
  11. from hashlib import _Hash
  12. else:
  13. from hashlib import _hash as _Hash
# The recommended hash algo of the moment. Change this whenever the state of
# the art changes; it won't hurt backward compatibility.
FAVORITE_HASH = 'sha256'


# Names of hashlib algorithms allowed by the --hash option and ``pip hash``.
# Currently, those are the ones at least as collision-resistant as sha256.
STRONG_HASHES = ['sha256', 'sha384', 'sha512']
  20. class Hashes(object):
  21. """A wrapper that builds multiple hashes at once and checks them against
  22. known-good values
  23. """
  24. def __init__(self, hashes=None):
  25. # type: (Dict[str, List[str]]) -> None
  26. """
  27. :param hashes: A dict of algorithm names pointing to lists of allowed
  28. hex digests
  29. """
  30. allowed = {}
  31. if hashes is not None:
  32. for alg, keys in hashes.items():
  33. # Make sure values are always sorted (to ease equality checks)
  34. allowed[alg] = sorted(keys)
  35. self._allowed = allowed
  36. def __and__(self, other):
  37. # type: (Hashes) -> Hashes
  38. if not isinstance(other, Hashes):
  39. return NotImplemented
  40. # If either of the Hashes object is entirely empty (i.e. no hash
  41. # specified at all), all hashes from the other object are allowed.
  42. if not other:
  43. return self
  44. if not self:
  45. return other
  46. # Otherwise only hashes that present in both objects are allowed.
  47. new = {}
  48. for alg, values in iteritems(other._allowed):
  49. if alg not in self._allowed:
  50. continue
  51. new[alg] = [v for v in values if v in self._allowed[alg]]
  52. return Hashes(new)
  53. @property
  54. def digest_count(self):
  55. # type: () -> int
  56. return sum(len(digests) for digests in self._allowed.values())
  57. def is_hash_allowed(
  58. self,
  59. hash_name, # type: str
  60. hex_digest, # type: str
  61. ):
  62. # type: (...) -> bool
  63. """Return whether the given hex digest is allowed."""
  64. return hex_digest in self._allowed.get(hash_name, [])
  65. def check_against_chunks(self, chunks):
  66. # type: (Iterator[bytes]) -> None
  67. """Check good hashes against ones built from iterable of chunks of
  68. data.
  69. Raise HashMismatch if none match.
  70. """
  71. gots = {}
  72. for hash_name in iterkeys(self._allowed):
  73. try:
  74. gots[hash_name] = hashlib.new(hash_name)
  75. except (ValueError, TypeError):
  76. raise InstallationError(
  77. 'Unknown hash name: {}'.format(hash_name)
  78. )
  79. for chunk in chunks:
  80. for hash in itervalues(gots):
  81. hash.update(chunk)
  82. for hash_name, got in iteritems(gots):
  83. if got.hexdigest() in self._allowed[hash_name]:
  84. return
  85. self._raise(gots)
  86. def _raise(self, gots):
  87. # type: (Dict[str, _Hash]) -> NoReturn
  88. raise HashMismatch(self._allowed, gots)
  89. def check_against_file(self, file):
  90. # type: (BinaryIO) -> None
  91. """Check good hashes against a file-like object
  92. Raise HashMismatch if none match.
  93. """
  94. return self.check_against_chunks(read_chunks(file))
  95. def check_against_path(self, path):
  96. # type: (str) -> None
  97. with open(path, 'rb') as file:
  98. return self.check_against_file(file)
  99. def __nonzero__(self):
  100. # type: () -> bool
  101. """Return whether I know any known-good hashes."""
  102. return bool(self._allowed)
  103. def __bool__(self):
  104. # type: () -> bool
  105. return self.__nonzero__()
  106. def __eq__(self, other):
  107. # type: (object) -> bool
  108. if not isinstance(other, Hashes):
  109. return NotImplemented
  110. return self._allowed == other._allowed
  111. def __hash__(self):
  112. # type: () -> int
  113. return hash(
  114. ",".join(sorted(
  115. ":".join((alg, digest))
  116. for alg, digest_list in self._allowed.items()
  117. for digest in digest_list
  118. ))
  119. )
  120. class MissingHashes(Hashes):
  121. """A workalike for Hashes used when we're missing a hash for a requirement
  122. It computes the actual hash of the requirement and raises a HashMissing
  123. exception showing it to the user.
  124. """
  125. def __init__(self):
  126. # type: () -> None
  127. """Don't offer the ``hashes`` kwarg."""
  128. # Pass our favorite hash in to generate a "gotten hash". With the
  129. # empty list, it will never match, so an error will always raise.
  130. super(MissingHashes, self).__init__(hashes={FAVORITE_HASH: []})
  131. def _raise(self, gots):
  132. # type: (Dict[str, _Hash]) -> NoReturn
  133. raise HashMissing(gots[FAVORITE_HASH].hexdigest())