prepare.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608
  1. """Prepares a distribution for installation
  2. """
  3. # The following comment should be removed at some point in the future.
  4. # mypy: strict-optional=False
  5. import logging
  6. import mimetypes
  7. import os
  8. import shutil
  9. from pip._vendor.packaging.utils import canonicalize_name
  10. from pip._vendor.six import PY2
  11. from pip._internal.distributions import make_distribution_for_install_requirement
  12. from pip._internal.distributions.installed import InstalledDistribution
  13. from pip._internal.exceptions import (
  14. DirectoryUrlHashUnsupported,
  15. HashMismatch,
  16. HashUnpinned,
  17. InstallationError,
  18. NetworkConnectionError,
  19. PreviousBuildDirError,
  20. VcsHashUnsupported,
  21. )
  22. from pip._internal.models.wheel import Wheel
  23. from pip._internal.network.download import BatchDownloader, Downloader
  24. from pip._internal.network.lazy_wheel import (
  25. HTTPRangeRequestUnsupported,
  26. dist_from_wheel_url,
  27. )
  28. from pip._internal.utils.filesystem import copy2_fixed
  29. from pip._internal.utils.hashes import MissingHashes
  30. from pip._internal.utils.logging import indent_log
  31. from pip._internal.utils.misc import display_path, hide_url, path_to_display, rmtree
  32. from pip._internal.utils.temp_dir import TempDirectory
  33. from pip._internal.utils.typing import MYPY_CHECK_RUNNING
  34. from pip._internal.utils.unpacking import unpack_file
  35. from pip._internal.vcs import vcs
# Imports below are needed only for type checking; MYPY_CHECK_RUNNING is
# False at runtime, so none of them are imported when pip actually executes.
if MYPY_CHECK_RUNNING:
    from typing import Callable, Dict, Iterable, List, Optional, Tuple

    from mypy_extensions import TypedDict

    from pip._vendor.pkg_resources import Distribution

    from pip._internal.index.package_finder import PackageFinder
    from pip._internal.models.link import Link
    from pip._internal.network.session import PipSession
    from pip._internal.req.req_install import InstallRequirement
    from pip._internal.req.req_tracker import RequirementTracker
    from pip._internal.utils.hashes import Hashes

    # Keyword arguments forwarded to shutil.copytree().  Python 2's
    # copytree does not accept copy_function or ignore_dangling_symlinks,
    # hence the two variants.
    if PY2:
        CopytreeKwargs = TypedDict(
            'CopytreeKwargs',
            {
                'ignore': Callable[[str, List[str]], List[str]],
                'symlinks': bool,
            },
            total=False,
        )
    else:
        CopytreeKwargs = TypedDict(
            'CopytreeKwargs',
            {
                'copy_function': Callable[[str, str], None],
                'ignore': Callable[[str, List[str]], List[str]],
                'ignore_dangling_symlinks': bool,
                'symlinks': bool,
            },
            total=False,
        )

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
  67. def _get_prepared_distribution(
  68. req, # type: InstallRequirement
  69. req_tracker, # type: RequirementTracker
  70. finder, # type: PackageFinder
  71. build_isolation, # type: bool
  72. ):
  73. # type: (...) -> Distribution
  74. """Prepare a distribution for installation."""
  75. abstract_dist = make_distribution_for_install_requirement(req)
  76. with req_tracker.track(req):
  77. abstract_dist.prepare_distribution_metadata(finder, build_isolation)
  78. return abstract_dist.get_pkg_resources_distribution()
  79. def unpack_vcs_link(link, location):
  80. # type: (Link, str) -> None
  81. vcs_backend = vcs.get_backend_for_scheme(link.scheme)
  82. assert vcs_backend is not None
  83. vcs_backend.unpack(location, url=hide_url(link.url))
  84. class File(object):
  85. def __init__(self, path, content_type):
  86. # type: (str, Optional[str]) -> None
  87. self.path = path
  88. if content_type is None:
  89. self.content_type = mimetypes.guess_type(path)[0]
  90. else:
  91. self.content_type = content_type
  92. def get_http_url(
  93. link, # type: Link
  94. download, # type: Downloader
  95. download_dir=None, # type: Optional[str]
  96. hashes=None, # type: Optional[Hashes]
  97. ):
  98. # type: (...) -> File
  99. temp_dir = TempDirectory(kind="unpack", globally_managed=True)
  100. # If a download dir is specified, is the file already downloaded there?
  101. already_downloaded_path = None
  102. if download_dir:
  103. already_downloaded_path = _check_download_dir(
  104. link, download_dir, hashes
  105. )
  106. if already_downloaded_path:
  107. from_path = already_downloaded_path
  108. content_type = None
  109. else:
  110. # let's download to a tmp dir
  111. from_path, content_type = download(link, temp_dir.path)
  112. if hashes:
  113. hashes.check_against_path(from_path)
  114. return File(from_path, content_type)
  115. def _copy2_ignoring_special_files(src, dest):
  116. # type: (str, str) -> None
  117. """Copying special files is not supported, but as a convenience to users
  118. we skip errors copying them. This supports tools that may create e.g.
  119. socket files in the project source directory.
  120. """
  121. try:
  122. copy2_fixed(src, dest)
  123. except shutil.SpecialFileError as e:
  124. # SpecialFileError may be raised due to either the source or
  125. # destination. If the destination was the cause then we would actually
  126. # care, but since the destination directory is deleted prior to
  127. # copy we ignore all of them assuming it is caused by the source.
  128. logger.warning(
  129. "Ignoring special file error '%s' encountered copying %s to %s.",
  130. str(e),
  131. path_to_display(src),
  132. path_to_display(dest),
  133. )
  134. def _copy_source_tree(source, target):
  135. # type: (str, str) -> None
  136. target_abspath = os.path.abspath(target)
  137. target_basename = os.path.basename(target_abspath)
  138. target_dirname = os.path.dirname(target_abspath)
  139. def ignore(d, names):
  140. # type: (str, List[str]) -> List[str]
  141. skipped = [] # type: List[str]
  142. if d == source:
  143. # Pulling in those directories can potentially be very slow,
  144. # exclude the following directories if they appear in the top
  145. # level dir (and only it).
  146. # See discussion at https://github.com/pypa/pip/pull/6770
  147. skipped += ['.tox', '.nox']
  148. if os.path.abspath(d) == target_dirname:
  149. # Prevent an infinite recursion if the target is in source.
  150. # This can happen when TMPDIR is set to ${PWD}/...
  151. # and we copy PWD to TMPDIR.
  152. skipped += [target_basename]
  153. return skipped
  154. kwargs = dict(ignore=ignore, symlinks=True) # type: CopytreeKwargs
  155. if not PY2:
  156. # Python 2 does not support copy_function, so we only ignore
  157. # errors on special file copy in Python 3.
  158. kwargs['copy_function'] = _copy2_ignoring_special_files
  159. shutil.copytree(source, target, **kwargs)
  160. def get_file_url(
  161. link, # type: Link
  162. download_dir=None, # type: Optional[str]
  163. hashes=None # type: Optional[Hashes]
  164. ):
  165. # type: (...) -> File
  166. """Get file and optionally check its hash.
  167. """
  168. # If a download dir is specified, is the file already there and valid?
  169. already_downloaded_path = None
  170. if download_dir:
  171. already_downloaded_path = _check_download_dir(
  172. link, download_dir, hashes
  173. )
  174. if already_downloaded_path:
  175. from_path = already_downloaded_path
  176. else:
  177. from_path = link.file_path
  178. # If --require-hashes is off, `hashes` is either empty, the
  179. # link's embedded hash, or MissingHashes; it is required to
  180. # match. If --require-hashes is on, we are satisfied by any
  181. # hash in `hashes` matching: a URL-based or an option-based
  182. # one; no internet-sourced hash will be in `hashes`.
  183. if hashes:
  184. hashes.check_against_path(from_path)
  185. return File(from_path, None)
  186. def unpack_url(
  187. link, # type: Link
  188. location, # type: str
  189. download, # type: Downloader
  190. download_dir=None, # type: Optional[str]
  191. hashes=None, # type: Optional[Hashes]
  192. ):
  193. # type: (...) -> Optional[File]
  194. """Unpack link into location, downloading if required.
  195. :param hashes: A Hashes object, one of whose embedded hashes must match,
  196. or HashMismatch will be raised. If the Hashes is empty, no matches are
  197. required, and unhashable types of requirements (like VCS ones, which
  198. would ordinarily raise HashUnsupported) are allowed.
  199. """
  200. # non-editable vcs urls
  201. if link.is_vcs:
  202. unpack_vcs_link(link, location)
  203. return None
  204. # If it's a url to a local directory
  205. if link.is_existing_dir():
  206. if os.path.isdir(location):
  207. rmtree(location)
  208. _copy_source_tree(link.file_path, location)
  209. return None
  210. # file urls
  211. if link.is_file:
  212. file = get_file_url(link, download_dir, hashes=hashes)
  213. # http urls
  214. else:
  215. file = get_http_url(
  216. link,
  217. download,
  218. download_dir,
  219. hashes=hashes,
  220. )
  221. # unpack the archive to the build dir location. even when only downloading
  222. # archives, they have to be unpacked to parse dependencies, except wheels
  223. if not link.is_wheel:
  224. unpack_file(file.path, location, file.content_type)
  225. return file
  226. def _check_download_dir(link, download_dir, hashes):
  227. # type: (Link, str, Optional[Hashes]) -> Optional[str]
  228. """ Check download_dir for previously downloaded file with correct hash
  229. If a correct file is found return its path else None
  230. """
  231. download_path = os.path.join(download_dir, link.filename)
  232. if not os.path.exists(download_path):
  233. return None
  234. # If already downloaded, does its hash match?
  235. logger.info('File was already downloaded %s', download_path)
  236. if hashes:
  237. try:
  238. hashes.check_against_path(download_path)
  239. except HashMismatch:
  240. logger.warning(
  241. 'Previously-downloaded file %s has bad hash. '
  242. 'Re-downloading.',
  243. download_path
  244. )
  245. os.unlink(download_path)
  246. return None
  247. return download_path
  248. class RequirementPreparer(object):
  249. """Prepares a Requirement
  250. """
  251. def __init__(
  252. self,
  253. build_dir, # type: str
  254. download_dir, # type: Optional[str]
  255. src_dir, # type: str
  256. build_isolation, # type: bool
  257. req_tracker, # type: RequirementTracker
  258. session, # type: PipSession
  259. progress_bar, # type: str
  260. finder, # type: PackageFinder
  261. require_hashes, # type: bool
  262. use_user_site, # type: bool
  263. lazy_wheel, # type: bool
  264. ):
  265. # type: (...) -> None
  266. super(RequirementPreparer, self).__init__()
  267. self.src_dir = src_dir
  268. self.build_dir = build_dir
  269. self.req_tracker = req_tracker
  270. self._session = session
  271. self._download = Downloader(session, progress_bar)
  272. self._batch_download = BatchDownloader(session, progress_bar)
  273. self.finder = finder
  274. # Where still-packed archives should be written to. If None, they are
  275. # not saved, and are deleted immediately after unpacking.
  276. self.download_dir = download_dir
  277. # Is build isolation allowed?
  278. self.build_isolation = build_isolation
  279. # Should hash-checking be required?
  280. self.require_hashes = require_hashes
  281. # Should install in user site-packages?
  282. self.use_user_site = use_user_site
  283. # Should wheels be downloaded lazily?
  284. self.use_lazy_wheel = lazy_wheel
  285. # Memoized downloaded files, as mapping of url: (path, mime type)
  286. self._downloaded = {} # type: Dict[str, Tuple[str, str]]
  287. # Previous "header" printed for a link-based InstallRequirement
  288. self._previous_requirement_header = ("", "")
  289. def _log_preparing_link(self, req):
  290. # type: (InstallRequirement) -> None
  291. """Provide context for the requirement being prepared."""
  292. if req.link.is_file and not req.original_link_is_in_wheel_cache:
  293. message = "Processing %s"
  294. information = str(display_path(req.link.file_path))
  295. else:
  296. message = "Collecting %s"
  297. information = str(req.req or req)
  298. if (message, information) != self._previous_requirement_header:
  299. self._previous_requirement_header = (message, information)
  300. logger.info(message, information)
  301. if req.original_link_is_in_wheel_cache:
  302. with indent_log():
  303. logger.info("Using cached %s", req.link.filename)
  304. def _ensure_link_req_src_dir(self, req, parallel_builds):
  305. # type: (InstallRequirement, bool) -> None
  306. """Ensure source_dir of a linked InstallRequirement."""
  307. # Since source_dir is only set for editable requirements.
  308. if req.link.is_wheel:
  309. # We don't need to unpack wheels, so no need for a source
  310. # directory.
  311. return
  312. assert req.source_dir is None
  313. # We always delete unpacked sdists after pip runs.
  314. req.ensure_has_source_dir(
  315. self.build_dir,
  316. autodelete=True,
  317. parallel_builds=parallel_builds,
  318. )
  319. # If a checkout exists, it's unwise to keep going. version
  320. # inconsistencies are logged later, but do not fail the
  321. # installation.
  322. # FIXME: this won't upgrade when there's an existing
  323. # package unpacked in `req.source_dir`
  324. if os.path.exists(os.path.join(req.source_dir, 'setup.py')):
  325. raise PreviousBuildDirError(
  326. "pip can't proceed with requirements '{}' due to a"
  327. "pre-existing build directory ({}). This is likely "
  328. "due to a previous installation that failed . pip is "
  329. "being responsible and not assuming it can delete this. "
  330. "Please delete it and try again.".format(req, req.source_dir)
  331. )
  332. def _get_linked_req_hashes(self, req):
  333. # type: (InstallRequirement) -> Hashes
  334. # By the time this is called, the requirement's link should have
  335. # been checked so we can tell what kind of requirements req is
  336. # and raise some more informative errors than otherwise.
  337. # (For example, we can raise VcsHashUnsupported for a VCS URL
  338. # rather than HashMissing.)
  339. if not self.require_hashes:
  340. return req.hashes(trust_internet=True)
  341. # We could check these first 2 conditions inside unpack_url
  342. # and save repetition of conditions, but then we would
  343. # report less-useful error messages for unhashable
  344. # requirements, complaining that there's no hash provided.
  345. if req.link.is_vcs:
  346. raise VcsHashUnsupported()
  347. if req.link.is_existing_dir():
  348. raise DirectoryUrlHashUnsupported()
  349. # Unpinned packages are asking for trouble when a new version
  350. # is uploaded. This isn't a security check, but it saves users
  351. # a surprising hash mismatch in the future.
  352. # file:/// URLs aren't pinnable, so don't complain about them
  353. # not being pinned.
  354. if req.original_link is None and not req.is_pinned:
  355. raise HashUnpinned()
  356. # If known-good hashes are missing for this requirement,
  357. # shim it with a facade object that will provoke hash
  358. # computation and then raise a HashMissing exception
  359. # showing the user what the hash should be.
  360. return req.hashes(trust_internet=False) or MissingHashes()
  361. def _fetch_metadata_using_lazy_wheel(self, link):
  362. # type: (Link) -> Optional[Distribution]
  363. """Fetch metadata using lazy wheel, if possible."""
  364. if not self.use_lazy_wheel:
  365. return None
  366. if self.require_hashes:
  367. logger.debug('Lazy wheel is not used as hash checking is required')
  368. return None
  369. if link.is_file or not link.is_wheel:
  370. logger.debug(
  371. 'Lazy wheel is not used as '
  372. '%r does not points to a remote wheel',
  373. link,
  374. )
  375. return None
  376. wheel = Wheel(link.filename)
  377. name = canonicalize_name(wheel.name)
  378. logger.info(
  379. 'Obtaining dependency information from %s %s',
  380. name, wheel.version,
  381. )
  382. url = link.url.split('#', 1)[0]
  383. try:
  384. return dist_from_wheel_url(name, url, self._session)
  385. except HTTPRangeRequestUnsupported:
  386. logger.debug('%s does not support range requests', url)
  387. return None
  388. def prepare_linked_requirement(self, req, parallel_builds=False):
  389. # type: (InstallRequirement, bool) -> Distribution
  390. """Prepare a requirement to be obtained from req.link."""
  391. assert req.link
  392. link = req.link
  393. self._log_preparing_link(req)
  394. with indent_log():
  395. # Check if the relevant file is already available
  396. # in the download directory
  397. file_path = None
  398. if self.download_dir is not None and link.is_wheel:
  399. hashes = self._get_linked_req_hashes(req)
  400. file_path = _check_download_dir(req.link, self.download_dir, hashes)
  401. if file_path is not None:
  402. # The file is already available, so mark it as downloaded
  403. self._downloaded[req.link.url] = file_path, None
  404. else:
  405. # The file is not available, attempt to fetch only metadata
  406. wheel_dist = self._fetch_metadata_using_lazy_wheel(link)
  407. if wheel_dist is not None:
  408. req.needs_more_preparation = True
  409. return wheel_dist
  410. # None of the optimizations worked, fully prepare the requirement
  411. return self._prepare_linked_requirement(req, parallel_builds)
  412. def prepare_linked_requirements_more(self, reqs, parallel_builds=False):
  413. # type: (Iterable[InstallRequirement], bool) -> None
  414. """Prepare a linked requirement more, if needed."""
  415. reqs = [req for req in reqs if req.needs_more_preparation]
  416. links = [req.link for req in reqs]
  417. # Let's download to a temporary directory.
  418. tmpdir = TempDirectory(kind="unpack", globally_managed=True).path
  419. self._downloaded.update(self._batch_download(links, tmpdir))
  420. for req in reqs:
  421. self._prepare_linked_requirement(req, parallel_builds)
  422. def _prepare_linked_requirement(self, req, parallel_builds):
  423. # type: (InstallRequirement, bool) -> Distribution
  424. assert req.link
  425. link = req.link
  426. self._ensure_link_req_src_dir(req, parallel_builds)
  427. hashes = self._get_linked_req_hashes(req)
  428. if link.url not in self._downloaded:
  429. try:
  430. local_file = unpack_url(
  431. link, req.source_dir, self._download,
  432. self.download_dir, hashes,
  433. )
  434. except NetworkConnectionError as exc:
  435. raise InstallationError(
  436. 'Could not install requirement {} because of HTTP '
  437. 'error {} for URL {}'.format(req, exc, link)
  438. )
  439. else:
  440. file_path, content_type = self._downloaded[link.url]
  441. if hashes:
  442. hashes.check_against_path(file_path)
  443. local_file = File(file_path, content_type)
  444. # For use in later processing,
  445. # preserve the file path on the requirement.
  446. if local_file:
  447. req.local_file_path = local_file.path
  448. dist = _get_prepared_distribution(
  449. req, self.req_tracker, self.finder, self.build_isolation,
  450. )
  451. return dist
  452. def save_linked_requirement(self, req):
  453. # type: (InstallRequirement) -> None
  454. assert self.download_dir is not None
  455. assert req.link is not None
  456. link = req.link
  457. if link.is_vcs or (link.is_existing_dir() and req.editable):
  458. # Make a .zip of the source_dir we already created.
  459. req.archive(self.download_dir)
  460. return
  461. if link.is_existing_dir():
  462. logger.debug(
  463. 'Not copying link to destination directory '
  464. 'since it is a directory: %s', link,
  465. )
  466. return
  467. if req.local_file_path is None:
  468. # No distribution was downloaded for this requirement.
  469. return
  470. download_location = os.path.join(self.download_dir, link.filename)
  471. if not os.path.exists(download_location):
  472. shutil.copy(req.local_file_path, download_location)
  473. download_path = display_path(download_location)
  474. logger.info('Saved %s', download_path)
  475. def prepare_editable_requirement(
  476. self,
  477. req, # type: InstallRequirement
  478. ):
  479. # type: (...) -> Distribution
  480. """Prepare an editable requirement
  481. """
  482. assert req.editable, "cannot prepare a non-editable req as editable"
  483. logger.info('Obtaining %s', req)
  484. with indent_log():
  485. if self.require_hashes:
  486. raise InstallationError(
  487. 'The editable requirement {} cannot be installed when '
  488. 'requiring hashes, because there is no single file to '
  489. 'hash.'.format(req)
  490. )
  491. req.ensure_has_source_dir(self.src_dir)
  492. req.update_editable(self.download_dir is None)
  493. dist = _get_prepared_distribution(
  494. req, self.req_tracker, self.finder, self.build_isolation,
  495. )
  496. req.check_if_exists(self.use_user_site)
  497. return dist
  498. def prepare_installed_requirement(
  499. self,
  500. req, # type: InstallRequirement
  501. skip_reason # type: str
  502. ):
  503. # type: (...) -> Distribution
  504. """Prepare an already-installed requirement
  505. """
  506. assert req.satisfied_by, "req should have been satisfied but isn't"
  507. assert skip_reason is not None, (
  508. "did not get skip reason skipped but req.satisfied_by "
  509. "is set to {}".format(req.satisfied_by)
  510. )
  511. logger.info(
  512. 'Requirement %s: %s (%s)',
  513. skip_reason, req, req.satisfied_by.version
  514. )
  515. with indent_log():
  516. if self.require_hashes:
  517. logger.debug(
  518. 'Since it is already installed, we are trusting this '
  519. 'package without checking its hash. To ensure a '
  520. 'completely repeatable environment, install into an '
  521. 'empty virtualenv.'
  522. )
  523. return InstalledDistribution(req).get_pkg_resources_distribution()