session.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428
  1. """PipSession and supporting code, containing all pip-specific
  2. network request configuration and behavior.
  3. """
  4. # The following comment should be removed at some point in the future.
  5. # mypy: disallow-untyped-defs=False
  6. import email.utils
  7. import json
  8. import logging
  9. import mimetypes
  10. import os
  11. import platform
  12. import sys
  13. import warnings
  14. from pip._vendor import requests, six, urllib3
  15. from pip._vendor.cachecontrol import CacheControlAdapter
  16. from pip._vendor.requests.adapters import BaseAdapter, HTTPAdapter
  17. from pip._vendor.requests.models import Response
  18. from pip._vendor.requests.structures import CaseInsensitiveDict
  19. from pip._vendor.six.moves.urllib import parse as urllib_parse
  20. from pip._vendor.urllib3.exceptions import InsecureRequestWarning
  21. from pip import __version__
  22. from pip._internal.network.auth import MultiDomainBasicAuth
  23. from pip._internal.network.cache import SafeFileCache
  24. # Import ssl from compat so the initial import occurs in only one place.
  25. from pip._internal.utils.compat import has_tls, ipaddress
  26. from pip._internal.utils.glibc import libc_ver
  27. from pip._internal.utils.misc import (
  28. build_url_from_netloc,
  29. get_installed_version,
  30. parse_netloc,
  31. )
  32. from pip._internal.utils.typing import MYPY_CHECK_RUNNING
  33. from pip._internal.utils.urls import url_to_path
if MYPY_CHECK_RUNNING:
    # These names are referenced only from type comments in this module,
    # so they are imported only when MYPY_CHECK_RUNNING is true.
    from typing import Iterator, List, Optional, Tuple, Union

    from pip._internal.models.link import Link

    # A (protocol, hostname, port) triple, the element type of
    # SECURE_ORIGINS below.
    SecureOrigin = Tuple[str, str, Optional[Union[int, str]]]


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Ignore warning raised when using --trusted-host.
# This filter is installed at import time for every InsecureRequestWarning.
warnings.filterwarnings("ignore", category=InsecureRequestWarning)
  41. SECURE_ORIGINS = [
  42. # protocol, hostname, port
  43. # Taken from Chrome's list of secure origins (See: http://bit.ly/1qrySKC)
  44. ("https", "*", "*"),
  45. ("*", "localhost", "*"),
  46. ("*", "127.0.0.0/8", "*"),
  47. ("*", "::1/128", "*"),
  48. ("file", "*", None),
  49. # ssh is always secure.
  50. ("ssh", "*", "*"),
  51. ] # type: List[SecureOrigin]
  52. # These are environment variables present when running under various
  53. # CI systems. For each variable, some CI systems that use the variable
  54. # are indicated. The collection was chosen so that for each of a number
  55. # of popular systems, at least one of the environment variables is used.
  56. # This list is used to provide some indication of and lower bound for
  57. # CI traffic to PyPI. Thus, it is okay if the list is not comprehensive.
  58. # For more background, see: https://github.com/pypa/pip/issues/5499
  59. CI_ENVIRONMENT_VARIABLES = (
  60. # Azure Pipelines
  61. 'BUILD_BUILDID',
  62. # Jenkins
  63. 'BUILD_ID',
  64. # AppVeyor, CircleCI, Codeship, Gitlab CI, Shippable, Travis CI
  65. 'CI',
  66. # Explicit environment variable.
  67. 'PIP_IS_CI',
  68. )
  69. def looks_like_ci():
  70. # type: () -> bool
  71. """
  72. Return whether it looks like pip is running under CI.
  73. """
  74. # We don't use the method of checking for a tty (e.g. using isatty())
  75. # because some CI systems mimic a tty (e.g. Travis CI). Thus that
  76. # method doesn't provide definitive information in either direction.
  77. return any(name in os.environ for name in CI_ENVIRONMENT_VARIABLES)
  78. def user_agent():
  79. """
  80. Return a string representing the user agent.
  81. """
  82. data = {
  83. "installer": {"name": "pip", "version": __version__},
  84. "python": platform.python_version(),
  85. "implementation": {
  86. "name": platform.python_implementation(),
  87. },
  88. }
  89. if data["implementation"]["name"] == 'CPython':
  90. data["implementation"]["version"] = platform.python_version()
  91. elif data["implementation"]["name"] == 'PyPy':
  92. if sys.pypy_version_info.releaselevel == 'final':
  93. pypy_version_info = sys.pypy_version_info[:3]
  94. else:
  95. pypy_version_info = sys.pypy_version_info
  96. data["implementation"]["version"] = ".".join(
  97. [str(x) for x in pypy_version_info]
  98. )
  99. elif data["implementation"]["name"] == 'Jython':
  100. # Complete Guess
  101. data["implementation"]["version"] = platform.python_version()
  102. elif data["implementation"]["name"] == 'IronPython':
  103. # Complete Guess
  104. data["implementation"]["version"] = platform.python_version()
  105. if sys.platform.startswith("linux"):
  106. from pip._vendor import distro
  107. distro_infos = dict(filter(
  108. lambda x: x[1],
  109. zip(["name", "version", "id"], distro.linux_distribution()),
  110. ))
  111. libc = dict(filter(
  112. lambda x: x[1],
  113. zip(["lib", "version"], libc_ver()),
  114. ))
  115. if libc:
  116. distro_infos["libc"] = libc
  117. if distro_infos:
  118. data["distro"] = distro_infos
  119. if sys.platform.startswith("darwin") and platform.mac_ver()[0]:
  120. data["distro"] = {"name": "macOS", "version": platform.mac_ver()[0]}
  121. if platform.system():
  122. data.setdefault("system", {})["name"] = platform.system()
  123. if platform.release():
  124. data.setdefault("system", {})["release"] = platform.release()
  125. if platform.machine():
  126. data["cpu"] = platform.machine()
  127. if has_tls():
  128. import _ssl as ssl
  129. data["openssl_version"] = ssl.OPENSSL_VERSION
  130. setuptools_version = get_installed_version("setuptools")
  131. if setuptools_version is not None:
  132. data["setuptools_version"] = setuptools_version
  133. # Use None rather than False so as not to give the impression that
  134. # pip knows it is not being run under CI. Rather, it is a null or
  135. # inconclusive result. Also, we include some value rather than no
  136. # value to make it easier to know that the check has been run.
  137. data["ci"] = True if looks_like_ci() else None
  138. user_data = os.environ.get("PIP_USER_AGENT_USER_DATA")
  139. if user_data is not None:
  140. data["user_data"] = user_data
  141. return "{data[installer][name]}/{data[installer][version]} {json}".format(
  142. data=data,
  143. json=json.dumps(data, separators=(",", ":"), sort_keys=True),
  144. )
  145. class LocalFSAdapter(BaseAdapter):
  146. def send(self, request, stream=None, timeout=None, verify=None, cert=None,
  147. proxies=None):
  148. pathname = url_to_path(request.url)
  149. resp = Response()
  150. resp.status_code = 200
  151. resp.url = request.url
  152. try:
  153. stats = os.stat(pathname)
  154. except OSError as exc:
  155. resp.status_code = 404
  156. resp.raw = exc
  157. else:
  158. modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
  159. content_type = mimetypes.guess_type(pathname)[0] or "text/plain"
  160. resp.headers = CaseInsensitiveDict({
  161. "Content-Type": content_type,
  162. "Content-Length": stats.st_size,
  163. "Last-Modified": modified,
  164. })
  165. resp.raw = open(pathname, "rb")
  166. resp.close = resp.raw.close
  167. return resp
  168. def close(self):
  169. pass
  170. class InsecureHTTPAdapter(HTTPAdapter):
  171. def cert_verify(self, conn, url, verify, cert):
  172. super(InsecureHTTPAdapter, self).cert_verify(
  173. conn=conn, url=url, verify=False, cert=cert
  174. )
  175. class InsecureCacheControlAdapter(CacheControlAdapter):
  176. def cert_verify(self, conn, url, verify, cert):
  177. super(InsecureCacheControlAdapter, self).cert_verify(
  178. conn=conn, url=url, verify=False, cert=cert
  179. )
  180. class PipSession(requests.Session):
  181. timeout = None # type: Optional[int]
  182. def __init__(self, *args, **kwargs):
  183. """
  184. :param trusted_hosts: Domains not to emit warnings for when not using
  185. HTTPS.
  186. """
  187. retries = kwargs.pop("retries", 0)
  188. cache = kwargs.pop("cache", None)
  189. trusted_hosts = kwargs.pop("trusted_hosts", []) # type: List[str]
  190. index_urls = kwargs.pop("index_urls", None)
  191. super(PipSession, self).__init__(*args, **kwargs)
  192. # Namespace the attribute with "pip_" just in case to prevent
  193. # possible conflicts with the base class.
  194. self.pip_trusted_origins = [] # type: List[Tuple[str, Optional[int]]]
  195. # Attach our User Agent to the request
  196. self.headers["User-Agent"] = user_agent()
  197. # Attach our Authentication handler to the session
  198. self.auth = MultiDomainBasicAuth(index_urls=index_urls)
  199. # Create our urllib3.Retry instance which will allow us to customize
  200. # how we handle retries.
  201. retries = urllib3.Retry(
  202. # Set the total number of retries that a particular request can
  203. # have.
  204. total=retries,
  205. # A 503 error from PyPI typically means that the Fastly -> Origin
  206. # connection got interrupted in some way. A 503 error in general
  207. # is typically considered a transient error so we'll go ahead and
  208. # retry it.
  209. # A 500 may indicate transient error in Amazon S3
  210. # A 520 or 527 - may indicate transient error in CloudFlare
  211. status_forcelist=[500, 503, 520, 527],
  212. # Add a small amount of back off between failed requests in
  213. # order to prevent hammering the service.
  214. backoff_factor=0.25,
  215. )
  216. # Our Insecure HTTPAdapter disables HTTPS validation. It does not
  217. # support caching so we'll use it for all http:// URLs.
  218. # If caching is disabled, we will also use it for
  219. # https:// hosts that we've marked as ignoring
  220. # TLS errors for (trusted-hosts).
  221. insecure_adapter = InsecureHTTPAdapter(max_retries=retries)
  222. # We want to _only_ cache responses on securely fetched origins or when
  223. # the host is specified as trusted. We do this because
  224. # we can't validate the response of an insecurely/untrusted fetched
  225. # origin, and we don't want someone to be able to poison the cache and
  226. # require manual eviction from the cache to fix it.
  227. if cache:
  228. secure_adapter = CacheControlAdapter(
  229. cache=SafeFileCache(cache),
  230. max_retries=retries,
  231. )
  232. self._trusted_host_adapter = InsecureCacheControlAdapter(
  233. cache=SafeFileCache(cache),
  234. max_retries=retries,
  235. )
  236. else:
  237. secure_adapter = HTTPAdapter(max_retries=retries)
  238. self._trusted_host_adapter = insecure_adapter
  239. self.mount("https://", secure_adapter)
  240. self.mount("http://", insecure_adapter)
  241. # Enable file:// urls
  242. self.mount("file://", LocalFSAdapter())
  243. for host in trusted_hosts:
  244. self.add_trusted_host(host, suppress_logging=True)
  245. def update_index_urls(self, new_index_urls):
  246. # type: (List[str]) -> None
  247. """
  248. :param new_index_urls: New index urls to update the authentication
  249. handler with.
  250. """
  251. self.auth.index_urls = new_index_urls
  252. def add_trusted_host(self, host, source=None, suppress_logging=False):
  253. # type: (str, Optional[str], bool) -> None
  254. """
  255. :param host: It is okay to provide a host that has previously been
  256. added.
  257. :param source: An optional source string, for logging where the host
  258. string came from.
  259. """
  260. if not suppress_logging:
  261. msg = 'adding trusted host: {!r}'.format(host)
  262. if source is not None:
  263. msg += ' (from {})'.format(source)
  264. logger.info(msg)
  265. host_port = parse_netloc(host)
  266. if host_port not in self.pip_trusted_origins:
  267. self.pip_trusted_origins.append(host_port)
  268. self.mount(
  269. build_url_from_netloc(host) + '/',
  270. self._trusted_host_adapter
  271. )
  272. if not host_port[1]:
  273. # Mount wildcard ports for the same host.
  274. self.mount(
  275. build_url_from_netloc(host) + ':',
  276. self._trusted_host_adapter
  277. )
  278. def iter_secure_origins(self):
  279. # type: () -> Iterator[SecureOrigin]
  280. for secure_origin in SECURE_ORIGINS:
  281. yield secure_origin
  282. for host, port in self.pip_trusted_origins:
  283. yield ('*', host, '*' if port is None else port)
  284. def is_secure_origin(self, location):
  285. # type: (Link) -> bool
  286. # Determine if this url used a secure transport mechanism
  287. parsed = urllib_parse.urlparse(str(location))
  288. origin_protocol, origin_host, origin_port = (
  289. parsed.scheme, parsed.hostname, parsed.port,
  290. )
  291. # The protocol to use to see if the protocol matches.
  292. # Don't count the repository type as part of the protocol: in
  293. # cases such as "git+ssh", only use "ssh". (I.e., Only verify against
  294. # the last scheme.)
  295. origin_protocol = origin_protocol.rsplit('+', 1)[-1]
  296. # Determine if our origin is a secure origin by looking through our
  297. # hardcoded list of secure origins, as well as any additional ones
  298. # configured on this PackageFinder instance.
  299. for secure_origin in self.iter_secure_origins():
  300. secure_protocol, secure_host, secure_port = secure_origin
  301. if origin_protocol != secure_protocol and secure_protocol != "*":
  302. continue
  303. try:
  304. addr = ipaddress.ip_address(
  305. None
  306. if origin_host is None
  307. else six.ensure_text(origin_host)
  308. )
  309. network = ipaddress.ip_network(
  310. six.ensure_text(secure_host)
  311. )
  312. except ValueError:
  313. # We don't have both a valid address or a valid network, so
  314. # we'll check this origin against hostnames.
  315. if (
  316. origin_host and
  317. origin_host.lower() != secure_host.lower() and
  318. secure_host != "*"
  319. ):
  320. continue
  321. else:
  322. # We have a valid address and network, so see if the address
  323. # is contained within the network.
  324. if addr not in network:
  325. continue
  326. # Check to see if the port matches.
  327. if (
  328. origin_port != secure_port and
  329. secure_port != "*" and
  330. secure_port is not None
  331. ):
  332. continue
  333. # If we've gotten here, then this origin matches the current
  334. # secure origin and we should return True
  335. return True
  336. # If we've gotten to this point, then the origin isn't secure and we
  337. # will not accept it as a valid location to search. We will however
  338. # log a warning that we are ignoring it.
  339. logger.warning(
  340. "The repository located at %s is not a trusted or secure host and "
  341. "is being ignored. If this repository is available via HTTPS we "
  342. "recommend you use HTTPS instead, otherwise you may silence "
  343. "this warning and allow it anyway with '--trusted-host %s'.",
  344. origin_host,
  345. origin_host,
  346. )
  347. return False
  348. def request(self, method, url, *args, **kwargs):
  349. # Allow setting a default timeout on a session
  350. kwargs.setdefault("timeout", self.timeout)
  351. # Dispatch the actual request
  352. return super(PipSession, self).request(method, url, *args, **kwargs)