req_file.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574
  1. """
  2. Requirements file parsing
  3. """
  4. from __future__ import absolute_import
  5. import optparse
  6. import os
  7. import re
  8. import shlex
  9. import sys
  10. from pip._vendor.six.moves.urllib import parse as urllib_parse
  11. from pip._internal.cli import cmdoptions
  12. from pip._internal.exceptions import InstallationError, RequirementsFileParseError
  13. from pip._internal.models.search_scope import SearchScope
  14. from pip._internal.network.utils import raise_for_status
  15. from pip._internal.utils.encoding import auto_decode
  16. from pip._internal.utils.typing import MYPY_CHECK_RUNNING
  17. from pip._internal.utils.urls import get_url_scheme, url_to_path
  18. if MYPY_CHECK_RUNNING:
  19. from optparse import Values
  20. from typing import (
  21. Any,
  22. Callable,
  23. Dict,
  24. Iterator,
  25. List,
  26. NoReturn,
  27. Optional,
  28. Text,
  29. Tuple,
  30. )
  31. from pip._internal.index.package_finder import PackageFinder
  32. from pip._internal.network.session import PipSession
  33. ReqFileLines = Iterator[Tuple[int, Text]]
  34. LineParser = Callable[[Text], Tuple[str, Values]]
  35. __all__ = ['parse_requirements']
  36. SCHEME_RE = re.compile(r'^(http|https|file):', re.I)
  37. COMMENT_RE = re.compile(r'(^|\s+)#.*$')
  38. # Matches environment variable-style values in '${MY_VARIABLE_1}' with the
  39. # variable name consisting of only uppercase letters, digits or the '_'
  40. # (underscore). This follows the POSIX standard defined in IEEE Std 1003.1,
  41. # 2013 Edition.
  42. ENV_VAR_RE = re.compile(r'(?P<var>\$\{(?P<name>[A-Z0-9_]+)\})')
  43. SUPPORTED_OPTIONS = [
  44. cmdoptions.index_url,
  45. cmdoptions.extra_index_url,
  46. cmdoptions.no_index,
  47. cmdoptions.constraints,
  48. cmdoptions.requirements,
  49. cmdoptions.editable,
  50. cmdoptions.find_links,
  51. cmdoptions.no_binary,
  52. cmdoptions.only_binary,
  53. cmdoptions.prefer_binary,
  54. cmdoptions.require_hashes,
  55. cmdoptions.pre,
  56. cmdoptions.trusted_host,
  57. cmdoptions.use_new_feature,
  58. ] # type: List[Callable[..., optparse.Option]]
  59. # options to be passed to requirements
  60. SUPPORTED_OPTIONS_REQ = [
  61. cmdoptions.install_options,
  62. cmdoptions.global_options,
  63. cmdoptions.hash,
  64. ] # type: List[Callable[..., optparse.Option]]
  65. # the 'dest' string values
  66. SUPPORTED_OPTIONS_REQ_DEST = [str(o().dest) for o in SUPPORTED_OPTIONS_REQ]
  67. class ParsedRequirement(object):
  68. def __init__(
  69. self,
  70. requirement, # type:str
  71. is_editable, # type: bool
  72. comes_from, # type: str
  73. constraint, # type: bool
  74. options=None, # type: Optional[Dict[str, Any]]
  75. line_source=None, # type: Optional[str]
  76. ):
  77. # type: (...) -> None
  78. self.requirement = requirement
  79. self.is_editable = is_editable
  80. self.comes_from = comes_from
  81. self.options = options
  82. self.constraint = constraint
  83. self.line_source = line_source
  84. class ParsedLine(object):
  85. def __init__(
  86. self,
  87. filename, # type: str
  88. lineno, # type: int
  89. args, # type: str
  90. opts, # type: Values
  91. constraint, # type: bool
  92. ):
  93. # type: (...) -> None
  94. self.filename = filename
  95. self.lineno = lineno
  96. self.opts = opts
  97. self.constraint = constraint
  98. if args:
  99. self.is_requirement = True
  100. self.is_editable = False
  101. self.requirement = args
  102. elif opts.editables:
  103. self.is_requirement = True
  104. self.is_editable = True
  105. # We don't support multiple -e on one line
  106. self.requirement = opts.editables[0]
  107. else:
  108. self.is_requirement = False
  109. def parse_requirements(
  110. filename, # type: str
  111. session, # type: PipSession
  112. finder=None, # type: Optional[PackageFinder]
  113. options=None, # type: Optional[optparse.Values]
  114. constraint=False, # type: bool
  115. ):
  116. # type: (...) -> Iterator[ParsedRequirement]
  117. """Parse a requirements file and yield ParsedRequirement instances.
  118. :param filename: Path or url of requirements file.
  119. :param session: PipSession instance.
  120. :param finder: Instance of pip.index.PackageFinder.
  121. :param options: cli options.
  122. :param constraint: If true, parsing a constraint file rather than
  123. requirements file.
  124. """
  125. line_parser = get_line_parser(finder)
  126. parser = RequirementsFileParser(session, line_parser)
  127. for parsed_line in parser.parse(filename, constraint):
  128. parsed_req = handle_line(
  129. parsed_line,
  130. options=options,
  131. finder=finder,
  132. session=session
  133. )
  134. if parsed_req is not None:
  135. yield parsed_req
  136. def preprocess(content):
  137. # type: (Text) -> ReqFileLines
  138. """Split, filter, and join lines, and return a line iterator
  139. :param content: the content of the requirements file
  140. """
  141. lines_enum = enumerate(content.splitlines(), start=1) # type: ReqFileLines
  142. lines_enum = join_lines(lines_enum)
  143. lines_enum = ignore_comments(lines_enum)
  144. lines_enum = expand_env_variables(lines_enum)
  145. return lines_enum
  146. def handle_requirement_line(
  147. line, # type: ParsedLine
  148. options=None, # type: Optional[optparse.Values]
  149. ):
  150. # type: (...) -> ParsedRequirement
  151. # preserve for the nested code path
  152. line_comes_from = '{} {} (line {})'.format(
  153. '-c' if line.constraint else '-r', line.filename, line.lineno,
  154. )
  155. assert line.is_requirement
  156. if line.is_editable:
  157. # For editable requirements, we don't support per-requirement
  158. # options, so just return the parsed requirement.
  159. return ParsedRequirement(
  160. requirement=line.requirement,
  161. is_editable=line.is_editable,
  162. comes_from=line_comes_from,
  163. constraint=line.constraint,
  164. )
  165. else:
  166. if options:
  167. # Disable wheels if the user has specified build options
  168. cmdoptions.check_install_build_global(options, line.opts)
  169. # get the options that apply to requirements
  170. req_options = {}
  171. for dest in SUPPORTED_OPTIONS_REQ_DEST:
  172. if dest in line.opts.__dict__ and line.opts.__dict__[dest]:
  173. req_options[dest] = line.opts.__dict__[dest]
  174. line_source = 'line {} of {}'.format(line.lineno, line.filename)
  175. return ParsedRequirement(
  176. requirement=line.requirement,
  177. is_editable=line.is_editable,
  178. comes_from=line_comes_from,
  179. constraint=line.constraint,
  180. options=req_options,
  181. line_source=line_source,
  182. )
  183. def handle_option_line(
  184. opts, # type: Values
  185. filename, # type: str
  186. lineno, # type: int
  187. finder=None, # type: Optional[PackageFinder]
  188. options=None, # type: Optional[optparse.Values]
  189. session=None, # type: Optional[PipSession]
  190. ):
  191. # type: (...) -> None
  192. if options:
  193. # percolate options upward
  194. if opts.require_hashes:
  195. options.require_hashes = opts.require_hashes
  196. if opts.features_enabled:
  197. options.features_enabled.extend(
  198. f for f in opts.features_enabled
  199. if f not in options.features_enabled
  200. )
  201. # set finder options
  202. if finder:
  203. find_links = finder.find_links
  204. index_urls = finder.index_urls
  205. if opts.index_url:
  206. index_urls = [opts.index_url]
  207. if opts.no_index is True:
  208. index_urls = []
  209. if opts.extra_index_urls:
  210. index_urls.extend(opts.extra_index_urls)
  211. if opts.find_links:
  212. # FIXME: it would be nice to keep track of the source
  213. # of the find_links: support a find-links local path
  214. # relative to a requirements file.
  215. value = opts.find_links[0]
  216. req_dir = os.path.dirname(os.path.abspath(filename))
  217. relative_to_reqs_file = os.path.join(req_dir, value)
  218. if os.path.exists(relative_to_reqs_file):
  219. value = relative_to_reqs_file
  220. find_links.append(value)
  221. if session:
  222. # We need to update the auth urls in session
  223. session.update_index_urls(index_urls)
  224. search_scope = SearchScope(
  225. find_links=find_links,
  226. index_urls=index_urls,
  227. )
  228. finder.search_scope = search_scope
  229. if opts.pre:
  230. finder.set_allow_all_prereleases()
  231. if opts.prefer_binary:
  232. finder.set_prefer_binary()
  233. if session:
  234. for host in opts.trusted_hosts or []:
  235. source = 'line {} of {}'.format(lineno, filename)
  236. session.add_trusted_host(host, source=source)
  237. def handle_line(
  238. line, # type: ParsedLine
  239. options=None, # type: Optional[optparse.Values]
  240. finder=None, # type: Optional[PackageFinder]
  241. session=None, # type: Optional[PipSession]
  242. ):
  243. # type: (...) -> Optional[ParsedRequirement]
  244. """Handle a single parsed requirements line; This can result in
  245. creating/yielding requirements, or updating the finder.
  246. :param line: The parsed line to be processed.
  247. :param options: CLI options.
  248. :param finder: The finder - updated by non-requirement lines.
  249. :param session: The session - updated by non-requirement lines.
  250. Returns a ParsedRequirement object if the line is a requirement line,
  251. otherwise returns None.
  252. For lines that contain requirements, the only options that have an effect
  253. are from SUPPORTED_OPTIONS_REQ, and they are scoped to the
  254. requirement. Other options from SUPPORTED_OPTIONS may be present, but are
  255. ignored.
  256. For lines that do not contain requirements, the only options that have an
  257. effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may
  258. be present, but are ignored. These lines may contain multiple options
  259. (although our docs imply only one is supported), and all our parsed and
  260. affect the finder.
  261. """
  262. if line.is_requirement:
  263. parsed_req = handle_requirement_line(line, options)
  264. return parsed_req
  265. else:
  266. handle_option_line(
  267. line.opts,
  268. line.filename,
  269. line.lineno,
  270. finder,
  271. options,
  272. session,
  273. )
  274. return None
  275. class RequirementsFileParser(object):
  276. def __init__(
  277. self,
  278. session, # type: PipSession
  279. line_parser, # type: LineParser
  280. ):
  281. # type: (...) -> None
  282. self._session = session
  283. self._line_parser = line_parser
  284. def parse(self, filename, constraint):
  285. # type: (str, bool) -> Iterator[ParsedLine]
  286. """Parse a given file, yielding parsed lines.
  287. """
  288. for line in self._parse_and_recurse(filename, constraint):
  289. yield line
  290. def _parse_and_recurse(self, filename, constraint):
  291. # type: (str, bool) -> Iterator[ParsedLine]
  292. for line in self._parse_file(filename, constraint):
  293. if (
  294. not line.is_requirement and
  295. (line.opts.requirements or line.opts.constraints)
  296. ):
  297. # parse a nested requirements file
  298. if line.opts.requirements:
  299. req_path = line.opts.requirements[0]
  300. nested_constraint = False
  301. else:
  302. req_path = line.opts.constraints[0]
  303. nested_constraint = True
  304. # original file is over http
  305. if SCHEME_RE.search(filename):
  306. # do a url join so relative paths work
  307. req_path = urllib_parse.urljoin(filename, req_path)
  308. # original file and nested file are paths
  309. elif not SCHEME_RE.search(req_path):
  310. # do a join so relative paths work
  311. req_path = os.path.join(
  312. os.path.dirname(filename), req_path,
  313. )
  314. for inner_line in self._parse_and_recurse(
  315. req_path, nested_constraint,
  316. ):
  317. yield inner_line
  318. else:
  319. yield line
  320. def _parse_file(self, filename, constraint):
  321. # type: (str, bool) -> Iterator[ParsedLine]
  322. _, content = get_file_content(filename, self._session)
  323. lines_enum = preprocess(content)
  324. for line_number, line in lines_enum:
  325. try:
  326. args_str, opts = self._line_parser(line)
  327. except OptionParsingError as e:
  328. # add offending line
  329. msg = 'Invalid requirement: {}\n{}'.format(line, e.msg)
  330. raise RequirementsFileParseError(msg)
  331. yield ParsedLine(
  332. filename,
  333. line_number,
  334. args_str,
  335. opts,
  336. constraint,
  337. )
  338. def get_line_parser(finder):
  339. # type: (Optional[PackageFinder]) -> LineParser
  340. def parse_line(line):
  341. # type: (Text) -> Tuple[str, Values]
  342. # Build new parser for each line since it accumulates appendable
  343. # options.
  344. parser = build_parser()
  345. defaults = parser.get_default_values()
  346. defaults.index_url = None
  347. if finder:
  348. defaults.format_control = finder.format_control
  349. args_str, options_str = break_args_options(line)
  350. # Prior to 2.7.3, shlex cannot deal with unicode entries
  351. if sys.version_info < (2, 7, 3):
  352. # https://github.com/python/mypy/issues/1174
  353. options_str = options_str.encode('utf8') # type: ignore
  354. # https://github.com/python/mypy/issues/1174
  355. opts, _ = parser.parse_args(
  356. shlex.split(options_str), defaults) # type: ignore
  357. return args_str, opts
  358. return parse_line
  359. def break_args_options(line):
  360. # type: (Text) -> Tuple[str, Text]
  361. """Break up the line into an args and options string. We only want to shlex
  362. (and then optparse) the options, not the args. args can contain markers
  363. which are corrupted by shlex.
  364. """
  365. tokens = line.split(' ')
  366. args = []
  367. options = tokens[:]
  368. for token in tokens:
  369. if token.startswith('-') or token.startswith('--'):
  370. break
  371. else:
  372. args.append(token)
  373. options.pop(0)
  374. return ' '.join(args), ' '.join(options) # type: ignore
  375. class OptionParsingError(Exception):
  376. def __init__(self, msg):
  377. # type: (str) -> None
  378. self.msg = msg
  379. def build_parser():
  380. # type: () -> optparse.OptionParser
  381. """
  382. Return a parser for parsing requirement lines
  383. """
  384. parser = optparse.OptionParser(add_help_option=False)
  385. option_factories = SUPPORTED_OPTIONS + SUPPORTED_OPTIONS_REQ
  386. for option_factory in option_factories:
  387. option = option_factory()
  388. parser.add_option(option)
  389. # By default optparse sys.exits on parsing errors. We want to wrap
  390. # that in our own exception.
  391. def parser_exit(self, msg):
  392. # type: (Any, str) -> NoReturn
  393. raise OptionParsingError(msg)
  394. # NOTE: mypy disallows assigning to a method
  395. # https://github.com/python/mypy/issues/2427
  396. parser.exit = parser_exit # type: ignore
  397. return parser
  398. def join_lines(lines_enum):
  399. # type: (ReqFileLines) -> ReqFileLines
  400. """Joins a line ending in '\' with the previous line (except when following
  401. comments). The joined line takes on the index of the first line.
  402. """
  403. primary_line_number = None
  404. new_line = [] # type: List[Text]
  405. for line_number, line in lines_enum:
  406. if not line.endswith('\\') or COMMENT_RE.match(line):
  407. if COMMENT_RE.match(line):
  408. # this ensures comments are always matched later
  409. line = ' ' + line
  410. if new_line:
  411. new_line.append(line)
  412. assert primary_line_number is not None
  413. yield primary_line_number, ''.join(new_line)
  414. new_line = []
  415. else:
  416. yield line_number, line
  417. else:
  418. if not new_line:
  419. primary_line_number = line_number
  420. new_line.append(line.strip('\\'))
  421. # last line contains \
  422. if new_line:
  423. assert primary_line_number is not None
  424. yield primary_line_number, ''.join(new_line)
  425. # TODO: handle space after '\'.
  426. def ignore_comments(lines_enum):
  427. # type: (ReqFileLines) -> ReqFileLines
  428. """
  429. Strips comments and filter empty lines.
  430. """
  431. for line_number, line in lines_enum:
  432. line = COMMENT_RE.sub('', line)
  433. line = line.strip()
  434. if line:
  435. yield line_number, line
  436. def expand_env_variables(lines_enum):
  437. # type: (ReqFileLines) -> ReqFileLines
  438. """Replace all environment variables that can be retrieved via `os.getenv`.
  439. The only allowed format for environment variables defined in the
  440. requirement file is `${MY_VARIABLE_1}` to ensure two things:
  441. 1. Strings that contain a `$` aren't accidentally (partially) expanded.
  442. 2. Ensure consistency across platforms for requirement files.
  443. These points are the result of a discussion on the `github pull
  444. request #3514 <https://github.com/pypa/pip/pull/3514>`_.
  445. Valid characters in variable names follow the `POSIX standard
  446. <http://pubs.opengroup.org/onlinepubs/9699919799/>`_ and are limited
  447. to uppercase letter, digits and the `_` (underscore).
  448. """
  449. for line_number, line in lines_enum:
  450. for env_var, var_name in ENV_VAR_RE.findall(line):
  451. value = os.getenv(var_name)
  452. if not value:
  453. continue
  454. line = line.replace(env_var, value)
  455. yield line_number, line
  456. def get_file_content(url, session):
  457. # type: (str, PipSession) -> Tuple[str, Text]
  458. """Gets the content of a file; it may be a filename, file: URL, or
  459. http: URL. Returns (location, content). Content is unicode.
  460. Respects # -*- coding: declarations on the retrieved files.
  461. :param url: File path or url.
  462. :param session: PipSession instance.
  463. """
  464. scheme = get_url_scheme(url)
  465. if scheme in ['http', 'https']:
  466. # FIXME: catch some errors
  467. resp = session.get(url)
  468. raise_for_status(resp)
  469. return resp.url, resp.text
  470. elif scheme == 'file':
  471. url = url_to_path(url)
  472. try:
  473. with open(url, 'rb') as f:
  474. content = auto_decode(f.read())
  475. except IOError as exc:
  476. raise InstallationError(
  477. 'Could not open requirements file: {}'.format(exc)
  478. )
  479. return url, content