123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230 |
- """Set of common tools to aid bundler implementations."""
- # Copyright (c) Jupyter Development Team.
- # Distributed under the terms of the Modified BSD License.
- import os
- import shutil
- import errno
- import nbformat
- import fnmatch
- import glob
def get_file_references(abs_nb_path, version):
    """Resolves every file a notebook declares as an associated resource.

    Reference patterns are gathered from the notebook (Markdown fenced code
    blocks or HTML comments) and then expanded, using gitignore syntax
    (https://git-scm.com/docs/gitignore), against the directory that
    contains the notebook.

    Parameters
    ----------
    abs_nb_path: str
        Absolute path of the notebook on disk
    version: int
        Version of the notebook document format to use

    Returns
    -------
    collection of str
        Filenames relative to the notebook dirname, exactly as produced by
        expand_references
    """
    notebook_dir = os.path.dirname(abs_nb_path)
    patterns = get_reference_patterns(abs_nb_path, version)
    return expand_references(notebook_dir, patterns)
def get_reference_patterns(abs_nb_path, version):
    """Collects the reference patterns declared by all cells of a notebook.

    The notebook is read from disk with nbformat and each cell is handed to
    get_cell_reference_patterns; the per-cell results are concatenated in
    cell order.

    Parameters
    ----------
    abs_nb_path: str
        Absolute path of the notebook on disk
    version: int
        Version of the notebook document format to use

    Returns
    -------
    list
        Pattern strings from the notebook
    """
    nb = nbformat.read(abs_nb_path, version)
    patterns = []
    for nb_cell in nb.cells:
        found = get_cell_reference_patterns(nb_cell)
        # Cells that declare nothing contribute nothing
        if found:
            patterns.extend(found)
    return patterns
def _collect_reference_lines(text, terminator):
    """Returns the usable reference lines of text up to the terminator line."""
    collected = []
    for line in text.splitlines():
        if line.startswith(terminator):
            break
        # Trying to go out of the current directory leads to trouble when
        # deploying; lines starting with '#' are treated as comments
        if '../' in line or line.startswith('#'):
            continue
        # Clean out blank references
        if line.strip():
            collected.append(line)
    return collected


def get_cell_reference_patterns(cell):
    """Retrieves the list of references from a single notebook cell.

    Looks for fenced code blocks or HTML comments in Markdown cells, e.g.,

        ```
        some.csv
        foo/
        !foo/bar
        ```

    or

        <!--associate:
        some.csv
        foo/
        !foo/bar
        -->

    The HTML comment form is only recognised when the cell source starts
    with it, and takes precedence over a fenced block. For the fenced form
    only the first fence in the cell is read, and any text following the
    opening fence on the same line is treated as the first candidate
    reference. Lines containing '../', lines starting with '#' and blank
    lines are dropped.

    Parameters
    ----------
    cell: dict
        Notebook cell object. A missing 'cell_type' or 'source' yields no
        references. 'source' may be a string or a list of strings (the
        multi-line form used by the on-disk notebook format), which is
        joined before parsing.

    Returns
    -------
    list
        Reference patterns found in the cell
    """
    cell_type = cell.get('cell_type') or ''
    source = cell.get('source') or ''
    if isinstance(source, (list, tuple)):
        # on-disk notebooks may keep multi-line strings as lists of lines
        source = ''.join(source)

    if not cell_type.startswith('markdown'):
        return []

    comment_marker = '<!--associate:'
    fence = '```'

    # invisible after execution: unrendered HTML comment
    if source.startswith(comment_marker):
        return _collect_reference_lines(source[len(comment_marker):], '-->')

    # visible after execution: rendered as a code element within a pre element
    fence_at = source.find(fence)
    if fence_at >= 0:
        return _collect_reference_lines(source[fence_at + len(fence):], fence)

    return []
def _matches_walked_pattern(relative_path, pattern):
    """Tells whether a walked file path satisfies a separator-bearing pattern."""
    if pattern.endswith(os.sep):
        # directory pattern: every file below that directory
        return relative_path.startswith(pattern)
    if '**' in pattern:
        # path wildcard: only a single '**' is supported, a pattern with
        # more than one matches nothing
        ends = pattern.split('**')
        return (len(ends) == 2
                and relative_path.startswith(ends[0])
                and relative_path.endswith(ends[1]))
    return fnmatch.fnmatch(relative_path, pattern)


def expand_references(root_path, references):
    """Expands a set of reference patterns by evaluating them against the
    given root directory. Expansions are performed against patterns
    expressed in the same manner as in gitignore
    (https://git-scm.com/docs/gitignore).

    Patterns without a directory separator are evaluated as shell globs in
    root_path. Patterns with a separator are matched against every file
    found by walking root_path: a trailing separator selects everything
    below that directory, a single '**' matches any run of path characters,
    and anything else is matched with fnmatch. A leading '!' negates a
    pattern; a file matched by any negated pattern is excluded from the
    result no matter how many other patterns matched it.

    NOTE: Temporarily changes the current working directory when simple
    glob patterns are present; it is restored even if globbing fails.

    Parameters
    ----------
    root_path: str
        Assumed root directory for the patterns
    references: list
        Reference patterns from get_reference_patterns expressed with
        forward-slash directory separators

    Returns
    -------
    set
        Filename strings relative to the root path
    """
    simple_patterns = []  # (is_negation, pattern) evaluated with glob
    walked_patterns = []  # (is_negation, pattern) evaluated against os.walk
    for ref in references:
        # Use normpath to convert to platform specific slashes, but be sure
        # to retain a trailing slash which normpath pulls off
        pattern = os.path.normpath(ref)
        if ref.endswith('/'):
            pattern += os.sep
        is_negation = pattern.startswith('!')
        if is_negation:
            pattern = pattern[1:]
        if os.sep in pattern:
            walked_patterns.append((is_negation, pattern))
        else:
            simple_patterns.append((is_negation, pattern))

    included = set()
    excluded = set()

    if simple_patterns:
        # simple shell globs, evaluated relative to root_path
        cwd = os.getcwd()
        os.chdir(root_path)
        try:
            for is_negation, pattern in simple_patterns:
                bucket = excluded if is_negation else included
                bucket.update(glob.glob(pattern))
        finally:
            os.chdir(cwd)

    if walked_patterns:
        # Walk the tree once and reuse the listing for every pattern
        relative_files = []
        for dirpath, _, filenames in os.walk(root_path):
            # relpath stays correct when root_path has a trailing separator
            # or is the filesystem root, unlike slicing by len(root_path)
            relative_dir = os.path.relpath(dirpath, root_path)
            if relative_dir == os.curdir:
                relative_dir = ''
            relative_files.extend(
                os.path.join(relative_dir, filename) for filename in filenames
            )
        for is_negation, pattern in walked_patterns:
            bucket = excluded if is_negation else included
            bucket.update(
                path for path in relative_files
                if _matches_walked_pattern(path, pattern)
            )

    # Set difference so a negation wins even when several patterns matched
    # the same file
    return included - excluded
def copy_filelist(src, dst, src_relative_filenames):
    """Copies files, named relative to src, to the same relative location
    under dst.

    Parent directories are created in dst on demand, and a directory that
    already exists is not treated as an error. Entries that are not
    existing files in src are skipped. Empty directories from src are never
    created in dst.

    Parameters
    ----------
    src: str
        Root of the source directory
    dst: str
        Root of the destination directory
    src_relative_filenames: list
        Filenames relative to src
    """
    for relative_name in src_relative_filenames:
        source_file = os.path.join(src, relative_name)
        if not os.path.isfile(source_file):
            # Nothing to copy for entries that are not files in src
            continue

        subdir = os.path.dirname(relative_name)
        if subdir:
            # Make sure the parent directory exists
            try:
                os.makedirs(os.path.join(dst, subdir))
            except OSError as exc:
                # An already existing directory is fine, anything else is not
                if exc.errno != errno.EEXIST:
                    raise

        shutil.copy2(source_file, os.path.join(dst, relative_name))
|