tools.py 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. """Set of common tools to aid bundler implementations."""
  2. # Copyright (c) Jupyter Development Team.
  3. # Distributed under the terms of the Modified BSD License.
  4. import os
  5. import shutil
  6. import errno
  7. import nbformat
  8. import fnmatch
  9. import glob
  10. def get_file_references(abs_nb_path, version):
  11. """Gets a list of files referenced either in Markdown fenced code blocks
  12. or in HTML comments from the notebook. Expands patterns expressed in
  13. gitignore syntax (https://git-scm.com/docs/gitignore). Returns the
  14. fully expanded list of filenames relative to the notebook dirname.
  15. Parameters
  16. ----------
  17. abs_nb_path: str
  18. Absolute path of the notebook on disk
  19. version: int
  20. Version of the notebook document format to use
  21. Returns
  22. -------
  23. list
  24. Filename strings relative to the notebook path
  25. """
  26. ref_patterns = get_reference_patterns(abs_nb_path, version)
  27. expanded = expand_references(os.path.dirname(abs_nb_path), ref_patterns)
  28. return expanded
  29. def get_reference_patterns(abs_nb_path, version):
  30. """Gets a list of reference patterns either in Markdown fenced code blocks
  31. or in HTML comments from the notebook.
  32. Parameters
  33. ----------
  34. abs_nb_path: str
  35. Absolute path of the notebook on disk
  36. version: int
  37. Version of the notebook document format to use
  38. Returns
  39. -------
  40. list
  41. Pattern strings from the notebook
  42. """
  43. notebook = nbformat.read(abs_nb_path, version)
  44. referenced_list = []
  45. for cell in notebook.cells:
  46. references = get_cell_reference_patterns(cell)
  47. if references:
  48. referenced_list = referenced_list + references
  49. return referenced_list
  50. def get_cell_reference_patterns(cell):
  51. '''
  52. Retrieves the list of references from a single notebook cell. Looks for
  53. fenced code blocks or HTML comments in Markdown cells, e.g.,
  54. ```
  55. some.csv
  56. foo/
  57. !foo/bar
  58. ```
  59. or
  60. <!--associate:
  61. some.csv
  62. foo/
  63. !foo/bar
  64. -->
  65. Parameters
  66. ----------
  67. cell: dict
  68. Notebook cell object
  69. Returns
  70. -------
  71. list
  72. Reference patterns found in the cell
  73. '''
  74. referenced = []
  75. # invisible after execution: unrendered HTML comment
  76. if cell.get('cell_type').startswith('markdown') and cell.get('source').startswith('<!--associate:'):
  77. lines = cell.get('source')[len('<!--associate:'):].splitlines()
  78. for line in lines:
  79. if line.startswith('-->'):
  80. break
  81. # Trying to go out of the current directory leads to
  82. # trouble when deploying
  83. if line.find('../') < 0 and not line.startswith('#'):
  84. referenced.append(line)
  85. # visible after execution: rendered as a code element within a pre element
  86. elif cell.get('cell_type').startswith('markdown') and cell.get('source').find('```') >= 0:
  87. source = cell.get('source')
  88. offset = source.find('```')
  89. lines = source[offset + len('```'):].splitlines()
  90. for line in lines:
  91. if line.startswith('```'):
  92. break
  93. # Trying to go out of the current directory leads to
  94. # trouble when deploying
  95. if line.find('../') < 0 and not line.startswith('#'):
  96. referenced.append(line)
  97. # Clean out blank references
  98. return [ref for ref in referenced if ref.strip()]
  99. def expand_references(root_path, references):
  100. """Expands a set of reference patterns by evaluating them against the
  101. given root directory. Expansions are performed against patterns
  102. expressed in the same manner as in gitignore
  103. (https://git-scm.com/docs/gitignore).
  104. NOTE: Temporarily changes the current working directory when called.
  105. Parameters
  106. ----------
  107. root_path: str
  108. Assumed root directory for the patterns
  109. references: list
  110. Reference patterns from get_reference_patterns expressed with
  111. forward-slash directory separators
  112. Returns
  113. -------
  114. list
  115. Filename strings relative to the root path
  116. """
  117. # Use normpath to convert to platform specific slashes, but be sure
  118. # to retain a trailing slash which normpath pulls off
  119. normalized_references = []
  120. for ref in references:
  121. normalized_ref = os.path.normpath(ref)
  122. # un-normalized separator
  123. if ref.endswith('/'):
  124. normalized_ref += os.sep
  125. normalized_references.append(normalized_ref)
  126. references = normalized_references
  127. globbed = []
  128. negations = []
  129. must_walk = []
  130. for pattern in references:
  131. if pattern and pattern.find(os.sep) < 0:
  132. # simple shell glob
  133. cwd = os.getcwd()
  134. os.chdir(root_path)
  135. if pattern.startswith('!'):
  136. negations = negations + glob.glob(pattern[1:])
  137. else:
  138. globbed = globbed + glob.glob(pattern)
  139. os.chdir(cwd)
  140. elif pattern:
  141. must_walk.append(pattern)
  142. for pattern in must_walk:
  143. pattern_is_negation = pattern.startswith('!')
  144. if pattern_is_negation:
  145. testpattern = pattern[1:]
  146. else:
  147. testpattern = pattern
  148. for root, _, filenames in os.walk(root_path):
  149. for filename in filenames:
  150. joined = os.path.join(root[len(root_path) + 1:], filename)
  151. if testpattern.endswith(os.sep):
  152. if joined.startswith(testpattern):
  153. if pattern_is_negation:
  154. negations.append(joined)
  155. else:
  156. globbed.append(joined)
  157. elif testpattern.find('**') >= 0:
  158. # path wildcard
  159. ends = testpattern.split('**')
  160. if len(ends) == 2:
  161. if joined.startswith(ends[0]) and joined.endswith(ends[1]):
  162. if pattern_is_negation:
  163. negations.append(joined)
  164. else:
  165. globbed.append(joined)
  166. else:
  167. # segments should be respected
  168. if fnmatch.fnmatch(joined, testpattern):
  169. if pattern_is_negation:
  170. negations.append(joined)
  171. else:
  172. globbed.append(joined)
  173. for negated in negations:
  174. try:
  175. globbed.remove(negated)
  176. except ValueError as err:
  177. pass
  178. return set(globbed)
  179. def copy_filelist(src, dst, src_relative_filenames):
  180. """Copies the given list of files, relative to src, into dst, creating
  181. directories along the way as needed and ignore existence errors.
  182. Skips any files that do not exist. Does not create empty directories
  183. from src in dst.
  184. Parameters
  185. ----------
  186. src: str
  187. Root of the source directory
  188. dst: str
  189. Root of the destination directory
  190. src_relative_filenames: list
  191. Filenames relative to src
  192. """
  193. for filename in src_relative_filenames:
  194. # Only consider the file if it exists in src
  195. if os.path.isfile(os.path.join(src, filename)):
  196. parent_relative = os.path.dirname(filename)
  197. if parent_relative:
  198. # Make sure the parent directory exists
  199. parent_dst = os.path.join(dst, parent_relative)
  200. try:
  201. os.makedirs(parent_dst)
  202. except OSError as exc:
  203. if exc.errno == errno.EEXIST:
  204. pass
  205. else:
  206. raise exc
  207. shutil.copy2(os.path.join(src, filename), os.path.join(dst, filename))