exporter.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324
  1. """This module defines a base Exporter class. For Jinja template-based export,
  2. see templateexporter.py.
  3. """
  4. # Copyright (c) Jupyter Development Team.
  5. # Distributed under the terms of the Modified BSD License.
  6. from __future__ import print_function, absolute_import
  7. import io
  8. import os
  9. import copy
  10. import collections
  11. import datetime
  12. import nbformat
  13. from traitlets.config.configurable import LoggingConfigurable
  14. from traitlets.config import Config
  15. from traitlets import HasTraits, Unicode, List, TraitError
  16. from traitlets.utils.importstring import import_item
  17. from ipython_genutils import text, py3compat
  18. class ResourcesDict(collections.defaultdict):
  19. def __missing__(self, key):
  20. return ''
  21. class FilenameExtension(Unicode):
  22. """A trait for filename extensions."""
  23. default_value = u''
  24. info_text = 'a filename extension, beginning with a dot'
  25. def validate(self, obj, value):
  26. # cast to proper unicode
  27. value = super(FilenameExtension, self).validate(obj, value)
  28. # check that it starts with a dot
  29. if value and not value.startswith('.'):
  30. msg = "FileExtension trait '{}' does not begin with a dot: {!r}"
  31. raise TraitError(msg.format(self.name, value))
  32. return value
  33. class Exporter(LoggingConfigurable):
  34. """
  35. Class containing methods that sequentially run a list of preprocessors on a
  36. NotebookNode object and then return the modified NotebookNode object and
  37. accompanying resources dict.
  38. """
  39. file_extension = FilenameExtension('.txt',
  40. help="Extension of the file that should be written to disk"
  41. ).tag(config=True)
  42. # MIME type of the result file, for HTTP response headers.
  43. # This is *not* a traitlet, because we want to be able to access it from
  44. # the class, not just on instances.
  45. output_mimetype = ''
  46. # Should this converter be accessible from the notebook front-end?
  47. # If so, should be a friendly name to display (and possibly translated).
  48. export_from_notebook = None
  49. #Configurability, allows the user to easily add filters and preprocessors.
  50. preprocessors = List(
  51. help="""List of preprocessors, by name or namespace, to enable."""
  52. ).tag(config=True)
  53. _preprocessors = List()
  54. default_preprocessors = List([
  55. 'nbconvert.preprocessors.TagRemovePreprocessor',
  56. 'nbconvert.preprocessors.RegexRemovePreprocessor',
  57. 'nbconvert.preprocessors.ClearOutputPreprocessor',
  58. 'nbconvert.preprocessors.ExecutePreprocessor',
  59. 'nbconvert.preprocessors.coalesce_streams',
  60. 'nbconvert.preprocessors.SVG2PDFPreprocessor',
  61. 'nbconvert.preprocessors.CSSHTMLHeaderPreprocessor',
  62. 'nbconvert.preprocessors.LatexPreprocessor',
  63. 'nbconvert.preprocessors.HighlightMagicsPreprocessor',
  64. 'nbconvert.preprocessors.ExtractOutputPreprocessor',
  65. 'nbconvert.preprocessors.ClearMetadataPreprocessor',
  66. ],
  67. help="""List of preprocessors available by default, by name, namespace,
  68. instance, or type."""
  69. ).tag(config=True)
  70. def __init__(self, config=None, **kw):
  71. """
  72. Public constructor
  73. Parameters
  74. ----------
  75. config : :class:`~traitlets.config.Config`
  76. User configuration instance.
  77. `**kw`
  78. Additional keyword arguments passed to parent __init__
  79. """
  80. with_default_config = self.default_config
  81. if config:
  82. with_default_config.merge(config)
  83. super(Exporter, self).__init__(config=with_default_config, **kw)
  84. self._init_preprocessors()
  85. @property
  86. def default_config(self):
  87. return Config()
  88. def from_notebook_node(self, nb, resources=None, **kw):
  89. """
  90. Convert a notebook from a notebook node instance.
  91. Parameters
  92. ----------
  93. nb : :class:`~nbformat.NotebookNode`
  94. Notebook node (dict-like with attr-access)
  95. resources : dict
  96. Additional resources that can be accessed read/write by
  97. preprocessors and filters.
  98. `**kw`
  99. Ignored
  100. """
  101. nb_copy = copy.deepcopy(nb)
  102. resources = self._init_resources(resources)
  103. if 'language' in nb['metadata']:
  104. resources['language'] = nb['metadata']['language'].lower()
  105. # Preprocess
  106. nb_copy, resources = self._preprocess(nb_copy, resources)
  107. return nb_copy, resources
  108. def from_filename(self, filename, resources=None, **kw):
  109. """
  110. Convert a notebook from a notebook file.
  111. Parameters
  112. ----------
  113. filename : str
  114. Full filename of the notebook file to open and convert.
  115. resources : dict
  116. Additional resources that can be accessed read/write by
  117. preprocessors and filters.
  118. `**kw`
  119. Ignored
  120. """
  121. # Convert full filename string to unicode
  122. # In python 2.7.x if filename comes as unicode string,
  123. # just skip converting it.
  124. if isinstance(filename, str):
  125. filename = py3compat.str_to_unicode(filename)
  126. # Pull the metadata from the filesystem.
  127. if resources is None:
  128. resources = ResourcesDict()
  129. if not 'metadata' in resources or resources['metadata'] == '':
  130. resources['metadata'] = ResourcesDict()
  131. path, basename = os.path.split(filename)
  132. notebook_name = os.path.splitext(basename)[0]
  133. resources['metadata']['name'] = notebook_name
  134. resources['metadata']['path'] = path
  135. modified_date = datetime.datetime.fromtimestamp(os.path.getmtime(filename))
  136. resources['metadata']['modified_date'] = modified_date.strftime(text.date_format)
  137. with io.open(filename, encoding='utf-8') as f:
  138. return self.from_file(f, resources=resources, **kw)
  139. def from_file(self, file_stream, resources=None, **kw):
  140. """
  141. Convert a notebook from a notebook file.
  142. Parameters
  143. ----------
  144. file_stream : file-like object
  145. Notebook file-like object to convert.
  146. resources : dict
  147. Additional resources that can be accessed read/write by
  148. preprocessors and filters.
  149. `**kw`
  150. Ignored
  151. """
  152. return self.from_notebook_node(nbformat.read(file_stream, as_version=4), resources=resources, **kw)
  153. def register_preprocessor(self, preprocessor, enabled=False):
  154. """
  155. Register a preprocessor.
  156. Preprocessors are classes that act upon the notebook before it is
  157. passed into the Jinja templating engine. preprocessors are also
  158. capable of passing additional information to the Jinja
  159. templating engine.
  160. Parameters
  161. ----------
  162. preprocessor : :class:`~nbconvert.preprocessors.Preprocessor`
  163. A dotted module name, a type, or an instance
  164. enabled : bool
  165. Mark the preprocessor as enabled
  166. """
  167. if preprocessor is None:
  168. raise TypeError('preprocessor must not be None')
  169. isclass = isinstance(preprocessor, type)
  170. constructed = not isclass
  171. # Handle preprocessor's registration based on it's type
  172. if constructed and isinstance(preprocessor, py3compat.string_types):
  173. # Preprocessor is a string, import the namespace and recursively call
  174. # this register_preprocessor method
  175. preprocessor_cls = import_item(preprocessor)
  176. return self.register_preprocessor(preprocessor_cls, enabled)
  177. if constructed and hasattr(preprocessor, '__call__'):
  178. # Preprocessor is a function, no need to construct it.
  179. # Register and return the preprocessor.
  180. if enabled:
  181. preprocessor.enabled = True
  182. self._preprocessors.append(preprocessor)
  183. return preprocessor
  184. elif isclass and issubclass(preprocessor, HasTraits):
  185. # Preprocessor is configurable. Make sure to pass in new default for
  186. # the enabled flag if one was specified.
  187. self.register_preprocessor(preprocessor(parent=self), enabled)
  188. elif isclass:
  189. # Preprocessor is not configurable, construct it
  190. self.register_preprocessor(preprocessor(), enabled)
  191. else:
  192. # Preprocessor is an instance of something without a __call__
  193. # attribute.
  194. raise TypeError('preprocessor must be callable or an importable constructor, got %r' % preprocessor)
  195. def _init_preprocessors(self):
  196. """
  197. Register all of the preprocessors needed for this exporter, disabled
  198. unless specified explicitly.
  199. """
  200. self._preprocessors = []
  201. # Load default preprocessors (not necessarily enabled by default).
  202. for preprocessor in self.default_preprocessors:
  203. self.register_preprocessor(preprocessor)
  204. # Load user-specified preprocessors. Enable by default.
  205. for preprocessor in self.preprocessors:
  206. self.register_preprocessor(preprocessor, enabled=True)
  207. def _init_resources(self, resources):
  208. #Make sure the resources dict is of ResourcesDict type.
  209. if resources is None:
  210. resources = ResourcesDict()
  211. if not isinstance(resources, ResourcesDict):
  212. new_resources = ResourcesDict()
  213. new_resources.update(resources)
  214. resources = new_resources
  215. #Make sure the metadata extension exists in resources
  216. if 'metadata' in resources:
  217. if not isinstance(resources['metadata'], ResourcesDict):
  218. new_metadata = ResourcesDict()
  219. new_metadata.update(resources['metadata'])
  220. resources['metadata'] = new_metadata
  221. else:
  222. resources['metadata'] = ResourcesDict()
  223. if not resources['metadata']['name']:
  224. resources['metadata']['name'] = 'Notebook'
  225. #Set the output extension
  226. resources['output_extension'] = self.file_extension
  227. return resources
  228. def _preprocess(self, nb, resources):
  229. """
  230. Preprocess the notebook before passing it into the Jinja engine.
  231. To preprocess the notebook is to successively apply all the
  232. enabled preprocessors. Output from each preprocessor is passed
  233. along to the next one.
  234. Parameters
  235. ----------
  236. nb : notebook node
  237. notebook that is being exported.
  238. resources : a dict of additional resources that
  239. can be accessed read/write by preprocessors
  240. """
  241. # Do a copy.deepcopy first,
  242. # we are never safe enough with what the preprocessors could do.
  243. nbc = copy.deepcopy(nb)
  244. resc = copy.deepcopy(resources)
  245. #Run each preprocessor on the notebook. Carry the output along
  246. #to each preprocessor
  247. for preprocessor in self._preprocessors:
  248. nbc, resc = preprocessor(nbc, resc)
  249. try:
  250. nbformat.validate(nbc, relax_add_props=True)
  251. except nbformat.ValidationError:
  252. self.log.error('Notebook is invalid after preprocessor {}',
  253. preprocessor)
  254. raise
  255. return nbc, resc