convert.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. """Code for converting notebooks to and from v3."""
  2. # Copyright (c) IPython Development Team.
  3. # Distributed under the terms of the Modified BSD License.
  4. import json
  5. import re
  6. from .nbbase import (
  7. nbformat, nbformat_minor,
  8. NotebookNode,
  9. )
  10. from nbformat import v3
  11. from traitlets.log import get_logger
  12. def _warn_if_invalid(nb, version):
  13. """Log validation errors, if there are any."""
  14. from nbformat import validate, ValidationError
  15. try:
  16. validate(nb, version=version)
  17. except ValidationError as e:
  18. get_logger().error("Notebook JSON is not valid v%i: %s", version, e)
  19. def upgrade(nb, from_version=3, from_minor=0):
  20. """Convert a notebook to v4.
  21. Parameters
  22. ----------
  23. nb : NotebookNode
  24. The Python representation of the notebook to convert.
  25. from_version : int
  26. The original version of the notebook to convert.
  27. from_minor : int
  28. The original minor version of the notebook to convert (only relevant for v >= 3).
  29. """
  30. if from_version == 3:
  31. # Validate the notebook before conversion
  32. _warn_if_invalid(nb, from_version)
  33. # Mark the original nbformat so consumers know it has been converted
  34. orig_nbformat = nb.pop('orig_nbformat', None)
  35. orig_nbformat_minor = nb.pop('orig_nbformat_minor', None)
  36. nb.metadata.orig_nbformat = orig_nbformat or 3
  37. nb.metadata.orig_nbformat_minor = orig_nbformat_minor or 0
  38. # Mark the new format
  39. nb.nbformat = nbformat
  40. nb.nbformat_minor = nbformat_minor
  41. # remove worksheet(s)
  42. nb['cells'] = cells = []
  43. # In the unlikely event of multiple worksheets,
  44. # they will be flattened
  45. for ws in nb.pop('worksheets', []):
  46. # upgrade each cell
  47. for cell in ws['cells']:
  48. cells.append(upgrade_cell(cell))
  49. # upgrade metadata
  50. nb.metadata.pop('name', '')
  51. nb.metadata.pop('signature', '')
  52. # Validate the converted notebook before returning it
  53. _warn_if_invalid(nb, nbformat)
  54. return nb
  55. elif from_version == 4:
  56. # nothing to do
  57. if from_minor != nbformat_minor:
  58. nb.metadata.orig_nbformat_minor = from_minor
  59. nb.nbformat_minor = nbformat_minor
  60. return nb
  61. else:
  62. raise ValueError('Cannot convert a notebook directly from v%s to v4. ' \
  63. 'Try using the nbformat.convert module.' % from_version)
  64. def upgrade_cell(cell):
  65. """upgrade a cell from v3 to v4
  66. heading cell:
  67. - -> markdown heading
  68. code cell:
  69. - remove language metadata
  70. - cell.input -> cell.source
  71. - cell.prompt_number -> cell.execution_count
  72. - update outputs
  73. """
  74. cell.setdefault('metadata', NotebookNode())
  75. if cell.cell_type == 'code':
  76. cell.pop('language', '')
  77. if 'collapsed' in cell:
  78. cell.metadata['collapsed'] = cell.pop('collapsed')
  79. cell.source = cell.pop('input', '')
  80. cell.execution_count = cell.pop('prompt_number', None)
  81. cell.outputs = upgrade_outputs(cell.outputs)
  82. elif cell.cell_type == 'heading':
  83. cell.cell_type = 'markdown'
  84. level = cell.pop('level', 1)
  85. cell.source = u'{hashes} {single_line}'.format(
  86. hashes='#' * level,
  87. single_line = ' '.join(cell.get('source', '').splitlines()),
  88. )
  89. elif cell.cell_type == 'html':
  90. # Technically, this exists. It will never happen in practice.
  91. cell.cell_type = 'markdown'
  92. return cell
  93. def downgrade_cell(cell):
  94. """downgrade a cell from v4 to v3
  95. code cell:
  96. - set cell.language
  97. - cell.input <- cell.source
  98. - cell.prompt_number <- cell.execution_count
  99. - update outputs
  100. markdown cell:
  101. - single-line heading -> heading cell
  102. """
  103. if cell.cell_type == 'code':
  104. cell.language = 'python'
  105. cell.input = cell.pop('source', '')
  106. cell.prompt_number = cell.pop('execution_count', None)
  107. cell.collapsed = cell.metadata.pop('collapsed', False)
  108. cell.outputs = downgrade_outputs(cell.outputs)
  109. elif cell.cell_type == 'markdown':
  110. source = cell.get('source', '')
  111. if '\n' not in source and source.startswith('#'):
  112. prefix, text = re.match(r'(#+)\s*(.*)', source).groups()
  113. cell.cell_type = 'heading'
  114. cell.source = text
  115. cell.level = len(prefix)
  116. cell.pop('attachments', None)
  117. return cell
  118. _mime_map = {
  119. "text" : "text/plain",
  120. "html" : "text/html",
  121. "svg" : "image/svg+xml",
  122. "png" : "image/png",
  123. "jpeg" : "image/jpeg",
  124. "latex" : "text/latex",
  125. "json" : "application/json",
  126. "javascript" : "application/javascript",
  127. };
  128. def to_mime_key(d):
  129. """convert dict with v3 aliases to plain mime-type keys"""
  130. for alias, mime in _mime_map.items():
  131. if alias in d:
  132. d[mime] = d.pop(alias)
  133. return d
  134. def from_mime_key(d):
  135. """convert dict with mime-type keys to v3 aliases"""
  136. d2 = {}
  137. for alias, mime in _mime_map.items():
  138. if mime in d:
  139. d2[alias] = d[mime]
  140. return d2
  141. def upgrade_output(output):
  142. """upgrade a single code cell output from v3 to v4
  143. - pyout -> execute_result
  144. - pyerr -> error
  145. - output.type -> output.data.mime/type
  146. - mime-type keys
  147. - stream.stream -> stream.name
  148. """
  149. if output['output_type'] in {'pyout', 'display_data'}:
  150. output.setdefault('metadata', NotebookNode())
  151. if output['output_type'] == 'pyout':
  152. output['output_type'] = 'execute_result'
  153. output['execution_count'] = output.pop('prompt_number', None)
  154. # move output data into data sub-dict
  155. data = {}
  156. for key in list(output):
  157. if key in {'output_type', 'execution_count', 'metadata'}:
  158. continue
  159. data[key] = output.pop(key)
  160. to_mime_key(data)
  161. output['data'] = data
  162. to_mime_key(output.metadata)
  163. if 'application/json' in data:
  164. data['application/json'] = json.loads(data['application/json'])
  165. # promote ascii bytes (from v2) to unicode
  166. for key in ('image/png', 'image/jpeg'):
  167. if key in data and isinstance(data[key], bytes):
  168. data[key] = data[key].decode('ascii')
  169. elif output['output_type'] == 'pyerr':
  170. output['output_type'] = 'error'
  171. elif output['output_type'] == 'stream':
  172. output['name'] = output.pop('stream', 'stdout')
  173. return output
  174. def downgrade_output(output):
  175. """downgrade a single code cell output to v3 from v4
  176. - pyout <- execute_result
  177. - pyerr <- error
  178. - output.data.mime/type -> output.type
  179. - un-mime-type keys
  180. - stream.stream <- stream.name
  181. """
  182. if output['output_type'] in {'execute_result', 'display_data'}:
  183. if output['output_type'] == 'execute_result':
  184. output['output_type'] = 'pyout'
  185. output['prompt_number'] = output.pop('execution_count', None)
  186. # promote data dict to top-level output namespace
  187. data = output.pop('data', {})
  188. if 'application/json' in data:
  189. data['application/json'] = json.dumps(data['application/json'])
  190. data = from_mime_key(data)
  191. output.update(data)
  192. from_mime_key(output.get('metadata', {}))
  193. elif output['output_type'] == 'error':
  194. output['output_type'] = 'pyerr'
  195. elif output['output_type'] == 'stream':
  196. output['stream'] = output.pop('name')
  197. return output
  198. def upgrade_outputs(outputs):
  199. """upgrade outputs of a code cell from v3 to v4"""
  200. return [upgrade_output(op) for op in outputs]
  201. def downgrade_outputs(outputs):
  202. """downgrade outputs of a code cell to v3 from v4"""
  203. return [downgrade_output(op) for op in outputs]
  204. def downgrade(nb):
  205. """Convert a v4 notebook to v3.
  206. Parameters
  207. ----------
  208. nb : NotebookNode
  209. The Python representation of the notebook to convert.
  210. """
  211. if nb.nbformat != nbformat:
  212. return nb
  213. # Validate the notebook before conversion
  214. _warn_if_invalid(nb, nbformat)
  215. nb.nbformat = v3.nbformat
  216. nb.nbformat_minor = v3.nbformat_minor
  217. cells = [ downgrade_cell(cell) for cell in nb.pop('cells') ]
  218. nb.worksheets = [v3.new_worksheet(cells=cells)]
  219. nb.metadata.setdefault('name', '')
  220. # Validate the converted notebook before returning it
  221. _warn_if_invalid(nb, v3.nbformat)
  222. nb.orig_nbformat = nb.metadata.pop('orig_nbformat', nbformat)
  223. nb.orig_nbformat_minor = nb.metadata.pop('orig_nbformat_minor', nbformat_minor)
  224. return nb