rwbase.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. """Base classes and utilities for readers and writers.
  2. Authors:
  3. * Brian Granger
  4. """
  5. #-----------------------------------------------------------------------------
  6. # Copyright (C) 2008-2011 The IPython Development Team
  7. #
  8. # Distributed under the terms of the BSD License. The full license is in
  9. # the file COPYING, distributed as part of this software.
  10. #-----------------------------------------------------------------------------
  11. #-----------------------------------------------------------------------------
  12. # Imports
  13. #-----------------------------------------------------------------------------
try:
    from base64 import encodebytes, decodebytes
except ImportError:  # Python 2 only provides the legacy names
    from base64 import encodestring, decodestring
    encodebytes, decodebytes = encodestring, decodestring
else:
    # base64.encodestring/decodestring were removed in Python 3.9; keep the
    # legacy names bound for any code that still refers to them.
    encodestring, decodestring = encodebytes, decodebytes
import pprint

from ipython_genutils.py3compat import str_to_bytes, unicode_type, string_types
  17. #-----------------------------------------------------------------------------
  18. # Code
  19. #-----------------------------------------------------------------------------
  20. def restore_bytes(nb):
  21. """Restore bytes of image data from unicode-only formats.
  22. Base64 encoding is handled elsewhere. Bytes objects in the notebook are
  23. always b64-encoded. We DO NOT encode/decode around file formats.
  24. """
  25. for ws in nb.worksheets:
  26. for cell in ws.cells:
  27. if cell.cell_type == 'code':
  28. for output in cell.outputs:
  29. if 'png' in output:
  30. output.png = str_to_bytes(output.png, 'ascii')
  31. if 'jpeg' in output:
  32. output.jpeg = str_to_bytes(output.jpeg, 'ascii')
  33. return nb
  34. # output keys that are likely to have multiline values
  35. _multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']
  36. def rejoin_lines(nb):
  37. """rejoin multiline text into strings
  38. For reversing effects of ``split_lines(nb)``.
  39. This only rejoins lines that have been split, so if text objects were not split
  40. they will pass through unchanged.
  41. Used when reading JSON files that may have been passed through split_lines.
  42. """
  43. for ws in nb.worksheets:
  44. for cell in ws.cells:
  45. if cell.cell_type == 'code':
  46. if 'input' in cell and isinstance(cell.input, list):
  47. cell.input = u'\n'.join(cell.input)
  48. for output in cell.outputs:
  49. for key in _multiline_outputs:
  50. item = output.get(key, None)
  51. if isinstance(item, list):
  52. output[key] = u'\n'.join(item)
  53. else: # text cell
  54. for key in ['source', 'rendered']:
  55. item = cell.get(key, None)
  56. if isinstance(item, list):
  57. cell[key] = u'\n'.join(item)
  58. return nb
  59. def split_lines(nb):
  60. """split likely multiline text into lists of strings
  61. For file output more friendly to line-based VCS. ``rejoin_lines(nb)`` will
  62. reverse the effects of ``split_lines(nb)``.
  63. Used when writing JSON files.
  64. """
  65. for ws in nb.worksheets:
  66. for cell in ws.cells:
  67. if cell.cell_type == 'code':
  68. if 'input' in cell and isinstance(cell.input, string_types):
  69. cell.input = cell.input.splitlines()
  70. for output in cell.outputs:
  71. for key in _multiline_outputs:
  72. item = output.get(key, None)
  73. if isinstance(item, string_types):
  74. output[key] = item.splitlines()
  75. else: # text cell
  76. for key in ['source', 'rendered']:
  77. item = cell.get(key, None)
  78. if isinstance(item, string_types):
  79. cell[key] = item.splitlines()
  80. return nb
  81. # b64 encode/decode are never actually used, because all bytes objects in
  82. # the notebook are already b64-encoded, and we don't need/want to double-encode
  83. def base64_decode(nb):
  84. """Restore all bytes objects in the notebook from base64-encoded strings.
  85. Note: This is never used
  86. """
  87. for ws in nb.worksheets:
  88. for cell in ws.cells:
  89. if cell.cell_type == 'code':
  90. for output in cell.outputs:
  91. if 'png' in output:
  92. if isinstance(output.png, unicode_type):
  93. output.png = output.png.encode('ascii')
  94. output.png = decodestring(output.png)
  95. if 'jpeg' in output:
  96. if isinstance(output.jpeg, unicode_type):
  97. output.jpeg = output.jpeg.encode('ascii')
  98. output.jpeg = decodestring(output.jpeg)
  99. return nb
  100. def base64_encode(nb):
  101. """Base64 encode all bytes objects in the notebook.
  102. These will be b64-encoded unicode strings
  103. Note: This is never used
  104. """
  105. for ws in nb.worksheets:
  106. for cell in ws.cells:
  107. if cell.cell_type == 'code':
  108. for output in cell.outputs:
  109. if 'png' in output:
  110. output.png = encodestring(output.png).decode('ascii')
  111. if 'jpeg' in output:
  112. output.jpeg = encodestring(output.jpeg).decode('ascii')
  113. return nb
  114. class NotebookReader(object):
  115. """A class for reading notebooks."""
  116. def reads(self, s, **kwargs):
  117. """Read a notebook from a string."""
  118. raise NotImplementedError("loads must be implemented in a subclass")
  119. def read(self, fp, **kwargs):
  120. """Read a notebook from a file like object"""
  121. return self.read(fp.read(), **kwargs)
  122. class NotebookWriter(object):
  123. """A class for writing notebooks."""
  124. def writes(self, nb, **kwargs):
  125. """Write a notebook to a string."""
  126. raise NotImplementedError("loads must be implemented in a subclass")
  127. def write(self, nb, fp, **kwargs):
  128. """Write a notebook to a file like object"""
  129. return fp.write(self.writes(nb,**kwargs))