rich_text.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. """ Defines classes and functions for working with Qt's rich text system.
  2. """
  3. # Copyright (c) Jupyter Development Team.
  4. # Distributed under the terms of the Modified BSD License.
  5. import io
  6. import os
  7. import re
  8. from qtpy import QtWidgets
  9. from ipython_genutils import py3compat
  10. #-----------------------------------------------------------------------------
  11. # Constants
  12. #-----------------------------------------------------------------------------
# A regular expression for an HTML paragraph with no content: a <p ...> opening
# tag (attributes allowed, but no '/' or '>' inside it), optional whitespace,
# then the closing </p>.
EMPTY_P_RE = re.compile(r'<p[^/>]*>\s*</p>')

# A regular expression for matching images in rich text HTML. The ``name``
# group captures the all-digit value of the ``src`` attribute.
# Note that this is overly restrictive, but Qt's output is predictable...
IMG_RE = re.compile(r'<img src="(?P<name>[\d]+)" />')
  18. #-----------------------------------------------------------------------------
  19. # Classes
  20. #-----------------------------------------------------------------------------
  21. class HtmlExporter(object):
  22. """ A stateful HTML exporter for a Q(Plain)TextEdit.
  23. This class is designed for convenient user interaction.
  24. """
  25. def __init__(self, control):
  26. """ Creates an HtmlExporter for the given Q(Plain)TextEdit.
  27. """
  28. assert isinstance(control, (QtWidgets.QPlainTextEdit, QtWidgets.QTextEdit))
  29. self.control = control
  30. self.filename = 'ipython.html'
  31. self.image_tag = None
  32. self.inline_png = None
  33. def export(self):
  34. """ Displays a dialog for exporting HTML generated by Qt's rich text
  35. system.
  36. Returns
  37. -------
  38. The name of the file that was saved, or None if no file was saved.
  39. """
  40. parent = self.control.window()
  41. dialog = QtWidgets.QFileDialog(parent, 'Save as...')
  42. dialog.setAcceptMode(QtWidgets.QFileDialog.AcceptSave)
  43. filters = [
  44. 'HTML with PNG figures (*.html *.htm)',
  45. 'XHTML with inline SVG figures (*.xhtml *.xml)'
  46. ]
  47. dialog.setNameFilters(filters)
  48. if self.filename:
  49. dialog.selectFile(self.filename)
  50. root,ext = os.path.splitext(self.filename)
  51. if ext.lower() in ('.xml', '.xhtml'):
  52. dialog.selectNameFilter(filters[-1])
  53. if dialog.exec_():
  54. self.filename = dialog.selectedFiles()[0]
  55. choice = dialog.selectedNameFilter()
  56. html = py3compat.cast_unicode(self.control.document().toHtml())
  57. # Configure the exporter.
  58. if choice.startswith('XHTML'):
  59. exporter = export_xhtml
  60. else:
  61. # If there are PNGs, decide how to export them.
  62. inline = self.inline_png
  63. if inline is None and IMG_RE.search(html):
  64. dialog = QtWidgets.QDialog(parent)
  65. dialog.setWindowTitle('Save as...')
  66. layout = QtWidgets.QVBoxLayout(dialog)
  67. msg = "Exporting HTML with PNGs"
  68. info = "Would you like inline PNGs (single large html " \
  69. "file) or external image files?"
  70. checkbox = QtWidgets.QCheckBox("&Don't ask again")
  71. checkbox.setShortcut('D')
  72. ib = QtWidgets.QPushButton("&Inline")
  73. ib.setShortcut('I')
  74. eb = QtWidgets.QPushButton("&External")
  75. eb.setShortcut('E')
  76. box = QtWidgets.QMessageBox(QtWidgets.QMessageBox.Question,
  77. dialog.windowTitle(), msg)
  78. box.setInformativeText(info)
  79. box.addButton(ib, QtWidgets.QMessageBox.NoRole)
  80. box.addButton(eb, QtWidgets.QMessageBox.YesRole)
  81. layout.setSpacing(0)
  82. layout.addWidget(box)
  83. layout.addWidget(checkbox)
  84. dialog.setLayout(layout)
  85. dialog.show()
  86. reply = box.exec_()
  87. dialog.hide()
  88. inline = (reply == 0)
  89. if checkbox.checkState():
  90. # Don't ask anymore; always use this choice.
  91. self.inline_png = inline
  92. exporter = lambda h, f, i: export_html(h, f, i, inline)
  93. # Perform the export!
  94. try:
  95. return exporter(html, self.filename, self.image_tag)
  96. except Exception as e:
  97. msg = "Error exporting HTML to %s\n" % self.filename + str(e)
  98. reply = QtWidgets.QMessageBox.warning(parent, 'Error', msg,
  99. QtWidgets.QMessageBox.Ok, QtWidgets.QMessageBox.Ok)
  100. return None
  101. #-----------------------------------------------------------------------------
  102. # Functions
  103. #-----------------------------------------------------------------------------
  104. def export_html(html, filename, image_tag = None, inline = True):
  105. """ Export the contents of the ConsoleWidget as HTML.
  106. Parameters
  107. ----------
  108. html : unicode,
  109. A Python unicode string containing the Qt HTML to export.
  110. filename : str
  111. The file to be saved.
  112. image_tag : callable, optional (default None)
  113. Used to convert images. See ``default_image_tag()`` for information.
  114. inline : bool, optional [default True]
  115. If True, include images as inline PNGs. Otherwise, include them as
  116. links to external PNG files, mimicking web browsers' "Web Page,
  117. Complete" behavior.
  118. """
  119. if image_tag is None:
  120. image_tag = default_image_tag
  121. if inline:
  122. path = None
  123. else:
  124. root,ext = os.path.splitext(filename)
  125. path = root + "_files"
  126. if os.path.isfile(path):
  127. raise OSError("%s exists, but is not a directory." % path)
  128. with io.open(filename, 'w', encoding='utf-8') as f:
  129. html = fix_html(html)
  130. f.write(IMG_RE.sub(lambda x: image_tag(x, path = path, format = "png"),
  131. html))
  132. def export_xhtml(html, filename, image_tag=None):
  133. """ Export the contents of the ConsoleWidget as XHTML with inline SVGs.
  134. Parameters
  135. ----------
  136. html : unicode,
  137. A Python unicode string containing the Qt HTML to export.
  138. filename : str
  139. The file to be saved.
  140. image_tag : callable, optional (default None)
  141. Used to convert images. See ``default_image_tag()`` for information.
  142. """
  143. if image_tag is None:
  144. image_tag = default_image_tag
  145. with io.open(filename, 'w', encoding='utf-8') as f:
  146. # Hack to make xhtml header -- note that we are not doing any check for
  147. # valid XML.
  148. offset = html.find("<html>")
  149. assert offset > -1, 'Invalid HTML string: no <html> tag.'
  150. html = (u'<html xmlns="http://www.w3.org/1999/xhtml">\n'+
  151. html[offset+6:])
  152. html = fix_html(html)
  153. f.write(IMG_RE.sub(lambda x: image_tag(x, path = None, format = "svg"),
  154. html))
  155. def default_image_tag(match, path = None, format = "png"):
  156. """ Return (X)HTML mark-up for the image-tag given by match.
  157. This default implementation merely removes the image, and exists mostly
  158. for documentation purposes. More information than is present in the Qt
  159. HTML is required to supply the images.
  160. Parameters
  161. ----------
  162. match : re.SRE_Match
  163. A match to an HTML image tag as exported by Qt, with match.group("Name")
  164. containing the matched image ID.
  165. path : string|None, optional [default None]
  166. If not None, specifies a path to which supporting files may be written
  167. (e.g., for linked images). If None, all images are to be included
  168. inline.
  169. format : "png"|"svg", optional [default "png"]
  170. Format for returned or referenced images.
  171. """
  172. return u''
  173. def fix_html(html):
  174. """ Transforms a Qt-generated HTML string into a standards-compliant one.
  175. Parameters
  176. ----------
  177. html : unicode,
  178. A Python unicode string containing the Qt HTML.
  179. """
  180. # A UTF-8 declaration is needed for proper rendering of some characters
  181. # (e.g., indented commands) when viewing exported HTML on a local system
  182. # (i.e., without seeing an encoding declaration in an HTTP header).
  183. # C.f. http://www.w3.org/International/O-charset for details.
  184. offset = html.find('<head>')
  185. if offset > -1:
  186. html = (html[:offset+6]+
  187. '\n<meta http-equiv="Content-Type" '+
  188. 'content="text/html; charset=utf-8" />\n'+
  189. html[offset+6:])
  190. # Replace empty paragraphs tags with line breaks.
  191. html = re.sub(EMPTY_P_RE, '<br/>', html)
  192. return html