# test_sanitize.py (6.5 KB)
  1. """Tests for the HTMLSanitize preprocessor"""
  2. from .base import PreprocessorTestsBase
  3. from ..sanitize import SanitizeHTML
  4. from nbformat import v4 as nbformat
  5. class TestSanitizer(PreprocessorTestsBase):
  6. """Contains test functions for sanitize.py"""
  7. maxDiff = None
  8. def build_preprocessor(self):
  9. """Make an instance of a preprocessor"""
  10. preprocessor = SanitizeHTML()
  11. preprocessor.enabled = True
  12. return preprocessor
  13. def preprocess_source(self, cell_type, source, preprocessor):
  14. nb = self.build_notebook()
  15. res = self.build_resources()
  16. nb.cells[0].cell_type = cell_type
  17. nb.cells[0].source = source
  18. nb, res = preprocessor(nb, res)
  19. return nb.cells[0].source
  20. def test_constructor(self):
  21. """Can a SanitizeHTML be constructed?"""
  22. self.build_preprocessor()
  23. def test_svg_handling(self):
  24. """
  25. Test to make sure that svgs are handled 'properly'
  26. We only allow <img> tags (via markdown syntax) and not all the other ways
  27. to embed svg: <object>, <embed>, <iframe> nor inline <svg>
  28. """
  29. preprocessor = self.build_preprocessor()
  30. preprocessor.strip = True
  31. self.assertEqual(
  32. self.preprocess_source(
  33. 'markdown',
  34. """
  35. ![some image](http://example.com/something.svg)
  36. <object data="something.svg" type="image/svg+xml"></object>
  37. <embed data="something.svg" type="image/svg+xml" />
  38. <iframe src="http://example.com/something.svg"></iframe>
  39. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 68 65">
  40. <path fill="#1A374D" d="M42 27v-20c0-3.7-3.3-7-7-7s-7 3.3-7 7v21l12 15-7 15.7c14.5 13.9 35 2.8 35-13.7 0-13.3-13.4-21.8-26-18zm6 25c-3.9 0-7-3.1-7-7s3.1-7 7-7 7 3.1 7 7-3.1 7-7 7z"/>
  41. <path d="M14 27v-20c0-3.7-3.3-7-7-7s-7 3.3-7 7v41c0 8.2 9.2 17 20 17s20-9.2 20-20c0-13.3-13.4-21.8-26-18zm6 25c-3.9 0-7-3.1-7-7s3.1-7 7-7 7 3.1 7 7-3.1 7-7 7z"/>
  42. </svg>
  43. """,
  44. preprocessor
  45. ).strip(),
  46. """
  47. ![some image](http://example.com/something.svg)
  48. """.strip(),
  49. )
  50. def test_tag_whitelist_stripping(self):
  51. """Test tag whitelisting + stripping out offending tags"""
  52. preprocessor = self.build_preprocessor()
  53. preprocessor.strip = True
  54. self.assertEqual(
  55. self.preprocess_source(
  56. 'markdown',
  57. '_A_ <em>few</em> <script>tags</script>',
  58. preprocessor
  59. ),
  60. '_A_ <em>few</em> tags'
  61. )
  62. def test_comment_stripping(self):
  63. """Test HTML comment stripping"""
  64. preprocessor = self.build_preprocessor()
  65. self.assertEqual(
  66. self.preprocess_source(
  67. 'markdown',
  68. '_A_ <em>few</em> <!-- tags -->',
  69. preprocessor
  70. ),
  71. '_A_ <em>few</em> '
  72. )
  73. preprocessor.strip_comments = False
  74. self.assertEqual(
  75. self.preprocess_source(
  76. 'markdown',
  77. '_A_ <em>few</em> <!-- tags -->',
  78. preprocessor
  79. ),
  80. '_A_ <em>few</em> <!-- tags -->'
  81. )
  82. def test_attributes_whitelist(self):
  83. """Test style"""
  84. preprocessor = self.build_preprocessor()
  85. preprocessor.attributes['a'] = ['href', 'title']
  86. self.assertEqual(
  87. self.preprocess_source(
  88. 'markdown',
  89. '<a href="link" rel="nofollow">Hi</a>',
  90. preprocessor
  91. ),
  92. '<a href="link">Hi</a>'
  93. )
  94. def test_style_whitelist(self):
  95. """Test style"""
  96. preprocessor = self.build_preprocessor()
  97. if '*' in preprocessor.attributes:
  98. preprocessor.attributes['*'].append('style')
  99. else:
  100. preprocessor.attributes['*'] = ['style']
  101. preprocessor.styles = [
  102. 'color',
  103. ]
  104. self.assertEqual(
  105. self.preprocess_source(
  106. 'markdown',
  107. '_A_ <em style="color: blue; background-color: pink">'
  108. 'few</em> <script>tags</script>',
  109. preprocessor
  110. ),
  111. '_A_ <em style="color: blue;">few</em> '
  112. '&lt;script&gt;tags&lt;/script&gt;'
  113. )
  114. def test_tag_passthrough(self):
  115. """Test passing through raw output"""
  116. preprocessor = self.build_preprocessor()
  117. self.assertEqual(
  118. self.preprocess_source(
  119. 'raw',
  120. '_A_ <em>few</em> <script>tags</script>',
  121. preprocessor
  122. ),
  123. '_A_ <em>few</em> &lt;script&gt;tags&lt;/script&gt;'
  124. )
  125. def test_output_sanitizing(self):
  126. """Test that outputs are also sanitized properly"""
  127. preprocessor = self.build_preprocessor()
  128. nb = self.build_notebook()
  129. outputs = [
  130. nbformat.new_output("display_data", data={
  131. 'text/plain': 'b',
  132. 'text/html': '<script>more evil</script>',
  133. 'text/css': '<style> * {display:none}</style>'
  134. }),
  135. nbformat.new_output('stream', name='stdout', text="wat"),
  136. nbformat.new_output('stream', name='stdout', text="<script>Evil tag</script>")
  137. ]
  138. nb.cells[0].outputs = outputs
  139. res = self.build_resources()
  140. nb, res = preprocessor(nb, res)
  141. expected_output = [
  142. {
  143. 'data': {
  144. 'text/html': '&lt;script&gt;more evil&lt;/script&gt;',
  145. 'text/plain': 'b'
  146. },
  147. 'metadata': {},
  148. 'output_type': 'display_data',
  149. },
  150. {
  151. 'name': 'stdout',
  152. 'output_type': 'stream',
  153. 'text': 'wat'
  154. },
  155. {
  156. 'name': 'stdout',
  157. 'output_type':
  158. 'stream', 'text': '<script>Evil tag</script>'
  159. }
  160. ]
  161. self.assertEqual(nb.cells[0].outputs, expected_output)
  162. def test_tag_whitelist(self):
  163. """Test tag whitelisting"""
  164. preprocessor = self.build_preprocessor()
  165. self.assertEqual(
  166. self.preprocess_source(
  167. 'markdown',
  168. '_A_ <em>few</em> <script>tags</script>',
  169. preprocessor
  170. ),
  171. '_A_ <em>few</em> &lt;script&gt;tags&lt;/script&gt;'
  172. )