_flatten.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. # -*- test-case-name: twisted.web.test.test_flatten -*-
  2. # Copyright (c) Twisted Matrix Laboratories.
  3. # See LICENSE for details.
  4. """
  5. Context-free flattener/serializer for rendering Python objects, possibly
  6. complex or arbitrarily nested, as strings.
  7. """
  8. from __future__ import division, absolute_import
  9. from io import BytesIO
  10. from sys import exc_info
  11. from types import GeneratorType
  12. from traceback import extract_tb
  13. from twisted.internet.defer import Deferred
  14. from twisted.python.compat import unicode, nativeString, iteritems
  15. from twisted.web._stan import Tag, slot, voidElements, Comment, CDATA, CharRef
  16. from twisted.web.error import UnfilledSlot, UnsupportedType, FlattenerError
  17. from twisted.web.iweb import IRenderable
  18. def escapeForContent(data):
  19. """
  20. Escape some character or UTF-8 byte data for inclusion in an HTML or XML
  21. document, by replacing metacharacters (C{&<>}) with their entity
  22. equivalents (C{&amp;&lt;&gt;}).
  23. This is used as an input to L{_flattenElement}'s C{dataEscaper} parameter.
  24. @type data: C{bytes} or C{unicode}
  25. @param data: The string to escape.
  26. @rtype: C{bytes}
  27. @return: The quoted form of C{data}. If C{data} is unicode, return a utf-8
  28. encoded string.
  29. """
  30. if isinstance(data, unicode):
  31. data = data.encode('utf-8')
  32. data = data.replace(b'&', b'&amp;'
  33. ).replace(b'<', b'&lt;'
  34. ).replace(b'>', b'&gt;')
  35. return data
  36. def attributeEscapingDoneOutside(data):
  37. """
  38. Escape some character or UTF-8 byte data for inclusion in the top level of
  39. an attribute. L{attributeEscapingDoneOutside} actually passes the data
  40. through unchanged, because L{writeWithAttributeEscaping} handles the
  41. quoting of the text within attributes outside the generator returned by
  42. L{_flattenElement}; this is used as the C{dataEscaper} argument to that
  43. L{_flattenElement} call so that that generator does not redundantly escape
  44. its text output.
  45. @type data: C{bytes} or C{unicode}
  46. @param data: The string to escape.
  47. @return: The string, unchanged, except for encoding.
  48. @rtype: C{bytes}
  49. """
  50. if isinstance(data, unicode):
  51. return data.encode("utf-8")
  52. return data
  53. def writeWithAttributeEscaping(write):
  54. """
  55. Decorate a C{write} callable so that all output written is properly quoted
  56. for inclusion within an XML attribute value.
  57. If a L{Tag <twisted.web.template.Tag>} C{x} is flattened within the context
  58. of the contents of another L{Tag <twisted.web.template.Tag>} C{y}, the
  59. metacharacters (C{<>&"}) delimiting C{x} should be passed through
  60. unchanged, but the textual content of C{x} should still be quoted, as
  61. usual. For example: C{<y><x>&amp;</x></y>}. That is the default behavior
  62. of L{_flattenElement} when L{escapeForContent} is passed as the
  63. C{dataEscaper}.
  64. However, when a L{Tag <twisted.web.template.Tag>} C{x} is flattened within
  65. the context of an I{attribute} of another L{Tag <twisted.web.template.Tag>}
  66. C{y}, then the metacharacters delimiting C{x} should be quoted so that it
  67. can be parsed from the attribute's value. In the DOM itself, this is not a
  68. valid thing to do, but given that renderers and slots may be freely moved
  69. around in a L{twisted.web.template} template, it is a condition which may
  70. arise in a document and must be handled in a way which produces valid
  71. output. So, for example, you should be able to get C{<y attr="&lt;x /&gt;"
  72. />}. This should also be true for other XML/HTML meta-constructs such as
  73. comments and CDATA, so if you were to serialize a L{comment
  74. <twisted.web.template.Comment>} in an attribute you should get C{<y
  75. attr="&lt;-- comment --&gt;" />}. Therefore in order to capture these
  76. meta-characters, flattening is done with C{write} callable that is wrapped
  77. with L{writeWithAttributeEscaping}.
  78. The final case, and hopefully the much more common one as compared to
  79. serializing L{Tag <twisted.web.template.Tag>} and arbitrary L{IRenderable}
  80. objects within an attribute, is to serialize a simple string, and those
  81. should be passed through for L{writeWithAttributeEscaping} to quote
  82. without applying a second, redundant level of quoting.
  83. @param write: A callable which will be invoked with the escaped L{bytes}.
  84. @return: A callable that writes data with escaping.
  85. """
  86. def _write(data):
  87. write(escapeForContent(data).replace(b'"', b'&quot;'))
  88. return _write
  89. def escapedCDATA(data):
  90. """
  91. Escape CDATA for inclusion in a document.
  92. @type data: L{str} or L{unicode}
  93. @param data: The string to escape.
  94. @rtype: L{str}
  95. @return: The quoted form of C{data}. If C{data} is unicode, return a utf-8
  96. encoded string.
  97. """
  98. if isinstance(data, unicode):
  99. data = data.encode('utf-8')
  100. return data.replace(b']]>', b']]]]><![CDATA[>')
  101. def escapedComment(data):
  102. """
  103. Escape a comment for inclusion in a document.
  104. @type data: L{str} or L{unicode}
  105. @param data: The string to escape.
  106. @rtype: C{str}
  107. @return: The quoted form of C{data}. If C{data} is unicode, return a utf-8
  108. encoded string.
  109. """
  110. if isinstance(data, unicode):
  111. data = data.encode('utf-8')
  112. data = data.replace(b'--', b'- - ').replace(b'>', b'&gt;')
  113. if data and data[-1:] == b'-':
  114. data += b' '
  115. return data
  116. def _getSlotValue(name, slotData, default=None):
  117. """
  118. Find the value of the named slot in the given stack of slot data.
  119. """
  120. for slotFrame in slotData[::-1]:
  121. if slotFrame is not None and name in slotFrame:
  122. return slotFrame[name]
  123. else:
  124. if default is not None:
  125. return default
  126. raise UnfilledSlot(name)
  127. def _flattenElement(request, root, write, slotData, renderFactory,
  128. dataEscaper):
  129. """
  130. Make C{root} slightly more flat by yielding all its immediate contents as
  131. strings, deferreds or generators that are recursive calls to itself.
  132. @param request: A request object which will be passed to
  133. L{IRenderable.render}.
  134. @param root: An object to be made flatter. This may be of type C{unicode},
  135. L{str}, L{slot}, L{Tag <twisted.web.template.Tag>}, L{tuple}, L{list},
  136. L{types.GeneratorType}, L{Deferred}, or an object that implements
  137. L{IRenderable}.
  138. @param write: A callable which will be invoked with each L{bytes} produced
  139. by flattening C{root}.
  140. @param slotData: A L{list} of L{dict} mapping L{str} slot names to data
  141. with which those slots will be replaced.
  142. @param renderFactory: If not L{None}, an object that provides
  143. L{IRenderable}.
  144. @param dataEscaper: A 1-argument callable which takes L{bytes} or
  145. L{unicode} and returns L{bytes}, quoted as appropriate for the
  146. rendering context. This is really only one of two values:
  147. L{attributeEscapingDoneOutside} or L{escapeForContent}, depending on
  148. whether the rendering context is within an attribute or not. See the
  149. explanation in L{writeWithAttributeEscaping}.
  150. @return: An iterator that eventually yields L{bytes} that should be written
  151. to the output. However it may also yield other iterators or
  152. L{Deferred}s; if it yields another iterator, the caller will iterate
  153. it; if it yields a L{Deferred}, the result of that L{Deferred} will
  154. either be L{bytes}, in which case it's written, or another generator,
  155. in which case it is iterated. See L{_flattenTree} for the trampoline
  156. that consumes said values.
  157. @rtype: An iterator which yields L{bytes}, L{Deferred}, and more iterators
  158. of the same type.
  159. """
  160. def keepGoing(newRoot, dataEscaper=dataEscaper,
  161. renderFactory=renderFactory, write=write):
  162. return _flattenElement(request, newRoot, write, slotData,
  163. renderFactory, dataEscaper)
  164. if isinstance(root, (bytes, unicode)):
  165. write(dataEscaper(root))
  166. elif isinstance(root, slot):
  167. slotValue = _getSlotValue(root.name, slotData, root.default)
  168. yield keepGoing(slotValue)
  169. elif isinstance(root, CDATA):
  170. write(b'<![CDATA[')
  171. write(escapedCDATA(root.data))
  172. write(b']]>')
  173. elif isinstance(root, Comment):
  174. write(b'<!--')
  175. write(escapedComment(root.data))
  176. write(b'-->')
  177. elif isinstance(root, Tag):
  178. slotData.append(root.slotData)
  179. if root.render is not None:
  180. rendererName = root.render
  181. rootClone = root.clone(False)
  182. rootClone.render = None
  183. renderMethod = renderFactory.lookupRenderMethod(rendererName)
  184. result = renderMethod(request, rootClone)
  185. yield keepGoing(result)
  186. slotData.pop()
  187. return
  188. if not root.tagName:
  189. yield keepGoing(root.children)
  190. return
  191. write(b'<')
  192. if isinstance(root.tagName, unicode):
  193. tagName = root.tagName.encode('ascii')
  194. else:
  195. tagName = root.tagName
  196. write(tagName)
  197. for k, v in iteritems(root.attributes):
  198. if isinstance(k, unicode):
  199. k = k.encode('ascii')
  200. write(b' ' + k + b'="')
  201. # Serialize the contents of the attribute, wrapping the results of
  202. # that serialization so that _everything_ is quoted.
  203. yield keepGoing(
  204. v,
  205. attributeEscapingDoneOutside,
  206. write=writeWithAttributeEscaping(write))
  207. write(b'"')
  208. if root.children or nativeString(tagName) not in voidElements:
  209. write(b'>')
  210. # Regardless of whether we're in an attribute or not, switch back
  211. # to the escapeForContent dataEscaper. The contents of a tag must
  212. # be quoted no matter what; in the top-level document, just so
  213. # they're valid, and if they're within an attribute, they have to
  214. # be quoted so that after applying the *un*-quoting required to re-
  215. # parse the tag within the attribute, all the quoting is still
  216. # correct.
  217. yield keepGoing(root.children, escapeForContent)
  218. write(b'</' + tagName + b'>')
  219. else:
  220. write(b' />')
  221. elif isinstance(root, (tuple, list, GeneratorType)):
  222. for element in root:
  223. yield keepGoing(element)
  224. elif isinstance(root, CharRef):
  225. escaped = '&#%d;' % (root.ordinal,)
  226. write(escaped.encode('ascii'))
  227. elif isinstance(root, Deferred):
  228. yield root.addCallback(lambda result: (result, keepGoing(result)))
  229. elif IRenderable.providedBy(root):
  230. result = root.render(request)
  231. yield keepGoing(result, renderFactory=root)
  232. else:
  233. raise UnsupportedType(root)
  234. def _flattenTree(request, root, write):
  235. """
  236. Make C{root} into an iterable of L{bytes} and L{Deferred} by doing a depth
  237. first traversal of the tree.
  238. @param request: A request object which will be passed to
  239. L{IRenderable.render}.
  240. @param root: An object to be made flatter. This may be of type C{unicode},
  241. L{bytes}, L{slot}, L{Tag <twisted.web.template.Tag>}, L{tuple},
  242. L{list}, L{types.GeneratorType}, L{Deferred}, or something providing
  243. L{IRenderable}.
  244. @param write: A callable which will be invoked with each L{bytes} produced
  245. by flattening C{root}.
  246. @return: An iterator which yields objects of type L{bytes} and L{Deferred}.
  247. A L{Deferred} is only yielded when one is encountered in the process of
  248. flattening C{root}. The returned iterator must not be iterated again
  249. until the L{Deferred} is called back.
  250. """
  251. stack = [_flattenElement(request, root, write, [], None, escapeForContent)]
  252. while stack:
  253. try:
  254. frame = stack[-1].gi_frame
  255. element = next(stack[-1])
  256. except StopIteration:
  257. stack.pop()
  258. except Exception as e:
  259. stack.pop()
  260. roots = []
  261. for generator in stack:
  262. roots.append(generator.gi_frame.f_locals['root'])
  263. roots.append(frame.f_locals['root'])
  264. raise FlattenerError(e, roots, extract_tb(exc_info()[2]))
  265. else:
  266. if isinstance(element, Deferred):
  267. def cbx(originalAndToFlatten):
  268. original, toFlatten = originalAndToFlatten
  269. stack.append(toFlatten)
  270. return original
  271. yield element.addCallback(cbx)
  272. else:
  273. stack.append(element)
  274. def _writeFlattenedData(state, write, result):
  275. """
  276. Take strings from an iterator and pass them to a writer function.
  277. @param state: An iterator of L{str} and L{Deferred}. L{str} instances will
  278. be passed to C{write}. L{Deferred} instances will be waited on before
  279. resuming iteration of C{state}.
  280. @param write: A callable which will be invoked with each L{str}
  281. produced by iterating C{state}.
  282. @param result: A L{Deferred} which will be called back when C{state} has
  283. been completely flattened into C{write} or which will be errbacked if
  284. an exception in a generator passed to C{state} or an errback from a
  285. L{Deferred} from state occurs.
  286. @return: L{None}
  287. """
  288. while True:
  289. try:
  290. element = next(state)
  291. except StopIteration:
  292. result.callback(None)
  293. except:
  294. result.errback()
  295. else:
  296. def cby(original):
  297. _writeFlattenedData(state, write, result)
  298. return original
  299. element.addCallbacks(cby, result.errback)
  300. break
  301. def flatten(request, root, write):
  302. """
  303. Incrementally write out a string representation of C{root} using C{write}.
  304. In order to create a string representation, C{root} will be decomposed into
  305. simpler objects which will themselves be decomposed and so on until strings
  306. or objects which can easily be converted to strings are encountered.
  307. @param request: A request object which will be passed to the C{render}
  308. method of any L{IRenderable} provider which is encountered.
  309. @param root: An object to be made flatter. This may be of type L{unicode},
  310. L{bytes}, L{slot}, L{Tag <twisted.web.template.Tag>}, L{tuple},
  311. L{list}, L{types.GeneratorType}, L{Deferred}, or something that provides
  312. L{IRenderable}.
  313. @param write: A callable which will be invoked with each L{bytes} produced
  314. by flattening C{root}.
  315. @return: A L{Deferred} which will be called back when C{root} has been
  316. completely flattened into C{write} or which will be errbacked if an
  317. unexpected exception occurs.
  318. """
  319. result = Deferred()
  320. state = _flattenTree(request, root, write)
  321. _writeFlattenedData(state, write, result)
  322. return result
  323. def flattenString(request, root):
  324. """
  325. Collate a string representation of C{root} into a single string.
  326. This is basically gluing L{flatten} to an L{io.BytesIO} and returning
  327. the results. See L{flatten} for the exact meanings of C{request} and
  328. C{root}.
  329. @return: A L{Deferred} which will be called back with a single string as
  330. its result when C{root} has been completely flattened into C{write} or
  331. which will be errbacked if an unexpected exception occurs.
  332. """
  333. io = BytesIO()
  334. d = flatten(request, root, io.write)
  335. d.addCallback(lambda _: io.getvalue())
  336. return d