test_xml.py 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081
  1. # -*- test-case-name: twisted.web.test.test_xml -*-
  2. # Copyright (c) Twisted Matrix Laboratories.
  3. # See LICENSE for details.
  4. """
  5. Some fairly inadequate testcases for Twisted XML support.
  6. """
  7. from twisted.trial.unittest import TestCase
  8. from twisted.web import sux
  9. from twisted.web import microdom
  10. from twisted.web import domhelpers
  11. class Sux0r(sux.XMLParser):
  12. def __init__(self):
  13. self.tokens = []
  14. def getTagStarts(self):
  15. return [token for token in self.tokens if token[0] == 'start']
  16. def gotTagStart(self, name, attrs):
  17. self.tokens.append(("start", name, attrs))
  18. def gotText(self, text):
  19. self.tokens.append(("text", text))
  20. class SUXTests(TestCase):
  21. def testBork(self):
  22. s = "<bork><bork><bork>"
  23. ms = Sux0r()
  24. ms.connectionMade()
  25. ms.dataReceived(s)
  26. self.assertEqual(len(ms.getTagStarts()),3)
  27. class MicroDOMTests(TestCase):
  28. def test_leadingTextDropping(self):
  29. """
  30. Make sure that if there's no top-level node lenient-mode won't
  31. drop leading text that's outside of any elements.
  32. """
  33. s = "Hi orders! <br>Well. <br>"
  34. d = microdom.parseString(s, beExtremelyLenient=True)
  35. self.assertEqual(d.firstChild().toxml(),
  36. '<html>Hi orders! <br />Well. <br /></html>')
  37. def test_trailingTextDropping(self):
  38. """
  39. Ensure that no *trailing* text in a mal-formed
  40. no-top-level-element document(s) will not be dropped.
  41. """
  42. s = "<br>Hi orders!"
  43. d = microdom.parseString(s, beExtremelyLenient=True)
  44. self.assertEqual(d.firstChild().toxml(),
  45. '<html><br />Hi orders!</html>')
  46. def test_noTags(self):
  47. """
  48. A string with nothing that looks like a tag at all should just
  49. be parsed as body text.
  50. """
  51. s = "Hi orders!"
  52. d = microdom.parseString(s, beExtremelyLenient=True)
  53. self.assertEqual(d.firstChild().toxml(),
  54. "<html>Hi orders!</html>")
  55. def test_surroundingCrap(self):
  56. """
  57. If a document is surrounded by non-xml text, the text should
  58. be remain in the XML.
  59. """
  60. s = "Hi<br> orders!"
  61. d = microdom.parseString(s, beExtremelyLenient=True)
  62. self.assertEqual(d.firstChild().toxml(),
  63. "<html>Hi<br /> orders!</html>")
  64. def testCaseSensitiveSoonCloser(self):
  65. s = """
  66. <HTML><BODY>
  67. <P ALIGN="CENTER">
  68. <A HREF="http://www.apache.org/"><IMG SRC="/icons/apache_pb.gif"></A>
  69. </P>
  70. <P>
  71. This is an insane set of text nodes that should NOT be gathered under
  72. the A tag above.
  73. </P>
  74. </BODY></HTML>
  75. """
  76. d = microdom.parseString(s, beExtremelyLenient=1)
  77. l = domhelpers.findNodesNamed(d.documentElement, 'a')
  78. n = domhelpers.gatherTextNodes(l[0],1).replace('&nbsp;',' ')
  79. self.assertEqual(n.find('insane'), -1)
  80. def test_lenientParenting(self):
  81. """
  82. Test that C{parentNode} attributes are set to meaningful values when
  83. we are parsing HTML that lacks a root node.
  84. """
  85. # Spare the rod, ruin the child.
  86. s = "<br/><br/>"
  87. d = microdom.parseString(s, beExtremelyLenient=1)
  88. self.assertIdentical(d.documentElement,
  89. d.documentElement.firstChild().parentNode)
  90. def test_lenientParentSingle(self):
  91. """
  92. Test that the C{parentNode} attribute is set to a meaningful value
  93. when we parse an HTML document that has a non-Element root node.
  94. """
  95. s = "Hello"
  96. d = microdom.parseString(s, beExtremelyLenient=1)
  97. self.assertIdentical(d.documentElement,
  98. d.documentElement.firstChild().parentNode)
  99. def testUnEntities(self):
  100. s = """
  101. <HTML>
  102. This HTML goes between Stupid <=CrAzY!=> Dumb.
  103. </HTML>
  104. """
  105. d = microdom.parseString(s, beExtremelyLenient=1)
  106. n = domhelpers.gatherTextNodes(d)
  107. self.assertNotEqual(n.find('>'), -1)
  108. def testEmptyError(self):
  109. self.assertRaises(sux.ParseError, microdom.parseString, "")
  110. def testTameDocument(self):
  111. s = """
  112. <test>
  113. <it>
  114. <is>
  115. <a>
  116. test
  117. </a>
  118. </is>
  119. </it>
  120. </test>
  121. """
  122. d = microdom.parseString(s)
  123. self.assertEqual(
  124. domhelpers.gatherTextNodes(d.documentElement).strip() ,'test')
  125. def testAwfulTagSoup(self):
  126. s = """
  127. <html>
  128. <head><title> I send you this message to have your advice!!!!</titl e
  129. </headd>
  130. <body bgcolor alink hlink vlink>
  131. <h1><BLINK>SALE</blINK> TWENTY MILLION EMAILS & FUR COAT NOW
  132. FREE WITH `ENLARGER'</h1>
  133. YES THIS WONDERFUL AWFER IS NOW HERER!!!
  134. <script LANGUAGE="javascript">
  135. function give_answers() {
  136. if (score < 70) {
  137. alert("I hate you");
  138. }}
  139. </script><a href=/foo.com/lalal name=foo>lalal</a>
  140. </body>
  141. </HTML>
  142. """
  143. d = microdom.parseString(s, beExtremelyLenient=1)
  144. l = domhelpers.findNodesNamed(d.documentElement, 'blink')
  145. self.assertEqual(len(l), 1)
  146. def testScriptLeniency(self):
  147. s = """
  148. <script>(foo < bar) and (bar > foo)</script>
  149. <script language="javascript">foo </scrip bar </script>
  150. <script src="foo">
  151. <script src="foo">baz</script>
  152. <script /><script></script>
  153. """
  154. d = microdom.parseString(s, beExtremelyLenient=1)
  155. self.assertEqual(d.firstChild().firstChild().firstChild().data,
  156. "(foo < bar) and (bar > foo)")
  157. self.assertEqual(
  158. d.firstChild().getElementsByTagName("script")[1].firstChild().data,
  159. "foo </scrip bar ")
  160. def testScriptLeniencyIntelligence(self):
  161. # if there is comment or CDATA in script, the autoquoting in bEL mode
  162. # should not happen
  163. s = """<script><!-- lalal --></script>"""
  164. self.assertEqual(
  165. microdom.parseString(s, beExtremelyLenient=1).firstChild().toxml(), s)
  166. s = """<script><![CDATA[lalal]]></script>"""
  167. self.assertEqual(
  168. microdom.parseString(s, beExtremelyLenient=1).firstChild().toxml(), s)
  169. s = """<script> // <![CDATA[
  170. lalal
  171. //]]></script>"""
  172. self.assertEqual(
  173. microdom.parseString(s, beExtremelyLenient=1).firstChild().toxml(), s)
  174. def testPreserveCase(self):
  175. s = '<eNcApSuLaTe><sUxor></sUxor><bOrk><w00T>TeXt</W00t></BoRk></EnCaPsUlAtE>'
  176. s2 = s.lower().replace('text', 'TeXt')
  177. # these are the only two option permutations that *can* parse the above
  178. d = microdom.parseString(s, caseInsensitive=1, preserveCase=1)
  179. d2 = microdom.parseString(s, caseInsensitive=1, preserveCase=0)
  180. # caseInsensitive=0 preserveCase=0 is not valid, it's converted to
  181. # caseInsensitive=0 preserveCase=1
  182. d3 = microdom.parseString(s2, caseInsensitive=0, preserveCase=1)
  183. d4 = microdom.parseString(s2, caseInsensitive=1, preserveCase=0)
  184. d5 = microdom.parseString(s2, caseInsensitive=1, preserveCase=1)
  185. # this is slightly contrived, toxml() doesn't need to be identical
  186. # for the documents to be equivalent (i.e. <b></b> to <b/>),
  187. # however this assertion tests preserving case for start and
  188. # end tags while still matching stuff like <bOrk></BoRk>
  189. self.assertEqual(d.documentElement.toxml(), s)
  190. self.assertTrue(d.isEqualToDocument(d2), "%r != %r" % (d.toxml(), d2.toxml()))
  191. self.assertTrue(d2.isEqualToDocument(d3), "%r != %r" % (d2.toxml(), d3.toxml()))
  192. # caseInsensitive=0 on the left, NOT perserveCase=1 on the right
  193. ## XXX THIS TEST IS TURNED OFF UNTIL SOMEONE WHO CARES ABOUT FIXING IT DOES
  194. #self.assertFalse(d3.isEqualToDocument(d2), "%r == %r" % (d3.toxml(), d2.toxml()))
  195. self.assertTrue(d3.isEqualToDocument(d4), "%r != %r" % (d3.toxml(), d4.toxml()))
  196. self.assertTrue(d4.isEqualToDocument(d5), "%r != %r" % (d4.toxml(), d5.toxml()))
  197. def testDifferentQuotes(self):
  198. s = '<test a="a" b=\'b\' />'
  199. d = microdom.parseString(s)
  200. e = d.documentElement
  201. self.assertEqual(e.getAttribute('a'), 'a')
  202. self.assertEqual(e.getAttribute('b'), 'b')
  203. def testLinebreaks(self):
  204. s = '<test \na="a"\n\tb="#b" />'
  205. d = microdom.parseString(s)
  206. e = d.documentElement
  207. self.assertEqual(e.getAttribute('a'), 'a')
  208. self.assertEqual(e.getAttribute('b'), '#b')
  209. def testMismatchedTags(self):
  210. for s in '<test>', '<test> </tset>', '</test>':
  211. self.assertRaises(microdom.MismatchedTags, microdom.parseString, s)
  212. def testComment(self):
  213. s = "<bar><!--<foo />--></bar>"
  214. d = microdom.parseString(s)
  215. e = d.documentElement
  216. self.assertEqual(e.nodeName, "bar")
  217. c = e.childNodes[0]
  218. self.assertTrue(isinstance(c, microdom.Comment))
  219. self.assertEqual(c.value, "<foo />")
  220. c2 = c.cloneNode()
  221. self.assertTrue(c is not c2)
  222. self.assertEqual(c2.toxml(), "<!--<foo />-->")
  223. def testText(self):
  224. d = microdom.parseString("<bar>xxxx</bar>").documentElement
  225. text = d.childNodes[0]
  226. self.assertTrue(isinstance(text, microdom.Text))
  227. self.assertEqual(text.value, "xxxx")
  228. clone = text.cloneNode()
  229. self.assertTrue(clone is not text)
  230. self.assertEqual(clone.toxml(), "xxxx")
  231. def testEntities(self):
  232. nodes = microdom.parseString("<b>&amp;&#12AB;</b>").documentElement.childNodes
  233. self.assertEqual(len(nodes), 2)
  234. self.assertEqual(nodes[0].data, "&amp;")
  235. self.assertEqual(nodes[1].data, "&#12AB;")
  236. self.assertEqual(nodes[0].cloneNode().toxml(), "&amp;")
  237. for n in nodes:
  238. self.assertTrue(isinstance(n, microdom.EntityReference))
  239. def testCData(self):
  240. s = '<x><![CDATA[</x>\r\n & foo]]></x>'
  241. cdata = microdom.parseString(s).documentElement.childNodes[0]
  242. self.assertTrue(isinstance(cdata, microdom.CDATASection))
  243. self.assertEqual(cdata.data, "</x>\r\n & foo")
  244. self.assertEqual(cdata.cloneNode().toxml(), "<![CDATA[</x>\r\n & foo]]>")
  245. def testSingletons(self):
  246. s = "<foo><b/><b /><b\n/></foo>"
  247. s2 = "<foo><b/><b/><b/></foo>"
  248. nodes = microdom.parseString(s).documentElement.childNodes
  249. nodes2 = microdom.parseString(s2).documentElement.childNodes
  250. self.assertEqual(len(nodes), 3)
  251. for (n, n2) in zip(nodes, nodes2):
  252. self.assertTrue(isinstance(n, microdom.Element))
  253. self.assertEqual(n.nodeName, "b")
  254. self.assertTrue(n.isEqualToNode(n2))
  255. def testAttributes(self):
  256. s = '<foo a="b" />'
  257. node = microdom.parseString(s).documentElement
  258. self.assertEqual(node.getAttribute("a"), "b")
  259. self.assertEqual(node.getAttribute("c"), None)
  260. self.assertTrue(node.hasAttribute("a"))
  261. self.assertTrue(not node.hasAttribute("c"))
  262. a = node.getAttributeNode("a")
  263. self.assertEqual(a.value, "b")
  264. node.setAttribute("foo", "bar")
  265. self.assertEqual(node.getAttribute("foo"), "bar")
  266. def testChildren(self):
  267. s = "<foo><bar /><baz /><bax>foo</bax></foo>"
  268. d = microdom.parseString(s).documentElement
  269. self.assertEqual([n.nodeName for n in d.childNodes], ["bar", "baz", "bax"])
  270. self.assertEqual(d.lastChild().nodeName, "bax")
  271. self.assertEqual(d.firstChild().nodeName, "bar")
  272. self.assertTrue(d.hasChildNodes())
  273. self.assertTrue(not d.firstChild().hasChildNodes())
  274. def testMutate(self):
  275. s = "<foo />"
  276. s1 = '<foo a="b"><bar/><foo/></foo>'
  277. s2 = '<foo a="b">foo</foo>'
  278. d = microdom.parseString(s).documentElement
  279. d1 = microdom.parseString(s1).documentElement
  280. d2 = microdom.parseString(s2).documentElement
  281. d.appendChild(d.cloneNode())
  282. d.setAttribute("a", "b")
  283. child = d.childNodes[0]
  284. self.assertEqual(child.getAttribute("a"), None)
  285. self.assertEqual(child.nodeName, "foo")
  286. d.insertBefore(microdom.Element("bar"), child)
  287. self.assertEqual(d.childNodes[0].nodeName, "bar")
  288. self.assertEqual(d.childNodes[1], child)
  289. for n in d.childNodes:
  290. self.assertEqual(n.parentNode, d)
  291. self.assertTrue(d.isEqualToNode(d1))
  292. d.removeChild(child)
  293. self.assertEqual(len(d.childNodes), 1)
  294. self.assertEqual(d.childNodes[0].nodeName, "bar")
  295. t = microdom.Text("foo")
  296. d.replaceChild(t, d.firstChild())
  297. self.assertEqual(d.firstChild(), t)
  298. self.assertTrue(d.isEqualToNode(d2))
  299. def test_replaceNonChild(self):
  300. """
  301. L{Node.replaceChild} raises L{ValueError} if the node given to be
  302. replaced is not a child of the node C{replaceChild} is called on.
  303. """
  304. parent = microdom.parseString('<foo />')
  305. orphan = microdom.parseString('<bar />')
  306. replacement = microdom.parseString('<baz />')
  307. self.assertRaises(
  308. ValueError, parent.replaceChild, replacement, orphan)
  309. def testSearch(self):
  310. s = "<foo><bar id='me' /><baz><foo /></baz></foo>"
  311. s2 = "<fOo><bAr id='me' /><bAz><fOO /></bAz></fOo>"
  312. d = microdom.parseString(s)
  313. d2 = microdom.parseString(s2, caseInsensitive=0, preserveCase=1)
  314. d3 = microdom.parseString(s2, caseInsensitive=1, preserveCase=1)
  315. root = d.documentElement
  316. self.assertEqual(root.firstChild(), d.getElementById('me'))
  317. self.assertEqual(d.getElementsByTagName("foo"),
  318. [root, root.lastChild().firstChild()])
  319. root = d2.documentElement
  320. self.assertEqual(root.firstChild(), d2.getElementById('me'))
  321. self.assertEqual(d2.getElementsByTagName('fOo'), [root])
  322. self.assertEqual(d2.getElementsByTagName('fOO'),
  323. [root.lastChild().firstChild()])
  324. self.assertEqual(d2.getElementsByTagName('foo'), [])
  325. root = d3.documentElement
  326. self.assertEqual(root.firstChild(), d3.getElementById('me'))
  327. self.assertEqual(d3.getElementsByTagName('FOO'),
  328. [root, root.lastChild().firstChild()])
  329. self.assertEqual(d3.getElementsByTagName('fOo'),
  330. [root, root.lastChild().firstChild()])
  331. def testDoctype(self):
  332. s = ('<?xml version="1.0"?>'
  333. '<!DOCTYPE foo PUBLIC "baz" "http://www.example.com/example.dtd">'
  334. '<foo></foo>')
  335. s2 = '<foo/>'
  336. d = microdom.parseString(s)
  337. d2 = microdom.parseString(s2)
  338. self.assertEqual(d.doctype,
  339. 'foo PUBLIC "baz" "http://www.example.com/example.dtd"')
  340. self.assertEqual(d.toxml(), s)
  341. self.assertFalse(d.isEqualToDocument(d2))
  342. self.assertTrue(d.documentElement.isEqualToNode(d2.documentElement))
  343. samples = [("<img/>", "<img />"),
  344. ("<foo A='b'>x</foo>", '<foo A="b">x</foo>'),
  345. ("<foo><BAR /></foo>", "<foo><BAR></BAR></foo>"),
  346. ("<foo>hello there &amp; yoyoy</foo>",
  347. "<foo>hello there &amp; yoyoy</foo>"),
  348. ]
  349. def testOutput(self):
  350. for s, out in self.samples:
  351. d = microdom.parseString(s, caseInsensitive=0)
  352. d2 = microdom.parseString(out, caseInsensitive=0)
  353. testOut = d.documentElement.toxml()
  354. self.assertEqual(out, testOut)
  355. self.assertTrue(d.isEqualToDocument(d2))
  356. def testErrors(self):
  357. for s in ["<foo>&am</foo>", "<foo", "<f>&</f>", "<() />"]:
  358. self.assertRaises(Exception, microdom.parseString, s)
  359. def testCaseInsensitive(self):
  360. s = "<foo a='b'><BAx>x</bax></FOO>"
  361. s2 = '<foo a="b"><bax>x</bax></foo>'
  362. s3 = "<FOO a='b'><BAx>x</BAx></FOO>"
  363. s4 = "<foo A='b'>x</foo>"
  364. d = microdom.parseString(s)
  365. d2 = microdom.parseString(s2)
  366. d3 = microdom.parseString(s3, caseInsensitive=1)
  367. d4 = microdom.parseString(s4, caseInsensitive=1, preserveCase=1)
  368. d5 = microdom.parseString(s4, caseInsensitive=1, preserveCase=0)
  369. d6 = microdom.parseString(s4, caseInsensitive=0, preserveCase=0)
  370. out = microdom.parseString(s).documentElement.toxml()
  371. self.assertRaises(microdom.MismatchedTags, microdom.parseString,
  372. s, caseInsensitive=0)
  373. self.assertEqual(out, s2)
  374. self.assertTrue(d.isEqualToDocument(d2))
  375. self.assertTrue(d.isEqualToDocument(d3))
  376. self.assertTrue(d4.documentElement.hasAttribute('a'))
  377. self.assertFalse(d6.documentElement.hasAttribute('a'))
  378. self.assertEqual(d4.documentElement.toxml(), '<foo A="b">x</foo>')
  379. self.assertEqual(d5.documentElement.toxml(), '<foo a="b">x</foo>')
  380. def testEatingWhitespace(self):
  381. s = """<hello>
  382. </hello>"""
  383. d = microdom.parseString(s)
  384. self.assertTrue(not d.documentElement.hasChildNodes(),
  385. d.documentElement.childNodes)
  386. self.assertTrue(d.isEqualToDocument(microdom.parseString('<hello></hello>')))
  387. def testLenientAmpersand(self):
  388. prefix = "<?xml version='1.0'?>"
  389. # we use <pre> so space will be preserved
  390. for i, o in [("&", "&amp;"),
  391. ("& ", "&amp; "),
  392. ("&amp;", "&amp;"),
  393. ("&hello monkey", "&amp;hello monkey")]:
  394. d = microdom.parseString("%s<pre>%s</pre>"
  395. % (prefix, i), beExtremelyLenient=1)
  396. self.assertEqual(d.documentElement.toxml(), "<pre>%s</pre>" % o)
  397. # non-space preserving
  398. d = microdom.parseString("<t>hello & there</t>", beExtremelyLenient=1)
  399. self.assertEqual(d.documentElement.toxml(), "<t>hello &amp; there</t>")
  400. def testInsensitiveLenient(self):
  401. # testing issue #537
  402. d = microdom.parseString(
  403. "<?xml version='1.0'?><bar><xA><y>c</Xa> <foo></bar>",
  404. beExtremelyLenient=1)
  405. self.assertEqual(d.documentElement.firstChild().toxml(), "<xa><y>c</y></xa>")
  406. def testLaterCloserSimple(self):
  407. s = "<ul><li>foo<li>bar<li>baz</ul>"
  408. d = microdom.parseString(s, beExtremelyLenient=1)
  409. expected = "<ul><li>foo</li><li>bar</li><li>baz</li></ul>"
  410. actual = d.documentElement.toxml()
  411. self.assertEqual(expected, actual)
  412. def testLaterCloserCaseInsensitive(self):
  413. s = "<DL><p><DT>foo<DD>bar</DL>"
  414. d = microdom.parseString(s, beExtremelyLenient=1)
  415. expected = "<dl><p></p><dt>foo</dt><dd>bar</dd></dl>"
  416. actual = d.documentElement.toxml()
  417. self.assertEqual(expected, actual)
  418. def testLaterCloserDL(self):
  419. s = ("<dl>"
  420. "<dt>word<dd>definition"
  421. "<dt>word<dt>word<dd>definition<dd>definition"
  422. "</dl>")
  423. expected = ("<dl>"
  424. "<dt>word</dt><dd>definition</dd>"
  425. "<dt>word</dt><dt>word</dt><dd>definition</dd><dd>definition</dd>"
  426. "</dl>")
  427. d = microdom.parseString(s, beExtremelyLenient=1)
  428. actual = d.documentElement.toxml()
  429. self.assertEqual(expected, actual)
  430. def testUnicodeTolerance(self):
  431. import struct
  432. s = '<foo><bar><baz /></bar></foo>'
  433. j =(u'<?xml version="1.0" encoding="UCS-2" ?>\r\n<JAPANESE>\r\n'
  434. u'<TITLE>\u5c02\u9580\u5bb6\u30ea\u30b9\u30c8 </TITLE></JAPANESE>')
  435. j2=('\xff\xfe<\x00?\x00x\x00m\x00l\x00 \x00v\x00e\x00r\x00s\x00i\x00o'
  436. '\x00n\x00=\x00"\x001\x00.\x000\x00"\x00 \x00e\x00n\x00c\x00o\x00d'
  437. '\x00i\x00n\x00g\x00=\x00"\x00U\x00C\x00S\x00-\x002\x00"\x00 \x00?'
  438. '\x00>\x00\r\x00\n\x00<\x00J\x00A\x00P\x00A\x00N\x00E\x00S\x00E'
  439. '\x00>\x00\r\x00\n\x00<\x00T\x00I\x00T\x00L\x00E\x00>\x00\x02\\'
  440. '\x80\x95\xb6[\xea0\xb90\xc80 \x00<\x00/\x00T\x00I\x00T\x00L\x00E'
  441. '\x00>\x00<\x00/\x00J\x00A\x00P\x00A\x00N\x00E\x00S\x00E\x00>\x00')
  442. def reverseBytes(s):
  443. fmt = str(len(s) // 2) + 'H'
  444. return struct.pack('<' + fmt, *struct.unpack('>' + fmt, s))
  445. urd = microdom.parseString(reverseBytes(s.encode('UTF-16')))
  446. ud = microdom.parseString(s.encode('UTF-16'))
  447. sd = microdom.parseString(s)
  448. self.assertTrue(ud.isEqualToDocument(sd))
  449. self.assertTrue(ud.isEqualToDocument(urd))
  450. ud = microdom.parseString(j)
  451. urd = microdom.parseString(reverseBytes(j2))
  452. sd = microdom.parseString(j2)
  453. self.assertTrue(ud.isEqualToDocument(sd))
  454. self.assertTrue(ud.isEqualToDocument(urd))
  455. # test that raw text still gets encoded
  456. # test that comments get encoded
  457. j3=microdom.parseString(u'<foo/>')
  458. hdr='<?xml version="1.0"?>'
  459. div=microdom.lmx().text(u'\u221a', raw=1).node
  460. de=j3.documentElement
  461. de.appendChild(div)
  462. de.appendChild(j3.createComment(u'\u221a'))
  463. self.assertEqual(j3.toxml(), hdr+
  464. u'<foo><div>\u221a</div><!--\u221a--></foo>'.encode('utf8'))
  465. def testNamedChildren(self):
  466. tests = {"<foo><bar /><bar unf='1' /><bar>asdfadsf</bar>"
  467. "<bam/></foo>" : 3,
  468. '<foo>asdf</foo>' : 0,
  469. '<foo><bar><bar></bar></bar></foo>' : 1,
  470. }
  471. for t in tests.keys():
  472. node = microdom.parseString(t).documentElement
  473. result = domhelpers.namedChildren(node, 'bar')
  474. self.assertEqual(len(result), tests[t])
  475. if result:
  476. self.assertTrue(hasattr(result[0], 'tagName'))
  477. def testCloneNode(self):
  478. s = '<foo a="b"><bax>x</bax></foo>'
  479. node = microdom.parseString(s).documentElement
  480. clone = node.cloneNode(deep=1)
  481. self.failIfEquals(node, clone)
  482. self.assertEqual(len(node.childNodes), len(clone.childNodes))
  483. c1, c2 = node.firstChild(), clone.firstChild()
  484. self.failIfEquals(c1, c2)
  485. self.assertEqual(len(c1.childNodes), len(c2.childNodes))
  486. self.failIfEquals(c1.firstChild(), c2.firstChild())
  487. self.assertEqual(s, clone.toxml())
  488. self.assertEqual(node.namespace, clone.namespace)
  489. def testCloneDocument(self):
  490. s = ('<?xml version="1.0"?>'
  491. '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'
  492. '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><foo></foo>')
  493. node = microdom.parseString(s)
  494. clone = node.cloneNode(deep=1)
  495. self.failIfEquals(node, clone)
  496. self.assertEqual(len(node.childNodes), len(clone.childNodes))
  497. self.assertEqual(s, clone.toxml())
  498. self.assertTrue(clone.isEqualToDocument(node))
  499. self.assertTrue(node.isEqualToDocument(clone))
  500. def testLMX(self):
  501. n = microdom.Element("p")
  502. lmx = microdom.lmx(n)
  503. lmx.text("foo")
  504. b = lmx.b(a="c")
  505. b.foo()["z"] = "foo"
  506. b.foo()
  507. b.add("bar", c="y")
  508. s = '<p>foo<b a="c"><foo z="foo"></foo><foo></foo><bar c="y"></bar></b></p>'
  509. self.assertEqual(s, n.toxml())
  510. def testDict(self):
  511. """
  512. Returns a dictionary which is hashable.
  513. """
  514. n = microdom.Element("p")
  515. hash(n)
  516. def testEscaping(self):
  517. # issue 590
  518. raw = "&'some \"stuff\"', <what up?>"
  519. cooked = "&amp;'some &quot;stuff&quot;', &lt;what up?&gt;"
  520. esc1 = microdom.escape(raw)
  521. self.assertEqual(esc1, cooked)
  522. self.assertEqual(microdom.unescape(esc1), raw)
  523. def testNamespaces(self):
  524. s = '''
  525. <x xmlns="base">
  526. <y />
  527. <y q="1" x:q="2" y:q="3" />
  528. <y:y xml:space="1">here is some space </y:y>
  529. <y:y />
  530. <x:y />
  531. </x>
  532. '''
  533. d = microdom.parseString(s)
  534. # at least make sure it doesn't traceback
  535. s2 = d.toprettyxml()
  536. self.assertEqual(d.documentElement.namespace,
  537. "base")
  538. self.assertEqual(d.documentElement.getElementsByTagName("y")[0].namespace,
  539. "base")
  540. self.assertEqual(
  541. d.documentElement.getElementsByTagName("y")[1].getAttributeNS('base','q'),
  542. '1')
  543. d2 = microdom.parseString(s2)
  544. self.assertEqual(d2.documentElement.namespace,
  545. "base")
  546. self.assertEqual(d2.documentElement.getElementsByTagName("y")[0].namespace,
  547. "base")
  548. self.assertEqual(
  549. d2.documentElement.getElementsByTagName("y")[1].getAttributeNS('base','q'),
  550. '1')
  551. def testNamespaceDelete(self):
  552. """
  553. Test that C{toxml} can support xml structures that remove namespaces.
  554. """
  555. s1 = ('<?xml version="1.0"?><html xmlns="http://www.w3.org/TR/REC-html40">'
  556. '<body xmlns=""></body></html>')
  557. s2 = microdom.parseString(s1).toxml()
  558. self.assertEqual(s1, s2)
  559. def testNamespaceInheritance(self):
  560. """
  561. Check that unspecified namespace is a thing separate from undefined
  562. namespace. This test added after discovering some weirdness in Lore.
  563. """
  564. # will only work if childNodes is mutated. not sure why.
  565. child = microdom.Element('ol')
  566. parent = microdom.Element('div', namespace='http://www.w3.org/1999/xhtml')
  567. parent.childNodes = [child]
  568. self.assertEqual(parent.toxml(),
  569. '<div xmlns="http://www.w3.org/1999/xhtml"><ol></ol></div>')
  570. def test_prefixedTags(self):
  571. """
  572. XML elements with a prefixed name as per upper level tag definition
  573. have a start-tag of C{"<prefix:tag>"} and an end-tag of
  574. C{"</prefix:tag>"}.
  575. Refer to U{http://www.w3.org/TR/xml-names/#ns-using} for details.
  576. """
  577. outerNamespace = "http://example.com/outer"
  578. innerNamespace = "http://example.com/inner"
  579. document = microdom.Document()
  580. # Create the root in one namespace. Microdom will probably make this
  581. # the default namespace.
  582. root = document.createElement("root", namespace=outerNamespace)
  583. # Give the root some prefixes to use.
  584. root.addPrefixes({innerNamespace: "inner"})
  585. # Append a child to the root from the namespace that prefix is bound
  586. # to.
  587. tag = document.createElement("tag", namespace=innerNamespace)
  588. # Give that tag a child too. This way we test rendering of tags with
  589. # children and without children.
  590. child = document.createElement("child", namespace=innerNamespace)
  591. tag.appendChild(child)
  592. root.appendChild(tag)
  593. document.appendChild(root)
  594. # ok, the xml should appear like this
  595. xmlOk = (
  596. '<?xml version="1.0"?>'
  597. '<root xmlns="http://example.com/outer" '
  598. 'xmlns:inner="http://example.com/inner">'
  599. '<inner:tag><inner:child></inner:child></inner:tag>'
  600. '</root>')
  601. xmlOut = document.toxml()
  602. self.assertEqual(xmlOut, xmlOk)
  603. def test_prefixPropagation(self):
  604. """
  605. Children of prefixed tags respect the default namespace at the point
  606. where they are rendered. Specifically, they are not influenced by the
  607. prefix of their parent as that prefix has no bearing on them.
  608. See U{http://www.w3.org/TR/xml-names/#scoping} for details.
  609. To further clarify the matter, the following::
  610. <root xmlns="http://example.com/ns/test">
  611. <mytag xmlns="http://example.com/ns/mytags">
  612. <mysubtag xmlns="http://example.com/ns/mytags">
  613. <element xmlns="http://example.com/ns/test"></element>
  614. </mysubtag>
  615. </mytag>
  616. </root>
  617. Should become this after all the namespace declarations have been
  618. I{moved up}::
  619. <root xmlns="http://example.com/ns/test"
  620. xmlns:mytags="http://example.com/ns/mytags">
  621. <mytags:mytag>
  622. <mytags:mysubtag>
  623. <element></element>
  624. </mytags:mysubtag>
  625. </mytags:mytag>
  626. </root>
  627. """
  628. outerNamespace = "http://example.com/outer"
  629. innerNamespace = "http://example.com/inner"
  630. document = microdom.Document()
  631. # creates a root element
  632. root = document.createElement("root", namespace=outerNamespace)
  633. document.appendChild(root)
  634. # Create a child with a specific namespace with a prefix bound to it.
  635. root.addPrefixes({innerNamespace: "inner"})
  636. mytag = document.createElement("mytag",namespace=innerNamespace)
  637. root.appendChild(mytag)
  638. # Create a child of that which has the outer namespace.
  639. mysubtag = document.createElement("mysubtag", namespace=outerNamespace)
  640. mytag.appendChild(mysubtag)
  641. xmlOk = (
  642. '<?xml version="1.0"?>'
  643. '<root xmlns="http://example.com/outer" '
  644. 'xmlns:inner="http://example.com/inner">'
  645. '<inner:mytag>'
  646. '<mysubtag></mysubtag>'
  647. '</inner:mytag>'
  648. '</root>'
  649. )
  650. xmlOut = document.toxml()
  651. self.assertEqual(xmlOut, xmlOk)
  652. class BrokenHTMLTests(TestCase):
  653. """
  654. Tests for when microdom encounters very bad HTML and C{beExtremelyLenient}
  655. is enabled. These tests are inspired by some HTML generated by a mailer,
  656. which breaks up very long lines by splitting them with '!\n '. The expected
  657. behaviour is loosely modelled on the way Firefox treats very bad HTML.
  658. """
  659. def checkParsed(self, input, expected, beExtremelyLenient=1):
  660. """
  661. Check that C{input}, when parsed, produces a DOM where the XML
  662. of the document element is equal to C{expected}.
  663. """
  664. output = microdom.parseString(input,
  665. beExtremelyLenient=beExtremelyLenient)
  666. self.assertEqual(output.documentElement.toxml(), expected)
  667. def test_brokenAttributeName(self):
  668. """
  669. Check that microdom does its best to handle broken attribute names.
  670. The important thing is that it doesn't raise an exception.
  671. """
  672. input = '<body><h1><div al!\n ign="center">Foo</div></h1></body>'
  673. expected = ('<body><h1><div al="True" ign="center">'
  674. 'Foo</div></h1></body>')
  675. self.checkParsed(input, expected)
  676. def test_brokenAttributeValue(self):
  677. """
  678. Check that microdom encompasses broken attribute values.
  679. """
  680. input = '<body><h1><div align="cen!\n ter">Foo</div></h1></body>'
  681. expected = '<body><h1><div align="cen!\n ter">Foo</div></h1></body>'
  682. self.checkParsed(input, expected)
  683. def test_brokenOpeningTag(self):
  684. """
  685. Check that microdom does its best to handle broken opening tags.
  686. The important thing is that it doesn't raise an exception.
  687. """
  688. input = '<body><h1><sp!\n an>Hello World!</span></h1></body>'
  689. expected = '<body><h1><sp an="True">Hello World!</sp></h1></body>'
  690. self.checkParsed(input, expected)
  691. def test_brokenSelfClosingTag(self):
  692. """
  693. Check that microdom does its best to handle broken self-closing tags
  694. The important thing is that it doesn't raise an exception.
  695. """
  696. self.checkParsed('<body><span /!\n></body>',
  697. '<body><span></span></body>')
  698. self.checkParsed('<span!\n />', '<span></span>')
  699. def test_brokenClosingTag(self):
  700. """
  701. Check that microdom does its best to handle broken closing tags.
  702. The important thing is that it doesn't raise an exception.
  703. """
  704. input = '<body><h1><span>Hello World!</sp!\nan></h1></body>'
  705. expected = '<body><h1><span>Hello World!</span></h1></body>'
  706. self.checkParsed(input, expected)
  707. input = '<body><h1><span>Hello World!</!\nspan></h1></body>'
  708. self.checkParsed(input, expected)
  709. input = '<body><h1><span>Hello World!</span!\n></h1></body>'
  710. self.checkParsed(input, expected)
  711. input = '<body><h1><span>Hello World!<!\n/span></h1></body>'
  712. expected = '<body><h1><span>Hello World!<!></!></span></h1></body>'
  713. self.checkParsed(input, expected)
  714. class NodeTests(TestCase):
  715. """
  716. Tests for L{Node}.
  717. """
  718. def test_isNodeEqualTo(self):
  719. """
  720. L{Node.isEqualToNode} returns C{True} if and only if passed a L{Node}
  721. with the same children.
  722. """
  723. # A node is equal to itself
  724. node = microdom.Node(object())
  725. self.assertTrue(node.isEqualToNode(node))
  726. another = microdom.Node(object())
  727. # Two nodes with no children are equal
  728. self.assertTrue(node.isEqualToNode(another))
  729. node.appendChild(microdom.Node(object()))
  730. # A node with no children is not equal to a node with a child
  731. self.assertFalse(node.isEqualToNode(another))
  732. another.appendChild(microdom.Node(object()))
  733. # A node with a child and no grandchildren is equal to another node
  734. # with a child and no grandchildren.
  735. self.assertTrue(node.isEqualToNode(another))
  736. # A node with a child and a grandchild is not equal to another node
  737. # with a child and no grandchildren.
  738. node.firstChild().appendChild(microdom.Node(object()))
  739. self.assertFalse(node.isEqualToNode(another))
  740. # A node with a child and a grandchild is equal to another node with a
  741. # child and a grandchild.
  742. another.firstChild().appendChild(microdom.Node(object()))
  743. self.assertTrue(node.isEqualToNode(another))
  744. def test_validChildInstance(self):
  745. """
  746. Children of L{Node} instances must also be L{Node} instances.
  747. """
  748. node = microdom.Node()
  749. child = microdom.Node()
  750. # Node.appendChild() only accepts Node instances.
  751. node.appendChild(child)
  752. self.assertRaises(TypeError, node.appendChild, None)
  753. # Node.insertBefore() only accepts Node instances.
  754. self.assertRaises(TypeError, node.insertBefore, child, None)
  755. self.assertRaises(TypeError, node.insertBefore, None, child)
  756. self.assertRaises(TypeError, node.insertBefore, None, None)
  757. # Node.removeChild() only accepts Node instances.
  758. node.removeChild(child)
  759. self.assertRaises(TypeError, node.removeChild, None)
  760. # Node.replaceChild() only accepts Node instances.
  761. self.assertRaises(TypeError, node.replaceChild, child, None)
  762. self.assertRaises(TypeError, node.replaceChild, None, child)
  763. self.assertRaises(TypeError, node.replaceChild, None, None)
  764. class DocumentTests(TestCase):
  765. """
  766. Tests for L{Document}.
  767. """
  768. doctype = 'foo PUBLIC "baz" "http://www.example.com/example.dtd"'
  769. def test_isEqualToNode(self):
  770. """
  771. L{Document.isEqualToNode} returns C{True} if and only if passed a
  772. L{Document} with the same C{doctype} and C{documentElement}.
  773. """
  774. # A document is equal to itself
  775. document = microdom.Document()
  776. self.assertTrue(document.isEqualToNode(document))
  777. # A document without a doctype or documentElement is equal to another
  778. # document without a doctype or documentElement.
  779. another = microdom.Document()
  780. self.assertTrue(document.isEqualToNode(another))
  781. # A document with a doctype is not equal to a document without a
  782. # doctype.
  783. document.doctype = self.doctype
  784. self.assertFalse(document.isEqualToNode(another))
  785. # Two documents with the same doctype are equal
  786. another.doctype = self.doctype
  787. self.assertTrue(document.isEqualToNode(another))
  788. # A document with a documentElement is not equal to a document without
  789. # a documentElement
  790. document.appendChild(microdom.Node(object()))
  791. self.assertFalse(document.isEqualToNode(another))
  792. # Two documents with equal documentElements are equal.
  793. another.appendChild(microdom.Node(object()))
  794. self.assertTrue(document.isEqualToNode(another))
  795. # Two documents with documentElements which are not equal are not
  796. # equal.
  797. document.documentElement.appendChild(microdom.Node(object()))
  798. self.assertFalse(document.isEqualToNode(another))
  799. def test_childRestriction(self):
  800. """
  801. L{Document.appendChild} raises L{ValueError} if the document already
  802. has a child.
  803. """
  804. document = microdom.Document()
  805. child = microdom.Node()
  806. another = microdom.Node()
  807. document.appendChild(child)
  808. self.assertRaises(ValueError, document.appendChild, another)
  809. class EntityReferenceTests(TestCase):
  810. """
  811. Tests for L{EntityReference}.
  812. """
  813. def test_isEqualToNode(self):
  814. """
  815. L{EntityReference.isEqualToNode} returns C{True} if and only if passed
  816. a L{EntityReference} with the same C{eref}.
  817. """
  818. self.assertTrue(
  819. microdom.EntityReference('quot').isEqualToNode(
  820. microdom.EntityReference('quot')))
  821. self.assertFalse(
  822. microdom.EntityReference('quot').isEqualToNode(
  823. microdom.EntityReference('apos')))
  824. class CharacterDataTests(TestCase):
  825. """
  826. Tests for L{CharacterData}.
  827. """
  828. def test_isEqualToNode(self):
  829. """
  830. L{CharacterData.isEqualToNode} returns C{True} if and only if passed a
  831. L{CharacterData} with the same value.
  832. """
  833. self.assertTrue(
  834. microdom.CharacterData('foo').isEqualToNode(
  835. microdom.CharacterData('foo')))
  836. self.assertFalse(
  837. microdom.CharacterData('foo').isEqualToNode(
  838. microdom.CharacterData('bar')))
  839. class CommentTests(TestCase):
  840. """
  841. Tests for L{Comment}.
  842. """
  843. def test_isEqualToNode(self):
  844. """
  845. L{Comment.isEqualToNode} returns C{True} if and only if passed a
  846. L{Comment} with the same value.
  847. """
  848. self.assertTrue(
  849. microdom.Comment('foo').isEqualToNode(
  850. microdom.Comment('foo')))
  851. self.assertFalse(
  852. microdom.Comment('foo').isEqualToNode(
  853. microdom.Comment('bar')))
  854. class TextTests(TestCase):
  855. """
  856. Tests for L{Text}.
  857. """
  858. def test_isEqualToNode(self):
  859. """
  860. L{Text.isEqualToNode} returns C{True} if and only if passed a L{Text}
  861. which represents the same data.
  862. """
  863. self.assertTrue(
  864. microdom.Text('foo', raw=True).isEqualToNode(
  865. microdom.Text('foo', raw=True)))
  866. self.assertFalse(
  867. microdom.Text('foo', raw=True).isEqualToNode(
  868. microdom.Text('foo', raw=False)))
  869. self.assertFalse(
  870. microdom.Text('foo', raw=True).isEqualToNode(
  871. microdom.Text('bar', raw=True)))
  872. class CDATASectionTests(TestCase):
  873. """
  874. Tests for L{CDATASection}.
  875. """
  876. def test_isEqualToNode(self):
  877. """
  878. L{CDATASection.isEqualToNode} returns C{True} if and only if passed a
  879. L{CDATASection} which represents the same data.
  880. """
  881. self.assertTrue(
  882. microdom.CDATASection('foo').isEqualToNode(
  883. microdom.CDATASection('foo')))
  884. self.assertFalse(
  885. microdom.CDATASection('foo').isEqualToNode(
  886. microdom.CDATASection('bar')))
  887. class ElementTests(TestCase):
  888. """
  889. Tests for L{Element}.
  890. """
  891. def test_isEqualToNode(self):
  892. """
  893. L{Element.isEqualToNode} returns C{True} if and only if passed a
  894. L{Element} with the same C{nodeName}, C{namespace}, C{childNodes}, and
  895. C{attributes}.
  896. """
  897. self.assertTrue(
  898. microdom.Element(
  899. 'foo', {'a': 'b'}, object(), namespace='bar').isEqualToNode(
  900. microdom.Element(
  901. 'foo', {'a': 'b'}, object(), namespace='bar')))
  902. # Elements with different nodeName values do not compare equal.
  903. self.assertFalse(
  904. microdom.Element(
  905. 'foo', {'a': 'b'}, object(), namespace='bar').isEqualToNode(
  906. microdom.Element(
  907. 'bar', {'a': 'b'}, object(), namespace='bar')))
  908. # Elements with different namespaces do not compare equal.
  909. self.assertFalse(
  910. microdom.Element(
  911. 'foo', {'a': 'b'}, object(), namespace='bar').isEqualToNode(
  912. microdom.Element(
  913. 'foo', {'a': 'b'}, object(), namespace='baz')))
  914. # Elements with different childNodes do not compare equal.
  915. one = microdom.Element('foo', {'a': 'b'}, object(), namespace='bar')
  916. two = microdom.Element('foo', {'a': 'b'}, object(), namespace='bar')
  917. two.appendChild(microdom.Node(object()))
  918. self.assertFalse(one.isEqualToNode(two))
  919. # Elements with different attributes do not compare equal.
  920. self.assertFalse(
  921. microdom.Element(
  922. 'foo', {'a': 'b'}, object(), namespace='bar').isEqualToNode(
  923. microdom.Element(
  924. 'foo', {'a': 'c'}, object(), namespace='bar')))