ElementTree.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. # defusedxml
  2. #
  3. # Copyright (c) 2013 by Christian Heimes <christian@python.org>
  4. # Licensed to PSF under a Contributor Agreement.
  5. # See https://www.python.org/psf/license for licensing details.
  6. """Defused xml.etree.ElementTree facade
  7. """
  8. from __future__ import print_function, absolute_import
  9. import sys
  10. import warnings
  11. from xml.etree.ElementTree import TreeBuilder as _TreeBuilder
  12. from xml.etree.ElementTree import parse as _parse
  13. from xml.etree.ElementTree import tostring
  14. from .common import PY3
  15. if PY3:
  16. import importlib
  17. else:
  18. from xml.etree.ElementTree import XMLParser as _XMLParser
  19. from xml.etree.ElementTree import iterparse as _iterparse
  20. from xml.etree.ElementTree import ParseError
  21. from .common import (
  22. DTDForbidden,
  23. EntitiesForbidden,
  24. ExternalReferenceForbidden,
  25. _generate_etree_functions,
  26. )
  27. __origin__ = "xml.etree.ElementTree"
  28. def _get_py3_cls():
  29. """Python 3.3 hides the pure Python code but defusedxml requires it.
  30. The code is based on test.support.import_fresh_module().
  31. """
  32. pymodname = "xml.etree.ElementTree"
  33. cmodname = "_elementtree"
  34. pymod = sys.modules.pop(pymodname, None)
  35. cmod = sys.modules.pop(cmodname, None)
  36. sys.modules[cmodname] = None
  37. pure_pymod = importlib.import_module(pymodname)
  38. if cmod is not None:
  39. sys.modules[cmodname] = cmod
  40. else:
  41. sys.modules.pop(cmodname)
  42. sys.modules[pymodname] = pymod
  43. _XMLParser = pure_pymod.XMLParser
  44. _iterparse = pure_pymod.iterparse
  45. ParseError = pure_pymod.ParseError
  46. return _XMLParser, _iterparse, ParseError
  47. if PY3:
  48. _XMLParser, _iterparse, ParseError = _get_py3_cls()
  49. _sentinel = object()
  50. class DefusedXMLParser(_XMLParser):
  51. def __init__(
  52. self,
  53. html=_sentinel,
  54. target=None,
  55. encoding=None,
  56. forbid_dtd=False,
  57. forbid_entities=True,
  58. forbid_external=True,
  59. ):
  60. # Python 2.x old style class
  61. _XMLParser.__init__(self, target=target, encoding=encoding)
  62. if html is not _sentinel:
  63. # the 'html' argument has been deprecated and ignored in all
  64. # supported versions of Python. Python 3.8 finally removed it.
  65. if html:
  66. raise TypeError("'html=True' is no longer supported.")
  67. else:
  68. warnings.warn(
  69. "'html' keyword argument is no longer supported. Pass "
  70. "in arguments as keyword arguments.",
  71. category=DeprecationWarning,
  72. )
  73. self.forbid_dtd = forbid_dtd
  74. self.forbid_entities = forbid_entities
  75. self.forbid_external = forbid_external
  76. if PY3:
  77. parser = self.parser
  78. else:
  79. parser = self._parser
  80. if self.forbid_dtd:
  81. parser.StartDoctypeDeclHandler = self.defused_start_doctype_decl
  82. if self.forbid_entities:
  83. parser.EntityDeclHandler = self.defused_entity_decl
  84. parser.UnparsedEntityDeclHandler = self.defused_unparsed_entity_decl
  85. if self.forbid_external:
  86. parser.ExternalEntityRefHandler = self.defused_external_entity_ref_handler
  87. def defused_start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
  88. raise DTDForbidden(name, sysid, pubid)
  89. def defused_entity_decl(
  90. self, name, is_parameter_entity, value, base, sysid, pubid, notation_name
  91. ):
  92. raise EntitiesForbidden(name, value, base, sysid, pubid, notation_name)
  93. def defused_unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
  94. # expat 1.2
  95. raise EntitiesForbidden(name, None, base, sysid, pubid, notation_name) # pragma: no cover
  96. def defused_external_entity_ref_handler(self, context, base, sysid, pubid):
  97. raise ExternalReferenceForbidden(context, base, sysid, pubid)
  98. # aliases
  99. # XMLParse is a typo, keep it for backwards compatibility
  100. XMLTreeBuilder = XMLParse = XMLParser = DefusedXMLParser
  101. parse, iterparse, fromstring = _generate_etree_functions(
  102. DefusedXMLParser, _TreeBuilder, _parse, _iterparse
  103. )
  104. XML = fromstring
  105. __all__ = [
  106. "ParseError",
  107. "XML",
  108. "XMLParse",
  109. "XMLParser",
  110. "XMLTreeBuilder",
  111. "fromstring",
  112. "iterparse",
  113. "parse",
  114. "tostring",
  115. ]