lxml.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. # defusedxml
  2. #
  3. # Copyright (c) 2013 by Christian Heimes <christian@python.org>
  4. # Licensed to PSF under a Contributor Agreement.
  5. # See https://www.python.org/psf/license for licensing details.
  6. """DEPRECATED Example code for lxml.etree protection
  7. The code has NO protection against decompression bombs.
  8. """
  9. from __future__ import print_function, absolute_import
  10. import threading
  11. import warnings
  12. from lxml import etree as _etree
  13. from .common import DTDForbidden, EntitiesForbidden, NotSupportedError
  14. LXML3 = _etree.LXML_VERSION[0] >= 3
  15. __origin__ = "lxml.etree"
  16. tostring = _etree.tostring
  17. warnings.warn(
  18. "defusedxml.lxml is no longer supported and will be removed in a " "future release.",
  19. category=DeprecationWarning,
  20. stacklevel=2,
  21. )
  22. class RestrictedElement(_etree.ElementBase):
  23. """A restricted Element class that filters out instances of some classes
  24. """
  25. __slots__ = ()
  26. # blacklist = (etree._Entity, etree._ProcessingInstruction, etree._Comment)
  27. blacklist = _etree._Entity
  28. def _filter(self, iterator):
  29. blacklist = self.blacklist
  30. for child in iterator:
  31. if isinstance(child, blacklist):
  32. continue
  33. yield child
  34. def __iter__(self):
  35. iterator = super(RestrictedElement, self).__iter__()
  36. return self._filter(iterator)
  37. def iterchildren(self, tag=None, reversed=False):
  38. iterator = super(RestrictedElement, self).iterchildren(tag=tag, reversed=reversed)
  39. return self._filter(iterator)
  40. def iter(self, tag=None, *tags):
  41. iterator = super(RestrictedElement, self).iter(tag=tag, *tags)
  42. return self._filter(iterator)
  43. def iterdescendants(self, tag=None, *tags):
  44. iterator = super(RestrictedElement, self).iterdescendants(tag=tag, *tags)
  45. return self._filter(iterator)
  46. def itersiblings(self, tag=None, preceding=False):
  47. iterator = super(RestrictedElement, self).itersiblings(tag=tag, preceding=preceding)
  48. return self._filter(iterator)
  49. def getchildren(self):
  50. iterator = super(RestrictedElement, self).__iter__()
  51. return list(self._filter(iterator))
  52. def getiterator(self, tag=None):
  53. iterator = super(RestrictedElement, self).getiterator(tag)
  54. return self._filter(iterator)
  55. class GlobalParserTLS(threading.local):
  56. """Thread local context for custom parser instances
  57. """
  58. parser_config = {
  59. "resolve_entities": False,
  60. # 'remove_comments': True,
  61. # 'remove_pis': True,
  62. }
  63. element_class = RestrictedElement
  64. def createDefaultParser(self):
  65. parser = _etree.XMLParser(**self.parser_config)
  66. element_class = self.element_class
  67. if self.element_class is not None:
  68. lookup = _etree.ElementDefaultClassLookup(element=element_class)
  69. parser.set_element_class_lookup(lookup)
  70. return parser
  71. def setDefaultParser(self, parser):
  72. self._default_parser = parser
  73. def getDefaultParser(self):
  74. parser = getattr(self, "_default_parser", None)
  75. if parser is None:
  76. parser = self.createDefaultParser()
  77. self.setDefaultParser(parser)
  78. return parser
  79. _parser_tls = GlobalParserTLS()
  80. getDefaultParser = _parser_tls.getDefaultParser
  81. def check_docinfo(elementtree, forbid_dtd=False, forbid_entities=True):
  82. """Check docinfo of an element tree for DTD and entity declarations
  83. The check for entity declarations needs lxml 3 or newer. lxml 2.x does
  84. not support dtd.iterentities().
  85. """
  86. docinfo = elementtree.docinfo
  87. if docinfo.doctype:
  88. if forbid_dtd:
  89. raise DTDForbidden(docinfo.doctype, docinfo.system_url, docinfo.public_id)
  90. if forbid_entities and not LXML3:
  91. # lxml < 3 has no iterentities()
  92. raise NotSupportedError("Unable to check for entity declarations " "in lxml 2.x")
  93. if forbid_entities:
  94. for dtd in docinfo.internalDTD, docinfo.externalDTD:
  95. if dtd is None:
  96. continue
  97. for entity in dtd.iterentities():
  98. raise EntitiesForbidden(entity.name, entity.content, None, None, None, None)
  99. def parse(source, parser=None, base_url=None, forbid_dtd=False, forbid_entities=True):
  100. if parser is None:
  101. parser = getDefaultParser()
  102. elementtree = _etree.parse(source, parser, base_url=base_url)
  103. check_docinfo(elementtree, forbid_dtd, forbid_entities)
  104. return elementtree
  105. def fromstring(text, parser=None, base_url=None, forbid_dtd=False, forbid_entities=True):
  106. if parser is None:
  107. parser = getDefaultParser()
  108. rootelement = _etree.fromstring(text, parser, base_url=base_url)
  109. elementtree = rootelement.getroottree()
  110. check_docinfo(elementtree, forbid_dtd, forbid_entities)
  111. return rootelement
  112. XML = fromstring
  113. def iterparse(*args, **kwargs):
  114. raise NotSupportedError("defused lxml.etree.iterparse not available")