__init__.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. # -*- coding: utf-8 -*-
  2. from __future__ import unicode_literals
  3. import packaging.version
  4. from bleach.linkifier import (
  5. DEFAULT_CALLBACKS,
  6. Linker,
  7. )
  8. from bleach.sanitizer import (
  9. ALLOWED_ATTRIBUTES,
  10. ALLOWED_PROTOCOLS,
  11. ALLOWED_STYLES,
  12. ALLOWED_TAGS,
  13. Cleaner,
  14. )
# Release date of this version, written as yyyymmdd.
__releasedate__ = '20200429'
# Version string in the form x.y.z or x.y.z.dev0 -- semver.
__version__ = '3.1.5'
# The same version string parsed into a ``packaging.version.Version`` object.
VERSION = packaging.version.Version(__version__)
# Names exported by ``from <this module> import *``: the two functions
# defined in this file.
__all__ = ['clean', 'linkify']
  21. def clean(text, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES,
  22. styles=ALLOWED_STYLES, protocols=ALLOWED_PROTOCOLS, strip=False,
  23. strip_comments=True):
  24. """Clean an HTML fragment of malicious content and return it
  25. This function is a security-focused function whose sole purpose is to
  26. remove malicious content from a string such that it can be displayed as
  27. content in a web page.
  28. This function is not designed to use to transform content to be used in
  29. non-web-page contexts.
  30. Example::
  31. import bleach
  32. better_text = bleach.clean(yucky_text)
  33. .. Note::
  34. If you're cleaning a lot of text and passing the same argument values or
  35. you want more configurability, consider using a
  36. :py:class:`bleach.sanitizer.Cleaner` instance.
  37. :arg str text: the text to clean
  38. :arg list tags: allowed list of tags; defaults to
  39. ``bleach.sanitizer.ALLOWED_TAGS``
  40. :arg dict attributes: allowed attributes; can be a callable, list or dict;
  41. defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``
  42. :arg list styles: allowed list of css styles; defaults to
  43. ``bleach.sanitizer.ALLOWED_STYLES``
  44. :arg list protocols: allowed list of protocols for links; defaults
  45. to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
  46. :arg bool strip: whether or not to strip disallowed elements
  47. :arg bool strip_comments: whether or not to strip HTML comments
  48. :returns: cleaned text as unicode
  49. """
  50. cleaner = Cleaner(
  51. tags=tags,
  52. attributes=attributes,
  53. styles=styles,
  54. protocols=protocols,
  55. strip=strip,
  56. strip_comments=strip_comments,
  57. )
  58. return cleaner.clean(text)
  59. def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_tags=None, parse_email=False):
  60. """Convert URL-like strings in an HTML fragment to links
  61. This function converts strings that look like URLs, domain names and email
  62. addresses in text that may be an HTML fragment to links, while preserving:
  63. 1. links already in the string
  64. 2. urls found in attributes
  65. 3. email addresses
  66. linkify does a best-effort approach and tries to recover from bad
  67. situations due to crazy text.
  68. .. Note::
  69. If you're linking a lot of text and passing the same argument values or
  70. you want more configurability, consider using a
  71. :py:class:`bleach.linkifier.Linker` instance.
  72. .. Note::
  73. If you have text that you want to clean and then linkify, consider using
  74. the :py:class:`bleach.linkifier.LinkifyFilter` as a filter in the clean
  75. pass. That way you're not parsing the HTML twice.
  76. :arg str text: the text to linkify
  77. :arg list callbacks: list of callbacks to run when adjusting tag attributes;
  78. defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``
  79. :arg list skip_tags: list of tags that you don't want to linkify the
  80. contents of; for example, you could set this to ``['pre']`` to skip
  81. linkifying contents of ``pre`` tags
  82. :arg bool parse_email: whether or not to linkify email addresses
  83. :returns: linkified text as unicode
  84. """
  85. linker = Linker(
  86. callbacks=callbacks,
  87. skip_tags=skip_tags,
  88. parse_email=parse_email
  89. )
  90. return linker.linkify(text)