123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131 |
- # -*- coding: utf-8 -*-
- from __future__ import unicode_literals
- import packaging.version
- from bleach.linkifier import (
- DEFAULT_CALLBACKS,
- Linker,
- )
- from bleach.sanitizer import (
- ALLOWED_ATTRIBUTES,
- ALLOWED_PROTOCOLS,
- ALLOWED_STYLES,
- ALLOWED_TAGS,
- Cleaner,
- )
# Release date of this version, written as yyyymmdd.
__releasedate__ = '20200429'
# Version string in semver form: x.y.z or x.y.z.dev0.
__version__ = '3.1.5'
# ``__version__`` parsed into a ``packaging.version.Version`` object.
VERSION = packaging.version.Version(__version__)
# Public names of this module (what ``from <module> import *`` exposes).
__all__ = ['clean', 'linkify']
def clean(text, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES,
          styles=ALLOWED_STYLES, protocols=ALLOWED_PROTOCOLS, strip=False,
          strip_comments=True):
    """Sanitize an HTML fragment so it can be shown inside a web page.

    This is a security-focused helper: its only job is to remove malicious
    content from ``text``. It is not intended for transforming content that
    will be used outside of a web-page context.

    Example::

        import bleach

        better_text = bleach.clean(yucky_text)

    .. Note::

       A new :py:class:`bleach.sanitizer.Cleaner` is built on every call.
       If you clean a lot of text with the same argument values, or you want
       more configurability, consider using a ``Cleaner`` instance directly.

    :arg str text: the text to clean

    :arg list tags: allowed list of tags; defaults to
        ``bleach.sanitizer.ALLOWED_TAGS``

    :arg dict attributes: allowed attributes; can be a callable, list or
        dict; defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

    :arg list styles: allowed list of css styles; defaults to
        ``bleach.sanitizer.ALLOWED_STYLES``

    :arg list protocols: allowed list of protocols for links; defaults to
        ``bleach.sanitizer.ALLOWED_PROTOCOLS``

    :arg bool strip: whether or not to strip disallowed elements

    :arg bool strip_comments: whether or not to strip HTML comments

    :returns: cleaned text as unicode

    """
    # Collect the configuration once, then forward it unchanged to Cleaner.
    options = {
        'tags': tags,
        'attributes': attributes,
        'styles': styles,
        'protocols': protocols,
        'strip': strip,
        'strip_comments': strip_comments,
    }
    return Cleaner(**options).clean(text)
def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_tags=None, parse_email=False):
    """Turn URL-like strings in an HTML fragment into links.

    Strings in ``text`` (which may be an HTML fragment) that look like URLs,
    domain names and email addresses are converted to links, while
    preserving:

    1. links already in the string
    2. urls found in attributes
    3. email addresses

    This is a best-effort conversion that tries to recover from bad
    situations caused by malformed text.

    .. Note::

       A new :py:class:`bleach.linkifier.Linker` is built on every call.
       If you linkify a lot of text with the same argument values, or you
       want more configurability, consider using a ``Linker`` instance
       directly.

    .. Note::

       If you have text that you want to clean and then linkify, consider
       using the :py:class:`bleach.linkifier.LinkifyFilter` as a filter in
       the clean pass. That way the HTML is not parsed twice.

    :arg str text: the text to linkify

    :arg list callbacks: list of callbacks to run when adjusting tag
        attributes; defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``

    :arg list skip_tags: list of tags whose contents should not be
        linkified; for example, ``['pre']`` skips the contents of ``pre``
        tags

    :arg bool parse_email: whether or not to linkify email addresses

    :returns: linkified text as unicode

    """
    # Build a single-use Linker and apply it to the text straight away.
    return Linker(
        callbacks=callbacks, skip_tags=skip_tags, parse_email=parse_email
    ).linkify(text)
|