12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3 |
- """Implementation of JSONEncoder
- """
- from __future__ import absolute_import
- import re
- from operator import itemgetter
- # Do not import Decimal directly to avoid reload issues
- import decimal
- from .compat import u, unichr, binary_type, text_type, string_types, integer_types, PY3
- def _import_speedups():
- try:
- from . import _speedups
- return _speedups.encode_basestring_ascii, _speedups.make_encoder
- except ImportError:
- return None, None
# Bind the optional C implementations once at import time; both names are
# None when the extension module could not be imported.
c_encode_basestring_ascii, c_make_encoder = _import_speedups()

from simplejson.decoder import PosInf

#ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
# This is required because u() will mangle the string and ur'' isn't valid
# python3 syntax
# Matches every character encode_basestring() must escape: control
# characters, backslash, double quote, and U+2028 / U+2029.
ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]')
# Matches backslash, double quote, or any character outside the range
# space..tilde; used by py_encode_basestring_ascii().
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any byte in 0x80-0xff; the Python 2 branches of the string
# encoders use it to decide whether a byte string needs UTF-8 decoding.
HAS_UTF8 = re.compile(r'[\x80-\xff]')
# Maps a character to the escape sequence emitted for it. The short
# two-character forms are listed explicitly.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Fill in \uXXXX escapes for the remaining control characters; setdefault
# leaves the short forms above untouched.
for i in range(0x20):
    #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
# U+2028 and U+2029 are also escaped as \uXXXX.
for i in [0x2028, 0x2029]:
    ESCAPE_DCT.setdefault(unichr(i), '\\u%04x' % (i,))

# Function used to render ordinary float values (default ``_repr`` of the
# float formatter built in JSONEncoder.iterencode).
FLOAT_REPR = repr
class RawJSON(object):
    """Marker wrapper for text that is already encoded as JSON.

    The encoder emits ``encoded_json`` as-is instead of escaping it as a
    string.
    """

    def __init__(self, encoded_json):
        # Stored untouched; no validation is performed here.
        self.encoded_json = encoded_json
def encode_basestring(s, _PY3=PY3, _q=u('"')):
    """Return *s* as a double-quoted JSON string literal.

    Byte strings are decoded as UTF-8 (on Python 2 only when they contain
    bytes in the 0x80-0xff range), other non-text objects are converted
    with ``text_type``, and every character matched by ``ESCAPE`` is
    replaced by its ``ESCAPE_DCT`` entry.
    """
    if not _PY3:
        # Python 2: pure-ASCII byte strings are passed through undecoded.
        if isinstance(s, str) and HAS_UTF8.search(s) is not None:
            s = s.decode('utf-8')
        if type(s) not in string_types:
            s = text_type(s)
    else:
        if isinstance(s, binary_type):
            s = s.decode('utf-8')
        if type(s) is not text_type:
            s = text_type(s)
    escaped = ESCAPE.sub(lambda match: ESCAPE_DCT[match.group(0)], s)
    return _q + escaped + _q
def py_encode_basestring_ascii(s, _PY3=PY3):
    """Return *s* as a double-quoted, ASCII-only JSON string literal.

    Input is normalised to text the same way as ``encode_basestring``.
    Every character matched by ``ESCAPE_ASCII`` is then replaced either by
    its ``ESCAPE_DCT`` entry or by a ``\\uXXXX`` escape (a surrogate pair
    for code points of 0x10000 and above).
    """
    if not _PY3:
        # Python 2: pure-ASCII byte strings are passed through undecoded.
        if isinstance(s, str) and HAS_UTF8.search(s) is not None:
            s = s.decode('utf-8')
        if type(s) not in string_types:
            s = text_type(s)
    else:
        if isinstance(s, binary_type):
            s = s.decode('utf-8')
        if type(s) is not text_type:
            s = text_type(s)

    def replace(match):
        char = match.group(0)
        known = ESCAPE_DCT.get(char)
        if known is not None:
            return known
        code = ord(char)
        if code >= 0x10000:
            # Outside the Basic Multilingual Plane: emit a surrogate pair.
            code -= 0x10000
            high = 0xd800 | ((code >> 10) & 0x3ff)
            low = 0xdc00 | (code & 0x3ff)
            return '\\u%04x\\u%04x' % (high, low)
        return '\\u%04x' % (code,)

    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
# Use the C implementation when the extension was imported (the name is
# then not None); otherwise fall back to the pure-Python function.
encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii)
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict, namedtuple  | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible, otherwise it should call the superclass implementation (to
    raise ``TypeError``).

    """
    # Class-level separator defaults; __init__ overrides them per instance
    # when ``separators`` or ``indent`` is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
                 check_circular=True, allow_nan=True, sort_keys=False,
                 indent=None, separators=None, encoding='utf-8', default=None,
                 use_decimal=True, namedtuple_as_object=True,
                 tuple_as_array=True, bigint_as_string=False,
                 item_sort_key=None, for_json=False, ignore_nan=False,
                 int_as_string_bitcount=None, iterable_as_array=False):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a string, then JSON array elements and object members
        will be pretty-printed with a newline followed by that string repeated
        for each level of nesting. ``None`` (the default) selects the most
        compact representation without any newlines. For backwards
        compatibility with versions of simplejson earlier than 2.1.0, an
        integer is also accepted and is converted to a string with that many
        spaces.

        If specified, separators should be an (item_separator, key_separator)
        tuple.  The default is (', ', ': ') if *indent* is ``None`` and
        (',', ': ') otherwise.  To get the most compact JSON representation,
        you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        If use_decimal is true (default: ``True``), ``decimal.Decimal`` will
        be supported directly by the encoder. For the inverse, decode JSON
        with ``parse_float=decimal.Decimal``.

        If namedtuple_as_object is true (the default), objects with
        ``_asdict()`` methods will be encoded as JSON objects.

        If tuple_as_array is true (the default), tuple (and subclasses) will
        be encoded as JSON arrays.

        If *iterable_as_array* is true (default: ``False``),
        any object not in the above table that implements ``__iter__()``
        will be encoded as a JSON array.

        If bigint_as_string is true (not the default), ints 2**53 and higher
        or lower than -2**53 will be encoded as strings. This is to avoid the
        rounding that happens in Javascript otherwise.

        If int_as_string_bitcount is a positive number (n), then int of size
        greater than or equal to 2**n or lower than or equal to -2**n will be
        encoded as strings.

        If specified, item_sort_key is a callable used to sort the items in
        each dictionary. This is useful if you want to sort items other than
        in alphabetical order by key.

        If for_json is true (not the default), objects with a ``for_json()``
        method will use the return value of that method for encoding as JSON
        instead of the object.

        If *ignore_nan* is true (default: ``False``), then out of range
        :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized
        as ``null`` in compliance with the ECMA-262 specification. If true,
        this will override *allow_nan*.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.use_decimal = use_decimal
        self.namedtuple_as_object = namedtuple_as_object
        self.tuple_as_array = tuple_as_array
        self.iterable_as_array = iterable_as_array
        self.bigint_as_string = bigint_as_string
        self.item_sort_key = item_sort_key
        self.for_json = for_json
        self.ignore_nan = ignore_nan
        self.int_as_string_bitcount = int_as_string_bitcount
        # A non-string indent (e.g. an integer) becomes that many spaces.
        if indent is not None and not isinstance(indent, string_types):
            indent = indent * ' '
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        elif indent is not None:
            # Pretty-printed output: no trailing space before the newline.
            self.item_separator = ','
        if default is not None:
            # Shadows the default() method on this instance only.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from simplejson import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, binary_type):
            _encoding = self.encoding
            if _encoding is not None and _encoding != 'utf-8':
                o = o.decode(_encoding)
        if isinstance(o, string_types):
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        if self.ensure_ascii:
            return ''.join(chunks)
        else:
            return u''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Only wrap the string encoder when a real, non-UTF-8 encoding is
        # configured.  ``None`` must not reach ``bytes.decode`` (it raises
        # an unrelated TypeError); it is left to the base string encoder,
        # matching the guard in encode().
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, binary_type):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan,
                     _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
            # Check for specials. Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on
            # the internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                if type(o) is not float:
                    # See #118, do not trust custom str/repr
                    o = float(o)
                return _repr(o)

            # Only NaN / +-Infinity reach this point.
            if ignore_nan:
                text = 'null'
            elif not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        key_memo = {}
        # bigint_as_string is shorthand for a 53-bit threshold.
        int_as_string_bitcount = (
            53 if self.bigint_as_string else self.int_as_string_bitcount)
        if (_one_shot and c_make_encoder is not None
                and self.indent is None):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
                self.namedtuple_as_object, self.tuple_as_array,
                int_as_string_bitcount,
                self.item_sort_key, self.encoding, self.for_json,
                self.ignore_nan, decimal.Decimal, self.iterable_as_array)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot, self.use_decimal,
                self.namedtuple_as_object, self.tuple_as_array,
                int_as_string_bitcount,
                self.item_sort_key, self.encoding, self.for_json,
                self.iterable_as_array, Decimal=decimal.Decimal)
        try:
            return _iterencode(o, 0)
        finally:
            key_memo.clear()
class JSONEncoderForHTML(JSONEncoder):
    """An encoder that produces JSON safe to embed in HTML.

    To embed JSON content in, say, a script tag on a web page, the
    characters &, < and > should be escaped. They cannot be escaped
    with the usual entities (e.g. &amp;) because they are not expanded
    within <script> tags.
    """

    def encode(self, o):
        # JSONEncoder.encode has performance shortcuts that do not go
        # through iterencode(); always go through iterencode() here so the
        # escaping below is applied.
        joiner = '' if self.ensure_ascii else u''
        return joiner.join(self.iterencode(o, True))

    def iterencode(self, o, _one_shot=False):
        # Replace the HTML-significant characters in every chunk produced
        # by the base encoder with their JSON unicode escapes.
        escapes = (('&', '\\u0026'), ('<', '\\u003c'), ('>', '\\u003e'))
        for chunk in super(JSONEncoderForHTML, self).iterencode(o, _one_shot):
            for char, escaped in escapes:
                chunk = chunk.replace(char, escaped)
            yield chunk
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        _use_decimal, _namedtuple_as_object, _tuple_as_array,
        _int_as_string_bitcount, _item_sort_key,
        _encoding,_for_json,
        _iterable_as_array,
        ## HACK: hand-optimized bytecode; turn globals into locals
        _PY3=PY3,
        ValueError=ValueError,
        string_types=string_types,
        Decimal=None,
        dict=dict,
        float=float,
        id=id,
        integer_types=integer_types,
        isinstance=isinstance,
        list=list,
        str=str,
        tuple=tuple,
        iter=iter,
    ):
    """Build and return the pure-Python ``_iterencode(o, indent_level)``
    generator function.

    The returned generator yields the JSON text for ``o`` chunk by chunk.

    Arguments read by the closures:

    - ``markers``: dict used for circular-reference detection (keyed by
      ``id()``), or ``None`` to disable the check.
    - ``_default``: called for objects no other branch handles; its result
      is encoded in place of the object.
    - ``_encoder``: callable that turns a string into a JSON string literal.
    - ``_indent``: per-level indent string, or ``None`` for no newlines.
    - ``_floatstr``: callable that formats floats.
    - ``_key_separator`` / ``_item_separator``: separator strings.
    - ``_sort_keys`` / ``_item_sort_key``: when ``_sort_keys`` is true and
      no ``_item_sort_key`` is given, items are sorted by key.
    - ``_skipkeys``: skip, rather than reject, unsupported dict keys.
    - ``_one_shot``: accepted for signature compatibility; not used here.
    - ``_use_decimal``, ``_namedtuple_as_object``, ``_tuple_as_array``,
      ``_for_json``, ``_iterable_as_array``: feature switches.
    - ``_int_as_string_bitcount``: ``None`` or a positive integer ``n``;
      ints outside ``(-2**n, 2**n)`` are emitted as quoted strings.
    - ``_encoding``: encoding used to decode byte-string dict keys.

    The remaining keyword arguments only bind globals/builtins as locals.

    Raises ``TypeError`` if ``_item_sort_key`` is truthy but not callable,
    or if ``_int_as_string_bitcount`` is not ``None`` or a positive integer.
    """
    if _use_decimal and Decimal is None:
        Decimal = decimal.Decimal
    if _item_sort_key and not callable(_item_sort_key):
        raise TypeError("item_sort_key must be None or callable")
    elif _sort_keys and not _item_sort_key:
        _item_sort_key = itemgetter(0)

    # Check the type before comparing: on Python 3, ordering a non-numeric
    # value against 0 raises its own, less helpful TypeError.
    if (_int_as_string_bitcount is not None and
        (not isinstance(_int_as_string_bitcount, integer_types) or
         _int_as_string_bitcount <= 0)):
        raise TypeError("int_as_string_bitcount must be a positive integer")

    def _encode_int(value):
        # Return the JSON text for an integer, quoted when it falls outside
        # the configured bit range.
        skip_quoting = (
            _int_as_string_bitcount is None
            or
            _int_as_string_bitcount < 1
        )
        if type(value) not in integer_types:
            # See #118, do not trust custom str/repr
            value = int(value)
        if (
            skip_quoting or
            (-1 << _int_as_string_bitcount)
            < value <
            (1 << _int_as_string_bitcount)
        ):
            return str(value)
        return '"' + str(value) + '"'

    def _iterencode_list(lst, _current_indent_level):
        # Yield the chunks of a JSON array for ``lst`` (a list, tuple or
        # iterator).
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # ``buf`` holds the text to emit before the next element: the
        # opening bracket for the first one, the separator afterwards.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if (isinstance(value, string_types) or
                (_PY3 and isinstance(value, binary_type))):
                yield buf + _encoder(value)
            elif isinstance(value, RawJSON):
                yield buf + value.encoded_json
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, integer_types):
                yield buf + _encode_int(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield buf + str(value)
            else:
                yield buf
                for_json = _for_json and getattr(value, 'for_json', None)
                if for_json and callable(for_json):
                    chunks = _iterencode(for_json(), _current_indent_level)
                elif isinstance(value, list):
                    chunks = _iterencode_list(value, _current_indent_level)
                else:
                    _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
                    if _asdict and callable(_asdict):
                        chunks = _iterencode_dict(_asdict(),
                                                  _current_indent_level)
                    elif _tuple_as_array and isinstance(value, tuple):
                        chunks = _iterencode_list(value, _current_indent_level)
                    elif isinstance(value, dict):
                        chunks = _iterencode_dict(value, _current_indent_level)
                    else:
                        chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if first:
            # iterable_as_array misses the fast path at the top
            yield '[]'
        else:
            if newline_indent is not None:
                _current_indent_level -= 1
                yield '\n' + (_indent * _current_indent_level)
            yield ']'
        if markers is not None:
            del markers[markerid]

    def _stringify_key(key):
        # Convert a non-string dict key to its string form.  Returns None
        # when the key is unsupported and _skipkeys is set.
        if isinstance(key, string_types): # pragma: no cover
            pass
        elif isinstance(key, binary_type):
            key = key.decode(_encoding)
        elif isinstance(key, float):
            key = _floatstr(key)
        elif key is True:
            key = 'true'
        elif key is False:
            key = 'false'
        elif key is None:
            key = 'null'
        elif isinstance(key, integer_types):
            if type(key) not in integer_types:
                # See #118, do not trust custom str/repr
                key = int(key)
            key = str(key)
        elif _use_decimal and isinstance(key, Decimal):
            key = str(key)
        elif _skipkeys:
            key = None
        else:
            raise TypeError("key " + repr(key) + " is not a string")
        return key

    def _iterencode_dict(dct, _current_indent_level):
        # Yield the chunks of a JSON object for the mapping ``dct``.
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _PY3:
            iteritems = dct.items()
        else:
            iteritems = dct.iteritems()
        if _item_sort_key:
            # Keys are stringified before sorting, so the loop below can
            # skip that step.
            items = []
            for k, v in dct.items():
                if not isinstance(k, string_types):
                    k = _stringify_key(k)
                    if k is None:
                        continue
                items.append((k, v))
            items.sort(key=_item_sort_key)
        else:
            items = iteritems
        for key, value in items:
            if not (_item_sort_key or isinstance(key, string_types)):
                key = _stringify_key(key)
                if key is None:
                    # _skipkeys must be True
                    continue
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if (isinstance(value, string_types) or
                (_PY3 and isinstance(value, binary_type))):
                yield _encoder(value)
            elif isinstance(value, RawJSON):
                yield value.encoded_json
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, integer_types):
                yield _encode_int(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield str(value)
            else:
                for_json = _for_json and getattr(value, 'for_json', None)
                if for_json and callable(for_json):
                    chunks = _iterencode(for_json(), _current_indent_level)
                elif isinstance(value, list):
                    chunks = _iterencode_list(value, _current_indent_level)
                else:
                    _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
                    if _asdict and callable(_asdict):
                        chunks = _iterencode_dict(_asdict(),
                                                  _current_indent_level)
                    elif _tuple_as_array and isinstance(value, tuple):
                        chunks = _iterencode_list(value, _current_indent_level)
                    elif isinstance(value, dict):
                        chunks = _iterencode_dict(value, _current_indent_level)
                    else:
                        chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        # Entry point: dispatch on the type of ``o`` and yield its chunks.
        if (isinstance(o, string_types) or
            (_PY3 and isinstance(o, binary_type))):
            yield _encoder(o)
        elif isinstance(o, RawJSON):
            yield o.encoded_json
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, integer_types):
            yield _encode_int(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        else:
            for_json = _for_json and getattr(o, 'for_json', None)
            if for_json and callable(for_json):
                for chunk in _iterencode(for_json(), _current_indent_level):
                    yield chunk
            elif isinstance(o, list):
                for chunk in _iterencode_list(o, _current_indent_level):
                    yield chunk
            else:
                _asdict = _namedtuple_as_object and getattr(o, '_asdict', None)
                if _asdict and callable(_asdict):
                    for chunk in _iterencode_dict(_asdict(),
                            _current_indent_level):
                        yield chunk
                elif (_tuple_as_array and isinstance(o, tuple)):
                    for chunk in _iterencode_list(o, _current_indent_level):
                        yield chunk
                elif isinstance(o, dict):
                    for chunk in _iterencode_dict(o, _current_indent_level):
                        yield chunk
                elif _use_decimal and isinstance(o, Decimal):
                    yield str(o)
                else:
                    # ``while`` is used as an ``if`` that can be left with
                    # ``break`` when ``o`` turns out not to be iterable.
                    while _iterable_as_array:
                        # Markers are not checked here because it is valid for
                        # an iterable to return self.
                        try:
                            o = iter(o)
                        except TypeError:
                            break
                        for chunk in _iterencode_list(o, _current_indent_level):
                            yield chunk
                        return
                    if markers is not None:
                        markerid = id(o)
                        if markerid in markers:
                            raise ValueError("Circular reference detected")
                        markers[markerid] = o
                    o = _default(o)
                    for chunk in _iterencode(o, _current_indent_level):
                        yield chunk
                    if markers is not None:
                        del markers[markerid]

    return _iterencode
|