123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863 |
- # Copyright 2009-present MongoDB, Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- """Tools for using Python's :mod:`json` module with BSON documents.
- This module provides two helper methods `dumps` and `loads` that wrap the
- native :mod:`json` methods and provide explicit BSON conversion to and from
- JSON. :class:`~bson.json_util.JSONOptions` provides a way to control how JSON
- is emitted and parsed, with the default being the legacy PyMongo format.
- :mod:`~bson.json_util` can also generate Canonical or Relaxed `Extended JSON`_
- when :const:`CANONICAL_JSON_OPTIONS` or :const:`RELAXED_JSON_OPTIONS` is
- provided, respectively.
- .. _Extended JSON: https://github.com/mongodb/specifications/blob/master/source/extended-json.rst
- Example usage (deserialization):
- .. doctest::
- >>> from bson.json_util import loads
- >>> loads('[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$scope": {}, "$code": "function x() { return 1; }"}}, {"bin": {"$type": "80", "$binary": "AQIDBA=="}}]')
- [{u'foo': [1, 2]}, {u'bar': {u'hello': u'world'}}, {u'code': Code('function x() { return 1; }', {})}, {u'bin': Binary('...', 128)}]
- Example usage (serialization):
- .. doctest::
- >>> from bson import Binary, Code
- >>> from bson.json_util import dumps
- >>> dumps([{'foo': [1, 2]},
- ... {'bar': {'hello': 'world'}},
- ... {'code': Code("function x() { return 1; }", {})},
- ... {'bin': Binary(b"\x01\x02\x03\x04")}])
- '[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$code": "function x() { return 1; }", "$scope": {}}}, {"bin": {"$binary": "AQIDBA==", "$type": "00"}}]'
- Example usage (with :const:`CANONICAL_JSON_OPTIONS`):
- .. doctest::
- >>> from bson import Binary, Code
- >>> from bson.json_util import dumps, CANONICAL_JSON_OPTIONS
- >>> dumps([{'foo': [1, 2]},
- ... {'bar': {'hello': 'world'}},
- ... {'code': Code("function x() { return 1; }")},
- ... {'bin': Binary(b"\x01\x02\x03\x04")}],
- ... json_options=CANONICAL_JSON_OPTIONS)
- '[{"foo": [{"$numberInt": "1"}, {"$numberInt": "2"}]}, {"bar": {"hello": "world"}}, {"code": {"$code": "function x() { return 1; }"}}, {"bin": {"$binary": {"base64": "AQIDBA==", "subType": "00"}}}]'
- Example usage (with :const:`RELAXED_JSON_OPTIONS`):
- .. doctest::
- >>> from bson import Binary, Code
- >>> from bson.json_util import dumps, RELAXED_JSON_OPTIONS
- >>> dumps([{'foo': [1, 2]},
- ... {'bar': {'hello': 'world'}},
- ... {'code': Code("function x() { return 1; }")},
- ... {'bin': Binary(b"\x01\x02\x03\x04")}],
- ... json_options=RELAXED_JSON_OPTIONS)
- '[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$code": "function x() { return 1; }"}}, {"bin": {"$binary": {"base64": "AQIDBA==", "subType": "00"}}}]'
- Alternatively, you can manually pass the `default` to :func:`json.dumps`.
- It won't handle :class:`~bson.binary.Binary` and :class:`~bson.code.Code`
- instances (as they are extended strings you can't provide custom defaults),
- but it will be faster as there is less recursion.
- .. note::
- If your application does not need the flexibility offered by
- :class:`JSONOptions` and spends a large amount of time in the `json_util`
- module, look to
- `python-bsonjs <https://pypi.python.org/pypi/python-bsonjs>`_ for a nice
- performance improvement. `python-bsonjs` is a fast BSON to MongoDB
- Extended JSON converter for Python built on top of
- `libbson <https://github.com/mongodb/libbson>`_. `python-bsonjs` works best
- with PyMongo when using :class:`~bson.raw_bson.RawBSONDocument`.
- .. versionchanged:: 2.8
- The output format for :class:`~bson.timestamp.Timestamp` has changed from
- '{"t": <int>, "i": <int>}' to '{"$timestamp": {"t": <int>, "i": <int>}}'.
- This new format will be decoded to an instance of
- :class:`~bson.timestamp.Timestamp`. The old format will continue to be
- decoded to a python dict as before. Encoding to the old format is no longer
- supported as it was never correct and loses type information.
- Added support for $numberLong and $undefined - new in MongoDB 2.6 - and
- parsing $date in ISO-8601 format.
- .. versionchanged:: 2.7
- Preserves order when rendering SON, Timestamp, Code, Binary, and DBRef
- instances.
- .. versionchanged:: 2.3
- Added dumps and loads helpers to automatically handle conversion to and
- from json and supports :class:`~bson.binary.Binary` and
- :class:`~bson.code.Code`
- """
- import base64
- import datetime
- import json
- import math
- import re
- import uuid
- from pymongo.errors import ConfigurationError
- import bson
- from bson import EPOCH_AWARE, RE_TYPE, SON
- from bson.binary import (Binary, UuidRepresentation, ALL_UUID_SUBTYPES,
- UUID_SUBTYPE)
- from bson.code import Code
- from bson.codec_options import CodecOptions
- from bson.dbref import DBRef
- from bson.decimal128 import Decimal128
- from bson.int64 import Int64
- from bson.max_key import MaxKey
- from bson.min_key import MinKey
- from bson.objectid import ObjectId
- from bson.py3compat import (PY3, iteritems, integer_types, string_type,
- text_type)
- from bson.regex import Regex
- from bson.timestamp import Timestamp
- from bson.tz_util import utc
- _RE_OPT_TABLE = {
- "i": re.I,
- "l": re.L,
- "m": re.M,
- "s": re.S,
- "u": re.U,
- "x": re.X,
- }
- # Dollar-prefixed keys which may appear in DBRefs.
- _DBREF_KEYS = frozenset(['$id', '$ref', '$db'])
- class DatetimeRepresentation:
- LEGACY = 0
- """Legacy MongoDB Extended JSON datetime representation.
- :class:`datetime.datetime` instances will be encoded to JSON in the
- format `{"$date": <dateAsMilliseconds>}`, where `dateAsMilliseconds` is
- a 64-bit signed integer giving the number of milliseconds since the Unix
- epoch UTC. This was the default encoding before PyMongo version 3.4.
- .. versionadded:: 3.4
- """
- NUMBERLONG = 1
- """NumberLong datetime representation.
- :class:`datetime.datetime` instances will be encoded to JSON in the
- format `{"$date": {"$numberLong": "<dateAsMilliseconds>"}}`,
- where `dateAsMilliseconds` is the string representation of a 64-bit signed
- integer giving the number of milliseconds since the Unix epoch UTC.
- .. versionadded:: 3.4
- """
- ISO8601 = 2
- """ISO-8601 datetime representation.
- :class:`datetime.datetime` instances greater than or equal to the Unix
- epoch UTC will be encoded to JSON in the format `{"$date": "<ISO-8601>"}`.
- :class:`datetime.datetime` instances before the Unix epoch UTC will be
- encoded as if the datetime representation is
- :const:`~DatetimeRepresentation.NUMBERLONG`.
- .. versionadded:: 3.4
- """
- class JSONMode:
- LEGACY = 0
- """Legacy Extended JSON representation.
- In this mode, :func:`~bson.json_util.dumps` produces PyMongo's legacy
- non-standard JSON output. Consider using
- :const:`~bson.json_util.JSONMode.RELAXED` or
- :const:`~bson.json_util.JSONMode.CANONICAL` instead.
- .. versionadded:: 3.5
- """
- RELAXED = 1
- """Relaxed Extended JSON representation.
- In this mode, :func:`~bson.json_util.dumps` produces Relaxed Extended JSON,
- a mostly JSON-like format. Consider using this for things like a web API,
- where one is sending a document (or a projection of a document) that only
- uses ordinary JSON type primitives. In particular, the ``int``,
- :class:`~bson.int64.Int64`, and ``float`` numeric types are represented in
- the native JSON number format. This output is also the most human readable
- and is useful for debugging and documentation.
- .. seealso:: The specification for Relaxed `Extended JSON`_.
- .. versionadded:: 3.5
- """
- CANONICAL = 2
- """Canonical Extended JSON representation.
- In this mode, :func:`~bson.json_util.dumps` produces Canonical Extended
- JSON, a type preserving format. Consider using this for things like
- testing, where one has to precisely specify expected types in JSON. In
- particular, the ``int``, :class:`~bson.int64.Int64`, and ``float`` numeric
- types are encoded with type wrappers.
- .. seealso:: The specification for Canonical `Extended JSON`_.
- .. versionadded:: 3.5
- """
- class JSONOptions(CodecOptions):
- """Encapsulates JSON options for :func:`dumps` and :func:`loads`.
- :Parameters:
- - `strict_number_long`: If ``True``, :class:`~bson.int64.Int64` objects
- are encoded to MongoDB Extended JSON's *Strict mode* type
- `NumberLong`, ie ``'{"$numberLong": "<number>" }'``. Otherwise they
- will be encoded as an `int`. Defaults to ``False``.
- - `datetime_representation`: The representation to use when encoding
- instances of :class:`datetime.datetime`. Defaults to
- :const:`~DatetimeRepresentation.LEGACY`.
- - `strict_uuid`: If ``True``, :class:`uuid.UUID` object are encoded to
- MongoDB Extended JSON's *Strict mode* type `Binary`. Otherwise it
- will be encoded as ``'{"$uuid": "<hex>" }'``. Defaults to ``False``.
- - `json_mode`: The :class:`JSONMode` to use when encoding BSON types to
- Extended JSON. Defaults to :const:`~JSONMode.LEGACY`.
- - `document_class`: BSON documents returned by :func:`loads` will be
- decoded to an instance of this class. Must be a subclass of
- :class:`collections.MutableMapping`. Defaults to :class:`dict`.
- - `uuid_representation`: The :class:`~bson.binary.UuidRepresentation`
- to use when encoding and decoding instances of :class:`uuid.UUID`.
- Defaults to :const:`~bson.binary.UuidRepresentation.PYTHON_LEGACY`.
- - `tz_aware`: If ``True``, MongoDB Extended JSON's *Strict mode* type
- `Date` will be decoded to timezone aware instances of
- :class:`datetime.datetime`. Otherwise they will be naive. Defaults
- to ``True``.
- - `tzinfo`: A :class:`datetime.tzinfo` subclass that specifies the
- timezone from which :class:`~datetime.datetime` objects should be
- decoded. Defaults to :const:`~bson.tz_util.utc`.
- - `args`: arguments to :class:`~bson.codec_options.CodecOptions`
- - `kwargs`: arguments to :class:`~bson.codec_options.CodecOptions`
- .. seealso:: The specification for Relaxed and Canonical `Extended JSON`_.
- .. versionadded:: 3.4
- .. versionchanged:: 3.5
- Accepts the optional parameter `json_mode`.
- """
- def __new__(cls, strict_number_long=False,
- datetime_representation=DatetimeRepresentation.LEGACY,
- strict_uuid=False, json_mode=JSONMode.LEGACY,
- *args, **kwargs):
- kwargs["tz_aware"] = kwargs.get("tz_aware", True)
- if kwargs["tz_aware"]:
- kwargs["tzinfo"] = kwargs.get("tzinfo", utc)
- if datetime_representation not in (DatetimeRepresentation.LEGACY,
- DatetimeRepresentation.NUMBERLONG,
- DatetimeRepresentation.ISO8601):
- raise ConfigurationError(
- "JSONOptions.datetime_representation must be one of LEGACY, "
- "NUMBERLONG, or ISO8601 from DatetimeRepresentation.")
- self = super(JSONOptions, cls).__new__(cls, *args, **kwargs)
- if json_mode not in (JSONMode.LEGACY,
- JSONMode.RELAXED,
- JSONMode.CANONICAL):
- raise ConfigurationError(
- "JSONOptions.json_mode must be one of LEGACY, RELAXED, "
- "or CANONICAL from JSONMode.")
- self.json_mode = json_mode
- if self.json_mode == JSONMode.RELAXED:
- self.strict_number_long = False
- self.datetime_representation = DatetimeRepresentation.ISO8601
- self.strict_uuid = True
- elif self.json_mode == JSONMode.CANONICAL:
- self.strict_number_long = True
- self.datetime_representation = DatetimeRepresentation.NUMBERLONG
- self.strict_uuid = True
- else:
- self.strict_number_long = strict_number_long
- self.datetime_representation = datetime_representation
- self.strict_uuid = strict_uuid
- return self
- def _arguments_repr(self):
- return ('strict_number_long=%r, '
- 'datetime_representation=%r, '
- 'strict_uuid=%r, json_mode=%r, %s' % (
- self.strict_number_long,
- self.datetime_representation,
- self.strict_uuid,
- self.json_mode,
- super(JSONOptions, self)._arguments_repr()))
- def _options_dict(self):
- # TODO: PYTHON-2442 use _asdict() instead
- options_dict = super(JSONOptions, self)._options_dict()
- options_dict.update({
- 'strict_number_long': self.strict_number_long,
- 'datetime_representation': self.datetime_representation,
- 'strict_uuid': self.strict_uuid,
- 'json_mode': self.json_mode})
- return options_dict
- def with_options(self, **kwargs):
- """
- Make a copy of this JSONOptions, overriding some options::
- >>> from bson.json_util import CANONICAL_JSON_OPTIONS
- >>> CANONICAL_JSON_OPTIONS.tz_aware
- True
- >>> json_options = CANONICAL_JSON_OPTIONS.with_options(tz_aware=False)
- >>> json_options.tz_aware
- False
- .. versionadded:: 3.12
- """
- opts = self._options_dict()
- for opt in ('strict_number_long', 'datetime_representation',
- 'strict_uuid', 'json_mode'):
- opts[opt] = kwargs.get(opt, getattr(self, opt))
- opts.update(kwargs)
- return JSONOptions(**opts)
- LEGACY_JSON_OPTIONS = JSONOptions(json_mode=JSONMode.LEGACY)
- """:class:`JSONOptions` for encoding to PyMongo's legacy JSON format.
- .. seealso:: The documentation for :const:`bson.json_util.JSONMode.LEGACY`.
- .. versionadded:: 3.5
- """
- DEFAULT_JSON_OPTIONS = LEGACY_JSON_OPTIONS
- """The default :class:`JSONOptions` for JSON encoding/decoding.
- The same as :const:`LEGACY_JSON_OPTIONS`. This will change to
- :const:`RELAXED_JSON_OPTIONS` in a future release.
- .. versionadded:: 3.4
- """
- CANONICAL_JSON_OPTIONS = JSONOptions(json_mode=JSONMode.CANONICAL)
- """:class:`JSONOptions` for Canonical Extended JSON.
- .. seealso:: The documentation for :const:`bson.json_util.JSONMode.CANONICAL`.
- .. versionadded:: 3.5
- """
- RELAXED_JSON_OPTIONS = JSONOptions(json_mode=JSONMode.RELAXED)
- """:class:`JSONOptions` for Relaxed Extended JSON.
- .. seealso:: The documentation for :const:`bson.json_util.JSONMode.RELAXED`.
- .. versionadded:: 3.5
- """
- STRICT_JSON_OPTIONS = JSONOptions(
- strict_number_long=True,
- datetime_representation=DatetimeRepresentation.ISO8601,
- strict_uuid=True)
- """**DEPRECATED** - :class:`JSONOptions` for MongoDB Extended JSON's *Strict
- mode* encoding.
- .. versionadded:: 3.4
- .. versionchanged:: 3.5
- Deprecated. Use :const:`RELAXED_JSON_OPTIONS` or
- :const:`CANONICAL_JSON_OPTIONS` instead.
- """
- def dumps(obj, *args, **kwargs):
- """Helper function that wraps :func:`json.dumps`.
- Recursive function that handles all BSON types including
- :class:`~bson.binary.Binary` and :class:`~bson.code.Code`.
- :Parameters:
- - `json_options`: A :class:`JSONOptions` instance used to modify the
- encoding of MongoDB Extended JSON types. Defaults to
- :const:`DEFAULT_JSON_OPTIONS`.
- .. versionchanged:: 3.4
- Accepts optional parameter `json_options`. See :class:`JSONOptions`.
- .. versionchanged:: 2.7
- Preserves order when rendering SON, Timestamp, Code, Binary, and DBRef
- instances.
- """
- json_options = kwargs.pop("json_options", DEFAULT_JSON_OPTIONS)
- return json.dumps(_json_convert(obj, json_options), *args, **kwargs)
- def loads(s, *args, **kwargs):
- """Helper function that wraps :func:`json.loads`.
- Automatically passes the object_hook for BSON type conversion.
- Raises ``TypeError``, ``ValueError``, ``KeyError``, or
- :exc:`~bson.errors.InvalidId` on invalid MongoDB Extended JSON.
- :Parameters:
- - `json_options`: A :class:`JSONOptions` instance used to modify the
- decoding of MongoDB Extended JSON types. Defaults to
- :const:`DEFAULT_JSON_OPTIONS`.
- .. versionchanged:: 3.5
- Parses Relaxed and Canonical Extended JSON as well as PyMongo's legacy
- format. Now raises ``TypeError`` or ``ValueError`` when parsing JSON
- type wrappers with values of the wrong type or any extra keys.
- .. versionchanged:: 3.4
- Accepts optional parameter `json_options`. See :class:`JSONOptions`.
- """
- json_options = kwargs.pop("json_options", DEFAULT_JSON_OPTIONS)
- kwargs["object_pairs_hook"] = lambda pairs: object_pairs_hook(
- pairs, json_options)
- return json.loads(s, *args, **kwargs)
- def _json_convert(obj, json_options=DEFAULT_JSON_OPTIONS):
- """Recursive helper method that converts BSON types so they can be
- converted into json.
- """
- if hasattr(obj, 'iteritems') or hasattr(obj, 'items'): # PY3 support
- return SON(((k, _json_convert(v, json_options))
- for k, v in iteritems(obj)))
- elif hasattr(obj, '__iter__') and not isinstance(obj, (text_type, bytes)):
- return list((_json_convert(v, json_options) for v in obj))
- try:
- return default(obj, json_options)
- except TypeError:
- return obj
- def object_pairs_hook(pairs, json_options=DEFAULT_JSON_OPTIONS):
- return object_hook(json_options.document_class(pairs), json_options)
- def object_hook(dct, json_options=DEFAULT_JSON_OPTIONS):
- if "$oid" in dct:
- return _parse_canonical_oid(dct)
- if "$ref" in dct:
- return _parse_canonical_dbref(dct)
- if "$date" in dct:
- return _parse_canonical_datetime(dct, json_options)
- if "$regex" in dct:
- return _parse_legacy_regex(dct)
- if "$minKey" in dct:
- return _parse_canonical_minkey(dct)
- if "$maxKey" in dct:
- return _parse_canonical_maxkey(dct)
- if "$binary" in dct:
- if "$type" in dct:
- return _parse_legacy_binary(dct, json_options)
- else:
- return _parse_canonical_binary(dct, json_options)
- if "$code" in dct:
- return _parse_canonical_code(dct)
- if "$uuid" in dct:
- return _parse_legacy_uuid(dct, json_options)
- if "$undefined" in dct:
- return None
- if "$numberLong" in dct:
- return _parse_canonical_int64(dct)
- if "$timestamp" in dct:
- tsp = dct["$timestamp"]
- return Timestamp(tsp["t"], tsp["i"])
- if "$numberDecimal" in dct:
- return _parse_canonical_decimal128(dct)
- if "$dbPointer" in dct:
- return _parse_canonical_dbpointer(dct)
- if "$regularExpression" in dct:
- return _parse_canonical_regex(dct)
- if "$symbol" in dct:
- return _parse_canonical_symbol(dct)
- if "$numberInt" in dct:
- return _parse_canonical_int32(dct)
- if "$numberDouble" in dct:
- return _parse_canonical_double(dct)
- return dct
- def _parse_legacy_regex(doc):
- pattern = doc["$regex"]
- # Check if this is the $regex query operator.
- if isinstance(pattern, Regex):
- return doc
- flags = 0
- # PyMongo always adds $options but some other tools may not.
- for opt in doc.get("$options", ""):
- flags |= _RE_OPT_TABLE.get(opt, 0)
- return Regex(pattern, flags)
- def _parse_legacy_uuid(doc, json_options):
- """Decode a JSON legacy $uuid to Python UUID."""
- if len(doc) != 1:
- raise TypeError('Bad $uuid, extra field(s): %s' % (doc,))
- if not isinstance(doc["$uuid"], text_type):
- raise TypeError('$uuid must be a string: %s' % (doc,))
- if json_options.uuid_representation == UuidRepresentation.UNSPECIFIED:
- return Binary.from_uuid(uuid.UUID(doc["$uuid"]))
- else:
- return uuid.UUID(doc["$uuid"])
- def _binary_or_uuid(data, subtype, json_options):
- # special handling for UUID
- if subtype in ALL_UUID_SUBTYPES:
- uuid_representation = json_options.uuid_representation
- binary_value = Binary(data, subtype)
- if uuid_representation == UuidRepresentation.UNSPECIFIED:
- return binary_value
- if subtype == UUID_SUBTYPE:
- # Legacy behavior: use STANDARD with binary subtype 4.
- uuid_representation = UuidRepresentation.STANDARD
- elif uuid_representation == UuidRepresentation.STANDARD:
- # subtype == OLD_UUID_SUBTYPE
- # Legacy behavior: STANDARD is the same as PYTHON_LEGACY.
- uuid_representation = UuidRepresentation.PYTHON_LEGACY
- return binary_value.as_uuid(uuid_representation)
- if PY3 and subtype == 0:
- return data
- return Binary(data, subtype)
- def _parse_legacy_binary(doc, json_options):
- if isinstance(doc["$type"], int):
- doc["$type"] = "%02x" % doc["$type"]
- subtype = int(doc["$type"], 16)
- if subtype >= 0xffffff80: # Handle mongoexport values
- subtype = int(doc["$type"][6:], 16)
- data = base64.b64decode(doc["$binary"].encode())
- return _binary_or_uuid(data, subtype, json_options)
- def _parse_canonical_binary(doc, json_options):
- binary = doc["$binary"]
- b64 = binary["base64"]
- subtype = binary["subType"]
- if not isinstance(b64, string_type):
- raise TypeError('$binary base64 must be a string: %s' % (doc,))
- if not isinstance(subtype, string_type) or len(subtype) > 2:
- raise TypeError('$binary subType must be a string at most 2 '
- 'characters: %s' % (doc,))
- if len(binary) != 2:
- raise TypeError('$binary must include only "base64" and "subType" '
- 'components: %s' % (doc,))
- data = base64.b64decode(b64.encode())
- return _binary_or_uuid(data, int(subtype, 16), json_options)
- def _parse_canonical_datetime(doc, json_options):
- """Decode a JSON datetime to python datetime.datetime."""
- dtm = doc["$date"]
- if len(doc) != 1:
- raise TypeError('Bad $date, extra field(s): %s' % (doc,))
- # mongoexport 2.6 and newer
- if isinstance(dtm, string_type):
- # Parse offset
- if dtm[-1] == 'Z':
- dt = dtm[:-1]
- offset = 'Z'
- elif dtm[-6] in ('+', '-') and dtm[-3] == ':':
- # (+|-)HH:MM
- dt = dtm[:-6]
- offset = dtm[-6:]
- elif dtm[-5] in ('+', '-'):
- # (+|-)HHMM
- dt = dtm[:-5]
- offset = dtm[-5:]
- elif dtm[-3] in ('+', '-'):
- # (+|-)HH
- dt = dtm[:-3]
- offset = dtm[-3:]
- else:
- dt = dtm
- offset = ''
- # Parse the optional factional seconds portion.
- dot_index = dt.rfind('.')
- microsecond = 0
- if dot_index != -1:
- microsecond = int(float(dt[dot_index:]) * 1000000)
- dt = dt[:dot_index]
- aware = datetime.datetime.strptime(
- dt, "%Y-%m-%dT%H:%M:%S").replace(microsecond=microsecond,
- tzinfo=utc)
- if offset and offset != 'Z':
- if len(offset) == 6:
- hours, minutes = offset[1:].split(':')
- secs = (int(hours) * 3600 + int(minutes) * 60)
- elif len(offset) == 5:
- secs = (int(offset[1:3]) * 3600 + int(offset[3:]) * 60)
- elif len(offset) == 3:
- secs = int(offset[1:3]) * 3600
- if offset[0] == "-":
- secs *= -1
- aware = aware - datetime.timedelta(seconds=secs)
- if json_options.tz_aware:
- if json_options.tzinfo:
- aware = aware.astimezone(json_options.tzinfo)
- return aware
- else:
- return aware.replace(tzinfo=None)
- return bson._millis_to_datetime(int(dtm), json_options)
- def _parse_canonical_oid(doc):
- """Decode a JSON ObjectId to bson.objectid.ObjectId."""
- if len(doc) != 1:
- raise TypeError('Bad $oid, extra field(s): %s' % (doc,))
- return ObjectId(doc['$oid'])
- def _parse_canonical_symbol(doc):
- """Decode a JSON symbol to Python string."""
- symbol = doc['$symbol']
- if len(doc) != 1:
- raise TypeError('Bad $symbol, extra field(s): %s' % (doc,))
- return text_type(symbol)
- def _parse_canonical_code(doc):
- """Decode a JSON code to bson.code.Code."""
- for key in doc:
- if key not in ('$code', '$scope'):
- raise TypeError('Bad $code, extra field(s): %s' % (doc,))
- return Code(doc['$code'], scope=doc.get('$scope'))
- def _parse_canonical_regex(doc):
- """Decode a JSON regex to bson.regex.Regex."""
- regex = doc['$regularExpression']
- if len(doc) != 1:
- raise TypeError('Bad $regularExpression, extra field(s): %s' % (doc,))
- if len(regex) != 2:
- raise TypeError('Bad $regularExpression must include only "pattern"'
- 'and "options" components: %s' % (doc,))
- return Regex(regex['pattern'], regex['options'])
- def _parse_canonical_dbref(doc):
- """Decode a JSON DBRef to bson.dbref.DBRef."""
- for key in doc:
- if key.startswith('$') and key not in _DBREF_KEYS:
- # Other keys start with $, so dct cannot be parsed as a DBRef.
- return doc
- return DBRef(doc.pop('$ref'), doc.pop('$id'),
- database=doc.pop('$db', None), **doc)
- def _parse_canonical_dbpointer(doc):
- """Decode a JSON (deprecated) DBPointer to bson.dbref.DBRef."""
- dbref = doc['$dbPointer']
- if len(doc) != 1:
- raise TypeError('Bad $dbPointer, extra field(s): %s' % (doc,))
- if isinstance(dbref, DBRef):
- dbref_doc = dbref.as_doc()
- # DBPointer must not contain $db in its value.
- if dbref.database is not None:
- raise TypeError(
- 'Bad $dbPointer, extra field $db: %s' % (dbref_doc,))
- if not isinstance(dbref.id, ObjectId):
- raise TypeError(
- 'Bad $dbPointer, $id must be an ObjectId: %s' % (dbref_doc,))
- if len(dbref_doc) != 2:
- raise TypeError(
- 'Bad $dbPointer, extra field(s) in DBRef: %s' % (dbref_doc,))
- return dbref
- else:
- raise TypeError('Bad $dbPointer, expected a DBRef: %s' % (doc,))
- def _parse_canonical_int32(doc):
- """Decode a JSON int32 to python int."""
- i_str = doc['$numberInt']
- if len(doc) != 1:
- raise TypeError('Bad $numberInt, extra field(s): %s' % (doc,))
- if not isinstance(i_str, string_type):
- raise TypeError('$numberInt must be string: %s' % (doc,))
- return int(i_str)
- def _parse_canonical_int64(doc):
- """Decode a JSON int64 to bson.int64.Int64."""
- l_str = doc['$numberLong']
- if len(doc) != 1:
- raise TypeError('Bad $numberLong, extra field(s): %s' % (doc,))
- return Int64(l_str)
- def _parse_canonical_double(doc):
- """Decode a JSON double to python float."""
- d_str = doc['$numberDouble']
- if len(doc) != 1:
- raise TypeError('Bad $numberDouble, extra field(s): %s' % (doc,))
- if not isinstance(d_str, string_type):
- raise TypeError('$numberDouble must be string: %s' % (doc,))
- return float(d_str)
- def _parse_canonical_decimal128(doc):
- """Decode a JSON decimal128 to bson.decimal128.Decimal128."""
- d_str = doc['$numberDecimal']
- if len(doc) != 1:
- raise TypeError('Bad $numberDecimal, extra field(s): %s' % (doc,))
- if not isinstance(d_str, string_type):
- raise TypeError('$numberDecimal must be string: %s' % (doc,))
- return Decimal128(d_str)
- def _parse_canonical_minkey(doc):
- """Decode a JSON MinKey to bson.min_key.MinKey."""
- if type(doc['$minKey']) is not int or doc['$minKey'] != 1:
- raise TypeError('$minKey value must be 1: %s' % (doc,))
- if len(doc) != 1:
- raise TypeError('Bad $minKey, extra field(s): %s' % (doc,))
- return MinKey()
- def _parse_canonical_maxkey(doc):
- """Decode a JSON MaxKey to bson.max_key.MaxKey."""
- if type(doc['$maxKey']) is not int or doc['$maxKey'] != 1:
- raise TypeError('$maxKey value must be 1: %s', (doc,))
- if len(doc) != 1:
- raise TypeError('Bad $minKey, extra field(s): %s' % (doc,))
- return MaxKey()
- def _encode_binary(data, subtype, json_options):
- if json_options.json_mode == JSONMode.LEGACY:
- return SON([
- ('$binary', base64.b64encode(data).decode()),
- ('$type', "%02x" % subtype)])
- return {'$binary': SON([
- ('base64', base64.b64encode(data).decode()),
- ('subType', "%02x" % subtype)])}
- def default(obj, json_options=DEFAULT_JSON_OPTIONS):
- # We preserve key order when rendering SON, DBRef, etc. as JSON by
- # returning a SON for those types instead of a dict.
- if isinstance(obj, ObjectId):
- return {"$oid": str(obj)}
- if isinstance(obj, DBRef):
- return _json_convert(obj.as_doc(), json_options=json_options)
- if isinstance(obj, datetime.datetime):
- if (json_options.datetime_representation ==
- DatetimeRepresentation.ISO8601):
- if not obj.tzinfo:
- obj = obj.replace(tzinfo=utc)
- if obj >= EPOCH_AWARE:
- off = obj.tzinfo.utcoffset(obj)
- if (off.days, off.seconds, off.microseconds) == (0, 0, 0):
- tz_string = 'Z'
- else:
- tz_string = obj.strftime('%z')
- millis = int(obj.microsecond / 1000)
- fracsecs = ".%03d" % (millis,) if millis else ""
- return {"$date": "%s%s%s" % (
- obj.strftime("%Y-%m-%dT%H:%M:%S"), fracsecs, tz_string)}
- millis = bson._datetime_to_millis(obj)
- if (json_options.datetime_representation ==
- DatetimeRepresentation.LEGACY):
- return {"$date": millis}
- return {"$date": {"$numberLong": str(millis)}}
- if json_options.strict_number_long and isinstance(obj, Int64):
- return {"$numberLong": str(obj)}
- if isinstance(obj, (RE_TYPE, Regex)):
- flags = ""
- if obj.flags & re.IGNORECASE:
- flags += "i"
- if obj.flags & re.LOCALE:
- flags += "l"
- if obj.flags & re.MULTILINE:
- flags += "m"
- if obj.flags & re.DOTALL:
- flags += "s"
- if obj.flags & re.UNICODE:
- flags += "u"
- if obj.flags & re.VERBOSE:
- flags += "x"
- if isinstance(obj.pattern, text_type):
- pattern = obj.pattern
- else:
- pattern = obj.pattern.decode('utf-8')
- if json_options.json_mode == JSONMode.LEGACY:
- return SON([("$regex", pattern), ("$options", flags)])
- return {'$regularExpression': SON([("pattern", pattern),
- ("options", flags)])}
- if isinstance(obj, MinKey):
- return {"$minKey": 1}
- if isinstance(obj, MaxKey):
- return {"$maxKey": 1}
- if isinstance(obj, Timestamp):
- return {"$timestamp": SON([("t", obj.time), ("i", obj.inc)])}
- if isinstance(obj, Code):
- if obj.scope is None:
- return {'$code': str(obj)}
- return SON([
- ('$code', str(obj)),
- ('$scope', _json_convert(obj.scope, json_options))])
- if isinstance(obj, Binary):
- return _encode_binary(obj, obj.subtype, json_options)
- if PY3 and isinstance(obj, bytes):
- return _encode_binary(obj, 0, json_options)
- if isinstance(obj, uuid.UUID):
- if json_options.strict_uuid:
- binval = Binary.from_uuid(
- obj, uuid_representation=json_options.uuid_representation)
- return _encode_binary(binval, binval.subtype, json_options)
- else:
- return {"$uuid": obj.hex}
- if isinstance(obj, Decimal128):
- return {"$numberDecimal": str(obj)}
- if isinstance(obj, bool):
- return obj
- if (json_options.json_mode == JSONMode.CANONICAL and
- isinstance(obj, integer_types)):
- if -2 ** 31 <= obj < 2 ** 31:
- return {'$numberInt': text_type(obj)}
- return {'$numberLong': text_type(obj)}
- if json_options.json_mode != JSONMode.LEGACY and isinstance(obj, float):
- if math.isnan(obj):
- return {'$numberDouble': 'NaN'}
- elif math.isinf(obj):
- representation = 'Infinity' if obj > 0 else '-Infinity'
- return {'$numberDouble': representation}
- elif json_options.json_mode == JSONMode.CANONICAL:
- # repr() will return the shortest string guaranteed to produce the
- # original value, when float() is called on it. str produces a
- # shorter string in Python 2.
- return {'$numberDouble': text_type(repr(obj))}
- raise TypeError("%r is not JSON serializable" % obj)
|