json_util.py 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863
  1. # Copyright 2009-present MongoDB, Inc.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """Tools for using Python's :mod:`json` module with BSON documents.
  15. This module provides two helper methods `dumps` and `loads` that wrap the
  16. native :mod:`json` methods and provide explicit BSON conversion to and from
  17. JSON. :class:`~bson.json_util.JSONOptions` provides a way to control how JSON
  18. is emitted and parsed, with the default being the legacy PyMongo format.
  19. :mod:`~bson.json_util` can also generate Canonical or Relaxed `Extended JSON`_
  20. when :const:`CANONICAL_JSON_OPTIONS` or :const:`RELAXED_JSON_OPTIONS` is
  21. provided, respectively.
  22. .. _Extended JSON: https://github.com/mongodb/specifications/blob/master/source/extended-json.rst
  23. Example usage (deserialization):
  24. .. doctest::
  25. >>> from bson.json_util import loads
  26. >>> loads('[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$scope": {}, "$code": "function x() { return 1; }"}}, {"bin": {"$type": "80", "$binary": "AQIDBA=="}}]')
  27. [{u'foo': [1, 2]}, {u'bar': {u'hello': u'world'}}, {u'code': Code('function x() { return 1; }', {})}, {u'bin': Binary('...', 128)}]
  28. Example usage (serialization):
  29. .. doctest::
  30. >>> from bson import Binary, Code
  31. >>> from bson.json_util import dumps
  32. >>> dumps([{'foo': [1, 2]},
  33. ... {'bar': {'hello': 'world'}},
  34. ... {'code': Code("function x() { return 1; }", {})},
  35. ... {'bin': Binary(b"\x01\x02\x03\x04")}])
  36. '[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$code": "function x() { return 1; }", "$scope": {}}}, {"bin": {"$binary": "AQIDBA==", "$type": "00"}}]'
  37. Example usage (with :const:`CANONICAL_JSON_OPTIONS`):
  38. .. doctest::
  39. >>> from bson import Binary, Code
  40. >>> from bson.json_util import dumps, CANONICAL_JSON_OPTIONS
  41. >>> dumps([{'foo': [1, 2]},
  42. ... {'bar': {'hello': 'world'}},
  43. ... {'code': Code("function x() { return 1; }")},
  44. ... {'bin': Binary(b"\x01\x02\x03\x04")}],
  45. ... json_options=CANONICAL_JSON_OPTIONS)
  46. '[{"foo": [{"$numberInt": "1"}, {"$numberInt": "2"}]}, {"bar": {"hello": "world"}}, {"code": {"$code": "function x() { return 1; }"}}, {"bin": {"$binary": {"base64": "AQIDBA==", "subType": "00"}}}]'
  47. Example usage (with :const:`RELAXED_JSON_OPTIONS`):
  48. .. doctest::
  49. >>> from bson import Binary, Code
  50. >>> from bson.json_util import dumps, RELAXED_JSON_OPTIONS
  51. >>> dumps([{'foo': [1, 2]},
  52. ... {'bar': {'hello': 'world'}},
  53. ... {'code': Code("function x() { return 1; }")},
  54. ... {'bin': Binary(b"\x01\x02\x03\x04")}],
  55. ... json_options=RELAXED_JSON_OPTIONS)
  56. '[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$code": "function x() { return 1; }"}}, {"bin": {"$binary": {"base64": "AQIDBA==", "subType": "00"}}}]'
  57. Alternatively, you can manually pass the `default` to :func:`json.dumps`.
  58. It won't handle :class:`~bson.binary.Binary` and :class:`~bson.code.Code`
  59. instances (as they are extended strings you can't provide custom defaults),
  60. but it will be faster as there is less recursion.
  61. .. note::
  62. If your application does not need the flexibility offered by
  63. :class:`JSONOptions` and spends a large amount of time in the `json_util`
  64. module, look to
  65. `python-bsonjs <https://pypi.python.org/pypi/python-bsonjs>`_ for a nice
  66. performance improvement. `python-bsonjs` is a fast BSON to MongoDB
  67. Extended JSON converter for Python built on top of
  68. `libbson <https://github.com/mongodb/libbson>`_. `python-bsonjs` works best
  69. with PyMongo when using :class:`~bson.raw_bson.RawBSONDocument`.
  70. .. versionchanged:: 2.8
  71. The output format for :class:`~bson.timestamp.Timestamp` has changed from
  72. '{"t": <int>, "i": <int>}' to '{"$timestamp": {"t": <int>, "i": <int>}}'.
  73. This new format will be decoded to an instance of
  74. :class:`~bson.timestamp.Timestamp`. The old format will continue to be
  75. decoded to a python dict as before. Encoding to the old format is no longer
  76. supported as it was never correct and loses type information.
  77. Added support for $numberLong and $undefined - new in MongoDB 2.6 - and
  78. parsing $date in ISO-8601 format.
  79. .. versionchanged:: 2.7
  80. Preserves order when rendering SON, Timestamp, Code, Binary, and DBRef
  81. instances.
  82. .. versionchanged:: 2.3
  83. Added dumps and loads helpers to automatically handle conversion to and
  84. from json and supports :class:`~bson.binary.Binary` and
  85. :class:`~bson.code.Code`
  86. """
  87. import base64
  88. import datetime
  89. import json
  90. import math
  91. import re
  92. import uuid
  93. from pymongo.errors import ConfigurationError
  94. import bson
  95. from bson import EPOCH_AWARE, RE_TYPE, SON
  96. from bson.binary import (Binary, UuidRepresentation, ALL_UUID_SUBTYPES,
  97. UUID_SUBTYPE)
  98. from bson.code import Code
  99. from bson.codec_options import CodecOptions
  100. from bson.dbref import DBRef
  101. from bson.decimal128 import Decimal128
  102. from bson.int64 import Int64
  103. from bson.max_key import MaxKey
  104. from bson.min_key import MinKey
  105. from bson.objectid import ObjectId
  106. from bson.py3compat import (PY3, iteritems, integer_types, string_type,
  107. text_type)
  108. from bson.regex import Regex
  109. from bson.timestamp import Timestamp
  110. from bson.tz_util import utc
  111. _RE_OPT_TABLE = {
  112. "i": re.I,
  113. "l": re.L,
  114. "m": re.M,
  115. "s": re.S,
  116. "u": re.U,
  117. "x": re.X,
  118. }
  119. # Dollar-prefixed keys which may appear in DBRefs.
  120. _DBREF_KEYS = frozenset(['$id', '$ref', '$db'])
class DatetimeRepresentation:
    """Constants selecting how :class:`datetime.datetime` values are encoded.

    :class:`JSONOptions` rejects any ``datetime_representation`` that is not
    one of the three values defined here.
    """

    LEGACY = 0
    """Legacy MongoDB Extended JSON datetime representation.

    :class:`datetime.datetime` instances will be encoded to JSON in the
    format `{"$date": <dateAsMilliseconds>}`, where `dateAsMilliseconds` is
    a 64-bit signed integer giving the number of milliseconds since the Unix
    epoch UTC. This was the default encoding before PyMongo version 3.4.

    .. versionadded:: 3.4
    """

    NUMBERLONG = 1
    """NumberLong datetime representation.

    :class:`datetime.datetime` instances will be encoded to JSON in the
    format `{"$date": {"$numberLong": "<dateAsMilliseconds>"}}`,
    where `dateAsMilliseconds` is the string representation of a 64-bit signed
    integer giving the number of milliseconds since the Unix epoch UTC.

    .. versionadded:: 3.4
    """

    ISO8601 = 2
    """ISO-8601 datetime representation.

    :class:`datetime.datetime` instances greater than or equal to the Unix
    epoch UTC will be encoded to JSON in the format `{"$date": "<ISO-8601>"}`.
    :class:`datetime.datetime` instances before the Unix epoch UTC will be
    encoded as if the datetime representation is
    :const:`~DatetimeRepresentation.NUMBERLONG`.

    .. versionadded:: 3.4
    """
class JSONMode:
    """Constants selecting which Extended JSON flavour is produced.

    :class:`JSONOptions` rejects any ``json_mode`` that is not one of the
    three values defined here.
    """

    LEGACY = 0
    """Legacy Extended JSON representation.

    In this mode, :func:`~bson.json_util.dumps` produces PyMongo's legacy
    non-standard JSON output. Consider using
    :const:`~bson.json_util.JSONMode.RELAXED` or
    :const:`~bson.json_util.JSONMode.CANONICAL` instead.

    .. versionadded:: 3.5
    """

    RELAXED = 1
    """Relaxed Extended JSON representation.

    In this mode, :func:`~bson.json_util.dumps` produces Relaxed Extended JSON,
    a mostly JSON-like format. Consider using this for things like a web API,
    where one is sending a document (or a projection of a document) that only
    uses ordinary JSON type primitives. In particular, the ``int``,
    :class:`~bson.int64.Int64`, and ``float`` numeric types are represented in
    the native JSON number format. This output is also the most human readable
    and is useful for debugging and documentation.

    .. seealso:: The specification for Relaxed `Extended JSON`_.

    .. versionadded:: 3.5
    """

    CANONICAL = 2
    """Canonical Extended JSON representation.

    In this mode, :func:`~bson.json_util.dumps` produces Canonical Extended
    JSON, a type preserving format. Consider using this for things like
    testing, where one has to precisely specify expected types in JSON. In
    particular, the ``int``, :class:`~bson.int64.Int64`, and ``float`` numeric
    types are encoded with type wrappers.

    .. seealso:: The specification for Canonical `Extended JSON`_.

    .. versionadded:: 3.5
    """
  178. class JSONOptions(CodecOptions):
  179. """Encapsulates JSON options for :func:`dumps` and :func:`loads`.
  180. :Parameters:
  181. - `strict_number_long`: If ``True``, :class:`~bson.int64.Int64` objects
  182. are encoded to MongoDB Extended JSON's *Strict mode* type
  183. `NumberLong`, ie ``'{"$numberLong": "<number>" }'``. Otherwise they
  184. will be encoded as an `int`. Defaults to ``False``.
  185. - `datetime_representation`: The representation to use when encoding
  186. instances of :class:`datetime.datetime`. Defaults to
  187. :const:`~DatetimeRepresentation.LEGACY`.
  188. - `strict_uuid`: If ``True``, :class:`uuid.UUID` object are encoded to
  189. MongoDB Extended JSON's *Strict mode* type `Binary`. Otherwise it
  190. will be encoded as ``'{"$uuid": "<hex>" }'``. Defaults to ``False``.
  191. - `json_mode`: The :class:`JSONMode` to use when encoding BSON types to
  192. Extended JSON. Defaults to :const:`~JSONMode.LEGACY`.
  193. - `document_class`: BSON documents returned by :func:`loads` will be
  194. decoded to an instance of this class. Must be a subclass of
  195. :class:`collections.MutableMapping`. Defaults to :class:`dict`.
  196. - `uuid_representation`: The :class:`~bson.binary.UuidRepresentation`
  197. to use when encoding and decoding instances of :class:`uuid.UUID`.
  198. Defaults to :const:`~bson.binary.UuidRepresentation.PYTHON_LEGACY`.
  199. - `tz_aware`: If ``True``, MongoDB Extended JSON's *Strict mode* type
  200. `Date` will be decoded to timezone aware instances of
  201. :class:`datetime.datetime`. Otherwise they will be naive. Defaults
  202. to ``True``.
  203. - `tzinfo`: A :class:`datetime.tzinfo` subclass that specifies the
  204. timezone from which :class:`~datetime.datetime` objects should be
  205. decoded. Defaults to :const:`~bson.tz_util.utc`.
  206. - `args`: arguments to :class:`~bson.codec_options.CodecOptions`
  207. - `kwargs`: arguments to :class:`~bson.codec_options.CodecOptions`
  208. .. seealso:: The specification for Relaxed and Canonical `Extended JSON`_.
  209. .. versionadded:: 3.4
  210. .. versionchanged:: 3.5
  211. Accepts the optional parameter `json_mode`.
  212. """
  213. def __new__(cls, strict_number_long=False,
  214. datetime_representation=DatetimeRepresentation.LEGACY,
  215. strict_uuid=False, json_mode=JSONMode.LEGACY,
  216. *args, **kwargs):
  217. kwargs["tz_aware"] = kwargs.get("tz_aware", True)
  218. if kwargs["tz_aware"]:
  219. kwargs["tzinfo"] = kwargs.get("tzinfo", utc)
  220. if datetime_representation not in (DatetimeRepresentation.LEGACY,
  221. DatetimeRepresentation.NUMBERLONG,
  222. DatetimeRepresentation.ISO8601):
  223. raise ConfigurationError(
  224. "JSONOptions.datetime_representation must be one of LEGACY, "
  225. "NUMBERLONG, or ISO8601 from DatetimeRepresentation.")
  226. self = super(JSONOptions, cls).__new__(cls, *args, **kwargs)
  227. if json_mode not in (JSONMode.LEGACY,
  228. JSONMode.RELAXED,
  229. JSONMode.CANONICAL):
  230. raise ConfigurationError(
  231. "JSONOptions.json_mode must be one of LEGACY, RELAXED, "
  232. "or CANONICAL from JSONMode.")
  233. self.json_mode = json_mode
  234. if self.json_mode == JSONMode.RELAXED:
  235. self.strict_number_long = False
  236. self.datetime_representation = DatetimeRepresentation.ISO8601
  237. self.strict_uuid = True
  238. elif self.json_mode == JSONMode.CANONICAL:
  239. self.strict_number_long = True
  240. self.datetime_representation = DatetimeRepresentation.NUMBERLONG
  241. self.strict_uuid = True
  242. else:
  243. self.strict_number_long = strict_number_long
  244. self.datetime_representation = datetime_representation
  245. self.strict_uuid = strict_uuid
  246. return self
  247. def _arguments_repr(self):
  248. return ('strict_number_long=%r, '
  249. 'datetime_representation=%r, '
  250. 'strict_uuid=%r, json_mode=%r, %s' % (
  251. self.strict_number_long,
  252. self.datetime_representation,
  253. self.strict_uuid,
  254. self.json_mode,
  255. super(JSONOptions, self)._arguments_repr()))
  256. def _options_dict(self):
  257. # TODO: PYTHON-2442 use _asdict() instead
  258. options_dict = super(JSONOptions, self)._options_dict()
  259. options_dict.update({
  260. 'strict_number_long': self.strict_number_long,
  261. 'datetime_representation': self.datetime_representation,
  262. 'strict_uuid': self.strict_uuid,
  263. 'json_mode': self.json_mode})
  264. return options_dict
  265. def with_options(self, **kwargs):
  266. """
  267. Make a copy of this JSONOptions, overriding some options::
  268. >>> from bson.json_util import CANONICAL_JSON_OPTIONS
  269. >>> CANONICAL_JSON_OPTIONS.tz_aware
  270. True
  271. >>> json_options = CANONICAL_JSON_OPTIONS.with_options(tz_aware=False)
  272. >>> json_options.tz_aware
  273. False
  274. .. versionadded:: 3.12
  275. """
  276. opts = self._options_dict()
  277. for opt in ('strict_number_long', 'datetime_representation',
  278. 'strict_uuid', 'json_mode'):
  279. opts[opt] = kwargs.get(opt, getattr(self, opt))
  280. opts.update(kwargs)
  281. return JSONOptions(**opts)
# Ready-made JSONOptions instances, one per JSONMode, plus the deprecated
# strict-mode preset.
LEGACY_JSON_OPTIONS = JSONOptions(json_mode=JSONMode.LEGACY)
""":class:`JSONOptions` for encoding to PyMongo's legacy JSON format.

.. seealso:: The documentation for :const:`bson.json_util.JSONMode.LEGACY`.

.. versionadded:: 3.5
"""

# Alias, not a copy: both names refer to the same JSONOptions object.
DEFAULT_JSON_OPTIONS = LEGACY_JSON_OPTIONS
"""The default :class:`JSONOptions` for JSON encoding/decoding.

The same as :const:`LEGACY_JSON_OPTIONS`. This will change to
:const:`RELAXED_JSON_OPTIONS` in a future release.

.. versionadded:: 3.4
"""

CANONICAL_JSON_OPTIONS = JSONOptions(json_mode=JSONMode.CANONICAL)
""":class:`JSONOptions` for Canonical Extended JSON.

.. seealso:: The documentation for :const:`bson.json_util.JSONMode.CANONICAL`.

.. versionadded:: 3.5
"""

RELAXED_JSON_OPTIONS = JSONOptions(json_mode=JSONMode.RELAXED)
""":class:`JSONOptions` for Relaxed Extended JSON.

.. seealso:: The documentation for :const:`bson.json_util.JSONMode.RELAXED`.

.. versionadded:: 3.5
"""

# No json_mode is given, so the default LEGACY mode applies and JSONOptions
# keeps these three explicit switches as passed.
STRICT_JSON_OPTIONS = JSONOptions(
    strict_number_long=True,
    datetime_representation=DatetimeRepresentation.ISO8601,
    strict_uuid=True)
"""**DEPRECATED** - :class:`JSONOptions` for MongoDB Extended JSON's *Strict
mode* encoding.

.. versionadded:: 3.4

.. versionchanged:: 3.5
   Deprecated. Use :const:`RELAXED_JSON_OPTIONS` or
   :const:`CANONICAL_JSON_OPTIONS` instead.
"""
  314. def dumps(obj, *args, **kwargs):
  315. """Helper function that wraps :func:`json.dumps`.
  316. Recursive function that handles all BSON types including
  317. :class:`~bson.binary.Binary` and :class:`~bson.code.Code`.
  318. :Parameters:
  319. - `json_options`: A :class:`JSONOptions` instance used to modify the
  320. encoding of MongoDB Extended JSON types. Defaults to
  321. :const:`DEFAULT_JSON_OPTIONS`.
  322. .. versionchanged:: 3.4
  323. Accepts optional parameter `json_options`. See :class:`JSONOptions`.
  324. .. versionchanged:: 2.7
  325. Preserves order when rendering SON, Timestamp, Code, Binary, and DBRef
  326. instances.
  327. """
  328. json_options = kwargs.pop("json_options", DEFAULT_JSON_OPTIONS)
  329. return json.dumps(_json_convert(obj, json_options), *args, **kwargs)
  330. def loads(s, *args, **kwargs):
  331. """Helper function that wraps :func:`json.loads`.
  332. Automatically passes the object_hook for BSON type conversion.
  333. Raises ``TypeError``, ``ValueError``, ``KeyError``, or
  334. :exc:`~bson.errors.InvalidId` on invalid MongoDB Extended JSON.
  335. :Parameters:
  336. - `json_options`: A :class:`JSONOptions` instance used to modify the
  337. decoding of MongoDB Extended JSON types. Defaults to
  338. :const:`DEFAULT_JSON_OPTIONS`.
  339. .. versionchanged:: 3.5
  340. Parses Relaxed and Canonical Extended JSON as well as PyMongo's legacy
  341. format. Now raises ``TypeError`` or ``ValueError`` when parsing JSON
  342. type wrappers with values of the wrong type or any extra keys.
  343. .. versionchanged:: 3.4
  344. Accepts optional parameter `json_options`. See :class:`JSONOptions`.
  345. """
  346. json_options = kwargs.pop("json_options", DEFAULT_JSON_OPTIONS)
  347. kwargs["object_pairs_hook"] = lambda pairs: object_pairs_hook(
  348. pairs, json_options)
  349. return json.loads(s, *args, **kwargs)
  350. def _json_convert(obj, json_options=DEFAULT_JSON_OPTIONS):
  351. """Recursive helper method that converts BSON types so they can be
  352. converted into json.
  353. """
  354. if hasattr(obj, 'iteritems') or hasattr(obj, 'items'): # PY3 support
  355. return SON(((k, _json_convert(v, json_options))
  356. for k, v in iteritems(obj)))
  357. elif hasattr(obj, '__iter__') and not isinstance(obj, (text_type, bytes)):
  358. return list((_json_convert(v, json_options) for v in obj))
  359. try:
  360. return default(obj, json_options)
  361. except TypeError:
  362. return obj
  363. def object_pairs_hook(pairs, json_options=DEFAULT_JSON_OPTIONS):
  364. return object_hook(json_options.document_class(pairs), json_options)
  365. def object_hook(dct, json_options=DEFAULT_JSON_OPTIONS):
  366. if "$oid" in dct:
  367. return _parse_canonical_oid(dct)
  368. if "$ref" in dct:
  369. return _parse_canonical_dbref(dct)
  370. if "$date" in dct:
  371. return _parse_canonical_datetime(dct, json_options)
  372. if "$regex" in dct:
  373. return _parse_legacy_regex(dct)
  374. if "$minKey" in dct:
  375. return _parse_canonical_minkey(dct)
  376. if "$maxKey" in dct:
  377. return _parse_canonical_maxkey(dct)
  378. if "$binary" in dct:
  379. if "$type" in dct:
  380. return _parse_legacy_binary(dct, json_options)
  381. else:
  382. return _parse_canonical_binary(dct, json_options)
  383. if "$code" in dct:
  384. return _parse_canonical_code(dct)
  385. if "$uuid" in dct:
  386. return _parse_legacy_uuid(dct, json_options)
  387. if "$undefined" in dct:
  388. return None
  389. if "$numberLong" in dct:
  390. return _parse_canonical_int64(dct)
  391. if "$timestamp" in dct:
  392. tsp = dct["$timestamp"]
  393. return Timestamp(tsp["t"], tsp["i"])
  394. if "$numberDecimal" in dct:
  395. return _parse_canonical_decimal128(dct)
  396. if "$dbPointer" in dct:
  397. return _parse_canonical_dbpointer(dct)
  398. if "$regularExpression" in dct:
  399. return _parse_canonical_regex(dct)
  400. if "$symbol" in dct:
  401. return _parse_canonical_symbol(dct)
  402. if "$numberInt" in dct:
  403. return _parse_canonical_int32(dct)
  404. if "$numberDouble" in dct:
  405. return _parse_canonical_double(dct)
  406. return dct
  407. def _parse_legacy_regex(doc):
  408. pattern = doc["$regex"]
  409. # Check if this is the $regex query operator.
  410. if isinstance(pattern, Regex):
  411. return doc
  412. flags = 0
  413. # PyMongo always adds $options but some other tools may not.
  414. for opt in doc.get("$options", ""):
  415. flags |= _RE_OPT_TABLE.get(opt, 0)
  416. return Regex(pattern, flags)
  417. def _parse_legacy_uuid(doc, json_options):
  418. """Decode a JSON legacy $uuid to Python UUID."""
  419. if len(doc) != 1:
  420. raise TypeError('Bad $uuid, extra field(s): %s' % (doc,))
  421. if not isinstance(doc["$uuid"], text_type):
  422. raise TypeError('$uuid must be a string: %s' % (doc,))
  423. if json_options.uuid_representation == UuidRepresentation.UNSPECIFIED:
  424. return Binary.from_uuid(uuid.UUID(doc["$uuid"]))
  425. else:
  426. return uuid.UUID(doc["$uuid"])
  427. def _binary_or_uuid(data, subtype, json_options):
  428. # special handling for UUID
  429. if subtype in ALL_UUID_SUBTYPES:
  430. uuid_representation = json_options.uuid_representation
  431. binary_value = Binary(data, subtype)
  432. if uuid_representation == UuidRepresentation.UNSPECIFIED:
  433. return binary_value
  434. if subtype == UUID_SUBTYPE:
  435. # Legacy behavior: use STANDARD with binary subtype 4.
  436. uuid_representation = UuidRepresentation.STANDARD
  437. elif uuid_representation == UuidRepresentation.STANDARD:
  438. # subtype == OLD_UUID_SUBTYPE
  439. # Legacy behavior: STANDARD is the same as PYTHON_LEGACY.
  440. uuid_representation = UuidRepresentation.PYTHON_LEGACY
  441. return binary_value.as_uuid(uuid_representation)
  442. if PY3 and subtype == 0:
  443. return data
  444. return Binary(data, subtype)
  445. def _parse_legacy_binary(doc, json_options):
  446. if isinstance(doc["$type"], int):
  447. doc["$type"] = "%02x" % doc["$type"]
  448. subtype = int(doc["$type"], 16)
  449. if subtype >= 0xffffff80: # Handle mongoexport values
  450. subtype = int(doc["$type"][6:], 16)
  451. data = base64.b64decode(doc["$binary"].encode())
  452. return _binary_or_uuid(data, subtype, json_options)
  453. def _parse_canonical_binary(doc, json_options):
  454. binary = doc["$binary"]
  455. b64 = binary["base64"]
  456. subtype = binary["subType"]
  457. if not isinstance(b64, string_type):
  458. raise TypeError('$binary base64 must be a string: %s' % (doc,))
  459. if not isinstance(subtype, string_type) or len(subtype) > 2:
  460. raise TypeError('$binary subType must be a string at most 2 '
  461. 'characters: %s' % (doc,))
  462. if len(binary) != 2:
  463. raise TypeError('$binary must include only "base64" and "subType" '
  464. 'components: %s' % (doc,))
  465. data = base64.b64decode(b64.encode())
  466. return _binary_or_uuid(data, int(subtype, 16), json_options)
  467. def _parse_canonical_datetime(doc, json_options):
  468. """Decode a JSON datetime to python datetime.datetime."""
  469. dtm = doc["$date"]
  470. if len(doc) != 1:
  471. raise TypeError('Bad $date, extra field(s): %s' % (doc,))
  472. # mongoexport 2.6 and newer
  473. if isinstance(dtm, string_type):
  474. # Parse offset
  475. if dtm[-1] == 'Z':
  476. dt = dtm[:-1]
  477. offset = 'Z'
  478. elif dtm[-6] in ('+', '-') and dtm[-3] == ':':
  479. # (+|-)HH:MM
  480. dt = dtm[:-6]
  481. offset = dtm[-6:]
  482. elif dtm[-5] in ('+', '-'):
  483. # (+|-)HHMM
  484. dt = dtm[:-5]
  485. offset = dtm[-5:]
  486. elif dtm[-3] in ('+', '-'):
  487. # (+|-)HH
  488. dt = dtm[:-3]
  489. offset = dtm[-3:]
  490. else:
  491. dt = dtm
  492. offset = ''
  493. # Parse the optional factional seconds portion.
  494. dot_index = dt.rfind('.')
  495. microsecond = 0
  496. if dot_index != -1:
  497. microsecond = int(float(dt[dot_index:]) * 1000000)
  498. dt = dt[:dot_index]
  499. aware = datetime.datetime.strptime(
  500. dt, "%Y-%m-%dT%H:%M:%S").replace(microsecond=microsecond,
  501. tzinfo=utc)
  502. if offset and offset != 'Z':
  503. if len(offset) == 6:
  504. hours, minutes = offset[1:].split(':')
  505. secs = (int(hours) * 3600 + int(minutes) * 60)
  506. elif len(offset) == 5:
  507. secs = (int(offset[1:3]) * 3600 + int(offset[3:]) * 60)
  508. elif len(offset) == 3:
  509. secs = int(offset[1:3]) * 3600
  510. if offset[0] == "-":
  511. secs *= -1
  512. aware = aware - datetime.timedelta(seconds=secs)
  513. if json_options.tz_aware:
  514. if json_options.tzinfo:
  515. aware = aware.astimezone(json_options.tzinfo)
  516. return aware
  517. else:
  518. return aware.replace(tzinfo=None)
  519. return bson._millis_to_datetime(int(dtm), json_options)
  520. def _parse_canonical_oid(doc):
  521. """Decode a JSON ObjectId to bson.objectid.ObjectId."""
  522. if len(doc) != 1:
  523. raise TypeError('Bad $oid, extra field(s): %s' % (doc,))
  524. return ObjectId(doc['$oid'])
  525. def _parse_canonical_symbol(doc):
  526. """Decode a JSON symbol to Python string."""
  527. symbol = doc['$symbol']
  528. if len(doc) != 1:
  529. raise TypeError('Bad $symbol, extra field(s): %s' % (doc,))
  530. return text_type(symbol)
  531. def _parse_canonical_code(doc):
  532. """Decode a JSON code to bson.code.Code."""
  533. for key in doc:
  534. if key not in ('$code', '$scope'):
  535. raise TypeError('Bad $code, extra field(s): %s' % (doc,))
  536. return Code(doc['$code'], scope=doc.get('$scope'))
  537. def _parse_canonical_regex(doc):
  538. """Decode a JSON regex to bson.regex.Regex."""
  539. regex = doc['$regularExpression']
  540. if len(doc) != 1:
  541. raise TypeError('Bad $regularExpression, extra field(s): %s' % (doc,))
  542. if len(regex) != 2:
  543. raise TypeError('Bad $regularExpression must include only "pattern"'
  544. 'and "options" components: %s' % (doc,))
  545. return Regex(regex['pattern'], regex['options'])
  546. def _parse_canonical_dbref(doc):
  547. """Decode a JSON DBRef to bson.dbref.DBRef."""
  548. for key in doc:
  549. if key.startswith('$') and key not in _DBREF_KEYS:
  550. # Other keys start with $, so dct cannot be parsed as a DBRef.
  551. return doc
  552. return DBRef(doc.pop('$ref'), doc.pop('$id'),
  553. database=doc.pop('$db', None), **doc)
  554. def _parse_canonical_dbpointer(doc):
  555. """Decode a JSON (deprecated) DBPointer to bson.dbref.DBRef."""
  556. dbref = doc['$dbPointer']
  557. if len(doc) != 1:
  558. raise TypeError('Bad $dbPointer, extra field(s): %s' % (doc,))
  559. if isinstance(dbref, DBRef):
  560. dbref_doc = dbref.as_doc()
  561. # DBPointer must not contain $db in its value.
  562. if dbref.database is not None:
  563. raise TypeError(
  564. 'Bad $dbPointer, extra field $db: %s' % (dbref_doc,))
  565. if not isinstance(dbref.id, ObjectId):
  566. raise TypeError(
  567. 'Bad $dbPointer, $id must be an ObjectId: %s' % (dbref_doc,))
  568. if len(dbref_doc) != 2:
  569. raise TypeError(
  570. 'Bad $dbPointer, extra field(s) in DBRef: %s' % (dbref_doc,))
  571. return dbref
  572. else:
  573. raise TypeError('Bad $dbPointer, expected a DBRef: %s' % (doc,))
  574. def _parse_canonical_int32(doc):
  575. """Decode a JSON int32 to python int."""
  576. i_str = doc['$numberInt']
  577. if len(doc) != 1:
  578. raise TypeError('Bad $numberInt, extra field(s): %s' % (doc,))
  579. if not isinstance(i_str, string_type):
  580. raise TypeError('$numberInt must be string: %s' % (doc,))
  581. return int(i_str)
  582. def _parse_canonical_int64(doc):
  583. """Decode a JSON int64 to bson.int64.Int64."""
  584. l_str = doc['$numberLong']
  585. if len(doc) != 1:
  586. raise TypeError('Bad $numberLong, extra field(s): %s' % (doc,))
  587. return Int64(l_str)
  588. def _parse_canonical_double(doc):
  589. """Decode a JSON double to python float."""
  590. d_str = doc['$numberDouble']
  591. if len(doc) != 1:
  592. raise TypeError('Bad $numberDouble, extra field(s): %s' % (doc,))
  593. if not isinstance(d_str, string_type):
  594. raise TypeError('$numberDouble must be string: %s' % (doc,))
  595. return float(d_str)
  596. def _parse_canonical_decimal128(doc):
  597. """Decode a JSON decimal128 to bson.decimal128.Decimal128."""
  598. d_str = doc['$numberDecimal']
  599. if len(doc) != 1:
  600. raise TypeError('Bad $numberDecimal, extra field(s): %s' % (doc,))
  601. if not isinstance(d_str, string_type):
  602. raise TypeError('$numberDecimal must be string: %s' % (doc,))
  603. return Decimal128(d_str)
  604. def _parse_canonical_minkey(doc):
  605. """Decode a JSON MinKey to bson.min_key.MinKey."""
  606. if type(doc['$minKey']) is not int or doc['$minKey'] != 1:
  607. raise TypeError('$minKey value must be 1: %s' % (doc,))
  608. if len(doc) != 1:
  609. raise TypeError('Bad $minKey, extra field(s): %s' % (doc,))
  610. return MinKey()
  611. def _parse_canonical_maxkey(doc):
  612. """Decode a JSON MaxKey to bson.max_key.MaxKey."""
  613. if type(doc['$maxKey']) is not int or doc['$maxKey'] != 1:
  614. raise TypeError('$maxKey value must be 1: %s', (doc,))
  615. if len(doc) != 1:
  616. raise TypeError('Bad $minKey, extra field(s): %s' % (doc,))
  617. return MaxKey()
  618. def _encode_binary(data, subtype, json_options):
  619. if json_options.json_mode == JSONMode.LEGACY:
  620. return SON([
  621. ('$binary', base64.b64encode(data).decode()),
  622. ('$type', "%02x" % subtype)])
  623. return {'$binary': SON([
  624. ('base64', base64.b64encode(data).decode()),
  625. ('subType', "%02x" % subtype)])}
  626. def default(obj, json_options=DEFAULT_JSON_OPTIONS):
  627. # We preserve key order when rendering SON, DBRef, etc. as JSON by
  628. # returning a SON for those types instead of a dict.
  629. if isinstance(obj, ObjectId):
  630. return {"$oid": str(obj)}
  631. if isinstance(obj, DBRef):
  632. return _json_convert(obj.as_doc(), json_options=json_options)
  633. if isinstance(obj, datetime.datetime):
  634. if (json_options.datetime_representation ==
  635. DatetimeRepresentation.ISO8601):
  636. if not obj.tzinfo:
  637. obj = obj.replace(tzinfo=utc)
  638. if obj >= EPOCH_AWARE:
  639. off = obj.tzinfo.utcoffset(obj)
  640. if (off.days, off.seconds, off.microseconds) == (0, 0, 0):
  641. tz_string = 'Z'
  642. else:
  643. tz_string = obj.strftime('%z')
  644. millis = int(obj.microsecond / 1000)
  645. fracsecs = ".%03d" % (millis,) if millis else ""
  646. return {"$date": "%s%s%s" % (
  647. obj.strftime("%Y-%m-%dT%H:%M:%S"), fracsecs, tz_string)}
  648. millis = bson._datetime_to_millis(obj)
  649. if (json_options.datetime_representation ==
  650. DatetimeRepresentation.LEGACY):
  651. return {"$date": millis}
  652. return {"$date": {"$numberLong": str(millis)}}
  653. if json_options.strict_number_long and isinstance(obj, Int64):
  654. return {"$numberLong": str(obj)}
  655. if isinstance(obj, (RE_TYPE, Regex)):
  656. flags = ""
  657. if obj.flags & re.IGNORECASE:
  658. flags += "i"
  659. if obj.flags & re.LOCALE:
  660. flags += "l"
  661. if obj.flags & re.MULTILINE:
  662. flags += "m"
  663. if obj.flags & re.DOTALL:
  664. flags += "s"
  665. if obj.flags & re.UNICODE:
  666. flags += "u"
  667. if obj.flags & re.VERBOSE:
  668. flags += "x"
  669. if isinstance(obj.pattern, text_type):
  670. pattern = obj.pattern
  671. else:
  672. pattern = obj.pattern.decode('utf-8')
  673. if json_options.json_mode == JSONMode.LEGACY:
  674. return SON([("$regex", pattern), ("$options", flags)])
  675. return {'$regularExpression': SON([("pattern", pattern),
  676. ("options", flags)])}
  677. if isinstance(obj, MinKey):
  678. return {"$minKey": 1}
  679. if isinstance(obj, MaxKey):
  680. return {"$maxKey": 1}
  681. if isinstance(obj, Timestamp):
  682. return {"$timestamp": SON([("t", obj.time), ("i", obj.inc)])}
  683. if isinstance(obj, Code):
  684. if obj.scope is None:
  685. return {'$code': str(obj)}
  686. return SON([
  687. ('$code', str(obj)),
  688. ('$scope', _json_convert(obj.scope, json_options))])
  689. if isinstance(obj, Binary):
  690. return _encode_binary(obj, obj.subtype, json_options)
  691. if PY3 and isinstance(obj, bytes):
  692. return _encode_binary(obj, 0, json_options)
  693. if isinstance(obj, uuid.UUID):
  694. if json_options.strict_uuid:
  695. binval = Binary.from_uuid(
  696. obj, uuid_representation=json_options.uuid_representation)
  697. return _encode_binary(binval, binval.subtype, json_options)
  698. else:
  699. return {"$uuid": obj.hex}
  700. if isinstance(obj, Decimal128):
  701. return {"$numberDecimal": str(obj)}
  702. if isinstance(obj, bool):
  703. return obj
  704. if (json_options.json_mode == JSONMode.CANONICAL and
  705. isinstance(obj, integer_types)):
  706. if -2 ** 31 <= obj < 2 ** 31:
  707. return {'$numberInt': text_type(obj)}
  708. return {'$numberLong': text_type(obj)}
  709. if json_options.json_mode != JSONMode.LEGACY and isinstance(obj, float):
  710. if math.isnan(obj):
  711. return {'$numberDouble': 'NaN'}
  712. elif math.isinf(obj):
  713. representation = 'Infinity' if obj > 0 else '-Infinity'
  714. return {'$numberDouble': representation}
  715. elif json_options.json_mode == JSONMode.CANONICAL:
  716. # repr() will return the shortest string guaranteed to produce the
  717. # original value, when float() is called on it. str produces a
  718. # shorter string in Python 2.
  719. return {'$numberDouble': text_type(repr(obj))}
  720. raise TypeError("%r is not JSON serializable" % obj)