__init__.py 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274
  1. # Copyright 2009-present MongoDB, Inc.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """BSON (Binary JSON) encoding and decoding.
  15. The mapping from Python types to BSON types is as follows:
  16. ======================================= ============= ===================
  17. Python Type BSON Type Supported Direction
  18. ======================================= ============= ===================
  19. None null both
  20. bool boolean both
  21. int [#int]_ int32 / int64 py -> bson
  22. long int64 py -> bson
  23. `bson.int64.Int64` int64 both
  24. float number (real) both
  25. string string py -> bson
  26. unicode string both
  27. list array both
  28. dict / `SON` object both
  29. datetime.datetime [#dt]_ [#dt2]_ date both
  30. `bson.regex.Regex` regex both
  31. compiled re [#re]_ regex py -> bson
  32. `bson.binary.Binary` binary both
  33. `bson.objectid.ObjectId` oid both
  34. `bson.dbref.DBRef` dbref both
  35. None undefined bson -> py
  36. unicode code bson -> py
  37. `bson.code.Code` code py -> bson
  38. unicode symbol bson -> py
  39. bytes (Python 3) [#bytes]_ binary both
  40. ======================================= ============= ===================
  41. Note that, when using Python 2.x, to save binary data it must be wrapped as
  42. an instance of `bson.binary.Binary`. Otherwise it will be saved as a BSON
  43. string and retrieved as unicode. Users of Python 3.x can use the Python bytes
  44. type.
  45. .. [#int] A Python int will be saved as a BSON int32 or BSON int64 depending
  46. on its size. A BSON int32 will always decode to a Python int. A BSON
  47. int64 will always decode to a :class:`~bson.int64.Int64`.
  48. .. [#dt] datetime.datetime instances will be rounded to the nearest
  49. millisecond when saved
  50. .. [#dt2] all datetime.datetime instances are treated as *naive*. clients
  51. should always use UTC.
  52. .. [#re] :class:`~bson.regex.Regex` instances and regular expression
  53. objects from ``re.compile()`` are both saved as BSON regular expressions.
  54. BSON regular expressions are decoded as :class:`~bson.regex.Regex`
  55. instances.
  56. .. [#bytes] The bytes type from Python 3.x is encoded as BSON binary with
  57. subtype 0. In Python 3.x it will be decoded back to bytes. In Python 2.x
  58. it will be decoded to an instance of :class:`~bson.binary.Binary` with
  59. subtype 0.
  60. """
  61. import calendar
  62. import datetime
  63. import itertools
  64. import platform
  65. import re
  66. import struct
  67. import sys
  68. import uuid
  69. from codecs import (utf_8_decode as _utf_8_decode,
  70. utf_8_encode as _utf_8_encode)
  71. from bson.binary import (Binary, UuidRepresentation, ALL_UUID_SUBTYPES,
  72. OLD_UUID_SUBTYPE,
  73. JAVA_LEGACY, CSHARP_LEGACY,
  74. UUIDLegacy, UUID_SUBTYPE)
  75. from bson.code import Code
  76. from bson.codec_options import (
  77. CodecOptions, DEFAULT_CODEC_OPTIONS, _raw_document_class)
  78. from bson.dbref import DBRef
  79. from bson.decimal128 import Decimal128
  80. from bson.errors import (InvalidBSON,
  81. InvalidDocument,
  82. InvalidStringData)
  83. from bson.int64 import Int64
  84. from bson.max_key import MaxKey
  85. from bson.min_key import MinKey
  86. from bson.objectid import ObjectId
  87. from bson.py3compat import (abc,
  88. b,
  89. PY3,
  90. iteritems,
  91. text_type,
  92. string_type,
  93. reraise)
  94. from bson.regex import Regex
  95. from bson.son import SON, RE_TYPE
  96. from bson.timestamp import Timestamp
  97. from bson.tz_util import utc
  98. try:
  99. from bson import _cbson
  100. _USE_C = True
  101. except ImportError:
  102. _USE_C = False
# The Unix epoch, once as a timezone-aware (UTC) datetime and once as a
# naive datetime.
EPOCH_AWARE = datetime.datetime.fromtimestamp(0, utc)
EPOCH_NAIVE = datetime.datetime.utcfromtimestamp(0)

# One-byte BSON element type markers.
BSONNUM = b"\x01"  # Floating point
BSONSTR = b"\x02"  # UTF-8 string
BSONOBJ = b"\x03"  # Embedded document
BSONARR = b"\x04"  # Array
BSONBIN = b"\x05"  # Binary
BSONUND = b"\x06"  # Undefined
BSONOID = b"\x07"  # ObjectId
BSONBOO = b"\x08"  # Boolean
BSONDAT = b"\x09"  # UTC Datetime
BSONNUL = b"\x0A"  # Null
BSONRGX = b"\x0B"  # Regex
BSONREF = b"\x0C"  # DBRef
BSONCOD = b"\x0D"  # Javascript code
BSONSYM = b"\x0E"  # Symbol
BSONCWS = b"\x0F"  # Javascript code with scope
BSONINT = b"\x10"  # 32bit int
BSONTIM = b"\x11"  # Timestamp
BSONLON = b"\x12"  # 64bit int
BSONDEC = b"\x13"  # Decimal128
BSONMIN = b"\xFF"  # Min key
BSONMAX = b"\x7F"  # Max key

# Pre-compiled little-endian unpackers, bound once at import time so the
# decoders below do not have to parse a format string on every call.
_UNPACK_FLOAT_FROM = struct.Struct("<d").unpack_from
_UNPACK_INT = struct.Struct("<i").unpack
_UNPACK_INT_FROM = struct.Struct("<i").unpack_from
_UNPACK_LENGTH_SUBTYPE_FROM = struct.Struct("<iB").unpack_from
_UNPACK_LONG_FROM = struct.Struct("<q").unpack_from
_UNPACK_TIMESTAMP_FROM = struct.Struct("<II").unpack_from
  132. if PY3:
  133. _OBJEND = 0
  134. # Only used to generate the _ELEMENT_GETTER dict
  135. def _maybe_ord(element_type):
  136. return ord(element_type)
  137. # Only used in _raise_unkown_type below
  138. def _elt_to_hex(element_type):
  139. return chr(element_type).encode()
  140. _supported_buffer_types = (bytes, bytearray)
  141. else:
  142. _OBJEND = b"\x00"
  143. def _maybe_ord(element_type):
  144. return element_type
  145. def _elt_to_hex(element_type):
  146. return element_type
  147. _supported_buffer_types = (bytes,)
  148. if platform.python_implementation() == 'Jython':
  149. # This is why we can't have nice things.
  150. # https://bugs.jython.org/issue2788
  151. def get_data_and_view(data):
  152. if isinstance(data, _supported_buffer_types):
  153. return data, data
  154. data = memoryview(data).tobytes()
  155. return data, data
  156. else:
  157. def get_data_and_view(data):
  158. if isinstance(data, _supported_buffer_types):
  159. return data, memoryview(data)
  160. view = memoryview(data)
  161. return view.tobytes(), view
  162. def _raise_unknown_type(element_type, element_name):
  163. """Unknown type helper."""
  164. raise InvalidBSON("Detected unknown BSON type %r for fieldname '%s'. Are "
  165. "you using the latest driver version?" % (
  166. _elt_to_hex(element_type), element_name))
  167. def _get_int(data, view, position, dummy0, dummy1, dummy2):
  168. """Decode a BSON int32 to python int."""
  169. return _UNPACK_INT_FROM(data, position)[0], position + 4
  170. def _get_c_string(data, view, position, opts):
  171. """Decode a BSON 'C' string to python unicode string."""
  172. end = data.index(b"\x00", position)
  173. return _utf_8_decode(view[position:end],
  174. opts.unicode_decode_error_handler, True)[0], end + 1
  175. def _get_float(data, view, position, dummy0, dummy1, dummy2):
  176. """Decode a BSON double to python float."""
  177. return _UNPACK_FLOAT_FROM(data, position)[0], position + 8
  178. def _get_string(data, view, position, obj_end, opts, dummy):
  179. """Decode a BSON string to python unicode string."""
  180. length = _UNPACK_INT_FROM(data, position)[0]
  181. position += 4
  182. if length < 1 or obj_end - position < length:
  183. raise InvalidBSON("invalid string length")
  184. end = position + length - 1
  185. if data[end] != _OBJEND:
  186. raise InvalidBSON("invalid end of string")
  187. return _utf_8_decode(view[position:end],
  188. opts.unicode_decode_error_handler, True)[0], end + 1
  189. def _get_object_size(data, position, obj_end):
  190. """Validate and return a BSON document's size."""
  191. try:
  192. obj_size = _UNPACK_INT_FROM(data, position)[0]
  193. except struct.error as exc:
  194. raise InvalidBSON(str(exc))
  195. end = position + obj_size - 1
  196. if data[end] != _OBJEND:
  197. raise InvalidBSON("bad eoo")
  198. if end >= obj_end:
  199. raise InvalidBSON("invalid object length")
  200. # If this is the top-level document, validate the total size too.
  201. if position == 0 and obj_size != obj_end:
  202. raise InvalidBSON("invalid object length")
  203. return obj_size, end
  204. def _get_object(data, view, position, obj_end, opts, dummy):
  205. """Decode a BSON subdocument to opts.document_class or bson.dbref.DBRef."""
  206. obj_size, end = _get_object_size(data, position, obj_end)
  207. if _raw_document_class(opts.document_class):
  208. return (opts.document_class(data[position:end + 1], opts),
  209. position + obj_size)
  210. obj = _elements_to_dict(data, view, position + 4, end, opts)
  211. position += obj_size
  212. if "$ref" in obj:
  213. return (DBRef(obj.pop("$ref"), obj.pop("$id", None),
  214. obj.pop("$db", None), obj), position)
  215. return obj, position
def _get_array(data, view, position, obj_end, opts, element_name):
    """Decode a BSON array to python list.

    Returns a ``(list, next_position)`` tuple. Raises InvalidBSON when the
    array's last byte is not the end-of-object marker, when an element has
    a type byte with no registered decoder, or when the elements do not
    end exactly where the size prefix says they should.
    """
    size = _UNPACK_INT_FROM(data, position)[0]
    end = position + size - 1
    if data[end] != _OBJEND:
        raise InvalidBSON("bad eoo")

    # Step over the 4-byte size prefix. `end` is moved back by one so the
    # loop stops before the array's trailing end-of-object byte.
    position += 4
    end -= 1
    result = []

    # Avoid doing global and attribute lookups in the loop.
    append = result.append
    index = data.index
    getter = _ELEMENT_GETTER
    decoder_map = opts.type_registry._decoder_map

    while position < end:
        element_type = data[position]
        # Just skip the keys.
        position = index(b'\x00', position) + 1
        try:
            value, position = getter[element_type](
                data, view, position, obj_end, opts, element_name)
        except KeyError:
            _raise_unknown_type(element_type, element_name)

        # Apply a user-registered decoder for this value's type, if any.
        if decoder_map:
            custom_decoder = decoder_map.get(type(value))
            if custom_decoder is not None:
                value = custom_decoder(value)

        append(value)

    # The last element must finish exactly on the end-of-object byte.
    if position != end + 1:
        raise InvalidBSON('bad array length')
    return result, position + 1
def _get_binary(data, view, position, obj_end, opts, dummy1):
    """Decode a BSON binary to bson.binary.Binary or python UUID.

    Returns a ``(value, next_position)`` tuple. For UUID subtypes the value
    is the result of ``Binary.as_uuid()``, or the Binary itself when
    ``opts.uuid_representation`` is UNSPECIFIED. On Python 3, subtype 0
    is returned as plain bytes. Everything else is returned as a Binary.
    Raises InvalidBSON when the subtype-2 inner length does not match or
    the payload length is negative or runs past `obj_end`.
    """
    length, subtype = _UNPACK_LENGTH_SUBTYPE_FROM(data, position)
    # Step over the 4-byte length and the 1-byte subtype.
    position += 5
    if subtype == 2:
        # Subtype 2 carries a second length prefix inside the payload; it
        # must equal the outer length minus its own 4 bytes.
        length2 = _UNPACK_INT_FROM(data, position)[0]
        position += 4
        if length2 != length - 4:
            raise InvalidBSON("invalid binary (st 2) - lengths don't match!")
        length = length2
    end = position + length
    if length < 0 or end > obj_end:
        raise InvalidBSON('bad binary object length')

    # Convert UUID subtypes to native UUIDs.
    # TODO: PYTHON-2245 Decoding should follow UUID spec in PyMongo 4.0+
    if subtype in ALL_UUID_SUBTYPES:
        uuid_representation = opts.uuid_representation
        binary_value = Binary(data[position:end], subtype)
        if uuid_representation == UuidRepresentation.UNSPECIFIED:
            return binary_value, end
        if subtype == UUID_SUBTYPE:
            # Legacy behavior: use STANDARD with binary subtype 4.
            uuid_representation = UuidRepresentation.STANDARD
        elif uuid_representation == UuidRepresentation.STANDARD:
            # subtype == OLD_UUID_SUBTYPE
            # Legacy behavior: STANDARD is the same as PYTHON_LEGACY.
            uuid_representation = UuidRepresentation.PYTHON_LEGACY
        return binary_value.as_uuid(uuid_representation), end

    # Python3 special case. Decode subtype 0 to 'bytes'.
    if PY3 and subtype == 0:
        value = data[position:end]
    else:
        value = Binary(data[position:end], subtype)
    return value, end
  281. def _get_oid(data, view, position, dummy0, dummy1, dummy2):
  282. """Decode a BSON ObjectId to bson.objectid.ObjectId."""
  283. end = position + 12
  284. return ObjectId(data[position:end]), end
  285. def _get_boolean(data, view, position, dummy0, dummy1, dummy2):
  286. """Decode a BSON true/false to python True/False."""
  287. end = position + 1
  288. boolean_byte = data[position:end]
  289. if boolean_byte == b'\x00':
  290. return False, end
  291. elif boolean_byte == b'\x01':
  292. return True, end
  293. raise InvalidBSON('invalid boolean value: %r' % boolean_byte)
  294. def _get_date(data, view, position, dummy0, opts, dummy1):
  295. """Decode a BSON datetime to python datetime.datetime."""
  296. return _millis_to_datetime(
  297. _UNPACK_LONG_FROM(data, position)[0], opts), position + 8
  298. def _get_code(data, view, position, obj_end, opts, element_name):
  299. """Decode a BSON code to bson.code.Code."""
  300. code, position = _get_string(data, view, position, obj_end, opts, element_name)
  301. return Code(code), position
  302. def _get_code_w_scope(data, view, position, obj_end, opts, element_name):
  303. """Decode a BSON code_w_scope to bson.code.Code."""
  304. code_end = position + _UNPACK_INT_FROM(data, position)[0]
  305. code, position = _get_string(
  306. data, view, position + 4, code_end, opts, element_name)
  307. scope, position = _get_object(data, view, position, code_end, opts, element_name)
  308. if position != code_end:
  309. raise InvalidBSON('scope outside of javascript code boundaries')
  310. return Code(code, scope), position
  311. def _get_regex(data, view, position, dummy0, opts, dummy1):
  312. """Decode a BSON regex to bson.regex.Regex or a python pattern object."""
  313. pattern, position = _get_c_string(data, view, position, opts)
  314. bson_flags, position = _get_c_string(data, view, position, opts)
  315. bson_re = Regex(pattern, bson_flags)
  316. return bson_re, position
  317. def _get_ref(data, view, position, obj_end, opts, element_name):
  318. """Decode (deprecated) BSON DBPointer to bson.dbref.DBRef."""
  319. collection, position = _get_string(
  320. data, view, position, obj_end, opts, element_name)
  321. oid, position = _get_oid(data, view, position, obj_end, opts, element_name)
  322. return DBRef(collection, oid), position
  323. def _get_timestamp(data, view, position, dummy0, dummy1, dummy2):
  324. """Decode a BSON timestamp to bson.timestamp.Timestamp."""
  325. inc, timestamp = _UNPACK_TIMESTAMP_FROM(data, position)
  326. return Timestamp(timestamp, inc), position + 8
  327. def _get_int64(data, view, position, dummy0, dummy1, dummy2):
  328. """Decode a BSON int64 to bson.int64.Int64."""
  329. return Int64(_UNPACK_LONG_FROM(data, position)[0]), position + 8
  330. def _get_decimal128(data, view, position, dummy0, dummy1, dummy2):
  331. """Decode a BSON decimal128 to bson.decimal128.Decimal128."""
  332. end = position + 16
  333. return Decimal128.from_bid(data[position:end]), end
# Each decoder function's signature is:
# - data: bytes
# - view: memoryview that references `data`
# - position: int, beginning of object in 'data' to decode
# - obj_end: int, end of object to decode in 'data' if variable-length type
# - opts: a CodecOptions
# - element_name: name of the element being decoded (used in error messages)
# Each decoder returns a (value, new_position) tuple.
_ELEMENT_GETTER = {
    _maybe_ord(BSONNUM): _get_float,
    _maybe_ord(BSONSTR): _get_string,
    _maybe_ord(BSONOBJ): _get_object,
    _maybe_ord(BSONARR): _get_array,
    _maybe_ord(BSONBIN): _get_binary,
    _maybe_ord(BSONUND): lambda u, v, w, x, y, z: (None, w),  # Deprecated undefined
    _maybe_ord(BSONOID): _get_oid,
    _maybe_ord(BSONBOO): _get_boolean,
    _maybe_ord(BSONDAT): _get_date,
    # Null has no payload: the position is returned unchanged.
    _maybe_ord(BSONNUL): lambda u, v, w, x, y, z: (None, w),
    _maybe_ord(BSONRGX): _get_regex,
    _maybe_ord(BSONREF): _get_ref,  # Deprecated DBPointer
    _maybe_ord(BSONCOD): _get_code,
    _maybe_ord(BSONSYM): _get_string,  # Deprecated symbol
    _maybe_ord(BSONCWS): _get_code_w_scope,
    _maybe_ord(BSONINT): _get_int,
    _maybe_ord(BSONTIM): _get_timestamp,
    _maybe_ord(BSONLON): _get_int64,
    _maybe_ord(BSONDEC): _get_decimal128,
    # Min/max key have no payload either.
    _maybe_ord(BSONMIN): lambda u, v, w, x, y, z: (MinKey(), w),
    _maybe_ord(BSONMAX): lambda u, v, w, x, y, z: (MaxKey(), w)}
  362. if _USE_C:
  363. def _element_to_dict(data, view, position, obj_end, opts):
  364. return _cbson._element_to_dict(data, position, obj_end, opts)
  365. else:
  366. def _element_to_dict(data, view, position, obj_end, opts):
  367. """Decode a single key, value pair."""
  368. element_type = data[position]
  369. position += 1
  370. element_name, position = _get_c_string(data, view, position, opts)
  371. try:
  372. value, position = _ELEMENT_GETTER[element_type](data, view, position,
  373. obj_end, opts,
  374. element_name)
  375. except KeyError:
  376. _raise_unknown_type(element_type, element_name)
  377. if opts.type_registry._decoder_map:
  378. custom_decoder = opts.type_registry._decoder_map.get(type(value))
  379. if custom_decoder is not None:
  380. value = custom_decoder(value)
  381. return element_name, value, position
  382. def _raw_to_dict(data, position, obj_end, opts, result):
  383. data, view = get_data_and_view(data)
  384. return _elements_to_dict(data, view, position, obj_end, opts, result)
  385. def _elements_to_dict(data, view, position, obj_end, opts, result=None):
  386. """Decode a BSON document into result."""
  387. if result is None:
  388. result = opts.document_class()
  389. end = obj_end - 1
  390. while position < end:
  391. key, value, position = _element_to_dict(data, view, position, obj_end, opts)
  392. result[key] = value
  393. if position != obj_end:
  394. raise InvalidBSON('bad object or element length')
  395. return result
  396. def _bson_to_dict(data, opts):
  397. """Decode a BSON string to document_class."""
  398. data, view = get_data_and_view(data)
  399. try:
  400. if _raw_document_class(opts.document_class):
  401. return opts.document_class(data, opts)
  402. _, end = _get_object_size(data, 0, len(data))
  403. return _elements_to_dict(data, view, 4, end, opts)
  404. except InvalidBSON:
  405. raise
  406. except Exception:
  407. # Change exception type to InvalidBSON but preserve traceback.
  408. _, exc_value, exc_tb = sys.exc_info()
  409. reraise(InvalidBSON, exc_value, exc_tb)
# When the C extension imported successfully, its decoder replaces the
# pure-Python _bson_to_dict defined above.
if _USE_C:
    _bson_to_dict = _cbson._bson_to_dict

# Pre-compiled little-endian packers used by the encoders below.
_PACK_FLOAT = struct.Struct("<d").pack
_PACK_INT = struct.Struct("<i").pack
_PACK_LENGTH_SUBTYPE = struct.Struct("<iB").pack
_PACK_LONG = struct.Struct("<q").pack
_PACK_TIMESTAMP = struct.Struct("<II").pack

# Cache of the first 1000 NUL-terminated array keys ("0" through "999"),
# consumed by gen_list_name().
_LIST_NAMES = tuple(b(str(i)) + b"\x00" for i in range(1000))
  418. def gen_list_name():
  419. """Generate "keys" for encoded lists in the sequence
  420. b"0\x00", b"1\x00", b"2\x00", ...
  421. The first 1000 keys are returned from a pre-built cache. All
  422. subsequent keys are generated on the fly.
  423. """
  424. for name in _LIST_NAMES:
  425. yield name
  426. counter = itertools.count(1000)
  427. while True:
  428. yield b(str(next(counter))) + b"\x00"
  429. def _make_c_string_check(string):
  430. """Make a 'C' string, checking for embedded NUL characters."""
  431. if isinstance(string, bytes):
  432. if b"\x00" in string:
  433. raise InvalidDocument("BSON keys / regex patterns must not "
  434. "contain a NUL character")
  435. try:
  436. _utf_8_decode(string, None, True)
  437. return string + b"\x00"
  438. except UnicodeError:
  439. raise InvalidStringData("strings in documents must be valid "
  440. "UTF-8: %r" % string)
  441. else:
  442. if "\x00" in string:
  443. raise InvalidDocument("BSON keys / regex patterns must not "
  444. "contain a NUL character")
  445. return _utf_8_encode(string)[0] + b"\x00"
  446. def _make_c_string(string):
  447. """Make a 'C' string."""
  448. if isinstance(string, bytes):
  449. try:
  450. _utf_8_decode(string, None, True)
  451. return string + b"\x00"
  452. except UnicodeError:
  453. raise InvalidStringData("strings in documents must be valid "
  454. "UTF-8: %r" % string)
  455. else:
  456. return _utf_8_encode(string)[0] + b"\x00"
  457. if PY3:
  458. def _make_name(string):
  459. """Make a 'C' string suitable for a BSON key."""
  460. # Keys can only be text in python 3.
  461. if "\x00" in string:
  462. raise InvalidDocument("BSON keys / regex patterns must not "
  463. "contain a NUL character")
  464. return _utf_8_encode(string)[0] + b"\x00"
  465. else:
  466. # Keys can be unicode or bytes in python 2.
  467. _make_name = _make_c_string_check
  468. def _encode_float(name, value, dummy0, dummy1):
  469. """Encode a float."""
  470. return b"\x01" + name + _PACK_FLOAT(value)
  471. if PY3:
  472. def _encode_bytes(name, value, dummy0, dummy1):
  473. """Encode a python bytes."""
  474. # Python3 special case. Store 'bytes' as BSON binary subtype 0.
  475. return b"\x05" + name + _PACK_INT(len(value)) + b"\x00" + value
  476. else:
  477. def _encode_bytes(name, value, dummy0, dummy1):
  478. """Encode a python str (python 2.x)."""
  479. try:
  480. _utf_8_decode(value, None, True)
  481. except UnicodeError:
  482. raise InvalidStringData("strings in documents must be valid "
  483. "UTF-8: %r" % (value,))
  484. return b"\x02" + name + _PACK_INT(len(value) + 1) + value + b"\x00"
  485. def _encode_mapping(name, value, check_keys, opts):
  486. """Encode a mapping type."""
  487. if _raw_document_class(value):
  488. return b'\x03' + name + value.raw
  489. data = b"".join([_element_to_bson(key, val, check_keys, opts)
  490. for key, val in iteritems(value)])
  491. return b"\x03" + name + _PACK_INT(len(data) + 5) + data + b"\x00"
  492. def _encode_dbref(name, value, check_keys, opts):
  493. """Encode bson.dbref.DBRef."""
  494. buf = bytearray(b"\x03" + name + b"\x00\x00\x00\x00")
  495. begin = len(buf) - 4
  496. buf += _name_value_to_bson(b"$ref\x00",
  497. value.collection, check_keys, opts)
  498. buf += _name_value_to_bson(b"$id\x00",
  499. value.id, check_keys, opts)
  500. if value.database is not None:
  501. buf += _name_value_to_bson(
  502. b"$db\x00", value.database, check_keys, opts)
  503. for key, val in iteritems(value._DBRef__kwargs):
  504. buf += _element_to_bson(key, val, check_keys, opts)
  505. buf += b"\x00"
  506. buf[begin:begin + 4] = _PACK_INT(len(buf) - begin)
  507. return bytes(buf)
  508. def _encode_list(name, value, check_keys, opts):
  509. """Encode a list/tuple."""
  510. lname = gen_list_name()
  511. data = b"".join([_name_value_to_bson(next(lname), item,
  512. check_keys, opts)
  513. for item in value])
  514. return b"\x04" + name + _PACK_INT(len(data) + 5) + data + b"\x00"
  515. def _encode_text(name, value, dummy0, dummy1):
  516. """Encode a python unicode (python 2.x) / str (python 3.x)."""
  517. value = _utf_8_encode(value)[0]
  518. return b"\x02" + name + _PACK_INT(len(value) + 1) + value + b"\x00"
  519. def _encode_binary(name, value, dummy0, dummy1):
  520. """Encode bson.binary.Binary."""
  521. subtype = value.subtype
  522. if subtype == 2:
  523. value = _PACK_INT(len(value)) + value
  524. return b"\x05" + name + _PACK_LENGTH_SUBTYPE(len(value), subtype) + value
  525. def _encode_uuid(name, value, dummy, opts):
  526. """Encode uuid.UUID."""
  527. uuid_representation = opts.uuid_representation
  528. binval = Binary.from_uuid(value, uuid_representation=uuid_representation)
  529. return _encode_binary(name, binval, dummy, opts)
  530. def _encode_objectid(name, value, dummy0, dummy1):
  531. """Encode bson.objectid.ObjectId."""
  532. return b"\x07" + name + value.binary
  533. def _encode_bool(name, value, dummy0, dummy1):
  534. """Encode a python boolean (True/False)."""
  535. return b"\x08" + name + (value and b"\x01" or b"\x00")
  536. def _encode_datetime(name, value, dummy0, dummy1):
  537. """Encode datetime.datetime."""
  538. millis = _datetime_to_millis(value)
  539. return b"\x09" + name + _PACK_LONG(millis)
  540. def _encode_none(name, dummy0, dummy1, dummy2):
  541. """Encode python None."""
  542. return b"\x0A" + name
  543. def _encode_regex(name, value, dummy0, dummy1):
  544. """Encode a python regex or bson.regex.Regex."""
  545. flags = value.flags
  546. # Python 2 common case
  547. if flags == 0:
  548. return b"\x0B" + name + _make_c_string_check(value.pattern) + b"\x00"
  549. # Python 3 common case
  550. elif flags == re.UNICODE:
  551. return b"\x0B" + name + _make_c_string_check(value.pattern) + b"u\x00"
  552. else:
  553. sflags = b""
  554. if flags & re.IGNORECASE:
  555. sflags += b"i"
  556. if flags & re.LOCALE:
  557. sflags += b"l"
  558. if flags & re.MULTILINE:
  559. sflags += b"m"
  560. if flags & re.DOTALL:
  561. sflags += b"s"
  562. if flags & re.UNICODE:
  563. sflags += b"u"
  564. if flags & re.VERBOSE:
  565. sflags += b"x"
  566. sflags += b"\x00"
  567. return b"\x0B" + name + _make_c_string_check(value.pattern) + sflags
  568. def _encode_code(name, value, dummy, opts):
  569. """Encode bson.code.Code."""
  570. cstring = _make_c_string(value)
  571. cstrlen = len(cstring)
  572. if value.scope is None:
  573. return b"\x0D" + name + _PACK_INT(cstrlen) + cstring
  574. scope = _dict_to_bson(value.scope, False, opts, False)
  575. full_length = _PACK_INT(8 + cstrlen + len(scope))
  576. return b"\x0F" + name + full_length + _PACK_INT(cstrlen) + cstring + scope
  577. def _encode_int(name, value, dummy0, dummy1):
  578. """Encode a python int."""
  579. if -2147483648 <= value <= 2147483647:
  580. return b"\x10" + name + _PACK_INT(value)
  581. else:
  582. try:
  583. return b"\x12" + name + _PACK_LONG(value)
  584. except struct.error:
  585. raise OverflowError("BSON can only handle up to 8-byte ints")
  586. def _encode_timestamp(name, value, dummy0, dummy1):
  587. """Encode bson.timestamp.Timestamp."""
  588. return b"\x11" + name + _PACK_TIMESTAMP(value.inc, value.time)
  589. def _encode_long(name, value, dummy0, dummy1):
  590. """Encode a python long (python 2.x)"""
  591. try:
  592. return b"\x12" + name + _PACK_LONG(value)
  593. except struct.error:
  594. raise OverflowError("BSON can only handle up to 8-byte ints")
  595. def _encode_decimal128(name, value, dummy0, dummy1):
  596. """Encode bson.decimal128.Decimal128."""
  597. return b"\x13" + name + value.bid
  598. def _encode_minkey(name, dummy0, dummy1, dummy2):
  599. """Encode bson.min_key.MinKey."""
  600. return b"\xFF" + name
  601. def _encode_maxkey(name, dummy0, dummy1, dummy2):
  602. """Encode bson.max_key.MaxKey."""
  603. return b"\x7F" + name
# Each encoder function's signature is:
# - name: utf-8 bytes
# - value: a Python data type, e.g. a Python int for _encode_int
# - check_keys: bool, whether to check for invalid names
# - opts: a CodecOptions
# Each encoder returns the encoded element as bytes.
# Dispatch table keyed by the exact type of the value being encoded.
_ENCODERS = {
    bool: _encode_bool,
    bytes: _encode_bytes,
    datetime.datetime: _encode_datetime,
    dict: _encode_mapping,
    float: _encode_float,
    int: _encode_int,
    list: _encode_list,
    # unicode in py2, str in py3
    text_type: _encode_text,
    tuple: _encode_list,
    type(None): _encode_none,
    uuid.UUID: _encode_uuid,
    Binary: _encode_binary,
    Int64: _encode_long,
    Code: _encode_code,
    DBRef: _encode_dbref,
    MaxKey: _encode_maxkey,
    MinKey: _encode_minkey,
    ObjectId: _encode_objectid,
    Regex: _encode_regex,
    RE_TYPE: _encode_regex,
    SON: _encode_mapping,
    Timestamp: _encode_timestamp,
    UUIDLegacy: _encode_binary,
    Decimal128: _encode_decimal128,
    # Special case. This will never be looked up directly.
    abc.Mapping: _encode_mapping,
}

# Fallback dispatch keyed by a value's integer `_type_marker` attribute,
# consulted when the value's exact type is not in _ENCODERS.
_MARKERS = {
    5: _encode_binary,
    7: _encode_objectid,
    11: _encode_regex,
    13: _encode_code,
    17: _encode_timestamp,
    18: _encode_long,
    100: _encode_dbref,
    127: _encode_maxkey,
    255: _encode_minkey,
}

# `long` only exists on Python 2.
if not PY3:
    _ENCODERS[long] = _encode_long

# Snapshot of the built-in encodable types, taken before any subtype is
# cached into _ENCODERS at runtime.
_BUILT_IN_TYPES = tuple(t for t in _ENCODERS)
  652. def _name_value_to_bson(name, value, check_keys, opts,
  653. in_custom_call=False,
  654. in_fallback_call=False):
  655. """Encode a single name, value pair."""
  656. # First see if the type is already cached. KeyError will only ever
  657. # happen once per subtype.
  658. try:
  659. return _ENCODERS[type(value)](name, value, check_keys, opts)
  660. except KeyError:
  661. pass
  662. # Second, fall back to trying _type_marker. This has to be done
  663. # before the loop below since users could subclass one of our
  664. # custom types that subclasses a python built-in (e.g. Binary)
  665. marker = getattr(value, "_type_marker", None)
  666. if isinstance(marker, int) and marker in _MARKERS:
  667. func = _MARKERS[marker]
  668. # Cache this type for faster subsequent lookup.
  669. _ENCODERS[type(value)] = func
  670. return func(name, value, check_keys, opts)
  671. # Third, check if a type encoder is registered for this type.
  672. # Note that subtypes of registered custom types are not auto-encoded.
  673. if not in_custom_call and opts.type_registry._encoder_map:
  674. custom_encoder = opts.type_registry._encoder_map.get(type(value))
  675. if custom_encoder is not None:
  676. return _name_value_to_bson(
  677. name, custom_encoder(value), check_keys, opts,
  678. in_custom_call=True)
  679. # Fourth, test each base type. This will only happen once for
  680. # a subtype of a supported base type. Unlike in the C-extensions, this
  681. # is done after trying the custom type encoder because checking for each
  682. # subtype is expensive.
  683. for base in _BUILT_IN_TYPES:
  684. if isinstance(value, base):
  685. func = _ENCODERS[base]
  686. # Cache this type for faster subsequent lookup.
  687. _ENCODERS[type(value)] = func
  688. return func(name, value, check_keys, opts)
  689. # As a last resort, try using the fallback encoder, if the user has
  690. # provided one.
  691. fallback_encoder = opts.type_registry._fallback_encoder
  692. if not in_fallback_call and fallback_encoder is not None:
  693. return _name_value_to_bson(
  694. name, fallback_encoder(value), check_keys, opts,
  695. in_fallback_call=True)
  696. raise InvalidDocument(
  697. "cannot encode object: %r, of type: %r" % (value, type(value)))
  698. def _element_to_bson(key, value, check_keys, opts):
  699. """Encode a single key, value pair."""
  700. if not isinstance(key, string_type):
  701. raise InvalidDocument("documents must have only string keys, "
  702. "key was %r" % (key,))
  703. if check_keys:
  704. if key.startswith("$"):
  705. raise InvalidDocument("key %r must not start with '$'" % (key,))
  706. if "." in key:
  707. raise InvalidDocument("key %r must not contain '.'" % (key,))
  708. name = _make_name(key)
  709. return _name_value_to_bson(name, value, check_keys, opts)
  710. def _dict_to_bson(doc, check_keys, opts, top_level=True):
  711. """Encode a document to BSON."""
  712. if _raw_document_class(doc):
  713. return doc.raw
  714. try:
  715. elements = []
  716. if top_level and "_id" in doc:
  717. elements.append(_name_value_to_bson(b"_id\x00", doc["_id"],
  718. check_keys, opts))
  719. for (key, value) in iteritems(doc):
  720. if not top_level or key != "_id":
  721. elements.append(_element_to_bson(key, value,
  722. check_keys, opts))
  723. except AttributeError:
  724. raise TypeError("encoder expected a mapping type but got: %r" % (doc,))
  725. encoded = b"".join(elements)
  726. return _PACK_INT(len(encoded) + 5) + encoded + b"\x00"
  727. if _USE_C:
  728. _dict_to_bson = _cbson._dict_to_bson
  729. def _millis_to_datetime(millis, opts):
  730. """Convert milliseconds since epoch UTC to datetime."""
  731. diff = ((millis % 1000) + 1000) % 1000
  732. seconds = (millis - diff) // 1000
  733. micros = diff * 1000
  734. if opts.tz_aware:
  735. dt = EPOCH_AWARE + datetime.timedelta(seconds=seconds,
  736. microseconds=micros)
  737. if opts.tzinfo:
  738. dt = dt.astimezone(opts.tzinfo)
  739. return dt
  740. else:
  741. return EPOCH_NAIVE + datetime.timedelta(seconds=seconds,
  742. microseconds=micros)
  743. def _datetime_to_millis(dtm):
  744. """Convert datetime to milliseconds since epoch UTC."""
  745. if dtm.utcoffset() is not None:
  746. dtm = dtm - dtm.utcoffset()
  747. return int(calendar.timegm(dtm.timetuple()) * 1000 +
  748. dtm.microsecond // 1000)
  749. _CODEC_OPTIONS_TYPE_ERROR = TypeError(
  750. "codec_options must be an instance of CodecOptions")
  751. def encode(document, check_keys=False, codec_options=DEFAULT_CODEC_OPTIONS):
  752. """Encode a document to BSON.
  753. A document can be any mapping type (like :class:`dict`).
  754. Raises :class:`TypeError` if `document` is not a mapping type,
  755. or contains keys that are not instances of
  756. :class:`basestring` (:class:`str` in python 3). Raises
  757. :class:`~bson.errors.InvalidDocument` if `document` cannot be
  758. converted to :class:`BSON`.
  759. :Parameters:
  760. - `document`: mapping type representing a document
  761. - `check_keys` (optional): check if keys start with '$' or
  762. contain '.', raising :class:`~bson.errors.InvalidDocument` in
  763. either case
  764. - `codec_options` (optional): An instance of
  765. :class:`~bson.codec_options.CodecOptions`.
  766. .. versionadded:: 3.9
  767. """
  768. if not isinstance(codec_options, CodecOptions):
  769. raise _CODEC_OPTIONS_TYPE_ERROR
  770. return _dict_to_bson(document, check_keys, codec_options)
  771. def decode(data, codec_options=DEFAULT_CODEC_OPTIONS):
  772. """Decode BSON to a document.
  773. By default, returns a BSON document represented as a Python
  774. :class:`dict`. To use a different :class:`MutableMapping` class,
  775. configure a :class:`~bson.codec_options.CodecOptions`::
  776. >>> import collections # From Python standard library.
  777. >>> import bson
  778. >>> from bson.codec_options import CodecOptions
  779. >>> data = bson.encode({'a': 1})
  780. >>> decoded_doc = bson.decode(data)
  781. <type 'dict'>
  782. >>> options = CodecOptions(document_class=collections.OrderedDict)
  783. >>> decoded_doc = bson.decode(data, codec_options=options)
  784. >>> type(decoded_doc)
  785. <class 'collections.OrderedDict'>
  786. :Parameters:
  787. - `data`: the BSON to decode. Any bytes-like object that implements
  788. the buffer protocol.
  789. - `codec_options` (optional): An instance of
  790. :class:`~bson.codec_options.CodecOptions`.
  791. .. versionadded:: 3.9
  792. """
  793. if not isinstance(codec_options, CodecOptions):
  794. raise _CODEC_OPTIONS_TYPE_ERROR
  795. return _bson_to_dict(data, codec_options)
  796. def decode_all(data, codec_options=DEFAULT_CODEC_OPTIONS):
  797. """Decode BSON data to multiple documents.
  798. `data` must be a bytes-like object implementing the buffer protocol that
  799. provides concatenated, valid, BSON-encoded documents.
  800. :Parameters:
  801. - `data`: BSON data
  802. - `codec_options` (optional): An instance of
  803. :class:`~bson.codec_options.CodecOptions`.
  804. .. versionchanged:: 3.9
  805. Supports bytes-like objects that implement the buffer protocol.
  806. .. versionchanged:: 3.0
  807. Removed `compile_re` option: PyMongo now always represents BSON regular
  808. expressions as :class:`~bson.regex.Regex` objects. Use
  809. :meth:`~bson.regex.Regex.try_compile` to attempt to convert from a
  810. BSON regular expression to a Python regular expression object.
  811. Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with
  812. `codec_options`.
  813. .. versionchanged:: 2.7
  814. Added `compile_re` option. If set to False, PyMongo represented BSON
  815. regular expressions as :class:`~bson.regex.Regex` objects instead of
  816. attempting to compile BSON regular expressions as Python native
  817. regular expressions, thus preventing errors for some incompatible
  818. patterns, see `PYTHON-500`_.
  819. .. _PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500
  820. """
  821. data, view = get_data_and_view(data)
  822. if not isinstance(codec_options, CodecOptions):
  823. raise _CODEC_OPTIONS_TYPE_ERROR
  824. data_len = len(data)
  825. docs = []
  826. position = 0
  827. end = data_len - 1
  828. use_raw = _raw_document_class(codec_options.document_class)
  829. try:
  830. while position < end:
  831. obj_size = _UNPACK_INT_FROM(data, position)[0]
  832. if data_len - position < obj_size:
  833. raise InvalidBSON("invalid object size")
  834. obj_end = position + obj_size - 1
  835. if data[obj_end] != _OBJEND:
  836. raise InvalidBSON("bad eoo")
  837. if use_raw:
  838. docs.append(
  839. codec_options.document_class(
  840. data[position:obj_end + 1], codec_options))
  841. else:
  842. docs.append(_elements_to_dict(data,
  843. view,
  844. position + 4,
  845. obj_end,
  846. codec_options))
  847. position += obj_size
  848. return docs
  849. except InvalidBSON:
  850. raise
  851. except Exception:
  852. # Change exception type to InvalidBSON but preserve traceback.
  853. _, exc_value, exc_tb = sys.exc_info()
  854. reraise(InvalidBSON, exc_value, exc_tb)
  855. if _USE_C:
  856. decode_all = _cbson.decode_all
  857. def _decode_selective(rawdoc, fields, codec_options):
  858. if _raw_document_class(codec_options.document_class):
  859. # If document_class is RawBSONDocument, use vanilla dictionary for
  860. # decoding command response.
  861. doc = {}
  862. else:
  863. # Else, use the specified document_class.
  864. doc = codec_options.document_class()
  865. for key, value in iteritems(rawdoc):
  866. if key in fields:
  867. if fields[key] == 1:
  868. doc[key] = _bson_to_dict(rawdoc.raw, codec_options)[key]
  869. else:
  870. doc[key] = _decode_selective(value, fields[key], codec_options)
  871. else:
  872. doc[key] = value
  873. return doc
  874. def _convert_raw_document_lists_to_streams(document):
  875. cursor = document.get('cursor')
  876. if cursor:
  877. for key in ('firstBatch', 'nextBatch'):
  878. batch = cursor.get(key)
  879. if batch:
  880. stream = b"".join(doc.raw for doc in batch)
  881. cursor[key] = [stream]
  882. def _decode_all_selective(data, codec_options, fields):
  883. """Decode BSON data to a single document while using user-provided
  884. custom decoding logic.
  885. `data` must be a string representing a valid, BSON-encoded document.
  886. :Parameters:
  887. - `data`: BSON data
  888. - `codec_options`: An instance of
  889. :class:`~bson.codec_options.CodecOptions` with user-specified type
  890. decoders. If no decoders are found, this method is the same as
  891. ``decode_all``.
  892. - `fields`: Map of document namespaces where data that needs
  893. to be custom decoded lives or None. For example, to custom decode a
  894. list of objects in 'field1.subfield1', the specified value should be
  895. ``{'field1': {'subfield1': 1}}``. If ``fields`` is an empty map or
  896. None, this method is the same as ``decode_all``.
  897. :Returns:
  898. - `document_list`: Single-member list containing the decoded document.
  899. .. versionadded:: 3.8
  900. """
  901. if not codec_options.type_registry._decoder_map:
  902. return decode_all(data, codec_options)
  903. if not fields:
  904. return decode_all(data, codec_options.with_options(type_registry=None))
  905. # Decode documents for internal use.
  906. from bson.raw_bson import RawBSONDocument
  907. internal_codec_options = codec_options.with_options(
  908. document_class=RawBSONDocument, type_registry=None)
  909. _doc = _bson_to_dict(data, internal_codec_options)
  910. return [_decode_selective(_doc, fields, codec_options,)]
  911. def decode_iter(data, codec_options=DEFAULT_CODEC_OPTIONS):
  912. """Decode BSON data to multiple documents as a generator.
  913. Works similarly to the decode_all function, but yields one document at a
  914. time.
  915. `data` must be a string of concatenated, valid, BSON-encoded
  916. documents.
  917. :Parameters:
  918. - `data`: BSON data
  919. - `codec_options` (optional): An instance of
  920. :class:`~bson.codec_options.CodecOptions`.
  921. .. versionchanged:: 3.0
  922. Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with
  923. `codec_options`.
  924. .. versionadded:: 2.8
  925. """
  926. if not isinstance(codec_options, CodecOptions):
  927. raise _CODEC_OPTIONS_TYPE_ERROR
  928. position = 0
  929. end = len(data) - 1
  930. while position < end:
  931. obj_size = _UNPACK_INT_FROM(data, position)[0]
  932. elements = data[position:position + obj_size]
  933. position += obj_size
  934. yield _bson_to_dict(elements, codec_options)
  935. def decode_file_iter(file_obj, codec_options=DEFAULT_CODEC_OPTIONS):
  936. """Decode bson data from a file to multiple documents as a generator.
  937. Works similarly to the decode_all function, but reads from the file object
  938. in chunks and parses bson in chunks, yielding one document at a time.
  939. :Parameters:
  940. - `file_obj`: A file object containing BSON data.
  941. - `codec_options` (optional): An instance of
  942. :class:`~bson.codec_options.CodecOptions`.
  943. .. versionchanged:: 3.0
  944. Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with
  945. `codec_options`.
  946. .. versionadded:: 2.8
  947. """
  948. while True:
  949. # Read size of next object.
  950. size_data = file_obj.read(4)
  951. if not size_data:
  952. break # Finished with file normaly.
  953. elif len(size_data) != 4:
  954. raise InvalidBSON("cut off in middle of objsize")
  955. obj_size = _UNPACK_INT_FROM(size_data, 0)[0] - 4
  956. elements = size_data + file_obj.read(max(0, obj_size))
  957. yield _bson_to_dict(elements, codec_options)
  958. def is_valid(bson):
  959. """Check that the given string represents valid :class:`BSON` data.
  960. Raises :class:`TypeError` if `bson` is not an instance of
  961. :class:`str` (:class:`bytes` in python 3). Returns ``True``
  962. if `bson` is valid :class:`BSON`, ``False`` otherwise.
  963. :Parameters:
  964. - `bson`: the data to be validated
  965. """
  966. if not isinstance(bson, bytes):
  967. raise TypeError("BSON data must be an instance of a subclass of bytes")
  968. try:
  969. _bson_to_dict(bson, DEFAULT_CODEC_OPTIONS)
  970. return True
  971. except Exception:
  972. return False
  973. class BSON(bytes):
  974. """BSON (Binary JSON) data.
  975. .. warning:: Using this class to encode and decode BSON adds a performance
  976. cost. For better performance use the module level functions
  977. :func:`encode` and :func:`decode` instead.
  978. """
  979. @classmethod
  980. def encode(cls, document, check_keys=False,
  981. codec_options=DEFAULT_CODEC_OPTIONS):
  982. """Encode a document to a new :class:`BSON` instance.
  983. A document can be any mapping type (like :class:`dict`).
  984. Raises :class:`TypeError` if `document` is not a mapping type,
  985. or contains keys that are not instances of
  986. :class:`basestring` (:class:`str` in python 3). Raises
  987. :class:`~bson.errors.InvalidDocument` if `document` cannot be
  988. converted to :class:`BSON`.
  989. :Parameters:
  990. - `document`: mapping type representing a document
  991. - `check_keys` (optional): check if keys start with '$' or
  992. contain '.', raising :class:`~bson.errors.InvalidDocument` in
  993. either case
  994. - `codec_options` (optional): An instance of
  995. :class:`~bson.codec_options.CodecOptions`.
  996. .. versionchanged:: 3.0
  997. Replaced `uuid_subtype` option with `codec_options`.
  998. """
  999. return cls(encode(document, check_keys, codec_options))
  1000. def decode(self, codec_options=DEFAULT_CODEC_OPTIONS):
  1001. """Decode this BSON data.
  1002. By default, returns a BSON document represented as a Python
  1003. :class:`dict`. To use a different :class:`MutableMapping` class,
  1004. configure a :class:`~bson.codec_options.CodecOptions`::
  1005. >>> import collections # From Python standard library.
  1006. >>> import bson
  1007. >>> from bson.codec_options import CodecOptions
  1008. >>> data = bson.BSON.encode({'a': 1})
  1009. >>> decoded_doc = bson.BSON(data).decode()
  1010. <type 'dict'>
  1011. >>> options = CodecOptions(document_class=collections.OrderedDict)
  1012. >>> decoded_doc = bson.BSON(data).decode(codec_options=options)
  1013. >>> type(decoded_doc)
  1014. <class 'collections.OrderedDict'>
  1015. :Parameters:
  1016. - `codec_options` (optional): An instance of
  1017. :class:`~bson.codec_options.CodecOptions`.
  1018. .. versionchanged:: 3.0
  1019. Removed `compile_re` option: PyMongo now always represents BSON
  1020. regular expressions as :class:`~bson.regex.Regex` objects. Use
  1021. :meth:`~bson.regex.Regex.try_compile` to attempt to convert from a
  1022. BSON regular expression to a Python regular expression object.
  1023. Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with
  1024. `codec_options`.
  1025. .. versionchanged:: 2.7
  1026. Added `compile_re` option. If set to False, PyMongo represented BSON
  1027. regular expressions as :class:`~bson.regex.Regex` objects instead of
  1028. attempting to compile BSON regular expressions as Python native
  1029. regular expressions, thus preventing errors for some incompatible
  1030. patterns, see `PYTHON-500`_.
  1031. .. _PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500
  1032. """
  1033. return decode(self, codec_options)
  1034. def has_c():
  1035. """Is the C extension installed?
  1036. """
  1037. return _USE_C