# Copyright 2019-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Support for explicit client-side field level encryption."""
  15. import contextlib
  16. import os
  17. import subprocess
  18. import uuid
  19. import weakref
  20. try:
  21. from pymongocrypt.auto_encrypter import AutoEncrypter
  22. from pymongocrypt.errors import MongoCryptError
  23. from pymongocrypt.explicit_encrypter import ExplicitEncrypter
  24. from pymongocrypt.mongocrypt import MongoCryptOptions
  25. from pymongocrypt.state_machine import MongoCryptCallback
  26. _HAVE_PYMONGOCRYPT = True
  27. except ImportError:
  28. _HAVE_PYMONGOCRYPT = False
  29. MongoCryptCallback = object
  30. from bson import _dict_to_bson, decode, encode
  31. from bson.codec_options import CodecOptions
  32. from bson.binary import (Binary,
  33. STANDARD,
  34. UUID_SUBTYPE)
  35. from bson.errors import BSONError
  36. from bson.raw_bson import (DEFAULT_RAW_BSON_OPTIONS,
  37. RawBSONDocument,
  38. _inflate_bson)
  39. from bson.son import SON
  40. from pymongo.errors import (ConfigurationError,
  41. EncryptionError,
  42. InvalidOperation,
  43. ServerSelectionTimeoutError)
  44. from pymongo.mongo_client import MongoClient
  45. from pymongo.pool import _configured_socket, PoolOptions
  46. from pymongo.read_concern import ReadConcern
  47. from pymongo.ssl_support import get_ssl_context, HAVE_SSL
  48. from pymongo.uri_parser import parse_host
  49. from pymongo.write_concern import WriteConcern
  50. from pymongo.daemon import _spawn_daemon
  51. if HAVE_SSL:
  52. from ssl import CERT_REQUIRED
  53. else:
  54. CERT_REQUIRED = None
  55. _HTTPS_PORT = 443
  56. _KMS_CONNECT_TIMEOUT = 10 # TODO: CDRIVER-3262 will define this value.
  57. _MONGOCRYPTD_TIMEOUT_MS = 10000
  58. _DATA_KEY_OPTS = CodecOptions(document_class=SON, uuid_representation=STANDARD)
  59. # Use RawBSONDocument codec options to avoid needlessly decoding
  60. # documents from the key vault.
  61. _KEY_VAULT_OPTS = CodecOptions(document_class=RawBSONDocument,
  62. uuid_representation=STANDARD)
  63. @contextlib.contextmanager
  64. def _wrap_encryption_errors():
  65. """Context manager to wrap encryption related errors."""
  66. try:
  67. yield
  68. except BSONError:
  69. # BSON encoding/decoding errors are unrelated to encryption so
  70. # we should propagate them unchanged.
  71. raise
  72. except Exception as exc:
  73. raise EncryptionError(exc)
  74. class _EncryptionIO(MongoCryptCallback):
  75. def __init__(self, client, key_vault_coll, mongocryptd_client, opts):
  76. """Internal class to perform I/O on behalf of pymongocrypt."""
  77. # Use a weak ref to break reference cycle.
  78. if client is not None:
  79. self.client_ref = weakref.ref(client)
  80. else:
  81. self.client_ref = None
  82. self.key_vault_coll = key_vault_coll.with_options(
  83. codec_options=_KEY_VAULT_OPTS,
  84. read_concern=ReadConcern(level='majority'),
  85. write_concern=WriteConcern(w='majority'))
  86. self.mongocryptd_client = mongocryptd_client
  87. self.opts = opts
  88. self._spawned = False
  89. def kms_request(self, kms_context):
  90. """Complete a KMS request.
  91. :Parameters:
  92. - `kms_context`: A :class:`MongoCryptKmsContext`.
  93. :Returns:
  94. None
  95. """
  96. endpoint = kms_context.endpoint
  97. message = kms_context.message
  98. host, port = parse_host(endpoint, _HTTPS_PORT)
  99. # Enable strict certificate verification, OCSP, match hostname, and
  100. # SNI using the system default CA certificates.
  101. ctx = get_ssl_context(
  102. None, # certfile
  103. None, # keyfile
  104. None, # passphrase
  105. None, # ca_certs
  106. CERT_REQUIRED, # cert_reqs
  107. None, # crlfile
  108. True, # match_hostname
  109. True) # check_ocsp_endpoint
  110. opts = PoolOptions(connect_timeout=_KMS_CONNECT_TIMEOUT,
  111. socket_timeout=_KMS_CONNECT_TIMEOUT,
  112. ssl_context=ctx)
  113. conn = _configured_socket((host, port), opts)
  114. try:
  115. conn.sendall(message)
  116. while kms_context.bytes_needed > 0:
  117. data = conn.recv(kms_context.bytes_needed)
  118. kms_context.feed(data)
  119. finally:
  120. conn.close()
  121. def collection_info(self, database, filter):
  122. """Get the collection info for a namespace.
  123. The returned collection info is passed to libmongocrypt which reads
  124. the JSON schema.
  125. :Parameters:
  126. - `database`: The database on which to run listCollections.
  127. - `filter`: The filter to pass to listCollections.
  128. :Returns:
  129. The first document from the listCollections command response as BSON.
  130. """
  131. with self.client_ref()[database].list_collections(
  132. filter=RawBSONDocument(filter)) as cursor:
  133. for doc in cursor:
  134. return _dict_to_bson(doc, False, _DATA_KEY_OPTS)
  135. def spawn(self):
  136. """Spawn mongocryptd.
  137. Note this method is thread safe; at most one mongocryptd will start
  138. successfully.
  139. """
  140. self._spawned = True
  141. args = [self.opts._mongocryptd_spawn_path or 'mongocryptd']
  142. args.extend(self.opts._mongocryptd_spawn_args)
  143. _spawn_daemon(args)
  144. def mark_command(self, database, cmd):
  145. """Mark a command for encryption.
  146. :Parameters:
  147. - `database`: The database on which to run this command.
  148. - `cmd`: The BSON command to run.
  149. :Returns:
  150. The marked command response from mongocryptd.
  151. """
  152. if not self._spawned and not self.opts._mongocryptd_bypass_spawn:
  153. self.spawn()
  154. # Database.command only supports mutable mappings so we need to decode
  155. # the raw BSON command first.
  156. inflated_cmd = _inflate_bson(cmd, DEFAULT_RAW_BSON_OPTIONS)
  157. try:
  158. res = self.mongocryptd_client[database].command(
  159. inflated_cmd,
  160. codec_options=DEFAULT_RAW_BSON_OPTIONS)
  161. except ServerSelectionTimeoutError:
  162. if self.opts._mongocryptd_bypass_spawn:
  163. raise
  164. self.spawn()
  165. res = self.mongocryptd_client[database].command(
  166. inflated_cmd,
  167. codec_options=DEFAULT_RAW_BSON_OPTIONS)
  168. return res.raw
  169. def fetch_keys(self, filter):
  170. """Yields one or more keys from the key vault.
  171. :Parameters:
  172. - `filter`: The filter to pass to find.
  173. :Returns:
  174. A generator which yields the requested keys from the key vault.
  175. """
  176. with self.key_vault_coll.find(RawBSONDocument(filter)) as cursor:
  177. for key in cursor:
  178. yield key.raw
  179. def insert_data_key(self, data_key):
  180. """Insert a data key into the key vault.
  181. :Parameters:
  182. - `data_key`: The data key document to insert.
  183. :Returns:
  184. The _id of the inserted data key document.
  185. """
  186. raw_doc = RawBSONDocument(data_key)
  187. data_key_id = raw_doc.get('_id')
  188. if not isinstance(data_key_id, uuid.UUID):
  189. raise TypeError('data_key _id must be a UUID')
  190. self.key_vault_coll.insert_one(raw_doc)
  191. return Binary(data_key_id.bytes, subtype=UUID_SUBTYPE)
  192. def bson_encode(self, doc):
  193. """Encode a document to BSON.
  194. A document can be any mapping type (like :class:`dict`).
  195. :Parameters:
  196. - `doc`: mapping type representing a document
  197. :Returns:
  198. The encoded BSON bytes.
  199. """
  200. return encode(doc)
  201. def close(self):
  202. """Release resources.
  203. Note it is not safe to call this method from __del__ or any GC hooks.
  204. """
  205. self.client_ref = None
  206. self.key_vault_coll = None
  207. if self.mongocryptd_client:
  208. self.mongocryptd_client.close()
  209. self.mongocryptd_client = None
  210. class _Encrypter(object):
  211. """Encrypts and decrypts MongoDB commands.
  212. This class is used to support automatic encryption and decryption of
  213. MongoDB commands."""
  214. def __init__(self, client, opts):
  215. """Create a _Encrypter for a client.
  216. :Parameters:
  217. - `client`: The encrypted MongoClient.
  218. - `opts`: The encrypted client's :class:`AutoEncryptionOpts`.
  219. """
  220. if opts._schema_map is None:
  221. schema_map = None
  222. else:
  223. schema_map = _dict_to_bson(opts._schema_map, False, _DATA_KEY_OPTS)
  224. self._bypass_auto_encryption = opts._bypass_auto_encryption
  225. self._internal_client = None
  226. def _get_internal_client(encrypter, mongo_client):
  227. if mongo_client.max_pool_size is None:
  228. # Unlimited pool size, use the same client.
  229. return mongo_client
  230. # Else - limited pool size, use an internal client.
  231. if encrypter._internal_client is not None:
  232. return encrypter._internal_client
  233. internal_client = mongo_client._duplicate(
  234. minPoolSize=0, auto_encryption_opts=None)
  235. encrypter._internal_client = internal_client
  236. return internal_client
  237. if opts._key_vault_client is not None:
  238. key_vault_client = opts._key_vault_client
  239. else:
  240. key_vault_client = _get_internal_client(self, client)
  241. if opts._bypass_auto_encryption:
  242. metadata_client = None
  243. else:
  244. metadata_client = _get_internal_client(self, client)
  245. db, coll = opts._key_vault_namespace.split('.', 1)
  246. key_vault_coll = key_vault_client[db][coll]
  247. mongocryptd_client = MongoClient(
  248. opts._mongocryptd_uri, connect=False,
  249. serverSelectionTimeoutMS=_MONGOCRYPTD_TIMEOUT_MS)
  250. io_callbacks = _EncryptionIO(
  251. metadata_client, key_vault_coll, mongocryptd_client, opts)
  252. self._auto_encrypter = AutoEncrypter(io_callbacks, MongoCryptOptions(
  253. opts._kms_providers, schema_map))
  254. self._closed = False
  255. def encrypt(self, database, cmd, check_keys, codec_options):
  256. """Encrypt a MongoDB command.
  257. :Parameters:
  258. - `database`: The database for this command.
  259. - `cmd`: A command document.
  260. - `check_keys`: If True, check `cmd` for invalid keys.
  261. - `codec_options`: The CodecOptions to use while encoding `cmd`.
  262. :Returns:
  263. The encrypted command to execute.
  264. """
  265. self._check_closed()
  266. # Workaround for $clusterTime which is incompatible with
  267. # check_keys.
  268. cluster_time = check_keys and cmd.pop('$clusterTime', None)
  269. encoded_cmd = _dict_to_bson(cmd, check_keys, codec_options)
  270. with _wrap_encryption_errors():
  271. encrypted_cmd = self._auto_encrypter.encrypt(database, encoded_cmd)
  272. # TODO: PYTHON-1922 avoid decoding the encrypted_cmd.
  273. encrypt_cmd = _inflate_bson(
  274. encrypted_cmd, DEFAULT_RAW_BSON_OPTIONS)
  275. if cluster_time:
  276. encrypt_cmd['$clusterTime'] = cluster_time
  277. return encrypt_cmd
  278. def decrypt(self, response):
  279. """Decrypt a MongoDB command response.
  280. :Parameters:
  281. - `response`: A MongoDB command response as BSON.
  282. :Returns:
  283. The decrypted command response.
  284. """
  285. self._check_closed()
  286. with _wrap_encryption_errors():
  287. return self._auto_encrypter.decrypt(response)
  288. def _check_closed(self):
  289. if self._closed:
  290. raise InvalidOperation("Cannot use MongoClient after close")
  291. def close(self):
  292. """Cleanup resources."""
  293. self._closed = True
  294. self._auto_encrypter.close()
  295. if self._internal_client:
  296. self._internal_client.close()
  297. self._internal_client = None
  298. class Algorithm(object):
  299. """An enum that defines the supported encryption algorithms."""
  300. AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic = (
  301. "AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic")
  302. AEAD_AES_256_CBC_HMAC_SHA_512_Random = (
  303. "AEAD_AES_256_CBC_HMAC_SHA_512-Random")
  304. class ClientEncryption(object):
  305. """Explicit client-side field level encryption."""
  306. def __init__(self, kms_providers, key_vault_namespace, key_vault_client,
  307. codec_options):
  308. """Explicit client-side field level encryption.
  309. The ClientEncryption class encapsulates explicit operations on a key
  310. vault collection that cannot be done directly on a MongoClient. Similar
  311. to configuring auto encryption on a MongoClient, it is constructed with
  312. a MongoClient (to a MongoDB cluster containing the key vault
  313. collection), KMS provider configuration, and keyVaultNamespace. It
  314. provides an API for explicitly encrypting and decrypting values, and
  315. creating data keys. It does not provide an API to query keys from the
  316. key vault collection, as this can be done directly on the MongoClient.
  317. See :ref:`explicit-client-side-encryption` for an example.
  318. :Parameters:
  319. - `kms_providers`: Map of KMS provider options. Two KMS providers
  320. are supported: "aws" and "local". The kmsProviders map values
  321. differ by provider:
  322. - `aws`: Map with "accessKeyId" and "secretAccessKey" as strings.
  323. These are the AWS access key ID and AWS secret access key used
  324. to generate KMS messages. An optional "sessionToken" may be
  325. included to support temporary AWS credentials.
  326. - `azure`: Map with "tenantId", "clientId", and "clientSecret" as
  327. strings. Additionally, "identityPlatformEndpoint" may also be
  328. specified as a string (defaults to 'login.microsoftonline.com').
  329. These are the Azure Active Directory credentials used to
  330. generate Azure Key Vault messages.
  331. - `gcp`: Map with "email" as a string and "privateKey"
  332. as `bytes` or a base64 encoded string (unicode on Python 2).
  333. Additionally, "endpoint" may also be specified as a string
  334. (defaults to 'oauth2.googleapis.com'). These are the
  335. credentials used to generate Google Cloud KMS messages.
  336. - `local`: Map with "key" as `bytes` (96 bytes in length) or
  337. a base64 encoded string (unicode on Python 2) which decodes
  338. to 96 bytes. "key" is the master key used to encrypt/decrypt
  339. data keys. This key should be generated and stored as securely
  340. as possible.
  341. - `key_vault_namespace`: The namespace for the key vault collection.
  342. The key vault collection contains all data keys used for encryption
  343. and decryption. Data keys are stored as documents in this MongoDB
  344. collection. Data keys are protected with encryption by a KMS
  345. provider.
  346. - `key_vault_client`: A MongoClient connected to a MongoDB cluster
  347. containing the `key_vault_namespace` collection.
  348. - `codec_options`: An instance of
  349. :class:`~bson.codec_options.CodecOptions` to use when encoding a
  350. value for encryption and decoding the decrypted BSON value. This
  351. should be the same CodecOptions instance configured on the
  352. MongoClient, Database, or Collection used to access application
  353. data.
  354. .. versionadded:: 3.9
  355. """
  356. if not _HAVE_PYMONGOCRYPT:
  357. raise ConfigurationError(
  358. "client-side field level encryption requires the pymongocrypt "
  359. "library: install a compatible version with: "
  360. "python -m pip install 'pymongo[encryption]'")
  361. if not isinstance(codec_options, CodecOptions):
  362. raise TypeError("codec_options must be an instance of "
  363. "bson.codec_options.CodecOptions")
  364. self._kms_providers = kms_providers
  365. self._key_vault_namespace = key_vault_namespace
  366. self._key_vault_client = key_vault_client
  367. self._codec_options = codec_options
  368. db, coll = key_vault_namespace.split('.', 1)
  369. key_vault_coll = key_vault_client[db][coll]
  370. self._io_callbacks = _EncryptionIO(None, key_vault_coll, None, None)
  371. self._encryption = ExplicitEncrypter(
  372. self._io_callbacks, MongoCryptOptions(kms_providers, None))
  373. def create_data_key(self, kms_provider, master_key=None,
  374. key_alt_names=None):
  375. """Create and insert a new data key into the key vault collection.
  376. :Parameters:
  377. - `kms_provider`: The KMS provider to use. Supported values are
  378. "aws" and "local".
  379. - `master_key`: Identifies a KMS-specific key used to encrypt the
  380. new data key. If the kmsProvider is "local" the `master_key` is
  381. not applicable and may be omitted.
  382. If the `kms_provider` is "aws" it is required and has the
  383. following fields::
  384. - `region` (string): Required. The AWS region, e.g. "us-east-1".
  385. - `key` (string): Required. The Amazon Resource Name (ARN) to
  386. the AWS customer.
  387. - `endpoint` (string): Optional. An alternate host to send KMS
  388. requests to. May include port number, e.g.
  389. "kms.us-east-1.amazonaws.com:443".
  390. If the `kms_provider` is "azure" it is required and has the
  391. following fields::
  392. - `keyVaultEndpoint` (string): Required. Host with optional
  393. port, e.g. "example.vault.azure.net".
  394. - `keyName` (string): Required. Key name in the key vault.
  395. - `keyVersion` (string): Optional. Version of the key to use.
  396. If the `kms_provider` is "gcp" it is required and has the
  397. following fields::
  398. - `projectId` (string): Required. The Google cloud project ID.
  399. - `location` (string): Required. The GCP location, e.g. "us-east1".
  400. - `keyRing` (string): Required. Name of the key ring that contains
  401. the key to use.
  402. - `keyName` (string): Required. Name of the key to use.
  403. - `keyVersion` (string): Optional. Version of the key to use.
  404. - `endpoint` (string): Optional. Host with optional port.
  405. Defaults to "cloudkms.googleapis.com".
  406. - `key_alt_names` (optional): An optional list of string alternate
  407. names used to reference a key. If a key is created with alternate
  408. names, then encryption may refer to the key by the unique alternate
  409. name instead of by ``key_id``. The following example shows creating
  410. and referring to a data key by alternate name::
  411. client_encryption.create_data_key("local", keyAltNames=["name1"])
  412. # reference the key with the alternate name
  413. client_encryption.encrypt("457-55-5462", keyAltName="name1",
  414. algorithm=Algorithm.AEAD_AES_256_CBC_HMAC_SHA_512_Random)
  415. :Returns:
  416. The ``_id`` of the created data key document as a
  417. :class:`~bson.binary.Binary` with subtype
  418. :data:`~bson.binary.UUID_SUBTYPE`.
  419. """
  420. self._check_closed()
  421. with _wrap_encryption_errors():
  422. return self._encryption.create_data_key(
  423. kms_provider, master_key=master_key,
  424. key_alt_names=key_alt_names)
  425. def encrypt(self, value, algorithm, key_id=None, key_alt_name=None):
  426. """Encrypt a BSON value with a given key and algorithm.
  427. Note that exactly one of ``key_id`` or ``key_alt_name`` must be
  428. provided.
  429. :Parameters:
  430. - `value`: The BSON value to encrypt.
  431. - `algorithm` (string): The encryption algorithm to use. See
  432. :class:`Algorithm` for some valid options.
  433. - `key_id`: Identifies a data key by ``_id`` which must be a
  434. :class:`~bson.binary.Binary` with subtype 4 (
  435. :attr:`~bson.binary.UUID_SUBTYPE`).
  436. - `key_alt_name`: Identifies a key vault document by 'keyAltName'.
  437. :Returns:
  438. The encrypted value, a :class:`~bson.binary.Binary` with subtype 6.
  439. """
  440. self._check_closed()
  441. if (key_id is not None and not (
  442. isinstance(key_id, Binary) and
  443. key_id.subtype == UUID_SUBTYPE)):
  444. raise TypeError(
  445. 'key_id must be a bson.binary.Binary with subtype 4')
  446. doc = encode({'v': value}, codec_options=self._codec_options)
  447. with _wrap_encryption_errors():
  448. encrypted_doc = self._encryption.encrypt(
  449. doc, algorithm, key_id=key_id, key_alt_name=key_alt_name)
  450. return decode(encrypted_doc)['v']
  451. def decrypt(self, value):
  452. """Decrypt an encrypted value.
  453. :Parameters:
  454. - `value` (Binary): The encrypted value, a
  455. :class:`~bson.binary.Binary` with subtype 6.
  456. :Returns:
  457. The decrypted BSON value.
  458. """
  459. self._check_closed()
  460. if not (isinstance(value, Binary) and value.subtype == 6):
  461. raise TypeError(
  462. 'value to decrypt must be a bson.binary.Binary with subtype 6')
  463. with _wrap_encryption_errors():
  464. doc = encode({'v': value})
  465. decrypted_doc = self._encryption.decrypt(doc)
  466. return decode(decrypted_doc,
  467. codec_options=self._codec_options)['v']
  468. def __enter__(self):
  469. return self
  470. def __exit__(self, exc_type, exc_val, exc_tb):
  471. self.close()
  472. def _check_closed(self):
  473. if self._encryption is None:
  474. raise InvalidOperation("Cannot use closed ClientEncryption")
  475. def close(self):
  476. """Release resources.
  477. Note that using this class in a with-statement will automatically call
  478. :meth:`close`::
  479. with ClientEncryption(...) as client_encryption:
  480. encrypted = client_encryption.encrypt(value, ...)
  481. decrypted = client_encryption.decrypt(encrypted)
  482. """
  483. if self._io_callbacks:
  484. self._io_callbacks.close()
  485. self._encryption.close()
  486. self._io_callbacks = None
  487. self._encryption = None