session.py 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968
  1. """Session object for building, serializing, sending, and receiving messages.
  2. The Session object supports serialization, HMAC signatures,
  3. and metadata on messages.
  4. Also defined here are utilities for working with Sessions:
  5. * A SessionFactory to be used as a base class for configurables that work with
  6. Sessions.
  7. * A Message object for convenience that allows attribute-access to the msg dict.
  8. """
  9. # Copyright (c) Jupyter Development Team.
  10. # Distributed under the terms of the Modified BSD License.
  11. from binascii import b2a_hex
  12. import hashlib
  13. import hmac
  14. import logging
  15. import os
  16. import pprint
  17. import random
  18. import warnings
  19. from datetime import datetime
  20. try:
  21. import cPickle
  22. pickle = cPickle
  23. except:
  24. cPickle = None
  25. import pickle
  26. try:
  27. # py3
  28. PICKLE_PROTOCOL = pickle.DEFAULT_PROTOCOL
  29. except AttributeError:
  30. PICKLE_PROTOCOL = pickle.HIGHEST_PROTOCOL
  31. try:
  32. # We are using compare_digest to limit the surface of timing attacks
  33. from hmac import compare_digest
  34. except ImportError:
  35. # Python < 2.7.7: When digests don't match no feedback is provided,
  36. # limiting the surface of attack
  37. def compare_digest(a,b): return a == b
  38. try:
  39. from datetime import timezone
  40. utc = timezone.utc
  41. except ImportError:
  42. # Python 2
  43. from dateutil.tz import tzutc
  44. utc = tzutc()
  45. import zmq
  46. from zmq.utils import jsonapi
  47. from zmq.eventloop.ioloop import IOLoop
  48. from zmq.eventloop.zmqstream import ZMQStream
  49. from traitlets.config.configurable import Configurable, LoggingConfigurable
  50. from ipython_genutils.importstring import import_item
  51. from jupyter_client.jsonutil import extract_dates, squash_dates, date_default
  52. from ipython_genutils.py3compat import (str_to_bytes, str_to_unicode, unicode_type,
  53. iteritems)
  54. from traitlets import (CBytes, Unicode, Bool, Any, Instance, Set,
  55. DottedObjectName, CUnicode, Dict, Integer,
  56. TraitError,
  57. )
  58. from jupyter_client import protocol_version
  59. from jupyter_client.adapter import adapt
  60. from traitlets.log import get_logger
  61. #-----------------------------------------------------------------------------
  62. # utility functions
  63. #-----------------------------------------------------------------------------
  64. def squash_unicode(obj):
  65. """coerce unicode back to bytestrings."""
  66. if isinstance(obj,dict):
  67. for key in obj.keys():
  68. obj[key] = squash_unicode(obj[key])
  69. if isinstance(key, unicode_type):
  70. obj[squash_unicode(key)] = obj.pop(key)
  71. elif isinstance(obj, list):
  72. for i,v in enumerate(obj):
  73. obj[i] = squash_unicode(v)
  74. elif isinstance(obj, unicode_type):
  75. obj = obj.encode('utf8')
  76. return obj
  77. #-----------------------------------------------------------------------------
  78. # globals and defaults
  79. #-----------------------------------------------------------------------------
  80. # default values for the thresholds:
  81. MAX_ITEMS = 64
  82. MAX_BYTES = 1024
  83. # ISO8601-ify datetime objects
  84. # allow unicode
  85. # disallow nan, because it's not actually valid JSON
  86. json_packer = lambda obj: jsonapi.dumps(obj, default=date_default,
  87. ensure_ascii=False, allow_nan=False,
  88. )
  89. json_unpacker = lambda s: jsonapi.loads(s)
  90. pickle_packer = lambda o: pickle.dumps(squash_dates(o), PICKLE_PROTOCOL)
  91. pickle_unpacker = pickle.loads
  92. default_packer = json_packer
  93. default_unpacker = json_unpacker
  94. DELIM = b"<IDS|MSG>"
  95. # singleton dummy tracker, which will always report as done
  96. DONE = zmq.MessageTracker()
  97. #-----------------------------------------------------------------------------
  98. # Mixin tools for apps that use Sessions
  99. #-----------------------------------------------------------------------------
  100. def new_id():
  101. """Generate a new random id.
  102. Avoids problematic runtime import in stdlib uuid on Python 2.
  103. Returns
  104. -------
  105. id string (16 random bytes as hex-encoded text, chunks separated by '-')
  106. """
  107. buf = os.urandom(16)
  108. return u'-'.join(b2a_hex(x).decode('ascii') for x in (
  109. buf[:4], buf[4:]
  110. ))
  111. def new_id_bytes():
  112. """Return new_id as ascii bytes"""
  113. return new_id().encode('ascii')
  114. session_aliases = dict(
  115. ident = 'Session.session',
  116. user = 'Session.username',
  117. keyfile = 'Session.keyfile',
  118. )
  119. session_flags = {
  120. 'secure' : ({'Session' : { 'key' : new_id_bytes(),
  121. 'keyfile' : '' }},
  122. """Use HMAC digests for authentication of messages.
  123. Setting this flag will generate a new UUID to use as the HMAC key.
  124. """),
  125. 'no-secure' : ({'Session' : { 'key' : b'', 'keyfile' : '' }},
  126. """Don't authenticate messages."""),
  127. }
  128. def default_secure(cfg):
  129. """Set the default behavior for a config environment to be secure.
  130. If Session.key/keyfile have not been set, set Session.key to
  131. a new random UUID.
  132. """
  133. warnings.warn("default_secure is deprecated", DeprecationWarning)
  134. if 'Session' in cfg:
  135. if 'key' in cfg.Session or 'keyfile' in cfg.Session:
  136. return
  137. # key/keyfile not specified, generate new UUID:
  138. cfg.Session.key = new_id_bytes()
  139. def utcnow():
  140. """Return timezone-aware UTC timestamp"""
  141. return datetime.utcnow().replace(tzinfo=utc)
  142. #-----------------------------------------------------------------------------
  143. # Classes
  144. #-----------------------------------------------------------------------------
  145. class SessionFactory(LoggingConfigurable):
  146. """The Base class for configurables that have a Session, Context, logger,
  147. and IOLoop.
  148. """
  149. logname = Unicode('')
  150. def _logname_changed(self, name, old, new):
  151. self.log = logging.getLogger(new)
  152. # not configurable:
  153. context = Instance('zmq.Context')
  154. def _context_default(self):
  155. return zmq.Context()
  156. session = Instance('jupyter_client.session.Session',
  157. allow_none=True)
  158. loop = Instance('tornado.ioloop.IOLoop')
  159. def _loop_default(self):
  160. return IOLoop.current()
  161. def __init__(self, **kwargs):
  162. super(SessionFactory, self).__init__(**kwargs)
  163. if self.session is None:
  164. # construct the session
  165. self.session = Session(**kwargs)
  166. class Message(object):
  167. """A simple message object that maps dict keys to attributes.
  168. A Message can be created from a dict and a dict from a Message instance
  169. simply by calling dict(msg_obj)."""
  170. def __init__(self, msg_dict):
  171. dct = self.__dict__
  172. for k, v in iteritems(dict(msg_dict)):
  173. if isinstance(v, dict):
  174. v = Message(v)
  175. dct[k] = v
  176. # Having this iterator lets dict(msg_obj) work out of the box.
  177. def __iter__(self):
  178. return iter(iteritems(self.__dict__))
  179. def __repr__(self):
  180. return repr(self.__dict__)
  181. def __str__(self):
  182. return pprint.pformat(self.__dict__)
  183. def __contains__(self, k):
  184. return k in self.__dict__
  185. def __getitem__(self, k):
  186. return self.__dict__[k]
  187. def msg_header(msg_id, msg_type, username, session):
  188. """Create a new message header"""
  189. date = utcnow()
  190. version = protocol_version
  191. return locals()
  192. def extract_header(msg_or_header):
  193. """Given a message or header, return the header."""
  194. if not msg_or_header:
  195. return {}
  196. try:
  197. # See if msg_or_header is the entire message.
  198. h = msg_or_header['header']
  199. except KeyError:
  200. try:
  201. # See if msg_or_header is just the header
  202. h = msg_or_header['msg_id']
  203. except KeyError:
  204. raise
  205. else:
  206. h = msg_or_header
  207. if not isinstance(h, dict):
  208. h = dict(h)
  209. return h
  210. class Session(Configurable):
  211. """Object for handling serialization and sending of messages.
  212. The Session object handles building messages and sending them
  213. with ZMQ sockets or ZMQStream objects. Objects can communicate with each
  214. other over the network via Session objects, and only need to work with the
  215. dict-based IPython message spec. The Session will handle
  216. serialization/deserialization, security, and metadata.
  217. Sessions support configurable serialization via packer/unpacker traits,
  218. and signing with HMAC digests via the key/keyfile traits.
  219. Parameters
  220. ----------
  221. debug : bool
  222. whether to trigger extra debugging statements
  223. packer/unpacker : str : 'json', 'pickle' or import_string
  224. importstrings for methods to serialize message parts. If just
  225. 'json' or 'pickle', predefined JSON and pickle packers will be used.
  226. Otherwise, the entire importstring must be used.
  227. The functions must accept at least valid JSON input, and output *bytes*.
  228. For example, to use msgpack:
  229. packer = 'msgpack.packb', unpacker='msgpack.unpackb'
  230. pack/unpack : callables
  231. You can also set the pack/unpack callables for serialization directly.
  232. session : bytes
  233. the ID of this Session object. The default is to generate a new UUID.
  234. username : unicode
  235. username added to message headers. The default is to ask the OS.
  236. key : bytes
  237. The key used to initialize an HMAC signature. If unset, messages
  238. will not be signed or checked.
  239. keyfile : filepath
  240. The file containing a key. If this is set, `key` will be initialized
  241. to the contents of the file.
  242. """
  243. debug = Bool(False, config=True, help="""Debug output in the Session""")
  244. check_pid = Bool(True, config=True,
  245. help="""Whether to check PID to protect against calls after fork.
  246. This check can be disabled if fork-safety is handled elsewhere.
  247. """)
  248. packer = DottedObjectName('json',config=True,
  249. help="""The name of the packer for serializing messages.
  250. Should be one of 'json', 'pickle', or an import name
  251. for a custom callable serializer.""")
  252. def _packer_changed(self, name, old, new):
  253. if new.lower() == 'json':
  254. self.pack = json_packer
  255. self.unpack = json_unpacker
  256. self.unpacker = new
  257. elif new.lower() == 'pickle':
  258. self.pack = pickle_packer
  259. self.unpack = pickle_unpacker
  260. self.unpacker = new
  261. else:
  262. self.pack = import_item(str(new))
  263. unpacker = DottedObjectName('json', config=True,
  264. help="""The name of the unpacker for unserializing messages.
  265. Only used with custom functions for `packer`.""")
  266. def _unpacker_changed(self, name, old, new):
  267. if new.lower() == 'json':
  268. self.pack = json_packer
  269. self.unpack = json_unpacker
  270. self.packer = new
  271. elif new.lower() == 'pickle':
  272. self.pack = pickle_packer
  273. self.unpack = pickle_unpacker
  274. self.packer = new
  275. else:
  276. self.unpack = import_item(str(new))
  277. session = CUnicode(u'', config=True,
  278. help="""The UUID identifying this session.""")
  279. def _session_default(self):
  280. u = new_id()
  281. self.bsession = u.encode('ascii')
  282. return u
  283. def _session_changed(self, name, old, new):
  284. self.bsession = self.session.encode('ascii')
  285. # bsession is the session as bytes
  286. bsession = CBytes(b'')
  287. username = Unicode(str_to_unicode(os.environ.get('USER', 'username')),
  288. help="""Username for the Session. Default is your system username.""",
  289. config=True)
  290. metadata = Dict({}, config=True,
  291. help="""Metadata dictionary, which serves as the default top-level metadata dict for each message.""")
  292. # if 0, no adapting to do.
  293. adapt_version = Integer(0)
  294. # message signature related traits:
  295. key = CBytes(config=True,
  296. help="""execution key, for signing messages.""")
    def _key_default(self):
        # Returns a fresh random id as ascii bytes; presumably picked up by
        # traitlets as the dynamic default for `key` -- TODO confirm.
        return new_id_bytes()
    def _key_changed(self):
        # Rebuild the HMAC object so it is based on the new key.
        self._new_auth()
  301. signature_scheme = Unicode('hmac-sha256', config=True,
  302. help="""The digest scheme used to construct the message signatures.
  303. Must have the form 'hmac-HASH'.""")
  304. def _signature_scheme_changed(self, name, old, new):
  305. if not new.startswith('hmac-'):
  306. raise TraitError("signature_scheme must start with 'hmac-', got %r" % new)
  307. hash_name = new.split('-', 1)[1]
  308. try:
  309. self.digest_mod = getattr(hashlib, hash_name)
  310. except AttributeError:
  311. raise TraitError("hashlib has no such attribute: %s" % hash_name)
  312. self._new_auth()
  313. digest_mod = Any()
    def _digest_mod_default(self):
        # sha256 matches the default signature_scheme, 'hmac-sha256'.
        return hashlib.sha256
  316. auth = Instance(hmac.HMAC, allow_none=True)
  317. def _new_auth(self):
  318. if self.key:
  319. self.auth = hmac.HMAC(self.key, digestmod=self.digest_mod)
  320. else:
  321. self.auth = None
  322. digest_history = Set()
  323. digest_history_size = Integer(2**16, config=True,
  324. help="""The maximum number of digests to remember.
  325. The digest history will be culled when it exceeds this value.
  326. """
  327. )
  328. keyfile = Unicode('', config=True,
  329. help="""path to file containing execution key.""")
  330. def _keyfile_changed(self, name, old, new):
  331. with open(new, 'rb') as f:
  332. self.key = f.read().strip()
  333. # for protecting against sends from forks
  334. pid = Integer()
  335. # serialization traits:
  336. pack = Any(default_packer) # the actual packer function
  337. def _pack_changed(self, name, old, new):
  338. if not callable(new):
  339. raise TypeError("packer must be callable, not %s"%type(new))
  340. unpack = Any(default_unpacker) # the actual packer function
  341. def _unpack_changed(self, name, old, new):
  342. # unpacker is not checked - it is assumed to be
  343. if not callable(new):
  344. raise TypeError("unpacker must be callable, not %s"%type(new))
  345. # thresholds:
  346. copy_threshold = Integer(2**16, config=True,
  347. help="Threshold (in bytes) beyond which a buffer should be sent without copying.")
  348. buffer_threshold = Integer(MAX_BYTES, config=True,
  349. help="Threshold (in bytes) beyond which an object's buffer should be extracted to avoid pickling.")
  350. item_threshold = Integer(MAX_ITEMS, config=True,
  351. help="""The maximum number of items for a container to be introspected for custom serialization.
  352. Containers larger than this are pickled outright.
  353. """
  354. )
    def __init__(self, **kwargs):
        """Create a Session object.

        All keyword arguments are forwarded to the Configurable constructor,
        i.e. they set the traits described below.

        Parameters
        ----------
        debug : bool
            whether to trigger extra debugging statements
        packer/unpacker : str : 'json', 'pickle' or import_string
            importstrings for methods to serialize message parts.  If just
            'json' or 'pickle', predefined JSON and pickle packers will be
            used.  Otherwise, the entire importstring must be used.

            The functions must accept at least valid JSON input, and output
            *bytes*.

            For example, to use msgpack:
            packer = 'msgpack.packb', unpacker='msgpack.unpackb'
        pack/unpack : callables
            You can also set the pack/unpack callables for serialization
            directly.
        session : unicode (must be ascii)
            the ID of this Session object.  The default is to generate a
            new id.
        bsession : bytes
            The session as bytes
        username : unicode
            username added to message headers.  The default is read from
            the USER environment variable.
        key : bytes
            The key used to initialize an HMAC signature.  If empty,
            messages will not be signed or checked.
        signature_scheme : str
            The message digest scheme.  Currently must be of the form
            'hmac-HASH', where 'HASH' is a hashing function available in
            Python's hashlib.  The default is 'hmac-sha256'.
            This is ignored if 'key' is empty.
        keyfile : filepath
            The file containing a key.  If this is set, `key` will be
            initialized to the contents of the file.
        """
        super(Session, self).__init__(**kwargs)
        # Validate that pack/unpack round-trip before they are used below.
        self._check_packers()
        # Pre-packed empty dict; serialize() uses it when content is None.
        self.none = self.pack({})
        # ensure self._session_default() if necessary, so bsession is defined:
        self.session
        # Remember the creating process; send() compares against it to
        # refuse sends from a fork.
        self.pid = os.getpid()
        self._new_auth()
        if not self.key:
            get_logger().warning("Message signing is disabled. This is insecure and not recommended!")
  400. def clone(self):
  401. """Create a copy of this Session
  402. Useful when connecting multiple times to a given kernel.
  403. This prevents a shared digest_history warning about duplicate digests
  404. due to multiple connections to IOPub in the same process.
  405. .. versionadded:: 5.1
  406. """
  407. # make a copy
  408. new_session = type(self)()
  409. for name in self.traits():
  410. setattr(new_session, name, getattr(self, name))
  411. # fork digest_history
  412. new_session.digest_history = set()
  413. new_session.digest_history.update(self.digest_history)
  414. return new_session
    @property
    def msg_id(self):
        """Return a newly generated id on every access (nothing is cached)."""
        return new_id()
  419. def _check_packers(self):
  420. """check packers for datetime support."""
  421. pack = self.pack
  422. unpack = self.unpack
  423. # check simple serialization
  424. msg = dict(a=[1,'hi'])
  425. try:
  426. packed = pack(msg)
  427. except Exception as e:
  428. msg = "packer '{packer}' could not serialize a simple message: {e}{jsonmsg}"
  429. if self.packer == 'json':
  430. jsonmsg = "\nzmq.utils.jsonapi.jsonmod = %s" % jsonapi.jsonmod
  431. else:
  432. jsonmsg = ""
  433. raise ValueError(
  434. msg.format(packer=self.packer, e=e, jsonmsg=jsonmsg)
  435. )
  436. # ensure packed message is bytes
  437. if not isinstance(packed, bytes):
  438. raise ValueError("message packed to %r, but bytes are required"%type(packed))
  439. # check that unpack is pack's inverse
  440. try:
  441. unpacked = unpack(packed)
  442. assert unpacked == msg
  443. except Exception as e:
  444. msg = "unpacker '{unpacker}' could not handle output from packer '{packer}': {e}{jsonmsg}"
  445. if self.packer == 'json':
  446. jsonmsg = "\nzmq.utils.jsonapi.jsonmod = %s" % jsonapi.jsonmod
  447. else:
  448. jsonmsg = ""
  449. raise ValueError(
  450. msg.format(packer=self.packer, unpacker=self.unpacker, e=e, jsonmsg=jsonmsg)
  451. )
  452. # check datetime support
  453. msg = dict(t=utcnow())
  454. try:
  455. unpacked = unpack(pack(msg))
  456. if isinstance(unpacked['t'], datetime):
  457. raise ValueError("Shouldn't deserialize to datetime")
  458. except Exception:
  459. self.pack = lambda o: pack(squash_dates(o))
  460. self.unpack = lambda s: unpack(s)
    def msg_header(self, msg_type):
        """Build a header for ``msg_type`` with a fresh msg_id and this
        Session's username and session id."""
        return msg_header(self.msg_id, msg_type, self.username, self.session)
  463. def msg(self, msg_type, content=None, parent=None, header=None, metadata=None):
  464. """Return the nested message dict.
  465. This format is different from what is sent over the wire. The
  466. serialize/deserialize methods converts this nested message dict to the wire
  467. format, which is a list of message parts.
  468. """
  469. msg = {}
  470. header = self.msg_header(msg_type) if header is None else header
  471. msg['header'] = header
  472. msg['msg_id'] = header['msg_id']
  473. msg['msg_type'] = header['msg_type']
  474. msg['parent_header'] = {} if parent is None else extract_header(parent)
  475. msg['content'] = {} if content is None else content
  476. msg['metadata'] = self.metadata.copy()
  477. if metadata is not None:
  478. msg['metadata'].update(metadata)
  479. return msg
  480. def sign(self, msg_list):
  481. """Sign a message with HMAC digest. If no auth, return b''.
  482. Parameters
  483. ----------
  484. msg_list : list
  485. The [p_header,p_parent,p_content] part of the message list.
  486. """
  487. if self.auth is None:
  488. return b''
  489. h = self.auth.copy()
  490. for m in msg_list:
  491. h.update(m)
  492. return str_to_bytes(h.hexdigest())
  493. def serialize(self, msg, ident=None):
  494. """Serialize the message components to bytes.
  495. This is roughly the inverse of deserialize. The serialize/deserialize
  496. methods work with full message lists, whereas pack/unpack work with
  497. the individual message parts in the message list.
  498. Parameters
  499. ----------
  500. msg : dict or Message
  501. The next message dict as returned by the self.msg method.
  502. Returns
  503. -------
  504. msg_list : list
  505. The list of bytes objects to be sent with the format::
  506. [ident1, ident2, ..., DELIM, HMAC, p_header, p_parent,
  507. p_metadata, p_content, buffer1, buffer2, ...]
  508. In this list, the ``p_*`` entities are the packed or serialized
  509. versions, so if JSON is used, these are utf8 encoded JSON strings.
  510. """
  511. content = msg.get('content', {})
  512. if content is None:
  513. content = self.none
  514. elif isinstance(content, dict):
  515. content = self.pack(content)
  516. elif isinstance(content, bytes):
  517. # content is already packed, as in a relayed message
  518. pass
  519. elif isinstance(content, unicode_type):
  520. # should be bytes, but JSON often spits out unicode
  521. content = content.encode('utf8')
  522. else:
  523. raise TypeError("Content incorrect type: %s"%type(content))
  524. real_message = [self.pack(msg['header']),
  525. self.pack(msg['parent_header']),
  526. self.pack(msg['metadata']),
  527. content,
  528. ]
  529. to_send = []
  530. if isinstance(ident, list):
  531. # accept list of idents
  532. to_send.extend(ident)
  533. elif ident is not None:
  534. to_send.append(ident)
  535. to_send.append(DELIM)
  536. signature = self.sign(real_message)
  537. to_send.append(signature)
  538. to_send.extend(real_message)
  539. return to_send
    def send(self, stream, msg_or_type, content=None, parent=None, ident=None,
             buffers=None, track=False, header=None, metadata=None):
        """Build and send a message via stream or socket.

        The message format used by this function internally is as follows:

        [ident1,ident2,...,DELIM,HMAC,p_header,p_parent,p_metadata,p_content,
         buffer1,buffer2,...]

        The serialize/deserialize methods convert the nested message dict into
        this format.

        Parameters
        ----------
        stream : zmq.Socket or ZMQStream
            The socket-like object used to send the data.
        msg_or_type : str or Message/dict
            Normally, msg_or_type will be a msg_type unless a message is being
            sent more than once. If a header is supplied, this can be set to
            None and the msg_type will be pulled from the header.
        content : dict or None
            The content of the message (ignored if msg_or_type is a message).
        header : dict or None
            The header dict for the message (ignored if msg_or_type is a
            message).
        parent : Message or dict or None
            The parent or parent header describing the parent of this message
            (ignored if msg_or_type is a message).
        ident : bytes or list of bytes
            The zmq.IDENTITY routing path.
        metadata : dict or None
            The metadata describing the message
        buffers : list or None
            The already-serialized buffers to be appended to the message.
            Each must support the buffer protocol and be contiguous.
        track : bool
            Whether to track.  Only for use with Sockets, because ZMQStream
            objects cannot track messages.

        Returns
        -------
        msg : dict
            The constructed message, with a 'tracker' key added.  Returns
            None (and sends nothing) when check_pid is set and the call is
            made from a different process than the one that created the
            Session.

        Raises
        ------
        TypeError
            If a buffer does not support the buffer protocol.
        ValueError
            If a buffer is not contiguous.
        """
        if not isinstance(stream, zmq.Socket):
            # ZMQStreams and dummy sockets do not support tracking.
            track = False

        if isinstance(msg_or_type, (Message, dict)):
            # We got a Message or message dict, not a msg_type so don't
            # build a new Message.
            # NOTE(review): Message as defined in this file has no .get()
            # and no item assignment, so this path looks dict-only -- confirm.
            msg = msg_or_type
            buffers = buffers or msg.get('buffers', [])
        else:
            msg = self.msg(msg_or_type, content=content, parent=parent,
                           header=header, metadata=metadata)
        # self.pid was recorded in __init__; a mismatch means we are in a fork.
        if self.check_pid and not os.getpid() == self.pid:
            get_logger().warning("WARNING: attempted to send message from fork\n%s",
                msg
            )
            return
        buffers = [] if buffers is None else buffers
        # Validate every buffer up front, before anything is sent.
        for idx, buf in enumerate(buffers):
            if isinstance(buf, memoryview):
                view = buf
            else:
                try:
                    # check to see if buf supports the buffer protocol.
                    view = memoryview(buf)
                except TypeError:
                    raise TypeError("Buffer objects must support the buffer protocol.")
            # memoryview.contiguous is new in 3.3,
            # just skip the check on Python 2
            if hasattr(view, 'contiguous') and not view.contiguous:
                # zmq requires memoryviews to be contiguous
                raise ValueError("Buffer %i (%r) is not contiguous" % (idx, buf))

        if self.adapt_version:
            msg = adapt(msg, self.adapt_version)
        to_send = self.serialize(msg, ident)
        to_send.extend(buffers)
        # Copy on send only if every frame is below copy_threshold.
        longest = max([ len(s) for s in to_send ])
        copy = (longest < self.copy_threshold)

        if buffers and track and not copy:
            # only really track when we are doing zero-copy buffers
            tracker = stream.send_multipart(to_send, copy=False, track=True)
        else:
            # use dummy tracker, which will be done immediately
            tracker = DONE
            stream.send_multipart(to_send, copy=copy)

        if self.debug:
            pprint.pprint(msg)
            pprint.pprint(to_send)
            pprint.pprint(buffers)

        msg['tracker'] = tracker

        return msg
  627. def send_raw(self, stream, msg_list, flags=0, copy=True, ident=None):
  628. """Send a raw message via ident path.
  629. This method is used to send a already serialized message.
  630. Parameters
  631. ----------
  632. stream : ZMQStream or Socket
  633. The ZMQ stream or socket to use for sending the message.
  634. msg_list : list
  635. The serialized list of messages to send. This only includes the
  636. [p_header,p_parent,p_metadata,p_content,buffer1,buffer2,...] portion of
  637. the message.
  638. ident : ident or list
  639. A single ident or a list of idents to use in sending.
  640. """
  641. to_send = []
  642. if isinstance(ident, bytes):
  643. ident = [ident]
  644. if ident is not None:
  645. to_send.extend(ident)
  646. to_send.append(DELIM)
  647. to_send.append(self.sign(msg_list))
  648. to_send.extend(msg_list)
  649. stream.send_multipart(to_send, flags, copy=copy)
  650. def recv(self, socket, mode=zmq.NOBLOCK, content=True, copy=True):
  651. """Receive and unpack a message.
  652. Parameters
  653. ----------
  654. socket : ZMQStream or Socket
  655. The socket or stream to use in receiving.
  656. Returns
  657. -------
  658. [idents], msg
  659. [idents] is a list of idents and msg is a nested message dict of
  660. same format as self.msg returns.
  661. """
  662. if isinstance(socket, ZMQStream):
  663. socket = socket.socket
  664. try:
  665. msg_list = socket.recv_multipart(mode, copy=copy)
  666. except zmq.ZMQError as e:
  667. if e.errno == zmq.EAGAIN:
  668. # We can convert EAGAIN to None as we know in this case
  669. # recv_multipart won't return None.
  670. return None,None
  671. else:
  672. raise
  673. # split multipart message into identity list and message dict
  674. # invalid large messages can cause very expensive string comparisons
  675. idents, msg_list = self.feed_identities(msg_list, copy)
  676. try:
  677. return idents, self.deserialize(msg_list, content=content, copy=copy)
  678. except Exception as e:
  679. # TODO: handle it
  680. raise e
  681. def feed_identities(self, msg_list, copy=True):
  682. """Split the identities from the rest of the message.
  683. Feed until DELIM is reached, then return the prefix as idents and
  684. remainder as msg_list. This is easily broken by setting an IDENT to DELIM,
  685. but that would be silly.
  686. Parameters
  687. ----------
  688. msg_list : a list of Message or bytes objects
  689. The message to be split.
  690. copy : bool
  691. flag determining whether the arguments are bytes or Messages
  692. Returns
  693. -------
  694. (idents, msg_list) : two lists
  695. idents will always be a list of bytes, each of which is a ZMQ
  696. identity. msg_list will be a list of bytes or zmq.Messages of the
  697. form [HMAC,p_header,p_parent,p_content,buffer1,buffer2,...] and
  698. should be unpackable/unserializable via self.deserialize at this
  699. point.
  700. """
  701. if copy:
  702. idx = msg_list.index(DELIM)
  703. return msg_list[:idx], msg_list[idx+1:]
  704. else:
  705. failed = True
  706. for idx,m in enumerate(msg_list):
  707. if m.bytes == DELIM:
  708. failed = False
  709. break
  710. if failed:
  711. raise ValueError("DELIM not in msg_list")
  712. idents, msg_list = msg_list[:idx], msg_list[idx+1:]
  713. return [m.bytes for m in idents], msg_list
  def _add_digest(self, signature):
      """Remember a message signature so that a replayed message can be detected.

      Nothing is stored when ``digest_history_size`` is 0. Otherwise the
      signature is added to ``digest_history`` and, once the history holds
      more entries than ``digest_history_size``, a cull is triggered.
      """
      limit = self.digest_history_size
      if limit != 0:
          seen = self.digest_history
          seen.add(signature)
          if len(seen) > limit:
              # over the threshold: let the cull routine trim the history
              self._cull_digest_history()
  def _cull_digest_history(self):
      """Cull the digest history.

      Removes a randomly selected 10% of ``digest_history``, or as many
      entries as are needed to get back down to ``digest_history_size`` when
      that is more. If that would remove every entry, the history is simply
      replaced with an empty set.
      """
      current = len(self.digest_history)
      n_to_cull = max(current // 10, current - self.digest_history_size)
      if n_to_cull >= current:
          self.digest_history = set()
          return
      # random.sample() requires a sequence: passing a set was deprecated in
      # Python 3.9 and raises TypeError from 3.11 on, so sample a list copy.
      to_cull = random.sample(list(self.digest_history), n_to_cull)
      self.digest_history.difference_update(to_cull)
  def deserialize(self, msg_list, content=True, copy=True):
      """Unserialize a msg_list to a nested message dict.

      This is roughly the inverse of serialize. The serialize/deserialize
      methods work with full message lists, whereas pack/unpack work with
      the individual message parts in the message list.

      Parameters
      ----------
      msg_list : list of bytes or Message objects
          The list of message parts of the form [HMAC,p_header,p_parent,
          p_metadata,p_content,buffer1,buffer2,...]. When ``copy`` is False
          the first five entries are replaced in place by their bytes.
      content : bool (True)
          Whether to unpack the content dict (True), or leave it packed
          (False). The signature is only recorded in the digest history
          when content is True.
      copy : bool (True)
          Whether msg_list contains bytes (True) or the non-copying Message
          objects in each place (False).

      Returns
      -------
      msg : dict
          The nested message dict with top-level keys [header, msg_id,
          msg_type, parent_header, metadata, content, buffers], passed
          through adapt() before being returned. The buffers are returned
          as memoryviews.

      Raises
      ------
      TypeError
          If msg_list has fewer than 5 parts.
      ValueError
          If self.auth is set and the signature is empty, has already been
          seen, or does not match the signature computed by self.sign.
      """
      minlen = 5
      # Check the frame count before indexing anything, so that a truncated
      # message is reported as malformed instead of leaking an IndexError.
      if len(msg_list) < minlen:
          raise TypeError("malformed message, must have at least %i elements" % minlen)
      message = {}
      if not copy:
          # pyzmq didn't copy the first parts of the message, so we'll do it
          for i in range(minlen):
              msg_list[i] = msg_list[i].bytes
      if self.auth is not None:
          signature = msg_list[0]
          if not signature:
              raise ValueError("Unsigned Message")
          if signature in self.digest_history:
              raise ValueError("Duplicate Signature: %r" % signature)
          check = self.sign(msg_list[1:5])
          if not compare_digest(signature, check):
              raise ValueError("Invalid Signature: %r" % signature)
          if content:
              # Only store signature if we are unpacking content, don't store
              # if just peeking. Recording happens after verification so that
              # forged signatures cannot fill (and force culling of) the
              # replay history.
              self._add_digest(signature)
      header = self.unpack(msg_list[1])
      message['header'] = extract_dates(header)
      message['msg_id'] = header['msg_id']
      message['msg_type'] = header['msg_type']
      message['parent_header'] = extract_dates(self.unpack(msg_list[2]))
      message['metadata'] = self.unpack(msg_list[3])
      if content:
          message['content'] = self.unpack(msg_list[4])
      else:
          message['content'] = msg_list[4]
      buffers = [memoryview(b) for b in msg_list[5:]]
      if buffers and buffers[0].shape is None:
          # force copy to workaround pyzmq #646
          buffers = [memoryview(b.bytes) for b in msg_list[5:]]
      message['buffers'] = buffers
      if self.debug:
          pprint.pprint(message)
      # adapt to the current version
      return adapt(message)
  def unserialize(self, *args, **kwargs):
      """Deprecated alias for deserialize.

      Emits a DeprecationWarning and forwards all positional and keyword
      arguments to self.deserialize, returning its result.
      """
      warnings.warn(
          "Session.unserialize is deprecated. Use Session.deserialize.",
          DeprecationWarning,
          # attribute the warning to the caller, not to this wrapper
          stacklevel=2,
      )
      return self.deserialize(*args, **kwargs)
  def test_msg2obj():
      """Check that Message mirrors a (nested) dict through attribute and item access."""
      source = dict(x=1)
      wrapped = Message(source)
      assert wrapped.x == source['x']

      # nested dicts are reachable through chained attribute access
      source['y'] = dict(z=1)
      wrapped = Message(source)
      assert wrapped.y.z == source['y']['z']

      # ... and through chained item access
      outer, inner = 'y', 'z'
      assert wrapped[outer][inner] == source[outer][inner]

      # converting back to a dict preserves the values
      round_trip = dict(wrapped)
      assert source['x'] == round_trip['x']
      assert source['y']['z'] == round_trip['y']['z']