stacksampler.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. """
Statistical profiling for long-running Python processes. This was built to work
with gevent, but would probably work if you ran the emitter in a separate OS
thread too.

Example usage
-------------
Add

>>> import gevent
>>> gevent.spawn(run_profiler, '0.0.0.0', 16384)

to your program to start the profiler and run the emitter in a new greenlet.
Then curl localhost:16384 to get a list of stack frames and call counts.
  12. """
  13. from __future__ import print_function
  14. import atexit
  15. import collections
  16. import signal
  17. import sys
  18. import time
  19. from werkzeug.serving import BaseWSGIServer, WSGIRequestHandler
  20. from werkzeug.wrappers import Request, Response
  21. try:
  22. from nylas.logging import get_logger
  23. logger = get_logger()
  24. except ImportError:
  25. class _Logger(object):
  26. def info(self, msg):
  27. print(msg, file = sys.stderr)
  28. logger = _Logger()
  29. class Sampler(object):
  30. """
  31. A simple stack sampler for low-overhead CPU profiling: samples the call
  32. stack every `interval` seconds and keeps track of counts by frame. Because
  33. this uses signals, it only works on the main thread.
  34. """
  35. def __init__(self, interval = 0.005):
  36. self.interval = interval
  37. self._started = None
  38. self._stack_counts = collections.defaultdict(int)
  39. def start(self):
  40. self._started = time.time()
  41. try:
  42. signal.signal(signal.SIGVTALRM, self._sample)
  43. except ValueError:
  44. raise ValueError('Can only sample on the main thread')
  45. signal.setitimer(signal.ITIMER_VIRTUAL, self.interval)
  46. atexit.register(self.stop)
  47. def _sample(self, signum, frame):
  48. stack = []
  49. while frame is not None:
  50. stack.append(self._format_frame(frame))
  51. frame = frame.f_back
  52. stack = ';'.join(reversed(stack))
  53. self._stack_counts[stack] += 1
  54. signal.setitimer(signal.ITIMER_VIRTUAL, self.interval)
  55. def _format_frame(self, frame):
  56. return '{}({})'.format(frame.f_code.co_name,
  57. frame.f_globals.get('__name__'))
  58. def output_stats(self):
  59. if self._started is None:
  60. return ''
  61. elapsed = time.time() - self._started
  62. lines = ['elapsed {}'.format(elapsed),
  63. 'granularity {}'.format(self.interval)]
  64. ordered_stacks = sorted(self._stack_counts.items(),
  65. key = lambda kv: kv[1], reverse = True)
  66. lines.extend(['{} {}'.format(frame, count)
  67. for frame, count in ordered_stacks])
  68. return '\n'.join(lines) + '\n'
  69. def reset(self):
  70. self._started = time.time()
  71. self._stack_counts = collections.defaultdict(int)
  72. def stop(self):
  73. self.reset()
  74. signal.setitimer(signal.ITIMER_VIRTUAL, 0)
  75. def __del__(self):
  76. self.stop()
  77. class Emitter(object):
  78. """A really basic HTTP server that listens on (host, port) and serves the
  79. process's profile data when requested. Resets internal sampling stats if
  80. reset=true is passed."""
  81. def __init__(self, sampler, host, port):
  82. self.sampler = sampler
  83. self.host = host
  84. self.port = port
  85. def handle_request(self, environ, start_response):
  86. stats = self.sampler.output_stats()
  87. request = Request(environ)
  88. if request.args.get('reset') in ('1', 'true'):
  89. self.sampler.reset()
  90. response = Response(stats)
  91. return response(environ, start_response)
  92. def run(self):
  93. server = BaseWSGIServer(self.host, self.port, self.handle_request,
  94. _QuietHandler)
  95. server.log = lambda *args, **kwargs: None
  96. logger.info('Serving profiles on port {}'.format(self.port))
  97. server.serve_forever()
  98. class _QuietHandler(WSGIRequestHandler):
  99. def log_request(self, *args, **kwargs):
  100. """Suppress request logging so as not to pollute application logs."""
  101. pass
  102. def run_profiler(host = '127.0.0.1', port = 16384):
  103. sampler = Sampler()
  104. sampler.start()
  105. e = Emitter(sampler, host, port)
  106. e.run()