123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134 |
- """
- Statistical profiling for long-running Python processes. This was built to work
- with gevent, but would probably work if you ran the emitter in a separate OS
- thread too.
- Example usage
- -------------
- Add
- >>> import gevent
- >>> gevent.spawn(run_profiler, '0.0.0.0', 16384)
- in your program to start the profiler, and run the emitter in a new greenlet.
- Then curl localhost:16384 to get a list of stack frames and call counts.
- """
- from __future__ import print_function
- import atexit
- import collections
- import signal
- import sys
- import time
- from werkzeug.serving import BaseWSGIServer, WSGIRequestHandler
- from werkzeug.wrappers import Request, Response
- try:
- from nylas.logging import get_logger
- logger = get_logger()
- except ImportError:
- class _Logger(object):
- def info(self, msg):
- print(msg, file = sys.stderr)
- logger = _Logger()
- class Sampler(object):
- """
- A simple stack sampler for low-overhead CPU profiling: samples the call
- stack every `interval` seconds and keeps track of counts by frame. Because
- this uses signals, it only works on the main thread.
- """
- def __init__(self, interval = 0.005):
- self.interval = interval
- self._started = None
- self._stack_counts = collections.defaultdict(int)
- def start(self):
- self._started = time.time()
- try:
- signal.signal(signal.SIGVTALRM, self._sample)
- except ValueError:
- raise ValueError('Can only sample on the main thread')
- signal.setitimer(signal.ITIMER_VIRTUAL, self.interval)
- atexit.register(self.stop)
- def _sample(self, signum, frame):
- stack = []
- while frame is not None:
- stack.append(self._format_frame(frame))
- frame = frame.f_back
- stack = ';'.join(reversed(stack))
- self._stack_counts[stack] += 1
- signal.setitimer(signal.ITIMER_VIRTUAL, self.interval)
- def _format_frame(self, frame):
- return '{}({})'.format(frame.f_code.co_name,
- frame.f_globals.get('__name__'))
- def output_stats(self):
- if self._started is None:
- return ''
- elapsed = time.time() - self._started
- lines = ['elapsed {}'.format(elapsed),
- 'granularity {}'.format(self.interval)]
- ordered_stacks = sorted(self._stack_counts.items(),
- key = lambda kv: kv[1], reverse = True)
- lines.extend(['{} {}'.format(frame, count)
- for frame, count in ordered_stacks])
- return '\n'.join(lines) + '\n'
- def reset(self):
- self._started = time.time()
- self._stack_counts = collections.defaultdict(int)
- def stop(self):
- self.reset()
- signal.setitimer(signal.ITIMER_VIRTUAL, 0)
- def __del__(self):
- self.stop()
- class Emitter(object):
- """A really basic HTTP server that listens on (host, port) and serves the
- process's profile data when requested. Resets internal sampling stats if
- reset=true is passed."""
- def __init__(self, sampler, host, port):
- self.sampler = sampler
- self.host = host
- self.port = port
- def handle_request(self, environ, start_response):
- stats = self.sampler.output_stats()
- request = Request(environ)
- if request.args.get('reset') in ('1', 'true'):
- self.sampler.reset()
- response = Response(stats)
- return response(environ, start_response)
- def run(self):
- server = BaseWSGIServer(self.host, self.port, self.handle_request,
- _QuietHandler)
- server.log = lambda *args, **kwargs: None
- logger.info('Serving profiles on port {}'.format(self.port))
- server.serve_forever()
- class _QuietHandler(WSGIRequestHandler):
- def log_request(self, *args, **kwargs):
- """Suppress request logging so as not to pollute application logs."""
- pass
- def run_profiler(host = '127.0.0.1', port = 16384):
- sampler = Sampler()
- sampler.start()
- e = Emitter(sampler, host, port)
- e.run()
|