bench.py

import sys
import time
import subprocess

from six.moves.urllib.parse import urlencode

import scrapy
from scrapy.commands import ScrapyCommand
from scrapy.linkextractors import LinkExtractor


class Command(ScrapyCommand):

    # Log stats every second and stop the crawl after ten seconds.
    default_settings = {
        'LOG_LEVEL': 'INFO',
        'LOGSTATS_INTERVAL': 1,
        'CLOSESPIDER_TIMEOUT': 10,
    }

    def short_desc(self):
        return "Run quick benchmark test"

    def run(self, args, opts):
        # Serve links locally for the duration of the crawl.
        with _BenchServer():
            self.crawler_process.crawl(_BenchSpider, total=100000)
            self.crawler_process.start()


class _BenchServer(object):

    def __enter__(self):
        from scrapy.utils.test import get_testenv
        # -u keeps the child's stdout unbuffered so its readiness line
        # arrives immediately.
        pargs = [sys.executable, '-u', '-m', 'scrapy.utils.benchserver']
        self.proc = subprocess.Popen(pargs, stdout=subprocess.PIPE,
                                     env=get_testenv())
        # Block until the server prints its first line, i.e. is listening.
        self.proc.stdout.readline()

    def __exit__(self, exc_type, exc_value, traceback):
        self.proc.kill()
        self.proc.wait()
        time.sleep(0.2)  # brief pause to let the process shut down


class _BenchSpider(scrapy.Spider):
    """A spider that follows all links"""
    name = 'follow'
    total = 10000
    show = 20
    baseurl = 'http://localhost:8998'
    link_extractor = LinkExtractor()

    def start_requests(self):
        qargs = {'total': self.total, 'show': self.show}
        url = '{}?{}'.format(self.baseurl, urlencode(qargs, doseq=1))
        return [scrapy.Request(url, dont_filter=True)]

    def parse(self, response):
        for link in self.link_extractor.extract_links(response):
            yield scrapy.Request(link.url, callback=self.parse)
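
# ---------------------------------------------------------------------------
# Illustration (a separate file, not part of bench.py): _BenchServer spawns
# scrapy.utils.benchserver in a subprocess and blocks on a single readline()
# until the server announces it is listening. The stand-in below sketches
# that contract using only the standard library; the handler name, link
# markup, and defaults are assumptions for the sketch -- the real module is
# Twisted-based. Note the crawl is bounded by CLOSESPIDER_TIMEOUT (10 s
# above), not by running out of links.

import random
from http.server import BaseHTTPRequestHandler, HTTPServer
from urllib.parse import urlparse, parse_qs, urlencode


class _SketchHandler(BaseHTTPRequestHandler):

    def do_GET(self):
        qs = parse_qs(urlparse(self.path).query)
        total = int(qs.get('total', ['100'])[0])
        show = int(qs.get('show', ['20'])[0])
        # Emit `show` links per page; a random `n` keeps most URLs unique
        # so the spider's dupefilter does not starve the benchmark.
        links = ''.join(
            "<a href='/follow?{}'>follow {}</a><br>".format(
                urlencode({'total': total, 'show': show,
                           'n': random.randint(1, total)}), i)
            for i in range(show))
        body = '<html><body>{}</body></html>'.format(links).encode('utf8')
        self.send_response(200)
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, fmt, *args):
        pass  # keep stdout quiet; only the readiness line should appear


if __name__ == '__main__':
    server = HTTPServer(('localhost', 8998), _SketchHandler)
    # The one line _BenchServer.__enter__ waits for before yielding.
    print('listening on http://localhost:8998', flush=True)
    server.serve_forever()

# Usage sketch: run this file in one terminal, and _BenchSpider (pointed at
# http://localhost:8998) will crawl its generated links indefinitely.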