# useragent.py

"""Set User-Agent header per spider or use a default value from settings"""
from scrapy import signals
  3. class UserAgentMiddleware(object):
  4. """This middleware allows spiders to override the user_agent"""
  5. def __init__(self, user_agent='Scrapy'):
  6. self.user_agent = user_agent
  7. @classmethod
  8. def from_crawler(cls, crawler):
  9. o = cls(crawler.settings['USER_AGENT'])
  10. crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
  11. return o
  12. def spider_opened(self, spider):
  13. self.user_agent = getattr(spider, 'user_agent', self.user_agent)
  14. def process_request(self, request, spider):
  15. if self.user_agent:
  16. request.headers.setdefault(b'User-Agent', self.user_agent)