# test.py

  1. """
  2. This module contains some assorted functions used in tests
  3. """
  4. from __future__ import absolute_import
  5. import os
  6. from importlib import import_module
  7. from twisted.trial.unittest import SkipTest
  8. from scrapy.exceptions import NotConfigured
  9. from scrapy.utils.boto import is_botocore


def assert_aws_environ():
    """Asserts the current environment is suitable for running AWS tests.
    Raises SkipTest with the reason if it's not.
    """
    skip_if_no_boto()
    if 'AWS_ACCESS_KEY_ID' not in os.environ:
        raise SkipTest("AWS keys not found")
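
# Usage sketch (not part of the original module): AWS-backed tests call this
# guard in setUp() so they are skipped, rather than failed, when credentials
# are missing. The test class below is hypothetical.
#
#     class S3TestCase(unittest.TestCase):
#         def setUp(self):
#             assert_aws_environ()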


def assert_gcs_environ():
    if 'GCS_PROJECT_ID' not in os.environ:
        raise SkipTest("GCS_PROJECT_ID not found")


def skip_if_no_boto():
    try:
        is_botocore()
    except NotConfigured as e:
        raise SkipTest(e)


def get_s3_content_and_delete(bucket, path, with_key=False):
    """ Get content from s3 key, and delete key afterwards.
    """
    if is_botocore():
        import botocore.session
        session = botocore.session.get_session()
        client = session.create_client('s3')
        key = client.get_object(Bucket=bucket, Key=path)
        content = key['Body'].read()
        client.delete_object(Bucket=bucket, Key=path)
    else:
        import boto
        # assuming boto=2.2.2
        bucket = boto.connect_s3().get_bucket(bucket, validate=False)
        key = bucket.get_key(path)
        content = key.get_contents_as_string()
        bucket.delete_key(path)
    return (content, key) if with_key else content
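
# Usage sketch (not part of the original module): a feed-export test can read
# back what a spider wrote to S3 and clean up in one call. The bucket and key
# names below are hypothetical.
#
#     content = get_s3_content_and_delete('scrapy-test-bucket', 'feeds/items.json')
#     content, key = get_s3_content_and_delete('scrapy-test-bucket',
#                                              'feeds/items.json', with_key=True)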


def get_gcs_content_and_delete(bucket, path):
    from google.cloud import storage
    client = storage.Client(project=os.environ.get('GCS_PROJECT_ID'))
    bucket = client.get_bucket(bucket)
    blob = bucket.get_blob(path)
    content = blob.download_as_string()
    acl = list(blob.acl)  # load the ACL before the blob is deleted
    bucket.delete_blob(path)
    return content, acl, blob


def get_crawler(spidercls=None, settings_dict=None):
    """Return an unconfigured Crawler object. If settings_dict is given, it
    will be used to populate the crawler settings with a project level
    priority.
    """
    from scrapy.crawler import CrawlerRunner
    from scrapy.spiders import Spider
    runner = CrawlerRunner(settings_dict)
    return runner.create_crawler(spidercls or Spider)
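
# Usage sketch (not part of the original module): building a crawler with
# project-level settings and checking that they took effect. The spider class
# and setting below are hypothetical.
#
#     from scrapy.spiders import Spider
#
#     class _ExampleSpider(Spider):
#         name = 'example'
#
#     crawler = get_crawler(_ExampleSpider, {'DOWNLOAD_DELAY': 2.0})
#     assert crawler.settings.getfloat('DOWNLOAD_DELAY') == 2.0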


def get_pythonpath():
    """Return a PYTHONPATH suitable to use in processes so that they find this
    installation of Scrapy"""
    scrapy_path = import_module('scrapy').__path__[0]
    return os.path.dirname(scrapy_path) + os.pathsep + os.environ.get('PYTHONPATH', '')


def get_testenv():
    """Return an OS environment dict suitable to fork processes that need to import
    this installation of Scrapy, instead of a system installed one.
    """
    env = os.environ.copy()
    env['PYTHONPATH'] = get_pythonpath()
    return env
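
# Usage sketch (not part of the original module): spawning a child process
# that imports this Scrapy checkout rather than a system-installed one.
#
#     import subprocess, sys
#     subprocess.check_call(
#         [sys.executable, '-c', 'import scrapy; print(scrapy.__path__[0])'],
#         env=get_testenv())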


def assert_samelines(testcase, text1, text2, msg=None):
    """Asserts text1 and text2 have the same lines, ignoring differences in
    line endings between platforms
    """
    testcase.assertEqual(text1.splitlines(), text2.splitlines(), msg)
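
# Usage sketch (not part of the original module): the two strings below differ
# only in line endings, so the assertion passes on any platform; `self` is the
# calling TestCase.
#
#     assert_samelines(self, 'a\nb\n', 'a\r\nb\r\n')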