test__datasource.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. from __future__ import division, absolute_import, print_function
  2. import os
  3. import sys
  4. import pytest
  5. from tempfile import mkdtemp, mkstemp, NamedTemporaryFile
  6. from shutil import rmtree
  7. import numpy.lib._datasource as datasource
  8. from numpy.testing import (
  9. assert_, assert_equal, assert_raises, assert_warns
  10. )
  11. if sys.version_info[0] >= 3:
  12. import urllib.request as urllib_request
  13. from urllib.parse import urlparse
  14. from urllib.error import URLError
  15. else:
  16. import urllib2 as urllib_request
  17. from urlparse import urlparse
  18. from urllib2 import URLError
  19. def urlopen_stub(url, data=None):
  20. '''Stub to replace urlopen for testing.'''
  21. if url == valid_httpurl():
  22. tmpfile = NamedTemporaryFile(prefix='urltmp_')
  23. return tmpfile
  24. else:
  25. raise URLError('Name or service not known')
  26. # setup and teardown
  27. old_urlopen = None
  28. def setup_module():
  29. global old_urlopen
  30. old_urlopen = urllib_request.urlopen
  31. urllib_request.urlopen = urlopen_stub
  32. def teardown_module():
  33. urllib_request.urlopen = old_urlopen
  34. # A valid website for more robust testing
  35. http_path = 'http://www.google.com/'
  36. http_file = 'index.html'
  37. http_fakepath = 'http://fake.abc.web/site/'
  38. http_fakefile = 'fake.txt'
  39. malicious_files = ['/etc/shadow', '../../shadow',
  40. '..\\system.dat', 'c:\\windows\\system.dat']
  41. magic_line = b'three is the magic number'
  42. # Utility functions used by many tests
  43. def valid_textfile(filedir):
  44. # Generate and return a valid temporary file.
  45. fd, path = mkstemp(suffix='.txt', prefix='dstmp_', dir=filedir, text=True)
  46. os.close(fd)
  47. return path
  48. def invalid_textfile(filedir):
  49. # Generate and return an invalid filename.
  50. fd, path = mkstemp(suffix='.txt', prefix='dstmp_', dir=filedir)
  51. os.close(fd)
  52. os.remove(path)
  53. return path
  54. def valid_httpurl():
  55. return http_path+http_file
  56. def invalid_httpurl():
  57. return http_fakepath+http_fakefile
  58. def valid_baseurl():
  59. return http_path
  60. def invalid_baseurl():
  61. return http_fakepath
  62. def valid_httpfile():
  63. return http_file
  64. def invalid_httpfile():
  65. return http_fakefile
  66. class TestDataSourceOpen(object):
  67. def setup(self):
  68. self.tmpdir = mkdtemp()
  69. self.ds = datasource.DataSource(self.tmpdir)
  70. def teardown(self):
  71. rmtree(self.tmpdir)
  72. del self.ds
  73. def test_ValidHTTP(self):
  74. fh = self.ds.open(valid_httpurl())
  75. assert_(fh)
  76. fh.close()
  77. def test_InvalidHTTP(self):
  78. url = invalid_httpurl()
  79. assert_raises(IOError, self.ds.open, url)
  80. try:
  81. self.ds.open(url)
  82. except IOError as e:
  83. # Regression test for bug fixed in r4342.
  84. assert_(e.errno is None)
  85. def test_InvalidHTTPCacheURLError(self):
  86. assert_raises(URLError, self.ds._cache, invalid_httpurl())
  87. def test_ValidFile(self):
  88. local_file = valid_textfile(self.tmpdir)
  89. fh = self.ds.open(local_file)
  90. assert_(fh)
  91. fh.close()
  92. def test_InvalidFile(self):
  93. invalid_file = invalid_textfile(self.tmpdir)
  94. assert_raises(IOError, self.ds.open, invalid_file)
  95. def test_ValidGzipFile(self):
  96. try:
  97. import gzip
  98. except ImportError:
  99. # We don't have the gzip capabilities to test.
  100. pytest.skip()
  101. # Test datasource's internal file_opener for Gzip files.
  102. filepath = os.path.join(self.tmpdir, 'foobar.txt.gz')
  103. fp = gzip.open(filepath, 'w')
  104. fp.write(magic_line)
  105. fp.close()
  106. fp = self.ds.open(filepath)
  107. result = fp.readline()
  108. fp.close()
  109. assert_equal(magic_line, result)
  110. def test_ValidBz2File(self):
  111. try:
  112. import bz2
  113. except ImportError:
  114. # We don't have the bz2 capabilities to test.
  115. pytest.skip()
  116. # Test datasource's internal file_opener for BZip2 files.
  117. filepath = os.path.join(self.tmpdir, 'foobar.txt.bz2')
  118. fp = bz2.BZ2File(filepath, 'w')
  119. fp.write(magic_line)
  120. fp.close()
  121. fp = self.ds.open(filepath)
  122. result = fp.readline()
  123. fp.close()
  124. assert_equal(magic_line, result)
  125. @pytest.mark.skipif(sys.version_info[0] >= 3, reason="Python 2 only")
  126. def test_Bz2File_text_mode_warning(self):
  127. try:
  128. import bz2
  129. except ImportError:
  130. # We don't have the bz2 capabilities to test.
  131. pytest.skip()
  132. # Test datasource's internal file_opener for BZip2 files.
  133. filepath = os.path.join(self.tmpdir, 'foobar.txt.bz2')
  134. fp = bz2.BZ2File(filepath, 'w')
  135. fp.write(magic_line)
  136. fp.close()
  137. with assert_warns(RuntimeWarning):
  138. fp = self.ds.open(filepath, 'rt')
  139. result = fp.readline()
  140. fp.close()
  141. assert_equal(magic_line, result)
  142. class TestDataSourceExists(object):
  143. def setup(self):
  144. self.tmpdir = mkdtemp()
  145. self.ds = datasource.DataSource(self.tmpdir)
  146. def teardown(self):
  147. rmtree(self.tmpdir)
  148. del self.ds
  149. def test_ValidHTTP(self):
  150. assert_(self.ds.exists(valid_httpurl()))
  151. def test_InvalidHTTP(self):
  152. assert_equal(self.ds.exists(invalid_httpurl()), False)
  153. def test_ValidFile(self):
  154. # Test valid file in destpath
  155. tmpfile = valid_textfile(self.tmpdir)
  156. assert_(self.ds.exists(tmpfile))
  157. # Test valid local file not in destpath
  158. localdir = mkdtemp()
  159. tmpfile = valid_textfile(localdir)
  160. assert_(self.ds.exists(tmpfile))
  161. rmtree(localdir)
  162. def test_InvalidFile(self):
  163. tmpfile = invalid_textfile(self.tmpdir)
  164. assert_equal(self.ds.exists(tmpfile), False)
  165. class TestDataSourceAbspath(object):
  166. def setup(self):
  167. self.tmpdir = os.path.abspath(mkdtemp())
  168. self.ds = datasource.DataSource(self.tmpdir)
  169. def teardown(self):
  170. rmtree(self.tmpdir)
  171. del self.ds
  172. def test_ValidHTTP(self):
  173. scheme, netloc, upath, pms, qry, frg = urlparse(valid_httpurl())
  174. local_path = os.path.join(self.tmpdir, netloc,
  175. upath.strip(os.sep).strip('/'))
  176. assert_equal(local_path, self.ds.abspath(valid_httpurl()))
  177. def test_ValidFile(self):
  178. tmpfile = valid_textfile(self.tmpdir)
  179. tmpfilename = os.path.split(tmpfile)[-1]
  180. # Test with filename only
  181. assert_equal(tmpfile, self.ds.abspath(tmpfilename))
  182. # Test filename with complete path
  183. assert_equal(tmpfile, self.ds.abspath(tmpfile))
  184. def test_InvalidHTTP(self):
  185. scheme, netloc, upath, pms, qry, frg = urlparse(invalid_httpurl())
  186. invalidhttp = os.path.join(self.tmpdir, netloc,
  187. upath.strip(os.sep).strip('/'))
  188. assert_(invalidhttp != self.ds.abspath(valid_httpurl()))
  189. def test_InvalidFile(self):
  190. invalidfile = valid_textfile(self.tmpdir)
  191. tmpfile = valid_textfile(self.tmpdir)
  192. tmpfilename = os.path.split(tmpfile)[-1]
  193. # Test with filename only
  194. assert_(invalidfile != self.ds.abspath(tmpfilename))
  195. # Test filename with complete path
  196. assert_(invalidfile != self.ds.abspath(tmpfile))
  197. def test_sandboxing(self):
  198. tmpfile = valid_textfile(self.tmpdir)
  199. tmpfilename = os.path.split(tmpfile)[-1]
  200. tmp_path = lambda x: os.path.abspath(self.ds.abspath(x))
  201. assert_(tmp_path(valid_httpurl()).startswith(self.tmpdir))
  202. assert_(tmp_path(invalid_httpurl()).startswith(self.tmpdir))
  203. assert_(tmp_path(tmpfile).startswith(self.tmpdir))
  204. assert_(tmp_path(tmpfilename).startswith(self.tmpdir))
  205. for fn in malicious_files:
  206. assert_(tmp_path(http_path+fn).startswith(self.tmpdir))
  207. assert_(tmp_path(fn).startswith(self.tmpdir))
  208. def test_windows_os_sep(self):
  209. orig_os_sep = os.sep
  210. try:
  211. os.sep = '\\'
  212. self.test_ValidHTTP()
  213. self.test_ValidFile()
  214. self.test_InvalidHTTP()
  215. self.test_InvalidFile()
  216. self.test_sandboxing()
  217. finally:
  218. os.sep = orig_os_sep
  219. class TestRepositoryAbspath(object):
  220. def setup(self):
  221. self.tmpdir = os.path.abspath(mkdtemp())
  222. self.repos = datasource.Repository(valid_baseurl(), self.tmpdir)
  223. def teardown(self):
  224. rmtree(self.tmpdir)
  225. del self.repos
  226. def test_ValidHTTP(self):
  227. scheme, netloc, upath, pms, qry, frg = urlparse(valid_httpurl())
  228. local_path = os.path.join(self.repos._destpath, netloc,
  229. upath.strip(os.sep).strip('/'))
  230. filepath = self.repos.abspath(valid_httpfile())
  231. assert_equal(local_path, filepath)
  232. def test_sandboxing(self):
  233. tmp_path = lambda x: os.path.abspath(self.repos.abspath(x))
  234. assert_(tmp_path(valid_httpfile()).startswith(self.tmpdir))
  235. for fn in malicious_files:
  236. assert_(tmp_path(http_path+fn).startswith(self.tmpdir))
  237. assert_(tmp_path(fn).startswith(self.tmpdir))
  238. def test_windows_os_sep(self):
  239. orig_os_sep = os.sep
  240. try:
  241. os.sep = '\\'
  242. self.test_ValidHTTP()
  243. self.test_sandboxing()
  244. finally:
  245. os.sep = orig_os_sep
  246. class TestRepositoryExists(object):
  247. def setup(self):
  248. self.tmpdir = mkdtemp()
  249. self.repos = datasource.Repository(valid_baseurl(), self.tmpdir)
  250. def teardown(self):
  251. rmtree(self.tmpdir)
  252. del self.repos
  253. def test_ValidFile(self):
  254. # Create local temp file
  255. tmpfile = valid_textfile(self.tmpdir)
  256. assert_(self.repos.exists(tmpfile))
  257. def test_InvalidFile(self):
  258. tmpfile = invalid_textfile(self.tmpdir)
  259. assert_equal(self.repos.exists(tmpfile), False)
  260. def test_RemoveHTTPFile(self):
  261. assert_(self.repos.exists(valid_httpurl()))
  262. def test_CachedHTTPFile(self):
  263. localfile = valid_httpurl()
  264. # Create a locally cached temp file with an URL based
  265. # directory structure. This is similar to what Repository.open
  266. # would do.
  267. scheme, netloc, upath, pms, qry, frg = urlparse(localfile)
  268. local_path = os.path.join(self.repos._destpath, netloc)
  269. os.mkdir(local_path, 0o0700)
  270. tmpfile = valid_textfile(local_path)
  271. assert_(self.repos.exists(tmpfile))
  272. class TestOpenFunc(object):
  273. def setup(self):
  274. self.tmpdir = mkdtemp()
  275. def teardown(self):
  276. rmtree(self.tmpdir)
  277. def test_DataSourceOpen(self):
  278. local_file = valid_textfile(self.tmpdir)
  279. # Test case where destpath is passed in
  280. fp = datasource.open(local_file, destpath=self.tmpdir)
  281. assert_(fp)
  282. fp.close()
  283. # Test case where default destpath is used
  284. fp = datasource.open(local_file)
  285. assert_(fp)
  286. fp.close()
  287. def test_del_attr_handling():
  288. # DataSource __del__ can be called
  289. # even if __init__ fails when the
  290. # Exception object is caught by the
  291. # caller as happens in refguide_check
  292. # is_deprecated() function
  293. ds = datasource.DataSource()
  294. # simulate failed __init__ by removing key attribute
  295. # produced within __init__ and expected by __del__
  296. del ds._istmpdest
  297. # should not raise an AttributeError if __del__
  298. # gracefully handles failed __init__:
  299. ds.__del__()