test_io.py 95 KB


  1. from __future__ import division, absolute_import, print_function
  2. import sys
  3. import gzip
  4. import os
  5. import threading
  6. import time
  7. import warnings
  8. import io
  9. import re
  10. import pytest
  11. from tempfile import NamedTemporaryFile
  12. from io import BytesIO, StringIO
  13. from datetime import datetime
  14. import locale
  15. import numpy as np
  16. import numpy.ma as ma
  17. from numpy.lib._iotools import ConverterError, ConversionWarning
  18. from numpy.compat import asbytes, bytes, Path
  19. from numpy.ma.testutils import assert_equal
  20. from numpy.testing import (
  21. assert_warns, assert_, assert_raises_regex, assert_raises,
  22. assert_allclose, assert_array_equal, temppath, tempdir, IS_PYPY,
  23. HAS_REFCOUNT, suppress_warnings, assert_no_gc_cycles,
  24. )
  25. class TextIO(BytesIO):
  26. """Helper IO class.
  27. Writes encode strings to bytes if needed, reads return bytes.
  28. This makes it easier to emulate files opened in binary mode
  29. without needing to explicitly convert strings to bytes in
  30. setting up the test data.
  31. """
  32. def __init__(self, s=""):
  33. BytesIO.__init__(self, asbytes(s))
  34. def write(self, s):
  35. BytesIO.write(self, asbytes(s))
  36. def writelines(self, lines):
  37. BytesIO.writelines(self, [asbytes(s) for s in lines])
  38. MAJVER, MINVER = sys.version_info[:2]
  39. IS_64BIT = sys.maxsize > 2**32
  40. try:
  41. import bz2
  42. HAS_BZ2 = True
  43. except ImportError:
  44. HAS_BZ2 = False
  45. try:
  46. import lzma
  47. HAS_LZMA = True
  48. except ImportError:
  49. HAS_LZMA = False
  50. def strptime(s, fmt=None):
  51. """
  52. This function is available in the datetime module only from Python >=
  53. 2.5.
  54. """
  55. if type(s) == bytes:
  56. s = s.decode("latin1")
  57. return datetime(*time.strptime(s, fmt)[:3])
  58. class RoundtripTest(object):
  59. def roundtrip(self, save_func, *args, **kwargs):
  60. """
  61. save_func : callable
  62. Function used to save arrays to file.
  63. file_on_disk : bool
  64. If true, store the file on disk, instead of in a
  65. string buffer.
  66. save_kwds : dict
  67. Parameters passed to `save_func`.
  68. load_kwds : dict
  69. Parameters passed to `numpy.load`.
  70. args : tuple of arrays
  71. Arrays stored to file.
  72. """
  73. save_kwds = kwargs.get('save_kwds', {})
  74. load_kwds = kwargs.get('load_kwds', {"allow_pickle": True})
  75. file_on_disk = kwargs.get('file_on_disk', False)
  76. if file_on_disk:
  77. target_file = NamedTemporaryFile(delete=False)
  78. load_file = target_file.name
  79. else:
  80. target_file = BytesIO()
  81. load_file = target_file
  82. try:
  83. arr = args
  84. save_func(target_file, *arr, **save_kwds)
  85. target_file.flush()
  86. target_file.seek(0)
  87. if sys.platform == 'win32' and not isinstance(target_file, BytesIO):
  88. target_file.close()
  89. arr_reloaded = np.load(load_file, **load_kwds)
  90. self.arr = arr
  91. self.arr_reloaded = arr_reloaded
  92. finally:
  93. if not isinstance(target_file, BytesIO):
  94. target_file.close()
  95. # holds an open file descriptor so it can't be deleted on win
  96. if 'arr_reloaded' in locals():
  97. if not isinstance(arr_reloaded, np.lib.npyio.NpzFile):
  98. os.remove(target_file.name)
  99. def check_roundtrips(self, a):
  100. self.roundtrip(a)
  101. self.roundtrip(a, file_on_disk=True)
  102. self.roundtrip(np.asfortranarray(a))
  103. self.roundtrip(np.asfortranarray(a), file_on_disk=True)
  104. if a.shape[0] > 1:
  105. # neither C nor Fortran contiguous for 2D arrays or more
  106. self.roundtrip(np.asfortranarray(a)[1:])
  107. self.roundtrip(np.asfortranarray(a)[1:], file_on_disk=True)
  108. def test_array(self):
  109. a = np.array([], float)
  110. self.check_roundtrips(a)
  111. a = np.array([[1, 2], [3, 4]], float)
  112. self.check_roundtrips(a)
  113. a = np.array([[1, 2], [3, 4]], int)
  114. self.check_roundtrips(a)
  115. a = np.array([[1 + 5j, 2 + 6j], [3 + 7j, 4 + 8j]], dtype=np.csingle)
  116. self.check_roundtrips(a)
  117. a = np.array([[1 + 5j, 2 + 6j], [3 + 7j, 4 + 8j]], dtype=np.cdouble)
  118. self.check_roundtrips(a)
  119. def test_array_object(self):
  120. a = np.array([], object)
  121. self.check_roundtrips(a)
  122. a = np.array([[1, 2], [3, 4]], object)
  123. self.check_roundtrips(a)
  124. def test_1D(self):
  125. a = np.array([1, 2, 3, 4], int)
  126. self.roundtrip(a)
  127. @pytest.mark.skipif(sys.platform == 'win32', reason="Fails on Win32")
  128. def test_mmap(self):
  129. a = np.array([[1, 2.5], [4, 7.3]])
  130. self.roundtrip(a, file_on_disk=True, load_kwds={'mmap_mode': 'r'})
  131. a = np.asfortranarray([[1, 2.5], [4, 7.3]])
  132. self.roundtrip(a, file_on_disk=True, load_kwds={'mmap_mode': 'r'})
  133. def test_record(self):
  134. a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
  135. self.check_roundtrips(a)
  136. @pytest.mark.slow
  137. def test_format_2_0(self):
  138. dt = [(("%d" % i) * 100, float) for i in range(500)]
  139. a = np.ones(1000, dtype=dt)
  140. with warnings.catch_warnings(record=True):
  141. warnings.filterwarnings('always', '', UserWarning)
  142. self.check_roundtrips(a)
  143. class TestSaveLoad(RoundtripTest):
  144. def roundtrip(self, *args, **kwargs):
  145. RoundtripTest.roundtrip(self, np.save, *args, **kwargs)
  146. assert_equal(self.arr[0], self.arr_reloaded)
  147. assert_equal(self.arr[0].dtype, self.arr_reloaded.dtype)
  148. assert_equal(self.arr[0].flags.fnc, self.arr_reloaded.flags.fnc)
  149. class TestSavezLoad(RoundtripTest):
  150. def roundtrip(self, *args, **kwargs):
  151. RoundtripTest.roundtrip(self, np.savez, *args, **kwargs)
  152. try:
  153. for n, arr in enumerate(self.arr):
  154. reloaded = self.arr_reloaded['arr_%d' % n]
  155. assert_equal(arr, reloaded)
  156. assert_equal(arr.dtype, reloaded.dtype)
  157. assert_equal(arr.flags.fnc, reloaded.flags.fnc)
  158. finally:
  159. # delete tempfile, must be done here on windows
  160. if self.arr_reloaded.fid:
  161. self.arr_reloaded.fid.close()
  162. os.remove(self.arr_reloaded.fid.name)
  163. @pytest.mark.skipif(not IS_64BIT, reason="Needs 64bit platform")
  164. @pytest.mark.slow
  165. def test_big_arrays(self):
  166. L = (1 << 31) + 100000
  167. a = np.empty(L, dtype=np.uint8)
  168. with temppath(prefix="numpy_test_big_arrays_", suffix=".npz") as tmp:
  169. np.savez(tmp, a=a)
  170. del a
  171. npfile = np.load(tmp)
  172. a = npfile['a'] # Should succeed
  173. npfile.close()
  174. del a # Avoid pyflakes unused variable warning.
  175. def test_multiple_arrays(self):
  176. a = np.array([[1, 2], [3, 4]], float)
  177. b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex)
  178. self.roundtrip(a, b)
  179. def test_named_arrays(self):
  180. a = np.array([[1, 2], [3, 4]], float)
  181. b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex)
  182. c = BytesIO()
  183. np.savez(c, file_a=a, file_b=b)
  184. c.seek(0)
  185. l = np.load(c)
  186. assert_equal(a, l['file_a'])
  187. assert_equal(b, l['file_b'])
  188. def test_BagObj(self):
  189. a = np.array([[1, 2], [3, 4]], float)
  190. b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex)
  191. c = BytesIO()
  192. np.savez(c, file_a=a, file_b=b)
  193. c.seek(0)
  194. l = np.load(c)
  195. assert_equal(sorted(dir(l.f)), ['file_a','file_b'])
  196. assert_equal(a, l.f.file_a)
  197. assert_equal(b, l.f.file_b)
  198. def test_savez_filename_clashes(self):
  199. # Test that issue #852 is fixed
  200. # and savez functions in multithreaded environment
  201. def writer(error_list):
  202. with temppath(suffix='.npz') as tmp:
  203. arr = np.random.randn(500, 500)
  204. try:
  205. np.savez(tmp, arr=arr)
  206. except OSError as err:
  207. error_list.append(err)
  208. errors = []
  209. threads = [threading.Thread(target=writer, args=(errors,))
  210. for j in range(3)]
  211. for t in threads:
  212. t.start()
  213. for t in threads:
  214. t.join()
  215. if errors:
  216. raise AssertionError(errors)
  217. def test_not_closing_opened_fid(self):
  218. # Test that issue #2178 is fixed:
  219. # verify could seek on 'loaded' file
  220. with temppath(suffix='.npz') as tmp:
  221. with open(tmp, 'wb') as fp:
  222. np.savez(fp, data='LOVELY LOAD')
  223. with open(tmp, 'rb', 10000) as fp:
  224. fp.seek(0)
  225. assert_(not fp.closed)
  226. np.load(fp)['data']
  227. # fp must not get closed by .load
  228. assert_(not fp.closed)
  229. fp.seek(0)
  230. assert_(not fp.closed)
  231. #FIXME: Is this still true?
  232. @pytest.mark.skipif(IS_PYPY, reason="Missing context manager on PyPy")
  233. def test_closing_fid(self):
  234. # Test that issue #1517 (too many opened files) remains closed
  235. # It might be a "weak" test since failed to get triggered on
  236. # e.g. Debian sid of 2012 Jul 05 but was reported to
  237. # trigger the failure on Ubuntu 10.04:
  238. # http://projects.scipy.org/numpy/ticket/1517#comment:2
  239. with temppath(suffix='.npz') as tmp:
  240. np.savez(tmp, data='LOVELY LOAD')
  241. # We need to check if the garbage collector can properly close
  242. # numpy npz file returned by np.load when their reference count
  243. # goes to zero. Python 3 running in debug mode raises a
  244. # ResourceWarning when file closing is left to the garbage
  245. # collector, so we catch the warnings. Because ResourceWarning
  246. # is unknown in Python < 3.x, we take the easy way out and
  247. # catch all warnings.
  248. with suppress_warnings() as sup:
  249. sup.filter(Warning) # TODO: specify exact message
  250. for i in range(1, 1025):
  251. try:
  252. np.load(tmp)["data"]
  253. except Exception as e:
  254. msg = "Failed to load data from a file: %s" % e
  255. raise AssertionError(msg)
  256. def test_closing_zipfile_after_load(self):
  257. # Check that zipfile owns file and can close it. This needs to
  258. # pass a file name to load for the test. On windows failure will
  259. # cause a second error will be raised when the attempt to remove
  260. # the open file is made.
  261. prefix = 'numpy_test_closing_zipfile_after_load_'
  262. with temppath(suffix='.npz', prefix=prefix) as tmp:
  263. np.savez(tmp, lab='place holder')
  264. data = np.load(tmp)
  265. fp = data.zip.fp
  266. data.close()
  267. assert_(fp.closed)
  268. class TestSaveTxt(object):
  269. def test_array(self):
  270. a = np.array([[1, 2], [3, 4]], float)
  271. fmt = "%.18e"
  272. c = BytesIO()
  273. np.savetxt(c, a, fmt=fmt)
  274. c.seek(0)
  275. assert_equal(c.readlines(),
  276. [asbytes((fmt + ' ' + fmt + '\n') % (1, 2)),
  277. asbytes((fmt + ' ' + fmt + '\n') % (3, 4))])
  278. a = np.array([[1, 2], [3, 4]], int)
  279. c = BytesIO()
  280. np.savetxt(c, a, fmt='%d')
  281. c.seek(0)
  282. assert_equal(c.readlines(), [b'1 2\n', b'3 4\n'])
  283. def test_1D(self):
  284. a = np.array([1, 2, 3, 4], int)
  285. c = BytesIO()
  286. np.savetxt(c, a, fmt='%d')
  287. c.seek(0)
  288. lines = c.readlines()
  289. assert_equal(lines, [b'1\n', b'2\n', b'3\n', b'4\n'])
  290. def test_0D_3D(self):
  291. c = BytesIO()
  292. assert_raises(ValueError, np.savetxt, c, np.array(1))
  293. assert_raises(ValueError, np.savetxt, c, np.array([[[1], [2]]]))
  294. def test_structured(self):
  295. a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
  296. c = BytesIO()
  297. np.savetxt(c, a, fmt='%d')
  298. c.seek(0)
  299. assert_equal(c.readlines(), [b'1 2\n', b'3 4\n'])
  300. def test_structured_padded(self):
  301. # gh-13297
  302. a = np.array([(1, 2, 3),(4, 5, 6)], dtype=[
  303. ('foo', 'i4'), ('bar', 'i4'), ('baz', 'i4')
  304. ])
  305. c = BytesIO()
  306. np.savetxt(c, a[['foo', 'baz']], fmt='%d')
  307. c.seek(0)
  308. assert_equal(c.readlines(), [b'1 3\n', b'4 6\n'])
  309. @pytest.mark.skipif(Path is None, reason="No pathlib.Path")
  310. def test_multifield_view(self):
  311. a = np.ones(1, dtype=[('x', 'i4'), ('y', 'i4'), ('z', 'f4')])
  312. v = a[['x', 'z']]
  313. with temppath(suffix='.npy') as path:
  314. path = Path(path)
  315. np.save(path, v)
  316. data = np.load(path)
  317. assert_array_equal(data, v)
  318. def test_delimiter(self):
  319. a = np.array([[1., 2.], [3., 4.]])
  320. c = BytesIO()
  321. np.savetxt(c, a, delimiter=',', fmt='%d')
  322. c.seek(0)
  323. assert_equal(c.readlines(), [b'1,2\n', b'3,4\n'])
  324. def test_format(self):
  325. a = np.array([(1, 2), (3, 4)])
  326. c = BytesIO()
  327. # Sequence of formats
  328. np.savetxt(c, a, fmt=['%02d', '%3.1f'])
  329. c.seek(0)
  330. assert_equal(c.readlines(), [b'01 2.0\n', b'03 4.0\n'])
  331. # A single multiformat string
  332. c = BytesIO()
  333. np.savetxt(c, a, fmt='%02d : %3.1f')
  334. c.seek(0)
  335. lines = c.readlines()
  336. assert_equal(lines, [b'01 : 2.0\n', b'03 : 4.0\n'])
  337. # Specify delimiter, should be overridden
  338. c = BytesIO()
  339. np.savetxt(c, a, fmt='%02d : %3.1f', delimiter=',')
  340. c.seek(0)
  341. lines = c.readlines()
  342. assert_equal(lines, [b'01 : 2.0\n', b'03 : 4.0\n'])
  343. # Bad fmt, should raise a ValueError
  344. c = BytesIO()
  345. assert_raises(ValueError, np.savetxt, c, a, fmt=99)
  346. def test_header_footer(self):
  347. # Test the functionality of the header and footer keyword argument.
  348. c = BytesIO()
  349. a = np.array([(1, 2), (3, 4)], dtype=int)
  350. test_header_footer = 'Test header / footer'
  351. # Test the header keyword argument
  352. np.savetxt(c, a, fmt='%1d', header=test_header_footer)
  353. c.seek(0)
  354. assert_equal(c.read(),
  355. asbytes('# ' + test_header_footer + '\n1 2\n3 4\n'))
  356. # Test the footer keyword argument
  357. c = BytesIO()
  358. np.savetxt(c, a, fmt='%1d', footer=test_header_footer)
  359. c.seek(0)
  360. assert_equal(c.read(),
  361. asbytes('1 2\n3 4\n# ' + test_header_footer + '\n'))
  362. # Test the commentstr keyword argument used on the header
  363. c = BytesIO()
  364. commentstr = '% '
  365. np.savetxt(c, a, fmt='%1d',
  366. header=test_header_footer, comments=commentstr)
  367. c.seek(0)
  368. assert_equal(c.read(),
  369. asbytes(commentstr + test_header_footer + '\n' + '1 2\n3 4\n'))
  370. # Test the commentstr keyword argument used on the footer
  371. c = BytesIO()
  372. commentstr = '% '
  373. np.savetxt(c, a, fmt='%1d',
  374. footer=test_header_footer, comments=commentstr)
  375. c.seek(0)
  376. assert_equal(c.read(),
  377. asbytes('1 2\n3 4\n' + commentstr + test_header_footer + '\n'))
  378. def test_file_roundtrip(self):
  379. with temppath() as name:
  380. a = np.array([(1, 2), (3, 4)])
  381. np.savetxt(name, a)
  382. b = np.loadtxt(name)
  383. assert_array_equal(a, b)
  384. def test_complex_arrays(self):
  385. ncols = 2
  386. nrows = 2
  387. a = np.zeros((ncols, nrows), dtype=np.complex128)
  388. re = np.pi
  389. im = np.e
  390. a[:] = re + 1.0j * im
  391. # One format only
  392. c = BytesIO()
  393. np.savetxt(c, a, fmt=' %+.3e')
  394. c.seek(0)
  395. lines = c.readlines()
  396. assert_equal(
  397. lines,
  398. [b' ( +3.142e+00+ +2.718e+00j) ( +3.142e+00+ +2.718e+00j)\n',
  399. b' ( +3.142e+00+ +2.718e+00j) ( +3.142e+00+ +2.718e+00j)\n'])
  400. # One format for each real and imaginary part
  401. c = BytesIO()
  402. np.savetxt(c, a, fmt=' %+.3e' * 2 * ncols)
  403. c.seek(0)
  404. lines = c.readlines()
  405. assert_equal(
  406. lines,
  407. [b' +3.142e+00 +2.718e+00 +3.142e+00 +2.718e+00\n',
  408. b' +3.142e+00 +2.718e+00 +3.142e+00 +2.718e+00\n'])
  409. # One format for each complex number
  410. c = BytesIO()
  411. np.savetxt(c, a, fmt=['(%.3e%+.3ej)'] * ncols)
  412. c.seek(0)
  413. lines = c.readlines()
  414. assert_equal(
  415. lines,
  416. [b'(3.142e+00+2.718e+00j) (3.142e+00+2.718e+00j)\n',
  417. b'(3.142e+00+2.718e+00j) (3.142e+00+2.718e+00j)\n'])
  418. def test_complex_negative_exponent(self):
  419. # Previous to 1.15, some formats generated x+-yj, gh 7895
  420. ncols = 2
  421. nrows = 2
  422. a = np.zeros((ncols, nrows), dtype=np.complex128)
  423. re = np.pi
  424. im = np.e
  425. a[:] = re - 1.0j * im
  426. c = BytesIO()
  427. np.savetxt(c, a, fmt='%.3e')
  428. c.seek(0)
  429. lines = c.readlines()
  430. assert_equal(
  431. lines,
  432. [b' (3.142e+00-2.718e+00j) (3.142e+00-2.718e+00j)\n',
  433. b' (3.142e+00-2.718e+00j) (3.142e+00-2.718e+00j)\n'])
  434. def test_custom_writer(self):
  435. class CustomWriter(list):
  436. def write(self, text):
  437. self.extend(text.split(b'\n'))
  438. w = CustomWriter()
  439. a = np.array([(1, 2), (3, 4)])
  440. np.savetxt(w, a)
  441. b = np.loadtxt(w)
  442. assert_array_equal(a, b)
  443. def test_unicode(self):
  444. utf8 = b'\xcf\x96'.decode('UTF-8')
  445. a = np.array([utf8], dtype=np.unicode)
  446. with tempdir() as tmpdir:
  447. # set encoding as on windows it may not be unicode even on py3
  448. np.savetxt(os.path.join(tmpdir, 'test.csv'), a, fmt=['%s'],
  449. encoding='UTF-8')
  450. def test_unicode_roundtrip(self):
  451. utf8 = b'\xcf\x96'.decode('UTF-8')
  452. a = np.array([utf8], dtype=np.unicode)
  453. # our gz wrapper support encoding
  454. suffixes = ['', '.gz']
  455. # stdlib 2 versions do not support encoding
  456. if MAJVER > 2:
  457. if HAS_BZ2:
  458. suffixes.append('.bz2')
  459. if HAS_LZMA:
  460. suffixes.extend(['.xz', '.lzma'])
  461. with tempdir() as tmpdir:
  462. for suffix in suffixes:
  463. np.savetxt(os.path.join(tmpdir, 'test.csv' + suffix), a,
  464. fmt=['%s'], encoding='UTF-16-LE')
  465. b = np.loadtxt(os.path.join(tmpdir, 'test.csv' + suffix),
  466. encoding='UTF-16-LE', dtype=np.unicode)
  467. assert_array_equal(a, b)
  468. def test_unicode_bytestream(self):
  469. utf8 = b'\xcf\x96'.decode('UTF-8')
  470. a = np.array([utf8], dtype=np.unicode)
  471. s = BytesIO()
  472. np.savetxt(s, a, fmt=['%s'], encoding='UTF-8')
  473. s.seek(0)
  474. assert_equal(s.read().decode('UTF-8'), utf8 + '\n')
  475. def test_unicode_stringstream(self):
  476. utf8 = b'\xcf\x96'.decode('UTF-8')
  477. a = np.array([utf8], dtype=np.unicode)
  478. s = StringIO()
  479. np.savetxt(s, a, fmt=['%s'], encoding='UTF-8')
  480. s.seek(0)
  481. assert_equal(s.read(), utf8 + '\n')
  482. class LoadTxtBase(object):
  483. def check_compressed(self, fopen, suffixes):
  484. # Test that we can load data from a compressed file
  485. wanted = np.arange(6).reshape((2, 3))
  486. linesep = ('\n', '\r\n', '\r')
  487. for sep in linesep:
  488. data = '0 1 2' + sep + '3 4 5'
  489. for suffix in suffixes:
  490. with temppath(suffix=suffix) as name:
  491. with fopen(name, mode='wt', encoding='UTF-32-LE') as f:
  492. f.write(data)
  493. res = self.loadfunc(name, encoding='UTF-32-LE')
  494. assert_array_equal(res, wanted)
  495. with fopen(name, "rt", encoding='UTF-32-LE') as f:
  496. res = self.loadfunc(f)
  497. assert_array_equal(res, wanted)
  498. # Python2 .open does not support encoding
  499. @pytest.mark.skipif(MAJVER == 2, reason="Needs Python version >= 3")
  500. def test_compressed_gzip(self):
  501. self.check_compressed(gzip.open, ('.gz',))
  502. @pytest.mark.skipif(not HAS_BZ2, reason="Needs bz2")
  503. @pytest.mark.skipif(MAJVER == 2, reason="Needs Python version >= 3")
  504. def test_compressed_bz2(self):
  505. self.check_compressed(bz2.open, ('.bz2',))
  506. @pytest.mark.skipif(not HAS_LZMA, reason="Needs lzma")
  507. @pytest.mark.skipif(MAJVER == 2, reason="Needs Python version >= 3")
  508. def test_compressed_lzma(self):
  509. self.check_compressed(lzma.open, ('.xz', '.lzma'))
  510. def test_encoding(self):
  511. with temppath() as path:
  512. with open(path, "wb") as f:
  513. f.write('0.\n1.\n2.'.encode("UTF-16"))
  514. x = self.loadfunc(path, encoding="UTF-16")
  515. assert_array_equal(x, [0., 1., 2.])
  516. def test_stringload(self):
  517. # umlaute
  518. nonascii = b'\xc3\xb6\xc3\xbc\xc3\xb6'.decode("UTF-8")
  519. with temppath() as path:
  520. with open(path, "wb") as f:
  521. f.write(nonascii.encode("UTF-16"))
  522. x = self.loadfunc(path, encoding="UTF-16", dtype=np.unicode)
  523. assert_array_equal(x, nonascii)
  524. def test_binary_decode(self):
  525. utf16 = b'\xff\xfeh\x04 \x00i\x04 \x00j\x04'
  526. v = self.loadfunc(BytesIO(utf16), dtype=np.unicode, encoding='UTF-16')
  527. assert_array_equal(v, np.array(utf16.decode('UTF-16').split()))
  528. def test_converters_decode(self):
  529. # test converters that decode strings
  530. c = TextIO()
  531. c.write(b'\xcf\x96')
  532. c.seek(0)
  533. x = self.loadfunc(c, dtype=np.unicode,
  534. converters={0: lambda x: x.decode('UTF-8')})
  535. a = np.array([b'\xcf\x96'.decode('UTF-8')])
  536. assert_array_equal(x, a)
  537. def test_converters_nodecode(self):
  538. # test native string converters enabled by setting an encoding
  539. utf8 = b'\xcf\x96'.decode('UTF-8')
  540. with temppath() as path:
  541. with io.open(path, 'wt', encoding='UTF-8') as f:
  542. f.write(utf8)
  543. x = self.loadfunc(path, dtype=np.unicode,
  544. converters={0: lambda x: x + 't'},
  545. encoding='UTF-8')
  546. a = np.array([utf8 + 't'])
  547. assert_array_equal(x, a)
  548. class TestLoadTxt(LoadTxtBase):
  549. loadfunc = staticmethod(np.loadtxt)
  550. def setup(self):
  551. # lower chunksize for testing
  552. self.orig_chunk = np.lib.npyio._loadtxt_chunksize
  553. np.lib.npyio._loadtxt_chunksize = 1
  554. def teardown(self):
  555. np.lib.npyio._loadtxt_chunksize = self.orig_chunk
  556. def test_record(self):
  557. c = TextIO()
  558. c.write('1 2\n3 4')
  559. c.seek(0)
  560. x = np.loadtxt(c, dtype=[('x', np.int32), ('y', np.int32)])
  561. a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
  562. assert_array_equal(x, a)
  563. d = TextIO()
  564. d.write('M 64.0 75.0\nF 25.0 60.0')
  565. d.seek(0)
  566. mydescriptor = {'names': ('gender', 'age', 'weight'),
  567. 'formats': ('S1', 'i4', 'f4')}
  568. b = np.array([('M', 64.0, 75.0),
  569. ('F', 25.0, 60.0)], dtype=mydescriptor)
  570. y = np.loadtxt(d, dtype=mydescriptor)
  571. assert_array_equal(y, b)
  572. def test_array(self):
  573. c = TextIO()
  574. c.write('1 2\n3 4')
  575. c.seek(0)
  576. x = np.loadtxt(c, dtype=int)
  577. a = np.array([[1, 2], [3, 4]], int)
  578. assert_array_equal(x, a)
  579. c.seek(0)
  580. x = np.loadtxt(c, dtype=float)
  581. a = np.array([[1, 2], [3, 4]], float)
  582. assert_array_equal(x, a)
  583. def test_1D(self):
  584. c = TextIO()
  585. c.write('1\n2\n3\n4\n')
  586. c.seek(0)
  587. x = np.loadtxt(c, dtype=int)
  588. a = np.array([1, 2, 3, 4], int)
  589. assert_array_equal(x, a)
  590. c = TextIO()
  591. c.write('1,2,3,4\n')
  592. c.seek(0)
  593. x = np.loadtxt(c, dtype=int, delimiter=',')
  594. a = np.array([1, 2, 3, 4], int)
  595. assert_array_equal(x, a)
  596. def test_missing(self):
  597. c = TextIO()
  598. c.write('1,2,3,,5\n')
  599. c.seek(0)
  600. x = np.loadtxt(c, dtype=int, delimiter=',',
  601. converters={3: lambda s: int(s or - 999)})
  602. a = np.array([1, 2, 3, -999, 5], int)
  603. assert_array_equal(x, a)
  604. def test_converters_with_usecols(self):
  605. c = TextIO()
  606. c.write('1,2,3,,5\n6,7,8,9,10\n')
  607. c.seek(0)
  608. x = np.loadtxt(c, dtype=int, delimiter=',',
  609. converters={3: lambda s: int(s or - 999)},
  610. usecols=(1, 3,))
  611. a = np.array([[2, -999], [7, 9]], int)
  612. assert_array_equal(x, a)
  613. def test_comments_unicode(self):
  614. c = TextIO()
  615. c.write('# comment\n1,2,3,5\n')
  616. c.seek(0)
  617. x = np.loadtxt(c, dtype=int, delimiter=',',
  618. comments=u'#')
  619. a = np.array([1, 2, 3, 5], int)
  620. assert_array_equal(x, a)
  621. def test_comments_byte(self):
  622. c = TextIO()
  623. c.write('# comment\n1,2,3,5\n')
  624. c.seek(0)
  625. x = np.loadtxt(c, dtype=int, delimiter=',',
  626. comments=b'#')
  627. a = np.array([1, 2, 3, 5], int)
  628. assert_array_equal(x, a)
  629. def test_comments_multiple(self):
  630. c = TextIO()
  631. c.write('# comment\n1,2,3\n@ comment2\n4,5,6 // comment3')
  632. c.seek(0)
  633. x = np.loadtxt(c, dtype=int, delimiter=',',
  634. comments=['#', '@', '//'])
  635. a = np.array([[1, 2, 3], [4, 5, 6]], int)
  636. assert_array_equal(x, a)
  637. def test_comments_multi_chars(self):
  638. c = TextIO()
  639. c.write('/* comment\n1,2,3,5\n')
  640. c.seek(0)
  641. x = np.loadtxt(c, dtype=int, delimiter=',',
  642. comments='/*')
  643. a = np.array([1, 2, 3, 5], int)
  644. assert_array_equal(x, a)
  645. # Check that '/*' is not transformed to ['/', '*']
  646. c = TextIO()
  647. c.write('*/ comment\n1,2,3,5\n')
  648. c.seek(0)
  649. assert_raises(ValueError, np.loadtxt, c, dtype=int, delimiter=',',
  650. comments='/*')
  651. def test_skiprows(self):
  652. c = TextIO()
  653. c.write('comment\n1,2,3,5\n')
  654. c.seek(0)
  655. x = np.loadtxt(c, dtype=int, delimiter=',',
  656. skiprows=1)
  657. a = np.array([1, 2, 3, 5], int)
  658. assert_array_equal(x, a)
  659. c = TextIO()
  660. c.write('# comment\n1,2,3,5\n')
  661. c.seek(0)
  662. x = np.loadtxt(c, dtype=int, delimiter=',',
  663. skiprows=1)
  664. a = np.array([1, 2, 3, 5], int)
  665. assert_array_equal(x, a)
  666. def test_usecols(self):
  667. a = np.array([[1, 2], [3, 4]], float)
  668. c = BytesIO()
  669. np.savetxt(c, a)
  670. c.seek(0)
  671. x = np.loadtxt(c, dtype=float, usecols=(1,))
  672. assert_array_equal(x, a[:, 1])
  673. a = np.array([[1, 2, 3], [3, 4, 5]], float)
  674. c = BytesIO()
  675. np.savetxt(c, a)
  676. c.seek(0)
  677. x = np.loadtxt(c, dtype=float, usecols=(1, 2))
  678. assert_array_equal(x, a[:, 1:])
  679. # Testing with arrays instead of tuples.
  680. c.seek(0)
  681. x = np.loadtxt(c, dtype=float, usecols=np.array([1, 2]))
  682. assert_array_equal(x, a[:, 1:])
  683. # Testing with an integer instead of a sequence
  684. for int_type in [int, np.int8, np.int16,
  685. np.int32, np.int64, np.uint8, np.uint16,
  686. np.uint32, np.uint64]:
  687. to_read = int_type(1)
  688. c.seek(0)
  689. x = np.loadtxt(c, dtype=float, usecols=to_read)
  690. assert_array_equal(x, a[:, 1])
  691. # Testing with some crazy custom integer type
  692. class CrazyInt(object):
  693. def __index__(self):
  694. return 1
  695. crazy_int = CrazyInt()
  696. c.seek(0)
  697. x = np.loadtxt(c, dtype=float, usecols=crazy_int)
  698. assert_array_equal(x, a[:, 1])
  699. c.seek(0)
  700. x = np.loadtxt(c, dtype=float, usecols=(crazy_int,))
  701. assert_array_equal(x, a[:, 1])
  702. # Checking with dtypes defined converters.
  703. data = '''JOE 70.1 25.3
  704. BOB 60.5 27.9
  705. '''
  706. c = TextIO(data)
  707. names = ['stid', 'temp']
  708. dtypes = ['S4', 'f8']
  709. arr = np.loadtxt(c, usecols=(0, 2), dtype=list(zip(names, dtypes)))
  710. assert_equal(arr['stid'], [b"JOE", b"BOB"])
  711. assert_equal(arr['temp'], [25.3, 27.9])
  712. # Testing non-ints in usecols
  713. c.seek(0)
  714. bogus_idx = 1.5
  715. assert_raises_regex(
  716. TypeError,
  717. '^usecols must be.*%s' % type(bogus_idx),
  718. np.loadtxt, c, usecols=bogus_idx
  719. )
  720. assert_raises_regex(
  721. TypeError,
  722. '^usecols must be.*%s' % type(bogus_idx),
  723. np.loadtxt, c, usecols=[0, bogus_idx, 0]
  724. )
  725. def test_fancy_dtype(self):
  726. c = TextIO()
  727. c.write('1,2,3.0\n4,5,6.0\n')
  728. c.seek(0)
  729. dt = np.dtype([('x', int), ('y', [('t', int), ('s', float)])])
  730. x = np.loadtxt(c, dtype=dt, delimiter=',')
  731. a = np.array([(1, (2, 3.0)), (4, (5, 6.0))], dt)
  732. assert_array_equal(x, a)
  733. def test_shaped_dtype(self):
  734. c = TextIO("aaaa 1.0 8.0 1 2 3 4 5 6")
  735. dt = np.dtype([('name', 'S4'), ('x', float), ('y', float),
  736. ('block', int, (2, 3))])
  737. x = np.loadtxt(c, dtype=dt)
  738. a = np.array([('aaaa', 1.0, 8.0, [[1, 2, 3], [4, 5, 6]])],
  739. dtype=dt)
  740. assert_array_equal(x, a)
  741. def test_3d_shaped_dtype(self):
  742. c = TextIO("aaaa 1.0 8.0 1 2 3 4 5 6 7 8 9 10 11 12")
  743. dt = np.dtype([('name', 'S4'), ('x', float), ('y', float),
  744. ('block', int, (2, 2, 3))])
  745. x = np.loadtxt(c, dtype=dt)
  746. a = np.array([('aaaa', 1.0, 8.0,
  747. [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])],
  748. dtype=dt)
  749. assert_array_equal(x, a)
  750. def test_str_dtype(self):
  751. # see gh-8033
  752. c = ["str1", "str2"]
  753. for dt in (str, np.bytes_):
  754. a = np.array(["str1", "str2"], dtype=dt)
  755. x = np.loadtxt(c, dtype=dt)
  756. assert_array_equal(x, a)
  757. def test_empty_file(self):
  758. with suppress_warnings() as sup:
  759. sup.filter(message="loadtxt: Empty input file:")
  760. c = TextIO()
  761. x = np.loadtxt(c)
  762. assert_equal(x.shape, (0,))
  763. x = np.loadtxt(c, dtype=np.int64)
  764. assert_equal(x.shape, (0,))
  765. assert_(x.dtype == np.int64)
  766. def test_unused_converter(self):
  767. c = TextIO()
  768. c.writelines(['1 21\n', '3 42\n'])
  769. c.seek(0)
  770. data = np.loadtxt(c, usecols=(1,),
  771. converters={0: lambda s: int(s, 16)})
  772. assert_array_equal(data, [21, 42])
  773. c.seek(0)
  774. data = np.loadtxt(c, usecols=(1,),
  775. converters={1: lambda s: int(s, 16)})
  776. assert_array_equal(data, [33, 66])
  777. def test_dtype_with_object(self):
  778. # Test using an explicit dtype with an object
  779. data = """ 1; 2001-01-01
  780. 2; 2002-01-31 """
  781. ndtype = [('idx', int), ('code', object)]
  782. func = lambda s: strptime(s.strip(), "%Y-%m-%d")
  783. converters = {1: func}
  784. test = np.loadtxt(TextIO(data), delimiter=";", dtype=ndtype,
  785. converters=converters)
  786. control = np.array(
  787. [(1, datetime(2001, 1, 1)), (2, datetime(2002, 1, 31))],
  788. dtype=ndtype)
  789. assert_equal(test, control)
  790. def test_uint64_type(self):
  791. tgt = (9223372043271415339, 9223372043271415853)
  792. c = TextIO()
  793. c.write("%s %s" % tgt)
  794. c.seek(0)
  795. res = np.loadtxt(c, dtype=np.uint64)
  796. assert_equal(res, tgt)
  797. def test_int64_type(self):
  798. tgt = (-9223372036854775807, 9223372036854775807)
  799. c = TextIO()
  800. c.write("%s %s" % tgt)
  801. c.seek(0)
  802. res = np.loadtxt(c, dtype=np.int64)
  803. assert_equal(res, tgt)
  804. def test_from_float_hex(self):
  805. # IEEE doubles and floats only, otherwise the float32
  806. # conversion may fail.
  807. tgt = np.logspace(-10, 10, 5).astype(np.float32)
  808. tgt = np.hstack((tgt, -tgt)).astype(float)
  809. inp = '\n'.join(map(float.hex, tgt))
  810. c = TextIO()
  811. c.write(inp)
  812. for dt in [float, np.float32]:
  813. c.seek(0)
  814. res = np.loadtxt(c, dtype=dt)
  815. assert_equal(res, tgt, err_msg="%s" % dt)
  816. def test_from_complex(self):
  817. tgt = (complex(1, 1), complex(1, -1))
  818. c = TextIO()
  819. c.write("%s %s" % tgt)
  820. c.seek(0)
  821. res = np.loadtxt(c, dtype=complex)
  822. assert_equal(res, tgt)
  823. def test_complex_misformatted(self):
  824. # test for backward compatibility
  825. # some complex formats used to generate x+-yj
  826. a = np.zeros((2, 2), dtype=np.complex128)
  827. re = np.pi
  828. im = np.e
  829. a[:] = re - 1.0j * im
  830. c = BytesIO()
  831. np.savetxt(c, a, fmt='%.16e')
  832. c.seek(0)
  833. txt = c.read()
  834. c.seek(0)
  835. # misformat the sign on the imaginary part, gh 7895
  836. txt_bad = txt.replace(b'e+00-', b'e00+-')
  837. assert_(txt_bad != txt)
  838. c.write(txt_bad)
  839. c.seek(0)
  840. res = np.loadtxt(c, dtype=complex)
  841. assert_equal(res, a)
  842. def test_universal_newline(self):
  843. with temppath() as name:
  844. with open(name, 'w') as f:
  845. f.write('1 21\r3 42\r')
  846. data = np.loadtxt(name)
  847. assert_array_equal(data, [[1, 21], [3, 42]])
  def test_empty_field_after_tab(self):
      # The last column is blank or empty on the second and third rows.
      stream = TextIO('1 \t2 \t3\tstart \n4\t5\t6\t \n7\t8\t9.5\t')
      record_type = {'names': ('x', 'y', 'z', 'comment'),
                     'formats': ('<i4', '<i4', '<f4', '|S8')}
      loaded = np.loadtxt(stream, dtype=record_type, delimiter='\t')
      expected = np.array([b'start ', b' ', b''])
      assert_array_equal(loaded['comment'], expected)
  def test_structure_unpack(self):
      # unpack=True on a structured dtype yields one array per field.
      source = TextIO("M 21 72\nF 35 58")
      record_type = {'names': ('a', 'b', 'c'),
                     'formats': ('|S1', '<i4', '<f4')}
      col_a, col_b, col_c = np.loadtxt(source, dtype=record_type,
                                       unpack=True)
      assert_(col_a.dtype.str == '|S1')
      assert_(col_b.dtype.str == '<i4')
      assert_(col_c.dtype.str == '<f4')
      assert_array_equal(col_a, np.array([b'M', b'F']))
      assert_array_equal(col_b, np.array([21, 35]))
      assert_array_equal(col_c, np.array([72., 58.]))
  def test_ndmin_keyword(self):
      table = TextIO('1,2,3\n4,5,6')
      # Both of these ndmin values must be rejected with ValueError.
      for bad_ndmin in (3, 1.5):
          table.seek(0)
          assert_raises(ValueError, np.loadtxt, table, ndmin=bad_ndmin)
      table.seek(0)
      loaded = np.loadtxt(table, dtype=int, delimiter=',', ndmin=1)
      assert_array_equal(loaded, np.array([[1, 2, 3], [4, 5, 6]]))

      # Input consisting of a single row.
      single_row = TextIO('0,1,2')
      for ndmin, shape in ((2, (1, 3)), (1, (3,)), (0, (3,))):
          single_row.seek(0)
          loaded = np.loadtxt(single_row, dtype=int, delimiter=',',
                              ndmin=ndmin)
          assert_(loaded.shape == shape)

      # Input consisting of a single column.
      single_col = TextIO('0\n1\n2')
      for ndmin, shape in ((2, (3, 1)), (1, (3,)), (0, (3,))):
          single_col.seek(0)
          loaded = np.loadtxt(single_col, dtype=int, delimiter=',',
                              ndmin=ndmin)
          assert_(loaded.shape == shape)

      # Test ndmin kw with empty file.
      with suppress_warnings() as sup:
          sup.filter(message="loadtxt: Empty input file:")
          empty = TextIO()
          assert_(np.loadtxt(empty, ndmin=2).shape == (0, 1,))
          assert_(np.loadtxt(empty, ndmin=1).shape == (0,))
  906. def test_generator_source(self):
  907. def count():
  908. for i in range(10):
  909. yield "%d" % i
  910. res = np.loadtxt(count())
  911. assert_array_equal(res, np.arange(10))
  def test_bad_line(self):
      # The third line has fewer columns than the first two.
      stream = TextIO('1 2 3\n4 5 6\n2 3')
      # Check for exception and that exception contains line number
      with assert_raises_regex(ValueError, "3"):
          np.loadtxt(stream)
  def test_none_as_string(self):
      # gh-5155, None should work as string when format demands it
      stream = TextIO('100,foo,200\n300,None,400')
      record_type = np.dtype([('x', int), ('a', 'S10'), ('y', int)])
      # Should succeed
      np.loadtxt(stream, delimiter=',', dtype=record_type, comments=None)
  @pytest.mark.skipif(locale.getpreferredencoding() == 'ANSI_X3.4-1968',
                      reason="Wrong preferred encoding")
  def test_binary_load(self):
      # UTF-8 encoded rows written to disk and read back through a file
      # object opened in binary mode.
      raw = (b"5,6,7,\xc3\x95scarscar\n\r15,2,3,hello\n\r"
             b"20,2,3,\xc3\x95scar\n\r")
      text_rows = raw.decode("UTF-8").replace("\r", "").splitlines()
      with temppath() as path:
          with open(path, "wb") as f:
              f.write(raw)
          with open(path, "rb") as f:
              loaded = np.loadtxt(f, encoding="UTF-8", dtype=np.unicode)
          assert_array_equal(loaded, text_rows)
          # test broken latin1 conversion people now rely on
          with open(path, "rb") as f:
              np.loadtxt(f, encoding="UTF-8", dtype="S")
          # NOTE(review): the array loaded just above is never inspected;
          # the comparison below checks the literal list against itself
          # and so always passes. Confirm the intended expected bytes
          # before turning this into a real check.
          byte_rows = [b'5,6,7,\xc3\x95scarscar', b'15,2,3,hello',
                       b'20,2,3,\xc3\x95scar']
          assert_array_equal(byte_rows, np.array(byte_rows, dtype="S"))
  def test_max_rows(self):
      # Only the first of three rows is requested.
      stream = TextIO('1,2,3,5\n4,5,7,8\n2,1,4,5')
      loaded = np.loadtxt(stream, dtype=int, delimiter=',', max_rows=1)
      assert_array_equal(loaded, np.array([1, 2, 3, 5], int))
  def test_max_rows_with_skiprows(self):
      # One leading line is skipped, then a single row is read.
      stream = TextIO('comments\n1,2,3,5\n4,5,7,8\n2,1,4,5')
      loaded = np.loadtxt(stream, dtype=int, delimiter=',',
                          skiprows=1, max_rows=1)
      assert_array_equal(loaded, np.array([1, 2, 3, 5], int))

      # Same layout, but two rows are requested after the skipped line.
      stream = TextIO('comment\n1,2,3,5\n4,5,7,8\n2,1,4,5')
      loaded = np.loadtxt(stream, dtype=int, delimiter=',',
                          skiprows=1, max_rows=2)
      assert_array_equal(loaded,
                         np.array([[1, 2, 3, 5], [4, 5, 7, 8]], int))
  def test_max_rows_with_read_continuation(self):
      stream = TextIO('1,2,3,5\n4,5,7,8\n2,1,4,5')
      first_two = np.loadtxt(stream, dtype=int, delimiter=',', max_rows=2)
      assert_array_equal(first_two,
                         np.array([[1, 2, 3, 5], [4, 5, 7, 8]], int))
      # test continuation: a second call on the same stream, without
      # rewinding, must return the remaining row
      remainder = np.loadtxt(stream, dtype=int, delimiter=',')
      assert_array_equal(remainder, np.array([2, 1, 4, 5], int))
  def test_max_rows_larger(self):
      # test max_rows > num rows
      stream = TextIO('comment\n1,2,3,5\n4,5,7,8\n2,1,4,5')
      loaded = np.loadtxt(stream, dtype=int, delimiter=',',
                          skiprows=1, max_rows=6)
      expected = np.array(
          [[1, 2, 3, 5], [4, 5, 7, 8], [2, 1, 4, 5]], int)
      assert_array_equal(loaded, expected)
  986. class Testfromregex(object):
  987. def test_record(self):
  988. c = TextIO()
  989. c.write('1.312 foo\n1.534 bar\n4.444 qux')
  990. c.seek(0)
  991. dt = [('num', np.float64), ('val', 'S3')]
  992. x = np.fromregex(c, r"([0-9.]+)\s+(...)", dt)
  993. a = np.array([(1.312, 'foo'), (1.534, 'bar'), (4.444, 'qux')],
  994. dtype=dt)
  995. assert_array_equal(x, a)
  996. def test_record_2(self):
  997. c = TextIO()
  998. c.write('1312 foo\n1534 bar\n4444 qux')
  999. c.seek(0)
  1000. dt = [('num', np.int32), ('val', 'S3')]
  1001. x = np.fromregex(c, r"(\d+)\s+(...)", dt)
  1002. a = np.array([(1312, 'foo'), (1534, 'bar'), (4444, 'qux')],
  1003. dtype=dt)
  1004. assert_array_equal(x, a)
  1005. def test_record_3(self):
  1006. c = TextIO()
  1007. c.write('1312 foo\n1534 bar\n4444 qux')
  1008. c.seek(0)
  1009. dt = [('num', np.float64)]
  1010. x = np.fromregex(c, r"(\d+)\s+...", dt)
  1011. a = np.array([(1312,), (1534,), (4444,)], dtype=dt)
  1012. assert_array_equal(x, a)
  1013. def test_record_unicode(self):
  1014. utf8 = b'\xcf\x96'
  1015. with temppath() as path:
  1016. with open(path, 'wb') as f:
  1017. f.write(b'1.312 foo' + utf8 + b' \n1.534 bar\n4.444 qux')
  1018. dt = [('num', np.float64), ('val', 'U4')]
  1019. x = np.fromregex(path, r"(?u)([0-9.]+)\s+(\w+)", dt, encoding='UTF-8')
  1020. a = np.array([(1.312, 'foo' + utf8.decode('UTF-8')), (1.534, 'bar'),
  1021. (4.444, 'qux')], dtype=dt)
  1022. assert_array_equal(x, a)
  1023. regexp = re.compile(r"([0-9.]+)\s+(\w+)", re.UNICODE)
  1024. x = np.fromregex(path, regexp, dt, encoding='UTF-8')
  1025. assert_array_equal(x, a)
  1026. def test_compiled_bytes(self):
  1027. regexp = re.compile(b'(\\d)')
  1028. c = BytesIO(b'123')
  1029. dt = [('num', np.float64)]
  1030. a = np.array([1, 2, 3], dtype=dt)
  1031. x = np.fromregex(c, regexp, dt)
  1032. assert_array_equal(x, a)
  1033. #####--------------------------------------------------------------------------
  1034. class TestFromTxt(LoadTxtBase):
  1035. loadfunc = staticmethod(np.genfromtxt)
  def test_record(self):
      # Test w/ explicit dtype
      source = TextIO('1 2\n3 4')
      loaded = np.ndfromtxt(source,
                            dtype=[('x', np.int32), ('y', np.int32)])
      expected = np.array([(1, 2), (3, 4)],
                          dtype=[('x', 'i4'), ('y', 'i4')])
      assert_equal(loaded, expected)
      # Same idea with the dtype given as a names/formats dictionary.
      source = TextIO('M 64.0 75.0\nF 25.0 60.0')
      descriptor = {'names': ('gender', 'age', 'weight'),
                    'formats': ('S1', 'i4', 'f4')}
      expected = np.array([('M', 64.0, 75.0), ('F', 25.0, 60.0)],
                          dtype=descriptor)
      loaded = np.ndfromtxt(source, dtype=descriptor)
      assert_equal(loaded, expected)
  def test_array(self):
      # Test outputting a standard ndarray
      source = TextIO('1 2\n3 4')
      as_int = np.ndfromtxt(source, dtype=int)
      assert_array_equal(as_int, np.array([[1, 2], [3, 4]], dtype=int))
      # The same stream, rewound, read through loadtxt as floats.
      source.seek(0)
      as_float = np.loadtxt(source, dtype=float)
      assert_array_equal(as_float,
                         np.array([[1, 2], [3, 4]], dtype=float))
  def test_1D(self):
      # Test squeezing to 1D
      expected = np.array([1, 2, 3, 4], int)
      # One value per line.
      column = TextIO('1\n2\n3\n4\n')
      assert_array_equal(np.ndfromtxt(column, dtype=int), expected)
      # All values on a single comma-separated line.
      row = TextIO('1,2,3,4\n')
      assert_array_equal(np.ndfromtxt(row, dtype=int, delimiter=','),
                         expected)
  def test_comments(self):
      # Test the stripping of comments
      expected = np.array([1, 2, 3, 5], int)
      samples = (
          '# comment\n1,2,3,5\n',   # Comment on its own line
          '1,2,3,5# comment\n',     # Comment at the end of a line
      )
      for text in samples:
          loaded = np.ndfromtxt(TextIO(text), dtype=int, delimiter=',',
                                comments='#')
          assert_equal(loaded, expected)
  def test_skiprows(self):
      # Test row skipping
      expected = np.array([1, 2, 3, 5], int)
      common = dict(dtype=int, delimiter=',')
      # ndfromtxt spells the option skip_header ...
      source = TextIO('comment\n1,2,3,5\n')
      loaded = np.ndfromtxt(source, skip_header=1, **common)
      assert_equal(loaded, expected)
      # ... while loadtxt spells it skiprows.
      source = TextIO('# comment\n1,2,3,5\n')
      loaded = np.loadtxt(source, skiprows=1, **common)
      assert_equal(loaded, expected)
  def test_skip_footer(self):
      # Five comment lines, a header line, fifty data rows and a final
      # short row.
      lines = ["# %i" % i for i in range(1, 6)]
      lines += ["A, B, C"]
      lines += ["%i,%3.1f,%03s" % (i, i, i) for i in range(50)]
      lines += ["99,99"]
      options = dict(delimiter=",", names=True, skip_header=5,
                     skip_footer=10)
      loaded = np.genfromtxt(TextIO("\n".join(lines)), **options)
      expected = np.array([("%f" % i,) * 3 for i in range(41)],
                          dtype=[(_, float) for _ in "ABC"])
      assert_equal(loaded, expected)
  def test_skip_footer_with_invalid(self):
      with suppress_warnings() as sup:
          sup.filter(ConversionWarning)
          text = '1 1\n2 2\n3 3\n4 4\n5 \n6 \n7 \n'
          first_four = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])
          # Footer too small to get rid of all invalid values
          assert_raises(ValueError, np.genfromtxt,
                        TextIO(text), skip_footer=1)
          loaded = np.genfromtxt(TextIO(text), skip_footer=1,
                                 invalid_raise=False)
          assert_equal(loaded, first_four)
          # A footer of three lines removes every short row.
          loaded = np.genfromtxt(TextIO(text), skip_footer=3)
          assert_equal(loaded, first_four)
          # Short rows in the middle of the input as well.
          text = '1 1\n2 \n3 3\n4 4\n5 \n6 6\n7 7\n'
          loaded = np.genfromtxt(TextIO(text), skip_footer=1,
                                 invalid_raise=False)
          assert_equal(
              loaded,
              np.array([[1., 1.], [3., 3.], [4., 4.], [6., 6.]]))
          loaded = np.genfromtxt(TextIO(text), skip_footer=3,
                                 invalid_raise=False)
          assert_equal(loaded, np.array([[1., 1.], [3., 3.], [4., 4.]]))
  def test_header(self):
      # Test retrieving a header
      source = TextIO('gender age weight\nM 64.0 75.0\nF 25.0 60.0')
      with warnings.catch_warnings(record=True) as caught:
          warnings.filterwarnings('always', '',
                                  np.VisibleDeprecationWarning)
          loaded = np.ndfromtxt(source, dtype=None, names=True)
          assert_(caught[0].category is np.VisibleDeprecationWarning)
      expected = {'gender': np.array([b'M', b'F']),
                  'age': np.array([64.0, 25.0]),
                  'weight': np.array([75.0, 60.0])}
      for field in ('gender', 'age', 'weight'):
          assert_equal(loaded[field], expected[field])
  def test_auto_dtype(self):
      # Test the automatic definition of the output dtype
      source = TextIO('A 64 75.0 3+4j True\nBCD 25 60.0 5+6j False')
      with warnings.catch_warnings(record=True) as caught:
          warnings.filterwarnings('always', '',
                                  np.VisibleDeprecationWarning)
          loaded = np.ndfromtxt(source, dtype=None)
          assert_(caught[0].category is np.VisibleDeprecationWarning)
      expected_columns = [
          np.array([b'A', b'BCD']),
          np.array([64, 25]),
          np.array([75.0, 60.0]),
          np.array([3 + 4j, 5 + 6j]),
          np.array([True, False]),
      ]
      assert_equal(loaded.dtype.names, ['f0', 'f1', 'f2', 'f3', 'f4'])
      for index, column in enumerate(expected_columns):
          assert_equal(loaded['f%i' % index], column)
  def test_auto_dtype_uniform(self):
      # Tests whether the output dtype can be uniformized
      source = TextIO('1 2 3 4\n5 6 7 8\n')
      loaded = np.ndfromtxt(source, dtype=None)
      assert_equal(loaded, np.array([[1, 2, 3, 4], [5, 6, 7, 8]]))
  def test_fancy_dtype(self):
      # Check that a nested dtype isn't MIA
      nested = np.dtype([('x', int), ('y', [('t', int), ('s', float)])])
      source = TextIO('1,2,3.0\n4,5,6.0\n')
      loaded = np.ndfromtxt(source, dtype=nested, delimiter=',')
      expected = np.array([(1, (2, 3.0)), (4, (5, 6.0))], dtype=nested)
      assert_equal(loaded, expected)
  def test_names_overwrite(self):
      # Test overwriting the names of the dtype
      descriptor = {'names': ('g', 'a', 'w'),
                    'formats': ('S1', 'i4', 'f4')}
      new_names = ('gender', 'age', 'weight')
      source = TextIO(b'M 64.0 75.0\nF 25.0 60.0')
      loaded = np.ndfromtxt(source, dtype=descriptor, names=new_names)
      # The expected array uses the same formats under the new names.
      descriptor['names'] = new_names
      expected = np.array([('M', 64.0, 75.0), ('F', 25.0, 60.0)],
                          dtype=descriptor)
      assert_equal(loaded, expected)
  def test_commented_header(self):
      # Check that names can be retrieved even if the line is commented out.
      expected = np.array(
          [('M', 21, 72.1), ('F', 35, 58.33), ('M', 33, 21.99)],
          dtype=[('gender', '|S1'), ('age', int), ('weight', float)])
      # The # is part of the first name and should be deleted automatically.
      source = TextIO("\n#gender age weight\n"
                      "M 21 72.100000\n"
                      "F 35 58.330000\n"
                      "M 33 21.99\n")
      with warnings.catch_warnings(record=True) as caught:
          warnings.filterwarnings('always', '',
                                  np.VisibleDeprecationWarning)
          loaded = np.genfromtxt(source, names=True, dtype=None)
          assert_(caught[0].category is np.VisibleDeprecationWarning)
      assert_equal(loaded, expected)
      # Ditto, but we should get rid of the first element
      source = TextIO(b"\n# gender age weight\n"
                      b"M 21 72.100000\n"
                      b"F 35 58.330000\n"
                      b"M 33 21.99\n")
      with warnings.catch_warnings(record=True) as caught:
          warnings.filterwarnings('always', '',
                                  np.VisibleDeprecationWarning)
          loaded = np.genfromtxt(source, names=True, dtype=None)
          assert_(caught[0].category is np.VisibleDeprecationWarning)
      assert_equal(loaded, expected)
  def test_names_and_comments_none(self):
      # Tests case when names is true but comments is None (gh-10780)
      source = TextIO('col1 col2\n 1 2\n 3 4')
      loaded = np.genfromtxt(source, dtype=(int, int), comments=None,
                             names=True)
      expected = np.array([(1, 2), (3, 4)],
                          dtype=[('col1', int), ('col2', int)])
      assert_equal(loaded, expected)
  def test_autonames_and_usecols(self):
      # Tests names and usecols
      source = TextIO('A B C D\n aaaa 121 45 9.1')
      with warnings.catch_warnings(record=True) as caught:
          warnings.filterwarnings('always', '',
                                  np.VisibleDeprecationWarning)
          loaded = np.ndfromtxt(source, usecols=('A', 'C', 'D'),
                                names=True, dtype=None)
          assert_(caught[0].category is np.VisibleDeprecationWarning)
      expected = np.array(
          ('aaaa', 45, 9.1),
          dtype=[('A', '|S4'), ('C', int), ('D', float)])
      assert_equal(loaded, expected)
  def test_converters_with_usecols(self):
      # Test the combination user-defined converters and usecol
      source = TextIO('1,2,3,,5\n6,7,8,9,10\n')
      # An empty field in column 3 is converted to -999.
      fill_empty = {3: lambda s: int(s or -999)}
      loaded = np.ndfromtxt(source, dtype=int, delimiter=',',
                            converters=fill_empty, usecols=(1, 3,))
      assert_equal(loaded, np.array([[2, -999], [7, 9]], int))
  def test_converters_with_usecols_and_names(self):
      # Tests names and usecols
      source = TextIO('A B C D\n aaaa 121 45 9.1')
      # Column 'C' is doubled by its converter.
      doubling = {'C': lambda s: 2 * int(s)}
      with warnings.catch_warnings(record=True) as caught:
          warnings.filterwarnings('always', '',
                                  np.VisibleDeprecationWarning)
          loaded = np.ndfromtxt(source, usecols=('A', 'C', 'D'),
                                names=True, dtype=None,
                                converters=doubling)
          assert_(caught[0].category is np.VisibleDeprecationWarning)
      expected = np.array(
          ('aaaa', 90, 9.1),
          dtype=[('A', '|S4'), ('C', int), ('D', float)])
      assert_equal(loaded, expected)
  def test_converters_cornercases(self):
      # Test the conversion to datetime.
      source = TextIO('2009-02-03 12:00:00Z, 72214.0')
      to_date = {'date': lambda s: strptime(s, '%Y-%m-%d %H:%M:%SZ')}
      loaded = np.ndfromtxt(source, delimiter=',', dtype=None,
                            names=['date', 'stid'], converters=to_date)
      expected = np.array(
          (datetime(2009, 2, 3), 72214.),
          dtype=[('date', np.object_), ('stid', float)])
      assert_equal(loaded, expected)
  def test_converters_cornercases2(self):
      # Test the conversion to datetime64.
      source = TextIO('2009-02-03 12:00:00Z, 72214.0')
      to_date64 = {
          'date': lambda s: np.datetime64(
              strptime(s, '%Y-%m-%d %H:%M:%SZ')),
      }
      loaded = np.ndfromtxt(source, delimiter=',', dtype=None,
                            names=['date', 'stid'], converters=to_date64)
      expected = np.array(
          (datetime(2009, 2, 3), 72214.),
          dtype=[('date', 'datetime64[us]'), ('stid', float)])
      assert_equal(loaded, expected)
  def test_unused_converter(self):
      # Test whether unused converters are forgotten
      source = TextIO("1 21\n 3 42\n")

      def from_hex(s):
          return int(s, 16)

      # The converter is attached to column 0, which is not selected.
      loaded = np.ndfromtxt(source, usecols=(1,),
                            converters={0: from_hex})
      assert_equal(loaded, [21, 42])
      # The converter is attached to the selected column.
      source.seek(0)
      loaded = np.ndfromtxt(source, usecols=(1,),
                            converters={1: from_hex})
      assert_equal(loaded, [33, 66])
  def test_invalid_converter(self):
      # The converters are applied to columns that mix 'R ...' and
      # '... %' values; genfromtxt is expected to raise ConverterError.
      def strip_rand(x):
          return float((b'r' in x.lower() and x.split()[-1]) or
                       (b'r' not in x.lower() and x.strip() or 0.0))

      def strip_per(x):
          return float((b'%' in x.lower() and x.split()[0]) or
                       (b'%' not in x.lower() and x.strip() or 0.0))

      source = TextIO("D01N01,10/1/2003 ,1 %,R 75,400,600\r\n"
                      "L24U05,12/5/2003, 2 %,1,300, 150.5\r\n"
                      "D02N03,10/10/2004,R 1,,7,145.55")
      options = dict(converters={2: strip_per, 3: strip_rand},
                     delimiter=",", dtype=None)
      assert_raises(ConverterError, np.genfromtxt, source, **options)
  def test_tricky_converter_bug1666(self):
      # Test some corner cases
      def drop_first_char(field):
          return float(field[1:])

      source = TextIO('q1,2\nq3,4')
      loaded = np.genfromtxt(source, delimiter=',',
                             converters={0: drop_first_char})
      assert_equal(loaded, np.array([[1., 2.], [3., 4.]]))
  def test_dtype_with_converters(self):
      text = "2009; 23; 46"
      # A bytes converter on column 0 yields a structured result ...
      loaded = np.ndfromtxt(TextIO(text,), delimiter=";", dtype=float,
                            converters={0: bytes})
      expected = np.array(
          [('2009', 23., 46)],
          dtype=[('f0', '|S4'), ('f1', float), ('f2', float)])
      assert_equal(loaded, expected)
      # ... while a float converter keeps a plain float array.
      loaded = np.ndfromtxt(TextIO(text,), delimiter=";", dtype=float,
                            converters={0: float})
      expected = np.array([2009., 23., 46],)
      assert_equal(loaded, expected)
  def test_dtype_with_converters_and_usecols(self):
      text = "1,5,-1,1:1\n2,8,-1,1:n\n3,3,-2,m:n\n"
      # The last column holds labels that are mapped to small integers.
      label_codes = {'1:1': 0, '1:n': 1, 'm:1': 2, 'm:n': 3}
      conv = {0: int, 1: int, 2: int,
              3: lambda r: label_codes[r.decode()]}

      # All four columns.
      record_type = [('e1', 'i4'), ('e2', 'i4'), ('e3', 'i2'),
                     ('n', 'i1')]
      loaded = np.recfromcsv(TextIO(text,), dtype=record_type,
                             delimiter=',', names=None, converters=conv)
      expected = np.rec.array(
          [(1, 5, -1, 0), (2, 8, -1, 1), (3, 3, -2, 3)],
          dtype=record_type)
      assert_equal(loaded, expected)

      # Columns 0, 1 and 3 only.
      record_type = [('e1', 'i4'), ('e2', 'i4'), ('n', 'i1')]
      loaded = np.recfromcsv(TextIO(text,), dtype=record_type,
                             delimiter=',', usecols=(0, 1, 3),
                             names=None, converters=conv)
      expected = np.rec.array([(1, 5, 0), (2, 8, 1), (3, 3, 3)],
                              dtype=record_type)
      assert_equal(loaded, expected)
  def test_dtype_with_object(self):
      # Test using an explicit dtype with an object
      data = (" 1; 2001-01-01\n"
              "2; 2002-01-31 ")

      def parse_date(s):
          return strptime(s.strip(), "%Y-%m-%d")

      converters = {1: parse_date}
      ndtype = [('idx', int), ('code', object)]
      loaded = np.genfromtxt(TextIO(data), delimiter=";", dtype=ndtype,
                             converters=converters)
      expected = np.array(
          [(1, datetime(2001, 1, 1)), (2, datetime(2002, 1, 31))],
          dtype=ndtype)
      assert_equal(loaded, expected)

      # An object field inside a nested dtype must be rejected.
      ndtype = [('nest', [('idx', int), ('code', object)])]
      with assert_raises_regex(NotImplementedError,
                               'Nested fields.* not supported.*'):
          loaded = np.genfromtxt(TextIO(data), delimiter=";",
                                 dtype=ndtype, converters=converters)

      # nested but empty fields also aren't supported
      ndtype = [('idx', int), ('code', object), ('nest', [])]
      with assert_raises_regex(NotImplementedError,
                               'Nested fields.* not supported.*'):
          loaded = np.genfromtxt(TextIO(data), delimiter=";",
                                 dtype=ndtype, converters=converters)
  def test_userconverters_with_explicit_dtype(self):
      # Test user_converters w/ explicit (standard) dtype
      source = TextIO('skip,skip,2001-01-01,1.0,skip')
      loaded = np.genfromtxt(source, delimiter=",", names=None,
                             dtype=float, usecols=(2, 3),
                             converters={2: bytes})
      expected = np.array([('2001-01-01', 1.)],
                          dtype=[('', '|S10'), ('', float)])
      assert_equal(loaded, expected)
  1352. def test_utf8_userconverters_with_explicit_dtype(self):
  1353. utf8 = b'\xcf\x96'
  1354. with temppath() as path:
  1355. with open(path, 'wb') as f:
  1356. f.write(b'skip,skip,2001-01-01' + utf8 + b',1.0,skip')
  1357. test = np.genfromtxt(path, delimiter=",", names=None, dtype=float,
  1358. usecols=(2, 3), converters={2: np.unicode},
  1359. encoding='UTF-8')
  1360. control = np.array([('2001-01-01' + utf8.decode('UTF-8'), 1.)],
  1361. dtype=[('', '|U11'), ('', float)])
  1362. assert_equal(test, control)
  def test_spacedelimiter(self):
      # Test space delimiter
      source = TextIO("1 2 3 4 5\n6 7 8 9 10")
      loaded = np.ndfromtxt(source)
      expected = np.array([[1., 2., 3., 4., 5.],
                           [6., 7., 8., 9., 10.]])
      assert_equal(loaded, expected)
  def test_integer_delimiter(self):
      # Test using an integer for delimiter
      # With delimiter=3 every field is exactly three characters wide,
      # so each value is right-aligned in its own 3-character slot.
      data = "  1  2  3\n  4  5 67\n890123  4"
      test = np.genfromtxt(TextIO(data), delimiter=3)
      control = np.array([[1, 2, 3], [4, 5, 67], [890, 123, 4]])
      assert_equal(test, control)
  def test_missing(self):
      # The empty fourth field is replaced by -999 through a converter.
      source = TextIO('1,2,3,,5\n')
      fill_empty = {3: lambda s: int(s or -999)}
      loaded = np.ndfromtxt(source, dtype=int, delimiter=',',
                            converters=fill_empty)
      assert_equal(loaded, np.array([1, 2, 3, -999, 5], int))
  def test_missing_with_tabs(self):
      # Test w/ a delimiter tab
      text = "1\t2\t3\n\t2\t\n1\t\t3"
      loaded = np.genfromtxt(TextIO(text), delimiter="\t", usemask=True,)
      expected_data = np.array(
          [(1, 2, 3), (np.nan, 2, np.nan), (1, np.nan, 3)],)
      expected_mask = np.array(
          [(0, 0, 0), (1, 0, 1), (0, 1, 0)], dtype=bool)
      assert_equal(loaded.data, expected_data)
      assert_equal(loaded.mask, expected_mask)
  def test_usecols(self):
      # Test the selection of columns
      # Select 1 column
      table = np.array([[1, 2], [3, 4]], float)
      stream = TextIO()
      np.savetxt(stream, table)
      stream.seek(0)
      loaded = np.ndfromtxt(stream, dtype=float, usecols=(1,))
      assert_equal(loaded, table[:, 1])
      # Select two columns with a tuple.
      table = np.array([[1, 2, 3], [3, 4, 5]], float)
      stream = TextIO()
      np.savetxt(stream, table)
      stream.seek(0)
      loaded = np.ndfromtxt(stream, dtype=float, usecols=(1, 2))
      assert_equal(loaded, table[:, 1:])
      # Testing with arrays instead of tuples.
      stream.seek(0)
      loaded = np.ndfromtxt(stream, dtype=float,
                            usecols=np.array([1, 2]))
      assert_equal(loaded, table[:, 1:])
  def test_usecols_as_css(self):
      # Test giving usecols with a comma-separated string
      text = "1 2 3\n4 5 6"
      loaded = np.genfromtxt(TextIO(text), names="a, b, c",
                             usecols="a, c")
      expected = np.array([(1, 3), (4, 6)],
                          dtype=[(_, float) for _ in "ac"])
      assert_equal(loaded, expected)
  def test_usecols_with_structured_dtype(self):
      # Test usecols with an explicit structured dtype
      source = TextIO("JOE 70.1 25.3\nBOB 60.5 27.9")
      record_type = list(zip(['stid', 'temp'], ['S4', 'f8']))
      loaded = np.ndfromtxt(source, usecols=(0, 2), dtype=record_type)
      assert_equal(loaded['stid'], [b"JOE", b"BOB"])
      assert_equal(loaded['temp'], [25.3, 27.9])
  def test_usecols_with_integer(self):
      # Test usecols with an integer
      source = TextIO(b"1 2 3\n4 5 6")
      loaded = np.genfromtxt(source, usecols=0)
      assert_equal(loaded, np.array([1., 4.]))
  def test_usecols_with_named_columns(self):
      # Test usecols with named columns
      text = "1 2 3\n4 5 6"
      expected = np.array([(1, 3), (4, 6)],
                          dtype=[('a', float), ('c', float)])
      naming = dict(names="a, b, c")
      # Columns selected by position, then by name.
      for selection in ((0, -1), ('a', 'c')):
          loaded = np.genfromtxt(TextIO(text), usecols=selection,
                                 **naming)
          assert_equal(loaded, expected)
  def test_empty_file(self):
      # Test that an empty file raises the proper warning.
      with suppress_warnings() as sup:
          sup.filter(message="genfromtxt: Empty input file:")
          loaded = np.genfromtxt(TextIO())
          assert_equal(loaded, np.array([]))
  def test_fancy_dtype_alt(self):
      # Check that a nested dtype isn't MIA
      nested = np.dtype([('x', int), ('y', [('t', int), ('s', float)])])
      source = TextIO('1,2,3.0\n4,5,6.0\n')
      loaded = np.mafromtxt(source, dtype=nested, delimiter=',')
      expected = ma.array([(1, (2, 3.0)), (4, (5, 6.0))], dtype=nested)
      assert_equal(loaded, expected)
  def test_shaped_dtype(self):
      # The last six values fill a field declared with shape (2, 3).
      source = TextIO("aaaa 1.0 8.0 1 2 3 4 5 6")
      record_type = np.dtype([('name', 'S4'), ('x', float), ('y', float),
                              ('block', int, (2, 3))])
      loaded = np.ndfromtxt(source, dtype=record_type)
      expected = np.array(
          [('aaaa', 1.0, 8.0, [[1, 2, 3], [4, 5, 6]])],
          dtype=record_type)
      assert_array_equal(loaded, expected)
  def test_withmissing(self):
      source = TextIO('A,B\n0,1\n2,N/A')
      options = dict(delimiter=",", missing_values="N/A", names=True)
      expected_mask = [(False, False), (False, True)]
      # Automatic dtype detection.
      loaded = np.mafromtxt(source, dtype=None, **options)
      expected = ma.array([(0, 1), (2, -1)], mask=expected_mask,
                          dtype=[('A', int), ('B', int)])
      assert_equal(loaded, expected)
      assert_equal(loaded.mask, expected.mask)
      # No dtype argument at all.
      source.seek(0)
      loaded = np.mafromtxt(source, **options)
      expected = ma.array([(0, 1), (2, -1)], mask=expected_mask,
                          dtype=[('A', float), ('B', float)])
      assert_equal(loaded, expected)
      assert_equal(loaded.mask, expected.mask)
  def test_user_missing_values(self):
      text = "A, B, C\n0, 0., 0j\n1, N/A, 1j\n-9, 2.2, N/A\n3, -99, 3j"
      basekwargs = dict(dtype=None, delimiter=",", names=True,)
      mdtype = [('A', int), ('B', float), ('C', complex)]
      values = [(0, 0.0, 0j), (1, -999, 1j),
                (-9, 2.2, -999j), (3, -99, 3j)]
      # A single missing marker shared by every column.
      loaded = np.mafromtxt(TextIO(text), missing_values="N/A",
                            **basekwargs)
      expected = ma.array(
          values,
          mask=[(0, 0, 0), (0, 1, 0), (0, 0, 1), (0, 0, 0)],
          dtype=mdtype)
      assert_equal(loaded, expected)
      # Per-column markers keyed by column index, with explicit dtype.
      basekwargs['dtype'] = mdtype
      per_column_mask = [(0, 0, 0), (0, 1, 0), (1, 0, 1), (0, 1, 0)]
      loaded = np.mafromtxt(TextIO(text),
                            missing_values={0: -9, 1: -99, 2: -999j},
                            **basekwargs)
      expected = ma.array(values, mask=per_column_mask, dtype=mdtype)
      assert_equal(loaded, expected)
      # Per-column markers keyed by a mix of index and column name.
      loaded = np.mafromtxt(TextIO(text),
                            missing_values={0: -9, 'B': -99, 'C': -999j},
                            **basekwargs)
      expected = ma.array(values, mask=per_column_mask, dtype=mdtype)
      assert_equal(loaded, expected)
  def test_user_filling_values(self):
      # Test with missing and filling values
      data = "N/A, 2, 3\n4, ,???"
      kwargs = dict(delimiter=",",
                    dtype=int,
                    names="a,b,c",
                    missing_values={0: "N/A", 'b': " ", 2: "???"},
                    filling_values={0: 0, 'b': 0, 2: -999})
      # All three columns, each with its own marker and fill value.
      test = np.genfromtxt(TextIO(data), **kwargs)
      ctrl = np.array([(0, 2, 3), (4, 0, -999)],
                      dtype=[(_, int) for _ in "abc"])
      assert_equal(test, ctrl)
      # Only the first and last columns.
      test = np.genfromtxt(TextIO(data), usecols=(0, -1), **kwargs)
      ctrl = np.array([(0, 3), (4, -999)],
                      dtype=[(_, int) for _ in "ac"])
      assert_equal(test, ctrl)

      # A single marker and a single fill value for the whole table.
      data2 = "1,2,*,4\n5,*,7,8\n"
      test = np.genfromtxt(TextIO(data2), delimiter=',', dtype=int,
                           missing_values="*", filling_values=0)
      ctrl = np.array([[1, 2, 0, 4], [5, 0, 7, 8]])
      assert_equal(test, ctrl)
      test = np.genfromtxt(TextIO(data2), delimiter=',', dtype=int,
                           missing_values="*", filling_values=-1)
      ctrl = np.array([[1, 2, -1, 4], [5, -1, 7, 8]])
      assert_equal(test, ctrl)
  def test_withmissing_float(self):
      # The marker is given as '-999.0' while the file contains '-999.00'.
      source = TextIO('A,B\n0,1.5\n2,-999.00')
      loaded = np.mafromtxt(source, dtype=None, delimiter=',',
                            missing_values='-999.0', names=True,)
      expected = ma.array([(0, 1.5), (2, -1.)],
                          mask=[(False, False), (False, True)],
                          dtype=[('A', int), ('B', float)])
      assert_equal(loaded, expected)
      assert_equal(loaded.mask, expected.mask)
  def test_with_masked_column_uniform(self):
      # Test masked column
      source = TextIO('1 2 3\n4 5 6\n')
      loaded = np.genfromtxt(source, dtype=None, missing_values='2,5',
                             usemask=True)
      expected = ma.array([[1, 2, 3], [4, 5, 6]],
                          mask=[[0, 1, 0], [0, 1, 0]])
      assert_equal(loaded, expected)
  def test_with_masked_column_various(self):
      # Test masked column
      source = TextIO('True 2 3\nFalse 5 6\n')
      loaded = np.genfromtxt(source, dtype=None, missing_values='2,5',
                             usemask=True)
      expected = ma.array(
          [(1, 2, 3), (0, 5, 6)],
          mask=[(0, 1, 0), (0, 1, 0)],
          dtype=[('f0', bool), ('f1', bool), ('f2', int)])
      assert_equal(loaded, expected)
  def test_invalid_raise(self):
      # Test invalid raise
      data = ["1, 1, 1, 1, 1"] * 50
      # Every tenth row is missing one comma, so it has four fields
      # instead of five.
      for i in range(5):
          data[10 * i] = "2, 2, 2, 2 2"
      data.insert(0, "a, b, c, d, e")
      mdata = TextIO("\n".join(data))
      #
      kwargs = dict(delimiter=",", dtype=None, names=True)

      def load():
          return np.ndfromtxt(mdata, invalid_raise=False, **kwargs)

      # assert_warns hands back whatever the wrapped callable returns,
      # so the loaded array can be captured directly.
      mtest = assert_warns(ConversionWarning, load)
      assert_equal(len(mtest), 45)
      assert_equal(mtest, np.ones(45, dtype=[(_, int) for _ in 'abcde']))
      # With the default invalid_raise the short rows are an error.
      mdata.seek(0)
      assert_raises(ValueError, np.ndfromtxt, mdata,
                    delimiter=",", names=True)
  1583. def test_invalid_raise_with_usecols(self):
  1584. # Test invalid_raise with usecols
  1585. data = ["1, 1, 1, 1, 1"] * 50
  1586. for i in range(5):
  1587. data[10 * i] = "2, 2, 2, 2 2"
  1588. data.insert(0, "a, b, c, d, e")
  1589. mdata = TextIO("\n".join(data))
  1590. kwargs = dict(delimiter=",", dtype=None, names=True,
  1591. invalid_raise=False)
  1592. # XXX: is there a better way to get the return value of the
  1593. # callable in assert_warns ?
  1594. ret = {}
  1595. def f(_ret={}):
  1596. _ret['mtest'] = np.ndfromtxt(mdata, usecols=(0, 4), **kwargs)
  1597. assert_warns(ConversionWarning, f, _ret=ret)
  1598. mtest = ret['mtest']
  1599. assert_equal(len(mtest), 45)
  1600. assert_equal(mtest, np.ones(45, dtype=[(_, int) for _ in 'ae']))
  1601. #
  1602. mdata.seek(0)
  1603. mtest = np.ndfromtxt(mdata, usecols=(0, 1), **kwargs)
  1604. assert_equal(len(mtest), 50)
  1605. control = np.ones(50, dtype=[(_, int) for _ in 'ab'])
  1606. control[[10 * _ for _ in range(5)]] = (2, 2)
  1607. assert_equal(mtest, control)
  1608. def test_inconsistent_dtype(self):
  1609. # Test inconsistent dtype
  1610. data = ["1, 1, 1, 1, -1.1"] * 50
  1611. mdata = TextIO("\n".join(data))
  1612. converters = {4: lambda x: "(%s)" % x}
  1613. kwargs = dict(delimiter=",", converters=converters,
  1614. dtype=[(_, int) for _ in 'abcde'],)
  1615. assert_raises(ValueError, np.genfromtxt, mdata, **kwargs)
  1616. def test_default_field_format(self):
  1617. # Test default format
  1618. data = "0, 1, 2.3\n4, 5, 6.7"
  1619. mtest = np.ndfromtxt(TextIO(data),
  1620. delimiter=",", dtype=None, defaultfmt="f%02i")
  1621. ctrl = np.array([(0, 1, 2.3), (4, 5, 6.7)],
  1622. dtype=[("f00", int), ("f01", int), ("f02", float)])
  1623. assert_equal(mtest, ctrl)
  1624. def test_single_dtype_wo_names(self):
  1625. # Test single dtype w/o names
  1626. data = "0, 1, 2.3\n4, 5, 6.7"
  1627. mtest = np.ndfromtxt(TextIO(data),
  1628. delimiter=",", dtype=float, defaultfmt="f%02i")
  1629. ctrl = np.array([[0., 1., 2.3], [4., 5., 6.7]], dtype=float)
  1630. assert_equal(mtest, ctrl)
  1631. def test_single_dtype_w_explicit_names(self):
  1632. # Test single dtype w explicit names
  1633. data = "0, 1, 2.3\n4, 5, 6.7"
  1634. mtest = np.ndfromtxt(TextIO(data),
  1635. delimiter=",", dtype=float, names="a, b, c")
  1636. ctrl = np.array([(0., 1., 2.3), (4., 5., 6.7)],
  1637. dtype=[(_, float) for _ in "abc"])
  1638. assert_equal(mtest, ctrl)
  1639. def test_single_dtype_w_implicit_names(self):
  1640. # Test single dtype w implicit names
  1641. data = "a, b, c\n0, 1, 2.3\n4, 5, 6.7"
  1642. mtest = np.ndfromtxt(TextIO(data),
  1643. delimiter=",", dtype=float, names=True)
  1644. ctrl = np.array([(0., 1., 2.3), (4., 5., 6.7)],
  1645. dtype=[(_, float) for _ in "abc"])
  1646. assert_equal(mtest, ctrl)
  1647. def test_easy_structured_dtype(self):
  1648. # Test easy structured dtype
  1649. data = "0, 1, 2.3\n4, 5, 6.7"
  1650. mtest = np.ndfromtxt(TextIO(data), delimiter=",",
  1651. dtype=(int, float, float), defaultfmt="f_%02i")
  1652. ctrl = np.array([(0, 1., 2.3), (4, 5., 6.7)],
  1653. dtype=[("f_00", int), ("f_01", float), ("f_02", float)])
  1654. assert_equal(mtest, ctrl)
  1655. def test_autostrip(self):
  1656. # Test autostrip
  1657. data = "01/01/2003 , 1.3, abcde"
  1658. kwargs = dict(delimiter=",", dtype=None)
  1659. with warnings.catch_warnings(record=True) as w:
  1660. warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
  1661. mtest = np.ndfromtxt(TextIO(data), **kwargs)
  1662. assert_(w[0].category is np.VisibleDeprecationWarning)
  1663. ctrl = np.array([('01/01/2003 ', 1.3, ' abcde')],
  1664. dtype=[('f0', '|S12'), ('f1', float), ('f2', '|S8')])
  1665. assert_equal(mtest, ctrl)
  1666. with warnings.catch_warnings(record=True) as w:
  1667. warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
  1668. mtest = np.ndfromtxt(TextIO(data), autostrip=True, **kwargs)
  1669. assert_(w[0].category is np.VisibleDeprecationWarning)
  1670. ctrl = np.array([('01/01/2003', 1.3, 'abcde')],
  1671. dtype=[('f0', '|S10'), ('f1', float), ('f2', '|S5')])
  1672. assert_equal(mtest, ctrl)
  1673. def test_replace_space(self):
  1674. # Test the 'replace_space' option
  1675. txt = "A.A, B (B), C:C\n1, 2, 3.14"
  1676. # Test default: replace ' ' by '_' and delete non-alphanum chars
  1677. test = np.genfromtxt(TextIO(txt),
  1678. delimiter=",", names=True, dtype=None)
  1679. ctrl_dtype = [("AA", int), ("B_B", int), ("CC", float)]
  1680. ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype)
  1681. assert_equal(test, ctrl)
  1682. # Test: no replace, no delete
  1683. test = np.genfromtxt(TextIO(txt),
  1684. delimiter=",", names=True, dtype=None,
  1685. replace_space='', deletechars='')
  1686. ctrl_dtype = [("A.A", int), ("B (B)", int), ("C:C", float)]
  1687. ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype)
  1688. assert_equal(test, ctrl)
  1689. # Test: no delete (spaces are replaced by _)
  1690. test = np.genfromtxt(TextIO(txt),
  1691. delimiter=",", names=True, dtype=None,
  1692. deletechars='')
  1693. ctrl_dtype = [("A.A", int), ("B_(B)", int), ("C:C", float)]
  1694. ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype)
  1695. assert_equal(test, ctrl)
  1696. def test_replace_space_known_dtype(self):
  1697. # Test the 'replace_space' (and related) options when dtype != None
  1698. txt = "A.A, B (B), C:C\n1, 2, 3"
  1699. # Test default: replace ' ' by '_' and delete non-alphanum chars
  1700. test = np.genfromtxt(TextIO(txt),
  1701. delimiter=",", names=True, dtype=int)
  1702. ctrl_dtype = [("AA", int), ("B_B", int), ("CC", int)]
  1703. ctrl = np.array((1, 2, 3), dtype=ctrl_dtype)
  1704. assert_equal(test, ctrl)
  1705. # Test: no replace, no delete
  1706. test = np.genfromtxt(TextIO(txt),
  1707. delimiter=",", names=True, dtype=int,
  1708. replace_space='', deletechars='')
  1709. ctrl_dtype = [("A.A", int), ("B (B)", int), ("C:C", int)]
  1710. ctrl = np.array((1, 2, 3), dtype=ctrl_dtype)
  1711. assert_equal(test, ctrl)
  1712. # Test: no delete (spaces are replaced by _)
  1713. test = np.genfromtxt(TextIO(txt),
  1714. delimiter=",", names=True, dtype=int,
  1715. deletechars='')
  1716. ctrl_dtype = [("A.A", int), ("B_(B)", int), ("C:C", int)]
  1717. ctrl = np.array((1, 2, 3), dtype=ctrl_dtype)
  1718. assert_equal(test, ctrl)
  1719. def test_incomplete_names(self):
  1720. # Test w/ incomplete names
  1721. data = "A,,C\n0,1,2\n3,4,5"
  1722. kwargs = dict(delimiter=",", names=True)
  1723. # w/ dtype=None
  1724. ctrl = np.array([(0, 1, 2), (3, 4, 5)],
  1725. dtype=[(_, int) for _ in ('A', 'f0', 'C')])
  1726. test = np.ndfromtxt(TextIO(data), dtype=None, **kwargs)
  1727. assert_equal(test, ctrl)
  1728. # w/ default dtype
  1729. ctrl = np.array([(0, 1, 2), (3, 4, 5)],
  1730. dtype=[(_, float) for _ in ('A', 'f0', 'C')])
  1731. test = np.ndfromtxt(TextIO(data), **kwargs)
  1732. def test_names_auto_completion(self):
  1733. # Make sure that names are properly completed
  1734. data = "1 2 3\n 4 5 6"
  1735. test = np.genfromtxt(TextIO(data),
  1736. dtype=(int, float, int), names="a")
  1737. ctrl = np.array([(1, 2, 3), (4, 5, 6)],
  1738. dtype=[('a', int), ('f0', float), ('f1', int)])
  1739. assert_equal(test, ctrl)
  1740. def test_names_with_usecols_bug1636(self):
  1741. # Make sure we pick up the right names w/ usecols
  1742. data = "A,B,C,D,E\n0,1,2,3,4\n0,1,2,3,4\n0,1,2,3,4"
  1743. ctrl_names = ("A", "C", "E")
  1744. test = np.genfromtxt(TextIO(data),
  1745. dtype=(int, int, int), delimiter=",",
  1746. usecols=(0, 2, 4), names=True)
  1747. assert_equal(test.dtype.names, ctrl_names)
  1748. #
  1749. test = np.genfromtxt(TextIO(data),
  1750. dtype=(int, int, int), delimiter=",",
  1751. usecols=("A", "C", "E"), names=True)
  1752. assert_equal(test.dtype.names, ctrl_names)
  1753. #
  1754. test = np.genfromtxt(TextIO(data),
  1755. dtype=int, delimiter=",",
  1756. usecols=("A", "C", "E"), names=True)
  1757. assert_equal(test.dtype.names, ctrl_names)
  1758. def test_fixed_width_names(self):
  1759. # Test fix-width w/ names
  1760. data = " A B C\n 0 1 2.3\n 45 67 9."
  1761. kwargs = dict(delimiter=(5, 5, 4), names=True, dtype=None)
  1762. ctrl = np.array([(0, 1, 2.3), (45, 67, 9.)],
  1763. dtype=[('A', int), ('B', int), ('C', float)])
  1764. test = np.ndfromtxt(TextIO(data), **kwargs)
  1765. assert_equal(test, ctrl)
  1766. #
  1767. kwargs = dict(delimiter=5, names=True, dtype=None)
  1768. ctrl = np.array([(0, 1, 2.3), (45, 67, 9.)],
  1769. dtype=[('A', int), ('B', int), ('C', float)])
  1770. test = np.ndfromtxt(TextIO(data), **kwargs)
  1771. assert_equal(test, ctrl)
  1772. def test_filling_values(self):
  1773. # Test missing values
  1774. data = b"1, 2, 3\n1, , 5\n0, 6, \n"
  1775. kwargs = dict(delimiter=",", dtype=None, filling_values=-999)
  1776. ctrl = np.array([[1, 2, 3], [1, -999, 5], [0, 6, -999]], dtype=int)
  1777. test = np.ndfromtxt(TextIO(data), **kwargs)
  1778. assert_equal(test, ctrl)
  1779. def test_comments_is_none(self):
  1780. # Github issue 329 (None was previously being converted to 'None').
  1781. with warnings.catch_warnings(record=True) as w:
  1782. warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
  1783. test = np.genfromtxt(TextIO("test1,testNonetherestofthedata"),
  1784. dtype=None, comments=None, delimiter=',')
  1785. assert_(w[0].category is np.VisibleDeprecationWarning)
  1786. assert_equal(test[1], b'testNonetherestofthedata')
  1787. with warnings.catch_warnings(record=True) as w:
  1788. warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
  1789. test = np.genfromtxt(TextIO("test1, testNonetherestofthedata"),
  1790. dtype=None, comments=None, delimiter=',')
  1791. assert_(w[0].category is np.VisibleDeprecationWarning)
  1792. assert_equal(test[1], b' testNonetherestofthedata')
  1793. def test_latin1(self):
  1794. latin1 = b'\xf6\xfc\xf6'
  1795. norm = b"norm1,norm2,norm3\n"
  1796. enc = b"test1,testNonethe" + latin1 + b",test3\n"
  1797. s = norm + enc + norm
  1798. with warnings.catch_warnings(record=True) as w:
  1799. warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
  1800. test = np.genfromtxt(TextIO(s),
  1801. dtype=None, comments=None, delimiter=',')
  1802. assert_(w[0].category is np.VisibleDeprecationWarning)
  1803. assert_equal(test[1, 0], b"test1")
  1804. assert_equal(test[1, 1], b"testNonethe" + latin1)
  1805. assert_equal(test[1, 2], b"test3")
  1806. test = np.genfromtxt(TextIO(s),
  1807. dtype=None, comments=None, delimiter=',',
  1808. encoding='latin1')
  1809. assert_equal(test[1, 0], u"test1")
  1810. assert_equal(test[1, 1], u"testNonethe" + latin1.decode('latin1'))
  1811. assert_equal(test[1, 2], u"test3")
  1812. with warnings.catch_warnings(record=True) as w:
  1813. warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
  1814. test = np.genfromtxt(TextIO(b"0,testNonethe" + latin1),
  1815. dtype=None, comments=None, delimiter=',')
  1816. assert_(w[0].category is np.VisibleDeprecationWarning)
  1817. assert_equal(test['f0'], 0)
  1818. assert_equal(test['f1'], b"testNonethe" + latin1)
  1819. def test_binary_decode_autodtype(self):
  1820. utf16 = b'\xff\xfeh\x04 \x00i\x04 \x00j\x04'
  1821. v = self.loadfunc(BytesIO(utf16), dtype=None, encoding='UTF-16')
  1822. assert_array_equal(v, np.array(utf16.decode('UTF-16').split()))
  1823. def test_utf8_byte_encoding(self):
  1824. utf8 = b"\xcf\x96"
  1825. norm = b"norm1,norm2,norm3\n"
  1826. enc = b"test1,testNonethe" + utf8 + b",test3\n"
  1827. s = norm + enc + norm
  1828. with warnings.catch_warnings(record=True) as w:
  1829. warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
  1830. test = np.genfromtxt(TextIO(s),
  1831. dtype=None, comments=None, delimiter=',')
  1832. assert_(w[0].category is np.VisibleDeprecationWarning)
  1833. ctl = np.array([
  1834. [b'norm1', b'norm2', b'norm3'],
  1835. [b'test1', b'testNonethe' + utf8, b'test3'],
  1836. [b'norm1', b'norm2', b'norm3']])
  1837. assert_array_equal(test, ctl)
  1838. def test_utf8_file(self):
  1839. utf8 = b"\xcf\x96"
  1840. with temppath() as path:
  1841. with open(path, "wb") as f:
  1842. f.write((b"test1,testNonethe" + utf8 + b",test3\n") * 2)
  1843. test = np.genfromtxt(path, dtype=None, comments=None,
  1844. delimiter=',', encoding="UTF-8")
  1845. ctl = np.array([
  1846. ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"],
  1847. ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]],
  1848. dtype=np.unicode)
  1849. assert_array_equal(test, ctl)
  1850. # test a mixed dtype
  1851. with open(path, "wb") as f:
  1852. f.write(b"0,testNonethe" + utf8)
  1853. test = np.genfromtxt(path, dtype=None, comments=None,
  1854. delimiter=',', encoding="UTF-8")
  1855. assert_equal(test['f0'], 0)
  1856. assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8"))
  1857. def test_utf8_file_nodtype_unicode(self):
  1858. # bytes encoding with non-latin1 -> unicode upcast
  1859. utf8 = u'\u03d6'
  1860. latin1 = u'\xf6\xfc\xf6'
  1861. # skip test if cannot encode utf8 test string with preferred
  1862. # encoding. The preferred encoding is assumed to be the default
  1863. # encoding of io.open. Will need to change this for PyTest, maybe
  1864. # using pytest.mark.xfail(raises=***).
  1865. try:
  1866. encoding = locale.getpreferredencoding()
  1867. utf8.encode(encoding)
  1868. except (UnicodeError, ImportError):
  1869. pytest.skip('Skipping test_utf8_file_nodtype_unicode, '
  1870. 'unable to encode utf8 in preferred encoding')
  1871. with temppath() as path:
  1872. with io.open(path, "wt") as f:
  1873. f.write(u"norm1,norm2,norm3\n")
  1874. f.write(u"norm1," + latin1 + u",norm3\n")
  1875. f.write(u"test1,testNonethe" + utf8 + u",test3\n")
  1876. with warnings.catch_warnings(record=True) as w:
  1877. warnings.filterwarnings('always', '',
  1878. np.VisibleDeprecationWarning)
  1879. test = np.genfromtxt(path, dtype=None, comments=None,
  1880. delimiter=',')
  1881. # Check for warning when encoding not specified.
  1882. assert_(w[0].category is np.VisibleDeprecationWarning)
  1883. ctl = np.array([
  1884. ["norm1", "norm2", "norm3"],
  1885. ["norm1", latin1, "norm3"],
  1886. ["test1", "testNonethe" + utf8, "test3"]],
  1887. dtype=np.unicode)
  1888. assert_array_equal(test, ctl)
  1889. def test_recfromtxt(self):
  1890. #
  1891. data = TextIO('A,B\n0,1\n2,3')
  1892. kwargs = dict(delimiter=",", missing_values="N/A", names=True)
  1893. test = np.recfromtxt(data, **kwargs)
  1894. control = np.array([(0, 1), (2, 3)],
  1895. dtype=[('A', int), ('B', int)])
  1896. assert_(isinstance(test, np.recarray))
  1897. assert_equal(test, control)
  1898. #
  1899. data = TextIO('A,B\n0,1\n2,N/A')
  1900. test = np.recfromtxt(data, dtype=None, usemask=True, **kwargs)
  1901. control = ma.array([(0, 1), (2, -1)],
  1902. mask=[(False, False), (False, True)],
  1903. dtype=[('A', int), ('B', int)])
  1904. assert_equal(test, control)
  1905. assert_equal(test.mask, control.mask)
  1906. assert_equal(test.A, [0, 2])
  1907. def test_recfromcsv(self):
  1908. #
  1909. data = TextIO('A,B\n0,1\n2,3')
  1910. kwargs = dict(missing_values="N/A", names=True, case_sensitive=True)
  1911. test = np.recfromcsv(data, dtype=None, **kwargs)
  1912. control = np.array([(0, 1), (2, 3)],
  1913. dtype=[('A', int), ('B', int)])
  1914. assert_(isinstance(test, np.recarray))
  1915. assert_equal(test, control)
  1916. #
  1917. data = TextIO('A,B\n0,1\n2,N/A')
  1918. test = np.recfromcsv(data, dtype=None, usemask=True, **kwargs)
  1919. control = ma.array([(0, 1), (2, -1)],
  1920. mask=[(False, False), (False, True)],
  1921. dtype=[('A', int), ('B', int)])
  1922. assert_equal(test, control)
  1923. assert_equal(test.mask, control.mask)
  1924. assert_equal(test.A, [0, 2])
  1925. #
  1926. data = TextIO('A,B\n0,1\n2,3')
  1927. test = np.recfromcsv(data, missing_values='N/A',)
  1928. control = np.array([(0, 1), (2, 3)],
  1929. dtype=[('a', int), ('b', int)])
  1930. assert_(isinstance(test, np.recarray))
  1931. assert_equal(test, control)
  1932. #
  1933. data = TextIO('A,B\n0,1\n2,3')
  1934. dtype = [('a', int), ('b', float)]
  1935. test = np.recfromcsv(data, missing_values='N/A', dtype=dtype)
  1936. control = np.array([(0, 1), (2, 3)],
  1937. dtype=dtype)
  1938. assert_(isinstance(test, np.recarray))
  1939. assert_equal(test, control)
  1940. #gh-10394
  1941. data = TextIO('color\n"red"\n"blue"')
  1942. test = np.recfromcsv(data, converters={0: lambda x: x.strip(b'\"')})
  1943. control = np.array([('red',), ('blue',)], dtype=[('color', (bytes, 4))])
  1944. assert_equal(test.dtype, control.dtype)
  1945. assert_equal(test, control)
  1946. def test_max_rows(self):
  1947. # Test the `max_rows` keyword argument.
  1948. data = '1 2\n3 4\n5 6\n7 8\n9 10\n'
  1949. txt = TextIO(data)
  1950. a1 = np.genfromtxt(txt, max_rows=3)
  1951. a2 = np.genfromtxt(txt)
  1952. assert_equal(a1, [[1, 2], [3, 4], [5, 6]])
  1953. assert_equal(a2, [[7, 8], [9, 10]])
  1954. # max_rows must be at least 1.
  1955. assert_raises(ValueError, np.genfromtxt, TextIO(data), max_rows=0)
  1956. # An input with several invalid rows.
  1957. data = '1 1\n2 2\n0 \n3 3\n4 4\n5 \n6 \n7 \n'
  1958. test = np.genfromtxt(TextIO(data), max_rows=2)
  1959. control = np.array([[1., 1.], [2., 2.]])
  1960. assert_equal(test, control)
  1961. # Test keywords conflict
  1962. assert_raises(ValueError, np.genfromtxt, TextIO(data), skip_footer=1,
  1963. max_rows=4)
  1964. # Test with invalid value
  1965. assert_raises(ValueError, np.genfromtxt, TextIO(data), max_rows=4)
  1966. # Test with invalid not raise
  1967. with suppress_warnings() as sup:
  1968. sup.filter(ConversionWarning)
  1969. test = np.genfromtxt(TextIO(data), max_rows=4, invalid_raise=False)
  1970. control = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])
  1971. assert_equal(test, control)
  1972. test = np.genfromtxt(TextIO(data), max_rows=5, invalid_raise=False)
  1973. control = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])
  1974. assert_equal(test, control)
  1975. # Structured array with field names.
  1976. data = 'a b\n#c d\n1 1\n2 2\n#0 \n3 3\n4 4\n5 5\n'
  1977. # Test with header, names and comments
  1978. txt = TextIO(data)
  1979. test = np.genfromtxt(txt, skip_header=1, max_rows=3, names=True)
  1980. control = np.array([(1.0, 1.0), (2.0, 2.0), (3.0, 3.0)],
  1981. dtype=[('c', '<f8'), ('d', '<f8')])
  1982. assert_equal(test, control)
  1983. # To continue reading the same "file", don't use skip_header or
  1984. # names, and use the previously determined dtype.
  1985. test = np.genfromtxt(txt, max_rows=None, dtype=test.dtype)
  1986. control = np.array([(4.0, 4.0), (5.0, 5.0)],
  1987. dtype=[('c', '<f8'), ('d', '<f8')])
  1988. assert_equal(test, control)
  1989. def test_gft_using_filename(self):
  1990. # Test that we can load data from a filename as well as a file
  1991. # object
  1992. tgt = np.arange(6).reshape((2, 3))
  1993. linesep = ('\n', '\r\n', '\r')
  1994. for sep in linesep:
  1995. data = '0 1 2' + sep + '3 4 5'
  1996. with temppath() as name:
  1997. with open(name, 'w') as f:
  1998. f.write(data)
  1999. res = np.genfromtxt(name)
  2000. assert_array_equal(res, tgt)
  2001. def test_gft_from_gzip(self):
  2002. # Test that we can load data from a gzipped file
  2003. wanted = np.arange(6).reshape((2, 3))
  2004. linesep = ('\n', '\r\n', '\r')
  2005. for sep in linesep:
  2006. data = '0 1 2' + sep + '3 4 5'
  2007. s = BytesIO()
  2008. with gzip.GzipFile(fileobj=s, mode='w') as g:
  2009. g.write(asbytes(data))
  2010. with temppath(suffix='.gz2') as name:
  2011. with open(name, 'w') as f:
  2012. f.write(data)
  2013. assert_array_equal(np.genfromtxt(name), wanted)
  2014. def test_gft_using_generator(self):
  2015. # gft doesn't work with unicode.
  2016. def count():
  2017. for i in range(10):
  2018. yield asbytes("%d" % i)
  2019. res = np.genfromtxt(count())
  2020. assert_array_equal(res, np.arange(10))
  2021. def test_auto_dtype_largeint(self):
  2022. # Regression test for numpy/numpy#5635 whereby large integers could
  2023. # cause OverflowErrors.
  2024. # Test the automatic definition of the output dtype
  2025. #
  2026. # 2**66 = 73786976294838206464 => should convert to float
  2027. # 2**34 = 17179869184 => should convert to int64
  2028. # 2**10 = 1024 => should convert to int (int32 on 32-bit systems,
  2029. # int64 on 64-bit systems)
  2030. data = TextIO('73786976294838206464 17179869184 1024')
  2031. test = np.ndfromtxt(data, dtype=None)
  2032. assert_equal(test.dtype.names, ['f0', 'f1', 'f2'])
  2033. assert_(test.dtype['f0'] == float)
  2034. assert_(test.dtype['f1'] == np.int64)
  2035. assert_(test.dtype['f2'] == np.integer)
  2036. assert_allclose(test['f0'], 73786976294838206464.)
  2037. assert_equal(test['f1'], 17179869184)
  2038. assert_equal(test['f2'], 1024)
  2039. @pytest.mark.skipif(Path is None, reason="No pathlib.Path")
  2040. class TestPathUsage(object):
  2041. # Test that pathlib.Path can be used
  2042. def test_loadtxt(self):
  2043. with temppath(suffix='.txt') as path:
  2044. path = Path(path)
  2045. a = np.array([[1.1, 2], [3, 4]])
  2046. np.savetxt(path, a)
  2047. x = np.loadtxt(path)
  2048. assert_array_equal(x, a)
  2049. def test_save_load(self):
  2050. # Test that pathlib.Path instances can be used with save.
  2051. with temppath(suffix='.npy') as path:
  2052. path = Path(path)
  2053. a = np.array([[1, 2], [3, 4]], int)
  2054. np.save(path, a)
  2055. data = np.load(path)
  2056. assert_array_equal(data, a)
  2057. def test_save_load_memmap(self):
  2058. # Test that pathlib.Path instances can be loaded mem-mapped.
  2059. with temppath(suffix='.npy') as path:
  2060. path = Path(path)
  2061. a = np.array([[1, 2], [3, 4]], int)
  2062. np.save(path, a)
  2063. data = np.load(path, mmap_mode='r')
  2064. assert_array_equal(data, a)
  2065. # close the mem-mapped file
  2066. del data
  2067. def test_save_load_memmap_readwrite(self):
  2068. # Test that pathlib.Path instances can be written mem-mapped.
  2069. with temppath(suffix='.npy') as path:
  2070. path = Path(path)
  2071. a = np.array([[1, 2], [3, 4]], int)
  2072. np.save(path, a)
  2073. b = np.load(path, mmap_mode='r+')
  2074. a[0][0] = 5
  2075. b[0][0] = 5
  2076. del b # closes the file
  2077. data = np.load(path)
  2078. assert_array_equal(data, a)
  2079. def test_savez_load(self):
  2080. # Test that pathlib.Path instances can be used with savez.
  2081. with temppath(suffix='.npz') as path:
  2082. path = Path(path)
  2083. np.savez(path, lab='place holder')
  2084. with np.load(path) as data:
  2085. assert_array_equal(data['lab'], 'place holder')
  2086. def test_savez_compressed_load(self):
  2087. # Test that pathlib.Path instances can be used with savez.
  2088. with temppath(suffix='.npz') as path:
  2089. path = Path(path)
  2090. np.savez_compressed(path, lab='place holder')
  2091. data = np.load(path)
  2092. assert_array_equal(data['lab'], 'place holder')
  2093. data.close()
  2094. def test_genfromtxt(self):
  2095. with temppath(suffix='.txt') as path:
  2096. path = Path(path)
  2097. a = np.array([(1, 2), (3, 4)])
  2098. np.savetxt(path, a)
  2099. data = np.genfromtxt(path)
  2100. assert_array_equal(a, data)
  2101. def test_ndfromtxt(self):
  2102. # Test outputting a standard ndarray
  2103. with temppath(suffix='.txt') as path:
  2104. path = Path(path)
  2105. with path.open('w') as f:
  2106. f.write(u'1 2\n3 4')
  2107. control = np.array([[1, 2], [3, 4]], dtype=int)
  2108. test = np.ndfromtxt(path, dtype=int)
  2109. assert_array_equal(test, control)
  2110. def test_mafromtxt(self):
  2111. # From `test_fancy_dtype_alt` above
  2112. with temppath(suffix='.txt') as path:
  2113. path = Path(path)
  2114. with path.open('w') as f:
  2115. f.write(u'1,2,3.0\n4,5,6.0\n')
  2116. test = np.mafromtxt(path, delimiter=',')
  2117. control = ma.array([(1.0, 2.0, 3.0), (4.0, 5.0, 6.0)])
  2118. assert_equal(test, control)
  2119. def test_recfromtxt(self):
  2120. with temppath(suffix='.txt') as path:
  2121. path = Path(path)
  2122. with path.open('w') as f:
  2123. f.write(u'A,B\n0,1\n2,3')
  2124. kwargs = dict(delimiter=",", missing_values="N/A", names=True)
  2125. test = np.recfromtxt(path, **kwargs)
  2126. control = np.array([(0, 1), (2, 3)],
  2127. dtype=[('A', int), ('B', int)])
  2128. assert_(isinstance(test, np.recarray))
  2129. assert_equal(test, control)
  2130. def test_recfromcsv(self):
  2131. with temppath(suffix='.txt') as path:
  2132. path = Path(path)
  2133. with path.open('w') as f:
  2134. f.write(u'A,B\n0,1\n2,3')
  2135. kwargs = dict(missing_values="N/A", names=True, case_sensitive=True)
  2136. test = np.recfromcsv(path, dtype=None, **kwargs)
  2137. control = np.array([(0, 1), (2, 3)],
  2138. dtype=[('A', int), ('B', int)])
  2139. assert_(isinstance(test, np.recarray))
  2140. assert_equal(test, control)
  2141. def test_gzip_load():
  2142. a = np.random.random((5, 5))
  2143. s = BytesIO()
  2144. f = gzip.GzipFile(fileobj=s, mode="w")
  2145. np.save(f, a)
  2146. f.close()
  2147. s.seek(0)
  2148. f = gzip.GzipFile(fileobj=s, mode="r")
  2149. assert_array_equal(np.load(f), a)
  2150. def test_gzip_loadtxt():
  2151. # Thanks to another windows brokenness, we can't use
  2152. # NamedTemporaryFile: a file created from this function cannot be
  2153. # reopened by another open call. So we first put the gzipped string
  2154. # of the test reference array, write it to a securely opened file,
  2155. # which is then read from by the loadtxt function
  2156. s = BytesIO()
  2157. g = gzip.GzipFile(fileobj=s, mode='w')
  2158. g.write(b'1 2 3\n')
  2159. g.close()
  2160. s.seek(0)
  2161. with temppath(suffix='.gz') as name:
  2162. with open(name, 'wb') as f:
  2163. f.write(s.read())
  2164. res = np.loadtxt(name)
  2165. s.close()
  2166. assert_array_equal(res, [1, 2, 3])
  2167. def test_gzip_loadtxt_from_string():
  2168. s = BytesIO()
  2169. f = gzip.GzipFile(fileobj=s, mode="w")
  2170. f.write(b'1 2 3\n')
  2171. f.close()
  2172. s.seek(0)
  2173. f = gzip.GzipFile(fileobj=s, mode="r")
  2174. assert_array_equal(np.loadtxt(f), [1, 2, 3])
  2175. def test_npzfile_dict():
  2176. s = BytesIO()
  2177. x = np.zeros((3, 3))
  2178. y = np.zeros((3, 3))
  2179. np.savez(s, x=x, y=y)
  2180. s.seek(0)
  2181. z = np.load(s)
  2182. assert_('x' in z)
  2183. assert_('y' in z)
  2184. assert_('x' in z.keys())
  2185. assert_('y' in z.keys())
  2186. for f, a in z.items():
  2187. assert_(f in ['x', 'y'])
  2188. assert_equal(a.shape, (3, 3))
  2189. assert_(len(z.items()) == 2)
  2190. for f in z:
  2191. assert_(f in ['x', 'y'])
  2192. assert_('x' in z.keys())
  2193. @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts")
  2194. def test_load_refcount():
  2195. # Check that objects returned by np.load are directly freed based on
  2196. # their refcount, rather than needing the gc to collect them.
  2197. f = BytesIO()
  2198. np.savez(f, [1, 2, 3])
  2199. f.seek(0)
  2200. with assert_no_gc_cycles():
  2201. np.load(f)
  2202. f.seek(0)
  2203. dt = [("a", 'u1', 2), ("b", 'u1', 2)]
  2204. with assert_no_gc_cycles():
  2205. x = np.loadtxt(TextIO("0 1 2 3"), dtype=dt)
  2206. assert_equal(x, np.array([((0, 1), (2, 3))], dtype=dt))