test_common.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. """
  2. Tests for the pandas.io.common functionalities
  3. """
  4. import mmap
  5. import os
  6. import pytest
  7. from pandas.compat import FileNotFoundError, StringIO, is_platform_windows
  8. import pandas.util._test_decorators as td
  9. import pandas as pd
  10. import pandas.util.testing as tm
  11. import pandas.io.common as icom
  12. class CustomFSPath(object):
  13. """For testing fspath on unknown objects"""
  14. def __init__(self, path):
  15. self.path = path
  16. def __fspath__(self):
  17. return self.path
  18. # Functions that consume a string path and return a string or path-like object
  19. path_types = [str, CustomFSPath]
  20. try:
  21. from pathlib import Path
  22. path_types.append(Path)
  23. except ImportError:
  24. pass
  25. try:
  26. from py.path import local as LocalPath
  27. path_types.append(LocalPath)
  28. except ImportError:
  29. pass
  30. HERE = os.path.abspath(os.path.dirname(__file__))
  31. # https://github.com/cython/cython/issues/1720
  32. @pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning")
  33. class TestCommonIOCapabilities(object):
  34. data1 = """index,A,B,C,D
  35. foo,2,3,4,5
  36. bar,7,8,9,10
  37. baz,12,13,14,15
  38. qux,12,13,14,15
  39. foo2,12,13,14,15
  40. bar2,12,13,14,15
  41. """
  42. def test_expand_user(self):
  43. filename = '~/sometest'
  44. expanded_name = icom._expand_user(filename)
  45. assert expanded_name != filename
  46. assert os.path.isabs(expanded_name)
  47. assert os.path.expanduser(filename) == expanded_name
  48. def test_expand_user_normal_path(self):
  49. filename = '/somefolder/sometest'
  50. expanded_name = icom._expand_user(filename)
  51. assert expanded_name == filename
  52. assert os.path.expanduser(filename) == expanded_name
  53. @td.skip_if_no('pathlib')
  54. def test_stringify_path_pathlib(self):
  55. rel_path = icom._stringify_path(Path('.'))
  56. assert rel_path == '.'
  57. redundant_path = icom._stringify_path(Path('foo//bar'))
  58. assert redundant_path == os.path.join('foo', 'bar')
  59. @td.skip_if_no('py.path')
  60. def test_stringify_path_localpath(self):
  61. path = os.path.join('foo', 'bar')
  62. abs_path = os.path.abspath(path)
  63. lpath = LocalPath(path)
  64. assert icom._stringify_path(lpath) == abs_path
  65. def test_stringify_path_fspath(self):
  66. p = CustomFSPath('foo/bar.csv')
  67. result = icom._stringify_path(p)
  68. assert result == 'foo/bar.csv'
  69. @pytest.mark.parametrize('extension,expected', [
  70. ('', None),
  71. ('.gz', 'gzip'),
  72. ('.bz2', 'bz2'),
  73. ('.zip', 'zip'),
  74. ('.xz', 'xz'),
  75. ])
  76. @pytest.mark.parametrize('path_type', path_types)
  77. def test_infer_compression_from_path(self, extension, expected, path_type):
  78. path = path_type('foo/bar.csv' + extension)
  79. compression = icom._infer_compression(path, compression='infer')
  80. assert compression == expected
  81. def test_get_filepath_or_buffer_with_path(self):
  82. filename = '~/sometest'
  83. filepath_or_buffer, _, _, should_close = icom.get_filepath_or_buffer(
  84. filename)
  85. assert filepath_or_buffer != filename
  86. assert os.path.isabs(filepath_or_buffer)
  87. assert os.path.expanduser(filename) == filepath_or_buffer
  88. assert not should_close
  89. def test_get_filepath_or_buffer_with_buffer(self):
  90. input_buffer = StringIO()
  91. filepath_or_buffer, _, _, should_close = icom.get_filepath_or_buffer(
  92. input_buffer)
  93. assert filepath_or_buffer == input_buffer
  94. assert not should_close
  95. def test_iterator(self):
  96. reader = pd.read_csv(StringIO(self.data1), chunksize=1)
  97. result = pd.concat(reader, ignore_index=True)
  98. expected = pd.read_csv(StringIO(self.data1))
  99. tm.assert_frame_equal(result, expected)
  100. # GH12153
  101. it = pd.read_csv(StringIO(self.data1), chunksize=1)
  102. first = next(it)
  103. tm.assert_frame_equal(first, expected.iloc[[0]])
  104. tm.assert_frame_equal(pd.concat(it), expected.iloc[1:])
  105. @pytest.mark.parametrize('reader, module, error_class, fn_ext', [
  106. (pd.read_csv, 'os', FileNotFoundError, 'csv'),
  107. (pd.read_fwf, 'os', FileNotFoundError, 'txt'),
  108. (pd.read_excel, 'xlrd', FileNotFoundError, 'xlsx'),
  109. (pd.read_feather, 'feather', Exception, 'feather'),
  110. (pd.read_hdf, 'tables', FileNotFoundError, 'h5'),
  111. (pd.read_stata, 'os', FileNotFoundError, 'dta'),
  112. (pd.read_sas, 'os', FileNotFoundError, 'sas7bdat'),
  113. (pd.read_json, 'os', ValueError, 'json'),
  114. (pd.read_msgpack, 'os', ValueError, 'mp'),
  115. (pd.read_pickle, 'os', FileNotFoundError, 'pickle'),
  116. ])
  117. def test_read_non_existant(self, reader, module, error_class, fn_ext):
  118. pytest.importorskip(module)
  119. path = os.path.join(HERE, 'data', 'does_not_exist.' + fn_ext)
  120. msg1 = (r"File (b')?.+does_not_exist\.{}'? does not exist"
  121. .format(fn_ext))
  122. msg2 = (r"\[Errno 2\] No such file or directory: '.+does_not_exist"
  123. r"\.{}'").format(fn_ext)
  124. msg3 = "Expected object or value"
  125. msg4 = "path_or_buf needs to be a string file path or file-like"
  126. msg5 = (r"\[Errno 2\] File .+does_not_exist\.{} does not exist:"
  127. r" '.+does_not_exist\.{}'").format(fn_ext, fn_ext)
  128. with pytest.raises(error_class, match=r"({}|{}|{}|{}|{})".format(
  129. msg1, msg2, msg3, msg4, msg5)):
  130. reader(path)
  131. @pytest.mark.parametrize('reader, module, error_class, fn_ext', [
  132. (pd.read_csv, 'os', FileNotFoundError, 'csv'),
  133. (pd.read_fwf, 'os', FileNotFoundError, 'txt'),
  134. (pd.read_excel, 'xlrd', FileNotFoundError, 'xlsx'),
  135. (pd.read_feather, 'feather', Exception, 'feather'),
  136. (pd.read_hdf, 'tables', FileNotFoundError, 'h5'),
  137. (pd.read_stata, 'os', FileNotFoundError, 'dta'),
  138. (pd.read_sas, 'os', FileNotFoundError, 'sas7bdat'),
  139. (pd.read_json, 'os', ValueError, 'json'),
  140. (pd.read_msgpack, 'os', ValueError, 'mp'),
  141. (pd.read_pickle, 'os', FileNotFoundError, 'pickle'),
  142. ])
  143. def test_read_expands_user_home_dir(self, reader, module,
  144. error_class, fn_ext, monkeypatch):
  145. pytest.importorskip(module)
  146. path = os.path.join('~', 'does_not_exist.' + fn_ext)
  147. monkeypatch.setattr(icom, '_expand_user',
  148. lambda x: os.path.join('foo', x))
  149. msg1 = (r"File (b')?.+does_not_exist\.{}'? does not exist"
  150. .format(fn_ext))
  151. msg2 = (r"\[Errno 2\] No such file or directory:"
  152. r" '.+does_not_exist\.{}'").format(fn_ext)
  153. msg3 = "Unexpected character found when decoding 'false'"
  154. msg4 = "path_or_buf needs to be a string file path or file-like"
  155. msg5 = (r"\[Errno 2\] File .+does_not_exist\.{} does not exist:"
  156. r" '.+does_not_exist\.{}'").format(fn_ext, fn_ext)
  157. with pytest.raises(error_class, match=r"({}|{}|{}|{}|{})".format(
  158. msg1, msg2, msg3, msg4, msg5)):
  159. reader(path)
  160. def test_read_non_existant_read_table(self):
  161. path = os.path.join(HERE, 'data', 'does_not_exist.' + 'csv')
  162. msg1 = r"File b'.+does_not_exist\.csv' does not exist"
  163. msg2 = (r"\[Errno 2\] File .+does_not_exist\.csv does not exist:"
  164. r" '.+does_not_exist\.csv'")
  165. with pytest.raises(FileNotFoundError, match=r"({}|{})".format(
  166. msg1, msg2)):
  167. with tm.assert_produces_warning(FutureWarning):
  168. pd.read_table(path)
  169. @pytest.mark.parametrize('reader, module, path', [
  170. (pd.read_csv, 'os', ('io', 'data', 'iris.csv')),
  171. (pd.read_fwf, 'os', ('io', 'data', 'fixed_width_format.txt')),
  172. (pd.read_excel, 'xlrd', ('io', 'data', 'test1.xlsx')),
  173. (pd.read_feather, 'feather', ('io', 'data', 'feather-0_3_1.feather')),
  174. (pd.read_hdf, 'tables', ('io', 'data', 'legacy_hdf',
  175. 'datetimetz_object.h5')),
  176. (pd.read_stata, 'os', ('io', 'data', 'stata10_115.dta')),
  177. (pd.read_sas, 'os', ('io', 'sas', 'data', 'test1.sas7bdat')),
  178. (pd.read_json, 'os', ('io', 'json', 'data', 'tsframe_v012.json')),
  179. (pd.read_msgpack, 'os', ('io', 'msgpack', 'data', 'frame.mp')),
  180. (pd.read_pickle, 'os', ('io', 'data', 'categorical_0_14_1.pickle')),
  181. ])
  182. def test_read_fspath_all(self, reader, module, path, datapath):
  183. pytest.importorskip(module)
  184. path = datapath(*path)
  185. mypath = CustomFSPath(path)
  186. result = reader(mypath)
  187. expected = reader(path)
  188. if path.endswith('.pickle'):
  189. # categorical
  190. tm.assert_categorical_equal(result, expected)
  191. else:
  192. tm.assert_frame_equal(result, expected)
  193. def test_read_fspath_all_read_table(self, datapath):
  194. path = datapath('io', 'data', 'iris.csv')
  195. mypath = CustomFSPath(path)
  196. with tm.assert_produces_warning(FutureWarning):
  197. result = pd.read_table(mypath)
  198. with tm.assert_produces_warning(FutureWarning):
  199. expected = pd.read_table(path)
  200. if path.endswith('.pickle'):
  201. # categorical
  202. tm.assert_categorical_equal(result, expected)
  203. else:
  204. tm.assert_frame_equal(result, expected)
  205. @pytest.mark.parametrize('writer_name, writer_kwargs, module', [
  206. ('to_csv', {}, 'os'),
  207. ('to_excel', {'engine': 'xlwt'}, 'xlwt'),
  208. ('to_feather', {}, 'feather'),
  209. ('to_html', {}, 'os'),
  210. ('to_json', {}, 'os'),
  211. ('to_latex', {}, 'os'),
  212. ('to_msgpack', {}, 'os'),
  213. ('to_pickle', {}, 'os'),
  214. ('to_stata', {}, 'os'),
  215. ])
  216. def test_write_fspath_all(self, writer_name, writer_kwargs, module):
  217. p1 = tm.ensure_clean('string')
  218. p2 = tm.ensure_clean('fspath')
  219. df = pd.DataFrame({"A": [1, 2]})
  220. with p1 as string, p2 as fspath:
  221. pytest.importorskip(module)
  222. mypath = CustomFSPath(fspath)
  223. writer = getattr(df, writer_name)
  224. writer(string, **writer_kwargs)
  225. with open(string, 'rb') as f:
  226. expected = f.read()
  227. writer(mypath, **writer_kwargs)
  228. with open(fspath, 'rb') as f:
  229. result = f.read()
  230. assert result == expected
  231. def test_write_fspath_hdf5(self):
  232. # Same test as write_fspath_all, except HDF5 files aren't
  233. # necessarily byte-for-byte identical for a given dataframe, so we'll
  234. # have to read and compare equality
  235. pytest.importorskip('tables')
  236. df = pd.DataFrame({"A": [1, 2]})
  237. p1 = tm.ensure_clean('string')
  238. p2 = tm.ensure_clean('fspath')
  239. with p1 as string, p2 as fspath:
  240. mypath = CustomFSPath(fspath)
  241. df.to_hdf(mypath, key='bar')
  242. df.to_hdf(string, key='bar')
  243. result = pd.read_hdf(fspath, key='bar')
  244. expected = pd.read_hdf(string, key='bar')
  245. tm.assert_frame_equal(result, expected)
  246. @pytest.fixture
  247. def mmap_file(datapath):
  248. return datapath('io', 'data', 'test_mmap.csv')
  249. class TestMMapWrapper(object):
  250. def test_constructor_bad_file(self, mmap_file):
  251. non_file = StringIO('I am not a file')
  252. non_file.fileno = lambda: -1
  253. # the error raised is different on Windows
  254. if is_platform_windows():
  255. msg = "The parameter is incorrect"
  256. err = OSError
  257. else:
  258. msg = "[Errno 22]"
  259. err = mmap.error
  260. with pytest.raises(err, match=msg):
  261. icom.MMapWrapper(non_file)
  262. target = open(mmap_file, 'r')
  263. target.close()
  264. msg = "I/O operation on closed file"
  265. with pytest.raises(ValueError, match=msg):
  266. icom.MMapWrapper(target)
  267. def test_get_attr(self, mmap_file):
  268. with open(mmap_file, 'r') as target:
  269. wrapper = icom.MMapWrapper(target)
  270. attrs = dir(wrapper.mmap)
  271. attrs = [attr for attr in attrs
  272. if not attr.startswith('__')]
  273. attrs.append('__next__')
  274. for attr in attrs:
  275. assert hasattr(wrapper, attr)
  276. assert not hasattr(wrapper, 'foo')
  277. def test_next(self, mmap_file):
  278. with open(mmap_file, 'r') as target:
  279. wrapper = icom.MMapWrapper(target)
  280. lines = target.readlines()
  281. for line in lines:
  282. next_line = next(wrapper)
  283. assert next_line.strip() == line.strip()
  284. with pytest.raises(StopIteration, match=r'^$'):
  285. next(wrapper)
  286. def test_unknown_engine(self):
  287. with tm.ensure_clean() as path:
  288. df = tm.makeDataFrame()
  289. df.to_csv(path)
  290. with pytest.raises(ValueError, match='Unknown engine'):
  291. pd.read_csv(path, engine='pyt')