123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158 |
""" test feather-format compat """
from distutils.version import LooseVersion

import numpy as np
import pytest

import pandas as pd
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, ensure_clean

from pandas.io.feather_format import read_feather, to_feather  # noqa:E402

# Skip every test in this module when pyarrow cannot be imported.
pyarrow = pytest.importorskip('pyarrow')

# Parsed pyarrow version, kept at module level for version comparisons.
pyarrow_version = LooseVersion(pyarrow.__version__)
@pytest.mark.single
class TestFeather(object):
    """Round-trip and error-path tests for ``to_feather``/``read_feather``."""

    def check_error_on_write(self, df, exc):
        """Assert that writing ``df`` with ``to_feather`` raises ``exc``.

        Parameters
        ----------
        df : object
            The object handed to ``to_feather`` (not necessarily a DataFrame).
        exc : Exception subclass
            The exception type the write is expected to raise.
        """
        # check that we are raising the exception
        # on writing
        with pytest.raises(exc):
            with ensure_clean() as path:
                to_feather(df, path)

    def check_round_trip(self, df, expected=None, **kwargs):
        """Write ``df`` to a temporary path, read it back and compare.

        Parameters
        ----------
        df : DataFrame
            Frame written with ``to_feather``.
        expected : DataFrame, optional
            Frame the read result must equal; defaults to ``df`` itself.
        **kwargs
            Forwarded unchanged to ``read_feather``.
        """
        if expected is None:
            expected = df

        with ensure_clean() as path:
            to_feather(df, path)
            result = read_feather(path, **kwargs)
            assert_frame_equal(result, expected)

    def test_error(self):
        """Writing anything that is not a DataFrame raises ValueError."""
        for obj in [pd.Series([1, 2, 3]), 1, 'foo', pd.Timestamp('20130101'),
                    np.array([1, 2, 3])]:
            self.check_error_on_write(obj, ValueError)

    def test_basic(self):
        """A frame mixing the common column dtypes survives a round trip."""
        df = pd.DataFrame({'string': list('abc'),
                           'int': list(range(1, 4)),
                           'uint': np.arange(3, 6).astype('u1'),
                           'float': np.arange(4.0, 7.0, dtype='float64'),
                           'float_with_null': [1., np.nan, 3],
                           'bool': [True, False, True],
                           'bool_with_null': [True, np.nan, False],
                           'cat': pd.Categorical(list('abc')),
                           'dt': pd.date_range('20130101', periods=3),
                           'dttz': pd.date_range('20130101', periods=3,
                                                 tz='US/Eastern'),
                           'dt_with_null': [pd.Timestamp('20130101'), pd.NaT,
                                            pd.Timestamp('20130103')],
                           'dtns': pd.date_range('20130101', periods=3,
                                                 freq='ns')})

        # sanity-check the fixture before relying on it
        assert df.dttz.dtype.tz.zone == 'US/Eastern'
        self.check_round_trip(df)

    def test_duplicate_columns(self):
        """Duplicate column labels are rejected on write."""
        # https://github.com/wesm/feather/issues/53
        # not currently able to handle duplicate columns
        df = pd.DataFrame(np.arange(12).reshape(4, 3),
                          columns=list('aaa')).copy()
        self.check_error_on_write(df, ValueError)

    def test_stringify_columns(self):
        """Default (integer) column labels are rejected on write."""
        df = pd.DataFrame(np.arange(12).reshape(4, 3)).copy()
        self.check_error_on_write(df, ValueError)

    def test_read_columns(self):
        """``columns=`` restricts ``read_feather`` to the requested subset."""
        # GH 24025
        df = pd.DataFrame({'col1': list('abc'),
                           'col2': list(range(1, 4)),
                           'col3': list('xyz'),
                           'col4': list(range(4, 7))})
        columns = ['col1', 'col3']
        self.check_round_trip(df, expected=df[columns],
                              columns=columns)

    def test_unsupported_other(self):
        """A period-dtype column cannot be written."""
        # period
        df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)})
        # Some versions raise ValueError, others raise ArrowInvalid.
        self.check_error_on_write(df, Exception)

    def test_rw_nthreads(self):
        """The deprecated ``nthreads`` keyword still works but warns."""
        df = pd.DataFrame({'A': np.arange(100000)})
        expected_warning = (
            "the 'nthreads' keyword is deprecated, "
            "use 'use_threads' instead"
        )
        for nthreads in (2, 1):
            # TODO: make the warning work with check_stacklevel=True
            with tm.assert_produces_warning(
                    FutureWarning, check_stacklevel=False) as w:
                self.check_round_trip(df, nthreads=nthreads)
            # we have an extra FutureWarning because of #GH23752
            assert any(expected_warning in str(x) for x in w)

    def test_rw_use_threads(self):
        """Round trip works with ``use_threads`` both on and off."""
        df = pd.DataFrame({'A': np.arange(100000)})
        self.check_round_trip(df, use_threads=True)
        self.check_round_trip(df, use_threads=False)

    def test_write_with_index(self):
        """Only a default, unnamed index and flat columns can be written."""
        df = pd.DataFrame({'A': [1, 2, 3]})
        self.check_round_trip(df)

        # non-default index
        for index in [[2, 3, 4],
                      pd.date_range('20130101', periods=3),
                      list('abc'),
                      [1, 3, 4],
                      pd.MultiIndex.from_tuples([('a', 1), ('a', 2),
                                                 ('b', 1)]),
                      ]:
            df.index = index
            self.check_error_on_write(df, ValueError)

        # index with meta-data
        df.index = [0, 1, 2]
        df.index.name = 'foo'
        self.check_error_on_write(df, ValueError)

        # column multi-index
        # The frame has a single column, so the MultiIndex needs exactly one
        # entry.  No trailing comma here: one would assign a 1-tuple holding
        # the MultiIndex rather than the MultiIndex itself.
        df.index = [0, 1, 2]
        df.columns = pd.MultiIndex.from_tuples([('a', 1)])
        self.check_error_on_write(df, ValueError)

    def test_path_pathlib(self):
        """``to_feather``/``read_feather`` accept ``pathlib.Path`` objects."""
        df = tm.makeDataFrame().reset_index()
        result = tm.round_trip_pathlib(df.to_feather, pd.read_feather)
        tm.assert_frame_equal(df, result)

    def test_path_localpath(self):
        """``to_feather``/``read_feather`` accept ``py.path.local`` objects."""
        df = tm.makeDataFrame().reset_index()
        result = tm.round_trip_localpath(df.to_feather, pd.read_feather)
        tm.assert_frame_equal(df, result)
|