123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271 |
- # -*- coding: utf-8 -*-
- # pylint: disable-msg=E1101,W0612
- from copy import deepcopy
- from distutils.version import LooseVersion
- from operator import methodcaller
- import numpy as np
- import pytest
- from pandas.compat import range
- import pandas.util._test_decorators as td
- import pandas as pd
- from pandas import DataFrame, MultiIndex, Series, date_range
- import pandas.util.testing as tm
- from pandas.util.testing import (
- assert_almost_equal, assert_frame_equal, assert_series_equal)
- from .test_generic import Generic
# xarray is an optional dependency: record whether it could be imported so
# that tests needing it can be skipped instead of erroring at collection time.
try:
    import xarray
except ImportError:
    _XARRAY_INSTALLED = False
else:
    _XARRAY_INSTALLED = True
class TestDataFrame(Generic):
    """DataFrame tests built on the ``Generic`` base class (``test_generic``)."""

    _typ = DataFrame

    def _comparator(self, x, y):
        """Assert that the frames ``x`` and ``y`` are equal."""
        assert_frame_equal(x, y)

    def test_rename_mi(self):
        """Smoke test: renaming a MultiIndex-ed frame with a callable.

        There is no assertion; the test passes as long as ``rename`` does
        not raise.
        """
        df = DataFrame([
            11, 21, 31
        ], index=MultiIndex.from_tuples([("A", x) for x in ["a", "B", "c"]]))
        df.rename(str.lower)

    def test_set_axis_name(self):
        """``_set_axis_name`` and ``rename_axis`` name the requested axis.

        For both methods the returned frame carries the new name while the
        original frame's axis name stays ``None``.
        """
        df = pd.DataFrame([[1, 2], [3, 4]])
        funcs = ['_set_axis_name', 'rename_axis']
        for func in funcs:
            # default axis -> index
            result = methodcaller(func, 'foo')(df)
            assert df.index.name is None
            assert result.index.name == 'foo'

            # axis=1 -> columns
            result = methodcaller(func, 'cols', axis=1)(df)
            assert df.columns.name is None
            assert result.columns.name == 'cols'

    def test_set_axis_name_mi(self):
        """Level names are applied to one MultiIndex axis at a time.

        Naming the index levels must leave the column level names as
        ``[None, None]`` and vice versa.
        """
        df = DataFrame(
            np.empty((3, 3)),
            index=MultiIndex.from_tuples([("A", x) for x in list('aBc')]),
            columns=MultiIndex.from_tuples([('C', x) for x in list('xyz')])
        )

        level_names = ['L1', 'L2']
        funcs = ['_set_axis_name', 'rename_axis']
        for func in funcs:
            result = methodcaller(func, level_names)(df)
            assert result.index.names == level_names
            assert result.columns.names == [None, None]

            result = methodcaller(func, level_names, axis=1)(df)
            assert result.columns.names == ["L1", "L2"]
            assert result.index.names == [None, None]

    def test_nonzero_single_element(self):
        """``DataFrame.bool()`` works only for a single boolean element."""
        # allow single item via bool method
        df = DataFrame([[True]])
        assert df.bool()

        df = DataFrame([[False]])
        assert not df.bool()

        # more than one element is ambiguous for both .bool() and bool()
        df = DataFrame([[False, False]])
        with pytest.raises(ValueError):
            df.bool()
        with pytest.raises(ValueError):
            bool(df)

    def test_get_numeric_data_preserve_dtype(self):
        """``_get_numeric_data`` on a mixed-value column yields no columns.

        The expected result is a column-less frame that keeps the original
        index ``[0, 1, 2]`` and has ``object`` dtype.
        """
        # get the numeric data
        o = DataFrame({'A': [1, '2', 3.]})
        result = o._get_numeric_data()
        expected = DataFrame(index=[0, 1, 2], dtype=object)
        self._compare(result, expected)

    def test_metadata_propagation_indiv(self):
        """Check metadata handling for groupby, resample, merge and concat.

        The merge and concat sections temporarily replace
        ``DataFrame._metadata`` and ``DataFrame.__finalize__`` at class
        level.  The originals are restored in a ``finally`` block so that a
        failing assertion cannot leak the patched hooks into other tests.
        """
        # groupby
        df = DataFrame(
            {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
             'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
             'C': np.random.randn(8),
             'D': np.random.randn(8)})
        result = df.groupby('A').sum()
        self.check_metadata(df, result)

        # resample
        df = DataFrame(np.random.randn(1000, 2),
                       index=date_range('20130101', periods=1000, freq='s'))
        result = df.resample('1T')
        self.check_metadata(df, result)

        # merging with override
        # GH 6923
        _metadata = DataFrame._metadata
        _finalize = DataFrame.__finalize__
        try:
            np.random.seed(10)
            df1 = DataFrame(np.random.randint(0, 4, (3, 2)),
                            columns=['a', 'b'])
            df2 = DataFrame(np.random.randint(0, 4, (3, 2)),
                            columns=['c', 'd'])
            DataFrame._metadata = ['filename']
            df1.filename = 'fname1.csv'
            df2.filename = 'fname2.csv'

            def finalize(self, other, method=None, **kwargs):
                # On merge, join the metadata of both operands with '|';
                # otherwise copy it from ``other``.
                for name in self._metadata:
                    if method == 'merge':
                        left, right = other.left, other.right
                        value = (getattr(left, name, '') + '|' +
                                 getattr(right, name, ''))
                        object.__setattr__(self, name, value)
                    else:
                        object.__setattr__(self, name,
                                           getattr(other, name, ''))

                return self

            DataFrame.__finalize__ = finalize
            result = df1.merge(df2, left_on=['a'], right_on=['c'],
                               how='inner')
            assert result.filename == 'fname1.csv|fname2.csv'

            # concat
            # GH 6927
            DataFrame._metadata = ['filename']
            df1 = DataFrame(np.random.randint(0, 4, (3, 2)),
                            columns=list('ab'))
            df1.filename = 'foo'

            def finalize(self, other, method=None, **kwargs):
                # On concat, join the truthy metadata values of all
                # concatenated objects with '+'; otherwise copy from
                # ``other``.
                for name in self._metadata:
                    if method == 'concat':
                        value = '+'.join([
                            getattr(o, name) for o in other.objs
                            if getattr(o, name, None)
                        ])
                        object.__setattr__(self, name, value)
                    else:
                        object.__setattr__(self, name,
                                           getattr(other, name, None))

                return self

            DataFrame.__finalize__ = finalize

            result = pd.concat([df1, df1])
            assert result.filename == 'foo+foo'
        finally:
            # reset, even when an assertion above fails
            DataFrame._metadata = _metadata
            DataFrame.__finalize__ = _finalize

    def test_set_attribute(self):
        """Attribute assignment and a same-named column stay independent."""
        # Test for consistent setattr behavior when an attribute and a column
        # have the same name (Issue #8994)
        df = DataFrame({'x': [1, 2, 3]})

        df.y = 2
        df['y'] = [2, 4, 6]
        df.y = 5

        assert df.y == 5
        assert_series_equal(df['y'], Series([2, 4, 6], name='y'))

    # ``or`` short-circuits, so ``xarray.__version__`` is only evaluated when
    # the import at module level succeeded.
    @pytest.mark.skipif(not _XARRAY_INSTALLED or
                        LooseVersion(xarray.__version__) <
                        LooseVersion('0.10.0'),
                        reason='xarray >= 0.10.0 required')
    @pytest.mark.parametrize(
        "index", ['FloatIndex', 'IntIndex',
                  'StringIndex', 'UnicodeIndex',
                  'DateIndex', 'PeriodIndex',
                  'CategoricalIndex', 'TimedeltaIndex'])
    def test_to_xarray_index_types(self, index):
        """Round-trip a frame through xarray for several index types.

        ``index`` is the suffix of a ``tm.make<index>`` factory used to build
        a length-3 index for the frame.
        """
        from xarray import Dataset

        make_index = getattr(tm, 'make{}'.format(index))
        df = DataFrame({'a': list('abc'),
                        'b': list(range(1, 4)),
                        'c': np.arange(3, 6).astype('u1'),
                        'd': np.arange(4.0, 7.0, dtype='float64'),
                        'e': [True, False, True],
                        'f': pd.Categorical(list('abc')),
                        'g': pd.date_range('20130101', periods=3),
                        'h': pd.date_range('20130101',
                                           periods=3,
                                           tz='US/Eastern')}
                       )

        df.index = make_index(3)
        df.index.name = 'foo'
        df.columns.name = 'bar'
        result = df.to_xarray()
        assert result.dims['foo'] == 3
        assert len(result.coords) == 1
        assert len(result.data_vars) == 8
        assert_almost_equal(list(result.coords.keys()), ['foo'])
        assert isinstance(result, Dataset)

        # idempotency
        # categoricals are not preserved
        # datetimes w/tz are not preserved
        # column names are lost
        expected = df.copy()
        expected['f'] = expected['f'].astype(object)
        expected['h'] = expected['h'].astype('datetime64[ns]')
        expected.columns.name = None
        assert_frame_equal(result.to_dataframe(), expected,
                           check_index_type=False, check_categorical=False)

    @td.skip_if_no('xarray', min_version='0.7.0')
    def test_to_xarray(self):
        """``to_xarray`` on an empty slice and on a MultiIndex-ed frame."""
        from xarray import Dataset

        df = DataFrame({'a': list('abc'),
                        'b': list(range(1, 4)),
                        'c': np.arange(3, 6).astype('u1'),
                        'd': np.arange(4.0, 7.0, dtype='float64'),
                        'e': [True, False, True],
                        'f': pd.Categorical(list('abc')),
                        'g': pd.date_range('20130101', periods=3),
                        'h': pd.date_range('20130101',
                                           periods=3,
                                           tz='US/Eastern')}
                       )

        # empty frame: the named index becomes a zero-length dimension
        df.index.name = 'foo'
        result = df[0:0].to_xarray()
        assert result.dims['foo'] == 0
        assert isinstance(result, Dataset)

        # available in 0.7.1
        # MultiIndex
        df.index = pd.MultiIndex.from_product([['a'], range(3)],
                                              names=['one', 'two'])
        result = df.to_xarray()
        assert result.dims['one'] == 1
        assert result.dims['two'] == 3
        assert len(result.coords) == 2
        assert len(result.data_vars) == 8
        assert_almost_equal(list(result.coords.keys()), ['one', 'two'])
        assert isinstance(result, Dataset)

        # round trip: categorical and tz-aware columns come back as
        # object / naive datetime64[ns], and the columns name is dropped
        result = result.to_dataframe()
        expected = df.copy()
        expected['f'] = expected['f'].astype(object)
        expected['h'] = expected['h'].astype('datetime64[ns]')
        expected.columns.name = None
        assert_frame_equal(result,
                           expected,
                           check_index_type=False)

    def test_deepcopy_empty(self):
        """``deepcopy`` of an empty frame keeps its non-empty column set."""
        # This test covers empty frame copying with non-empty column sets
        # as reported in issue GH15370
        empty_frame = DataFrame(data=[], index=[], columns=['A'])
        empty_frame_copy = deepcopy(empty_frame)

        self._compare(empty_frame_copy, empty_frame)
|