# test_frame.py (9.5 KB)
  1. # -*- coding: utf-8 -*-
  2. # pylint: disable-msg=E1101,W0612
  3. from copy import deepcopy
  4. from distutils.version import LooseVersion
  5. from operator import methodcaller
  6. import numpy as np
  7. import pytest
  8. from pandas.compat import range
  9. import pandas.util._test_decorators as td
  10. import pandas as pd
  11. from pandas import DataFrame, MultiIndex, Series, date_range
  12. import pandas.util.testing as tm
  13. from pandas.util.testing import (
  14. assert_almost_equal, assert_frame_equal, assert_series_equal)
  15. from .test_generic import Generic
  16. try:
  17. import xarray
  18. _XARRAY_INSTALLED = True
  19. except ImportError:
  20. _XARRAY_INSTALLED = False
  21. class TestDataFrame(Generic):
  22. _typ = DataFrame
  23. _comparator = lambda self, x, y: assert_frame_equal(x, y)
  24. def test_rename_mi(self):
  25. df = DataFrame([
  26. 11, 21, 31
  27. ], index=MultiIndex.from_tuples([("A", x) for x in ["a", "B", "c"]]))
  28. df.rename(str.lower)
  29. def test_set_axis_name(self):
  30. df = pd.DataFrame([[1, 2], [3, 4]])
  31. funcs = ['_set_axis_name', 'rename_axis']
  32. for func in funcs:
  33. result = methodcaller(func, 'foo')(df)
  34. assert df.index.name is None
  35. assert result.index.name == 'foo'
  36. result = methodcaller(func, 'cols', axis=1)(df)
  37. assert df.columns.name is None
  38. assert result.columns.name == 'cols'
  39. def test_set_axis_name_mi(self):
  40. df = DataFrame(
  41. np.empty((3, 3)),
  42. index=MultiIndex.from_tuples([("A", x) for x in list('aBc')]),
  43. columns=MultiIndex.from_tuples([('C', x) for x in list('xyz')])
  44. )
  45. level_names = ['L1', 'L2']
  46. funcs = ['_set_axis_name', 'rename_axis']
  47. for func in funcs:
  48. result = methodcaller(func, level_names)(df)
  49. assert result.index.names == level_names
  50. assert result.columns.names == [None, None]
  51. result = methodcaller(func, level_names, axis=1)(df)
  52. assert result.columns.names == ["L1", "L2"]
  53. assert result.index.names == [None, None]
  54. def test_nonzero_single_element(self):
  55. # allow single item via bool method
  56. df = DataFrame([[True]])
  57. assert df.bool()
  58. df = DataFrame([[False]])
  59. assert not df.bool()
  60. df = DataFrame([[False, False]])
  61. with pytest.raises(ValueError):
  62. df.bool()
  63. with pytest.raises(ValueError):
  64. bool(df)
  65. def test_get_numeric_data_preserve_dtype(self):
  66. # get the numeric data
  67. o = DataFrame({'A': [1, '2', 3.]})
  68. result = o._get_numeric_data()
  69. expected = DataFrame(index=[0, 1, 2], dtype=object)
  70. self._compare(result, expected)
  71. def test_metadata_propagation_indiv(self):
  72. # groupby
  73. df = DataFrame(
  74. {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
  75. 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
  76. 'C': np.random.randn(8),
  77. 'D': np.random.randn(8)})
  78. result = df.groupby('A').sum()
  79. self.check_metadata(df, result)
  80. # resample
  81. df = DataFrame(np.random.randn(1000, 2),
  82. index=date_range('20130101', periods=1000, freq='s'))
  83. result = df.resample('1T')
  84. self.check_metadata(df, result)
  85. # merging with override
  86. # GH 6923
  87. _metadata = DataFrame._metadata
  88. _finalize = DataFrame.__finalize__
  89. np.random.seed(10)
  90. df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=['a', 'b'])
  91. df2 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=['c', 'd'])
  92. DataFrame._metadata = ['filename']
  93. df1.filename = 'fname1.csv'
  94. df2.filename = 'fname2.csv'
  95. def finalize(self, other, method=None, **kwargs):
  96. for name in self._metadata:
  97. if method == 'merge':
  98. left, right = other.left, other.right
  99. value = getattr(left, name, '') + '|' + getattr(right,
  100. name, '')
  101. object.__setattr__(self, name, value)
  102. else:
  103. object.__setattr__(self, name, getattr(other, name, ''))
  104. return self
  105. DataFrame.__finalize__ = finalize
  106. result = df1.merge(df2, left_on=['a'], right_on=['c'], how='inner')
  107. assert result.filename == 'fname1.csv|fname2.csv'
  108. # concat
  109. # GH 6927
  110. DataFrame._metadata = ['filename']
  111. df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=list('ab'))
  112. df1.filename = 'foo'
  113. def finalize(self, other, method=None, **kwargs):
  114. for name in self._metadata:
  115. if method == 'concat':
  116. value = '+'.join([getattr(
  117. o, name) for o in other.objs if getattr(o, name, None)
  118. ])
  119. object.__setattr__(self, name, value)
  120. else:
  121. object.__setattr__(self, name, getattr(other, name, None))
  122. return self
  123. DataFrame.__finalize__ = finalize
  124. result = pd.concat([df1, df1])
  125. assert result.filename == 'foo+foo'
  126. # reset
  127. DataFrame._metadata = _metadata
  128. DataFrame.__finalize__ = _finalize
  129. def test_set_attribute(self):
  130. # Test for consistent setattr behavior when an attribute and a column
  131. # have the same name (Issue #8994)
  132. df = DataFrame({'x': [1, 2, 3]})
  133. df.y = 2
  134. df['y'] = [2, 4, 6]
  135. df.y = 5
  136. assert df.y == 5
  137. assert_series_equal(df['y'], Series([2, 4, 6], name='y'))
  138. @pytest.mark.skipif(not _XARRAY_INSTALLED or _XARRAY_INSTALLED and
  139. LooseVersion(xarray.__version__) <
  140. LooseVersion('0.10.0'),
  141. reason='xarray >= 0.10.0 required')
  142. @pytest.mark.parametrize(
  143. "index", ['FloatIndex', 'IntIndex',
  144. 'StringIndex', 'UnicodeIndex',
  145. 'DateIndex', 'PeriodIndex',
  146. 'CategoricalIndex', 'TimedeltaIndex'])
  147. def test_to_xarray_index_types(self, index):
  148. from xarray import Dataset
  149. index = getattr(tm, 'make{}'.format(index))
  150. df = DataFrame({'a': list('abc'),
  151. 'b': list(range(1, 4)),
  152. 'c': np.arange(3, 6).astype('u1'),
  153. 'd': np.arange(4.0, 7.0, dtype='float64'),
  154. 'e': [True, False, True],
  155. 'f': pd.Categorical(list('abc')),
  156. 'g': pd.date_range('20130101', periods=3),
  157. 'h': pd.date_range('20130101',
  158. periods=3,
  159. tz='US/Eastern')}
  160. )
  161. df.index = index(3)
  162. df.index.name = 'foo'
  163. df.columns.name = 'bar'
  164. result = df.to_xarray()
  165. assert result.dims['foo'] == 3
  166. assert len(result.coords) == 1
  167. assert len(result.data_vars) == 8
  168. assert_almost_equal(list(result.coords.keys()), ['foo'])
  169. assert isinstance(result, Dataset)
  170. # idempotency
  171. # categoricals are not preserved
  172. # datetimes w/tz are not preserved
  173. # column names are lost
  174. expected = df.copy()
  175. expected['f'] = expected['f'].astype(object)
  176. expected['h'] = expected['h'].astype('datetime64[ns]')
  177. expected.columns.name = None
  178. assert_frame_equal(result.to_dataframe(), expected,
  179. check_index_type=False, check_categorical=False)
  180. @td.skip_if_no('xarray', min_version='0.7.0')
  181. def test_to_xarray(self):
  182. from xarray import Dataset
  183. df = DataFrame({'a': list('abc'),
  184. 'b': list(range(1, 4)),
  185. 'c': np.arange(3, 6).astype('u1'),
  186. 'd': np.arange(4.0, 7.0, dtype='float64'),
  187. 'e': [True, False, True],
  188. 'f': pd.Categorical(list('abc')),
  189. 'g': pd.date_range('20130101', periods=3),
  190. 'h': pd.date_range('20130101',
  191. periods=3,
  192. tz='US/Eastern')}
  193. )
  194. df.index.name = 'foo'
  195. result = df[0:0].to_xarray()
  196. assert result.dims['foo'] == 0
  197. assert isinstance(result, Dataset)
  198. # available in 0.7.1
  199. # MultiIndex
  200. df.index = pd.MultiIndex.from_product([['a'], range(3)],
  201. names=['one', 'two'])
  202. result = df.to_xarray()
  203. assert result.dims['one'] == 1
  204. assert result.dims['two'] == 3
  205. assert len(result.coords) == 2
  206. assert len(result.data_vars) == 8
  207. assert_almost_equal(list(result.coords.keys()), ['one', 'two'])
  208. assert isinstance(result, Dataset)
  209. result = result.to_dataframe()
  210. expected = df.copy()
  211. expected['f'] = expected['f'].astype(object)
  212. expected['h'] = expected['h'].astype('datetime64[ns]')
  213. expected.columns.name = None
  214. assert_frame_equal(result,
  215. expected,
  216. check_index_type=False)
  217. def test_deepcopy_empty(self):
  218. # This test covers empty frame copying with non-empty column sets
  219. # as reported in issue GH15370
  220. empty_frame = DataFrame(data=[], index=[], columns=['A'])
  221. empty_frame_copy = deepcopy(empty_frame)
  222. self._compare(empty_frame_copy, empty_frame)