test_conversion.py 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. # -*- coding: utf-8 -*-
  2. from collections import OrderedDict
  3. import numpy as np
  4. import pytest
  5. from pandas.compat import range
  6. import pandas as pd
  7. from pandas import DataFrame, MultiIndex, date_range
  8. import pandas.util.testing as tm
  9. def test_tolist(idx):
  10. result = idx.tolist()
  11. exp = list(idx.values)
  12. assert result == exp
  13. def test_to_numpy(idx):
  14. result = idx.to_numpy()
  15. exp = idx.values
  16. tm.assert_numpy_array_equal(result, exp)
  17. def test_to_frame():
  18. tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]
  19. index = MultiIndex.from_tuples(tuples)
  20. result = index.to_frame(index=False)
  21. expected = DataFrame(tuples)
  22. tm.assert_frame_equal(result, expected)
  23. result = index.to_frame()
  24. expected.index = index
  25. tm.assert_frame_equal(result, expected)
  26. tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]
  27. index = MultiIndex.from_tuples(tuples, names=['first', 'second'])
  28. result = index.to_frame(index=False)
  29. expected = DataFrame(tuples)
  30. expected.columns = ['first', 'second']
  31. tm.assert_frame_equal(result, expected)
  32. result = index.to_frame()
  33. expected.index = index
  34. tm.assert_frame_equal(result, expected)
  35. # See GH-22580
  36. index = MultiIndex.from_tuples(tuples)
  37. result = index.to_frame(index=False, name=['first', 'second'])
  38. expected = DataFrame(tuples)
  39. expected.columns = ['first', 'second']
  40. tm.assert_frame_equal(result, expected)
  41. result = index.to_frame(name=['first', 'second'])
  42. expected.index = index
  43. expected.columns = ['first', 'second']
  44. tm.assert_frame_equal(result, expected)
  45. msg = "'name' must be a list / sequence of column names."
  46. with pytest.raises(TypeError, match=msg):
  47. index.to_frame(name='first')
  48. msg = "'name' should have same length as number of levels on index."
  49. with pytest.raises(ValueError, match=msg):
  50. index.to_frame(name=['first'])
  51. # Tests for datetime index
  52. index = MultiIndex.from_product([range(5),
  53. pd.date_range('20130101', periods=3)])
  54. result = index.to_frame(index=False)
  55. expected = DataFrame(
  56. {0: np.repeat(np.arange(5, dtype='int64'), 3),
  57. 1: np.tile(pd.date_range('20130101', periods=3), 5)})
  58. tm.assert_frame_equal(result, expected)
  59. result = index.to_frame()
  60. expected.index = index
  61. tm.assert_frame_equal(result, expected)
  62. # See GH-22580
  63. result = index.to_frame(index=False, name=['first', 'second'])
  64. expected = DataFrame(
  65. {'first': np.repeat(np.arange(5, dtype='int64'), 3),
  66. 'second': np.tile(pd.date_range('20130101', periods=3), 5)})
  67. tm.assert_frame_equal(result, expected)
  68. result = index.to_frame(name=['first', 'second'])
  69. expected.index = index
  70. tm.assert_frame_equal(result, expected)
  71. def test_to_frame_dtype_fidelity():
  72. # GH 22420
  73. mi = pd.MultiIndex.from_arrays([
  74. pd.date_range('19910905', periods=6, tz='US/Eastern'),
  75. [1, 1, 1, 2, 2, 2],
  76. pd.Categorical(['a', 'a', 'b', 'b', 'c', 'c'], ordered=True),
  77. ['x', 'x', 'y', 'z', 'x', 'y']
  78. ], names=['dates', 'a', 'b', 'c'])
  79. original_dtypes = {name: mi.levels[i].dtype
  80. for i, name in enumerate(mi.names)}
  81. expected_df = pd.DataFrame(OrderedDict([
  82. ('dates', pd.date_range('19910905', periods=6, tz='US/Eastern')),
  83. ('a', [1, 1, 1, 2, 2, 2]),
  84. ('b', pd.Categorical(['a', 'a', 'b', 'b', 'c', 'c'], ordered=True)),
  85. ('c', ['x', 'x', 'y', 'z', 'x', 'y'])
  86. ]))
  87. df = mi.to_frame(index=False)
  88. df_dtypes = df.dtypes.to_dict()
  89. tm.assert_frame_equal(df, expected_df)
  90. assert original_dtypes == df_dtypes
  91. def test_to_frame_resulting_column_order():
  92. # GH 22420
  93. expected = ['z', 0, 'a']
  94. mi = pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['x', 'y', 'z'],
  95. ['q', 'w', 'e']], names=expected)
  96. result = mi.to_frame().columns.tolist()
  97. assert result == expected
  98. def test_to_hierarchical():
  99. index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), (
  100. 2, 'two')])
  101. with tm.assert_produces_warning(FutureWarning,
  102. check_stacklevel=False):
  103. result = index.to_hierarchical(3)
  104. expected = MultiIndex(levels=[[1, 2], ['one', 'two']],
  105. codes=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
  106. [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]])
  107. tm.assert_index_equal(result, expected)
  108. assert result.names == index.names
  109. # K > 1
  110. with tm.assert_produces_warning(FutureWarning,
  111. check_stacklevel=False):
  112. result = index.to_hierarchical(3, 2)
  113. expected = MultiIndex(levels=[[1, 2], ['one', 'two']],
  114. codes=[[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
  115. [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]])
  116. tm.assert_index_equal(result, expected)
  117. assert result.names == index.names
  118. # non-sorted
  119. index = MultiIndex.from_tuples([(2, 'c'), (1, 'b'),
  120. (2, 'a'), (2, 'b')],
  121. names=['N1', 'N2'])
  122. with tm.assert_produces_warning(FutureWarning,
  123. check_stacklevel=False):
  124. result = index.to_hierarchical(2)
  125. expected = MultiIndex.from_tuples([(2, 'c'), (2, 'c'), (1, 'b'),
  126. (1, 'b'),
  127. (2, 'a'), (2, 'a'),
  128. (2, 'b'), (2, 'b')],
  129. names=['N1', 'N2'])
  130. tm.assert_index_equal(result, expected)
  131. assert result.names == index.names
  132. def test_roundtrip_pickle_with_tz():
  133. return
  134. # GH 8367
  135. # round-trip of timezone
  136. index = MultiIndex.from_product(
  137. [[1, 2], ['a', 'b'], date_range('20130101', periods=3,
  138. tz='US/Eastern')
  139. ], names=['one', 'two', 'three'])
  140. unpickled = tm.round_trip_pickle(index)
  141. assert index.equal_levels(unpickled)
  142. def test_pickle(indices):
  143. return
  144. unpickled = tm.round_trip_pickle(indices)
  145. assert indices.equals(unpickled)
  146. original_name, indices.name = indices.name, 'foo'
  147. unpickled = tm.round_trip_pickle(indices)
  148. assert indices.equals(unpickled)
  149. indices.name = original_name
  150. def test_to_series(idx):
  151. # assert that we are creating a copy of the index
  152. s = idx.to_series()
  153. assert s.values is not idx.values
  154. assert s.index is not idx
  155. assert s.name == idx.name
  156. def test_to_series_with_arguments(idx):
  157. # GH18699
  158. # index kwarg
  159. s = idx.to_series(index=idx)
  160. assert s.values is not idx.values
  161. assert s.index is idx
  162. assert s.name == idx.name
  163. # name kwarg
  164. idx = idx
  165. s = idx.to_series(name='__test')
  166. assert s.values is not idx.values
  167. assert s.index is not idx
  168. assert s.name != idx.name
  169. def test_to_flat_index(idx):
  170. expected = pd.Index((('foo', 'one'), ('foo', 'two'), ('bar', 'one'),
  171. ('baz', 'two'), ('qux', 'one'), ('qux', 'two')),
  172. tupleize_cols=False)
  173. result = idx.to_flat_index()
  174. tm.assert_index_equal(result, expected)