test_multiindex.py 2.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. import numpy as np
  2. import pytest
  3. import pandas._libs.index as _index
  4. from pandas.errors import PerformanceWarning
  5. import pandas as pd
  6. from pandas import DataFrame, Index, MultiIndex, Series
  7. from pandas.util import testing as tm
  8. class TestMultiIndexBasic(object):
  9. def test_multiindex_perf_warn(self):
  10. df = DataFrame({'jim': [0, 0, 1, 1],
  11. 'joe': ['x', 'x', 'z', 'y'],
  12. 'jolie': np.random.rand(4)}).set_index(['jim', 'joe'])
  13. with tm.assert_produces_warning(PerformanceWarning,
  14. clear=[pd.core.index]):
  15. df.loc[(1, 'z')]
  16. df = df.iloc[[2, 1, 3, 0]]
  17. with tm.assert_produces_warning(PerformanceWarning):
  18. df.loc[(0, )]
  19. def test_multiindex_contains_dropped(self):
  20. # GH 19027
  21. # test that dropped MultiIndex levels are not in the MultiIndex
  22. # despite continuing to be in the MultiIndex's levels
  23. idx = MultiIndex.from_product([[1, 2], [3, 4]])
  24. assert 2 in idx
  25. idx = idx.drop(2)
  26. # drop implementation keeps 2 in the levels
  27. assert 2 in idx.levels[0]
  28. # but it should no longer be in the index itself
  29. assert 2 not in idx
  30. # also applies to strings
  31. idx = MultiIndex.from_product([['a', 'b'], ['c', 'd']])
  32. assert 'a' in idx
  33. idx = idx.drop('a')
  34. assert 'a' in idx.levels[0]
  35. assert 'a' not in idx
  36. @pytest.mark.parametrize("data, expected", [
  37. (MultiIndex.from_product([(), ()]), True),
  38. (MultiIndex.from_product([(1, 2), (3, 4)]), True),
  39. (MultiIndex.from_product([('a', 'b'), (1, 2)]), False),
  40. ])
  41. def test_multiindex_is_homogeneous_type(self, data, expected):
  42. assert data._is_homogeneous_type is expected
  43. def test_indexing_over_hashtable_size_cutoff(self):
  44. n = 10000
  45. old_cutoff = _index._SIZE_CUTOFF
  46. _index._SIZE_CUTOFF = 20000
  47. s = Series(np.arange(n),
  48. MultiIndex.from_arrays((["a"] * n, np.arange(n))))
  49. # hai it works!
  50. assert s[("a", 5)] == 5
  51. assert s[("a", 6)] == 6
  52. assert s[("a", 7)] == 7
  53. _index._SIZE_CUTOFF = old_cutoff
  54. def test_multi_nan_indexing(self):
  55. # GH 3588
  56. df = DataFrame({"a": ['R1', 'R2', np.nan, 'R4'],
  57. 'b': ["C1", "C2", "C3", "C4"],
  58. "c": [10, 15, np.nan, 20]})
  59. result = df.set_index(['a', 'b'], drop=False)
  60. expected = DataFrame({"a": ['R1', 'R2', np.nan, 'R4'],
  61. 'b': ["C1", "C2", "C3", "C4"],
  62. "c": [10, 15, np.nan, 20]},
  63. index=[Index(['R1', 'R2', np.nan, 'R4'],
  64. name='a'),
  65. Index(['C1', 'C2', 'C3', 'C4'], name='b')])
  66. tm.assert_frame_equal(result, expected)