123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293 |
- # -*- coding: utf-8 -*-
- import re
- import numpy as np
- import pytest
- from pandas.compat import lrange, range
- from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
- import pandas as pd
- from pandas import IntervalIndex, MultiIndex, RangeIndex
- import pandas.util.testing as tm
- def test_labels_dtypes():
- # GH 8456
- i = MultiIndex.from_tuples([('A', 1), ('A', 2)])
- assert i.codes[0].dtype == 'int8'
- assert i.codes[1].dtype == 'int8'
- i = MultiIndex.from_product([['a'], range(40)])
- assert i.codes[1].dtype == 'int8'
- i = MultiIndex.from_product([['a'], range(400)])
- assert i.codes[1].dtype == 'int16'
- i = MultiIndex.from_product([['a'], range(40000)])
- assert i.codes[1].dtype == 'int32'
- i = pd.MultiIndex.from_product([['a'], range(1000)])
- assert (i.codes[0] >= 0).all()
- assert (i.codes[1] >= 0).all()
- def test_values_boxed():
- tuples = [(1, pd.Timestamp('2000-01-01')), (2, pd.NaT),
- (3, pd.Timestamp('2000-01-03')),
- (1, pd.Timestamp('2000-01-04')),
- (2, pd.Timestamp('2000-01-02')),
- (3, pd.Timestamp('2000-01-03'))]
- result = pd.MultiIndex.from_tuples(tuples)
- expected = construct_1d_object_array_from_listlike(tuples)
- tm.assert_numpy_array_equal(result.values, expected)
- # Check that code branches for boxed values produce identical results
- tm.assert_numpy_array_equal(result.values[:4], result[:4].values)
- def test_values_multiindex_datetimeindex():
- # Test to ensure we hit the boxing / nobox part of MI.values
- ints = np.arange(10 ** 18, 10 ** 18 + 5)
- naive = pd.DatetimeIndex(ints)
- # TODO(GH-24559): Remove the FutureWarning
- with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
- aware = pd.DatetimeIndex(ints, tz='US/Central')
- idx = pd.MultiIndex.from_arrays([naive, aware])
- result = idx.values
- outer = pd.DatetimeIndex([x[0] for x in result])
- tm.assert_index_equal(outer, naive)
- inner = pd.DatetimeIndex([x[1] for x in result])
- tm.assert_index_equal(inner, aware)
- # n_lev > n_lab
- result = idx[:2].values
- outer = pd.DatetimeIndex([x[0] for x in result])
- tm.assert_index_equal(outer, naive[:2])
- inner = pd.DatetimeIndex([x[1] for x in result])
- tm.assert_index_equal(inner, aware[:2])
- def test_values_multiindex_periodindex():
- # Test to ensure we hit the boxing / nobox part of MI.values
- ints = np.arange(2007, 2012)
- pidx = pd.PeriodIndex(ints, freq='D')
- idx = pd.MultiIndex.from_arrays([ints, pidx])
- result = idx.values
- outer = pd.Int64Index([x[0] for x in result])
- tm.assert_index_equal(outer, pd.Int64Index(ints))
- inner = pd.PeriodIndex([x[1] for x in result])
- tm.assert_index_equal(inner, pidx)
- # n_lev > n_lab
- result = idx[:2].values
- outer = pd.Int64Index([x[0] for x in result])
- tm.assert_index_equal(outer, pd.Int64Index(ints[:2]))
- inner = pd.PeriodIndex([x[1] for x in result])
- tm.assert_index_equal(inner, pidx[:2])
- def test_consistency():
- # need to construct an overflow
- major_axis = lrange(70000)
- minor_axis = lrange(10)
- major_codes = np.arange(70000)
- minor_codes = np.repeat(lrange(10), 7000)
- # the fact that is works means it's consistent
- index = MultiIndex(levels=[major_axis, minor_axis],
- codes=[major_codes, minor_codes])
- # inconsistent
- major_codes = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3])
- minor_codes = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1])
- index = MultiIndex(levels=[major_axis, minor_axis],
- codes=[major_codes, minor_codes])
- assert index.is_unique is False
- def test_hash_collisions():
- # non-smoke test that we don't get hash collisions
- index = MultiIndex.from_product([np.arange(1000), np.arange(1000)],
- names=['one', 'two'])
- result = index.get_indexer(index.values)
- tm.assert_numpy_array_equal(result, np.arange(
- len(index), dtype='intp'))
- for i in [0, 1, len(index) - 2, len(index) - 1]:
- result = index.get_loc(index[i])
- assert result == i
- def test_dims():
- pass
- def take_invalid_kwargs():
- vals = [['A', 'B'],
- [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]]
- idx = pd.MultiIndex.from_product(vals, names=['str', 'dt'])
- indices = [1, 2]
- msg = r"take\(\) got an unexpected keyword argument 'foo'"
- with pytest.raises(TypeError, match=msg):
- idx.take(indices, foo=2)
- msg = "the 'out' parameter is not supported"
- with pytest.raises(ValueError, match=msg):
- idx.take(indices, out=indices)
- msg = "the 'mode' parameter is not supported"
- with pytest.raises(ValueError, match=msg):
- idx.take(indices, mode='clip')
- def test_isna_behavior(idx):
- # should not segfault GH5123
- # NOTE: if MI representation changes, may make sense to allow
- # isna(MI)
- with pytest.raises(NotImplementedError):
- pd.isna(idx)
- def test_large_multiindex_error():
- # GH12527
- df_below_1000000 = pd.DataFrame(
- 1, index=pd.MultiIndex.from_product([[1, 2], range(499999)]),
- columns=['dest'])
- with pytest.raises(KeyError):
- df_below_1000000.loc[(-1, 0), 'dest']
- with pytest.raises(KeyError):
- df_below_1000000.loc[(3, 0), 'dest']
- df_above_1000000 = pd.DataFrame(
- 1, index=pd.MultiIndex.from_product([[1, 2], range(500001)]),
- columns=['dest'])
- with pytest.raises(KeyError):
- df_above_1000000.loc[(-1, 0), 'dest']
- with pytest.raises(KeyError):
- df_above_1000000.loc[(3, 0), 'dest']
- def test_million_record_attribute_error():
- # GH 18165
- r = list(range(1000000))
- df = pd.DataFrame({'a': r, 'b': r},
- index=pd.MultiIndex.from_tuples([(x, x) for x in r]))
- msg = "'Series' object has no attribute 'foo'"
- with pytest.raises(AttributeError, match=msg):
- df['a'].foo()
- def test_can_hold_identifiers(idx):
- key = idx[0]
- assert idx._can_hold_identifiers_and_holds_name(key) is True
- def test_metadata_immutable(idx):
- levels, codes = idx.levels, idx.codes
- # shouldn't be able to set at either the top level or base level
- mutable_regex = re.compile('does not support mutable operations')
- with pytest.raises(TypeError, match=mutable_regex):
- levels[0] = levels[0]
- with pytest.raises(TypeError, match=mutable_regex):
- levels[0][0] = levels[0][0]
- # ditto for labels
- with pytest.raises(TypeError, match=mutable_regex):
- codes[0] = codes[0]
- with pytest.raises(TypeError, match=mutable_regex):
- codes[0][0] = codes[0][0]
- # and for names
- names = idx.names
- with pytest.raises(TypeError, match=mutable_regex):
- names[0] = names[0]
- def test_level_setting_resets_attributes():
- ind = pd.MultiIndex.from_arrays([
- ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3]
- ])
- assert ind.is_monotonic
- ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True)
- # if this fails, probably didn't reset the cache correctly.
- assert not ind.is_monotonic
- def test_rangeindex_fallback_coercion_bug():
- # GH 12893
- foo = pd.DataFrame(np.arange(100).reshape((10, 10)))
- bar = pd.DataFrame(np.arange(100).reshape((10, 10)))
- df = pd.concat({'foo': foo.stack(), 'bar': bar.stack()}, axis=1)
- df.index.names = ['fizz', 'buzz']
- str(df)
- expected = pd.DataFrame({'bar': np.arange(100),
- 'foo': np.arange(100)},
- index=pd.MultiIndex.from_product(
- [range(10), range(10)],
- names=['fizz', 'buzz']))
- tm.assert_frame_equal(df, expected, check_like=True)
- result = df.index.get_level_values('fizz')
- expected = pd.Int64Index(np.arange(10), name='fizz').repeat(10)
- tm.assert_index_equal(result, expected)
- result = df.index.get_level_values('buzz')
- expected = pd.Int64Index(np.tile(np.arange(10), 10), name='buzz')
- tm.assert_index_equal(result, expected)
- def test_hash_error(indices):
- index = indices
- with pytest.raises(TypeError, match=("unhashable type: %r" %
- type(index).__name__)):
- hash(indices)
- def test_mutability(indices):
- if not len(indices):
- return
- pytest.raises(TypeError, indices.__setitem__, 0, indices[0])
- def test_wrong_number_names(indices):
- with pytest.raises(ValueError, match="^Length"):
- indices.names = ["apple", "banana", "carrot"]
- def test_memory_usage(idx):
- result = idx.memory_usage()
- if len(idx):
- idx.get_loc(idx[0])
- result2 = idx.memory_usage()
- result3 = idx.memory_usage(deep=True)
- # RangeIndex, IntervalIndex
- # don't have engines
- if not isinstance(idx, (RangeIndex, IntervalIndex)):
- assert result2 > result
- if idx.inferred_type == 'object':
- assert result3 > result2
- else:
- # we report 0 for no-length
- assert result == 0
- def test_nlevels(idx):
- assert idx.nlevels == 2
|