123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372 |
- # -*- coding: utf-8 -*-
- import numpy as np
- import pytest
- import pandas as pd
- from pandas import MultiIndex, Series
- import pandas.util.testing as tm
- @pytest.mark.parametrize("case", [0.5, "xxx"])
- @pytest.mark.parametrize("sort", [None, False])
- @pytest.mark.parametrize("method", ["intersection", "union",
- "difference", "symmetric_difference"])
- def test_set_ops_error_cases(idx, case, sort, method):
- # non-iterable input
- msg = "Input must be Index or array-like"
- with pytest.raises(TypeError, match=msg):
- getattr(idx, method)(case, sort=sort)
- @pytest.mark.parametrize("sort", [None, False])
- def test_intersection_base(idx, sort):
- first = idx[:5]
- second = idx[:3]
- intersect = first.intersection(second, sort=sort)
- if sort is None:
- tm.assert_index_equal(intersect, second.sort_values())
- assert tm.equalContents(intersect, second)
- # GH 10149
- cases = [klass(second.values)
- for klass in [np.array, Series, list]]
- for case in cases:
- result = first.intersection(case, sort=sort)
- if sort is None:
- tm.assert_index_equal(result, second.sort_values())
- assert tm.equalContents(result, second)
- msg = "other must be a MultiIndex or a list of tuples"
- with pytest.raises(TypeError, match=msg):
- first.intersection([1, 2, 3], sort=sort)
- @pytest.mark.parametrize("sort", [None, False])
- def test_union_base(idx, sort):
- first = idx[3:]
- second = idx[:5]
- everything = idx
- union = first.union(second, sort=sort)
- if sort is None:
- tm.assert_index_equal(union, everything.sort_values())
- assert tm.equalContents(union, everything)
- # GH 10149
- cases = [klass(second.values)
- for klass in [np.array, Series, list]]
- for case in cases:
- result = first.union(case, sort=sort)
- if sort is None:
- tm.assert_index_equal(result, everything.sort_values())
- assert tm.equalContents(result, everything)
- msg = "other must be a MultiIndex or a list of tuples"
- with pytest.raises(TypeError, match=msg):
- first.union([1, 2, 3], sort=sort)
- @pytest.mark.parametrize("sort", [None, False])
- def test_difference_base(idx, sort):
- second = idx[4:]
- answer = idx[:4]
- result = idx.difference(second, sort=sort)
- if sort is None:
- answer = answer.sort_values()
- assert result.equals(answer)
- tm.assert_index_equal(result, answer)
- # GH 10149
- cases = [klass(second.values)
- for klass in [np.array, Series, list]]
- for case in cases:
- result = idx.difference(case, sort=sort)
- tm.assert_index_equal(result, answer)
- msg = "other must be a MultiIndex or a list of tuples"
- with pytest.raises(TypeError, match=msg):
- idx.difference([1, 2, 3], sort=sort)
- @pytest.mark.parametrize("sort", [None, False])
- def test_symmetric_difference(idx, sort):
- first = idx[1:]
- second = idx[:-1]
- answer = idx[[-1, 0]]
- result = first.symmetric_difference(second, sort=sort)
- if sort is None:
- answer = answer.sort_values()
- tm.assert_index_equal(result, answer)
- # GH 10149
- cases = [klass(second.values)
- for klass in [np.array, Series, list]]
- for case in cases:
- result = first.symmetric_difference(case, sort=sort)
- tm.assert_index_equal(result, answer)
- msg = "other must be a MultiIndex or a list of tuples"
- with pytest.raises(TypeError, match=msg):
- first.symmetric_difference([1, 2, 3], sort=sort)
- def test_empty(idx):
- # GH 15270
- assert not idx.empty
- assert idx[:0].empty
- @pytest.mark.parametrize("sort", [None, False])
- def test_difference(idx, sort):
- first = idx
- result = first.difference(idx[-3:], sort=sort)
- vals = idx[:-3].values
- if sort is None:
- vals = sorted(vals)
- expected = MultiIndex.from_tuples(vals,
- sortorder=0,
- names=idx.names)
- assert isinstance(result, MultiIndex)
- assert result.equals(expected)
- assert result.names == idx.names
- tm.assert_index_equal(result, expected)
- # empty difference: reflexive
- result = idx.difference(idx, sort=sort)
- expected = idx[:0]
- assert result.equals(expected)
- assert result.names == idx.names
- # empty difference: superset
- result = idx[-3:].difference(idx, sort=sort)
- expected = idx[:0]
- assert result.equals(expected)
- assert result.names == idx.names
- # empty difference: degenerate
- result = idx[:0].difference(idx, sort=sort)
- expected = idx[:0]
- assert result.equals(expected)
- assert result.names == idx.names
- # names not the same
- chunklet = idx[-3:]
- chunklet.names = ['foo', 'baz']
- result = first.difference(chunklet, sort=sort)
- assert result.names == (None, None)
- # empty, but non-equal
- result = idx.difference(idx.sortlevel(1)[0], sort=sort)
- assert len(result) == 0
- # raise Exception called with non-MultiIndex
- result = first.difference(first.values, sort=sort)
- assert result.equals(first[:0])
- # name from empty array
- result = first.difference([], sort=sort)
- assert first.equals(result)
- assert first.names == result.names
- # name from non-empty array
- result = first.difference([('foo', 'one')], sort=sort)
- expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), (
- 'foo', 'two'), ('qux', 'one'), ('qux', 'two')])
- expected.names = first.names
- assert first.names == result.names
- msg = "other must be a MultiIndex or a list of tuples"
- with pytest.raises(TypeError, match=msg):
- first.difference([1, 2, 3, 4, 5], sort=sort)
- def test_difference_sort_special():
- # GH-24959
- idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']])
- # sort=None, the default
- result = idx.difference([])
- tm.assert_index_equal(result, idx)
- @pytest.mark.xfail(reason="Not implemented.")
- def test_difference_sort_special_true():
- # TODO decide on True behaviour
- idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']])
- result = idx.difference([], sort=True)
- expected = pd.MultiIndex.from_product([[0, 1], ['a', 'b']])
- tm.assert_index_equal(result, expected)
- def test_difference_sort_incomparable():
- # GH-24959
- idx = pd.MultiIndex.from_product([[1, pd.Timestamp('2000'), 2],
- ['a', 'b']])
- other = pd.MultiIndex.from_product([[3, pd.Timestamp('2000'), 4],
- ['c', 'd']])
- # sort=None, the default
- # MultiIndex.difference deviates here from other difference
- # implementations in not catching the TypeError
- with pytest.raises(TypeError):
- result = idx.difference(other)
- # sort=False
- result = idx.difference(other, sort=False)
- tm.assert_index_equal(result, idx)
- @pytest.mark.xfail(reason="Not implemented.")
- def test_difference_sort_incomparable_true():
- # TODO decide on True behaviour
- # # sort=True, raises
- idx = pd.MultiIndex.from_product([[1, pd.Timestamp('2000'), 2],
- ['a', 'b']])
- other = pd.MultiIndex.from_product([[3, pd.Timestamp('2000'), 4],
- ['c', 'd']])
- with pytest.raises(TypeError):
- idx.difference(other, sort=True)
- @pytest.mark.parametrize("sort", [None, False])
- def test_union(idx, sort):
- piece1 = idx[:5][::-1]
- piece2 = idx[3:]
- the_union = piece1.union(piece2, sort=sort)
- if sort is None:
- tm.assert_index_equal(the_union, idx.sort_values())
- assert tm.equalContents(the_union, idx)
- # corner case, pass self or empty thing:
- the_union = idx.union(idx, sort=sort)
- assert the_union is idx
- the_union = idx.union(idx[:0], sort=sort)
- assert the_union is idx
- # won't work in python 3
- # tuples = _index.values
- # result = _index[:4] | tuples[4:]
- # assert result.equals(tuples)
- # not valid for python 3
- # def test_union_with_regular_index(self):
- # other = Index(['A', 'B', 'C'])
- # result = other.union(idx)
- # assert ('foo', 'one') in result
- # assert 'B' in result
- # result2 = _index.union(other)
- # assert result.equals(result2)
- @pytest.mark.parametrize("sort", [None, False])
- def test_intersection(idx, sort):
- piece1 = idx[:5][::-1]
- piece2 = idx[3:]
- the_int = piece1.intersection(piece2, sort=sort)
- if sort is None:
- tm.assert_index_equal(the_int, idx[3:5])
- assert tm.equalContents(the_int, idx[3:5])
- # corner case, pass self
- the_int = idx.intersection(idx, sort=sort)
- assert the_int is idx
- # empty intersection: disjoint
- empty = idx[:2].intersection(idx[2:], sort=sort)
- expected = idx[:0]
- assert empty.equals(expected)
- # can't do in python 3
- # tuples = _index.values
- # result = _index & tuples
- # assert result.equals(tuples)
- def test_intersect_equal_sort():
- # GH-24959
- idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']])
- tm.assert_index_equal(idx.intersection(idx, sort=False), idx)
- tm.assert_index_equal(idx.intersection(idx, sort=None), idx)
- @pytest.mark.xfail(reason="Not implemented.")
- def test_intersect_equal_sort_true():
- # TODO decide on True behaviour
- idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']])
- sorted_ = pd.MultiIndex.from_product([[0, 1], ['a', 'b']])
- tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_)
- @pytest.mark.parametrize('slice_', [slice(None), slice(0)])
- def test_union_sort_other_empty(slice_):
- # https://github.com/pandas-dev/pandas/issues/24959
- idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']])
- # default, sort=None
- other = idx[slice_]
- tm.assert_index_equal(idx.union(other), idx)
- # MultiIndex does not special case empty.union(idx)
- # tm.assert_index_equal(other.union(idx), idx)
- # sort=False
- tm.assert_index_equal(idx.union(other, sort=False), idx)
- @pytest.mark.xfail(reason="Not implemented.")
- def test_union_sort_other_empty_sort(slice_):
- # TODO decide on True behaviour
- # # sort=True
- idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']])
- other = idx[:0]
- result = idx.union(other, sort=True)
- expected = pd.MultiIndex.from_product([[0, 1], ['a', 'b']])
- tm.assert_index_equal(result, expected)
- def test_union_sort_other_incomparable():
- # https://github.com/pandas-dev/pandas/issues/24959
- idx = pd.MultiIndex.from_product([[1, pd.Timestamp('2000')], ['a', 'b']])
- # default, sort=None
- result = idx.union(idx[:1])
- tm.assert_index_equal(result, idx)
- # sort=False
- result = idx.union(idx[:1], sort=False)
- tm.assert_index_equal(result, idx)
- @pytest.mark.xfail(reason="Not implemented.")
- def test_union_sort_other_incomparable_sort():
- # TODO decide on True behaviour
- # # sort=True
- idx = pd.MultiIndex.from_product([[1, pd.Timestamp('2000')], ['a', 'b']])
- with pytest.raises(TypeError, match='Cannot compare'):
- idx.union(idx[:1], sort=True)
- @pytest.mark.parametrize("method", ['union', 'intersection', 'difference',
- 'symmetric_difference'])
- def test_setops_disallow_true(method):
- idx1 = pd.MultiIndex.from_product([['a', 'b'], [1, 2]])
- idx2 = pd.MultiIndex.from_product([['b', 'c'], [1, 2]])
- with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
- getattr(idx1, method)(idx2, sort=True)
|