# dayuan / manyi

import numpy as np
import pytest

import pandas as pd
import pandas.util.testing as tm


@pytest.mark.parametrize('ordered', [True, False])
@pytest.mark.parametrize('categories', [
    ['b', 'a', 'c'],
    ['a', 'b', 'c', 'd'],
])
def test_factorize(categories, ordered):
    """Factorizing a Categorical keeps its categories and ``ordered`` flag.

    Codes follow order of first appearance, the missing value is coded as
    -1, and the uniques come back as a Categorical carrying the same
    ``categories`` / ``ordered`` as the input.
    """
    # Both the input and the expected uniques share the same dtype options.
    dtype_kwargs = dict(categories=categories, ordered=ordered)
    values = pd.Categorical(['b', 'b', 'a', 'c', None], **dtype_kwargs)

    codes, observed = pd.factorize(values)

    want_codes = np.array([0, 0, 1, 2, -1], dtype=np.intp)
    want_observed = pd.Categorical(['b', 'a', 'c'], **dtype_kwargs)
    tm.assert_numpy_array_equal(codes, want_codes)
    tm.assert_categorical_equal(observed, want_observed)


def test_factorized_sort():
    """``factorize(sort=True)`` on an unordered Categorical sorts uniques."""
    values = pd.Categorical(['b', 'b', None, 'a'])

    codes, observed = pd.factorize(values, sort=True)

    # 'a' sorts before 'b', so 'b' gets code 1; the missing entry stays -1.
    tm.assert_numpy_array_equal(
        codes, np.array([1, 1, -1, 0], dtype=np.intp))
    tm.assert_categorical_equal(observed, pd.Categorical(['a', 'b']))


def test_factorized_sort_ordered():
    """``factorize(sort=True)`` on an ordered Categorical uses category order.

    With categories ``['c', 'b', 'a']`` the expected uniques are
    ``['b', 'a']``: the order defined by the categories, not the
    lexical one.
    """
    dtype_kwargs = dict(categories=['c', 'b', 'a'], ordered=True)
    values = pd.Categorical(['b', 'b', None, 'a'], **dtype_kwargs)

    codes, observed = pd.factorize(values, sort=True)

    want_codes = np.array([0, 0, -1, 1], dtype=np.intp)
    want_observed = pd.Categorical(['b', 'a'], **dtype_kwargs)
    tm.assert_numpy_array_equal(codes, want_codes)
    tm.assert_categorical_equal(observed, want_observed)


def test_isin_cats():
    """``Categorical.isin`` matches both category values and missing values."""
    # GH2003
    values = pd.Categorical(["a", "b", np.nan])

    # (values to look up, expected membership mask)
    cases = [
        (["a", np.nan], [True, False, True]),
        (["a", "c"], [True, False, False]),
    ]
    for candidates, mask in cases:
        result = values.isin(candidates)
        expected = np.array(mask, dtype=bool)
        tm.assert_numpy_array_equal(expected, result)


@pytest.mark.parametrize("empty", [
    [],
    # Explicit dtype: a bare ``pd.Series()`` relies on the implicit default
    # dtype for empty Series, which newer pandas versions warn about.
    pd.Series(dtype=object),
    np.array([]),
])
def test_isin_empty(empty):
    """``Categorical.isin`` with an empty set of values matches nothing.

    Parameters
    ----------
    empty : list, Series or ndarray
        An empty container passed as the values to look up.
    """
    s = pd.Categorical(["a", "b"])
    expected = np.array([False, False], dtype=bool)

    result = s.isin(empty)
    tm.assert_numpy_array_equal(result, expected)


class TestTake(object):
    """Tests for ``Categorical.take`` with and without ``allow_fill``."""
    # https://github.com/pandas-dev/pandas/issues/20664

    def test_take_warns(self):
        """A negative index without an explicit ``allow_fill`` warns."""
        values = pd.Categorical(['a', 'b'])
        indexer = [0, -1]
        with tm.assert_produces_warning(FutureWarning):
            values.take(indexer)

    def test_take_positive_no_warning(self):
        """Non-negative indices produce no warning."""
        values = pd.Categorical(['a', 'b'])
        indexer = [0, 0]
        with tm.assert_produces_warning(None):
            values.take(indexer)

    def test_take_bounds(self, allow_fill):
        """Indices past the end raise ``IndexError``."""
        # https://github.com/pandas-dev/pandas/issues/20664
        values = pd.Categorical(['a', 'b', 'a'])
        out_of_range = [4, 5]
        with pytest.raises(IndexError):
            values.take(out_of_range, allow_fill=allow_fill)

    def test_take_empty(self, allow_fill):
        """Taking any position from an empty Categorical raises."""
        # https://github.com/pandas-dev/pandas/issues/20664
        values = pd.Categorical([], categories=['a', 'b'])
        with pytest.raises(IndexError):
            values.take([0], allow_fill=allow_fill)

    def test_positional_take(self, ordered):
        """Positional take keeps the categories and the ``ordered`` flag."""
        source = pd.Categorical(['a', 'a', 'b', 'b'],
                                categories=['b', 'a'],
                                ordered=ordered)

        taken = source.take([0, 1, 2], allow_fill=False)

        wanted = pd.Categorical(['a', 'a', 'b'],
                                categories=source.categories,
                                ordered=ordered)
        tm.assert_categorical_equal(taken, wanted)

    def test_positional_take_unobserved(self, ordered):
        """Categories that never occur in the values survive a take."""
        source = pd.Categorical(['a', 'b'],
                                categories=['a', 'b', 'c'],
                                ordered=ordered)

        taken = source.take([1, 0], allow_fill=False)

        wanted = pd.Categorical(['b', 'a'],
                                categories=source.categories,
                                ordered=ordered)
        tm.assert_categorical_equal(taken, wanted)

    def test_take_allow_fill(self):
        """With ``allow_fill=True`` a -1 index yields a missing value."""
        # https://github.com/pandas-dev/pandas/issues/23296
        source = pd.Categorical(['a', 'a', 'b'])

        taken = source.take([0, -1, -1], allow_fill=True)

        wanted = pd.Categorical(['a', np.nan, np.nan],
                                categories=['a', 'b'])
        tm.assert_categorical_equal(taken, wanted)

    def test_take_fill_with_negative_one(self):
        """``fill_value=-1`` is used as a value when -1 is a category."""
        # -1 was a category
        source = pd.Categorical([-1, 0, 1])

        taken = source.take([0, -1, 1], allow_fill=True, fill_value=-1)

        wanted = pd.Categorical([-1, -1, 0], categories=[-1, 0, 1])
        tm.assert_categorical_equal(taken, wanted)

    def test_take_fill_value(self):
        """A ``fill_value`` that is an existing category fills -1 slots."""
        # https://github.com/pandas-dev/pandas/issues/23296
        source = pd.Categorical(['a', 'b', 'c'])

        taken = source.take([0, 1, -1], fill_value='a', allow_fill=True)

        wanted = pd.Categorical(['a', 'b', 'a'],
                                categories=['a', 'b', 'c'])
        tm.assert_categorical_equal(taken, wanted)

    def test_take_fill_value_new_raises(self):
        """A ``fill_value`` outside the categories raises ``TypeError``."""
        # https://github.com/pandas-dev/pandas/issues/23296
        source = pd.Categorical(['a', 'b', 'c'])
        xpr = r"'fill_value' \('d'\) is not in this Categorical's categories."
        with pytest.raises(TypeError, match=xpr):
            source.take([0, 1, -1], fill_value='d', allow_fill=True)