123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148 |
- # coding=utf-8
- import numpy as np
- import pytest
- from pandas import Categorical, Series
- import pandas.util.testing as tm
def test_value_counts_nunique():
    """Check ``Series.nunique`` on NaN-heavy floats and on categoricals."""
    # basics.rst doc example: positions 0-9 keep their random values,
    # positions 10-19 all become 5000, and the rest is NaN.
    values = Series(np.random.randn(500))
    values[20:500] = np.nan
    values[10:20] = 5000
    assert values.nunique() == 11

    # GH 18051: empty and all-NaN categoricals report zero unique values.
    for raw in ([], [np.nan]):
        assert Series(Categorical(raw)).nunique() == 0
def test_unique():
    """Check ``Series.unique`` for float, object and categorical data."""
    # GH714: a constant Series with every other entry replaced by NaN has
    # exactly two unique values, for default float, float32 ('f4') and
    # object ('O') dtypes alike.
    for value, dtype in [(1.2345, None), (1.2345, 'f4'), ('foo', 'O')]:
        half_missing = Series([value] * 100, dtype=dtype)
        half_missing[::2] = np.nan
        assert len(half_missing.unique()) == 2

    # decision about None: repeated None collapses to one object entry
    with_none = Series([1, 2, 3, None, None, None], dtype=object)
    tm.assert_numpy_array_equal(
        with_none.unique(),
        np.array([1, 2, 3, None], dtype=object),
    )

    # GH 18051: unique() on categorical data returns a Categorical
    for raw in ([], [np.nan]):
        uniques = Series(Categorical(raw)).unique()
        tm.assert_categorical_equal(uniques, Categorical(raw),
                                    check_dtype=False)
def test_unique_data_ownership():
    """Smoke test: the output of ``unique()`` can be re-wrapped and sorted."""
    # it works! #1807
    uniques = Series(["a", "c", "b"]).unique()
    Series(uniques).sort_values()
@pytest.mark.parametrize('data, expected', [
    (np.random.randint(0, 10, size=1000), False),
    (np.arange(1000), True),
    ([], True),
    ([np.nan], True),
    (['foo', 'bar', np.nan], True),
    (['foo', 'foo', np.nan], False),
    (['foo', 'bar', np.nan, np.nan], False),
])
def test_is_unique(data, expected):
    """Check that ``Series.is_unique`` is exactly the expected bool.

    The cases cover repeated and distinct integers, empty input, and
    object data where a repeated NaN also counts as a duplicate.
    """
    # GH11946 / GH25180
    outcome = Series(data).is_unique
    assert outcome is expected
def test_is_unique_class_ne(capsys):
    """Check that ``Series.is_unique`` leaves stderr empty (GH 20661).

    The Series holds objects whose ``__ne__`` always raises; evaluating
    ``is_unique`` on it must not write anything to the captured stderr.
    """
    class Foo(object):
        def __init__(self, val):
            self._value = val

        def __ne__(self, other):
            raise Exception("NEQ not supported")

    # Build the data with capturing switched off.
    # NOTE(review): the source indentation was lost; ``s.is_unique`` is
    # placed after the ``disabled()`` block so that its output is what
    # ``readouterr()`` inspects -- confirm against the upstream test.
    with capsys.disabled():
        li = [Foo(i) for i in range(5)]
        s = Series(li, index=list(range(5)))

    s.is_unique  # evaluated only for whatever it may write to stderr
    captured = capsys.readouterr()
    assert len(captured.err) == 0
@pytest.mark.parametrize(
    'keep, expected',
    [
        ('first', Series([False, False, False, False, True, True, False])),
        ('last', Series([False, True, True, False, False, False, False])),
        (False, Series([False, True, True, False, True, True, False])),
    ])
def test_drop_duplicates(any_numpy_dtype, keep, expected):
    """Check ``duplicated`` / ``drop_duplicates`` for each ``keep`` mode.

    ``expected`` is the duplicate mask for the fixed input; the rows that
    must survive ``drop_duplicates`` are those where the mask is False.
    """
    tc = Series([1, 0, 3, 5, 3, 0, 4], dtype=np.dtype(any_numpy_dtype))
    if tc.dtype == 'bool':
        pytest.skip('tested separately in test_drop_duplicates_bool')

    survivors = tc[~expected]
    tm.assert_series_equal(tc.duplicated(keep=keep), expected)
    tm.assert_series_equal(tc.drop_duplicates(keep=keep), survivors)

    # The in-place variant must leave the same rows behind.
    in_place = tc.copy()
    in_place.drop_duplicates(keep=keep, inplace=True)
    tm.assert_series_equal(in_place, survivors)
@pytest.mark.parametrize('keep, expected', [
    ('first', Series([False, False, True, True])),
    ('last', Series([True, True, False, False])),
    (False, Series([True, True, True, True])),
])
def test_drop_duplicates_bool(keep, expected):
    """Check ``duplicated`` / ``drop_duplicates`` on a boolean Series."""
    tc = Series([True, False, True, False])
    survivors = tc[~expected]

    tm.assert_series_equal(tc.duplicated(keep=keep), expected)
    tm.assert_series_equal(tc.drop_duplicates(keep=keep), survivors)

    # The in-place variant must leave the same rows behind.
    in_place = tc.copy()
    in_place.drop_duplicates(keep=keep, inplace=True)
    tm.assert_series_equal(in_place, survivors)
@pytest.mark.parametrize('keep, expected', [
    ('first', Series([False, False, True, False, True], name='name')),
    ('last', Series([True, True, False, False, False], name='name')),
    (False, Series([True, True, True, False, True], name='name')),
])
def test_duplicated_keep(keep, expected):
    """Check the ``duplicated`` mask, including its name, per ``keep``."""
    labels = Series(['a', 'b', 'b', 'c', 'a'], name='name')
    tm.assert_series_equal(labels.duplicated(keep=keep), expected)
@pytest.mark.parametrize('keep, expected', [
    ('first', Series([False, False, True, False, True])),
    ('last', Series([True, True, False, False, False])),
    (False, Series([True, True, True, False, True])),
])
def test_duplicated_nan_none(keep, expected):
    """Check ``duplicated`` on object data mixing NaN and None.

    Per the expected masks, the two NaN entries duplicate each other
    while the single None is never flagged.
    """
    mixed = Series([np.nan, 3, 3, None, np.nan], dtype=object)
    tm.assert_series_equal(mixed.duplicated(keep=keep), expected)
|