test_algos.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. import pandas.util.testing as tm
  5. @pytest.mark.parametrize('ordered', [True, False])
  6. @pytest.mark.parametrize('categories', [
  7. ['b', 'a', 'c'],
  8. ['a', 'b', 'c', 'd'],
  9. ])
  10. def test_factorize(categories, ordered):
  11. cat = pd.Categorical(['b', 'b', 'a', 'c', None],
  12. categories=categories,
  13. ordered=ordered)
  14. labels, uniques = pd.factorize(cat)
  15. expected_labels = np.array([0, 0, 1, 2, -1], dtype=np.intp)
  16. expected_uniques = pd.Categorical(['b', 'a', 'c'],
  17. categories=categories,
  18. ordered=ordered)
  19. tm.assert_numpy_array_equal(labels, expected_labels)
  20. tm.assert_categorical_equal(uniques, expected_uniques)
  21. def test_factorized_sort():
  22. cat = pd.Categorical(['b', 'b', None, 'a'])
  23. labels, uniques = pd.factorize(cat, sort=True)
  24. expected_labels = np.array([1, 1, -1, 0], dtype=np.intp)
  25. expected_uniques = pd.Categorical(['a', 'b'])
  26. tm.assert_numpy_array_equal(labels, expected_labels)
  27. tm.assert_categorical_equal(uniques, expected_uniques)
  28. def test_factorized_sort_ordered():
  29. cat = pd.Categorical(['b', 'b', None, 'a'],
  30. categories=['c', 'b', 'a'],
  31. ordered=True)
  32. labels, uniques = pd.factorize(cat, sort=True)
  33. expected_labels = np.array([0, 0, -1, 1], dtype=np.intp)
  34. expected_uniques = pd.Categorical(['b', 'a'],
  35. categories=['c', 'b', 'a'],
  36. ordered=True)
  37. tm.assert_numpy_array_equal(labels, expected_labels)
  38. tm.assert_categorical_equal(uniques, expected_uniques)
  39. def test_isin_cats():
  40. # GH2003
  41. cat = pd.Categorical(["a", "b", np.nan])
  42. result = cat.isin(["a", np.nan])
  43. expected = np.array([True, False, True], dtype=bool)
  44. tm.assert_numpy_array_equal(expected, result)
  45. result = cat.isin(["a", "c"])
  46. expected = np.array([True, False, False], dtype=bool)
  47. tm.assert_numpy_array_equal(expected, result)
  48. @pytest.mark.parametrize("empty", [[], pd.Series(), np.array([])])
  49. def test_isin_empty(empty):
  50. s = pd.Categorical(["a", "b"])
  51. expected = np.array([False, False], dtype=bool)
  52. result = s.isin(empty)
  53. tm.assert_numpy_array_equal(expected, result)
  54. class TestTake(object):
  55. # https://github.com/pandas-dev/pandas/issues/20664
  56. def test_take_warns(self):
  57. cat = pd.Categorical(['a', 'b'])
  58. with tm.assert_produces_warning(FutureWarning):
  59. cat.take([0, -1])
  60. def test_take_positive_no_warning(self):
  61. cat = pd.Categorical(['a', 'b'])
  62. with tm.assert_produces_warning(None):
  63. cat.take([0, 0])
  64. def test_take_bounds(self, allow_fill):
  65. # https://github.com/pandas-dev/pandas/issues/20664
  66. cat = pd.Categorical(['a', 'b', 'a'])
  67. with pytest.raises(IndexError):
  68. cat.take([4, 5], allow_fill=allow_fill)
  69. def test_take_empty(self, allow_fill):
  70. # https://github.com/pandas-dev/pandas/issues/20664
  71. cat = pd.Categorical([], categories=['a', 'b'])
  72. with pytest.raises(IndexError):
  73. cat.take([0], allow_fill=allow_fill)
  74. def test_positional_take(self, ordered):
  75. cat = pd.Categorical(['a', 'a', 'b', 'b'], categories=['b', 'a'],
  76. ordered=ordered)
  77. result = cat.take([0, 1, 2], allow_fill=False)
  78. expected = pd.Categorical(['a', 'a', 'b'], categories=cat.categories,
  79. ordered=ordered)
  80. tm.assert_categorical_equal(result, expected)
  81. def test_positional_take_unobserved(self, ordered):
  82. cat = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'],
  83. ordered=ordered)
  84. result = cat.take([1, 0], allow_fill=False)
  85. expected = pd.Categorical(['b', 'a'], categories=cat.categories,
  86. ordered=ordered)
  87. tm.assert_categorical_equal(result, expected)
  88. def test_take_allow_fill(self):
  89. # https://github.com/pandas-dev/pandas/issues/23296
  90. cat = pd.Categorical(['a', 'a', 'b'])
  91. result = cat.take([0, -1, -1], allow_fill=True)
  92. expected = pd.Categorical(['a', np.nan, np.nan],
  93. categories=['a', 'b'])
  94. tm.assert_categorical_equal(result, expected)
  95. def test_take_fill_with_negative_one(self):
  96. # -1 was a category
  97. cat = pd.Categorical([-1, 0, 1])
  98. result = cat.take([0, -1, 1], allow_fill=True, fill_value=-1)
  99. expected = pd.Categorical([-1, -1, 0], categories=[-1, 0, 1])
  100. tm.assert_categorical_equal(result, expected)
  101. def test_take_fill_value(self):
  102. # https://github.com/pandas-dev/pandas/issues/23296
  103. cat = pd.Categorical(['a', 'b', 'c'])
  104. result = cat.take([0, 1, -1], fill_value='a', allow_fill=True)
  105. expected = pd.Categorical(['a', 'b', 'a'], categories=['a', 'b', 'c'])
  106. tm.assert_categorical_equal(result, expected)
  107. def test_take_fill_value_new_raises(self):
  108. # https://github.com/pandas-dev/pandas/issues/23296
  109. cat = pd.Categorical(['a', 'b', 'c'])
  110. xpr = r"'fill_value' \('d'\) is not in this Categorical's categories."
  111. with pytest.raises(TypeError, match=xpr):
  112. cat.take([0, 1, -1], fill_value='d', allow_fill=True)