test_missing.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. # -*- coding: utf-8 -*-
  2. import collections
  3. import numpy as np
  4. import pytest
  5. from pandas.compat import lrange
  6. from pandas.core.dtypes.dtypes import CategoricalDtype
  7. from pandas import Categorical, Index, isna
  8. import pandas.util.testing as tm
  9. class TestCategoricalMissing(object):
  10. def test_na_flags_int_categories(self):
  11. # #1457
  12. categories = lrange(10)
  13. labels = np.random.randint(0, 10, 20)
  14. labels[::5] = -1
  15. cat = Categorical(labels, categories, fastpath=True)
  16. repr(cat)
  17. tm.assert_numpy_array_equal(isna(cat), labels == -1)
  18. def test_nan_handling(self):
  19. # Nans are represented as -1 in codes
  20. c = Categorical(["a", "b", np.nan, "a"])
  21. tm.assert_index_equal(c.categories, Index(["a", "b"]))
  22. tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0],
  23. dtype=np.int8))
  24. c[1] = np.nan
  25. tm.assert_index_equal(c.categories, Index(["a", "b"]))
  26. tm.assert_numpy_array_equal(c._codes, np.array([0, -1, -1, 0],
  27. dtype=np.int8))
  28. # Adding nan to categories should make assigned nan point to the
  29. # category!
  30. c = Categorical(["a", "b", np.nan, "a"])
  31. tm.assert_index_equal(c.categories, Index(["a", "b"]))
  32. tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0],
  33. dtype=np.int8))
  34. def test_set_dtype_nans(self):
  35. c = Categorical(['a', 'b', np.nan])
  36. result = c._set_dtype(CategoricalDtype(['a', 'c']))
  37. tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1],
  38. dtype='int8'))
  39. def test_set_item_nan(self):
  40. cat = Categorical([1, 2, 3])
  41. cat[1] = np.nan
  42. exp = Categorical([1, np.nan, 3], categories=[1, 2, 3])
  43. tm.assert_categorical_equal(cat, exp)
  44. @pytest.mark.parametrize('fillna_kwargs, msg', [
  45. (dict(value=1, method='ffill'),
  46. "Cannot specify both 'value' and 'method'."),
  47. (dict(),
  48. "Must specify a fill 'value' or 'method'."),
  49. (dict(method='bad'),
  50. "Invalid fill method. Expecting .* bad"),
  51. ])
  52. def test_fillna_raises(self, fillna_kwargs, msg):
  53. # https://github.com/pandas-dev/pandas/issues/19682
  54. cat = Categorical([1, 2, 3])
  55. with pytest.raises(ValueError, match=msg):
  56. cat.fillna(**fillna_kwargs)
  57. @pytest.mark.parametrize("named", [True, False])
  58. def test_fillna_iterable_category(self, named):
  59. # https://github.com/pandas-dev/pandas/issues/21097
  60. if named:
  61. Point = collections.namedtuple("Point", "x y")
  62. else:
  63. Point = lambda *args: args # tuple
  64. cat = Categorical([Point(0, 0), Point(0, 1), None])
  65. result = cat.fillna(Point(0, 0))
  66. expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)])
  67. tm.assert_categorical_equal(result, expected)