test_missing.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. # -*- coding: utf-8 -*-
  2. import numpy as np
  3. import pytest
  4. from pandas._libs.tslib import iNaT
  5. import pandas as pd
  6. from pandas import Int64Index, MultiIndex, PeriodIndex, UInt64Index
  7. from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
  8. import pandas.util.testing as tm
  def test_fillna(idx):
      """Check ``fillna`` and NaN reporting for the ``idx`` fixture.

      * An empty index is not checked at all.
      * For a ``MultiIndex``, ``fillna`` must raise ``NotImplementedError``.
      * For any other index, filling with the first element must give an
        equal but distinct index, a list value must raise ``TypeError``,
        and an index rebuilt with a missing value at position 1 must report
        it through ``_isnan`` and ``hasnans``. Integer indexes that are not
        datetime-like skip that last part.
      """
      # GH 11343
      # TODO: Remove or Refactor. Not Implemented for MultiIndex
      source = idx

      if len(source) == 0:
          # nothing to exercise on an empty index
          return

      if isinstance(source, MultiIndex):
          candidate = source.copy()
          with pytest.raises(NotImplementedError,
                             match="isna is not defined for MultiIndex"):
              candidate.fillna(candidate[0])
          return

      candidate = source.copy()
      filled = candidate.fillna(candidate[0])
      tm.assert_index_equal(filled, candidate)
      assert filled is not candidate

      with pytest.raises(TypeError,
                         match="'value' must be a scalar, passed: "):
          candidate.fillna([candidate[0]])

      candidate = source.copy()
      raw = candidate.values

      # The datetime-like check must come before the integer check so the
      # branch order of the original test is kept.
      if isinstance(source, DatetimeIndexOpsMixin):
          raw[1] = iNaT
      elif isinstance(source, (Int64Index, UInt64Index)):
          # no missing-value marker is written for these; stop here
          return
      else:
          raw[1] = np.nan

      index_cls = source.__class__
      if isinstance(source, PeriodIndex):
          with_gap = index_cls(raw, freq=source.freq)
      else:
          with_gap = index_cls(raw)

      nan_mask = np.zeros(len(with_gap), dtype=bool)
      nan_mask[1] = True
      tm.assert_numpy_array_equal(with_gap._isnan, nan_mask)
      assert with_gap.hasnans is True
  def test_dropna():
      """Check ``MultiIndex.dropna`` for ``how='any'``, ``how='all'`` and a bad ``how``."""
      # GH 6194
      nan = np.nan
      source = pd.MultiIndex.from_arrays([
          [1, nan, 3, nan, 5],
          [1, 2, nan, nan, 5],
          ['a', 'b', 'c', nan, 'e'],
      ])

      # default and explicit how='any' both drop every row holding a NaN
      without_any = pd.MultiIndex.from_arrays([
          [1, 5],
          [1, 5],
          ['a', 'e'],
      ])
      tm.assert_index_equal(source.dropna(), without_any)
      tm.assert_index_equal(source.dropna(how='any'), without_any)

      # how='all' only drops the row whose entries are all NaN
      without_all = pd.MultiIndex.from_arrays([
          [1, nan, 3, 5],
          [1, 2, nan, 5],
          ['a', 'b', 'c', 'e'],
      ])
      tm.assert_index_equal(source.dropna(how='all'), without_all)

      with pytest.raises(ValueError, match="invalid how option: xxx"):
          source.dropna(how='xxx')
  def test_nulls(idx):
      """Smoke-test that ``idx.isna()`` raises ``NotImplementedError``."""
      # this is really a smoke test for the methods
      # as these are adequately tested for function elsewhere
      with pytest.raises(NotImplementedError,
                         match="isna is not defined for MultiIndex"):
          idx.isna()
  @pytest.mark.xfail
  def test_hasnans_isnans(idx):
      """Check ``_isnan`` / ``hasnans`` on ``idx`` without and with a NaN entry.

      The test is marked ``xfail``; the statement order is kept so that it
      fails at the same point as before.
      """
      # GH 11343, added tests for hasnans / isnans
      clean = idx.copy()

      # cases in indices doesn't include NaN
      all_false = np.zeros(len(clean), dtype=bool)
      tm.assert_numpy_array_equal(clean._isnan, all_false)
      assert clean.hasnans is False

      # rebuild the index from its values with a NaN written at position 1
      donor = idx.copy()
      raw = donor.values
      raw[1] = np.nan
      with_nan = idx.__class__(raw)

      one_true = np.zeros(len(with_nan), dtype=bool)
      one_true[1] = True
      tm.assert_numpy_array_equal(with_nan._isnan, one_true)
      assert with_nan.hasnans is True
  def test_nan_stays_float():
      """Check that an all-missing MultiIndex level stays NaN after join and arithmetic."""
      # GH 7031
      left = pd.MultiIndex(
          levels=[["A", "B"], []],
          codes=[[1, 0], [-1, -1]],
          names=[0, 1],
      )
      right = pd.MultiIndex(
          levels=[["C"], ["D"]],
          codes=[[0], [0]],
          names=[0, 1],
      )
      joined = left.join(right, how='outer')

      left_second = left.get_level_values(1)
      assert pd.isna(left_second).all()
      # the following failed in 0.14.1
      joined_second = joined.get_level_values(1)
      assert pd.isna(joined_second[:-1]).all()

      left_frame = pd.DataFrame([[1, 2]], index=left)
      right_frame = pd.DataFrame([[3, 4]], index=right)
      difference = left_frame - right_frame

      frame_second = left_frame.index.get_level_values(1)
      assert pd.isna(frame_second).all()
      # the following failed in 0.14.1
      difference_second = difference.index.get_level_values(1)
      assert pd.isna(difference_second[:-1]).all()