# missing.py (4.2 KB)
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. import pandas.util.testing as tm
  5. from .base import BaseExtensionTests
  6. class BaseMissingTests(BaseExtensionTests):
  7. def test_isna(self, data_missing):
  8. expected = np.array([True, False])
  9. result = pd.isna(data_missing)
  10. tm.assert_numpy_array_equal(result, expected)
  11. result = pd.Series(data_missing).isna()
  12. expected = pd.Series(expected)
  13. self.assert_series_equal(result, expected)
  14. # GH 21189
  15. result = pd.Series(data_missing).drop([0, 1]).isna()
  16. expected = pd.Series([], dtype=bool)
  17. self.assert_series_equal(result, expected)
  18. def test_dropna_array(self, data_missing):
  19. result = data_missing.dropna()
  20. expected = data_missing[[1]]
  21. self.assert_extension_array_equal(result, expected)
  22. def test_dropna_series(self, data_missing):
  23. ser = pd.Series(data_missing)
  24. result = ser.dropna()
  25. expected = ser.iloc[[1]]
  26. self.assert_series_equal(result, expected)
  27. def test_dropna_frame(self, data_missing):
  28. df = pd.DataFrame({"A": data_missing})
  29. # defaults
  30. result = df.dropna()
  31. expected = df.iloc[[1]]
  32. self.assert_frame_equal(result, expected)
  33. # axis = 1
  34. result = df.dropna(axis='columns')
  35. expected = pd.DataFrame(index=[0, 1])
  36. self.assert_frame_equal(result, expected)
  37. # multiple
  38. df = pd.DataFrame({"A": data_missing,
  39. "B": [1, np.nan]})
  40. result = df.dropna()
  41. expected = df.iloc[:0]
  42. self.assert_frame_equal(result, expected)
  43. def test_fillna_scalar(self, data_missing):
  44. valid = data_missing[1]
  45. result = data_missing.fillna(valid)
  46. expected = data_missing.fillna(valid)
  47. self.assert_extension_array_equal(result, expected)
  48. def test_fillna_limit_pad(self, data_missing):
  49. arr = data_missing.take([1, 0, 0, 0, 1])
  50. result = pd.Series(arr).fillna(method='ffill', limit=2)
  51. expected = pd.Series(data_missing.take([1, 1, 1, 0, 1]))
  52. self.assert_series_equal(result, expected)
  53. def test_fillna_limit_backfill(self, data_missing):
  54. arr = data_missing.take([1, 0, 0, 0, 1])
  55. result = pd.Series(arr).fillna(method='backfill', limit=2)
  56. expected = pd.Series(data_missing.take([1, 0, 1, 1, 1]))
  57. self.assert_series_equal(result, expected)
  58. def test_fillna_series(self, data_missing):
  59. fill_value = data_missing[1]
  60. ser = pd.Series(data_missing)
  61. result = ser.fillna(fill_value)
  62. expected = pd.Series(data_missing._from_sequence(
  63. [fill_value, fill_value], dtype=data_missing.dtype))
  64. self.assert_series_equal(result, expected)
  65. # Fill with a series
  66. result = ser.fillna(expected)
  67. self.assert_series_equal(result, expected)
  68. # Fill with a series not affecting the missing values
  69. result = ser.fillna(ser)
  70. self.assert_series_equal(result, ser)
  71. @pytest.mark.parametrize('method', ['ffill', 'bfill'])
  72. def test_fillna_series_method(self, data_missing, method):
  73. fill_value = data_missing[1]
  74. if method == 'ffill':
  75. data_missing = data_missing[::-1]
  76. result = pd.Series(data_missing).fillna(method=method)
  77. expected = pd.Series(data_missing._from_sequence(
  78. [fill_value, fill_value], dtype=data_missing.dtype))
  79. self.assert_series_equal(result, expected)
  80. def test_fillna_frame(self, data_missing):
  81. fill_value = data_missing[1]
  82. result = pd.DataFrame({
  83. "A": data_missing,
  84. "B": [1, 2]
  85. }).fillna(fill_value)
  86. expected = pd.DataFrame({
  87. "A": data_missing._from_sequence([fill_value, fill_value],
  88. dtype=data_missing.dtype),
  89. "B": [1, 2],
  90. })
  91. self.assert_frame_equal(result, expected)
  92. def test_fillna_fill_other(self, data):
  93. result = pd.DataFrame({
  94. "A": data,
  95. "B": [np.nan] * len(data)
  96. }).fillna({"B": 0.0})
  97. expected = pd.DataFrame({
  98. "A": data,
  99. "B": [0.0] * len(result),
  100. })
  101. self.assert_frame_equal(result, expected)