# test_converters.py (4.1 KB)
# -*- coding: utf-8 -*-
"""
Tests column conversion functionality during parsing
for all of the parsers defined in parsers.py
"""
import numpy as np
import pytest

from pandas.compat import StringIO, lmap, parse_date

import pandas as pd
from pandas import DataFrame, Index
import pandas.util.testing as tm
  12. def test_converters_type_must_be_dict(all_parsers):
  13. parser = all_parsers
  14. data = """index,A,B,C,D
  15. foo,2,3,4,5
  16. """
  17. with pytest.raises(TypeError, match="Type converters.+"):
  18. parser.read_csv(StringIO(data), converters=0)
  19. @pytest.mark.parametrize("column", [3, "D"])
  20. @pytest.mark.parametrize("converter", [
  21. parse_date,
  22. lambda x: int(x.split("/")[2]) # Produce integer.
  23. ])
  24. def test_converters(all_parsers, column, converter):
  25. parser = all_parsers
  26. data = """A,B,C,D
  27. a,1,2,01/01/2009
  28. b,3,4,01/02/2009
  29. c,4,5,01/03/2009
  30. """
  31. result = parser.read_csv(StringIO(data), converters={column: converter})
  32. expected = parser.read_csv(StringIO(data))
  33. expected["D"] = expected["D"].map(converter)
  34. tm.assert_frame_equal(result, expected)
  35. def test_converters_no_implicit_conv(all_parsers):
  36. # see gh-2184
  37. parser = all_parsers
  38. data = """000102,1.2,A\n001245,2,B"""
  39. converters = {0: lambda x: x.strip()}
  40. result = parser.read_csv(StringIO(data), header=None,
  41. converters=converters)
  42. # Column 0 should not be casted to numeric and should remain as object.
  43. expected = DataFrame([["000102", 1.2, "A"], ["001245", 2, "B"]])
  44. tm.assert_frame_equal(result, expected)
  45. def test_converters_euro_decimal_format(all_parsers):
  46. # see gh-583
  47. converters = dict()
  48. parser = all_parsers
  49. data = """Id;Number1;Number2;Text1;Text2;Number3
  50. 1;1521,1541;187101,9543;ABC;poi;4,7387
  51. 2;121,12;14897,76;DEF;uyt;0,3773
  52. 3;878,158;108013,434;GHI;rez;2,7356"""
  53. converters["Number1"] = converters["Number2"] =\
  54. converters["Number3"] = lambda x: float(x.replace(",", "."))
  55. result = parser.read_csv(StringIO(data), sep=";", converters=converters)
  56. expected = DataFrame([[1, 1521.1541, 187101.9543, "ABC", "poi", 4.7387],
  57. [2, 121.12, 14897.76, "DEF", "uyt", 0.3773],
  58. [3, 878.158, 108013.434, "GHI", "rez", 2.7356]],
  59. columns=["Id", "Number1", "Number2",
  60. "Text1", "Text2", "Number3"])
  61. tm.assert_frame_equal(result, expected)
  62. def test_converters_corner_with_nans(all_parsers):
  63. parser = all_parsers
  64. data = """id,score,days
  65. 1,2,12
  66. 2,2-5,
  67. 3,,14+
  68. 4,6-12,2"""
  69. # Example converters.
  70. def convert_days(x):
  71. x = x.strip()
  72. if not x:
  73. return np.nan
  74. is_plus = x.endswith("+")
  75. if is_plus:
  76. x = int(x[:-1]) + 1
  77. else:
  78. x = int(x)
  79. return x
  80. def convert_days_sentinel(x):
  81. x = x.strip()
  82. if not x:
  83. return np.nan
  84. is_plus = x.endswith("+")
  85. if is_plus:
  86. x = int(x[:-1]) + 1
  87. else:
  88. x = int(x)
  89. return x
  90. def convert_score(x):
  91. x = x.strip()
  92. if not x:
  93. return np.nan
  94. if x.find("-") > 0:
  95. val_min, val_max = lmap(int, x.split("-"))
  96. val = 0.5 * (val_min + val_max)
  97. else:
  98. val = float(x)
  99. return val
  100. results = []
  101. for day_converter in [convert_days, convert_days_sentinel]:
  102. result = parser.read_csv(StringIO(data),
  103. converters={"score": convert_score,
  104. "days": day_converter},
  105. na_values=["", None])
  106. assert pd.isna(result["days"][1])
  107. results.append(result)
  108. tm.assert_frame_equal(results[0], results[1])
  109. def test_converter_index_col_bug(all_parsers):
  110. # see gh-1835
  111. parser = all_parsers
  112. data = "A;B\n1;2\n3;4"
  113. rs = parser.read_csv(StringIO(data), sep=";", index_col="A",
  114. converters={"A": lambda x: x})
  115. xp = DataFrame({"B": [2, 4]}, index=Index([1, 3], name="A"))
  116. tm.assert_frame_equal(rs, xp)