123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158 |
- # -*- coding: utf-8 -*-
- """
- Tests column conversion functionality during parsing
- for all of the parsers defined in parsers.py
- """
- import numpy as np
- import pytest
- from pandas.compat import StringIO, lmap, parse_date
- import pandas as pd
- from pandas import DataFrame, Index
- import pandas.util.testing as tm
def test_converters_type_must_be_dict(all_parsers):
    """Check that a non-dict ``converters`` argument raises ``TypeError``.

    ``read_csv`` is called with ``converters=0`` and the raised message
    must match the pattern ``"Type converters.+"``.
    """
    csv_text = "index,A,B,C,D\nfoo,2,3,4,5\n"
    not_a_dict = 0

    with pytest.raises(TypeError, match="Type converters.+"):
        all_parsers.read_csv(StringIO(csv_text), converters=not_a_dict)
@pytest.mark.parametrize("column", [3, "D"])
@pytest.mark.parametrize("converter", [
    parse_date,
    lambda x: int(x.split("/")[2])  # Produce integer.
])
def test_converters(all_parsers, column, converter):
    """Check a converter gives the same frame as mapping the column by hand.

    The converter is registered under ``column``, which is either the
    position ``3`` or the label ``"D"``. The expected frame is built by
    parsing the same text without converters and then applying
    ``converter`` to column ``"D"`` with ``Series.map``.
    """
    csv_text = (
        "A,B,C,D\n"
        "a,1,2,01/01/2009\n"
        "b,3,4,01/02/2009\n"
        "c,4,5,01/03/2009\n"
    )

    # Baseline: plain parse, then convert column "D" after the fact.
    expected = all_parsers.read_csv(StringIO(csv_text))
    expected["D"] = expected["D"].map(converter)

    result = all_parsers.read_csv(
        StringIO(csv_text), converters={column: converter}
    )
    tm.assert_frame_equal(result, expected)
def test_converters_no_implicit_conv(all_parsers):
    """Check that converter output is not re-inferred as numeric.

    See gh-2184. Column 0 holds zero-padded digit strings and is passed
    through a converter that only strips whitespace; the expected frame
    keeps those values as the strings ``"000102"`` and ``"001245"``.
    """
    csv_text = "000102,1.2,A\n001245,2,B"
    strip_first_column = {0: lambda x: x.strip()}

    # Column 0 should not be cast to numeric and should remain as object.
    expected = DataFrame([["000102", 1.2, "A"], ["001245", 2, "B"]])

    result = all_parsers.read_csv(
        StringIO(csv_text), header=None, converters=strip_first_column
    )
    tm.assert_frame_equal(result, expected)
def test_converters_euro_decimal_format(all_parsers):
    """Check that converters can parse comma-decimal numbers.

    See gh-583. The three ``Number*`` columns use ``,`` as the decimal
    mark; one shared converter swaps it for ``.`` and calls ``float``.
    The file itself is ``;``-separated.
    """
    csv_text = (
        "Id;Number1;Number2;Text1;Text2;Number3\n"
        "1;1521,1541;187101,9543;ABC;poi;4,7387\n"
        "2;121,12;14897,76;DEF;uyt;0,3773\n"
        "3;878,158;108013,434;GHI;rez;2,7356"
    )

    def euro_to_float(text):
        # Replace the decimal comma so that float() accepts the value.
        return float(text.replace(",", "."))

    # The same callable is registered for every comma-decimal column.
    numeric_columns = ("Number1", "Number2", "Number3")
    converters = {name: euro_to_float for name in numeric_columns}

    result = all_parsers.read_csv(
        StringIO(csv_text), sep=";", converters=converters
    )

    expected = DataFrame(
        [
            [1, 1521.1541, 187101.9543, "ABC", "poi", 4.7387],
            [2, 121.12, 14897.76, "DEF", "uyt", 0.3773],
            [3, 878.158, 108013.434, "GHI", "rez", 2.7356],
        ],
        columns=["Id", "Number1", "Number2", "Text1", "Text2", "Number3"],
    )
    tm.assert_frame_equal(result, expected)
def test_converters_corner_with_nans(all_parsers):
    """Check converters that return NaN for blank fields.

    The ``score`` and ``days`` columns are parsed with custom converters
    that return ``np.nan`` for empty input. The file is read once per
    ``days`` converter; each read must give a missing value at
    ``days[1]``, and the two resulting frames must be equal.
    """
    csv_text = "id,score,days\n1,2,12\n2,2-5,\n3,,14+\n4,6-12,2"

    # Example converters.
    def convert_days(x):
        # Blank -> NaN; "N+" -> N + 1; otherwise the plain integer.
        x = x.strip()
        if not x:
            return np.nan
        if x.endswith("+"):
            return int(x[:-1]) + 1
        return int(x)

    def convert_days_sentinel(x):
        # Second, separately defined converter with the same rules.
        stripped = x.strip()
        if not stripped:
            return np.nan
        if stripped.endswith("+"):
            return int(stripped[:-1]) + 1
        return int(stripped)

    def convert_score(x):
        # Blank -> NaN; "a-b" -> midpoint of a and b; otherwise float.
        x = x.strip()
        if not x:
            return np.nan
        if x.find("-") > 0:
            low, high = lmap(int, x.split("-"))
            return 0.5 * (low + high)
        return float(x)

    frames = []
    for day_converter in (convert_days, convert_days_sentinel):
        frame = all_parsers.read_csv(
            StringIO(csv_text),
            converters={"score": convert_score, "days": day_converter},
            na_values=["", None],
        )
        assert pd.isna(frame["days"][1])
        frames.append(frame)

    first, second = frames
    tm.assert_frame_equal(first, second)
def test_converter_index_col_bug(all_parsers):
    """Check a converter on the column that is also used as ``index_col``.

    See gh-1835. Column ``"A"`` gets an identity converter and is also
    selected as the index; the expected frame has column ``B`` equal to
    ``[2, 4]`` and an index ``Index([1, 3], name="A")``.
    """
    csv_text = "A;B\n1;2\n3;4"
    identity = {"A": lambda x: x}

    expected = DataFrame({"B": [2, 4]}, index=Index([1, 3], name="A"))

    result = all_parsers.read_csv(
        StringIO(csv_text), sep=";", index_col="A", converters=identity
    )
    tm.assert_frame_equal(result, expected)
|