123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140 |
- # -*- coding: utf-8 -*-
- """
- Tests that features that are currently unsupported in
- either the Python or C parser are actually enforced
- and are clearly communicated to the user.
- Ultimately, the goal is to remove test cases from this
- test suite as new feature support is added to the parsers.
- """
- import pytest
- from pandas.compat import StringIO
- from pandas.errors import ParserError
- import pandas.util.testing as tm
- import pandas.io.parsers as parsers
- from pandas.io.parsers import read_csv
- @pytest.fixture(params=["python", "python-fwf"], ids=lambda val: val)
- def python_engine(request):
- return request.param
- class TestUnsupportedFeatures(object):
- def test_mangle_dupe_cols_false(self):
- # see gh-12935
- data = 'a b c\n1 2 3'
- msg = 'is not supported'
- for engine in ('c', 'python'):
- with pytest.raises(ValueError, match=msg):
- read_csv(StringIO(data), engine=engine,
- mangle_dupe_cols=False)
- def test_c_engine(self):
- # see gh-6607
- data = 'a b c\n1 2 3'
- msg = 'does not support'
- # specify C engine with unsupported options (raise)
- with pytest.raises(ValueError, match=msg):
- read_csv(StringIO(data), engine='c',
- sep=None, delim_whitespace=False)
- with pytest.raises(ValueError, match=msg):
- read_csv(StringIO(data), engine='c', sep=r'\s')
- with pytest.raises(ValueError, match=msg):
- read_csv(StringIO(data), engine='c', sep='\t', quotechar=chr(128))
- with pytest.raises(ValueError, match=msg):
- read_csv(StringIO(data), engine='c', skipfooter=1)
- # specify C-unsupported options without python-unsupported options
- with tm.assert_produces_warning(parsers.ParserWarning):
- read_csv(StringIO(data), sep=None, delim_whitespace=False)
- with tm.assert_produces_warning(parsers.ParserWarning):
- read_csv(StringIO(data), sep=r'\s')
- with tm.assert_produces_warning(parsers.ParserWarning):
- read_csv(StringIO(data), sep='\t', quotechar=chr(128))
- with tm.assert_produces_warning(parsers.ParserWarning):
- read_csv(StringIO(data), skipfooter=1)
- text = """ A B C D E
- one two three four
- a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640
- a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744
- x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838"""
- msg = 'Error tokenizing data'
- with pytest.raises(ParserError, match=msg):
- read_csv(StringIO(text), sep='\\s+')
- with pytest.raises(ParserError, match=msg):
- read_csv(StringIO(text), engine='c', sep='\\s+')
- msg = "Only length-1 thousands markers supported"
- data = """A|B|C
- 1|2,334|5
- 10|13|10.
- """
- with pytest.raises(ValueError, match=msg):
- read_csv(StringIO(data), thousands=',,')
- with pytest.raises(ValueError, match=msg):
- read_csv(StringIO(data), thousands='')
- msg = "Only length-1 line terminators supported"
- data = 'a,b,c~~1,2,3~~4,5,6'
- with pytest.raises(ValueError, match=msg):
- read_csv(StringIO(data), lineterminator='~~')
- def test_python_engine(self, python_engine):
- from pandas.io.parsers import _python_unsupported as py_unsupported
- data = """1,2,3,,
- 1,2,3,4,
- 1,2,3,4,5
- 1,2,,,
- 1,2,3,4,"""
- for default in py_unsupported:
- msg = ('The %r option is not supported '
- 'with the %r engine' % (default, python_engine))
- kwargs = {default: object()}
- with pytest.raises(ValueError, match=msg):
- read_csv(StringIO(data), engine=python_engine, **kwargs)
- def test_python_engine_file_no_next(self, python_engine):
- # see gh-16530
- class NoNextBuffer(object):
- def __init__(self, csv_data):
- self.data = csv_data
- def __iter__(self):
- return self
- def read(self):
- return self.data
- data = "a\n1"
- msg = "The 'python' engine cannot iterate"
- with pytest.raises(ValueError, match=msg):
- read_csv(NoNextBuffer(data), engine=python_engine)
- class TestDeprecatedFeatures(object):
- @pytest.mark.parametrize("engine", ["c", "python"])
- @pytest.mark.parametrize("kwargs", [{"tupleize_cols": True},
- {"tupleize_cols": False}])
- def test_deprecated_args(self, engine, kwargs):
- data = "1,2,3"
- arg, _ = list(kwargs.items())[0]
- with tm.assert_produces_warning(
- FutureWarning, check_stacklevel=False):
- read_csv(StringIO(data), engine=engine, **kwargs)
|