test_unsupported.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. # -*- coding: utf-8 -*-
  2. """
Tests that the restrictions on features currently unsupported by
either the Python or C parser are actually enforced, and that
they are clearly communicated to the user.
  6. Ultimately, the goal is to remove test cases from this
  7. test suite as new feature support is added to the parsers.
  8. """
  9. import pytest
  10. from pandas.compat import StringIO
  11. from pandas.errors import ParserError
  12. import pandas.util.testing as tm
  13. import pandas.io.parsers as parsers
  14. from pandas.io.parsers import read_csv
  15. @pytest.fixture(params=["python", "python-fwf"], ids=lambda val: val)
  16. def python_engine(request):
  17. return request.param
  18. class TestUnsupportedFeatures(object):
  19. def test_mangle_dupe_cols_false(self):
  20. # see gh-12935
  21. data = 'a b c\n1 2 3'
  22. msg = 'is not supported'
  23. for engine in ('c', 'python'):
  24. with pytest.raises(ValueError, match=msg):
  25. read_csv(StringIO(data), engine=engine,
  26. mangle_dupe_cols=False)
  27. def test_c_engine(self):
  28. # see gh-6607
  29. data = 'a b c\n1 2 3'
  30. msg = 'does not support'
  31. # specify C engine with unsupported options (raise)
  32. with pytest.raises(ValueError, match=msg):
  33. read_csv(StringIO(data), engine='c',
  34. sep=None, delim_whitespace=False)
  35. with pytest.raises(ValueError, match=msg):
  36. read_csv(StringIO(data), engine='c', sep=r'\s')
  37. with pytest.raises(ValueError, match=msg):
  38. read_csv(StringIO(data), engine='c', sep='\t', quotechar=chr(128))
  39. with pytest.raises(ValueError, match=msg):
  40. read_csv(StringIO(data), engine='c', skipfooter=1)
  41. # specify C-unsupported options without python-unsupported options
  42. with tm.assert_produces_warning(parsers.ParserWarning):
  43. read_csv(StringIO(data), sep=None, delim_whitespace=False)
  44. with tm.assert_produces_warning(parsers.ParserWarning):
  45. read_csv(StringIO(data), sep=r'\s')
  46. with tm.assert_produces_warning(parsers.ParserWarning):
  47. read_csv(StringIO(data), sep='\t', quotechar=chr(128))
  48. with tm.assert_produces_warning(parsers.ParserWarning):
  49. read_csv(StringIO(data), skipfooter=1)
  50. text = """ A B C D E
  51. one two three four
  52. a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640
  53. a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744
  54. x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838"""
  55. msg = 'Error tokenizing data'
  56. with pytest.raises(ParserError, match=msg):
  57. read_csv(StringIO(text), sep='\\s+')
  58. with pytest.raises(ParserError, match=msg):
  59. read_csv(StringIO(text), engine='c', sep='\\s+')
  60. msg = "Only length-1 thousands markers supported"
  61. data = """A|B|C
  62. 1|2,334|5
  63. 10|13|10.
  64. """
  65. with pytest.raises(ValueError, match=msg):
  66. read_csv(StringIO(data), thousands=',,')
  67. with pytest.raises(ValueError, match=msg):
  68. read_csv(StringIO(data), thousands='')
  69. msg = "Only length-1 line terminators supported"
  70. data = 'a,b,c~~1,2,3~~4,5,6'
  71. with pytest.raises(ValueError, match=msg):
  72. read_csv(StringIO(data), lineterminator='~~')
  73. def test_python_engine(self, python_engine):
  74. from pandas.io.parsers import _python_unsupported as py_unsupported
  75. data = """1,2,3,,
  76. 1,2,3,4,
  77. 1,2,3,4,5
  78. 1,2,,,
  79. 1,2,3,4,"""
  80. for default in py_unsupported:
  81. msg = ('The %r option is not supported '
  82. 'with the %r engine' % (default, python_engine))
  83. kwargs = {default: object()}
  84. with pytest.raises(ValueError, match=msg):
  85. read_csv(StringIO(data), engine=python_engine, **kwargs)
  86. def test_python_engine_file_no_next(self, python_engine):
  87. # see gh-16530
  88. class NoNextBuffer(object):
  89. def __init__(self, csv_data):
  90. self.data = csv_data
  91. def __iter__(self):
  92. return self
  93. def read(self):
  94. return self.data
  95. data = "a\n1"
  96. msg = "The 'python' engine cannot iterate"
  97. with pytest.raises(ValueError, match=msg):
  98. read_csv(NoNextBuffer(data), engine=python_engine)
  99. class TestDeprecatedFeatures(object):
  100. @pytest.mark.parametrize("engine", ["c", "python"])
  101. @pytest.mark.parametrize("kwargs", [{"tupleize_cols": True},
  102. {"tupleize_cols": False}])
  103. def test_deprecated_args(self, engine, kwargs):
  104. data = "1,2,3"
  105. arg, _ = list(kwargs.items())[0]
  106. with tm.assert_produces_warning(
  107. FutureWarning, check_stacklevel=False):
  108. read_csv(StringIO(data), engine=engine, **kwargs)