test_compression.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. import contextlib
  2. import os
  3. import warnings
  4. import pytest
  5. import pandas as pd
  6. import pandas.util.testing as tm
  7. import pandas.io.common as icom
  8. @contextlib.contextmanager
  9. def catch_to_csv_depr():
  10. # Catching warnings because Series.to_csv has
  11. # been deprecated. Remove this context when
  12. # Series.to_csv has been aligned.
  13. with warnings.catch_warnings(record=True):
  14. warnings.simplefilter("ignore", FutureWarning)
  15. yield
  16. @pytest.mark.parametrize('obj', [
  17. pd.DataFrame(100 * [[0.123456, 0.234567, 0.567567],
  18. [12.32112, 123123.2, 321321.2]],
  19. columns=['X', 'Y', 'Z']),
  20. pd.Series(100 * [0.123456, 0.234567, 0.567567], name='X')])
  21. @pytest.mark.parametrize('method', ['to_pickle', 'to_json', 'to_csv'])
  22. def test_compression_size(obj, method, compression_only):
  23. with tm.ensure_clean() as path:
  24. with catch_to_csv_depr():
  25. getattr(obj, method)(path, compression=compression_only)
  26. compressed_size = os.path.getsize(path)
  27. getattr(obj, method)(path, compression=None)
  28. uncompressed_size = os.path.getsize(path)
  29. assert uncompressed_size > compressed_size
  30. @pytest.mark.parametrize('obj', [
  31. pd.DataFrame(100 * [[0.123456, 0.234567, 0.567567],
  32. [12.32112, 123123.2, 321321.2]],
  33. columns=['X', 'Y', 'Z']),
  34. pd.Series(100 * [0.123456, 0.234567, 0.567567], name='X')])
  35. @pytest.mark.parametrize('method', ['to_csv', 'to_json'])
  36. def test_compression_size_fh(obj, method, compression_only):
  37. with tm.ensure_clean() as path:
  38. f, handles = icom._get_handle(path, 'w', compression=compression_only)
  39. with catch_to_csv_depr():
  40. with f:
  41. getattr(obj, method)(f)
  42. assert not f.closed
  43. assert f.closed
  44. compressed_size = os.path.getsize(path)
  45. with tm.ensure_clean() as path:
  46. f, handles = icom._get_handle(path, 'w', compression=None)
  47. with catch_to_csv_depr():
  48. with f:
  49. getattr(obj, method)(f)
  50. assert not f.closed
  51. assert f.closed
  52. uncompressed_size = os.path.getsize(path)
  53. assert uncompressed_size > compressed_size
  54. @pytest.mark.parametrize('write_method, write_kwargs, read_method', [
  55. ('to_csv', {'index': False}, pd.read_csv),
  56. ('to_json', {}, pd.read_json),
  57. ('to_pickle', {}, pd.read_pickle),
  58. ])
  59. def test_dataframe_compression_defaults_to_infer(
  60. write_method, write_kwargs, read_method, compression_only):
  61. # GH22004
  62. input = pd.DataFrame([[1.0, 0, -4], [3.4, 5, 2]], columns=['X', 'Y', 'Z'])
  63. extension = icom._compression_to_extension[compression_only]
  64. with tm.ensure_clean('compressed' + extension) as path:
  65. getattr(input, write_method)(path, **write_kwargs)
  66. output = read_method(path, compression=compression_only)
  67. tm.assert_frame_equal(output, input)
  68. @pytest.mark.parametrize('write_method,write_kwargs,read_method,read_kwargs', [
  69. ('to_csv', {'index': False, 'header': True},
  70. pd.read_csv, {'squeeze': True}),
  71. ('to_json', {}, pd.read_json, {'typ': 'series'}),
  72. ('to_pickle', {}, pd.read_pickle, {}),
  73. ])
  74. def test_series_compression_defaults_to_infer(
  75. write_method, write_kwargs, read_method, read_kwargs,
  76. compression_only):
  77. # GH22004
  78. input = pd.Series([0, 5, -2, 10], name='X')
  79. extension = icom._compression_to_extension[compression_only]
  80. with tm.ensure_clean('compressed' + extension) as path:
  81. getattr(input, write_method)(path, **write_kwargs)
  82. output = read_method(path, compression=compression_only, **read_kwargs)
  83. tm.assert_series_equal(output, input, check_names=False)
  84. def test_compression_warning(compression_only):
  85. # Assert that passing a file object to to_csv while explicitly specifying a
  86. # compression protocol triggers a RuntimeWarning, as per GH21227.
  87. # Note that pytest has an issue that causes assert_produces_warning to fail
  88. # in Python 2 if the warning has occurred in previous tests
  89. # (see https://git.io/fNEBm & https://git.io/fNEBC). Hence, should this
  90. # test fail in just Python 2 builds, it likely indicates that other tests
  91. # are producing RuntimeWarnings, thereby triggering the pytest bug.
  92. df = pd.DataFrame(100 * [[0.123456, 0.234567, 0.567567],
  93. [12.32112, 123123.2, 321321.2]],
  94. columns=['X', 'Y', 'Z'])
  95. with tm.ensure_clean() as path:
  96. f, handles = icom._get_handle(path, 'w', compression=compression_only)
  97. with tm.assert_produces_warning(RuntimeWarning,
  98. check_stacklevel=False):
  99. with f:
  100. df.to_csv(f, compression=compression_only)