test_compression.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. import pytest
  2. import pandas.util._test_decorators as td
  3. import pandas as pd
  4. import pandas.util.testing as tm
  5. from pandas.util.testing import assert_frame_equal
  6. def test_compression_roundtrip(compression):
  7. df = pd.DataFrame([[0.123456, 0.234567, 0.567567],
  8. [12.32112, 123123.2, 321321.2]],
  9. index=['A', 'B'], columns=['X', 'Y', 'Z'])
  10. with tm.ensure_clean() as path:
  11. df.to_json(path, compression=compression)
  12. assert_frame_equal(df, pd.read_json(path,
  13. compression=compression))
  14. # explicitly ensure file was compressed.
  15. with tm.decompress_file(path, compression) as fh:
  16. result = fh.read().decode('utf8')
  17. assert_frame_equal(df, pd.read_json(result))
  18. def test_read_zipped_json(datapath):
  19. uncompressed_path = datapath("io", "json", "data", "tsframe_v012.json")
  20. uncompressed_df = pd.read_json(uncompressed_path)
  21. compressed_path = datapath("io", "json", "data", "tsframe_v012.json.zip")
  22. compressed_df = pd.read_json(compressed_path, compression='zip')
  23. assert_frame_equal(uncompressed_df, compressed_df)
  24. @td.skip_if_not_us_locale
  25. def test_with_s3_url(compression, s3_resource):
  26. # Bucket "pandas-test" created in tests/io/conftest.py
  27. df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
  28. with tm.ensure_clean() as path:
  29. df.to_json(path, compression=compression)
  30. with open(path, 'rb') as f:
  31. s3_resource.Bucket("pandas-test").put_object(Key='test-1', Body=f)
  32. roundtripped_df = pd.read_json('s3://pandas-test/test-1',
  33. compression=compression)
  34. assert_frame_equal(df, roundtripped_df)
  35. def test_lines_with_compression(compression):
  36. with tm.ensure_clean() as path:
  37. df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
  38. df.to_json(path, orient='records', lines=True,
  39. compression=compression)
  40. roundtripped_df = pd.read_json(path, lines=True,
  41. compression=compression)
  42. assert_frame_equal(df, roundtripped_df)
  43. def test_chunksize_with_compression(compression):
  44. with tm.ensure_clean() as path:
  45. df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}')
  46. df.to_json(path, orient='records', lines=True,
  47. compression=compression)
  48. res = pd.read_json(path, lines=True, chunksize=1,
  49. compression=compression)
  50. roundtripped_df = pd.concat(res)
  51. assert_frame_equal(df, roundtripped_df)
  52. def test_write_unsupported_compression_type():
  53. df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
  54. with tm.ensure_clean() as path:
  55. msg = "Unrecognized compression type: unsupported"
  56. with pytest.raises(ValueError, match=msg):
  57. df.to_json(path, compression="unsupported")
  58. def test_read_unsupported_compression_type():
  59. with tm.ensure_clean() as path:
  60. msg = "Unrecognized compression type: unsupported"
  61. with pytest.raises(ValueError, match=msg):
  62. pd.read_json(path, compression="unsupported")
  63. @pytest.mark.parametrize("to_infer", [True, False])
  64. @pytest.mark.parametrize("read_infer", [True, False])
  65. def test_to_json_compression(compression_only,
  66. read_infer, to_infer):
  67. # see gh-15008
  68. compression = compression_only
  69. if compression == "zip":
  70. pytest.skip("{compression} is not supported "
  71. "for to_csv".format(compression=compression))
  72. # We'll complete file extension subsequently.
  73. filename = "test."
  74. if compression == "gzip":
  75. filename += "gz"
  76. else:
  77. # xz --> .xz
  78. # bz2 --> .bz2
  79. filename += compression
  80. df = pd.DataFrame({"A": [1]})
  81. to_compression = "infer" if to_infer else compression
  82. read_compression = "infer" if read_infer else compression
  83. with tm.ensure_clean(filename) as path:
  84. df.to_json(path, compression=to_compression)
  85. result = pd.read_json(path, compression=read_compression)
  86. tm.assert_frame_equal(result, df)