test_io.py 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. # coding=utf-8
  2. # pylint: disable-msg=E1101,W0612
  3. import collections
  4. from datetime import datetime
  5. import numpy as np
  6. import pytest
  7. from pandas.compat import StringIO, u
  8. import pandas as pd
  9. from pandas import DataFrame, Series
  10. import pandas.util.testing as tm
  11. from pandas.util.testing import (
  12. assert_almost_equal, assert_frame_equal, assert_series_equal, ensure_clean)
  13. from pandas.io.common import _get_handle
  14. class TestSeriesToCSV():
  15. def read_csv(self, path, **kwargs):
  16. params = dict(squeeze=True, index_col=0,
  17. header=None, parse_dates=True)
  18. params.update(**kwargs)
  19. header = params.get("header")
  20. out = pd.read_csv(path, **params)
  21. if header is None:
  22. out.name = out.index.name = None
  23. return out
  24. def test_from_csv_deprecation(self, datetime_series):
  25. # see gh-17812
  26. with ensure_clean() as path:
  27. datetime_series.to_csv(path, header=False)
  28. with tm.assert_produces_warning(FutureWarning,
  29. check_stacklevel=False):
  30. ts = self.read_csv(path)
  31. depr_ts = Series.from_csv(path)
  32. assert_series_equal(depr_ts, ts)
  33. @pytest.mark.parametrize("arg", ["path", "header", "both"])
  34. def test_to_csv_deprecation(self, arg, datetime_series):
  35. # see gh-19715
  36. with ensure_clean() as path:
  37. if arg == "path":
  38. kwargs = dict(path=path, header=False)
  39. elif arg == "header":
  40. kwargs = dict(path_or_buf=path)
  41. else: # Both discrepancies match.
  42. kwargs = dict(path=path)
  43. with tm.assert_produces_warning(FutureWarning):
  44. datetime_series.to_csv(**kwargs)
  45. # Make sure roundtrip still works.
  46. ts = self.read_csv(path)
  47. assert_series_equal(datetime_series, ts, check_names=False)
  48. def test_from_csv(self, datetime_series, string_series):
  49. with ensure_clean() as path:
  50. datetime_series.to_csv(path, header=False)
  51. ts = self.read_csv(path)
  52. assert_series_equal(datetime_series, ts, check_names=False)
  53. assert ts.name is None
  54. assert ts.index.name is None
  55. with tm.assert_produces_warning(FutureWarning,
  56. check_stacklevel=False):
  57. depr_ts = Series.from_csv(path)
  58. assert_series_equal(depr_ts, ts)
  59. # see gh-10483
  60. datetime_series.to_csv(path, header=True)
  61. ts_h = self.read_csv(path, header=0)
  62. assert ts_h.name == "ts"
  63. string_series.to_csv(path, header=False)
  64. series = self.read_csv(path)
  65. assert_series_equal(string_series, series, check_names=False)
  66. assert series.name is None
  67. assert series.index.name is None
  68. string_series.to_csv(path, header=True)
  69. series_h = self.read_csv(path, header=0)
  70. assert series_h.name == "series"
  71. with open(path, "w") as outfile:
  72. outfile.write("1998-01-01|1.0\n1999-01-01|2.0")
  73. series = self.read_csv(path, sep="|")
  74. check_series = Series({datetime(1998, 1, 1): 1.0,
  75. datetime(1999, 1, 1): 2.0})
  76. assert_series_equal(check_series, series)
  77. series = self.read_csv(path, sep="|", parse_dates=False)
  78. check_series = Series({"1998-01-01": 1.0, "1999-01-01": 2.0})
  79. assert_series_equal(check_series, series)
  80. def test_to_csv(self, datetime_series):
  81. import io
  82. with ensure_clean() as path:
  83. datetime_series.to_csv(path, header=False)
  84. with io.open(path, newline=None) as f:
  85. lines = f.readlines()
  86. assert (lines[1] != '\n')
  87. datetime_series.to_csv(path, index=False, header=False)
  88. arr = np.loadtxt(path)
  89. assert_almost_equal(arr, datetime_series.values)
  90. def test_to_csv_unicode_index(self):
  91. buf = StringIO()
  92. s = Series([u("\u05d0"), "d2"], index=[u("\u05d0"), u("\u05d1")])
  93. s.to_csv(buf, encoding="UTF-8", header=False)
  94. buf.seek(0)
  95. s2 = self.read_csv(buf, index_col=0, encoding="UTF-8")
  96. assert_series_equal(s, s2)
  97. def test_to_csv_float_format(self):
  98. with ensure_clean() as filename:
  99. ser = Series([0.123456, 0.234567, 0.567567])
  100. ser.to_csv(filename, float_format="%.2f", header=False)
  101. rs = self.read_csv(filename)
  102. xp = Series([0.12, 0.23, 0.57])
  103. assert_series_equal(rs, xp)
  104. def test_to_csv_list_entries(self):
  105. s = Series(['jack and jill', 'jesse and frank'])
  106. split = s.str.split(r'\s+and\s+')
  107. buf = StringIO()
  108. split.to_csv(buf, header=False)
  109. def test_to_csv_path_is_none(self):
  110. # GH 8215
  111. # Series.to_csv() was returning None, inconsistent with
  112. # DataFrame.to_csv() which returned string
  113. s = Series([1, 2, 3])
  114. csv_str = s.to_csv(path_or_buf=None, header=False)
  115. assert isinstance(csv_str, str)
  116. @pytest.mark.parametrize('s,encoding', [
  117. (Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'],
  118. name='X'), None),
  119. # GH 21241, 21118
  120. (Series(['abc', 'def', 'ghi'], name='X'), 'ascii'),
  121. (Series(["123", u"你好", u"世界"], name=u"中文"), 'gb2312'),
  122. (Series(["123", u"Γειά σου", u"Κόσμε"], name=u"Ελληνικά"), 'cp737')
  123. ])
  124. def test_to_csv_compression(self, s, encoding, compression):
  125. with ensure_clean() as filename:
  126. s.to_csv(filename, compression=compression, encoding=encoding,
  127. header=True)
  128. # test the round trip - to_csv -> read_csv
  129. result = pd.read_csv(filename, compression=compression,
  130. encoding=encoding, index_col=0, squeeze=True)
  131. assert_series_equal(s, result)
  132. # test the round trip using file handle - to_csv -> read_csv
  133. f, _handles = _get_handle(filename, 'w', compression=compression,
  134. encoding=encoding)
  135. with f:
  136. s.to_csv(f, encoding=encoding, header=True)
  137. result = pd.read_csv(filename, compression=compression,
  138. encoding=encoding, index_col=0, squeeze=True)
  139. assert_series_equal(s, result)
  140. # explicitly ensure file was compressed
  141. with tm.decompress_file(filename, compression) as fh:
  142. text = fh.read().decode(encoding or 'utf8')
  143. assert s.name in text
  144. with tm.decompress_file(filename, compression) as fh:
  145. assert_series_equal(s, pd.read_csv(fh,
  146. index_col=0,
  147. squeeze=True,
  148. encoding=encoding))
  149. class TestSeriesIO():
  150. def test_to_frame(self, datetime_series):
  151. datetime_series.name = None
  152. rs = datetime_series.to_frame()
  153. xp = pd.DataFrame(datetime_series.values, index=datetime_series.index)
  154. assert_frame_equal(rs, xp)
  155. datetime_series.name = 'testname'
  156. rs = datetime_series.to_frame()
  157. xp = pd.DataFrame(dict(testname=datetime_series.values),
  158. index=datetime_series.index)
  159. assert_frame_equal(rs, xp)
  160. rs = datetime_series.to_frame(name='testdifferent')
  161. xp = pd.DataFrame(dict(testdifferent=datetime_series.values),
  162. index=datetime_series.index)
  163. assert_frame_equal(rs, xp)
  164. def test_timeseries_periodindex(self):
  165. # GH2891
  166. from pandas import period_range
  167. prng = period_range('1/1/2011', '1/1/2012', freq='M')
  168. ts = Series(np.random.randn(len(prng)), prng)
  169. new_ts = tm.round_trip_pickle(ts)
  170. assert new_ts.index.freq == 'M'
  171. def test_pickle_preserve_name(self):
  172. for n in [777, 777., 'name', datetime(2001, 11, 11), (1, 2)]:
  173. unpickled = self._pickle_roundtrip_name(tm.makeTimeSeries(name=n))
  174. assert unpickled.name == n
  175. def _pickle_roundtrip_name(self, obj):
  176. with ensure_clean() as path:
  177. obj.to_pickle(path)
  178. unpickled = pd.read_pickle(path)
  179. return unpickled
  180. def test_to_frame_expanddim(self):
  181. # GH 9762
  182. class SubclassedSeries(Series):
  183. @property
  184. def _constructor_expanddim(self):
  185. return SubclassedFrame
  186. class SubclassedFrame(DataFrame):
  187. pass
  188. s = SubclassedSeries([1, 2, 3], name='X')
  189. result = s.to_frame()
  190. assert isinstance(result, SubclassedFrame)
  191. expected = SubclassedFrame({'X': [1, 2, 3]})
  192. assert_frame_equal(result, expected)
  193. @pytest.mark.parametrize('mapping', (
  194. dict,
  195. collections.defaultdict(list),
  196. collections.OrderedDict))
  197. def test_to_dict(self, mapping, datetime_series):
  198. # GH16122
  199. tm.assert_series_equal(
  200. Series(datetime_series.to_dict(mapping), name='ts'),
  201. datetime_series)
  202. from_method = Series(datetime_series.to_dict(collections.Counter))
  203. from_constructor = Series(collections
  204. .Counter(datetime_series.iteritems()))
  205. tm.assert_series_equal(from_method, from_constructor)