test_repr.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484
  1. # coding=utf-8
  2. # pylint: disable-msg=E1101,W0612
  3. from datetime import datetime, timedelta
  4. import numpy as np
  5. import pandas.compat as compat
  6. from pandas.compat import lrange, range, u
  7. import pandas as pd
  8. from pandas import (
  9. Categorical, DataFrame, Index, Series, date_range, option_context,
  10. period_range, timedelta_range)
  11. from pandas.core.base import StringMixin
  12. from pandas.core.index import MultiIndex
  13. import pandas.util.testing as tm
  14. from .common import TestData
  15. class TestSeriesRepr(TestData):
  16. def test_multilevel_name_print(self):
  17. index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
  18. 'three']],
  19. codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
  20. [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
  21. names=['first', 'second'])
  22. s = Series(lrange(0, len(index)), index=index, name='sth')
  23. expected = ["first second", "foo one 0",
  24. " two 1", " three 2",
  25. "bar one 3", " two 4",
  26. "baz two 5", " three 6",
  27. "qux one 7", " two 8",
  28. " three 9", "Name: sth, dtype: int64"]
  29. expected = "\n".join(expected)
  30. assert repr(s) == expected
  31. def test_name_printing(self):
  32. # Test small Series.
  33. s = Series([0, 1, 2])
  34. s.name = "test"
  35. assert "Name: test" in repr(s)
  36. s.name = None
  37. assert "Name:" not in repr(s)
  38. # Test big Series (diff code path).
  39. s = Series(lrange(0, 1000))
  40. s.name = "test"
  41. assert "Name: test" in repr(s)
  42. s.name = None
  43. assert "Name:" not in repr(s)
  44. s = Series(index=date_range('20010101', '20020101'), name='test')
  45. assert "Name: test" in repr(s)
  46. def test_repr(self):
  47. str(self.ts)
  48. str(self.series)
  49. str(self.series.astype(int))
  50. str(self.objSeries)
  51. str(Series(tm.randn(1000), index=np.arange(1000)))
  52. str(Series(tm.randn(1000), index=np.arange(1000, 0, step=-1)))
  53. # empty
  54. str(self.empty)
  55. # with NaNs
  56. self.series[5:7] = np.NaN
  57. str(self.series)
  58. # with Nones
  59. ots = self.ts.astype('O')
  60. ots[::2] = None
  61. repr(ots)
  62. # various names
  63. for name in ['', 1, 1.2, 'foo', u('\u03B1\u03B2\u03B3'),
  64. 'loooooooooooooooooooooooooooooooooooooooooooooooooooong',
  65. ('foo', 'bar', 'baz'), (1, 2), ('foo', 1, 2.3),
  66. (u('\u03B1'), u('\u03B2'), u('\u03B3')),
  67. (u('\u03B1'), 'bar')]:
  68. self.series.name = name
  69. repr(self.series)
  70. biggie = Series(tm.randn(1000), index=np.arange(1000),
  71. name=('foo', 'bar', 'baz'))
  72. repr(biggie)
  73. # 0 as name
  74. ser = Series(np.random.randn(100), name=0)
  75. rep_str = repr(ser)
  76. assert "Name: 0" in rep_str
  77. # tidy repr
  78. ser = Series(np.random.randn(1001), name=0)
  79. rep_str = repr(ser)
  80. assert "Name: 0" in rep_str
  81. ser = Series(["a\n\r\tb"], name="a\n\r\td", index=["a\n\r\tf"])
  82. assert "\t" not in repr(ser)
  83. assert "\r" not in repr(ser)
  84. assert "a\n" not in repr(ser)
  85. # with empty series (#4651)
  86. s = Series([], dtype=np.int64, name='foo')
  87. assert repr(s) == 'Series([], Name: foo, dtype: int64)'
  88. s = Series([], dtype=np.int64, name=None)
  89. assert repr(s) == 'Series([], dtype: int64)'
  90. def test_tidy_repr(self):
  91. a = Series([u("\u05d0")] * 1000)
  92. a.name = 'title1'
  93. repr(a) # should not raise exception
  94. def test_repr_bool_fails(self, capsys):
  95. s = Series([DataFrame(np.random.randn(2, 2)) for i in range(5)])
  96. # It works (with no Cython exception barf)!
  97. repr(s)
  98. captured = capsys.readouterr()
  99. assert captured.err == ''
  100. def test_repr_name_iterable_indexable(self):
  101. s = Series([1, 2, 3], name=np.int64(3))
  102. # it works!
  103. repr(s)
  104. s.name = (u("\u05d0"), ) * 2
  105. repr(s)
  106. def test_repr_should_return_str(self):
  107. # https://docs.python.org/3/reference/datamodel.html#object.__repr__
  108. # ...The return value must be a string object.
  109. # (str on py2.x, str (unicode) on py3)
  110. data = [8, 5, 3, 5]
  111. index1 = [u("\u03c3"), u("\u03c4"), u("\u03c5"), u("\u03c6")]
  112. df = Series(data, index=index1)
  113. assert type(df.__repr__() == str) # both py2 / 3
  114. def test_repr_max_rows(self):
  115. # GH 6863
  116. with pd.option_context('max_rows', None):
  117. str(Series(range(1001))) # should not raise exception
  118. def test_unicode_string_with_unicode(self):
  119. df = Series([u("\u05d0")], name=u("\u05d1"))
  120. if compat.PY3:
  121. str(df)
  122. else:
  123. compat.text_type(df)
  124. def test_bytestring_with_unicode(self):
  125. df = Series([u("\u05d0")], name=u("\u05d1"))
  126. if compat.PY3:
  127. bytes(df)
  128. else:
  129. str(df)
  130. def test_timeseries_repr_object_dtype(self):
  131. index = Index([datetime(2000, 1, 1) + timedelta(i)
  132. for i in range(1000)], dtype=object)
  133. ts = Series(np.random.randn(len(index)), index)
  134. repr(ts)
  135. ts = tm.makeTimeSeries(1000)
  136. assert repr(ts).splitlines()[-1].startswith('Freq:')
  137. ts2 = ts.iloc[np.random.randint(0, len(ts) - 1, 400)]
  138. repr(ts2).splitlines()[-1]
  139. def test_latex_repr(self):
  140. result = r"""\begin{tabular}{ll}
  141. \toprule
  142. {} & 0 \\
  143. \midrule
  144. 0 & $\alpha$ \\
  145. 1 & b \\
  146. 2 & c \\
  147. \bottomrule
  148. \end{tabular}
  149. """
  150. with option_context('display.latex.escape', False,
  151. 'display.latex.repr', True):
  152. s = Series([r'$\alpha$', 'b', 'c'])
  153. assert result == s._repr_latex_()
  154. assert s._repr_latex_() is None
  155. def test_index_repr_in_frame_with_nan(self):
  156. # see gh-25061
  157. i = Index([1, np.nan])
  158. s = Series([1, 2], index=i)
  159. exp = """1.0 1\nNaN 2\ndtype: int64"""
  160. assert repr(s) == exp
  161. class TestCategoricalRepr(object):
  162. def test_categorical_repr_unicode(self):
  163. # GH#21002 if len(index) > 60, sys.getdefaultencoding()=='ascii',
  164. # and we are working in PY2, then rendering a Categorical could raise
  165. # UnicodeDecodeError by trying to decode when it shouldn't
  166. class County(StringMixin):
  167. name = u'San Sebastián'
  168. state = u'PR'
  169. def __unicode__(self):
  170. return self.name + u', ' + self.state
  171. cat = pd.Categorical([County() for n in range(61)])
  172. idx = pd.Index(cat)
  173. ser = idx.to_series()
  174. if compat.PY3:
  175. # no reloading of sys, just check that the default (utf8) works
  176. # as expected
  177. repr(ser)
  178. str(ser)
  179. else:
  180. # set sys.defaultencoding to ascii, then change it back after
  181. # the test
  182. with tm.set_defaultencoding('ascii'):
  183. repr(ser)
  184. str(ser)
  185. def test_categorical_repr(self):
  186. a = Series(Categorical([1, 2, 3, 4]))
  187. exp = u("0 1\n1 2\n2 3\n3 4\n" +
  188. "dtype: category\nCategories (4, int64): [1, 2, 3, 4]")
  189. assert exp == a.__unicode__()
  190. a = Series(Categorical(["a", "b"] * 25))
  191. exp = u("0 a\n1 b\n" + " ..\n" + "48 a\n49 b\n" +
  192. "Length: 50, dtype: category\nCategories (2, object): [a, b]")
  193. with option_context("display.max_rows", 5):
  194. assert exp == repr(a)
  195. levs = list("abcdefghijklmnopqrstuvwxyz")
  196. a = Series(Categorical(["a", "b"], categories=levs, ordered=True))
  197. exp = u("0 a\n1 b\n" + "dtype: category\n"
  198. "Categories (26, object): [a < b < c < d ... w < x < y < z]")
  199. assert exp == a.__unicode__()
  200. def test_categorical_series_repr(self):
  201. s = Series(Categorical([1, 2, 3]))
  202. exp = """0 1
  203. 1 2
  204. 2 3
  205. dtype: category
  206. Categories (3, int64): [1, 2, 3]"""
  207. assert repr(s) == exp
  208. s = Series(Categorical(np.arange(10)))
  209. exp = """0 0
  210. 1 1
  211. 2 2
  212. 3 3
  213. 4 4
  214. 5 5
  215. 6 6
  216. 7 7
  217. 8 8
  218. 9 9
  219. dtype: category
  220. Categories (10, int64): [0, 1, 2, 3, ..., 6, 7, 8, 9]"""
  221. assert repr(s) == exp
  222. def test_categorical_series_repr_ordered(self):
  223. s = Series(Categorical([1, 2, 3], ordered=True))
  224. exp = """0 1
  225. 1 2
  226. 2 3
  227. dtype: category
  228. Categories (3, int64): [1 < 2 < 3]"""
  229. assert repr(s) == exp
  230. s = Series(Categorical(np.arange(10), ordered=True))
  231. exp = """0 0
  232. 1 1
  233. 2 2
  234. 3 3
  235. 4 4
  236. 5 5
  237. 6 6
  238. 7 7
  239. 8 8
  240. 9 9
  241. dtype: category
  242. Categories (10, int64): [0 < 1 < 2 < 3 ... 6 < 7 < 8 < 9]"""
  243. assert repr(s) == exp
  244. def test_categorical_series_repr_datetime(self):
  245. idx = date_range('2011-01-01 09:00', freq='H', periods=5)
  246. s = Series(Categorical(idx))
  247. exp = """0 2011-01-01 09:00:00
  248. 1 2011-01-01 10:00:00
  249. 2 2011-01-01 11:00:00
  250. 3 2011-01-01 12:00:00
  251. 4 2011-01-01 13:00:00
  252. dtype: category
  253. Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00,
  254. 2011-01-01 12:00:00, 2011-01-01 13:00:00]""" # noqa
  255. assert repr(s) == exp
  256. idx = date_range('2011-01-01 09:00', freq='H', periods=5,
  257. tz='US/Eastern')
  258. s = Series(Categorical(idx))
  259. exp = """0 2011-01-01 09:00:00-05:00
  260. 1 2011-01-01 10:00:00-05:00
  261. 2 2011-01-01 11:00:00-05:00
  262. 3 2011-01-01 12:00:00-05:00
  263. 4 2011-01-01 13:00:00-05:00
  264. dtype: category
  265. Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,
  266. 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,
  267. 2011-01-01 13:00:00-05:00]""" # noqa
  268. assert repr(s) == exp
  269. def test_categorical_series_repr_datetime_ordered(self):
  270. idx = date_range('2011-01-01 09:00', freq='H', periods=5)
  271. s = Series(Categorical(idx, ordered=True))
  272. exp = """0 2011-01-01 09:00:00
  273. 1 2011-01-01 10:00:00
  274. 2 2011-01-01 11:00:00
  275. 3 2011-01-01 12:00:00
  276. 4 2011-01-01 13:00:00
  277. dtype: category
  278. Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
  279. 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa
  280. assert repr(s) == exp
  281. idx = date_range('2011-01-01 09:00', freq='H', periods=5,
  282. tz='US/Eastern')
  283. s = Series(Categorical(idx, ordered=True))
  284. exp = """0 2011-01-01 09:00:00-05:00
  285. 1 2011-01-01 10:00:00-05:00
  286. 2 2011-01-01 11:00:00-05:00
  287. 3 2011-01-01 12:00:00-05:00
  288. 4 2011-01-01 13:00:00-05:00
  289. dtype: category
  290. Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
  291. 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
  292. 2011-01-01 13:00:00-05:00]""" # noqa
  293. assert repr(s) == exp
  294. def test_categorical_series_repr_period(self):
  295. idx = period_range('2011-01-01 09:00', freq='H', periods=5)
  296. s = Series(Categorical(idx))
  297. exp = """0 2011-01-01 09:00
  298. 1 2011-01-01 10:00
  299. 2 2011-01-01 11:00
  300. 3 2011-01-01 12:00
  301. 4 2011-01-01 13:00
  302. dtype: category
  303. Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
  304. 2011-01-01 13:00]""" # noqa
  305. assert repr(s) == exp
  306. idx = period_range('2011-01', freq='M', periods=5)
  307. s = Series(Categorical(idx))
  308. exp = """0 2011-01
  309. 1 2011-02
  310. 2 2011-03
  311. 3 2011-04
  312. 4 2011-05
  313. dtype: category
  314. Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
  315. assert repr(s) == exp
  316. def test_categorical_series_repr_period_ordered(self):
  317. idx = period_range('2011-01-01 09:00', freq='H', periods=5)
  318. s = Series(Categorical(idx, ordered=True))
  319. exp = """0 2011-01-01 09:00
  320. 1 2011-01-01 10:00
  321. 2 2011-01-01 11:00
  322. 3 2011-01-01 12:00
  323. 4 2011-01-01 13:00
  324. dtype: category
  325. Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
  326. 2011-01-01 13:00]""" # noqa
  327. assert repr(s) == exp
  328. idx = period_range('2011-01', freq='M', periods=5)
  329. s = Series(Categorical(idx, ordered=True))
  330. exp = """0 2011-01
  331. 1 2011-02
  332. 2 2011-03
  333. 3 2011-04
  334. 4 2011-05
  335. dtype: category
  336. Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
  337. assert repr(s) == exp
  338. def test_categorical_series_repr_timedelta(self):
  339. idx = timedelta_range('1 days', periods=5)
  340. s = Series(Categorical(idx))
  341. exp = """0 1 days
  342. 1 2 days
  343. 2 3 days
  344. 3 4 days
  345. 4 5 days
  346. dtype: category
  347. Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]"""
  348. assert repr(s) == exp
  349. idx = timedelta_range('1 hours', periods=10)
  350. s = Series(Categorical(idx))
  351. exp = """0 0 days 01:00:00
  352. 1 1 days 01:00:00
  353. 2 2 days 01:00:00
  354. 3 3 days 01:00:00
  355. 4 4 days 01:00:00
  356. 5 5 days 01:00:00
  357. 6 6 days 01:00:00
  358. 7 7 days 01:00:00
  359. 8 8 days 01:00:00
  360. 9 9 days 01:00:00
  361. dtype: category
  362. Categories (10, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
  363. 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00,
  364. 8 days 01:00:00, 9 days 01:00:00]""" # noqa
  365. assert repr(s) == exp
  366. def test_categorical_series_repr_timedelta_ordered(self):
  367. idx = timedelta_range('1 days', periods=5)
  368. s = Series(Categorical(idx, ordered=True))
  369. exp = """0 1 days
  370. 1 2 days
  371. 2 3 days
  372. 3 4 days
  373. 4 5 days
  374. dtype: category
  375. Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa
  376. assert repr(s) == exp
  377. idx = timedelta_range('1 hours', periods=10)
  378. s = Series(Categorical(idx, ordered=True))
  379. exp = """0 0 days 01:00:00
  380. 1 1 days 01:00:00
  381. 2 2 days 01:00:00
  382. 3 3 days 01:00:00
  383. 4 4 days 01:00:00
  384. 5 5 days 01:00:00
  385. 6 6 days 01:00:00
  386. 7 7 days 01:00:00
  387. 8 8 days 01:00:00
  388. 9 9 days 01:00:00
  389. dtype: category
  390. Categories (10, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
  391. 3 days 01:00:00 ... 6 days 01:00:00 < 7 days 01:00:00 <
  392. 8 days 01:00:00 < 9 days 01:00:00]""" # noqa
  393. assert repr(s) == exp