test_hist_method.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. # coding: utf-8
  2. """ Test cases for .hist method """
  3. import numpy as np
  4. from numpy.random import randn
  5. import pytest
  6. import pandas.util._test_decorators as td
  7. from pandas import DataFrame, Series
  8. from pandas.tests.plotting.common import TestPlotBase, _check_plot_works
  9. import pandas.util.testing as tm
  10. from pandas.plotting._compat import _mpl_ge_2_2_0
  11. from pandas.plotting._core import grouped_hist
  12. @td.skip_if_no_mpl
  13. class TestSeriesPlots(TestPlotBase):
  14. def setup_method(self, method):
  15. TestPlotBase.setup_method(self, method)
  16. import matplotlib as mpl
  17. mpl.rcdefaults()
  18. self.ts = tm.makeTimeSeries()
  19. self.ts.name = 'ts'
  20. @pytest.mark.slow
  21. def test_hist_legacy(self):
  22. _check_plot_works(self.ts.hist)
  23. _check_plot_works(self.ts.hist, grid=False)
  24. _check_plot_works(self.ts.hist, figsize=(8, 10))
  25. # _check_plot_works adds an ax so catch warning. see GH #13188
  26. with tm.assert_produces_warning(UserWarning):
  27. _check_plot_works(self.ts.hist, by=self.ts.index.month)
  28. with tm.assert_produces_warning(UserWarning):
  29. _check_plot_works(self.ts.hist, by=self.ts.index.month, bins=5)
  30. fig, ax = self.plt.subplots(1, 1)
  31. _check_plot_works(self.ts.hist, ax=ax)
  32. _check_plot_works(self.ts.hist, ax=ax, figure=fig)
  33. _check_plot_works(self.ts.hist, figure=fig)
  34. tm.close()
  35. fig, (ax1, ax2) = self.plt.subplots(1, 2)
  36. _check_plot_works(self.ts.hist, figure=fig, ax=ax1)
  37. _check_plot_works(self.ts.hist, figure=fig, ax=ax2)
  38. with pytest.raises(ValueError):
  39. self.ts.hist(by=self.ts.index, figure=fig)
  40. @pytest.mark.slow
  41. def test_hist_bins_legacy(self):
  42. df = DataFrame(np.random.randn(10, 2))
  43. ax = df.hist(bins=2)[0][0]
  44. assert len(ax.patches) == 2
  45. @pytest.mark.slow
  46. def test_hist_layout(self):
  47. df = self.hist_df
  48. with pytest.raises(ValueError):
  49. df.height.hist(layout=(1, 1))
  50. with pytest.raises(ValueError):
  51. df.height.hist(layout=[1, 1])
  52. @pytest.mark.slow
  53. def test_hist_layout_with_by(self):
  54. df = self.hist_df
  55. # _check_plot_works adds an `ax` kwarg to the method call
  56. # so we get a warning about an axis being cleared, even
  57. # though we don't explicing pass one, see GH #13188
  58. with tm.assert_produces_warning(UserWarning):
  59. axes = _check_plot_works(df.height.hist, by=df.gender,
  60. layout=(2, 1))
  61. self._check_axes_shape(axes, axes_num=2, layout=(2, 1))
  62. with tm.assert_produces_warning(UserWarning):
  63. axes = _check_plot_works(df.height.hist, by=df.gender,
  64. layout=(3, -1))
  65. self._check_axes_shape(axes, axes_num=2, layout=(3, 1))
  66. with tm.assert_produces_warning(UserWarning):
  67. axes = _check_plot_works(df.height.hist, by=df.category,
  68. layout=(4, 1))
  69. self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
  70. with tm.assert_produces_warning(UserWarning):
  71. axes = _check_plot_works(
  72. df.height.hist, by=df.category, layout=(2, -1))
  73. self._check_axes_shape(axes, axes_num=4, layout=(2, 2))
  74. with tm.assert_produces_warning(UserWarning):
  75. axes = _check_plot_works(
  76. df.height.hist, by=df.category, layout=(3, -1))
  77. self._check_axes_shape(axes, axes_num=4, layout=(3, 2))
  78. with tm.assert_produces_warning(UserWarning):
  79. axes = _check_plot_works(
  80. df.height.hist, by=df.category, layout=(-1, 4))
  81. self._check_axes_shape(axes, axes_num=4, layout=(1, 4))
  82. with tm.assert_produces_warning(UserWarning):
  83. axes = _check_plot_works(
  84. df.height.hist, by=df.classroom, layout=(2, 2))
  85. self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
  86. axes = df.height.hist(by=df.category, layout=(4, 2), figsize=(12, 7))
  87. self._check_axes_shape(
  88. axes, axes_num=4, layout=(4, 2), figsize=(12, 7))
  89. @pytest.mark.slow
  90. def test_hist_no_overlap(self):
  91. from matplotlib.pyplot import subplot, gcf
  92. x = Series(randn(2))
  93. y = Series(randn(2))
  94. subplot(121)
  95. x.hist()
  96. subplot(122)
  97. y.hist()
  98. fig = gcf()
  99. axes = fig.axes
  100. assert len(axes) == 2
  101. @pytest.mark.slow
  102. def test_hist_by_no_extra_plots(self):
  103. df = self.hist_df
  104. axes = df.height.hist(by=df.gender) # noqa
  105. assert len(self.plt.get_fignums()) == 1
  106. @pytest.mark.slow
  107. def test_plot_fails_when_ax_differs_from_figure(self):
  108. from pylab import figure
  109. fig1 = figure()
  110. fig2 = figure()
  111. ax1 = fig1.add_subplot(111)
  112. with pytest.raises(AssertionError):
  113. self.ts.hist(ax=ax1, figure=fig2)
  114. @td.skip_if_no_mpl
  115. class TestDataFramePlots(TestPlotBase):
  116. @pytest.mark.slow
  117. def test_hist_df_legacy(self):
  118. from matplotlib.patches import Rectangle
  119. with tm.assert_produces_warning(UserWarning):
  120. _check_plot_works(self.hist_df.hist)
  121. # make sure layout is handled
  122. df = DataFrame(randn(100, 3))
  123. with tm.assert_produces_warning(UserWarning):
  124. axes = _check_plot_works(df.hist, grid=False)
  125. self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
  126. assert not axes[1, 1].get_visible()
  127. df = DataFrame(randn(100, 1))
  128. _check_plot_works(df.hist)
  129. # make sure layout is handled
  130. df = DataFrame(randn(100, 6))
  131. with tm.assert_produces_warning(UserWarning):
  132. axes = _check_plot_works(df.hist, layout=(4, 2))
  133. self._check_axes_shape(axes, axes_num=6, layout=(4, 2))
  134. # make sure sharex, sharey is handled
  135. with tm.assert_produces_warning(UserWarning):
  136. _check_plot_works(df.hist, sharex=True, sharey=True)
  137. # handle figsize arg
  138. with tm.assert_produces_warning(UserWarning):
  139. _check_plot_works(df.hist, figsize=(8, 10))
  140. # check bins argument
  141. with tm.assert_produces_warning(UserWarning):
  142. _check_plot_works(df.hist, bins=5)
  143. # make sure xlabelsize and xrot are handled
  144. ser = df[0]
  145. xf, yf = 20, 18
  146. xrot, yrot = 30, 40
  147. axes = ser.hist(xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot)
  148. self._check_ticks_props(axes, xlabelsize=xf, xrot=xrot,
  149. ylabelsize=yf, yrot=yrot)
  150. xf, yf = 20, 18
  151. xrot, yrot = 30, 40
  152. axes = df.hist(xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot)
  153. self._check_ticks_props(axes, xlabelsize=xf, xrot=xrot,
  154. ylabelsize=yf, yrot=yrot)
  155. tm.close()
  156. # make sure kwargs to hist are handled
  157. if _mpl_ge_2_2_0():
  158. kwargs = {"density": True}
  159. else:
  160. kwargs = {"normed": True}
  161. ax = ser.hist(cumulative=True, bins=4, **kwargs)
  162. # height of last bin (index 5) must be 1.0
  163. rects = [x for x in ax.get_children() if isinstance(x, Rectangle)]
  164. tm.assert_almost_equal(rects[-1].get_height(), 1.0)
  165. tm.close()
  166. ax = ser.hist(log=True)
  167. # scale of y must be 'log'
  168. self._check_ax_scales(ax, yaxis='log')
  169. tm.close()
  170. # propagate attr exception from matplotlib.Axes.hist
  171. with pytest.raises(AttributeError):
  172. ser.hist(foo='bar')
  173. @pytest.mark.slow
  174. def test_hist_layout(self):
  175. df = DataFrame(randn(100, 3))
  176. layout_to_expected_size = (
  177. {'layout': None, 'expected_size': (2, 2)}, # default is 2x2
  178. {'layout': (2, 2), 'expected_size': (2, 2)},
  179. {'layout': (4, 1), 'expected_size': (4, 1)},
  180. {'layout': (1, 4), 'expected_size': (1, 4)},
  181. {'layout': (3, 3), 'expected_size': (3, 3)},
  182. {'layout': (-1, 4), 'expected_size': (1, 4)},
  183. {'layout': (4, -1), 'expected_size': (4, 1)},
  184. {'layout': (-1, 2), 'expected_size': (2, 2)},
  185. {'layout': (2, -1), 'expected_size': (2, 2)}
  186. )
  187. for layout_test in layout_to_expected_size:
  188. axes = df.hist(layout=layout_test['layout'])
  189. expected = layout_test['expected_size']
  190. self._check_axes_shape(axes, axes_num=3, layout=expected)
  191. # layout too small for all 4 plots
  192. with pytest.raises(ValueError):
  193. df.hist(layout=(1, 1))
  194. # invalid format for layout
  195. with pytest.raises(ValueError):
  196. df.hist(layout=(1,))
  197. with pytest.raises(ValueError):
  198. df.hist(layout=(-1, -1))
  199. @pytest.mark.slow
  200. # GH 9351
  201. def test_tight_layout(self):
  202. if self.mpl_ge_2_0_1:
  203. df = DataFrame(randn(100, 3))
  204. _check_plot_works(df.hist)
  205. self.plt.tight_layout()
  206. tm.close()
  207. @td.skip_if_no_mpl
  208. class TestDataFrameGroupByPlots(TestPlotBase):
  209. @pytest.mark.slow
  210. def test_grouped_hist_legacy(self):
  211. from matplotlib.patches import Rectangle
  212. df = DataFrame(randn(500, 2), columns=['A', 'B'])
  213. df['C'] = np.random.randint(0, 4, 500)
  214. df['D'] = ['X'] * 500
  215. axes = grouped_hist(df.A, by=df.C)
  216. self._check_axes_shape(axes, axes_num=4, layout=(2, 2))
  217. tm.close()
  218. axes = df.hist(by=df.C)
  219. self._check_axes_shape(axes, axes_num=4, layout=(2, 2))
  220. tm.close()
  221. # group by a key with single value
  222. axes = df.hist(by='D', rot=30)
  223. self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
  224. self._check_ticks_props(axes, xrot=30)
  225. tm.close()
  226. # make sure kwargs to hist are handled
  227. xf, yf = 20, 18
  228. xrot, yrot = 30, 40
  229. if _mpl_ge_2_2_0():
  230. kwargs = {"density": True}
  231. else:
  232. kwargs = {"normed": True}
  233. axes = grouped_hist(df.A, by=df.C, cumulative=True,
  234. bins=4, xlabelsize=xf, xrot=xrot,
  235. ylabelsize=yf, yrot=yrot, **kwargs)
  236. # height of last bin (index 5) must be 1.0
  237. for ax in axes.ravel():
  238. rects = [x for x in ax.get_children() if isinstance(x, Rectangle)]
  239. height = rects[-1].get_height()
  240. tm.assert_almost_equal(height, 1.0)
  241. self._check_ticks_props(axes, xlabelsize=xf, xrot=xrot,
  242. ylabelsize=yf, yrot=yrot)
  243. tm.close()
  244. axes = grouped_hist(df.A, by=df.C, log=True)
  245. # scale of y must be 'log'
  246. self._check_ax_scales(axes, yaxis='log')
  247. tm.close()
  248. # propagate attr exception from matplotlib.Axes.hist
  249. with pytest.raises(AttributeError):
  250. grouped_hist(df.A, by=df.C, foo='bar')
  251. with tm.assert_produces_warning(FutureWarning):
  252. df.hist(by='C', figsize='default')
  253. @pytest.mark.slow
  254. def test_grouped_hist_legacy2(self):
  255. n = 10
  256. weight = Series(np.random.normal(166, 20, size=n))
  257. height = Series(np.random.normal(60, 10, size=n))
  258. with tm.RNGContext(42):
  259. gender_int = np.random.choice([0, 1], size=n)
  260. df_int = DataFrame({'height': height, 'weight': weight,
  261. 'gender': gender_int})
  262. gb = df_int.groupby('gender')
  263. axes = gb.hist()
  264. assert len(axes) == 2
  265. assert len(self.plt.get_fignums()) == 2
  266. tm.close()
  267. @pytest.mark.slow
  268. def test_grouped_hist_layout(self):
  269. df = self.hist_df
  270. pytest.raises(ValueError, df.hist, column='weight', by=df.gender,
  271. layout=(1, 1))
  272. pytest.raises(ValueError, df.hist, column='height', by=df.category,
  273. layout=(1, 3))
  274. pytest.raises(ValueError, df.hist, column='height', by=df.category,
  275. layout=(-1, -1))
  276. with tm.assert_produces_warning(UserWarning):
  277. axes = _check_plot_works(df.hist, column='height', by=df.gender,
  278. layout=(2, 1))
  279. self._check_axes_shape(axes, axes_num=2, layout=(2, 1))
  280. with tm.assert_produces_warning(UserWarning):
  281. axes = _check_plot_works(df.hist, column='height', by=df.gender,
  282. layout=(2, -1))
  283. self._check_axes_shape(axes, axes_num=2, layout=(2, 1))
  284. axes = df.hist(column='height', by=df.category, layout=(4, 1))
  285. self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
  286. axes = df.hist(column='height', by=df.category, layout=(-1, 1))
  287. self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
  288. axes = df.hist(column='height', by=df.category,
  289. layout=(4, 2), figsize=(12, 8))
  290. self._check_axes_shape(
  291. axes, axes_num=4, layout=(4, 2), figsize=(12, 8))
  292. tm.close()
  293. # GH 6769
  294. with tm.assert_produces_warning(UserWarning):
  295. axes = _check_plot_works(
  296. df.hist, column='height', by='classroom', layout=(2, 2))
  297. self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
  298. # without column
  299. with tm.assert_produces_warning(UserWarning):
  300. axes = _check_plot_works(df.hist, by='classroom')
  301. self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
  302. axes = df.hist(by='gender', layout=(3, 5))
  303. self._check_axes_shape(axes, axes_num=2, layout=(3, 5))
  304. axes = df.hist(column=['height', 'weight', 'category'])
  305. self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
  306. @pytest.mark.slow
  307. def test_grouped_hist_multiple_axes(self):
  308. # GH 6970, GH 7069
  309. df = self.hist_df
  310. fig, axes = self.plt.subplots(2, 3)
  311. returned = df.hist(column=['height', 'weight', 'category'], ax=axes[0])
  312. self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
  313. tm.assert_numpy_array_equal(returned, axes[0])
  314. assert returned[0].figure is fig
  315. returned = df.hist(by='classroom', ax=axes[1])
  316. self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
  317. tm.assert_numpy_array_equal(returned, axes[1])
  318. assert returned[0].figure is fig
  319. with pytest.raises(ValueError):
  320. fig, axes = self.plt.subplots(2, 3)
  321. # pass different number of axes from required
  322. axes = df.hist(column='height', ax=axes)
  323. @pytest.mark.slow
  324. def test_axis_share_x(self):
  325. df = self.hist_df
  326. # GH4089
  327. ax1, ax2 = df.hist(column='height', by=df.gender, sharex=True)
  328. # share x
  329. assert ax1._shared_x_axes.joined(ax1, ax2)
  330. assert ax2._shared_x_axes.joined(ax1, ax2)
  331. # don't share y
  332. assert not ax1._shared_y_axes.joined(ax1, ax2)
  333. assert not ax2._shared_y_axes.joined(ax1, ax2)
  334. @pytest.mark.slow
  335. def test_axis_share_y(self):
  336. df = self.hist_df
  337. ax1, ax2 = df.hist(column='height', by=df.gender, sharey=True)
  338. # share y
  339. assert ax1._shared_y_axes.joined(ax1, ax2)
  340. assert ax2._shared_y_axes.joined(ax1, ax2)
  341. # don't share x
  342. assert not ax1._shared_x_axes.joined(ax1, ax2)
  343. assert not ax2._shared_x_axes.joined(ax1, ax2)
  344. @pytest.mark.slow
  345. def test_axis_share_xy(self):
  346. df = self.hist_df
  347. ax1, ax2 = df.hist(column='height', by=df.gender, sharex=True,
  348. sharey=True)
  349. # share both x and y
  350. assert ax1._shared_x_axes.joined(ax1, ax2)
  351. assert ax2._shared_x_axes.joined(ax1, ax2)
  352. assert ax1._shared_y_axes.joined(ax1, ax2)
  353. assert ax2._shared_y_axes.joined(ax1, ax2)