test_boxplot_method.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385
  1. # coding: utf-8
  2. import itertools
  3. import string
  4. import numpy as np
  5. from numpy import random
  6. import pytest
  7. from pandas.compat import lzip, range
  8. import pandas.util._test_decorators as td
  9. from pandas import DataFrame, MultiIndex, Series
  10. from pandas.tests.plotting.common import TestPlotBase, _check_plot_works
  11. import pandas.util.testing as tm
  12. import pandas.plotting as plotting
  13. """ Test cases for .boxplot method """
  14. @td.skip_if_no_mpl
  15. class TestDataFramePlots(TestPlotBase):
  16. @pytest.mark.slow
  17. def test_boxplot_legacy1(self):
  18. df = DataFrame(np.random.randn(6, 4),
  19. index=list(string.ascii_letters[:6]),
  20. columns=['one', 'two', 'three', 'four'])
  21. df['indic'] = ['foo', 'bar'] * 3
  22. df['indic2'] = ['foo', 'bar', 'foo'] * 2
  23. _check_plot_works(df.boxplot, return_type='dict')
  24. _check_plot_works(df.boxplot, column=[
  25. 'one', 'two'], return_type='dict')
  26. # _check_plot_works adds an ax so catch warning. see GH #13188
  27. with tm.assert_produces_warning(UserWarning):
  28. _check_plot_works(df.boxplot, column=['one', 'two'],
  29. by='indic')
  30. _check_plot_works(df.boxplot, column='one', by=['indic', 'indic2'])
  31. with tm.assert_produces_warning(UserWarning):
  32. _check_plot_works(df.boxplot, by='indic')
  33. with tm.assert_produces_warning(UserWarning):
  34. _check_plot_works(df.boxplot, by=['indic', 'indic2'])
  35. _check_plot_works(plotting._core.boxplot, data=df['one'],
  36. return_type='dict')
  37. _check_plot_works(df.boxplot, notch=1, return_type='dict')
  38. with tm.assert_produces_warning(UserWarning):
  39. _check_plot_works(df.boxplot, by='indic', notch=1)
  40. @pytest.mark.slow
  41. def test_boxplot_legacy2(self):
  42. df = DataFrame(np.random.rand(10, 2), columns=['Col1', 'Col2'])
  43. df['X'] = Series(['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B'])
  44. df['Y'] = Series(['A'] * 10)
  45. with tm.assert_produces_warning(UserWarning):
  46. _check_plot_works(df.boxplot, by='X')
  47. # When ax is supplied and required number of axes is 1,
  48. # passed ax should be used:
  49. fig, ax = self.plt.subplots()
  50. axes = df.boxplot('Col1', by='X', ax=ax)
  51. ax_axes = ax.axes
  52. assert ax_axes is axes
  53. fig, ax = self.plt.subplots()
  54. axes = df.groupby('Y').boxplot(ax=ax, return_type='axes')
  55. ax_axes = ax.axes
  56. assert ax_axes is axes['A']
  57. # Multiple columns with an ax argument should use same figure
  58. fig, ax = self.plt.subplots()
  59. with tm.assert_produces_warning(UserWarning):
  60. axes = df.boxplot(column=['Col1', 'Col2'],
  61. by='X', ax=ax, return_type='axes')
  62. assert axes['Col1'].get_figure() is fig
  63. # When by is None, check that all relevant lines are present in the
  64. # dict
  65. fig, ax = self.plt.subplots()
  66. d = df.boxplot(ax=ax, return_type='dict')
  67. lines = list(itertools.chain.from_iterable(d.values()))
  68. assert len(ax.get_lines()) == len(lines)
  69. @pytest.mark.slow
  70. def test_boxplot_return_type_none(self):
  71. # GH 12216; return_type=None & by=None -> axes
  72. result = self.hist_df.boxplot()
  73. assert isinstance(result, self.plt.Axes)
  74. @pytest.mark.slow
  75. def test_boxplot_return_type_legacy(self):
  76. # API change in https://github.com/pandas-dev/pandas/pull/7096
  77. import matplotlib as mpl # noqa
  78. df = DataFrame(np.random.randn(6, 4),
  79. index=list(string.ascii_letters[:6]),
  80. columns=['one', 'two', 'three', 'four'])
  81. with pytest.raises(ValueError):
  82. df.boxplot(return_type='NOTATYPE')
  83. result = df.boxplot()
  84. self._check_box_return_type(result, 'axes')
  85. with tm.assert_produces_warning(False):
  86. result = df.boxplot(return_type='dict')
  87. self._check_box_return_type(result, 'dict')
  88. with tm.assert_produces_warning(False):
  89. result = df.boxplot(return_type='axes')
  90. self._check_box_return_type(result, 'axes')
  91. with tm.assert_produces_warning(False):
  92. result = df.boxplot(return_type='both')
  93. self._check_box_return_type(result, 'both')
  94. @pytest.mark.slow
  95. def test_boxplot_axis_limits(self):
  96. def _check_ax_limits(col, ax):
  97. y_min, y_max = ax.get_ylim()
  98. assert y_min <= col.min()
  99. assert y_max >= col.max()
  100. df = self.hist_df.copy()
  101. df['age'] = np.random.randint(1, 20, df.shape[0])
  102. # One full row
  103. height_ax, weight_ax = df.boxplot(['height', 'weight'], by='category')
  104. _check_ax_limits(df['height'], height_ax)
  105. _check_ax_limits(df['weight'], weight_ax)
  106. assert weight_ax._sharey == height_ax
  107. # Two rows, one partial
  108. p = df.boxplot(['height', 'weight', 'age'], by='category')
  109. height_ax, weight_ax, age_ax = p[0, 0], p[0, 1], p[1, 0]
  110. dummy_ax = p[1, 1]
  111. _check_ax_limits(df['height'], height_ax)
  112. _check_ax_limits(df['weight'], weight_ax)
  113. _check_ax_limits(df['age'], age_ax)
  114. assert weight_ax._sharey == height_ax
  115. assert age_ax._sharey == height_ax
  116. assert dummy_ax._sharey is None
  117. @pytest.mark.slow
  118. def test_boxplot_empty_column(self):
  119. df = DataFrame(np.random.randn(20, 4))
  120. df.loc[:, 0] = np.nan
  121. _check_plot_works(df.boxplot, return_type='axes')
  122. @pytest.mark.slow
  123. def test_figsize(self):
  124. df = DataFrame(np.random.rand(10, 5),
  125. columns=['A', 'B', 'C', 'D', 'E'])
  126. result = df.boxplot(return_type='axes', figsize=(12, 8))
  127. assert result.figure.bbox_inches.width == 12
  128. assert result.figure.bbox_inches.height == 8
  129. def test_fontsize(self):
  130. df = DataFrame({"a": [1, 2, 3, 4, 5, 6]})
  131. self._check_ticks_props(df.boxplot("a", fontsize=16),
  132. xlabelsize=16, ylabelsize=16)
  133. @td.skip_if_no_mpl
  134. class TestDataFrameGroupByPlots(TestPlotBase):
  135. @pytest.mark.slow
  136. def test_boxplot_legacy1(self):
  137. grouped = self.hist_df.groupby(by='gender')
  138. with tm.assert_produces_warning(UserWarning):
  139. axes = _check_plot_works(grouped.boxplot, return_type='axes')
  140. self._check_axes_shape(list(axes.values), axes_num=2, layout=(1, 2))
  141. axes = _check_plot_works(grouped.boxplot, subplots=False,
  142. return_type='axes')
  143. self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
  144. @pytest.mark.slow
  145. def test_boxplot_legacy2(self):
  146. tuples = lzip(string.ascii_letters[:10], range(10))
  147. df = DataFrame(np.random.rand(10, 3),
  148. index=MultiIndex.from_tuples(tuples))
  149. grouped = df.groupby(level=1)
  150. with tm.assert_produces_warning(UserWarning):
  151. axes = _check_plot_works(grouped.boxplot, return_type='axes')
  152. self._check_axes_shape(list(axes.values), axes_num=10, layout=(4, 3))
  153. axes = _check_plot_works(grouped.boxplot, subplots=False,
  154. return_type='axes')
  155. self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
  156. @pytest.mark.slow
  157. def test_boxplot_legacy3(self):
  158. tuples = lzip(string.ascii_letters[:10], range(10))
  159. df = DataFrame(np.random.rand(10, 3),
  160. index=MultiIndex.from_tuples(tuples))
  161. grouped = df.unstack(level=1).groupby(level=0, axis=1)
  162. with tm.assert_produces_warning(UserWarning):
  163. axes = _check_plot_works(grouped.boxplot, return_type='axes')
  164. self._check_axes_shape(list(axes.values), axes_num=3, layout=(2, 2))
  165. axes = _check_plot_works(grouped.boxplot, subplots=False,
  166. return_type='axes')
  167. self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
  168. @pytest.mark.slow
  169. def test_grouped_plot_fignums(self):
  170. n = 10
  171. weight = Series(np.random.normal(166, 20, size=n))
  172. height = Series(np.random.normal(60, 10, size=n))
  173. with tm.RNGContext(42):
  174. gender = np.random.choice(['male', 'female'], size=n)
  175. df = DataFrame({'height': height, 'weight': weight, 'gender': gender})
  176. gb = df.groupby('gender')
  177. res = gb.plot()
  178. assert len(self.plt.get_fignums()) == 2
  179. assert len(res) == 2
  180. tm.close()
  181. res = gb.boxplot(return_type='axes')
  182. assert len(self.plt.get_fignums()) == 1
  183. assert len(res) == 2
  184. tm.close()
  185. # now works with GH 5610 as gender is excluded
  186. res = df.groupby('gender').hist()
  187. tm.close()
  188. @pytest.mark.slow
  189. def test_grouped_box_return_type(self):
  190. df = self.hist_df
  191. # old style: return_type=None
  192. result = df.boxplot(by='gender')
  193. assert isinstance(result, np.ndarray)
  194. self._check_box_return_type(
  195. result, None,
  196. expected_keys=['height', 'weight', 'category'])
  197. # now for groupby
  198. result = df.groupby('gender').boxplot(return_type='dict')
  199. self._check_box_return_type(
  200. result, 'dict', expected_keys=['Male', 'Female'])
  201. columns2 = 'X B C D A G Y N Q O'.split()
  202. df2 = DataFrame(random.randn(50, 10), columns=columns2)
  203. categories2 = 'A B C D E F G H I J'.split()
  204. df2['category'] = categories2 * 5
  205. for t in ['dict', 'axes', 'both']:
  206. returned = df.groupby('classroom').boxplot(return_type=t)
  207. self._check_box_return_type(
  208. returned, t, expected_keys=['A', 'B', 'C'])
  209. returned = df.boxplot(by='classroom', return_type=t)
  210. self._check_box_return_type(
  211. returned, t,
  212. expected_keys=['height', 'weight', 'category'])
  213. returned = df2.groupby('category').boxplot(return_type=t)
  214. self._check_box_return_type(returned, t, expected_keys=categories2)
  215. returned = df2.boxplot(by='category', return_type=t)
  216. self._check_box_return_type(returned, t, expected_keys=columns2)
  217. @pytest.mark.slow
  218. def test_grouped_box_layout(self):
  219. df = self.hist_df
  220. pytest.raises(ValueError, df.boxplot, column=['weight', 'height'],
  221. by=df.gender, layout=(1, 1))
  222. pytest.raises(ValueError, df.boxplot,
  223. column=['height', 'weight', 'category'],
  224. layout=(2, 1), return_type='dict')
  225. pytest.raises(ValueError, df.boxplot, column=['weight', 'height'],
  226. by=df.gender, layout=(-1, -1))
  227. # _check_plot_works adds an ax so catch warning. see GH #13188
  228. with tm.assert_produces_warning(UserWarning):
  229. box = _check_plot_works(df.groupby('gender').boxplot,
  230. column='height', return_type='dict')
  231. self._check_axes_shape(self.plt.gcf().axes, axes_num=2, layout=(1, 2))
  232. with tm.assert_produces_warning(UserWarning):
  233. box = _check_plot_works(df.groupby('category').boxplot,
  234. column='height',
  235. return_type='dict')
  236. self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(2, 2))
  237. # GH 6769
  238. with tm.assert_produces_warning(UserWarning):
  239. box = _check_plot_works(df.groupby('classroom').boxplot,
  240. column='height', return_type='dict')
  241. self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2))
  242. # GH 5897
  243. axes = df.boxplot(column=['height', 'weight', 'category'], by='gender',
  244. return_type='axes')
  245. self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2))
  246. for ax in [axes['height']]:
  247. self._check_visible(ax.get_xticklabels(), visible=False)
  248. self._check_visible([ax.xaxis.get_label()], visible=False)
  249. for ax in [axes['weight'], axes['category']]:
  250. self._check_visible(ax.get_xticklabels())
  251. self._check_visible([ax.xaxis.get_label()])
  252. box = df.groupby('classroom').boxplot(
  253. column=['height', 'weight', 'category'], return_type='dict')
  254. self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2))
  255. with tm.assert_produces_warning(UserWarning):
  256. box = _check_plot_works(df.groupby('category').boxplot,
  257. column='height',
  258. layout=(3, 2), return_type='dict')
  259. self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(3, 2))
  260. with tm.assert_produces_warning(UserWarning):
  261. box = _check_plot_works(df.groupby('category').boxplot,
  262. column='height',
  263. layout=(3, -1), return_type='dict')
  264. self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(3, 2))
  265. box = df.boxplot(column=['height', 'weight', 'category'], by='gender',
  266. layout=(4, 1))
  267. self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(4, 1))
  268. box = df.boxplot(column=['height', 'weight', 'category'], by='gender',
  269. layout=(-1, 1))
  270. self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(3, 1))
  271. box = df.groupby('classroom').boxplot(
  272. column=['height', 'weight', 'category'], layout=(1, 4),
  273. return_type='dict')
  274. self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 4))
  275. box = df.groupby('classroom').boxplot( # noqa
  276. column=['height', 'weight', 'category'], layout=(1, -1),
  277. return_type='dict')
  278. self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 3))
  279. @pytest.mark.slow
  280. def test_grouped_box_multiple_axes(self):
  281. # GH 6970, GH 7069
  282. df = self.hist_df
  283. # check warning to ignore sharex / sharey
  284. # this check should be done in the first function which
  285. # passes multiple axes to plot, hist or boxplot
  286. # location should be changed if other test is added
  287. # which has earlier alphabetical order
  288. with tm.assert_produces_warning(UserWarning):
  289. fig, axes = self.plt.subplots(2, 2)
  290. df.groupby('category').boxplot(
  291. column='height', return_type='axes', ax=axes)
  292. self._check_axes_shape(self.plt.gcf().axes,
  293. axes_num=4, layout=(2, 2))
  294. fig, axes = self.plt.subplots(2, 3)
  295. with tm.assert_produces_warning(UserWarning):
  296. returned = df.boxplot(column=['height', 'weight', 'category'],
  297. by='gender', return_type='axes', ax=axes[0])
  298. returned = np.array(list(returned.values))
  299. self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
  300. tm.assert_numpy_array_equal(returned, axes[0])
  301. assert returned[0].figure is fig
  302. # draw on second row
  303. with tm.assert_produces_warning(UserWarning):
  304. returned = df.groupby('classroom').boxplot(
  305. column=['height', 'weight', 'category'],
  306. return_type='axes', ax=axes[1])
  307. returned = np.array(list(returned.values))
  308. self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
  309. tm.assert_numpy_array_equal(returned, axes[1])
  310. assert returned[0].figure is fig
  311. with pytest.raises(ValueError):
  312. fig, axes = self.plt.subplots(2, 3)
  313. # pass different number of axes from required
  314. with tm.assert_produces_warning(UserWarning):
  315. axes = df.groupby('classroom').boxplot(ax=axes)
  316. def test_fontsize(self):
  317. df = DataFrame({"a": [1, 2, 3, 4, 5, 6], "b": [0, 0, 0, 1, 1, 1]})
  318. self._check_ticks_props(df.boxplot("a", by="b", fontsize=16),
  319. xlabelsize=16, ylabelsize=16)