_core.py 125 KB


  1. # being a bit too dynamic
  2. # pylint: disable=E1101
  3. from __future__ import division
  4. from collections import namedtuple
  5. from distutils.version import LooseVersion
  6. import re
  7. import warnings
  8. import numpy as np
  9. import pandas.compat as compat
  10. from pandas.compat import lrange, map, range, string_types, zip
  11. from pandas.errors import AbstractMethodError
  12. from pandas.util._decorators import Appender, cache_readonly
  13. from pandas.core.dtypes.common import (
  14. is_hashable, is_integer, is_iterator, is_list_like, is_number)
  15. from pandas.core.dtypes.generic import (
  16. ABCDataFrame, ABCIndexClass, ABCMultiIndex, ABCPeriodIndex, ABCSeries)
  17. from pandas.core.dtypes.missing import isna, notna, remove_na_arraylike
  18. from pandas.core.base import PandasObject
  19. import pandas.core.common as com
  20. from pandas.core.config import get_option
  21. from pandas.core.generic import _shared_doc_kwargs, _shared_docs
  22. from pandas.io.formats.printing import pprint_thing
  23. from pandas.plotting._compat import _mpl_ge_3_0_0
  24. from pandas.plotting._style import _get_standard_colors, plot_params
  25. from pandas.plotting._tools import (
  26. _flatten, _get_all_lines, _get_xlim, _handle_shared_axes, _set_ticks_props,
  27. _subplots, format_date_labels, table)
  28. try:
  29. from pandas.plotting import _converter
  30. except ImportError:
  31. _HAS_MPL = False
  32. else:
  33. _HAS_MPL = True
  34. if get_option('plotting.matplotlib.register_converters'):
  35. _converter.register(explicit=False)
  def _raise_if_no_mpl():
      """Raise ImportError when the module-level ``_HAS_MPL`` flag is false."""
      # TODO(mpl_converter): remove once converter is explicit
      if _HAS_MPL:
          return
      raise ImportError("matplotlib is required for plotting.")
  40. def _get_standard_kind(kind):
  41. return {'density': 'kde'}.get(kind, kind)
  def _gca(rc=None):
      """Return ``pyplot.gca()`` evaluated inside ``pyplot.rc_context(rc)``."""
      from matplotlib import pyplot
      with pyplot.rc_context(rc):
          current_axes = pyplot.gca()
      return current_axes
  def _gcf():
      """Return the figure reported by ``pyplot.gcf()``."""
      from matplotlib import pyplot
      return pyplot.gcf()
  49. class MPLPlot(object):
  50. """
  51. Base class for assembling a pandas plot using matplotlib
  52. Parameters
  53. ----------
  54. data :
  55. """
  56. @property
  57. def _kind(self):
  58. """Specify kind str. Must be overridden in child class"""
  59. raise NotImplementedError
  60. _layout_type = 'vertical'
  61. _default_rot = 0
  62. orientation = None
  63. _pop_attributes = ['label', 'style', 'logy', 'logx', 'loglog',
  64. 'mark_right', 'stacked']
  65. _attr_defaults = {'logy': False, 'logx': False, 'loglog': False,
  66. 'mark_right': True, 'stacked': False}
  def __init__(self, data, kind=None, by=None, subplots=False, sharex=None,
               sharey=False, use_index=True,
               figsize=None, grid=None, legend=True, rot=None,
               ax=None, fig=None, title=None, xlim=None, ylim=None,
               xticks=None, yticks=None,
               sort_columns=False, fontsize=None,
               secondary_y=False, colormap=None,
               table=False, layout=None, **kwds):
      """
      Record the plot configuration on the instance.

      Most arguments are stored unchanged under the same attribute name.
      The ones that get extra handling:

      sharex : defaults to True when no ``ax`` is given, else False
      rot : falls back to ``_default_rot``; ``_rot_set`` records whether
          the caller supplied it
      grid : when None, False if ``secondary_y`` is truthy, otherwise
          the ``axes.grid`` rcParam
      secondary_y : anything that is not a bool, tuple, list, ndarray or
          index is wrapped in a one-element list
      colormap / cmap : either spelling is accepted, but not both
      **kwds : names listed in ``_pop_attributes`` plus ``xerr`` and
          ``yerr`` are removed; the remainder is kept as ``self.kwds``

      Raises
      ------
      TypeError
          If both ``cmap`` and a truthy ``colormap`` are supplied.
      """
      _raise_if_no_mpl()
      _converter._WARN = False

      self.data = data
      self.by = by
      self.kind = kind
      self.sort_columns = sort_columns
      self.subplots = subplots

      # if we get an axis, the users should do the visibility setting,
      # so sharing only defaults to on when no ``ax`` was handed in
      self.sharex = (ax is None) if sharex is None else sharex
      self.sharey = sharey

      self.figsize = figsize
      self.layout = layout
      self.xticks = xticks
      self.yticks = yticks
      self.xlim = xlim
      self.ylim = ylim
      self.title = title
      self.use_index = use_index
      self.fontsize = fontsize

      # need to know for format_date_labels since it's rotated to 30 by
      # default
      self._rot_set = rot is not None
      self.rot = rot if self._rot_set else self._default_rot

      if grid is None:
          if secondary_y:
              grid = False
          else:
              grid = self.plt.rcParams['axes.grid']
      self.grid = grid

      self.legend = legend
      self.legend_handles = []
      self.legend_labels = []

      for name in self._pop_attributes:
          setattr(self, name, kwds.pop(name, self._attr_defaults.get(name)))

      self.ax = ax
      self.fig = fig
      self.axes = None

      # parse errorbar input if given; xerr is parsed before yerr
      xerr = kwds.pop('xerr', None)
      yerr = kwds.pop('yerr', None)
      parsed = {}
      for name, raw in (('xerr', xerr), ('yerr', yerr)):
          parsed[name] = self._parse_errorbars(name, raw)
      self.errors = parsed

      collection_types = (bool, tuple, list, np.ndarray, ABCIndexClass)
      if not isinstance(secondary_y, collection_types):
          secondary_y = [secondary_y]
      self.secondary_y = secondary_y

      # ugly TypeError if user passes matplotlib's `cmap` name.
      # Probably better to accept either.
      if 'cmap' in kwds:
          if colormap:
              raise TypeError("Only specify one of `cmap` and `colormap`.")
          colormap = kwds.pop('cmap')
      self.colormap = colormap

      self.table = table

      self.kwds = kwds

      self._validate_color_args()
  141. def _validate_color_args(self):
  142. if 'color' not in self.kwds and 'colors' in self.kwds:
  143. warnings.warn(("'colors' is being deprecated. Please use 'color'"
  144. "instead of 'colors'"))
  145. colors = self.kwds.pop('colors')
  146. self.kwds['color'] = colors
  147. if ('color' in self.kwds and self.nseries == 1 and
  148. not is_list_like(self.kwds['color'])):
  149. # support series.plot(color='green')
  150. self.kwds['color'] = [self.kwds['color']]
  151. if ('color' in self.kwds and isinstance(self.kwds['color'], tuple) and
  152. self.nseries == 1 and len(self.kwds['color']) in (3, 4)):
  153. # support RGB and RGBA tuples in series plot
  154. self.kwds['color'] = [self.kwds['color']]
  155. if ('color' in self.kwds or 'colors' in self.kwds) and \
  156. self.colormap is not None:
  157. warnings.warn("'color' and 'colormap' cannot be used "
  158. "simultaneously. Using 'color'")
  159. if 'color' in self.kwds and self.style is not None:
  160. if is_list_like(self.style):
  161. styles = self.style
  162. else:
  163. styles = [self.style]
  164. # need only a single match
  165. for s in styles:
  166. if re.match('^[a-z]+?', s) is not None:
  167. raise ValueError(
  168. "Cannot pass 'style' string with a color "
  169. "symbol and 'color' keyword argument. Please"
  170. " use one or the other or pass 'style' "
  171. "without a color symbol")
  172. def _iter_data(self, data=None, keep_index=False, fillna=None):
  173. if data is None:
  174. data = self.data
  175. if fillna is not None:
  176. data = data.fillna(fillna)
  177. # TODO: unused?
  178. # if self.sort_columns:
  179. # columns = com.try_sort(data.columns)
  180. # else:
  181. # columns = data.columns
  182. for col, values in data.iteritems():
  183. if keep_index is True:
  184. yield col, values
  185. else:
  186. yield col, values.values
  @property
  def nseries(self):
      """1 for one-dimensional data, otherwise ``self.data.shape[1]``."""
      return 1 if self.data.ndim == 1 else self.data.shape[1]
  def draw(self):
      """Delegate to ``self.plt.draw_if_interactive()``."""
      pyplot = self.plt
      pyplot.draw_if_interactive()
  def generate(self):
      """
      Run every plotting stage in order, then post-process each axes.

      The stage methods are looked up by name one at a time, immediately
      before each is called.
      """
      stages = ('_args_adjust', '_compute_plot_data', '_setup_subplots',
                '_make_plot', '_add_table', '_make_legend',
                '_adorn_subplots')
      for stage in stages:
          getattr(self, stage)()

      for ax in self.axes:
          self._post_plot_logic_common(ax, self.data)
          self._post_plot_logic(ax, self.data)
  206. def _args_adjust(self):
  207. pass
  208. def _has_plotted_object(self, ax):
  209. """check whether ax has data"""
  210. return (len(ax.lines) != 0 or
  211. len(ax.artists) != 0 or
  212. len(ax.containers) != 0)
  213. def _maybe_right_yaxis(self, ax, axes_num):
  214. if not self.on_right(axes_num):
  215. # secondary axes may be passed via ax kw
  216. return self._get_ax_layer(ax)
  217. if hasattr(ax, 'right_ax'):
  218. # if it has right_ax proparty, ``ax`` must be left axes
  219. return ax.right_ax
  220. elif hasattr(ax, 'left_ax'):
  221. # if it has left_ax proparty, ``ax`` must be right axes
  222. return ax
  223. else:
  224. # otherwise, create twin axes
  225. orig_ax, new_ax = ax, ax.twinx()
  226. # TODO: use Matplotlib public API when available
  227. new_ax._get_lines = orig_ax._get_lines
  228. new_ax._get_patches_for_fill = orig_ax._get_patches_for_fill
  229. orig_ax.right_ax, new_ax.left_ax = new_ax, orig_ax
  230. if not self._has_plotted_object(orig_ax): # no data on left y
  231. orig_ax.get_yaxis().set_visible(False)
  232. if self.logy or self.loglog:
  233. new_ax.set_yscale('log')
  234. return new_ax
  def _setup_subplots(self):
      """
      Create or adopt the figure and axes and store them on the instance.

      With ``subplots`` the work is delegated to ``_subplots``; otherwise
      a new figure with one subplot is created, or the supplied
      ``self.ax`` is reused (resizing its figure when ``figsize`` is
      given).  Log scales are applied before the flattened axes are
      saved as ``self.axes`` and the figure as ``self.fig``.
      """
      if self.subplots:
          fig, axes = _subplots(naxes=self.nseries,
                                sharex=self.sharex, sharey=self.sharey,
                                figsize=self.figsize, ax=self.ax,
                                layout=self.layout,
                                layout_type=self._layout_type)
      elif self.ax is None:
          fig = self.plt.figure(figsize=self.figsize)
          axes = fig.add_subplot(111)
      else:
          fig = self.ax.get_figure()
          if self.figsize is not None:
              fig.set_size_inches(self.figsize)
          axes = self.ax

      axes = _flatten(axes)

      if self.logx or self.loglog:
          for each in axes:
              each.set_xscale('log')
      if self.logy or self.loglog:
          for each in axes:
              each.set_yscale('log')

      self.fig = fig
      self.axes = axes
  @property
  def result(self):
      """
      Return result axes

      For subplots this is ``self.axes``, reshaped to ``self.layout``
      when a layout is set and ``self.ax`` is not list-like.  Otherwise
      it is the first axes, or its right layer when every series is
      plotted on the secondary axis.
      """
      if self.subplots:
          if self.layout is not None and not is_list_like(self.ax):
              return self.axes.reshape(*self.layout)
          return self.axes

      secondary = self.secondary_y
      sec_true = isinstance(secondary, bool) and secondary
      all_sec = (is_list_like(secondary) and
                 len(secondary) == self.nseries)
      if not (sec_true or all_sec):
          return self.axes[0]
      # if all data is plotted on secondary, return right axes
      return self._get_ax_layer(self.axes[0], primary=False)
  def _compute_plot_data(self):
      """
      Reduce ``self.data`` to its plottable columns.

      A Series is first turned into a one-column frame.  After a
      ``_convert`` pass only number, datetime, datetimetz and timedelta
      columns are kept and stored back on ``self.data``.

      Raises
      ------
      TypeError
          If nothing is left after the dtype selection.
      """
      frame = self.data

      if isinstance(frame, ABCSeries):
          column_name = self.label
          if column_name is None and frame.name is None:
              column_name = 'None'
          frame = frame.to_frame(name=column_name)

      # GH16953, _convert is needed as fallback, for ``Series``
      # with ``dtype == object``
      frame = frame._convert(datetime=True, timedelta=True)
      wanted = [np.number, "datetime", "datetimetz", "timedelta"]
      numeric_data = frame.select_dtypes(include=wanted)

      try:
          nothing_left = numeric_data.empty
      except AttributeError:
          nothing_left = not len(numeric_data)

      # no empty frames or series allowed
      if nothing_left:
          kind_name = numeric_data.__class__.__name__
          raise TypeError('Empty {0!r}: no numeric data to '
                          'plot'.format(kind_name))

      self.data = numeric_data
  300. def _make_plot(self):
  301. raise AbstractMethodError(self)
  302. def _add_table(self):
  303. if self.table is False:
  304. return
  305. elif self.table is True:
  306. data = self.data.transpose()
  307. else:
  308. data = self.table
  309. ax = self._get_ax(0)
  310. table(ax, data)
  311. def _post_plot_logic_common(self, ax, data):
  312. """Common post process for each axes"""
  313. def get_label(i):
  314. try:
  315. return pprint_thing(data.index[i])
  316. except Exception:
  317. return ''
  318. if self.orientation == 'vertical' or self.orientation is None:
  319. if self._need_to_set_index:
  320. xticklabels = [get_label(x) for x in ax.get_xticks()]
  321. ax.set_xticklabels(xticklabels)
  322. self._apply_axis_properties(ax.xaxis, rot=self.rot,
  323. fontsize=self.fontsize)
  324. self._apply_axis_properties(ax.yaxis, fontsize=self.fontsize)
  325. if hasattr(ax, 'right_ax'):
  326. self._apply_axis_properties(ax.right_ax.yaxis,
  327. fontsize=self.fontsize)
  328. elif self.orientation == 'horizontal':
  329. if self._need_to_set_index:
  330. yticklabels = [get_label(y) for y in ax.get_yticks()]
  331. ax.set_yticklabels(yticklabels)
  332. self._apply_axis_properties(ax.yaxis, rot=self.rot,
  333. fontsize=self.fontsize)
  334. self._apply_axis_properties(ax.xaxis, fontsize=self.fontsize)
  335. if hasattr(ax, 'right_ax'):
  336. self._apply_axis_properties(ax.right_ax.yaxis,
  337. fontsize=self.fontsize)
  338. else: # pragma no cover
  339. raise ValueError
  340. def _post_plot_logic(self, ax, data):
  341. """Post process for each axes. Overridden in child classes"""
  342. pass
  343. def _adorn_subplots(self):
  344. """Common post process unrelated to data"""
  345. if len(self.axes) > 0:
  346. all_axes = self._get_subplots()
  347. nrows, ncols = self._get_axes_layout()
  348. _handle_shared_axes(axarr=all_axes, nplots=len(all_axes),
  349. naxes=nrows * ncols, nrows=nrows,
  350. ncols=ncols, sharex=self.sharex,
  351. sharey=self.sharey)
  352. for ax in self.axes:
  353. if self.yticks is not None:
  354. ax.set_yticks(self.yticks)
  355. if self.xticks is not None:
  356. ax.set_xticks(self.xticks)
  357. if self.ylim is not None:
  358. ax.set_ylim(self.ylim)
  359. if self.xlim is not None:
  360. ax.set_xlim(self.xlim)
  361. ax.grid(self.grid)
  362. if self.title:
  363. if self.subplots:
  364. if is_list_like(self.title):
  365. if len(self.title) != self.nseries:
  366. msg = ('The length of `title` must equal the number '
  367. 'of columns if using `title` of type `list` '
  368. 'and `subplots=True`.\n'
  369. 'length of title = {}\n'
  370. 'number of columns = {}').format(
  371. len(self.title), self.nseries)
  372. raise ValueError(msg)
  373. for (ax, title) in zip(self.axes, self.title):
  374. ax.set_title(title)
  375. else:
  376. self.fig.suptitle(self.title)
  377. else:
  378. if is_list_like(self.title):
  379. msg = ('Using `title` of type `list` is not supported '
  380. 'unless `subplots=True` is passed')
  381. raise ValueError(msg)
  382. self.axes[0].set_title(self.title)
  383. def _apply_axis_properties(self, axis, rot=None, fontsize=None):
  384. labels = axis.get_majorticklabels() + axis.get_minorticklabels()
  385. for label in labels:
  386. if rot is not None:
  387. label.set_rotation(rot)
  388. if fontsize is not None:
  389. label.set_fontsize(fontsize)
  @property
  def legend_title(self):
      """
      Legend title derived from the column index of ``self.data``.

      For a MultiIndex the printed level names are joined with commas;
      otherwise the printed ``columns.name`` is returned, or None when
      the columns have no name.
      """
      columns = self.data.columns
      if isinstance(columns, ABCMultiIndex):
          return ','.join(pprint_thing(level) for level in columns.names)

      name = columns.name
      if name is None:
          return name
      return pprint_thing(name)
  401. def _add_legend_handle(self, handle, label, index=None):
  402. if label is not None:
  403. if self.mark_right and index is not None:
  404. if self.on_right(index):
  405. label = label + ' (right)'
  406. self.legend_handles.append(handle)
  407. self.legend_labels.append(label)
  408. def _make_legend(self):
  409. ax, leg = self._get_ax_legend(self.axes[0])
  410. handles = []
  411. labels = []
  412. title = ''
  413. if not self.subplots:
  414. if leg is not None:
  415. title = leg.get_title().get_text()
  416. handles = leg.legendHandles
  417. labels = [x.get_text() for x in leg.get_texts()]
  418. if self.legend:
  419. if self.legend == 'reverse':
  420. self.legend_handles = reversed(self.legend_handles)
  421. self.legend_labels = reversed(self.legend_labels)
  422. handles += self.legend_handles
  423. labels += self.legend_labels
  424. if self.legend_title is not None:
  425. title = self.legend_title
  426. if len(handles) > 0:
  427. ax.legend(handles, labels, loc='best', title=title)
  428. elif self.subplots and self.legend:
  429. for ax in self.axes:
  430. if ax.get_visible():
  431. ax.legend(loc='best')
  432. def _get_ax_legend(self, ax):
  433. leg = ax.get_legend()
  434. other_ax = (getattr(ax, 'left_ax', None) or
  435. getattr(ax, 'right_ax', None))
  436. other_leg = None
  437. if other_ax is not None:
  438. other_leg = other_ax.get_legend()
  439. if leg is None and other_leg is not None:
  440. leg = other_leg
  441. ax = other_ax
  442. return ax, leg
  @cache_readonly
  def plt(self):
      """The ``matplotlib.pyplot`` module, imported inside the accessor."""
      from matplotlib import pyplot
      return pyplot

  # flipped to True by ``_get_xticks`` when positional x values stand in
  # for an index that is neither numeric nor date-like
  _need_to_set_index = False
  def _get_xticks(self, convert_period=False):
      """
      Return the x values to plot against, derived from the data index.

      Positions ``0..len(index)-1`` are used when ``use_index`` is off or
      the index is neither numeric nor date-like; in the latter case
      ``_need_to_set_index`` is switched on.  The period and date-like
      branches also replace ``self.data`` with a sorted copy.
      """
      index = self.data.index
      is_datetype = index.inferred_type in ('datetime', 'date',
                                            'datetime64', 'time')

      if not self.use_index:
          return lrange(len(index))

      if convert_period and isinstance(index, ABCPeriodIndex):
          self.data = self.data.reindex(index=index.sort_values())
          return self.data.index.to_timestamp()._mpl_repr()

      if index.is_numeric():
          # Matplotlib supports numeric values or datetime objects as
          # xaxis values. Taking LBYL approach here, by the time
          # matplotlib raises exception when using non numeric/datetime
          # values for xaxis, several actions are already taken by plt.
          return index._mpl_repr()

      if is_datetype:
          self.data = self.data[notna(self.data.index)]
          self.data = self.data.sort_index()
          return self.data.index._mpl_repr()

      self._need_to_set_index = True
      return lrange(len(index))
  @classmethod
  def _plot(cls, ax, x, y, style=None, is_errorbar=False, **kwds):
      """
      Draw ``y`` against ``x`` on ``ax`` with ``plot`` or ``errorbar``.

      Missing ``y`` values are masked first and an index passed as ``x``
      is replaced by its ``_mpl_repr()``.  ``style`` is only forwarded
      to ``ax.plot``.
      """
      missing = isna(y)
      if missing.any():
          y = np.ma.masked_where(missing, np.ma.array(y))

      if isinstance(x, ABCIndexClass):
          x = x._mpl_repr()

      if is_errorbar:
          for key in ('xerr', 'yerr'):
              if key in kwds:
                  kwds[key] = np.array(kwds.get(key))
          return ax.errorbar(x, y, **kwds)

      # prevent style kwarg from going to errorbar, where it is
      # unsupported
      if style is None:
          return ax.plot(x, y, **kwds)
      return ax.plot(x, y, style, **kwds)
  def _get_index_name(self):
      """
      Return a printable name for the data index, or None.

      MultiIndex level names are printed and comma-joined when at least
      one of them is not None; a flat index yields its printed name.
      """
      index = self.data.index
      if isinstance(index, ABCMultiIndex):
          names = index.names
          if not com._any_not_none(*names):
              return None
          return ','.join(pprint_thing(x) for x in names)

      name = index.name
      if name is None:
          return None
      return pprint_thing(name)
  508. @classmethod
  509. def _get_ax_layer(cls, ax, primary=True):
  510. """get left (primary) or right (secondary) axes"""
  511. if primary:
  512. return getattr(ax, 'left_ax', ax)
  513. else:
  514. return getattr(ax, 'right_ax', ax)
  515. def _get_ax(self, i):
  516. # get the twinx ax if appropriate
  517. if self.subplots:
  518. ax = self.axes[i]
  519. ax = self._maybe_right_yaxis(ax, i)
  520. self.axes[i] = ax
  521. else:
  522. ax = self.axes[0]
  523. ax = self._maybe_right_yaxis(ax, i)
  524. ax.get_yaxis().set_visible(True)
  525. return ax
  def on_right(self, i):
      """
      Tell whether series ``i`` is plotted on the secondary y axis.

      A boolean ``secondary_y`` applies to every series; a tuple, list,
      ndarray or index is checked for the ``i``-th column label.  Any
      other type falls through and yields None.
      """
      secondary = self.secondary_y
      if isinstance(secondary, bool):
          return secondary

      collection_types = (tuple, list, np.ndarray, ABCIndexClass)
      if isinstance(secondary, collection_types):
          return self.data.columns[i] in secondary
  532. def _apply_style_colors(self, colors, kwds, col_num, label):
  533. """
  534. Manage style and color based on column number and its label.
  535. Returns tuple of appropriate style and kwds which "color" may be added.
  536. """
  537. style = None
  538. if self.style is not None:
  539. if isinstance(self.style, list):
  540. try:
  541. style = self.style[col_num]
  542. except IndexError:
  543. pass
  544. elif isinstance(self.style, dict):
  545. style = self.style.get(label, style)
  546. else:
  547. style = self.style
  548. has_color = 'color' in kwds or self.colormap is not None
  549. nocolor_style = style is None or re.match('[a-z]+', style) is None
  550. if (has_color or self.subplots) and nocolor_style:
  551. kwds['color'] = colors[col_num % len(colors)]
  552. return style, kwds
  def _get_colors(self, num_colors=None, color_kwds='color'):
      """
      Ask ``_get_standard_colors`` for ``num_colors`` colors (default:
      one per series), passing the colormap and the ``color_kwds`` entry
      of ``self.kwds``.
      """
      count = self.nseries if num_colors is None else num_colors
      requested = self.kwds.get(color_kwds)
      return _get_standard_colors(num_colors=count,
                                  colormap=self.colormap,
                                  color=requested)
  559. def _parse_errorbars(self, label, err):
  560. """
  561. Look for error keyword arguments and return the actual errorbar data
  562. or return the error DataFrame/dict
  563. Error bars can be specified in several ways:
  564. Series: the user provides a pandas.Series object of the same
  565. length as the data
  566. ndarray: provides a np.ndarray of the same length as the data
  567. DataFrame/dict: error values are paired with keys matching the
  568. key in the plotted DataFrame
  569. str: the name of the column within the plotted DataFrame
  570. """
  571. if err is None:
  572. return None
  573. def match_labels(data, e):
  574. e = e.reindex(data.index)
  575. return e
  576. # key-matched DataFrame
  577. if isinstance(err, ABCDataFrame):
  578. err = match_labels(self.data, err)
  579. # key-matched dict
  580. elif isinstance(err, dict):
  581. pass
  582. # Series of error values
  583. elif isinstance(err, ABCSeries):
  584. # broadcast error series across data
  585. err = match_labels(self.data, err)
  586. err = np.atleast_2d(err)
  587. err = np.tile(err, (self.nseries, 1))
  588. # errors are a column in the dataframe
  589. elif isinstance(err, string_types):
  590. evalues = self.data[err].values
  591. self.data = self.data[self.data.columns.drop(err)]
  592. err = np.atleast_2d(evalues)
  593. err = np.tile(err, (self.nseries, 1))
  594. elif is_list_like(err):
  595. if is_iterator(err):
  596. err = np.atleast_2d(list(err))
  597. else:
  598. # raw error values
  599. err = np.atleast_2d(err)
  600. err_shape = err.shape
  601. # asymmetrical error bars
  602. if err.ndim == 3:
  603. if (err_shape[0] != self.nseries) or \
  604. (err_shape[1] != 2) or \
  605. (err_shape[2] != len(self.data)):
  606. msg = "Asymmetrical error bars should be provided " + \
  607. "with the shape (%u, 2, %u)" % \
  608. (self.nseries, len(self.data))
  609. raise ValueError(msg)
  610. # broadcast errors to each data series
  611. if len(err) == 1:
  612. err = np.tile(err, (self.nseries, 1))
  613. elif is_number(err):
  614. err = np.tile([err], (self.nseries, len(self.data)))
  615. else:
  616. msg = "No valid {label} detected".format(label=label)
  617. raise ValueError(msg)
  618. return err
  619. def _get_errorbars(self, label=None, index=None, xerr=True, yerr=True):
  620. errors = {}
  621. for kw, flag in zip(['xerr', 'yerr'], [xerr, yerr]):
  622. if flag:
  623. err = self.errors[kw]
  624. # user provided label-matched dataframe of errors
  625. if isinstance(err, (ABCDataFrame, dict)):
  626. if label is not None and label in err.keys():
  627. err = err[label]
  628. else:
  629. err = None
  630. elif index is not None and err is not None:
  631. err = err[index]
  632. if err is not None:
  633. errors[kw] = err
  634. return errors
  def _get_subplots(self):
      """List the ``Subplot`` axes of the figure that owns ``self.axes[0]``."""
      from matplotlib.axes import Subplot
      figure = self.axes[0].get_figure()
      found = []
      for candidate in figure.get_axes():
          if isinstance(candidate, Subplot):
              found.append(candidate)
      return found
  639. def _get_axes_layout(self):
  640. axes = self._get_subplots()
  641. x_set = set()
  642. y_set = set()
  643. for ax in axes:
  644. # check axes coordinates to estimate layout
  645. points = ax.get_position().get_points()
  646. x_set.add(points[0][0])
  647. y_set.add(points[0][1])
  648. return (len(y_set), len(x_set))
  class PlanePlot(MPLPlot):
      """
      Abstract class for plotting on plane, currently scatter and hexbin.
      """

      _layout_type = 'single'

      def __init__(self, data, x, y, **kwargs):
          """
          Store the x and y column labels after validating them.

          Integer ``x`` / ``y`` are treated as column positions unless
          the columns themselves hold integers.

          Raises
          ------
          ValueError
              If ``x`` or ``y`` is None, or the selected column has no
              numeric data.
          """
          MPLPlot.__init__(self, data, **kwargs)
          if x is None or y is None:
              raise ValueError(self._kind + ' requires an x and y column')

          def as_label(key):
              # positional lookup only when labels are not integers
              if is_integer(key) and not self.data.columns.holds_integer():
                  return self.data.columns[key]
              return key

          x = as_label(x)
          y = as_label(y)
          if len(self.data[x]._get_numeric_data()) == 0:
              raise ValueError(self._kind + ' requires x column to be numeric')
          if len(self.data[y]._get_numeric_data()) == 0:
              raise ValueError(self._kind + ' requires y column to be numeric')

          self.x = x
          self.y = y

      @property
      def nseries(self):
          """A plane plot always counts as a single series."""
          return 1

      def _post_plot_logic(self, ax, data):
          """Label the axes with the printed x and y column labels."""
          ylabel = pprint_thing(self.y)
          xlabel = pprint_thing(self.x)
          ax.set_ylabel(ylabel)
          ax.set_xlabel(xlabel)

      def _plot_colorbar(self, ax, **kwds):
          """
          Add a colorbar for the first collection on ``ax``.

          ``kwds`` are forwarded to ``fig.colorbar``.  When
          ``_mpl_ge_3_0_0()`` is false the colorbar axes is repositioned
          to the vertical extent of ``ax``.
          """
          # Addresses issues #10611 and #10678:
          # When plotting scatterplots and hexbinplots in IPython
          # inline backend the colorbar axis height tends not to
          # exactly match the parent axis height.
          # The difference is due to small fractional differences
          # in floating points with similar representation.
          # To deal with this, this method forces the colorbar
          # height to take the height of the parent axes.
          # For a more detailed description of the issue
          # see the following link:
          # https://github.com/ipython/ipython/issues/11215
          img = ax.collections[0]
          cbar = self.fig.colorbar(img, ax=ax, **kwds)

          if _mpl_ge_3_0_0():
              # The workaround below is no longer necessary.
              return

          (_, ax_bottom), (_, ax_top) = ax.get_position().get_points()
          (bar_left, _), (bar_right, _) = \
              cbar.ax.get_position().get_points()
          # keep the colorbar's horizontal placement, borrow the parent
          # axes' vertical placement
          cbar.ax.set_position([bar_left,
                                ax_bottom,
                                bar_right - bar_left,
                                ax_top - ax_bottom])
          # To see the discrepancy in axis heights, print the height of
          # both position boxes (top minus bottom) before repositioning.
  class ScatterPlot(PlanePlot):
      """Scatter plot of one column against another."""

      _kind = 'scatter'

      def __init__(self, data, x, y, s=None, c=None, **kwargs):
          """
          ``s`` defaults to 20 and is forwarded as a keyword; an integer
          ``c`` is treated as a column position unless the columns hold
          integers.
          """
          # hide the matplotlib default for size, in case we want to change
          # the handling of this argument later
          size = 20 if s is None else s
          super(ScatterPlot, self).__init__(data, x, y, s=size, **kwargs)
          if is_integer(c) and not self.data.columns.holds_integer():
              c = self.data.columns[c]
          self.c = c

      def _make_plot(self):
          """
          Draw the scatter, then an optional colorbar, the legend entry
          and any error bars.

          Raises
          ------
          TypeError
              If both ``c`` and a ``color`` keyword are given.
          """
          x, y, c, data = self.x, self.y, self.c, self.data
          ax = self.axes[0]

          c_is_column = is_hashable(c) and c in self.data.columns

          # plot a colorbar only if a colormap is provided or necessary
          cb = self.kwds.pop('colorbar', self.colormap or c_is_column)

          # pandas uses colormap, matplotlib uses cmap.
          cmap = self.plt.cm.get_cmap(self.colormap or 'Greys')

          color = self.kwds.pop("color", None)
          if c is not None and color is not None:
              raise TypeError('Specify exactly one of `c` and `color`')
          if color is not None:
              c_values = color
          elif c is None:
              c_values = self.plt.rcParams['patch.facecolor']
          elif c_is_column:
              c_values = self.data[c].values
          else:
              c_values = c

          label = None
          if self.legend and hasattr(self, 'label'):
              label = self.label

          scatter = ax.scatter(data[x].values, data[y].values, c=c_values,
                               label=label, cmap=cmap, **self.kwds)
          if cb:
              self._plot_colorbar(ax, label=c if c_is_column else '')

          if label is None:
              self.legend = False
          else:
              self._add_legend_handle(scatter, label)

          errors_x = self._get_errorbars(label=x, index=0, yerr=False)
          errors_y = self._get_errorbars(label=y, index=0, xerr=False)
          if len(errors_x) > 0 or len(errors_y) > 0:
              err_kwds = dict(errors_x, **errors_y)
              err_kwds['ecolor'] = scatter.get_facecolor()[0]
              ax.errorbar(data[x].values, data[y].values,
                          linestyle='none', **err_kwds)
  class HexBinPlot(PlanePlot):
      """Hexagonal binning plot of one column against another."""

      _kind = 'hexbin'

      def __init__(self, data, x, y, C=None, **kwargs):
          """
          An integer ``C`` is treated as a column position unless the
          columns hold integers.
          """
          super(HexBinPlot, self).__init__(data, x, y, **kwargs)
          positional = is_integer(C) and not self.data.columns.holds_integer()
          self.C = self.data.columns[C] if positional else C

      def _make_plot(self):
          """Draw the hexbin and, unless ``colorbar`` is falsy, a colorbar."""
          x, y, data, C = self.x, self.y, self.data, self.C
          ax = self.axes[0]

          # pandas uses colormap, matplotlib uses cmap.
          cmap = self.plt.cm.get_cmap(self.colormap or 'BuGn')
          # must be removed before ``self.kwds`` is forwarded to hexbin
          cb = self.kwds.pop('colorbar', True)

          c_values = None if C is None else data[C].values

          ax.hexbin(data[x].values, data[y].values, C=c_values, cmap=cmap,
                    **self.kwds)
          if cb:
              self._plot_colorbar(ax)

      def _make_legend(self):
          """This plot kind draws no legend."""
          return None
  777. class LinePlot(MPLPlot):
  778. _kind = 'line'
  779. _default_rot = 0
  780. orientation = 'vertical'
  def __init__(self, data, **kwargs):
      """
      Zero-fill missing values when stacking, and resolve ``x_compat``
      from ``plot_params`` unless it is passed as a keyword.
      """
      MPLPlot.__init__(self, data, **kwargs)
      if self.stacked:
          self.data = self.data.fillna(value=0)

      x_compat = plot_params['x_compat']
      if 'x_compat' in self.kwds:
          x_compat = bool(self.kwds.pop('x_compat'))
      self.x_compat = x_compat
  788. def _is_ts_plot(self):
  789. # this is slightly deceptive
  790. return not self.x_compat and self.use_index and self._use_dynamic_x()
  def _use_dynamic_x(self):
      """Delegate to ``_timeseries._use_dynamic_x`` for the first axes."""
      from pandas.plotting import _timeseries
      first_ax = self._get_ax(0)
      return _timeseries._use_dynamic_x(first_ax, self.data)
  def _make_plot(self):
      """
      Plot every column as a line, via ``_ts_plot`` for time-series
      data and ``_plot`` otherwise, registering a legend handle and
      refreshing the x limits after each column.
      """
      if self._is_ts_plot():
          from pandas.plotting._timeseries import _maybe_convert_index
          data = _maybe_convert_index(self._get_ax(0), self.data)

          x = data.index  # dummy, not used
          plotf = self._ts_plot
          series_iter = self._iter_data(data=data, keep_index=True)
      else:
          x = self._get_xticks(convert_period=True)
          plotf = self._plot
          series_iter = self._iter_data()

      stacking_id = self._get_stacking_id()
      is_errorbar = com._any_not_none(*self.errors.values())

      colors = self._get_colors()
      for i, (label, y) in enumerate(series_iter):
          ax = self._get_ax(i)
          style, kwds = self._apply_style_colors(colors, self.kwds.copy(),
                                                 i, label)

          kwds = dict(kwds, **self._get_errorbars(label=label, index=i))

          label = pprint_thing(label)  # .encode('utf-8')
          kwds['label'] = label

          newlines = plotf(ax, x, y, style=style, column_num=i,
                           stacking_id=stacking_id,
                           is_errorbar=is_errorbar,
                           **kwds)
          self._add_legend_handle(newlines[0], label, index=i)

          left, right = _get_xlim(_get_all_lines(ax))
          ax.set_xlim(left, right)
  @classmethod
  def _plot(cls, ax, x, y, style=None, column_num=None,
            stacking_id=None, **kwds):
      """
      Plot one column, stacked on the earlier ones when ``stacking_id``
      is set, and return what ``MPLPlot._plot`` returns.
      """
      # column_num is used to get the target column from protf in line and
      # area plots
      first_column = column_num == 0
      if first_column:
          cls._initialize_stacker(ax, stacking_id, len(y))
      stacked_y = cls._get_stacked_values(ax, stacking_id, y, kwds['label'])
      drawn = MPLPlot._plot(ax, x, stacked_y, style=style, **kwds)
      # the running totals are advanced by the raw values
      cls._update_stacker(ax, stacking_id, y)
      return drawn
  @classmethod
  def _ts_plot(cls, ax, x, data, style=None, **kwds):
      """
      Time-series variant of ``_plot``: resample if needed, decorate the
      axes (and any left/right twin) with the frequency, plot against
      ``data.index`` and format the date axis.
      """
      from pandas.plotting import _timeseries as ts

      # accept x to be consistent with normal plot func,
      # x is not passed to tsplot as it uses data.index as x coordinate
      # column_num must be in kwds for stacking purpose
      freq, data = ts._maybe_resample(data, ax, kwds)

      # Set ax with freq info
      ts._decorate_axes(ax, freq, kwds)
      # digging deeper
      for twin_attr in ('left_ax', 'right_ax'):
          if hasattr(ax, twin_attr):
              ts._decorate_axes(getattr(ax, twin_attr), freq, kwds)
      ax._plot_data.append((data, cls._kind, kwds))

      lines = cls._plot(ax, data.index, data.values, style=style, **kwds)
      # set date formatter, locators and rescale limits
      ts.format_dateaxis(ax, ax.freq, data.index)
      return lines
  856. def _get_stacking_id(self):
  857. if self.stacked:
  858. return id(self.data)
  859. else:
  860. return None
  861. @classmethod
  862. def _initialize_stacker(cls, ax, stacking_id, n):
  863. if stacking_id is None:
  864. return
  865. if not hasattr(ax, '_stacker_pos_prior'):
  866. ax._stacker_pos_prior = {}
  867. if not hasattr(ax, '_stacker_neg_prior'):
  868. ax._stacker_neg_prior = {}
  869. ax._stacker_pos_prior[stacking_id] = np.zeros(n)
  870. ax._stacker_neg_prior[stacking_id] = np.zeros(n)
  871. @classmethod
  872. def _get_stacked_values(cls, ax, stacking_id, values, label):
  873. if stacking_id is None:
  874. return values
  875. if not hasattr(ax, '_stacker_pos_prior'):
  876. # stacker may not be initialized for subplots
  877. cls._initialize_stacker(ax, stacking_id, len(values))
  878. if (values >= 0).all():
  879. return ax._stacker_pos_prior[stacking_id] + values
  880. elif (values <= 0).all():
  881. return ax._stacker_neg_prior[stacking_id] + values
  882. raise ValueError('When stacked is True, each column must be either '
  883. 'all positive or negative.'
  884. '{0} contains both positive and negative values'
  885. .format(label))
  886. @classmethod
  887. def _update_stacker(cls, ax, stacking_id, values):
  888. if stacking_id is None:
  889. return
  890. if (values >= 0).all():
  891. ax._stacker_pos_prior[stacking_id] += values
  892. elif (values <= 0).all():
  893. ax._stacker_neg_prior[stacking_id] += values
  894. def _post_plot_logic(self, ax, data):
  895. condition = (not self._use_dynamic_x() and
  896. data.index.is_all_dates and
  897. not self.subplots or
  898. (self.subplots and self.sharex))
  899. index_name = self._get_index_name()
  900. if condition:
  901. # irregular TS rotated 30 deg. by default
  902. # probably a better place to check / set this.
  903. if not self._rot_set:
  904. self.rot = 30
  905. format_date_labels(ax, rot=self.rot)
  906. if index_name is not None and self.use_index:
  907. ax.set_xlabel(index_name)
  908. class AreaPlot(LinePlot):
  909. _kind = 'area'
  910. def __init__(self, data, **kwargs):
  911. kwargs.setdefault('stacked', True)
  912. data = data.fillna(value=0)
  913. LinePlot.__init__(self, data, **kwargs)
  914. if not self.stacked:
  915. # use smaller alpha to distinguish overlap
  916. self.kwds.setdefault('alpha', 0.5)
  917. if self.logy or self.loglog:
  918. raise ValueError("Log-y scales are not supported in area plot")
  919. @classmethod
  920. def _plot(cls, ax, x, y, style=None, column_num=None,
  921. stacking_id=None, is_errorbar=False, **kwds):
  922. if column_num == 0:
  923. cls._initialize_stacker(ax, stacking_id, len(y))
  924. y_values = cls._get_stacked_values(ax, stacking_id, y, kwds['label'])
  925. # need to remove label, because subplots uses mpl legend as it is
  926. line_kwds = kwds.copy()
  927. line_kwds.pop('label')
  928. lines = MPLPlot._plot(ax, x, y_values, style=style, **line_kwds)
  929. # get data from the line to get coordinates for fill_between
  930. xdata, y_values = lines[0].get_data(orig=False)
  931. # unable to use ``_get_stacked_values`` here to get starting point
  932. if stacking_id is None:
  933. start = np.zeros(len(y))
  934. elif (y >= 0).all():
  935. start = ax._stacker_pos_prior[stacking_id]
  936. elif (y <= 0).all():
  937. start = ax._stacker_neg_prior[stacking_id]
  938. else:
  939. start = np.zeros(len(y))
  940. if 'color' not in kwds:
  941. kwds['color'] = lines[0].get_color()
  942. rect = ax.fill_between(xdata, start, y_values, **kwds)
  943. cls._update_stacker(ax, stacking_id, y)
  944. # LinePlot expects list of artists
  945. res = [rect]
  946. return res
  947. def _post_plot_logic(self, ax, data):
  948. LinePlot._post_plot_logic(self, ax, data)
  949. if self.ylim is None:
  950. if (data >= 0).all().all():
  951. ax.set_ylim(0, None)
  952. elif (data <= 0).all().all():
  953. ax.set_ylim(None, 0)
  954. class BarPlot(MPLPlot):
  955. _kind = 'bar'
  956. _default_rot = 90
  957. orientation = 'vertical'
  958. def __init__(self, data, **kwargs):
  959. # we have to treat a series differently than a
  960. # 1-column DataFrame w.r.t. color handling
  961. self._is_series = isinstance(data, ABCSeries)
  962. self.bar_width = kwargs.pop('width', 0.5)
  963. pos = kwargs.pop('position', 0.5)
  964. kwargs.setdefault('align', 'center')
  965. self.tick_pos = np.arange(len(data))
  966. self.bottom = kwargs.pop('bottom', 0)
  967. self.left = kwargs.pop('left', 0)
  968. self.log = kwargs.pop('log', False)
  969. MPLPlot.__init__(self, data, **kwargs)
  970. if self.stacked or self.subplots:
  971. self.tickoffset = self.bar_width * pos
  972. if kwargs['align'] == 'edge':
  973. self.lim_offset = self.bar_width / 2
  974. else:
  975. self.lim_offset = 0
  976. else:
  977. if kwargs['align'] == 'edge':
  978. w = self.bar_width / self.nseries
  979. self.tickoffset = self.bar_width * (pos - 0.5) + w * 0.5
  980. self.lim_offset = w * 0.5
  981. else:
  982. self.tickoffset = self.bar_width * pos
  983. self.lim_offset = 0
  984. self.ax_pos = self.tick_pos - self.tickoffset
  985. def _args_adjust(self):
  986. if is_list_like(self.bottom):
  987. self.bottom = np.array(self.bottom)
  988. if is_list_like(self.left):
  989. self.left = np.array(self.left)
  990. @classmethod
  991. def _plot(cls, ax, x, y, w, start=0, log=False, **kwds):
  992. return ax.bar(x, y, w, bottom=start, log=log, **kwds)
  993. @property
  994. def _start_base(self):
  995. return self.bottom
  996. def _make_plot(self):
  997. import matplotlib as mpl
  998. colors = self._get_colors()
  999. ncolors = len(colors)
  1000. pos_prior = neg_prior = np.zeros(len(self.data))
  1001. K = self.nseries
  1002. for i, (label, y) in enumerate(self._iter_data(fillna=0)):
  1003. ax = self._get_ax(i)
  1004. kwds = self.kwds.copy()
  1005. if self._is_series:
  1006. kwds['color'] = colors
  1007. else:
  1008. kwds['color'] = colors[i % ncolors]
  1009. errors = self._get_errorbars(label=label, index=i)
  1010. kwds = dict(kwds, **errors)
  1011. label = pprint_thing(label)
  1012. if (('yerr' in kwds) or ('xerr' in kwds)) \
  1013. and (kwds.get('ecolor') is None):
  1014. kwds['ecolor'] = mpl.rcParams['xtick.color']
  1015. start = 0
  1016. if self.log and (y >= 1).all():
  1017. start = 1
  1018. start = start + self._start_base
  1019. if self.subplots:
  1020. w = self.bar_width / 2
  1021. rect = self._plot(ax, self.ax_pos + w, y, self.bar_width,
  1022. start=start, label=label,
  1023. log=self.log, **kwds)
  1024. ax.set_title(label)
  1025. elif self.stacked:
  1026. mask = y > 0
  1027. start = np.where(mask, pos_prior, neg_prior) + self._start_base
  1028. w = self.bar_width / 2
  1029. rect = self._plot(ax, self.ax_pos + w, y, self.bar_width,
  1030. start=start, label=label,
  1031. log=self.log, **kwds)
  1032. pos_prior = pos_prior + np.where(mask, y, 0)
  1033. neg_prior = neg_prior + np.where(mask, 0, y)
  1034. else:
  1035. w = self.bar_width / K
  1036. rect = self._plot(ax, self.ax_pos + (i + 0.5) * w, y, w,
  1037. start=start, label=label,
  1038. log=self.log, **kwds)
  1039. self._add_legend_handle(rect, label, index=i)
  1040. def _post_plot_logic(self, ax, data):
  1041. if self.use_index:
  1042. str_index = [pprint_thing(key) for key in data.index]
  1043. else:
  1044. str_index = [pprint_thing(key) for key in range(data.shape[0])]
  1045. name = self._get_index_name()
  1046. s_edge = self.ax_pos[0] - 0.25 + self.lim_offset
  1047. e_edge = self.ax_pos[-1] + 0.25 + self.bar_width + self.lim_offset
  1048. self._decorate_ticks(ax, name, str_index, s_edge, e_edge)
  1049. def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge):
  1050. ax.set_xlim((start_edge, end_edge))
  1051. ax.set_xticks(self.tick_pos)
  1052. ax.set_xticklabels(ticklabels)
  1053. if name is not None and self.use_index:
  1054. ax.set_xlabel(name)
  1055. class BarhPlot(BarPlot):
  1056. _kind = 'barh'
  1057. _default_rot = 0
  1058. orientation = 'horizontal'
  1059. @property
  1060. def _start_base(self):
  1061. return self.left
  1062. @classmethod
  1063. def _plot(cls, ax, x, y, w, start=0, log=False, **kwds):
  1064. return ax.barh(x, y, w, left=start, log=log, **kwds)
  1065. def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge):
  1066. # horizontal bars
  1067. ax.set_ylim((start_edge, end_edge))
  1068. ax.set_yticks(self.tick_pos)
  1069. ax.set_yticklabels(ticklabels)
  1070. if name is not None and self.use_index:
  1071. ax.set_ylabel(name)
  1072. class HistPlot(LinePlot):
  1073. _kind = 'hist'
  1074. def __init__(self, data, bins=10, bottom=0, **kwargs):
  1075. self.bins = bins # use mpl default
  1076. self.bottom = bottom
  1077. # Do not call LinePlot.__init__ which may fill nan
  1078. MPLPlot.__init__(self, data, **kwargs)
  1079. def _args_adjust(self):
  1080. if is_integer(self.bins):
  1081. # create common bin edge
  1082. values = (self.data._convert(datetime=True)._get_numeric_data())
  1083. values = np.ravel(values)
  1084. values = values[~isna(values)]
  1085. hist, self.bins = np.histogram(
  1086. values, bins=self.bins,
  1087. range=self.kwds.get('range', None),
  1088. weights=self.kwds.get('weights', None))
  1089. if is_list_like(self.bottom):
  1090. self.bottom = np.array(self.bottom)
  1091. @classmethod
  1092. def _plot(cls, ax, y, style=None, bins=None, bottom=0, column_num=0,
  1093. stacking_id=None, **kwds):
  1094. if column_num == 0:
  1095. cls._initialize_stacker(ax, stacking_id, len(bins) - 1)
  1096. y = y[~isna(y)]
  1097. base = np.zeros(len(bins) - 1)
  1098. bottom = bottom + \
  1099. cls._get_stacked_values(ax, stacking_id, base, kwds['label'])
  1100. # ignore style
  1101. n, bins, patches = ax.hist(y, bins=bins, bottom=bottom, **kwds)
  1102. cls._update_stacker(ax, stacking_id, n)
  1103. return patches
  1104. def _make_plot(self):
  1105. colors = self._get_colors()
  1106. stacking_id = self._get_stacking_id()
  1107. for i, (label, y) in enumerate(self._iter_data()):
  1108. ax = self._get_ax(i)
  1109. kwds = self.kwds.copy()
  1110. label = pprint_thing(label)
  1111. kwds['label'] = label
  1112. style, kwds = self._apply_style_colors(colors, kwds, i, label)
  1113. if style is not None:
  1114. kwds['style'] = style
  1115. kwds = self._make_plot_keywords(kwds, y)
  1116. artists = self._plot(ax, y, column_num=i,
  1117. stacking_id=stacking_id, **kwds)
  1118. self._add_legend_handle(artists[0], label, index=i)
  1119. def _make_plot_keywords(self, kwds, y):
  1120. """merge BoxPlot/KdePlot properties to passed kwds"""
  1121. # y is required for KdePlot
  1122. kwds['bottom'] = self.bottom
  1123. kwds['bins'] = self.bins
  1124. return kwds
  1125. def _post_plot_logic(self, ax, data):
  1126. if self.orientation == 'horizontal':
  1127. ax.set_xlabel('Frequency')
  1128. else:
  1129. ax.set_ylabel('Frequency')
  1130. @property
  1131. def orientation(self):
  1132. if self.kwds.get('orientation', None) == 'horizontal':
  1133. return 'horizontal'
  1134. else:
  1135. return 'vertical'
# Documentation template for the KDE plot. It is meant to be filled with
# %-formatting and expects the keys ``this-datatype``, ``sibling-datatype``
# and ``examples``. The text is emitted at runtime, so it must not be edited
# for style.
_kde_docstring = """
Generate Kernel Density Estimate plot using Gaussian kernels.
In statistics, `kernel density estimation`_ (KDE) is a non-parametric
way to estimate the probability density function (PDF) of a random
variable. This function uses Gaussian kernels and includes automatic
bandwidth determination.
.. _kernel density estimation:
https://en.wikipedia.org/wiki/Kernel_density_estimation
Parameters
----------
bw_method : str, scalar or callable, optional
The method used to calculate the estimator bandwidth. This can be
'scott', 'silverman', a scalar constant or a callable.
If None (default), 'scott' is used.
See :class:`scipy.stats.gaussian_kde` for more information.
ind : NumPy array or integer, optional
Evaluation points for the estimated PDF. If None (default),
1000 equally spaced points are used. If `ind` is a NumPy array, the
KDE is evaluated at the points passed. If `ind` is an integer,
`ind` number of equally spaced points are used.
**kwds : optional
Additional keyword arguments are documented in
:meth:`pandas.%(this-datatype)s.plot`.
Returns
-------
axes : matplotlib.axes.Axes or numpy.ndarray of them
See Also
--------
scipy.stats.gaussian_kde : Representation of a kernel-density
estimate using Gaussian kernels. This is the function used
internally to estimate the PDF.
%(sibling-datatype)s.plot.kde : Generate a KDE plot for a
%(sibling-datatype)s.
Examples
--------
%(examples)s
"""
  1173. class KdePlot(HistPlot):
  1174. _kind = 'kde'
  1175. orientation = 'vertical'
  1176. def __init__(self, data, bw_method=None, ind=None, **kwargs):
  1177. MPLPlot.__init__(self, data, **kwargs)
  1178. self.bw_method = bw_method
  1179. self.ind = ind
  1180. def _args_adjust(self):
  1181. pass
  1182. def _get_ind(self, y):
  1183. if self.ind is None:
  1184. # np.nanmax() and np.nanmin() ignores the missing values
  1185. sample_range = np.nanmax(y) - np.nanmin(y)
  1186. ind = np.linspace(np.nanmin(y) - 0.5 * sample_range,
  1187. np.nanmax(y) + 0.5 * sample_range, 1000)
  1188. elif is_integer(self.ind):
  1189. sample_range = np.nanmax(y) - np.nanmin(y)
  1190. ind = np.linspace(np.nanmin(y) - 0.5 * sample_range,
  1191. np.nanmax(y) + 0.5 * sample_range, self.ind)
  1192. else:
  1193. ind = self.ind
  1194. return ind
  1195. @classmethod
  1196. def _plot(cls, ax, y, style=None, bw_method=None, ind=None,
  1197. column_num=None, stacking_id=None, **kwds):
  1198. from scipy.stats import gaussian_kde
  1199. from scipy import __version__ as spv
  1200. y = remove_na_arraylike(y)
  1201. if LooseVersion(spv) >= '0.11.0':
  1202. gkde = gaussian_kde(y, bw_method=bw_method)
  1203. else:
  1204. gkde = gaussian_kde(y)
  1205. if bw_method is not None:
  1206. msg = ('bw_method was added in Scipy 0.11.0.' +
  1207. ' Scipy version in use is {spv}.'.format(spv=spv))
  1208. warnings.warn(msg)
  1209. y = gkde.evaluate(ind)
  1210. lines = MPLPlot._plot(ax, ind, y, style=style, **kwds)
  1211. return lines
  1212. def _make_plot_keywords(self, kwds, y):
  1213. kwds['bw_method'] = self.bw_method
  1214. kwds['ind'] = self._get_ind(y)
  1215. return kwds
  1216. def _post_plot_logic(self, ax, data):
  1217. ax.set_ylabel('Density')
  1218. class PiePlot(MPLPlot):
  1219. _kind = 'pie'
  1220. _layout_type = 'horizontal'
  1221. def __init__(self, data, kind=None, **kwargs):
  1222. data = data.fillna(value=0)
  1223. if (data < 0).any().any():
  1224. raise ValueError("{0} doesn't allow negative values".format(kind))
  1225. MPLPlot.__init__(self, data, kind=kind, **kwargs)
  1226. def _args_adjust(self):
  1227. self.grid = False
  1228. self.logy = False
  1229. self.logx = False
  1230. self.loglog = False
  1231. def _validate_color_args(self):
  1232. pass
  1233. def _make_plot(self):
  1234. colors = self._get_colors(
  1235. num_colors=len(self.data), color_kwds='colors')
  1236. self.kwds.setdefault('colors', colors)
  1237. for i, (label, y) in enumerate(self._iter_data()):
  1238. ax = self._get_ax(i)
  1239. if label is not None:
  1240. label = pprint_thing(label)
  1241. ax.set_ylabel(label)
  1242. kwds = self.kwds.copy()
  1243. def blank_labeler(label, value):
  1244. if value == 0:
  1245. return ''
  1246. else:
  1247. return label
  1248. idx = [pprint_thing(v) for v in self.data.index]
  1249. labels = kwds.pop('labels', idx)
  1250. # labels is used for each wedge's labels
  1251. # Blank out labels for values of 0 so they don't overlap
  1252. # with nonzero wedges
  1253. if labels is not None:
  1254. blabels = [blank_labeler(l, value) for
  1255. l, value in zip(labels, y)]
  1256. else:
  1257. blabels = None
  1258. results = ax.pie(y, labels=blabels, **kwds)
  1259. if kwds.get('autopct', None) is not None:
  1260. patches, texts, autotexts = results
  1261. else:
  1262. patches, texts = results
  1263. autotexts = []
  1264. if self.fontsize is not None:
  1265. for t in texts + autotexts:
  1266. t.set_fontsize(self.fontsize)
  1267. # leglabels is used for legend labels
  1268. leglabels = labels if labels is not None else idx
  1269. for p, l in zip(patches, leglabels):
  1270. self._add_legend_handle(p, l)
  1271. class BoxPlot(LinePlot):
  1272. _kind = 'box'
  1273. _layout_type = 'horizontal'
  1274. _valid_return_types = (None, 'axes', 'dict', 'both')
  1275. # namedtuple to hold results
  1276. BP = namedtuple("Boxplot", ['ax', 'lines'])
  1277. def __init__(self, data, return_type='axes', **kwargs):
  1278. # Do not call LinePlot.__init__ which may fill nan
  1279. if return_type not in self._valid_return_types:
  1280. raise ValueError(
  1281. "return_type must be {None, 'axes', 'dict', 'both'}")
  1282. self.return_type = return_type
  1283. MPLPlot.__init__(self, data, **kwargs)
  1284. def _args_adjust(self):
  1285. if self.subplots:
  1286. # Disable label ax sharing. Otherwise, all subplots shows last
  1287. # column label
  1288. if self.orientation == 'vertical':
  1289. self.sharex = False
  1290. else:
  1291. self.sharey = False
  1292. @classmethod
  1293. def _plot(cls, ax, y, column_num=None, return_type='axes', **kwds):
  1294. if y.ndim == 2:
  1295. y = [remove_na_arraylike(v) for v in y]
  1296. # Boxplot fails with empty arrays, so need to add a NaN
  1297. # if any cols are empty
  1298. # GH 8181
  1299. y = [v if v.size > 0 else np.array([np.nan]) for v in y]
  1300. else:
  1301. y = remove_na_arraylike(y)
  1302. bp = ax.boxplot(y, **kwds)
  1303. if return_type == 'dict':
  1304. return bp, bp
  1305. elif return_type == 'both':
  1306. return cls.BP(ax=ax, lines=bp), bp
  1307. else:
  1308. return ax, bp
  1309. def _validate_color_args(self):
  1310. if 'color' in self.kwds:
  1311. if self.colormap is not None:
  1312. warnings.warn("'color' and 'colormap' cannot be used "
  1313. "simultaneously. Using 'color'")
  1314. self.color = self.kwds.pop('color')
  1315. if isinstance(self.color, dict):
  1316. valid_keys = ['boxes', 'whiskers', 'medians', 'caps']
  1317. for key, values in compat.iteritems(self.color):
  1318. if key not in valid_keys:
  1319. raise ValueError("color dict contains invalid "
  1320. "key '{0}' "
  1321. "The key must be either {1}"
  1322. .format(key, valid_keys))
  1323. else:
  1324. self.color = None
  1325. # get standard colors for default
  1326. colors = _get_standard_colors(num_colors=3,
  1327. colormap=self.colormap,
  1328. color=None)
  1329. # use 2 colors by default, for box/whisker and median
  1330. # flier colors isn't needed here
  1331. # because it can be specified by ``sym`` kw
  1332. self._boxes_c = colors[0]
  1333. self._whiskers_c = colors[0]
  1334. self._medians_c = colors[2]
  1335. self._caps_c = 'k' # mpl default
  1336. def _get_colors(self, num_colors=None, color_kwds='color'):
  1337. pass
  1338. def maybe_color_bp(self, bp):
  1339. if isinstance(self.color, dict):
  1340. boxes = self.color.get('boxes', self._boxes_c)
  1341. whiskers = self.color.get('whiskers', self._whiskers_c)
  1342. medians = self.color.get('medians', self._medians_c)
  1343. caps = self.color.get('caps', self._caps_c)
  1344. else:
  1345. # Other types are forwarded to matplotlib
  1346. # If None, use default colors
  1347. boxes = self.color or self._boxes_c
  1348. whiskers = self.color or self._whiskers_c
  1349. medians = self.color or self._medians_c
  1350. caps = self.color or self._caps_c
  1351. from matplotlib.artist import setp
  1352. setp(bp['boxes'], color=boxes, alpha=1)
  1353. setp(bp['whiskers'], color=whiskers, alpha=1)
  1354. setp(bp['medians'], color=medians, alpha=1)
  1355. setp(bp['caps'], color=caps, alpha=1)
  1356. def _make_plot(self):
  1357. if self.subplots:
  1358. from pandas.core.series import Series
  1359. self._return_obj = Series()
  1360. for i, (label, y) in enumerate(self._iter_data()):
  1361. ax = self._get_ax(i)
  1362. kwds = self.kwds.copy()
  1363. ret, bp = self._plot(ax, y, column_num=i,
  1364. return_type=self.return_type, **kwds)
  1365. self.maybe_color_bp(bp)
  1366. self._return_obj[label] = ret
  1367. label = [pprint_thing(label)]
  1368. self._set_ticklabels(ax, label)
  1369. else:
  1370. y = self.data.values.T
  1371. ax = self._get_ax(0)
  1372. kwds = self.kwds.copy()
  1373. ret, bp = self._plot(ax, y, column_num=0,
  1374. return_type=self.return_type, **kwds)
  1375. self.maybe_color_bp(bp)
  1376. self._return_obj = ret
  1377. labels = [l for l, _ in self._iter_data()]
  1378. labels = [pprint_thing(l) for l in labels]
  1379. if not self.use_index:
  1380. labels = [pprint_thing(key) for key in range(len(labels))]
  1381. self._set_ticklabels(ax, labels)
  1382. def _set_ticklabels(self, ax, labels):
  1383. if self.orientation == 'vertical':
  1384. ax.set_xticklabels(labels)
  1385. else:
  1386. ax.set_yticklabels(labels)
  1387. def _make_legend(self):
  1388. pass
  1389. def _post_plot_logic(self, ax, data):
  1390. pass
  1391. @property
  1392. def orientation(self):
  1393. if self.kwds.get('vert', True):
  1394. return 'vertical'
  1395. else:
  1396. return 'horizontal'
  1397. @property
  1398. def result(self):
  1399. if self.return_type is None:
  1400. return super(BoxPlot, self).result
  1401. else:
  1402. return self._return_obj
  1403. # kinds supported by both dataframe and series
  1404. _common_kinds = ['line', 'bar', 'barh',
  1405. 'kde', 'density', 'area', 'hist', 'box']
  1406. # kinds supported by dataframe
  1407. _dataframe_kinds = ['scatter', 'hexbin']
  1408. # kinds supported only by series or dataframe single column
  1409. _series_kinds = ['pie']
  1410. _all_kinds = _common_kinds + _dataframe_kinds + _series_kinds
  1411. _klasses = [LinePlot, BarPlot, BarhPlot, KdePlot, HistPlot, BoxPlot,
  1412. ScatterPlot, HexBinPlot, AreaPlot, PiePlot]
  1413. _plot_klass = {klass._kind: klass for klass in _klasses}
  1414. def _plot(data, x=None, y=None, subplots=False,
  1415. ax=None, kind='line', **kwds):
  1416. kind = _get_standard_kind(kind.lower().strip())
  1417. if kind in _all_kinds:
  1418. klass = _plot_klass[kind]
  1419. else:
  1420. raise ValueError("%r is not a valid plot kind" % kind)
  1421. if kind in _dataframe_kinds:
  1422. if isinstance(data, ABCDataFrame):
  1423. plot_obj = klass(data, x=x, y=y, subplots=subplots, ax=ax,
  1424. kind=kind, **kwds)
  1425. else:
  1426. raise ValueError("plot kind %r can only be used for data frames"
  1427. % kind)
  1428. elif kind in _series_kinds:
  1429. if isinstance(data, ABCDataFrame):
  1430. if y is None and subplots is False:
  1431. msg = "{0} requires either y column or 'subplots=True'"
  1432. raise ValueError(msg.format(kind))
  1433. elif y is not None:
  1434. if is_integer(y) and not data.columns.holds_integer():
  1435. y = data.columns[y]
  1436. # converted to series actually. copy to not modify
  1437. data = data[y].copy()
  1438. data.index.name = y
  1439. plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
  1440. else:
  1441. if isinstance(data, ABCDataFrame):
  1442. data_cols = data.columns
  1443. if x is not None:
  1444. if is_integer(x) and not data.columns.holds_integer():
  1445. x = data_cols[x]
  1446. elif not isinstance(data[x], ABCSeries):
  1447. raise ValueError("x must be a label or position")
  1448. data = data.set_index(x)
  1449. if y is not None:
  1450. # check if we have y as int or list of ints
  1451. int_ylist = is_list_like(y) and all(is_integer(c) for c in y)
  1452. int_y_arg = is_integer(y) or int_ylist
  1453. if int_y_arg and not data.columns.holds_integer():
  1454. y = data_cols[y]
  1455. label_kw = kwds['label'] if 'label' in kwds else False
  1456. for kw in ['xerr', 'yerr']:
  1457. if (kw in kwds) and \
  1458. (isinstance(kwds[kw], string_types) or
  1459. is_integer(kwds[kw])):
  1460. try:
  1461. kwds[kw] = data[kwds[kw]]
  1462. except (IndexError, KeyError, TypeError):
  1463. pass
  1464. # don't overwrite
  1465. data = data[y].copy()
  1466. if isinstance(data, ABCSeries):
  1467. label_name = label_kw or y
  1468. data.name = label_name
  1469. else:
  1470. match = is_list_like(label_kw) and len(label_kw) == len(y)
  1471. if label_kw and not match:
  1472. raise ValueError(
  1473. "label should be list-like and same length as y"
  1474. )
  1475. label_name = label_kw or data.columns
  1476. data.columns = label_name
  1477. plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
  1478. plot_obj.generate()
  1479. plot_obj.draw()
  1480. return plot_obj.result
# Text fragments substituted into the shared 'plot' documentation template.
# Each ``df_*`` fragment has a ``series_*`` counterpart; an empty string
# means that flavor has nothing to add for that slot. The fragments are
# runtime text and must not be edited for style.

# extra plot kinds
df_kind = """- 'scatter' : scatter plot
- 'hexbin' : hexbin plot"""
series_kind = ""
# coordinate parameters
df_coord = """x : label or position, default None
y : label, position or list of label, positions, default None
Allows plotting of one column versus another"""
series_coord = ""
# flavor-specific parameters
df_unique = """stacked : boolean, default False in line and
bar plots, and True in area plot. If True, create stacked plot.
sort_columns : boolean, default False
Sort column names to determine plot ordering
secondary_y : boolean or sequence, default False
Whether to plot on the secondary y-axis
If a list/tuple, which columns to plot on secondary y-axis"""
series_unique = """label : label argument to provide to plot
secondary_y : boolean or sequence of ints, default False
If True then y-axis will be on the right"""
# axes-related parameters
df_ax = """ax : matplotlib axes object, default None
subplots : boolean, default False
Make separate subplots for each column
sharex : boolean, default True if ax is None else False
In case subplots=True, share x axis and set some x axis labels to
invisible; defaults to True if ax is None otherwise False if an ax
is passed in; Be aware, that passing in both an ax and sharex=True
will alter all x axis labels for all axis in a figure!
sharey : boolean, default False
In case subplots=True, share y axis and set some y axis labels to
invisible
layout : tuple (optional)
(rows, columns) for the layout of subplots"""
series_ax = """ax : matplotlib axes object
If not passed, uses gca()"""
# extra notes
df_note = """- If `kind` = 'scatter' and the argument `c` is the name of a dataframe
column, the values of that column are used to color each point.
- If `kind` = 'hexbin', you can control the size of the bins with the
`gridsize` argument. By default, a histogram of the counts around each
`(x, y)` point is computed. You can specify alternative aggregations
by passing values to the `C` and `reduce_C_function` arguments.
`C` specifies the value at each `(x, y)` point and `reduce_C_function`
is a function of one argument that reduces all the values in a bin to
a single number (e.g. `mean`, `max`, `sum`, `std`)."""
series_note = ""
# Substitution mappings: the keys match the ``%(...)s`` placeholders of the
# shared 'plot' template.
_shared_doc_df_kwargs = dict(klass='DataFrame', klass_obj='df',
                             klass_kind=df_kind, klass_coord=df_coord,
                             klass_ax=df_ax, klass_unique=df_unique,
                             klass_note=df_note)
_shared_doc_series_kwargs = dict(klass='Series', klass_obj='s',
                                 klass_kind=series_kind,
                                 klass_coord=series_coord, klass_ax=series_ax,
                                 klass_unique=series_unique,
                                 klass_note=series_note)
# Shared documentation template for the plot entry points. It is rendered
# with %-formatting; the placeholders are ``klass``, ``klass_obj``,
# ``klass_coord``, ``klass_kind``, ``klass_ax``, ``klass_unique`` and
# ``klass_note``. The text is emitted at runtime and must not be edited for
# style.
_shared_docs['plot'] = """
Make plots of %(klass)s using matplotlib / pylab.
*New in version 0.17.0:* Each plot kind has a corresponding method on the
``%(klass)s.plot`` accessor:
``%(klass_obj)s.plot(kind='line')`` is equivalent to
``%(klass_obj)s.plot.line()``.
Parameters
----------
data : %(klass)s
%(klass_coord)s
kind : str
- 'line' : line plot (default)
- 'bar' : vertical bar plot
- 'barh' : horizontal bar plot
- 'hist' : histogram
- 'box' : boxplot
- 'kde' : Kernel Density Estimation plot
- 'density' : same as 'kde'
- 'area' : area plot
- 'pie' : pie plot
%(klass_kind)s
%(klass_ax)s
figsize : a tuple (width, height) in inches
use_index : boolean, default True
Use index as ticks for x axis
title : string or list
Title to use for the plot. If a string is passed, print the string at
the top of the figure. If a list is passed and `subplots` is True,
print each item in the list above the corresponding subplot.
grid : boolean, default None (matlab style default)
Axis grid lines
legend : False/True/'reverse'
Place legend on axis subplots
style : list or dict
matplotlib line style per column
logx : boolean, default False
Use log scaling on x axis
logy : boolean, default False
Use log scaling on y axis
loglog : boolean, default False
Use log scaling on both x and y axes
xticks : sequence
Values to use for the xticks
yticks : sequence
Values to use for the yticks
xlim : 2-tuple/list
ylim : 2-tuple/list
rot : int, default None
Rotation for ticks (xticks for vertical, yticks for horizontal plots)
fontsize : int, default None
Font size for xticks and yticks
colormap : str or matplotlib colormap object, default None
Colormap to select colors from. If string, load colormap with that name
from matplotlib.
colorbar : boolean, optional
If True, plot colorbar (only relevant for 'scatter' and 'hexbin' plots)
position : float
Specify relative alignments for bar plot layout.
From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center)
table : boolean, Series or DataFrame, default False
If True, draw a table using the data in the DataFrame and the data will
be transposed to meet matplotlib's default layout.
If a Series or DataFrame is passed, use passed data to draw a table.
yerr : DataFrame, Series, array-like, dict and str
See :ref:`Plotting with Error Bars <visualization.errorbars>` for
detail.
xerr : same types as yerr.
%(klass_unique)s
mark_right : boolean, default True
When using a secondary_y axis, automatically mark the column
labels with "(right)" in the legend
`**kwds` : keywords
Options to pass to matplotlib plotting method
Returns
-------
axes : :class:`matplotlib.axes.Axes` or numpy.ndarray of them
Notes
-----
- See matplotlib documentation online for more on this subject
- If `kind` = 'bar' or 'barh', you can specify relative alignments
for bar plot layout by `position` keyword.
From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center)
%(klass_note)s
"""
# DataFrame plotting entry point. The decorator is given the shared 'plot'
# documentation rendered with the DataFrame substitutions, so no docstring
# is written on the function itself.
@Appender(_shared_docs['plot'] % _shared_doc_df_kwargs)
def plot_frame(data, x=None, y=None, kind='line', ax=None,
               subplots=False, sharex=None, sharey=False, layout=None,
               figsize=None, use_index=True, title=None, grid=None,
               legend=True, style=None, logx=False, logy=False, loglog=False,
               xticks=None, yticks=None, xlim=None, ylim=None,
               rot=None, fontsize=None, colormap=None, table=False,
               yerr=None, xerr=None,
               secondary_y=False, sort_columns=False,
               **kwds):
    # Every named option is passed through unchanged to the module-level
    # ``_plot`` dispatcher, together with any extra keywords.
    return _plot(data, kind=kind, x=x, y=y, ax=ax,
                 subplots=subplots, sharex=sharex, sharey=sharey,
                 layout=layout, figsize=figsize, use_index=use_index,
                 title=title, grid=grid, legend=legend,
                 style=style, logx=logx, logy=logy, loglog=loglog,
                 xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim,
                 rot=rot, fontsize=fontsize, colormap=colormap, table=table,
                 yerr=yerr, xerr=xerr,
                 secondary_y=secondary_y, sort_columns=sort_columns,
                 **kwds)
  1636. @Appender(_shared_docs['plot'] % _shared_doc_series_kwargs)
  1637. def plot_series(data, kind='line', ax=None, # Series unique
  1638. figsize=None, use_index=True, title=None, grid=None,
  1639. legend=False, style=None, logx=False, logy=False, loglog=False,
  1640. xticks=None, yticks=None, xlim=None, ylim=None,
  1641. rot=None, fontsize=None, colormap=None, table=False,
  1642. yerr=None, xerr=None,
  1643. label=None, secondary_y=False, # Series unique
  1644. **kwds):
  1645. import matplotlib.pyplot as plt
  1646. if ax is None and len(plt.get_fignums()) > 0:
  1647. ax = _gca()
  1648. ax = MPLPlot._get_ax_layer(ax)
  1649. return _plot(data, kind=kind, ax=ax,
  1650. figsize=figsize, use_index=use_index, title=title,
  1651. grid=grid, legend=legend,
  1652. style=style, logx=logx, logy=logy, loglog=loglog,
  1653. xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim,
  1654. rot=rot, fontsize=fontsize, colormap=colormap, table=table,
  1655. yerr=yerr, xerr=xerr,
  1656. label=label, secondary_y=secondary_y,
  1657. **kwds)
  1658. _shared_docs['boxplot'] = """
  1659. Make a box plot from DataFrame columns.
  1660. Make a box-and-whisker plot from DataFrame columns, optionally grouped
  1661. by some other columns. A box plot is a method for graphically depicting
  1662. groups of numerical data through their quartiles.
  1663. The box extends from the Q1 to Q3 quartile values of the data,
  1664. with a line at the median (Q2). The whiskers extend from the edges
  1665. of box to show the range of the data. The position of the whiskers
  1666. is set by default to `1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box.
  1667. Outlier points are those past the end of the whiskers.
  1668. For further details see
  1669. Wikipedia's entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`_.
  1670. Parameters
  1671. ----------
  1672. column : str or list of str, optional
  1673. Column name or list of names, or vector.
  1674. Can be any valid input to :meth:`pandas.DataFrame.groupby`.
  1675. by : str or array-like, optional
  1676. Column in the DataFrame to :meth:`pandas.DataFrame.groupby`.
  1677. One box-plot will be done per value of columns in `by`.
  1678. ax : object of class matplotlib.axes.Axes, optional
  1679. The matplotlib axes to be used by boxplot.
  1680. fontsize : float or str
  1681. Tick label font size in points or as a string (e.g., `large`).
  1682. rot : int or float, default 0
  1683. The rotation angle of labels (in degrees)
  1684. with respect to the screen coordinate system.
  1685. grid : boolean, default True
  1686. Setting this to True will show the grid.
  1687. figsize : A tuple (width, height) in inches
  1688. The size of the figure to create in matplotlib.
  1689. layout : tuple (rows, columns), optional
  1690. For example, (3, 5) will display the subplots
  1691. using 3 rows and 5 columns, starting from the top-left.
  1692. return_type : {'axes', 'dict', 'both'} or None, default 'axes'
  1693. The kind of object to return. The default is ``axes``.
  1694. * 'axes' returns the matplotlib axes the boxplot is drawn on.
  1695. * 'dict' returns a dictionary whose values are the matplotlib
  1696. Lines of the boxplot.
  1697. * 'both' returns a namedtuple with the axes and dict.
  1698. * when grouping with ``by``, a Series mapping columns to
  1699. ``return_type`` is returned.
  1700. If ``return_type`` is `None`, a NumPy array
  1701. of axes with the same shape as ``layout`` is returned.
  1702. **kwds
  1703. All other plotting keyword arguments to be passed to
  1704. :func:`matplotlib.pyplot.boxplot`.
  1705. Returns
  1706. -------
  1707. result :
  1708. The return type depends on the `return_type` parameter:
  1709. * 'axes' : object of class matplotlib.axes.Axes
  1710. * 'dict' : dict of matplotlib.lines.Line2D objects
  1711. * 'both' : a namedtuple with structure (ax, lines)
  1712. For data grouped with ``by``:
  1713. * :class:`~pandas.Series`
  1714. * :class:`~numpy.array` (for ``return_type = None``)
  1715. See Also
  1716. --------
  1717. Series.plot.hist: Make a histogram.
  1718. matplotlib.pyplot.boxplot : Matplotlib equivalent plot.
  1719. Notes
  1720. -----
  1721. Use ``return_type='dict'`` when you want to tweak the appearance
  1722. of the lines after plotting. In this case a dict containing the Lines
  1723. making up the boxes, caps, fliers, medians, and whiskers is returned.
  1724. Examples
  1725. --------
  1726. Boxplots can be created for every column in the dataframe
  1727. by ``df.boxplot()`` or indicating the columns to be used:
  1728. .. plot::
  1729. :context: close-figs
  1730. >>> np.random.seed(1234)
  1731. >>> df = pd.DataFrame(np.random.randn(10,4),
  1732. ... columns=['Col1', 'Col2', 'Col3', 'Col4'])
  1733. >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3'])
  1734. Boxplots of variables distributions grouped by the values of a third
  1735. variable can be created using the option ``by``. For instance:
  1736. .. plot::
  1737. :context: close-figs
  1738. >>> df = pd.DataFrame(np.random.randn(10, 2),
  1739. ... columns=['Col1', 'Col2'])
  1740. >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
  1741. ... 'B', 'B', 'B', 'B', 'B'])
  1742. >>> boxplot = df.boxplot(by='X')
  1743. A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot
  1744. in order to group the data by combination of the variables in the x-axis:
  1745. .. plot::
  1746. :context: close-figs
  1747. >>> df = pd.DataFrame(np.random.randn(10,3),
  1748. ... columns=['Col1', 'Col2', 'Col3'])
  1749. >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
  1750. ... 'B', 'B', 'B', 'B', 'B'])
  1751. >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A',
  1752. ... 'B', 'A', 'B', 'A', 'B'])
  1753. >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y'])
  1754. The layout of boxplot can be adjusted giving a tuple to ``layout``:
  1755. .. plot::
  1756. :context: close-figs
  1757. >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
  1758. ... layout=(2, 1))
  1759. Additional formatting can be done to the boxplot, like suppressing the grid
  1760. (``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``)
  1761. or changing the fontsize (i.e. ``fontsize=15``):
  1762. .. plot::
  1763. :context: close-figs
  1764. >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15)
  1765. The parameter ``return_type`` can be used to select the type of element
  1766. returned by `boxplot`. When ``return_type='axes'`` is selected,
  1767. the matplotlib axes on which the boxplot is drawn are returned:
  1768. >>> boxplot = df.boxplot(column=['Col1','Col2'], return_type='axes')
  1769. >>> type(boxplot)
  1770. <class 'matplotlib.axes._subplots.AxesSubplot'>
  1771. When grouping with ``by``, a Series mapping columns to ``return_type``
  1772. is returned:
  1773. >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
  1774. ... return_type='axes')
  1775. >>> type(boxplot)
  1776. <class 'pandas.core.series.Series'>
  1777. If ``return_type`` is `None`, a NumPy array of axes with the same shape
  1778. as ``layout`` is returned:
  1779. >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
  1780. ... return_type=None)
  1781. >>> type(boxplot)
  1782. <class 'numpy.ndarray'>
  1783. """
  1784. @Appender(_shared_docs['boxplot'] % _shared_doc_kwargs)
  1785. def boxplot(data, column=None, by=None, ax=None, fontsize=None,
  1786. rot=0, grid=True, figsize=None, layout=None, return_type=None,
  1787. **kwds):
  1788. # validate return_type:
  1789. if return_type not in BoxPlot._valid_return_types:
  1790. raise ValueError("return_type must be {'axes', 'dict', 'both'}")
  1791. if isinstance(data, ABCSeries):
  1792. data = data.to_frame('x')
  1793. column = 'x'
  1794. def _get_colors():
  1795. # num_colors=3 is required as method maybe_color_bp takes the colors
  1796. # in positions 0 and 2.
  1797. return _get_standard_colors(color=kwds.get('color'), num_colors=3)
  1798. def maybe_color_bp(bp):
  1799. if 'color' not in kwds:
  1800. from matplotlib.artist import setp
  1801. setp(bp['boxes'], color=colors[0], alpha=1)
  1802. setp(bp['whiskers'], color=colors[0], alpha=1)
  1803. setp(bp['medians'], color=colors[2], alpha=1)
  1804. def plot_group(keys, values, ax):
  1805. keys = [pprint_thing(x) for x in keys]
  1806. values = [np.asarray(remove_na_arraylike(v)) for v in values]
  1807. bp = ax.boxplot(values, **kwds)
  1808. if fontsize is not None:
  1809. ax.tick_params(axis='both', labelsize=fontsize)
  1810. if kwds.get('vert', 1):
  1811. ax.set_xticklabels(keys, rotation=rot)
  1812. else:
  1813. ax.set_yticklabels(keys, rotation=rot)
  1814. maybe_color_bp(bp)
  1815. # Return axes in multiplot case, maybe revisit later # 985
  1816. if return_type == 'dict':
  1817. return bp
  1818. elif return_type == 'both':
  1819. return BoxPlot.BP(ax=ax, lines=bp)
  1820. else:
  1821. return ax
  1822. colors = _get_colors()
  1823. if column is None:
  1824. columns = None
  1825. else:
  1826. if isinstance(column, (list, tuple)):
  1827. columns = column
  1828. else:
  1829. columns = [column]
  1830. if by is not None:
  1831. # Prefer array return type for 2-D plots to match the subplot layout
  1832. # https://github.com/pandas-dev/pandas/pull/12216#issuecomment-241175580
  1833. result = _grouped_plot_by_column(plot_group, data, columns=columns,
  1834. by=by, grid=grid, figsize=figsize,
  1835. ax=ax, layout=layout,
  1836. return_type=return_type)
  1837. else:
  1838. if return_type is None:
  1839. return_type = 'axes'
  1840. if layout is not None:
  1841. raise ValueError("The 'layout' keyword is not supported when "
  1842. "'by' is None")
  1843. if ax is None:
  1844. rc = {'figure.figsize': figsize} if figsize is not None else {}
  1845. ax = _gca(rc)
  1846. data = data._get_numeric_data()
  1847. if columns is None:
  1848. columns = data.columns
  1849. else:
  1850. data = data[columns]
  1851. result = plot_group(columns, data.values.T, ax)
  1852. ax.grid(grid)
  1853. return result
  1854. @Appender(_shared_docs['boxplot'] % _shared_doc_kwargs)
  1855. def boxplot_frame(self, column=None, by=None, ax=None, fontsize=None, rot=0,
  1856. grid=True, figsize=None, layout=None,
  1857. return_type=None, **kwds):
  1858. import matplotlib.pyplot as plt
  1859. _converter._WARN = False
  1860. ax = boxplot(self, column=column, by=by, ax=ax, fontsize=fontsize,
  1861. grid=grid, rot=rot, figsize=figsize, layout=layout,
  1862. return_type=return_type, **kwds)
  1863. plt.draw_if_interactive()
  1864. return ax
  1865. def scatter_plot(data, x, y, by=None, ax=None, figsize=None, grid=False,
  1866. **kwargs):
  1867. """
  1868. Make a scatter plot from two DataFrame columns
  1869. Parameters
  1870. ----------
  1871. data : DataFrame
  1872. x : Column name for the x-axis values
  1873. y : Column name for the y-axis values
  1874. ax : Matplotlib axis object
  1875. figsize : A tuple (width, height) in inches
  1876. grid : Setting this to True will show the grid
  1877. kwargs : other plotting keyword arguments
  1878. To be passed to scatter function
  1879. Returns
  1880. -------
  1881. fig : matplotlib.Figure
  1882. """
  1883. import matplotlib.pyplot as plt
  1884. kwargs.setdefault('edgecolors', 'none')
  1885. def plot_group(group, ax):
  1886. xvals = group[x].values
  1887. yvals = group[y].values
  1888. ax.scatter(xvals, yvals, **kwargs)
  1889. ax.grid(grid)
  1890. if by is not None:
  1891. fig = _grouped_plot(plot_group, data, by=by, figsize=figsize, ax=ax)
  1892. else:
  1893. if ax is None:
  1894. fig = plt.figure()
  1895. ax = fig.add_subplot(111)
  1896. else:
  1897. fig = ax.get_figure()
  1898. plot_group(data, ax)
  1899. ax.set_ylabel(pprint_thing(y))
  1900. ax.set_xlabel(pprint_thing(x))
  1901. ax.grid(grid)
  1902. return fig
  1903. def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None,
  1904. xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False,
  1905. sharey=False, figsize=None, layout=None, bins=10, **kwds):
  1906. """
  1907. Make a histogram of the DataFrame's.
  1908. A `histogram`_ is a representation of the distribution of data.
  1909. This function calls :meth:`matplotlib.pyplot.hist`, on each series in
  1910. the DataFrame, resulting in one histogram per column.
  1911. .. _histogram: https://en.wikipedia.org/wiki/Histogram
  1912. Parameters
  1913. ----------
  1914. data : DataFrame
  1915. The pandas object holding the data.
  1916. column : string or sequence
  1917. If passed, will be used to limit data to a subset of columns.
  1918. by : object, optional
  1919. If passed, then used to form histograms for separate groups.
  1920. grid : boolean, default True
  1921. Whether to show axis grid lines.
  1922. xlabelsize : int, default None
  1923. If specified changes the x-axis label size.
  1924. xrot : float, default None
  1925. Rotation of x axis labels. For example, a value of 90 displays the
  1926. x labels rotated 90 degrees clockwise.
  1927. ylabelsize : int, default None
  1928. If specified changes the y-axis label size.
  1929. yrot : float, default None
  1930. Rotation of y axis labels. For example, a value of 90 displays the
  1931. y labels rotated 90 degrees clockwise.
  1932. ax : Matplotlib axes object, default None
  1933. The axes to plot the histogram on.
  1934. sharex : boolean, default True if ax is None else False
  1935. In case subplots=True, share x axis and set some x axis labels to
  1936. invisible; defaults to True if ax is None otherwise False if an ax
  1937. is passed in.
  1938. Note that passing in both an ax and sharex=True will alter all x axis
  1939. labels for all subplots in a figure.
  1940. sharey : boolean, default False
  1941. In case subplots=True, share y axis and set some y axis labels to
  1942. invisible.
  1943. figsize : tuple
  1944. The size in inches of the figure to create. Uses the value in
  1945. `matplotlib.rcParams` by default.
  1946. layout : tuple, optional
  1947. Tuple of (rows, columns) for the layout of the histograms.
  1948. bins : integer or sequence, default 10
  1949. Number of histogram bins to be used. If an integer is given, bins + 1
  1950. bin edges are calculated and returned. If bins is a sequence, gives
  1951. bin edges, including left edge of first bin and right edge of last
  1952. bin. In this case, bins is returned unmodified.
  1953. **kwds
  1954. All other plotting keyword arguments to be passed to
  1955. :meth:`matplotlib.pyplot.hist`.
  1956. Returns
  1957. -------
  1958. axes : matplotlib.AxesSubplot or numpy.ndarray of them
  1959. See Also
  1960. --------
  1961. matplotlib.pyplot.hist : Plot a histogram using matplotlib.
  1962. Examples
  1963. --------
  1964. .. plot::
  1965. :context: close-figs
  1966. This example draws a histogram based on the length and width of
  1967. some animals, displayed in three bins
  1968. >>> df = pd.DataFrame({
  1969. ... 'length': [1.5, 0.5, 1.2, 0.9, 3],
  1970. ... 'width': [0.7, 0.2, 0.15, 0.2, 1.1]
  1971. ... }, index= ['pig', 'rabbit', 'duck', 'chicken', 'horse'])
  1972. >>> hist = df.hist(bins=3)
  1973. """
  1974. _raise_if_no_mpl()
  1975. _converter._WARN = False
  1976. if by is not None:
  1977. axes = grouped_hist(data, column=column, by=by, ax=ax, grid=grid,
  1978. figsize=figsize, sharex=sharex, sharey=sharey,
  1979. layout=layout, bins=bins, xlabelsize=xlabelsize,
  1980. xrot=xrot, ylabelsize=ylabelsize,
  1981. yrot=yrot, **kwds)
  1982. return axes
  1983. if column is not None:
  1984. if not isinstance(column, (list, np.ndarray, ABCIndexClass)):
  1985. column = [column]
  1986. data = data[column]
  1987. data = data._get_numeric_data()
  1988. naxes = len(data.columns)
  1989. fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False,
  1990. sharex=sharex, sharey=sharey, figsize=figsize,
  1991. layout=layout)
  1992. _axes = _flatten(axes)
  1993. for i, col in enumerate(com.try_sort(data.columns)):
  1994. ax = _axes[i]
  1995. ax.hist(data[col].dropna().values, bins=bins, **kwds)
  1996. ax.set_title(col)
  1997. ax.grid(grid)
  1998. _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot,
  1999. ylabelsize=ylabelsize, yrot=yrot)
  2000. fig.subplots_adjust(wspace=0.3, hspace=0.3)
  2001. return axes
  2002. def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None,
  2003. xrot=None, ylabelsize=None, yrot=None, figsize=None,
  2004. bins=10, **kwds):
  2005. """
  2006. Draw histogram of the input series using matplotlib.
  2007. Parameters
  2008. ----------
  2009. by : object, optional
  2010. If passed, then used to form histograms for separate groups
  2011. ax : matplotlib axis object
  2012. If not passed, uses gca()
  2013. grid : boolean, default True
  2014. Whether to show axis grid lines
  2015. xlabelsize : int, default None
  2016. If specified changes the x-axis label size
  2017. xrot : float, default None
  2018. rotation of x axis labels
  2019. ylabelsize : int, default None
  2020. If specified changes the y-axis label size
  2021. yrot : float, default None
  2022. rotation of y axis labels
  2023. figsize : tuple, default None
  2024. figure size in inches by default
  2025. bins : integer or sequence, default 10
  2026. Number of histogram bins to be used. If an integer is given, bins + 1
  2027. bin edges are calculated and returned. If bins is a sequence, gives
  2028. bin edges, including left edge of first bin and right edge of last
  2029. bin. In this case, bins is returned unmodified.
  2030. bins : integer, default 10
  2031. Number of histogram bins to be used
  2032. `**kwds` : keywords
  2033. To be passed to the actual plotting function
  2034. See Also
  2035. --------
  2036. matplotlib.axes.Axes.hist : Plot a histogram using matplotlib.
  2037. """
  2038. import matplotlib.pyplot as plt
  2039. if by is None:
  2040. if kwds.get('layout', None) is not None:
  2041. raise ValueError("The 'layout' keyword is not supported when "
  2042. "'by' is None")
  2043. # hack until the plotting interface is a bit more unified
  2044. fig = kwds.pop('figure', plt.gcf() if plt.get_fignums() else
  2045. plt.figure(figsize=figsize))
  2046. if (figsize is not None and tuple(figsize) !=
  2047. tuple(fig.get_size_inches())):
  2048. fig.set_size_inches(*figsize, forward=True)
  2049. if ax is None:
  2050. ax = fig.gca()
  2051. elif ax.get_figure() != fig:
  2052. raise AssertionError('passed axis not bound to passed figure')
  2053. values = self.dropna().values
  2054. ax.hist(values, bins=bins, **kwds)
  2055. ax.grid(grid)
  2056. axes = np.array([ax])
  2057. _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot,
  2058. ylabelsize=ylabelsize, yrot=yrot)
  2059. else:
  2060. if 'figure' in kwds:
  2061. raise ValueError("Cannot pass 'figure' when using the "
  2062. "'by' argument, since a new 'Figure' instance "
  2063. "will be created")
  2064. axes = grouped_hist(self, by=by, ax=ax, grid=grid, figsize=figsize,
  2065. bins=bins, xlabelsize=xlabelsize, xrot=xrot,
  2066. ylabelsize=ylabelsize, yrot=yrot, **kwds)
  2067. if hasattr(axes, 'ndim'):
  2068. if axes.ndim == 1 and len(axes) == 1:
  2069. return axes[0]
  2070. return axes
  2071. def grouped_hist(data, column=None, by=None, ax=None, bins=50, figsize=None,
  2072. layout=None, sharex=False, sharey=False, rot=90, grid=True,
  2073. xlabelsize=None, xrot=None, ylabelsize=None, yrot=None,
  2074. **kwargs):
  2075. """
  2076. Grouped histogram
  2077. Parameters
  2078. ----------
  2079. data : Series/DataFrame
  2080. column : object, optional
  2081. by : object, optional
  2082. ax : axes, optional
  2083. bins : int, default 50
  2084. figsize : tuple, optional
  2085. layout : optional
  2086. sharex : boolean, default False
  2087. sharey : boolean, default False
  2088. rot : int, default 90
  2089. grid : bool, default True
  2090. kwargs : dict, keyword arguments passed to matplotlib.Axes.hist
  2091. Returns
  2092. -------
  2093. axes : collection of Matplotlib Axes
  2094. """
  2095. _raise_if_no_mpl()
  2096. _converter._WARN = False
  2097. def plot_group(group, ax):
  2098. ax.hist(group.dropna().values, bins=bins, **kwargs)
  2099. xrot = xrot or rot
  2100. fig, axes = _grouped_plot(plot_group, data, column=column,
  2101. by=by, sharex=sharex, sharey=sharey, ax=ax,
  2102. figsize=figsize, layout=layout, rot=rot)
  2103. _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot,
  2104. ylabelsize=ylabelsize, yrot=yrot)
  2105. fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9,
  2106. hspace=0.5, wspace=0.3)
  2107. return axes
  2108. def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None,
  2109. rot=0, grid=True, ax=None, figsize=None,
  2110. layout=None, sharex=False, sharey=True, **kwds):
  2111. """
  2112. Make box plots from DataFrameGroupBy data.
  2113. Parameters
  2114. ----------
  2115. grouped : Grouped DataFrame
  2116. subplots :
  2117. * ``False`` - no subplots will be used
  2118. * ``True`` - create a subplot for each group
  2119. column : column name or list of names, or vector
  2120. Can be any valid input to groupby
  2121. fontsize : int or string
  2122. rot : label rotation angle
  2123. grid : Setting this to True will show the grid
  2124. ax : Matplotlib axis object, default None
  2125. figsize : A tuple (width, height) in inches
  2126. layout : tuple (optional)
  2127. (rows, columns) for the layout of the plot
  2128. sharex : bool, default False
  2129. Whether x-axes will be shared among subplots
  2130. .. versionadded:: 0.23.1
  2131. sharey : bool, default True
  2132. Whether y-axes will be shared among subplots
  2133. .. versionadded:: 0.23.1
  2134. `**kwds` : Keyword Arguments
  2135. All other plotting keyword arguments to be passed to
  2136. matplotlib's boxplot function
  2137. Returns
  2138. -------
  2139. dict of key/value = group key/DataFrame.boxplot return value
  2140. or DataFrame.boxplot return value in case subplots=figures=False
  2141. Examples
  2142. --------
  2143. >>> import itertools
  2144. >>> tuples = [t for t in itertools.product(range(1000), range(4))]
  2145. >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
  2146. >>> data = np.random.randn(len(index),4)
  2147. >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index)
  2148. >>>
  2149. >>> grouped = df.groupby(level='lvl1')
  2150. >>> boxplot_frame_groupby(grouped)
  2151. >>>
  2152. >>> grouped = df.unstack(level='lvl1').groupby(level=0, axis=1)
  2153. >>> boxplot_frame_groupby(grouped, subplots=False)
  2154. """
  2155. _raise_if_no_mpl()
  2156. _converter._WARN = False
  2157. if subplots is True:
  2158. naxes = len(grouped)
  2159. fig, axes = _subplots(naxes=naxes, squeeze=False,
  2160. ax=ax, sharex=sharex, sharey=sharey,
  2161. figsize=figsize, layout=layout)
  2162. axes = _flatten(axes)
  2163. from pandas.core.series import Series
  2164. ret = Series()
  2165. for (key, group), ax in zip(grouped, axes):
  2166. d = group.boxplot(ax=ax, column=column, fontsize=fontsize,
  2167. rot=rot, grid=grid, **kwds)
  2168. ax.set_title(pprint_thing(key))
  2169. ret.loc[key] = d
  2170. fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1,
  2171. right=0.9, wspace=0.2)
  2172. else:
  2173. from pandas.core.reshape.concat import concat
  2174. keys, frames = zip(*grouped)
  2175. if grouped.axis == 0:
  2176. df = concat(frames, keys=keys, axis=1)
  2177. else:
  2178. if len(frames) > 1:
  2179. df = frames[0].join(frames[1::])
  2180. else:
  2181. df = frames[0]
  2182. ret = df.boxplot(column=column, fontsize=fontsize, rot=rot,
  2183. grid=grid, ax=ax, figsize=figsize,
  2184. layout=layout, **kwds)
  2185. return ret
  2186. def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True,
  2187. figsize=None, sharex=True, sharey=True, layout=None,
  2188. rot=0, ax=None, **kwargs):
  2189. if figsize == 'default':
  2190. # allowed to specify mpl default with 'default'
  2191. warnings.warn("figsize='default' is deprecated. Specify figure"
  2192. "size by tuple instead", FutureWarning, stacklevel=4)
  2193. figsize = None
  2194. grouped = data.groupby(by)
  2195. if column is not None:
  2196. grouped = grouped[column]
  2197. naxes = len(grouped)
  2198. fig, axes = _subplots(naxes=naxes, figsize=figsize,
  2199. sharex=sharex, sharey=sharey, ax=ax,
  2200. layout=layout)
  2201. _axes = _flatten(axes)
  2202. for i, (key, group) in enumerate(grouped):
  2203. ax = _axes[i]
  2204. if numeric_only and isinstance(group, ABCDataFrame):
  2205. group = group._get_numeric_data()
  2206. plotf(group, ax, **kwargs)
  2207. ax.set_title(pprint_thing(key))
  2208. return fig, axes
  2209. def _grouped_plot_by_column(plotf, data, columns=None, by=None,
  2210. numeric_only=True, grid=False,
  2211. figsize=None, ax=None, layout=None,
  2212. return_type=None, **kwargs):
  2213. grouped = data.groupby(by)
  2214. if columns is None:
  2215. if not isinstance(by, (list, tuple)):
  2216. by = [by]
  2217. columns = data._get_numeric_data().columns.difference(by)
  2218. naxes = len(columns)
  2219. fig, axes = _subplots(naxes=naxes, sharex=True, sharey=True,
  2220. figsize=figsize, ax=ax, layout=layout)
  2221. _axes = _flatten(axes)
  2222. ax_values = []
  2223. for i, col in enumerate(columns):
  2224. ax = _axes[i]
  2225. gp_col = grouped[col]
  2226. keys, values = zip(*gp_col)
  2227. re_plotf = plotf(keys, values, ax, **kwargs)
  2228. ax.set_title(col)
  2229. ax.set_xlabel(pprint_thing(by))
  2230. ax_values.append(re_plotf)
  2231. ax.grid(grid)
  2232. from pandas.core.series import Series
  2233. result = Series(ax_values, index=columns)
  2234. # Return axes in multiplot case, maybe revisit later # 985
  2235. if return_type is None:
  2236. result = axes
  2237. byline = by[0] if len(by) == 1 else by
  2238. fig.suptitle('Boxplot grouped by {byline}'.format(byline=byline))
  2239. fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2)
  2240. return result
  2241. class BasePlotMethods(PandasObject):
  2242. def __init__(self, data):
  2243. self._parent = data # can be Series or DataFrame
  2244. def __call__(self, *args, **kwargs):
  2245. raise NotImplementedError
  2246. class SeriesPlotMethods(BasePlotMethods):
  2247. """
  2248. Series plotting accessor and method.
  2249. Examples
  2250. --------
  2251. >>> s.plot.line()
  2252. >>> s.plot.bar()
  2253. >>> s.plot.hist()
  2254. Plotting methods can also be accessed by calling the accessor as a method
  2255. with the ``kind`` argument:
  2256. ``s.plot(kind='line')`` is equivalent to ``s.plot.line()``
  2257. """
  2258. def __call__(self, kind='line', ax=None,
  2259. figsize=None, use_index=True, title=None, grid=None,
  2260. legend=False, style=None, logx=False, logy=False,
  2261. loglog=False, xticks=None, yticks=None,
  2262. xlim=None, ylim=None,
  2263. rot=None, fontsize=None, colormap=None, table=False,
  2264. yerr=None, xerr=None,
  2265. label=None, secondary_y=False, **kwds):
  2266. return plot_series(self._parent, kind=kind, ax=ax, figsize=figsize,
  2267. use_index=use_index, title=title, grid=grid,
  2268. legend=legend, style=style, logx=logx, logy=logy,
  2269. loglog=loglog, xticks=xticks, yticks=yticks,
  2270. xlim=xlim, ylim=ylim, rot=rot, fontsize=fontsize,
  2271. colormap=colormap, table=table, yerr=yerr,
  2272. xerr=xerr, label=label, secondary_y=secondary_y,
  2273. **kwds)
  2274. __call__.__doc__ = plot_series.__doc__
  2275. def line(self, **kwds):
  2276. """
  2277. Line plot.
  2278. Parameters
  2279. ----------
  2280. `**kwds` : optional
  2281. Additional keyword arguments are documented in
  2282. :meth:`pandas.Series.plot`.
  2283. Returns
  2284. -------
  2285. axes : :class:`matplotlib.axes.Axes` or numpy.ndarray of them
  2286. Examples
  2287. --------
  2288. .. plot::
  2289. :context: close-figs
  2290. >>> s = pd.Series([1, 3, 2])
  2291. >>> s.plot.line()
  2292. """
  2293. return self(kind='line', **kwds)
  2294. def bar(self, **kwds):
  2295. """
  2296. Vertical bar plot.
  2297. Parameters
  2298. ----------
  2299. `**kwds` : optional
  2300. Additional keyword arguments are documented in
  2301. :meth:`pandas.Series.plot`.
  2302. Returns
  2303. -------
  2304. axes : :class:`matplotlib.axes.Axes` or numpy.ndarray of them
  2305. """
  2306. return self(kind='bar', **kwds)
  2307. def barh(self, **kwds):
  2308. """
  2309. Horizontal bar plot.
  2310. Parameters
  2311. ----------
  2312. `**kwds` : optional
  2313. Additional keyword arguments are documented in
  2314. :meth:`pandas.Series.plot`.
  2315. Returns
  2316. -------
  2317. axes : :class:`matplotlib.axes.Axes` or numpy.ndarray of them
  2318. """
  2319. return self(kind='barh', **kwds)
  2320. def box(self, **kwds):
  2321. """
  2322. Boxplot.
  2323. Parameters
  2324. ----------
  2325. `**kwds` : optional
  2326. Additional keyword arguments are documented in
  2327. :meth:`pandas.Series.plot`.
  2328. Returns
  2329. -------
  2330. axes : :class:`matplotlib.axes.Axes` or numpy.ndarray of them
  2331. """
  2332. return self(kind='box', **kwds)
  2333. def hist(self, bins=10, **kwds):
  2334. """
  2335. Histogram.
  2336. Parameters
  2337. ----------
  2338. bins : integer, default 10
  2339. Number of histogram bins to be used
  2340. `**kwds` : optional
  2341. Additional keyword arguments are documented in
  2342. :meth:`pandas.Series.plot`.
  2343. Returns
  2344. -------
  2345. axes : :class:`matplotlib.axes.Axes` or numpy.ndarray of them
  2346. """
  2347. return self(kind='hist', bins=bins, **kwds)
  2348. @Appender(_kde_docstring % {
  2349. 'this-datatype': 'Series',
  2350. 'sibling-datatype': 'DataFrame',
  2351. 'examples': """
  2352. Given a Series of points randomly sampled from an unknown
  2353. distribution, estimate its PDF using KDE with automatic
  2354. bandwidth determination and plot the results, evaluating them at
  2355. 1000 equally spaced points (default):
  2356. .. plot::
  2357. :context: close-figs
  2358. >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])
  2359. >>> ax = s.plot.kde()
  2360. A scalar bandwidth can be specified. Using a small bandwidth value can
  2361. lead to over-fitting, while using a large bandwidth value may result
  2362. in under-fitting:
  2363. .. plot::
  2364. :context: close-figs
  2365. >>> ax = s.plot.kde(bw_method=0.3)
  2366. .. plot::
  2367. :context: close-figs
  2368. >>> ax = s.plot.kde(bw_method=3)
  2369. Finally, the `ind` parameter determines the evaluation points for the
  2370. plot of the estimated PDF:
  2371. .. plot::
  2372. :context: close-figs
  2373. >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5])
  2374. """.strip()
  2375. })
  2376. def kde(self, bw_method=None, ind=None, **kwds):
  2377. return self(kind='kde', bw_method=bw_method, ind=ind, **kwds)
  2378. density = kde
  def area(self, **kwds):
      """
      Draw an area plot of the Series.

      Parameters
      ----------
      **kwds : optional
          Keyword arguments are forwarded unchanged; they are documented in
          :meth:`pandas.Series.plot`.

      Returns
      -------
      axes : :class:`matplotlib.axes.Axes` or numpy.ndarray of them
      """
      # Delegate to the accessor call with the plot kind pinned to 'area'.
      axes = self(kind='area', **kwds)
      return axes
  def pie(self, **kwds):
      """
      Draw a pie chart of the Series.

      Parameters
      ----------
      **kwds : optional
          Keyword arguments are forwarded unchanged; they are documented in
          :meth:`pandas.Series.plot`.

      Returns
      -------
      axes : :class:`matplotlib.axes.Axes` or numpy.ndarray of them
      """
      # Delegate to the accessor call with the plot kind pinned to 'pie'.
      axes = self(kind='pie', **kwds)
      return axes
  2405. class FramePlotMethods(BasePlotMethods):
  2406. """DataFrame plotting accessor and method
  2407. Examples
  2408. --------
  2409. >>> df.plot.line()
  2410. >>> df.plot.scatter('x', 'y')
  2411. >>> df.plot.hexbin()
  2412. These plotting methods can also be accessed by calling the accessor as a
  2413. method with the ``kind`` argument:
  2414. ``df.plot(kind='line')`` is equivalent to ``df.plot.line()``
  2415. """
  def __call__(self, x=None, y=None, kind='line', ax=None,
               subplots=False, sharex=None, sharey=False, layout=None,
               figsize=None, use_index=True, title=None, grid=None,
               legend=True, style=None, logx=False, logy=False, loglog=False,
               xticks=None, yticks=None, xlim=None, ylim=None,
               rot=None, fontsize=None, colormap=None, table=False,
               yerr=None, xerr=None,
               secondary_y=False, sort_columns=False, **kwds):
      # Pure pass-through: every named option is handed to ``plot_frame``
      # under its own name, followed by whatever extra keywords were given.
      # ``kwds`` can never hold one of the named options (Python binds those
      # to the parameters above), so merging cannot overwrite anything.
      options = dict(
          kind=kind, x=x, y=y, ax=ax,
          subplots=subplots, sharex=sharex, sharey=sharey,
          layout=layout, figsize=figsize, use_index=use_index,
          title=title, grid=grid, legend=legend, style=style,
          logx=logx, logy=logy, loglog=loglog,
          xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim,
          rot=rot, fontsize=fontsize, colormap=colormap, table=table,
          yerr=yerr, xerr=xerr,
          secondary_y=secondary_y, sort_columns=sort_columns)
      options.update(kwds)
      # The object being plotted is the accessor's ``_parent``.
      return plot_frame(self._parent, **options)

  # Reuse the documentation of ``plot_frame`` instead of duplicating it.
  __call__.__doc__ = plot_frame.__doc__
  def line(self, x=None, y=None, **kwds):
      """
      Draw the DataFrame's columns as line plots.

      The values stored in the DataFrame are used as the coordinates of
      the plotted lines.

      Parameters
      ----------
      x : int or str, optional
          Column providing the horizontal axis, given either by position
          or by label. The DataFrame index is used when omitted.
      y : int, str, or list of them, optional
          Column(s) holding the values to plot, given either by position
          or by label. The remaining numeric columns are used when
          omitted.
      **kwds
          Further keyword arguments, passed on to
          :meth:`pandas.DataFrame.plot`.

      Returns
      -------
      axes : :class:`matplotlib.axes.Axes` or :class:`numpy.ndarray`
          An ndarray is returned when ``subplots=True``.

      See Also
      --------
      matplotlib.pyplot.plot : Plot y versus x as lines and/or markers.

      Examples
      --------

      .. plot::
          :context: close-figs

          Populations of some animals over the years.

          >>> df = pd.DataFrame({
          ...    'pig': [20, 18, 489, 675, 1776],
          ...    'horse': [4, 25, 281, 600, 1900]
          ...    }, index=[1990, 1997, 2003, 2009, 2014])
          >>> lines = df.plot.line()

      .. plot::
          :context: close-figs

          With subplots, an array of axes is returned instead.

          >>> axes = df.plot.line(subplots=True)
          >>> type(axes)
          <class 'numpy.ndarray'>

      .. plot::
          :context: close-figs

          Plotting one population against the other shows how they relate.

          >>> lines = df.plot.line(x='pig', y='horse')
      """
      # Delegate to the accessor call with the plot kind pinned to 'line'.
      axes = self(kind='line', x=x, y=y, **kwds)
      return axes
  def bar(self, x=None, y=None, **kwds):
      """
      Draw a vertical bar plot.

      A bar plot presents categorical data as rectangular bars whose
      lengths are proportional to the values they stand for. It is used to
      compare discrete categories: one axis lists the categories being
      compared and the other axis carries the measured value.

      Parameters
      ----------
      x : label or position, optional
          Allows plotting of one column versus another. The DataFrame index
          is used when not specified.
      y : label or position, optional
          Allows plotting of one column versus another. All numerical
          columns are used when not specified.
      **kwds
          Further keyword arguments, documented in
          :meth:`pandas.DataFrame.plot`.

      Returns
      -------
      axes : matplotlib.axes.Axes or np.ndarray of them
          With ``subplots=True`` an ndarray holding one
          :class:`matplotlib.axes.Axes` per column is returned.

      See Also
      --------
      pandas.DataFrame.plot.barh : Horizontal bar plot.
      pandas.DataFrame.plot : Make plots of a DataFrame.
      matplotlib.pyplot.bar : Make a bar plot with matplotlib.

      Examples
      --------
      Basic plot.

      .. plot::
          :context: close-figs

          >>> df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
          >>> ax = df.plot.bar(x='lab', y='val', rot=0)

      Plot a whole dataframe to a bar plot. Every column gets its own
      color, and every row forms a group of bars along the horizontal
      axis.

      .. plot::
          :context: close-figs

          >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
          >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
          >>> index = ['snail', 'pig', 'elephant',
          ...          'rabbit', 'giraffe', 'coyote', 'horse']
          >>> df = pd.DataFrame({'speed': speed,
          ...                    'lifespan': lifespan}, index=index)
          >>> ax = df.plot.bar(rot=0)

      Rather than nesting, the figure can be split by column with
      ``subplots=True``. A :class:`numpy.ndarray` of
      :class:`matplotlib.axes.Axes` is returned in that case.

      .. plot::
          :context: close-figs

          >>> axes = df.plot.bar(rot=0, subplots=True)
          >>> axes[1].legend(loc=2)  # doctest: +SKIP

      Plot a single column.

      .. plot::
          :context: close-figs

          >>> ax = df.plot.bar(y='speed', rot=0)

      Plot only selected categories for the DataFrame.

      .. plot::
          :context: close-figs

          >>> ax = df.plot.bar(x='lifespan', rot=0)
      """
      # Delegate to the accessor call with the plot kind pinned to 'bar'.
      axes = self(kind='bar', x=x, y=y, **kwds)
      return axes
  def barh(self, x=None, y=None, **kwds):
      """
      Draw a horizontal bar plot.

      A horizontal bar plot presents quantitative data as rectangular bars
      whose lengths are proportional to the values they stand for. It is
      used to compare discrete categories: one axis lists the categories
      being compared and the other axis carries the measured value.

      Parameters
      ----------
      x : label or position, default DataFrame.index
          Column to be used for categories.
      y : label or position, default All numeric columns in dataframe
          Columns to be plotted from the DataFrame.
      **kwds
          Further keyword arguments, passed on to
          :meth:`pandas.DataFrame.plot`.

      Returns
      -------
      axes : :class:`matplotlib.axes.Axes` or numpy.ndarray of them.

      See Also
      --------
      pandas.DataFrame.plot.bar: Vertical bar plot.
      pandas.DataFrame.plot : Make plots of DataFrame using matplotlib.
      matplotlib.axes.Axes.bar : Plot a vertical bar plot using matplotlib.

      Examples
      --------
      Basic example

      .. plot::
          :context: close-figs

          >>> df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
          >>> ax = df.plot.barh(x='lab', y='val')

      Plot a whole DataFrame to a horizontal bar plot

      .. plot::
          :context: close-figs

          >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
          >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
          >>> index = ['snail', 'pig', 'elephant',
          ...          'rabbit', 'giraffe', 'coyote', 'horse']
          >>> df = pd.DataFrame({'speed': speed,
          ...                    'lifespan': lifespan}, index=index)
          >>> ax = df.plot.barh()

      Plot a column of the DataFrame to a horizontal bar plot

      .. plot::
          :context: close-figs

          >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
          >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
          >>> index = ['snail', 'pig', 'elephant',
          ...          'rabbit', 'giraffe', 'coyote', 'horse']
          >>> df = pd.DataFrame({'speed': speed,
          ...                    'lifespan': lifespan}, index=index)
          >>> ax = df.plot.barh(y='speed')

      Plot DataFrame versus the desired column

      .. plot::
          :context: close-figs

          >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
          >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
          >>> index = ['snail', 'pig', 'elephant',
          ...          'rabbit', 'giraffe', 'coyote', 'horse']
          >>> df = pd.DataFrame({'speed': speed,
          ...                    'lifespan': lifespan}, index=index)
          >>> ax = df.plot.barh(x='lifespan')
      """
      # Delegate to the accessor call with the plot kind pinned to 'barh'.
      axes = self(kind='barh', x=x, y=y, **kwds)
      return axes
  def box(self, by=None, **kwds):
      """
      Draw a box plot of the DataFrame columns.

      A box plot graphically depicts groups of numerical data through
      their quartiles. The box spans the Q1 to Q3 quartile values of the
      data and is crossed by a line at the median (Q2). Whiskers extend
      from the edges of the box to show the range of the data; by default
      they are placed 1.5*IQR (IQR = Q3 - Q1) from the edges of the box.
      Points beyond the end of the whiskers are outliers.

      For further details see Wikipedia's
      entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`__.

      Keep in mind when using this chart that the box and the whiskers
      can overlap, which is very common when plotting small sets of data.

      Parameters
      ----------
      by : string or sequence
          Column in the DataFrame to group by.
      **kwds : optional
          Further keyword arguments, documented in
          :meth:`pandas.DataFrame.plot`.

      Returns
      -------
      axes : :class:`matplotlib.axes.Axes` or numpy.ndarray of them

      See Also
      --------
      pandas.DataFrame.boxplot: Another method to draw a box plot.
      pandas.Series.plot.box: Draw a box plot from a Series object.
      matplotlib.pyplot.boxplot: Draw a box plot in matplotlib.

      Examples
      --------
      Draw a box plot from a DataFrame with four columns of randomly
      generated data.

      .. plot::
          :context: close-figs

          >>> data = np.random.randn(25, 4)
          >>> df = pd.DataFrame(data, columns=list('ABCD'))
          >>> ax = df.plot.box()
      """
      # Delegate to the accessor call with the plot kind pinned to 'box'.
      axes = self(kind='box', by=by, **kwds)
      return axes
  def hist(self, by=None, bins=10, **kwds):
      """
      Draw one histogram of the DataFrame's columns.

      A histogram represents the distribution of data. The values of all
      given Series in the DataFrame are grouped into bins, and all bins are
      drawn in a single :class:`matplotlib.axes.Axes`. This is useful when
      the DataFrame's Series are in a similar scale.

      Parameters
      ----------
      by : str or sequence, optional
          Column in the DataFrame to group by.
      bins : int, default 10
          Number of histogram bins to be used.
      **kwds
          Further keyword arguments, documented in
          :meth:`pandas.DataFrame.plot`.

      Returns
      -------
      axes : matplotlib.AxesSubplot histogram.

      See Also
      --------
      DataFrame.hist : Draw histograms per DataFrame's Series.
      Series.hist : Draw a histogram with Series' data.

      Examples
      --------
      Rolling one die 6000 times, each value is expected to come up around
      1000 times. Rolling two dice and summing the result gives a quite
      different distribution. A histogram illustrates both.

      .. plot::
          :context: close-figs

          >>> df = pd.DataFrame(
          ...     np.random.randint(1, 7, 6000),
          ...     columns = ['one'])
          >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
          >>> ax = df.plot.hist(bins=12, alpha=0.5)
      """
      # Delegate to the accessor call with the plot kind pinned to 'hist'.
      axes = self(kind='hist', by=by, bins=bins, **kwds)
      return axes
  # The documentation of this method is not written inline: the shared
  # ``_kde_docstring`` template is filled in with DataFrame-specific wording
  # and examples and handed to ``Appender``.
  # NOTE(review): ``Appender`` presumably attaches the rendered text as the
  # method's docstring -- confirm in ``pandas.util._decorators``.
  # The examples text below is reST: blank lines separate paragraphs from
  # the ``.. plot::`` directives, and directive options/content are indented
  # under their directive. The 8-space base indent is assumed to line up
  # with the template body -- confirm against ``_kde_docstring``.
  @Appender(_kde_docstring % {
      'this-datatype': 'DataFrame',
      'sibling-datatype': 'Series',
      'examples': """
        Given several Series of points randomly sampled from unknown
        distributions, estimate their PDFs using KDE with automatic
        bandwidth determination and plot the results, evaluating them at
        1000 equally spaced points (default):

        .. plot::
            :context: close-figs

            >>> df = pd.DataFrame({
            ...     'x': [1, 2, 2.5, 3, 3.5, 4, 5],
            ...     'y': [4, 4, 4.5, 5, 5.5, 6, 6],
            ... })
            >>> ax = df.plot.kde()

        A scalar bandwidth can be specified. Using a small bandwidth value can
        lead to over-fitting, while using a large bandwidth value may result
        in under-fitting:

        .. plot::
            :context: close-figs

            >>> ax = df.plot.kde(bw_method=0.3)

        .. plot::
            :context: close-figs

            >>> ax = df.plot.kde(bw_method=3)

        Finally, the `ind` parameter determines the evaluation points for the
        plot of the estimated PDF:

        .. plot::
            :context: close-figs

            >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
        """.strip()
  })
  def kde(self, bw_method=None, ind=None, **kwds):
      # Forward to the accessor call with the plot kind pinned to 'kde'.
      return self(kind='kde', bw_method=bw_method, ind=ind, **kwds)

  # ``density`` is a second name bound to the very same function as ``kde``.
  density = kde
  def area(self, x=None, y=None, stacked=True, **kwds):
      """
      Draw a stacked area plot.

      An area plot displays quantitative data visually.
      This function wraps the matplotlib area function.

      Parameters
      ----------
      x : label or position, optional
          Coordinates for the X axis. By default uses the index.
      y : label or position, optional
          Column to plot. By default uses all columns.
      stacked : bool, default True
          Area plots are stacked by default. Set to False to create a
          unstacked plot.
      **kwds : optional
          Additional keyword arguments are documented in
          :meth:`pandas.DataFrame.plot`.

      Returns
      -------
      matplotlib.axes.Axes or numpy.ndarray
          Area plot, or array of area plots if subplots is True

      See Also
      --------
      DataFrame.plot : Make plots of DataFrame using matplotlib / pylab.

      Examples
      --------
      Draw an area plot based on basic business metrics:

      .. plot::
          :context: close-figs

          >>> df = pd.DataFrame({
          ...     'sales': [3, 2, 3, 9, 10, 6],
          ...     'signups': [5, 5, 6, 12, 14, 13],
          ...     'visits': [20, 42, 28, 62, 81, 50],
          ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01',
          ...                        freq='M'))
          >>> ax = df.plot.area()

      Area plots are stacked by default. To produce an unstacked plot,
      pass ``stacked=False``:

      .. plot::
          :context: close-figs

          >>> ax = df.plot.area(stacked=False)

      Draw an area plot for a single column:

      .. plot::
          :context: close-figs

          >>> ax = df.plot.area(y='sales')

      Draw with a different `x`:

      .. plot::
          :context: close-figs

          >>> df = pd.DataFrame({
          ...     'sales': [3, 2, 3],
          ...     'visits': [20, 42, 28],
          ...     'day': [1, 2, 3],
          ... })
          >>> ax = df.plot.area(x='day')
      """
      # ``stacked`` used to be documented above but only reachable through
      # ``**kwds``; it is now a real parameter so the signature matches the
      # documentation. It is forwarded explicitly, with the documented
      # default of True, and ``stacked=...`` keyword callers are unaffected.
      return self(kind='area', x=x, y=y, stacked=stacked, **kwds)
  def pie(self, y=None, **kwds):
      """
      Generate a pie plot.

      A pie plot is a proportional representation of the numerical data in
      a column. This function wraps :meth:`matplotlib.pyplot.pie` for the
      specified column. If no column reference is passed and
      ``subplots=True``, a pie plot is drawn for each numerical column
      independently.

      Parameters
      ----------
      y : int or label, optional
          Label or position of the column to plot.
          If not provided, ``subplots=True`` argument must be passed.
      **kwds
          Further keyword arguments, passed on to
          :meth:`pandas.DataFrame.plot`.

      Returns
      -------
      axes : matplotlib.axes.Axes or np.ndarray of them.
          A NumPy array is returned when `subplots` is True.

      See Also
      --------
      Series.plot.pie : Generate a pie plot for a Series.
      DataFrame.plot : Make plots of a DataFrame.

      Examples
      --------
      The example below uses a DataFrame holding the mass and radius of
      some planets. Passing the 'mass' column to the pie function yields
      a pie plot of the masses.

      .. plot::
          :context: close-figs

          >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97],
          ...                    'radius': [2439.7, 6051.8, 6378.1]},
          ...                   index=['Mercury', 'Venus', 'Earth'])
          >>> plot = df.plot.pie(y='mass', figsize=(5, 5))

      .. plot::
          :context: close-figs

          >>> plot = df.plot.pie(subplots=True, figsize=(6, 3))
      """
      # Delegate to the accessor call with the plot kind pinned to 'pie'.
      axes = self(kind='pie', y=y, **kwds)
      return axes
  def scatter(self, x, y, s=None, c=None, **kwds):
      """
      Create a scatter plot with varying marker point size and color.

      Two DataFrame columns supply the coordinates of each point, and each
      point is drawn as a filled circle. This kind of plot helps to see
      complex correlations between two variables. The points could be, for
      instance, natural 2D coordinates such as longitude and latitude on a
      map or, more generally, any pair of metrics that can be plotted
      against each other.

      Parameters
      ----------
      x : int or str
          The column name or column position to be used as horizontal
          coordinates for each point.
      y : int or str
          The column name or column position to be used as vertical
          coordinates for each point.
      s : scalar or array_like, optional
          The size of each point. Possible values are:

          - A single scalar so all points have the same size.

          - A sequence of scalars, which will be used for each point's size
            recursively. For instance, when passing [2,14] all points size
            will be either 2 or 14, alternatively.

      c : str, int or array_like, optional
          The color of each point. Possible values are:

          - A single color string referred to by name, RGB or RGBA code,
            for instance 'red' or '#a98d19'.

          - A sequence of color strings referred to by name, RGB or RGBA
            code, which will be used for each point's color recursively. For
            instance ['green','yellow'] all points will be filled in green or
            yellow, alternatively.

          - A column name or position whose values will be used to color the
            marker points according to a colormap.

      **kwds
          Further keyword arguments, passed on to
          :meth:`pandas.DataFrame.plot`.

      Returns
      -------
      axes : :class:`matplotlib.axes.Axes` or numpy.ndarray of them

      See Also
      --------
      matplotlib.pyplot.scatter : Scatter plot using multiple input data
          formats.

      Examples
      --------
      Draw a scatter plot whose coordinates come from the values in a
      DataFrame's columns.

      .. plot::
          :context: close-figs

          >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1],
          ...                    [6.4, 3.2, 1], [5.9, 3.0, 2]],
          ...                   columns=['length', 'width', 'species'])
          >>> ax1 = df.plot.scatter(x='length',
          ...                       y='width',
          ...                       c='DarkBlue')

      And now with the color determined by a column as well.

      .. plot::
          :context: close-figs

          >>> ax2 = df.plot.scatter(x='length',
          ...                       y='width',
          ...                       c='species',
          ...                       colormap='viridis')
      """
      # Delegate to the accessor call with the plot kind pinned to 'scatter'.
      axes = self(kind='scatter', x=x, y=y, c=c, s=s, **kwds)
      return axes
  def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None,
             **kwds):
      """
      Generate a hexagonal binning plot.

      Generate a hexagonal binning plot of `x` versus `y`. If `C` is `None`
      (the default), this is a histogram of the number of occurrences
      of the observations at ``(x[i], y[i])``.

      If `C` is specified, it gives the values at the coordinates
      ``(x[i], y[i])``. These values are accumulated for each hexagonal
      bin and then reduced according to `reduce_C_function`,
      which defaults to NumPy's mean function (:meth:`numpy.mean`).
      (If `C` is specified, it must also be a 1-D sequence
      of the same length as `x` and `y`, or a column label.)

      Parameters
      ----------
      x : int or str
          The column label or position for x points.
      y : int or str
          The column label or position for y points.
      C : int or str, optional
          The column label or position for the value of `(x, y)` point.
      reduce_C_function : callable, default `np.mean`
          Function of one argument that reduces all the values in a bin to
          a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`).
          Only forwarded when it is not None.
      gridsize : int or tuple of (int, int), default 100
          The number of hexagons in the x-direction.
          The corresponding number of hexagons in the y-direction is
          chosen in a way that the hexagons are approximately regular.
          Alternatively, gridsize can be a tuple with two elements
          specifying the number of hexagons in the x-direction and the
          y-direction. Only forwarded when it is not None.
      **kwds
          Further keyword arguments, documented in
          :meth:`pandas.DataFrame.plot`.

      Returns
      -------
      matplotlib.AxesSubplot
          The matplotlib ``Axes`` on which the hexbin is plotted.

      See Also
      --------
      DataFrame.plot : Make plots of a DataFrame.
      matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib,
          the matplotlib function that is used under the hood.

      Examples
      --------
      The following examples are generated with random data from
      a normal distribution.

      .. plot::
          :context: close-figs

          >>> n = 10000
          >>> df = pd.DataFrame({'x': np.random.randn(n),
          ...                    'y': np.random.randn(n)})
          >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20)

      The next example uses `C` and `np.sum` as `reduce_C_function`.
      Note that `'observations'` values ranges from 1 to 5 but the result
      plot shows values up to more than 25. This is because of the
      `reduce_C_function`.

      .. plot::
          :context: close-figs

          >>> n = 500
          >>> df = pd.DataFrame({
          ...     'coord_x': np.random.uniform(-3, 3, size=n),
          ...     'coord_y': np.random.uniform(30, 50, size=n),
          ...     'observations': np.random.randint(1,5, size=n)
          ...     })
          >>> ax = df.plot.hexbin(x='coord_x',
          ...                     y='coord_y',
          ...                     C='observations',
          ...                     reduce_C_function=np.sum,
          ...                     gridsize=10,
          ...                     cmap="viridis")
      """
      # The two optional settings are added to the forwarded keywords only
      # when the caller supplied them; left at None they are not passed on.
      optional_settings = (
          ('reduce_C_function', reduce_C_function),
          ('gridsize', gridsize),
      )
      for option_name, option_value in optional_settings:
          if option_value is not None:
              kwds[option_name] = option_value

      axes = self(kind='hexbin', x=x, y=y, C=C, **kwds)
      return axes