format.py 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626
  1. # -*- coding: utf-8 -*-
  2. """
  3. Internal module for formatting output data in csv, html,
  4. and latex files. This module also applies to display formatting.
  5. """
  6. from __future__ import print_function
  7. from functools import partial
  8. import numpy as np
  9. from pandas._libs import lib
  10. from pandas._libs.tslib import format_array_from_datetime
  11. from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT
  12. from pandas.compat import StringIO, lzip, map, u, zip
  13. from pandas.core.dtypes.common import (
  14. is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype,
  15. is_extension_array_dtype, is_float, is_float_dtype, is_integer,
  16. is_integer_dtype, is_list_like, is_numeric_dtype, is_scalar,
  17. is_timedelta64_dtype)
  18. from pandas.core.dtypes.generic import (
  19. ABCIndexClass, ABCMultiIndex, ABCSeries, ABCSparseArray)
  20. from pandas.core.dtypes.missing import isna, notna
  21. from pandas import compat
  22. from pandas.core.base import PandasObject
  23. import pandas.core.common as com
  24. from pandas.core.config import get_option, set_option
  25. from pandas.core.index import Index, ensure_index
  26. from pandas.core.indexes.datetimes import DatetimeIndex
  27. from pandas.io.common import _expand_user, _stringify_path
  28. from pandas.io.formats.printing import adjoin, justify, pprint_thing
  29. from pandas.io.formats.terminal import get_terminal_size
  30. # pylint: disable=W0141
# Shared ``Parameters`` section text. ``%(header)s`` is a %-style placeholder
# for the description of the ``header`` argument. This text is appended to
# ``DataFrameFormatter.__doc__`` further down in this module.
common_docstring = """
Parameters
----------
buf : StringIO-like, optional
Buffer to write to.
columns : sequence, optional, default None
The subset of columns to write. Writes all columns by default.
col_space : int, optional
The minimum width of each column.
header : bool, optional
%(header)s.
index : bool, optional, default True
Whether to print index (row) labels.
na_rep : str, optional, default 'NaN'
String representation of NAN to use.
formatters : list or dict of one-param. functions, optional
Formatter functions to apply to columns' elements by position or
name.
The result of each function must be a unicode string.
List must be of length equal to the number of columns.
float_format : one-parameter function, optional, default None
Formatter function to apply to columns' elements if they are
floats. The result of this function must be a unicode string.
sparsify : bool, optional, default True
Set to False for a DataFrame with a hierarchical index to print
every multiindex key at each row.
index_names : bool, optional, default True
Prints the names of the indexes.
justify : str, default None
How to justify the column labels. If None uses the option from
the print configuration (controlled by set_option), 'right' out
of the box. Valid values are
* left
* right
* center
* justify
* justify-all
* start
* end
* inherit
* match-parent
* initial
* unset.
max_rows : int, optional
Maximum number of rows to display in the console.
max_cols : int, optional
Maximum number of columns to display in the console.
show_dimensions : bool, default False
Display DataFrame dimensions (number of rows by number of columns).
decimal : str, default '.'
Character recognized as decimal separator, e.g. ',' in Europe.
.. versionadded:: 0.18.0
"""

# The same values that ``common_docstring`` lists as valid for ``justify``.
_VALID_JUSTIFY_PARAMETERS = ("left", "right", "center", "justify",
                             "justify-all", "start", "end", "inherit",
                             "match-parent", "initial", "unset")

# Shared ``Returns`` section text; appended to ``DataFrameFormatter.__doc__``
# together with ``common_docstring``.
return_docstring = """
Returns
-------
str (or unicode, depending on data and options)
String representation of the dataframe.
"""
  93. class CategoricalFormatter(object):
  94. def __init__(self, categorical, buf=None, length=True, na_rep='NaN',
  95. footer=True):
  96. self.categorical = categorical
  97. self.buf = buf if buf is not None else StringIO(u(""))
  98. self.na_rep = na_rep
  99. self.length = length
  100. self.footer = footer
  101. def _get_footer(self):
  102. footer = ''
  103. if self.length:
  104. if footer:
  105. footer += ', '
  106. footer += "Length: {length}".format(length=len(self.categorical))
  107. level_info = self.categorical._repr_categories_info()
  108. # Levels are added in a newline
  109. if footer:
  110. footer += '\n'
  111. footer += level_info
  112. return compat.text_type(footer)
  113. def _get_formatted_values(self):
  114. return format_array(self.categorical.get_values(), None,
  115. float_format=None, na_rep=self.na_rep)
  116. def to_string(self):
  117. categorical = self.categorical
  118. if len(categorical) == 0:
  119. if self.footer:
  120. return self._get_footer()
  121. else:
  122. return u('')
  123. fmt_values = self._get_formatted_values()
  124. result = [u('{i}').format(i=i) for i in fmt_values]
  125. result = [i.strip() for i in result]
  126. result = u(', ').join(result)
  127. result = [u('[') + result + u(']')]
  128. if self.footer:
  129. footer = self._get_footer()
  130. if footer:
  131. result.append(footer)
  132. return compat.text_type(u('\n').join(result))
  133. class SeriesFormatter(object):
  134. def __init__(self, series, buf=None, length=True, header=True, index=True,
  135. na_rep='NaN', name=False, float_format=None, dtype=True,
  136. max_rows=None):
  137. self.series = series
  138. self.buf = buf if buf is not None else StringIO()
  139. self.name = name
  140. self.na_rep = na_rep
  141. self.header = header
  142. self.length = length
  143. self.index = index
  144. self.max_rows = max_rows
  145. if float_format is None:
  146. float_format = get_option("display.float_format")
  147. self.float_format = float_format
  148. self.dtype = dtype
  149. self.adj = _get_adjustment()
  150. self._chk_truncate()
  151. def _chk_truncate(self):
  152. from pandas.core.reshape.concat import concat
  153. max_rows = self.max_rows
  154. truncate_v = max_rows and (len(self.series) > max_rows)
  155. series = self.series
  156. if truncate_v:
  157. if max_rows == 1:
  158. row_num = max_rows
  159. series = series.iloc[:max_rows]
  160. else:
  161. row_num = max_rows // 2
  162. series = concat((series.iloc[:row_num],
  163. series.iloc[-row_num:]))
  164. self.tr_row_num = row_num
  165. self.tr_series = series
  166. self.truncate_v = truncate_v
  167. def _get_footer(self):
  168. name = self.series.name
  169. footer = u('')
  170. if getattr(self.series.index, 'freq', None) is not None:
  171. footer += 'Freq: {freq}'.format(freq=self.series.index.freqstr)
  172. if self.name is not False and name is not None:
  173. if footer:
  174. footer += ', '
  175. series_name = pprint_thing(name,
  176. escape_chars=('\t', '\r', '\n'))
  177. footer += ((u"Name: {sname}".format(sname=series_name))
  178. if name is not None else "")
  179. if (self.length is True or
  180. (self.length == 'truncate' and self.truncate_v)):
  181. if footer:
  182. footer += ', '
  183. footer += 'Length: {length}'.format(length=len(self.series))
  184. if self.dtype is not False and self.dtype is not None:
  185. name = getattr(self.tr_series.dtype, 'name', None)
  186. if name:
  187. if footer:
  188. footer += ', '
  189. footer += u'dtype: {typ}'.format(typ=pprint_thing(name))
  190. # level infos are added to the end and in a new line, like it is done
  191. # for Categoricals
  192. if is_categorical_dtype(self.tr_series.dtype):
  193. level_info = self.tr_series._values._repr_categories_info()
  194. if footer:
  195. footer += "\n"
  196. footer += level_info
  197. return compat.text_type(footer)
  198. def _get_formatted_index(self):
  199. index = self.tr_series.index
  200. is_multi = isinstance(index, ABCMultiIndex)
  201. if is_multi:
  202. have_header = any(name for name in index.names)
  203. fmt_index = index.format(names=True)
  204. else:
  205. have_header = index.name is not None
  206. fmt_index = index.format(name=True)
  207. return fmt_index, have_header
  208. def _get_formatted_values(self):
  209. values_to_format = self.tr_series._formatting_values()
  210. return format_array(values_to_format, None,
  211. float_format=self.float_format, na_rep=self.na_rep)
  212. def to_string(self):
  213. series = self.tr_series
  214. footer = self._get_footer()
  215. if len(series) == 0:
  216. return 'Series([], ' + footer + ')'
  217. fmt_index, have_header = self._get_formatted_index()
  218. fmt_values = self._get_formatted_values()
  219. if self.truncate_v:
  220. n_header_rows = 0
  221. row_num = self.tr_row_num
  222. width = self.adj.len(fmt_values[row_num - 1])
  223. if width > 3:
  224. dot_str = '...'
  225. else:
  226. dot_str = '..'
  227. # Series uses mode=center because it has single value columns
  228. # DataFrame uses mode=left
  229. dot_str = self.adj.justify([dot_str], width, mode='center')[0]
  230. fmt_values.insert(row_num + n_header_rows, dot_str)
  231. fmt_index.insert(row_num + 1, '')
  232. if self.index:
  233. result = self.adj.adjoin(3, *[fmt_index[1:], fmt_values])
  234. else:
  235. result = self.adj.adjoin(3, fmt_values)
  236. if self.header and have_header:
  237. result = fmt_index[0] + '\n' + result
  238. if footer:
  239. result += '\n' + footer
  240. return compat.text_type(u('').join(result))
  241. class TextAdjustment(object):
  242. def __init__(self):
  243. self.encoding = get_option("display.encoding")
  244. def len(self, text):
  245. return compat.strlen(text, encoding=self.encoding)
  246. def justify(self, texts, max_len, mode='right'):
  247. return justify(texts, max_len, mode=mode)
  248. def adjoin(self, space, *lists, **kwargs):
  249. return adjoin(space, *lists, strlen=self.len,
  250. justfunc=self.justify, **kwargs)
  251. class EastAsianTextAdjustment(TextAdjustment):
  252. def __init__(self):
  253. super(EastAsianTextAdjustment, self).__init__()
  254. if get_option("display.unicode.ambiguous_as_wide"):
  255. self.ambiguous_width = 2
  256. else:
  257. self.ambiguous_width = 1
  258. def len(self, text):
  259. return compat.east_asian_len(text, encoding=self.encoding,
  260. ambiguous_width=self.ambiguous_width)
  261. def justify(self, texts, max_len, mode='right'):
  262. # re-calculate padding space per str considering East Asian Width
  263. def _get_pad(t):
  264. return max_len - self.len(t) + len(t)
  265. if mode == 'left':
  266. return [x.ljust(_get_pad(x)) for x in texts]
  267. elif mode == 'center':
  268. return [x.center(_get_pad(x)) for x in texts]
  269. else:
  270. return [x.rjust(_get_pad(x)) for x in texts]
  271. def _get_adjustment():
  272. use_east_asian_width = get_option("display.unicode.east_asian_width")
  273. if use_east_asian_width:
  274. return EastAsianTextAdjustment()
  275. else:
  276. return TextAdjustment()
  277. class TableFormatter(object):
  278. is_truncated = False
  279. show_dimensions = None
  280. @property
  281. def should_show_dimensions(self):
  282. return (self.show_dimensions is True or
  283. (self.show_dimensions == 'truncate' and self.is_truncated))
  284. def _get_formatter(self, i):
  285. if isinstance(self.formatters, (list, tuple)):
  286. if is_integer(i):
  287. return self.formatters[i]
  288. else:
  289. return None
  290. else:
  291. if is_integer(i) and i not in self.columns:
  292. i = self.columns[i]
  293. return self.formatters.get(i, None)
  294. class DataFrameFormatter(TableFormatter):
  295. """
  296. Render a DataFrame
  297. self.to_string() : console-friendly tabular output
  298. self.to_html() : html table
  299. self.to_latex() : LaTeX tabular environment table
  300. """
  301. __doc__ = __doc__ if __doc__ else ''
  302. __doc__ += common_docstring + return_docstring
  303. def __init__(self, frame, buf=None, columns=None, col_space=None,
  304. header=True, index=True, na_rep='NaN', formatters=None,
  305. justify=None, float_format=None, sparsify=None,
  306. index_names=True, line_width=None, max_rows=None,
  307. max_cols=None, show_dimensions=False, decimal='.',
  308. table_id=None, render_links=False, **kwds):
  309. self.frame = frame
  310. if buf is not None:
  311. self.buf = _expand_user(_stringify_path(buf))
  312. else:
  313. self.buf = StringIO()
  314. self.show_index_names = index_names
  315. if sparsify is None:
  316. sparsify = get_option("display.multi_sparse")
  317. self.sparsify = sparsify
  318. self.float_format = float_format
  319. self.formatters = formatters if formatters is not None else {}
  320. self.na_rep = na_rep
  321. self.decimal = decimal
  322. self.col_space = col_space
  323. self.header = header
  324. self.index = index
  325. self.line_width = line_width
  326. self.max_rows = max_rows
  327. self.max_cols = max_cols
  328. self.max_rows_displayed = min(max_rows or len(self.frame),
  329. len(self.frame))
  330. self.show_dimensions = show_dimensions
  331. self.table_id = table_id
  332. self.render_links = render_links
  333. if justify is None:
  334. self.justify = get_option("display.colheader_justify")
  335. else:
  336. self.justify = justify
  337. self.kwds = kwds
  338. if columns is not None:
  339. self.columns = ensure_index(columns)
  340. self.frame = self.frame[self.columns]
  341. else:
  342. self.columns = frame.columns
  343. self._chk_truncate()
  344. self.adj = _get_adjustment()
  345. def _chk_truncate(self):
  346. """
  347. Checks whether the frame should be truncated. If so, slices
  348. the frame up.
  349. """
  350. from pandas.core.reshape.concat import concat
  351. # Cut the data to the information actually printed
  352. max_cols = self.max_cols
  353. max_rows = self.max_rows
  354. if max_cols == 0 or max_rows == 0: # assume we are in the terminal
  355. # (why else = 0)
  356. (w, h) = get_terminal_size()
  357. self.w = w
  358. self.h = h
  359. if self.max_rows == 0:
  360. dot_row = 1
  361. prompt_row = 1
  362. if self.show_dimensions:
  363. show_dimension_rows = 3
  364. n_add_rows = (self.header + dot_row + show_dimension_rows +
  365. prompt_row)
  366. # rows available to fill with actual data
  367. max_rows_adj = self.h - n_add_rows
  368. self.max_rows_adj = max_rows_adj
  369. # Format only rows and columns that could potentially fit the
  370. # screen
  371. if max_cols == 0 and len(self.frame.columns) > w:
  372. max_cols = w
  373. if max_rows == 0 and len(self.frame) > h:
  374. max_rows = h
  375. if not hasattr(self, 'max_rows_adj'):
  376. self.max_rows_adj = max_rows
  377. if not hasattr(self, 'max_cols_adj'):
  378. self.max_cols_adj = max_cols
  379. max_cols_adj = self.max_cols_adj
  380. max_rows_adj = self.max_rows_adj
  381. truncate_h = max_cols_adj and (len(self.columns) > max_cols_adj)
  382. truncate_v = max_rows_adj and (len(self.frame) > max_rows_adj)
  383. frame = self.frame
  384. if truncate_h:
  385. if max_cols_adj == 0:
  386. col_num = len(frame.columns)
  387. elif max_cols_adj == 1:
  388. frame = frame.iloc[:, :max_cols]
  389. col_num = max_cols
  390. else:
  391. col_num = (max_cols_adj // 2)
  392. frame = concat((frame.iloc[:, :col_num],
  393. frame.iloc[:, -col_num:]), axis=1)
  394. self.tr_col_num = col_num
  395. if truncate_v:
  396. if max_rows_adj == 1:
  397. row_num = max_rows
  398. frame = frame.iloc[:max_rows, :]
  399. else:
  400. row_num = max_rows_adj // 2
  401. frame = concat((frame.iloc[:row_num, :],
  402. frame.iloc[-row_num:, :]))
  403. self.tr_row_num = row_num
  404. self.tr_frame = frame
  405. self.truncate_h = truncate_h
  406. self.truncate_v = truncate_v
  407. self.is_truncated = self.truncate_h or self.truncate_v
  408. def _to_str_columns(self):
  409. """
  410. Render a DataFrame to a list of columns (as lists of strings).
  411. """
  412. frame = self.tr_frame
  413. # may include levels names also
  414. str_index = self._get_formatted_index(frame)
  415. if not is_list_like(self.header) and not self.header:
  416. stringified = []
  417. for i, c in enumerate(frame):
  418. fmt_values = self._format_col(i)
  419. fmt_values = _make_fixed_width(fmt_values, self.justify,
  420. minimum=(self.col_space or 0),
  421. adj=self.adj)
  422. stringified.append(fmt_values)
  423. else:
  424. if is_list_like(self.header):
  425. if len(self.header) != len(self.columns):
  426. raise ValueError(('Writing {ncols} cols but got {nalias} '
  427. 'aliases'
  428. .format(ncols=len(self.columns),
  429. nalias=len(self.header))))
  430. str_columns = [[label] for label in self.header]
  431. else:
  432. str_columns = self._get_formatted_column_labels(frame)
  433. stringified = []
  434. for i, c in enumerate(frame):
  435. cheader = str_columns[i]
  436. header_colwidth = max(self.col_space or 0,
  437. *(self.adj.len(x) for x in cheader))
  438. fmt_values = self._format_col(i)
  439. fmt_values = _make_fixed_width(fmt_values, self.justify,
  440. minimum=header_colwidth,
  441. adj=self.adj)
  442. max_len = max(max(self.adj.len(x) for x in fmt_values),
  443. header_colwidth)
  444. cheader = self.adj.justify(cheader, max_len, mode=self.justify)
  445. stringified.append(cheader + fmt_values)
  446. strcols = stringified
  447. if self.index:
  448. strcols.insert(0, str_index)
  449. # Add ... to signal truncated
  450. truncate_h = self.truncate_h
  451. truncate_v = self.truncate_v
  452. if truncate_h:
  453. col_num = self.tr_col_num
  454. strcols.insert(self.tr_col_num + 1, [' ...'] * (len(str_index)))
  455. if truncate_v:
  456. n_header_rows = len(str_index) - len(frame)
  457. row_num = self.tr_row_num
  458. for ix, col in enumerate(strcols):
  459. # infer from above row
  460. cwidth = self.adj.len(strcols[ix][row_num])
  461. is_dot_col = False
  462. if truncate_h:
  463. is_dot_col = ix == col_num + 1
  464. if cwidth > 3 or is_dot_col:
  465. my_str = '...'
  466. else:
  467. my_str = '..'
  468. if ix == 0:
  469. dot_mode = 'left'
  470. elif is_dot_col:
  471. cwidth = 4
  472. dot_mode = 'right'
  473. else:
  474. dot_mode = 'right'
  475. dot_str = self.adj.justify([my_str], cwidth, mode=dot_mode)[0]
  476. strcols[ix].insert(row_num + n_header_rows, dot_str)
  477. return strcols
    def to_string(self):
        """
        Render a DataFrame to a console-friendly tabular output.

        The text is written to ``self.buf``; nothing is returned. A frame
        with no columns or no rows is rendered as a short "Empty ..."
        summary. Otherwise the layout depends on ``line_width`` and
        ``max_cols``: a single block, a wrapped multi-block layout, or a
        layout re-truncated to fit the terminal width.
        """
        from pandas import Series

        frame = self.frame

        if len(frame.columns) == 0 or len(frame.index) == 0:
            info_line = (u('Empty {name}\nColumns: {col}\nIndex: {idx}')
                         .format(name=type(self.frame).__name__,
                                 col=pprint_thing(frame.columns),
                                 idx=pprint_thing(frame.index)))
            text = info_line
        else:

            strcols = self._to_str_columns()
            if self.line_width is None:  # no need to wrap around just print
                # the whole frame
                text = self.adj.adjoin(1, *strcols)
            elif (not isinstance(self.max_cols, int) or
                    self.max_cols > 0):  # need to wrap around
                text = self._join_multiline(*strcols)
            else:  # max_cols == 0. Try to fit frame to terminal
                # self.w is the terminal width recorded by _chk_truncate
                # when max_cols or max_rows is 0.
                text = self.adj.adjoin(1, *strcols).split('\n')
                max_len = Series(text).str.len().max()
                # plus truncate dot col
                dif = max_len - self.w
                # '+ 1' to avoid too wide repr (GH PR #17023)
                adj_dif = dif + 1
                col_lens = Series([Series(ele).apply(len).max()
                                   for ele in strcols])
                n_cols = len(col_lens)
                # NOTE(review): counter is incremented but never read.
                counter = 0
                # Drop columns from the middle until the overflow is gone
                # or only one column is left.
                while adj_dif > 0 and n_cols > 1:
                    counter += 1
                    mid = int(round(n_cols / 2.))
                    mid_ix = col_lens.index[mid]
                    col_len = col_lens[mid_ix]
                    # adjoin adds one
                    adj_dif -= (col_len + 1)
                    col_lens = col_lens.drop(mid_ix)
                    n_cols = len(col_lens)
                # subtract index column
                max_cols_adj = n_cols - self.index
                # GH-21180. Ensure that we print at least two.
                max_cols_adj = max(max_cols_adj, 2)
                self.max_cols_adj = max_cols_adj

                # Call again _chk_truncate to cut frame appropriately
                # and then generate string representation
                self._chk_truncate()
                strcols = self._to_str_columns()
                text = self.adj.adjoin(1, *strcols)
        # text is a single string here, so writelines() receives it as an
        # iterable of characters.
        self.buf.writelines(text)

        if self.should_show_dimensions:
            self.buf.write("\n\n[{nrows} rows x {ncols} columns]"
                           .format(nrows=len(frame), ncols=len(frame.columns)))
  532. def _join_multiline(self, *strcols):
  533. lwidth = self.line_width
  534. adjoin_width = 1
  535. strcols = list(strcols)
  536. if self.index:
  537. idx = strcols.pop(0)
  538. lwidth -= np.array([self.adj.len(x)
  539. for x in idx]).max() + adjoin_width
  540. col_widths = [np.array([self.adj.len(x) for x in col]).max() if
  541. len(col) > 0 else 0 for col in strcols]
  542. col_bins = _binify(col_widths, lwidth)
  543. nbins = len(col_bins)
  544. if self.truncate_v:
  545. nrows = self.max_rows_adj + 1
  546. else:
  547. nrows = len(self.frame)
  548. str_lst = []
  549. st = 0
  550. for i, ed in enumerate(col_bins):
  551. row = strcols[st:ed]
  552. if self.index:
  553. row.insert(0, idx)
  554. if nbins > 1:
  555. if ed <= len(strcols) and i < nbins - 1:
  556. row.append([' \\'] + [' '] * (nrows - 1))
  557. else:
  558. row.append([' '] * nrows)
  559. str_lst.append(self.adj.adjoin(adjoin_width, *row))
  560. st = ed
  561. return '\n\n'.join(str_lst)
  562. def to_latex(self, column_format=None, longtable=False, encoding=None,
  563. multicolumn=False, multicolumn_format=None, multirow=False):
  564. """
  565. Render a DataFrame to a LaTeX tabular/longtable environment output.
  566. """
  567. from pandas.io.formats.latex import LatexFormatter
  568. latex_renderer = LatexFormatter(self, column_format=column_format,
  569. longtable=longtable,
  570. multicolumn=multicolumn,
  571. multicolumn_format=multicolumn_format,
  572. multirow=multirow)
  573. if encoding is None:
  574. encoding = 'ascii' if compat.PY2 else 'utf-8'
  575. if hasattr(self.buf, 'write'):
  576. latex_renderer.write_result(self.buf)
  577. elif isinstance(self.buf, compat.string_types):
  578. import codecs
  579. with codecs.open(self.buf, 'w', encoding=encoding) as f:
  580. latex_renderer.write_result(f)
  581. else:
  582. raise TypeError('buf is not a file name and it has no write '
  583. 'method')
  584. def _format_col(self, i):
  585. frame = self.tr_frame
  586. formatter = self._get_formatter(i)
  587. values_to_format = frame.iloc[:, i]._formatting_values()
  588. return format_array(values_to_format, formatter,
  589. float_format=self.float_format, na_rep=self.na_rep,
  590. space=self.col_space, decimal=self.decimal)
  591. def to_html(self, classes=None, notebook=False, border=None):
  592. """
  593. Render a DataFrame to a html table.
  594. Parameters
  595. ----------
  596. classes : str or list-like
  597. classes to include in the `class` attribute of the opening
  598. ``<table>`` tag, in addition to the default "dataframe".
  599. notebook : {True, False}, optional, default False
  600. Whether the generated HTML is for IPython Notebook.
  601. border : int
  602. A ``border=border`` attribute is included in the opening
  603. ``<table>`` tag. Default ``pd.options.html.border``.
  604. .. versionadded:: 0.19.0
  605. """
  606. from pandas.io.formats.html import HTMLFormatter, NotebookFormatter
  607. Klass = NotebookFormatter if notebook else HTMLFormatter
  608. html = Klass(self, classes=classes, border=border).render()
  609. if hasattr(self.buf, 'write'):
  610. buffer_put_lines(self.buf, html)
  611. elif isinstance(self.buf, compat.string_types):
  612. with open(self.buf, 'w') as f:
  613. buffer_put_lines(f, html)
  614. else:
  615. raise TypeError('buf is not a file name and it has no write '
  616. ' method')
    def _get_formatted_column_labels(self, frame):
        """
        Build the header cells for every column of ``frame``.

        Returns a list with one entry per column; each entry is the list of
        header strings for that column (one per column level, plus a
        trailing empty string when the row index names are shown).
        """
        from pandas.core.index import _sparsify

        columns = frame.columns

        if isinstance(columns, ABCMultiIndex):
            # One tuple of formatted labels per column.
            fmt_columns = columns.format(sparsify=False, adjoin=False)
            fmt_columns = lzip(*fmt_columns)
            # NOTE(review): dtypes are read from self.frame while the labels
            # come from `frame`; confirm both line up when columns were
            # truncated.
            dtypes = self.frame.dtypes._values

            # if we have a Float level, they don't use leading space at all
            restrict_formatting = any(l.is_floating for l in columns.levels)
            need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))

            def space_format(x, y):
                # x: label tuple of one column, y: one label out of x.
                # Prefix a space for numeric columns unless a formatter is
                # registered under y or a float level is present.
                if (y not in self.formatters and
                        need_leadsp[x] and not restrict_formatting):
                    return ' ' + y
                return y

            # Transpose to per-level rows so _sparsify can work on levels.
            str_columns = list(zip(*[[space_format(x, y) for y in x]
                                     for x in fmt_columns]))
            if self.sparsify and len(str_columns):
                str_columns = _sparsify(str_columns)

            # Transpose back to one list of header cells per column.
            str_columns = [list(x) for x in zip(*str_columns)]
        else:
            fmt_columns = columns.format()
            # NOTE(review): same self.frame vs `frame` question as above.
            dtypes = self.frame.dtypes
            need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
            # Numeric columns without their own formatter get a leading
            # space in the header.
            str_columns = [[' ' + x if not self._get_formatter(i) and
                            need_leadsp[x] else x]
                           for i, (col, x) in enumerate(zip(columns,
                                                            fmt_columns))]

        if self.show_row_idx_names:
            # Extra empty header cell per column when index names are shown.
            for x in str_columns:
                x.append('')

        # self.str_columns = str_columns
        return str_columns
  650. @property
  651. def has_index_names(self):
  652. return _has_names(self.frame.index)
  653. @property
  654. def has_column_names(self):
  655. return _has_names(self.frame.columns)
  656. @property
  657. def show_row_idx_names(self):
  658. return all((self.has_index_names,
  659. self.index,
  660. self.show_index_names))
  661. @property
  662. def show_col_idx_names(self):
  663. return all((self.has_column_names,
  664. self.show_index_names,
  665. self.header))
  666. def _get_formatted_index(self, frame):
  667. # Note: this is only used by to_string() and to_latex(), not by
  668. # to_html().
  669. index = frame.index
  670. columns = frame.columns
  671. fmt = self._get_formatter('__index__')
  672. if isinstance(index, ABCMultiIndex):
  673. fmt_index = index.format(
  674. sparsify=self.sparsify, adjoin=False,
  675. names=self.show_row_idx_names, formatter=fmt)
  676. else:
  677. fmt_index = [index.format(
  678. name=self.show_row_idx_names, formatter=fmt)]
  679. fmt_index = [tuple(_make_fixed_width(list(x), justify='left',
  680. minimum=(self.col_space or 0),
  681. adj=self.adj)) for x in fmt_index]
  682. adjoined = self.adj.adjoin(1, *fmt_index).split('\n')
  683. # empty space for columns
  684. if self.show_col_idx_names:
  685. col_header = ['{x}'.format(x=x)
  686. for x in self._get_column_name_list()]
  687. else:
  688. col_header = [''] * columns.nlevels
  689. if self.header:
  690. return col_header + adjoined
  691. else:
  692. return adjoined
  693. def _get_column_name_list(self):
  694. names = []
  695. columns = self.frame.columns
  696. if isinstance(columns, ABCMultiIndex):
  697. names.extend('' if name is None else name
  698. for name in columns.names)
  699. else:
  700. names.append('' if columns.name is None else columns.name)
  701. return names
  702. # ----------------------------------------------------------------------
  703. # Array formatters
  704. def format_array(values, formatter, float_format=None, na_rep='NaN',
  705. digits=None, space=None, justify='right', decimal='.',
  706. leading_space=None):
  707. """
  708. Format an array for printing.
  709. Parameters
  710. ----------
  711. values
  712. formatter
  713. float_format
  714. na_rep
  715. digits
  716. space
  717. justify
  718. decimal
  719. leading_space : bool, optional
  720. Whether the array should be formatted with a leading space.
  721. When an array as a column of a Series or DataFrame, we do want
  722. the leading space to pad between columns.
  723. When formatting an Index subclass
  724. (e.g. IntervalIndex._format_native_types), we don't want the
  725. leading space since it should be left-aligned.
  726. Returns
  727. -------
  728. List[str]
  729. """
  730. if is_datetime64_dtype(values.dtype):
  731. fmt_klass = Datetime64Formatter
  732. elif is_datetime64tz_dtype(values):
  733. fmt_klass = Datetime64TZFormatter
  734. elif is_timedelta64_dtype(values.dtype):
  735. fmt_klass = Timedelta64Formatter
  736. elif is_extension_array_dtype(values.dtype):
  737. fmt_klass = ExtensionArrayFormatter
  738. elif is_float_dtype(values.dtype):
  739. fmt_klass = FloatArrayFormatter
  740. elif is_integer_dtype(values.dtype):
  741. fmt_klass = IntArrayFormatter
  742. else:
  743. fmt_klass = GenericArrayFormatter
  744. if space is None:
  745. space = get_option("display.column_space")
  746. if float_format is None:
  747. float_format = get_option("display.float_format")
  748. if digits is None:
  749. digits = get_option("display.precision")
  750. fmt_obj = fmt_klass(values, digits=digits, na_rep=na_rep,
  751. float_format=float_format, formatter=formatter,
  752. space=space, justify=justify, decimal=decimal,
  753. leading_space=leading_space)
  754. return fmt_obj.get_result()
  755. class GenericArrayFormatter(object):
  756. def __init__(self, values, digits=7, formatter=None, na_rep='NaN',
  757. space=12, float_format=None, justify='right', decimal='.',
  758. quoting=None, fixed_width=True, leading_space=None):
  759. self.values = values
  760. self.digits = digits
  761. self.na_rep = na_rep
  762. self.space = space
  763. self.formatter = formatter
  764. self.float_format = float_format
  765. self.justify = justify
  766. self.decimal = decimal
  767. self.quoting = quoting
  768. self.fixed_width = fixed_width
  769. self.leading_space = leading_space
  770. def get_result(self):
  771. fmt_values = self._format_strings()
  772. return _make_fixed_width(fmt_values, self.justify)
  773. def _format_strings(self):
  774. if self.float_format is None:
  775. float_format = get_option("display.float_format")
  776. if float_format is None:
  777. fmt_str = ('{{x: .{prec:d}g}}'
  778. .format(prec=get_option("display.precision")))
  779. float_format = lambda x: fmt_str.format(x=x)
  780. else:
  781. float_format = self.float_format
  782. formatter = (
  783. self.formatter if self.formatter is not None else
  784. (lambda x: pprint_thing(x, escape_chars=('\t', '\r', '\n'))))
  785. def _format(x):
  786. if self.na_rep is not None and is_scalar(x) and isna(x):
  787. if x is None:
  788. return 'None'
  789. elif x is NaT:
  790. return 'NaT'
  791. return self.na_rep
  792. elif isinstance(x, PandasObject):
  793. return u'{x}'.format(x=x)
  794. else:
  795. # object dtype
  796. return u'{x}'.format(x=formatter(x))
  797. vals = self.values
  798. if isinstance(vals, Index):
  799. vals = vals._values
  800. elif isinstance(vals, ABCSparseArray):
  801. vals = vals.values
  802. is_float_type = lib.map_infer(vals, is_float) & notna(vals)
  803. leading_space = self.leading_space
  804. if leading_space is None:
  805. leading_space = is_float_type.any()
  806. fmt_values = []
  807. for i, v in enumerate(vals):
  808. if not is_float_type[i] and leading_space:
  809. fmt_values.append(u' {v}'.format(v=_format(v)))
  810. elif is_float_type[i]:
  811. fmt_values.append(float_format(v))
  812. else:
  813. if leading_space is False:
  814. # False specifically, so that the default is
  815. # to include a space if we get here.
  816. tpl = u'{v}'
  817. else:
  818. tpl = u' {v}'
  819. fmt_values.append(tpl.format(v=_format(v)))
  820. return fmt_values
  821. class FloatArrayFormatter(GenericArrayFormatter):
  822. """
  823. """
  824. def __init__(self, *args, **kwargs):
  825. GenericArrayFormatter.__init__(self, *args, **kwargs)
  826. # float_format is expected to be a string
  827. # formatter should be used to pass a function
  828. if self.float_format is not None and self.formatter is None:
  829. # GH21625, GH22270
  830. self.fixed_width = False
  831. if callable(self.float_format):
  832. self.formatter = self.float_format
  833. self.float_format = None
  834. def _value_formatter(self, float_format=None, threshold=None):
  835. """Returns a function to be applied on each value to format it
  836. """
  837. # the float_format parameter supersedes self.float_format
  838. if float_format is None:
  839. float_format = self.float_format
  840. # we are going to compose different functions, to first convert to
  841. # a string, then replace the decimal symbol, and finally chop according
  842. # to the threshold
  843. # when there is no float_format, we use str instead of '%g'
  844. # because str(0.0) = '0.0' while '%g' % 0.0 = '0'
  845. if float_format:
  846. def base_formatter(v):
  847. return float_format(value=v) if notna(v) else self.na_rep
  848. else:
  849. def base_formatter(v):
  850. return str(v) if notna(v) else self.na_rep
  851. if self.decimal != '.':
  852. def decimal_formatter(v):
  853. return base_formatter(v).replace('.', self.decimal, 1)
  854. else:
  855. decimal_formatter = base_formatter
  856. if threshold is None:
  857. return decimal_formatter
  858. def formatter(value):
  859. if notna(value):
  860. if abs(value) > threshold:
  861. return decimal_formatter(value)
  862. else:
  863. return decimal_formatter(0.0)
  864. else:
  865. return self.na_rep
  866. return formatter
  867. def get_result_as_array(self):
  868. """
  869. Returns the float values converted into strings using
  870. the parameters given at initialisation, as a numpy array
  871. """
  872. if self.formatter is not None:
  873. return np.array([self.formatter(x) for x in self.values])
  874. if self.fixed_width:
  875. threshold = get_option("display.chop_threshold")
  876. else:
  877. threshold = None
  878. # if we have a fixed_width, we'll need to try different float_format
  879. def format_values_with(float_format):
  880. formatter = self._value_formatter(float_format, threshold)
  881. # default formatter leaves a space to the left when formatting
  882. # floats, must be consistent for left-justifying NaNs (GH #25061)
  883. if self.justify == 'left':
  884. na_rep = ' ' + self.na_rep
  885. else:
  886. na_rep = self.na_rep
  887. # separate the wheat from the chaff
  888. values = self.values
  889. mask = isna(values)
  890. if hasattr(values, 'to_dense'): # sparse numpy ndarray
  891. values = values.to_dense()
  892. values = np.array(values, dtype='object')
  893. values[mask] = na_rep
  894. imask = (~mask).ravel()
  895. values.flat[imask] = np.array([formatter(val)
  896. for val in values.ravel()[imask]])
  897. if self.fixed_width:
  898. return _trim_zeros(values, na_rep)
  899. return values
  900. # There is a special default string when we are fixed-width
  901. # The default is otherwise to use str instead of a formatting string
  902. if self.float_format is None:
  903. if self.fixed_width:
  904. float_format = partial('{value: .{digits:d}f}'.format,
  905. digits=self.digits)
  906. else:
  907. float_format = self.float_format
  908. else:
  909. float_format = lambda value: self.float_format % value
  910. formatted_values = format_values_with(float_format)
  911. if not self.fixed_width:
  912. return formatted_values
  913. # we need do convert to engineering format if some values are too small
  914. # and would appear as 0, or if some values are too big and take too
  915. # much space
  916. if len(formatted_values) > 0:
  917. maxlen = max(len(x) for x in formatted_values)
  918. too_long = maxlen > self.digits + 6
  919. else:
  920. too_long = False
  921. with np.errstate(invalid='ignore'):
  922. abs_vals = np.abs(self.values)
  923. # this is pretty arbitrary for now
  924. # large values: more that 8 characters including decimal symbol
  925. # and first digit, hence > 1e6
  926. has_large_values = (abs_vals > 1e6).any()
  927. has_small_values = ((abs_vals < 10**(-self.digits)) &
  928. (abs_vals > 0)).any()
  929. if has_small_values or (too_long and has_large_values):
  930. float_format = partial('{value: .{digits:d}e}'.format,
  931. digits=self.digits)
  932. formatted_values = format_values_with(float_format)
  933. return formatted_values
  934. def _format_strings(self):
  935. # shortcut
  936. if self.formatter is not None:
  937. return [self.formatter(x) for x in self.values]
  938. return list(self.get_result_as_array())
  939. class IntArrayFormatter(GenericArrayFormatter):
  940. def _format_strings(self):
  941. formatter = self.formatter or (lambda x: '{x: d}'.format(x=x))
  942. fmt_values = [formatter(x) for x in self.values]
  943. return fmt_values
  944. class Datetime64Formatter(GenericArrayFormatter):
  945. def __init__(self, values, nat_rep='NaT', date_format=None, **kwargs):
  946. super(Datetime64Formatter, self).__init__(values, **kwargs)
  947. self.nat_rep = nat_rep
  948. self.date_format = date_format
  949. def _format_strings(self):
  950. """ we by definition have DO NOT have a TZ """
  951. values = self.values
  952. if not isinstance(values, DatetimeIndex):
  953. values = DatetimeIndex(values)
  954. if self.formatter is not None and callable(self.formatter):
  955. return [self.formatter(x) for x in values]
  956. fmt_values = format_array_from_datetime(
  957. values.asi8.ravel(),
  958. format=_get_format_datetime64_from_values(values,
  959. self.date_format),
  960. na_rep=self.nat_rep).reshape(values.shape)
  961. return fmt_values.tolist()
  962. class ExtensionArrayFormatter(GenericArrayFormatter):
  963. def _format_strings(self):
  964. values = self.values
  965. if isinstance(values, (ABCIndexClass, ABCSeries)):
  966. values = values._values
  967. formatter = values._formatter(boxed=True)
  968. if is_categorical_dtype(values.dtype):
  969. # Categorical is special for now, so that we can preserve tzinfo
  970. array = values.get_values()
  971. else:
  972. array = np.asarray(values)
  973. fmt_values = format_array(array,
  974. formatter,
  975. float_format=self.float_format,
  976. na_rep=self.na_rep, digits=self.digits,
  977. space=self.space, justify=self.justify,
  978. leading_space=self.leading_space)
  979. return fmt_values
  980. def format_percentiles(percentiles):
  981. """
  982. Outputs rounded and formatted percentiles.
  983. Parameters
  984. ----------
  985. percentiles : list-like, containing floats from interval [0,1]
  986. Returns
  987. -------
  988. formatted : list of strings
  989. Notes
  990. -----
  991. Rounding precision is chosen so that: (1) if any two elements of
  992. ``percentiles`` differ, they remain different after rounding
  993. (2) no entry is *rounded* to 0% or 100%.
  994. Any non-integer is always rounded to at least 1 decimal place.
  995. Examples
  996. --------
  997. Keeps all entries different after rounding:
  998. >>> format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999])
  999. ['1.999%', '2.001%', '50%', '66.667%', '99.99%']
  1000. No element is rounded to 0% or 100% (unless already equal to it).
  1001. Duplicates are allowed:
  1002. >>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999])
  1003. ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%']
  1004. """
  1005. percentiles = np.asarray(percentiles)
  1006. # It checks for np.NaN as well
  1007. with np.errstate(invalid='ignore'):
  1008. if not is_numeric_dtype(percentiles) or not np.all(percentiles >= 0) \
  1009. or not np.all(percentiles <= 1):
  1010. raise ValueError("percentiles should all be in the interval [0,1]")
  1011. percentiles = 100 * percentiles
  1012. int_idx = (percentiles.astype(int) == percentiles)
  1013. if np.all(int_idx):
  1014. out = percentiles.astype(int).astype(str)
  1015. return [i + '%' for i in out]
  1016. unique_pcts = np.unique(percentiles)
  1017. to_begin = unique_pcts[0] if unique_pcts[0] > 0 else None
  1018. to_end = 100 - unique_pcts[-1] if unique_pcts[-1] < 100 else None
  1019. # Least precision that keeps percentiles unique after rounding
  1020. prec = -np.floor(np.log10(np.min(
  1021. np.ediff1d(unique_pcts, to_begin=to_begin, to_end=to_end)
  1022. ))).astype(int)
  1023. prec = max(1, prec)
  1024. out = np.empty_like(percentiles, dtype=object)
  1025. out[int_idx] = percentiles[int_idx].astype(int).astype(str)
  1026. out[~int_idx] = percentiles[~int_idx].round(prec).astype(str)
  1027. return [i + '%' for i in out]
  1028. def _is_dates_only(values):
  1029. # return a boolean if we are only dates (and don't have a timezone)
  1030. values = DatetimeIndex(values)
  1031. if values.tz is not None:
  1032. return False
  1033. values_int = values.asi8
  1034. consider_values = values_int != iNaT
  1035. one_day_nanos = (86400 * 1e9)
  1036. even_days = np.logical_and(consider_values,
  1037. values_int % int(one_day_nanos) != 0).sum() == 0
  1038. if even_days:
  1039. return True
  1040. return False
  1041. def _format_datetime64(x, tz=None, nat_rep='NaT'):
  1042. if x is None or (is_scalar(x) and isna(x)):
  1043. return nat_rep
  1044. if tz is not None or not isinstance(x, Timestamp):
  1045. if getattr(x, 'tzinfo', None) is not None:
  1046. x = Timestamp(x).tz_convert(tz)
  1047. else:
  1048. x = Timestamp(x).tz_localize(tz)
  1049. return str(x)
  1050. def _format_datetime64_dateonly(x, nat_rep='NaT', date_format=None):
  1051. if x is None or (is_scalar(x) and isna(x)):
  1052. return nat_rep
  1053. if not isinstance(x, Timestamp):
  1054. x = Timestamp(x)
  1055. if date_format:
  1056. return x.strftime(date_format)
  1057. else:
  1058. return x._date_repr
  1059. def _get_format_datetime64(is_dates_only, nat_rep='NaT', date_format=None):
  1060. if is_dates_only:
  1061. return lambda x, tz=None: _format_datetime64_dateonly(
  1062. x, nat_rep=nat_rep, date_format=date_format)
  1063. else:
  1064. return lambda x, tz=None: _format_datetime64(x, tz=tz, nat_rep=nat_rep)
  1065. def _get_format_datetime64_from_values(values, date_format):
  1066. """ given values and a date_format, return a string format """
  1067. is_dates_only = _is_dates_only(values)
  1068. if is_dates_only:
  1069. return date_format or "%Y-%m-%d"
  1070. return date_format
  1071. class Datetime64TZFormatter(Datetime64Formatter):
  1072. def _format_strings(self):
  1073. """ we by definition have a TZ """
  1074. values = self.values.astype(object)
  1075. is_dates_only = _is_dates_only(values)
  1076. formatter = (self.formatter or
  1077. _get_format_datetime64(is_dates_only,
  1078. date_format=self.date_format))
  1079. fmt_values = [formatter(x) for x in values]
  1080. return fmt_values
  1081. class Timedelta64Formatter(GenericArrayFormatter):
  1082. def __init__(self, values, nat_rep='NaT', box=False, **kwargs):
  1083. super(Timedelta64Formatter, self).__init__(values, **kwargs)
  1084. self.nat_rep = nat_rep
  1085. self.box = box
  1086. def _format_strings(self):
  1087. formatter = (self.formatter or
  1088. _get_format_timedelta64(self.values, nat_rep=self.nat_rep,
  1089. box=self.box))
  1090. fmt_values = np.array([formatter(x) for x in self.values])
  1091. return fmt_values
  1092. def _get_format_timedelta64(values, nat_rep='NaT', box=False):
  1093. """
  1094. Return a formatter function for a range of timedeltas.
  1095. These will all have the same format argument
  1096. If box, then show the return in quotes
  1097. """
  1098. values_int = values.astype(np.int64)
  1099. consider_values = values_int != iNaT
  1100. one_day_nanos = (86400 * 1e9)
  1101. even_days = np.logical_and(consider_values,
  1102. values_int % one_day_nanos != 0).sum() == 0
  1103. all_sub_day = np.logical_and(
  1104. consider_values, np.abs(values_int) >= one_day_nanos).sum() == 0
  1105. if even_days:
  1106. format = None
  1107. elif all_sub_day:
  1108. format = 'sub_day'
  1109. else:
  1110. format = 'long'
  1111. def _formatter(x):
  1112. if x is None or (is_scalar(x) and isna(x)):
  1113. return nat_rep
  1114. if not isinstance(x, Timedelta):
  1115. x = Timedelta(x)
  1116. result = x._repr_base(format=format)
  1117. if box:
  1118. result = "'{res}'".format(res=result)
  1119. return result
  1120. return _formatter
  1121. def _make_fixed_width(strings, justify='right', minimum=None, adj=None):
  1122. if len(strings) == 0 or justify == 'all':
  1123. return strings
  1124. if adj is None:
  1125. adj = _get_adjustment()
  1126. max_len = max(adj.len(x) for x in strings)
  1127. if minimum is not None:
  1128. max_len = max(minimum, max_len)
  1129. conf_max = get_option("display.max_colwidth")
  1130. if conf_max is not None and max_len > conf_max:
  1131. max_len = conf_max
  1132. def just(x):
  1133. if conf_max is not None:
  1134. if (conf_max > 3) & (adj.len(x) > max_len):
  1135. x = x[:max_len - 3] + '...'
  1136. return x
  1137. strings = [just(x) for x in strings]
  1138. result = adj.justify(strings, max_len, mode=justify)
  1139. return result
  1140. def _trim_zeros(str_floats, na_rep='NaN'):
  1141. """
  1142. Trims zeros, leaving just one before the decimal points if need be.
  1143. """
  1144. trimmed = str_floats
  1145. def _is_number(x):
  1146. return (x != na_rep and not x.endswith('inf'))
  1147. def _cond(values):
  1148. finite = [x for x in values if _is_number(x)]
  1149. return (len(finite) > 0 and all(x.endswith('0') for x in finite) and
  1150. not (any(('e' in x) or ('E' in x) for x in finite)))
  1151. while _cond(trimmed):
  1152. trimmed = [x[:-1] if _is_number(x) else x for x in trimmed]
  1153. # leave one 0 after the decimal points if need be.
  1154. return [x + "0" if x.endswith('.') and _is_number(x) else x
  1155. for x in trimmed]
  1156. def _has_names(index):
  1157. if isinstance(index, ABCMultiIndex):
  1158. return com._any_not_none(*index.names)
  1159. else:
  1160. return index.name is not None
  1161. class EngFormatter(object):
  1162. """
  1163. Formats float values according to engineering format.
  1164. Based on matplotlib.ticker.EngFormatter
  1165. """
  1166. # The SI engineering prefixes
  1167. ENG_PREFIXES = {
  1168. -24: "y",
  1169. -21: "z",
  1170. -18: "a",
  1171. -15: "f",
  1172. -12: "p",
  1173. -9: "n",
  1174. -6: "u",
  1175. -3: "m",
  1176. 0: "",
  1177. 3: "k",
  1178. 6: "M",
  1179. 9: "G",
  1180. 12: "T",
  1181. 15: "P",
  1182. 18: "E",
  1183. 21: "Z",
  1184. 24: "Y"
  1185. }
  1186. def __init__(self, accuracy=None, use_eng_prefix=False):
  1187. self.accuracy = accuracy
  1188. self.use_eng_prefix = use_eng_prefix
  1189. def __call__(self, num):
  1190. """ Formats a number in engineering notation, appending a letter
  1191. representing the power of 1000 of the original number. Some examples:
  1192. >>> format_eng(0) # for self.accuracy = 0
  1193. ' 0'
  1194. >>> format_eng(1000000) # for self.accuracy = 1,
  1195. # self.use_eng_prefix = True
  1196. ' 1.0M'
  1197. >>> format_eng("-1e-6") # for self.accuracy = 2
  1198. # self.use_eng_prefix = False
  1199. '-1.00E-06'
  1200. @param num: the value to represent
  1201. @type num: either a numeric value or a string that can be converted to
  1202. a numeric value (as per decimal.Decimal constructor)
  1203. @return: engineering formatted string
  1204. """
  1205. import decimal
  1206. import math
  1207. dnum = decimal.Decimal(str(num))
  1208. if decimal.Decimal.is_nan(dnum):
  1209. return 'NaN'
  1210. if decimal.Decimal.is_infinite(dnum):
  1211. return 'inf'
  1212. sign = 1
  1213. if dnum < 0: # pragma: no cover
  1214. sign = -1
  1215. dnum = -dnum
  1216. if dnum != 0:
  1217. pow10 = decimal.Decimal(int(math.floor(dnum.log10() / 3) * 3))
  1218. else:
  1219. pow10 = decimal.Decimal(0)
  1220. pow10 = pow10.min(max(self.ENG_PREFIXES.keys()))
  1221. pow10 = pow10.max(min(self.ENG_PREFIXES.keys()))
  1222. int_pow10 = int(pow10)
  1223. if self.use_eng_prefix:
  1224. prefix = self.ENG_PREFIXES[int_pow10]
  1225. else:
  1226. if int_pow10 < 0:
  1227. prefix = 'E-{pow10:02d}'.format(pow10=-int_pow10)
  1228. else:
  1229. prefix = 'E+{pow10:02d}'.format(pow10=int_pow10)
  1230. mant = sign * dnum / (10**pow10)
  1231. if self.accuracy is None: # pragma: no cover
  1232. format_str = u("{mant: g}{prefix}")
  1233. else:
  1234. format_str = (u("{{mant: .{acc:d}f}}{{prefix}}")
  1235. .format(acc=self.accuracy))
  1236. formatted = format_str.format(mant=mant, prefix=prefix)
  1237. return formatted # .strip()
  1238. def set_eng_float_format(accuracy=3, use_eng_prefix=False):
  1239. """
  1240. Alter default behavior on how float is formatted in DataFrame.
  1241. Format float in engineering format. By accuracy, we mean the number of
  1242. decimal digits after the floating point.
  1243. See also EngFormatter.
  1244. """
  1245. set_option("display.float_format", EngFormatter(accuracy, use_eng_prefix))
  1246. set_option("display.column_space", max(12, accuracy + 9))
  1247. def _binify(cols, line_width):
  1248. adjoin_width = 1
  1249. bins = []
  1250. curr_width = 0
  1251. i_last_column = len(cols) - 1
  1252. for i, w in enumerate(cols):
  1253. w_adjoined = w + adjoin_width
  1254. curr_width += w_adjoined
  1255. if i_last_column == i:
  1256. wrap = curr_width + 1 > line_width and i > 0
  1257. else:
  1258. wrap = curr_width + 2 > line_width and i > 0
  1259. if wrap:
  1260. bins.append(i)
  1261. curr_width = w_adjoined
  1262. bins.append(len(cols))
  1263. return bins
  1264. def get_level_lengths(levels, sentinel=''):
  1265. """For each index in each level the function returns lengths of indexes.
  1266. Parameters
  1267. ----------
  1268. levels : list of lists
  1269. List of values on for level.
  1270. sentinel : string, optional
  1271. Value which states that no new index starts on there.
  1272. Returns
  1273. ----------
  1274. Returns list of maps. For each level returns map of indexes (key is index
  1275. in row and value is length of index).
  1276. """
  1277. if len(levels) == 0:
  1278. return []
  1279. control = [True] * len(levels[0])
  1280. result = []
  1281. for level in levels:
  1282. last_index = 0
  1283. lengths = {}
  1284. for i, key in enumerate(level):
  1285. if control[i] and key == sentinel:
  1286. pass
  1287. else:
  1288. control[i] = False
  1289. lengths[last_index] = i - last_index
  1290. last_index = i
  1291. lengths[last_index] = len(level) - last_index
  1292. result.append(lengths)
  1293. return result
  1294. def buffer_put_lines(buf, lines):
  1295. """
  1296. Appends lines to a buffer.
  1297. Parameters
  1298. ----------
  1299. buf
  1300. The buffer to write to
  1301. lines
  1302. The lines to append.
  1303. """
  1304. if any(isinstance(x, compat.text_type) for x in lines):
  1305. lines = [compat.text_type(x) for x in lines]
  1306. buf.write('\n'.join(lines))