printing.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435
  1. """
  2. printing tools
  3. """
  4. import sys
  5. from pandas.compat import u
  6. from pandas.core.dtypes.inference import is_sequence
  7. from pandas import compat
  8. from pandas.core.config import get_option
  9. def adjoin(space, *lists, **kwargs):
  10. """
  11. Glues together two sets of strings using the amount of space requested.
  12. The idea is to prettify.
  13. ----------
  14. space : int
  15. number of spaces for padding
  16. lists : str
  17. list of str which being joined
  18. strlen : callable
  19. function used to calculate the length of each str. Needed for unicode
  20. handling.
  21. justfunc : callable
  22. function used to justify str. Needed for unicode handling.
  23. """
  24. strlen = kwargs.pop('strlen', len)
  25. justfunc = kwargs.pop('justfunc', justify)
  26. out_lines = []
  27. newLists = []
  28. lengths = [max(map(strlen, x)) + space for x in lists[:-1]]
  29. # not the last one
  30. lengths.append(max(map(len, lists[-1])))
  31. maxLen = max(map(len, lists))
  32. for i, lst in enumerate(lists):
  33. nl = justfunc(lst, lengths[i], mode='left')
  34. nl.extend([' ' * lengths[i]] * (maxLen - len(lst)))
  35. newLists.append(nl)
  36. toJoin = zip(*newLists)
  37. for lines in toJoin:
  38. out_lines.append(_join_unicode(lines))
  39. return _join_unicode(out_lines, sep='\n')
  40. def justify(texts, max_len, mode='right'):
  41. """
  42. Perform ljust, center, rjust against string or list-like
  43. """
  44. if mode == 'left':
  45. return [x.ljust(max_len) for x in texts]
  46. elif mode == 'center':
  47. return [x.center(max_len) for x in texts]
  48. else:
  49. return [x.rjust(max_len) for x in texts]
  50. def _join_unicode(lines, sep=''):
  51. try:
  52. return sep.join(lines)
  53. except UnicodeDecodeError:
  54. sep = compat.text_type(sep)
  55. return sep.join([x.decode('utf-8') if isinstance(x, str) else x
  56. for x in lines])
  57. # Unicode consolidation
  58. # ---------------------
  59. #
  60. # pprinting utility functions for generating Unicode text or
  61. # bytes(3.x)/str(2.x) representations of objects.
# Try to use these as much as possible rather than rolling your own.
  63. #
  64. # When to use
  65. # -----------
  66. #
  67. # 1) If you're writing code internal to pandas (no I/O directly involved),
  68. # use pprint_thing().
  69. #
#    It will always return unicode text which can be handled by other
#    parts of the package without breakage.
  72. #
  73. # 2) if you need to write something out to file, use
  74. # pprint_thing_encoded(encoding).
  75. #
  76. # If no encoding is specified, it defaults to utf-8. Since encoding pure
  77. # ascii with utf-8 is a no-op you can safely use the default utf-8 if you're
  78. # working with straight ascii.
  79. def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds):
  80. """
  81. internal. pprinter for iterables. you should probably use pprint_thing()
  82. rather then calling this directly.
  83. bounds length of printed sequence, depending on options
  84. """
  85. if isinstance(seq, set):
  86. fmt = u("{{{body}}}")
  87. else:
  88. fmt = u("[{body}]") if hasattr(seq, '__setitem__') else u("({body})")
  89. if max_seq_items is False:
  90. nitems = len(seq)
  91. else:
  92. nitems = max_seq_items or get_option("max_seq_items") or len(seq)
  93. s = iter(seq)
  94. # handle sets, no slicing
  95. r = [pprint_thing(next(s),
  96. _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
  97. for i in range(min(nitems, len(seq)))]
  98. body = ", ".join(r)
  99. if nitems < len(seq):
  100. body += ", ..."
  101. elif isinstance(seq, tuple) and len(seq) == 1:
  102. body += ','
  103. return fmt.format(body=body)
  104. def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds):
  105. """
  106. internal. pprinter for iterables. you should probably use pprint_thing()
  107. rather then calling this directly.
  108. """
  109. fmt = u("{{{things}}}")
  110. pairs = []
  111. pfmt = u("{key}: {val}")
  112. if max_seq_items is False:
  113. nitems = len(seq)
  114. else:
  115. nitems = max_seq_items or get_option("max_seq_items") or len(seq)
  116. for k, v in list(seq.items())[:nitems]:
  117. pairs.append(
  118. pfmt.format(
  119. key=pprint_thing(k, _nest_lvl + 1,
  120. max_seq_items=max_seq_items, **kwds),
  121. val=pprint_thing(v, _nest_lvl + 1,
  122. max_seq_items=max_seq_items, **kwds)))
  123. if nitems < len(seq):
  124. return fmt.format(things=", ".join(pairs) + ", ...")
  125. else:
  126. return fmt.format(things=", ".join(pairs))
  127. def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False,
  128. quote_strings=False, max_seq_items=None):
  129. """
  130. This function is the sanctioned way of converting objects
  131. to a unicode representation.
  132. properly handles nested sequences containing unicode strings
  133. (unicode(object) does not)
  134. Parameters
  135. ----------
  136. thing : anything to be formatted
  137. _nest_lvl : internal use only. pprint_thing() is mutually-recursive
  138. with pprint_sequence, this argument is used to keep track of the
  139. current nesting level, and limit it.
  140. escape_chars : list or dict, optional
  141. Characters to escape. If a dict is passed the values are the
  142. replacements
  143. default_escapes : bool, default False
  144. Whether the input escape characters replaces or adds to the defaults
  145. max_seq_items : False, int, default None
  146. Pass thru to other pretty printers to limit sequence printing
  147. Returns
  148. -------
  149. result - unicode object on py2, str on py3. Always Unicode.
  150. """
  151. def as_escaped_unicode(thing, escape_chars=escape_chars):
  152. # Unicode is fine, else we try to decode using utf-8 and 'replace'
  153. # if that's not it either, we have no way of knowing and the user
  154. # should deal with it himself.
  155. try:
  156. result = compat.text_type(thing) # we should try this first
  157. except UnicodeDecodeError:
  158. # either utf-8 or we replace errors
  159. result = str(thing).decode('utf-8', "replace")
  160. translate = {'\t': r'\t', '\n': r'\n', '\r': r'\r', }
  161. if isinstance(escape_chars, dict):
  162. if default_escapes:
  163. translate.update(escape_chars)
  164. else:
  165. translate = escape_chars
  166. escape_chars = list(escape_chars.keys())
  167. else:
  168. escape_chars = escape_chars or tuple()
  169. for c in escape_chars:
  170. result = result.replace(c, translate[c])
  171. return compat.text_type(result)
  172. if (compat.PY3 and hasattr(thing, '__next__')) or hasattr(thing, 'next'):
  173. return compat.text_type(thing)
  174. elif (isinstance(thing, dict) and
  175. _nest_lvl < get_option("display.pprint_nest_depth")):
  176. result = _pprint_dict(thing, _nest_lvl, quote_strings=True,
  177. max_seq_items=max_seq_items)
  178. elif (is_sequence(thing) and
  179. _nest_lvl < get_option("display.pprint_nest_depth")):
  180. result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars,
  181. quote_strings=quote_strings,
  182. max_seq_items=max_seq_items)
  183. elif isinstance(thing, compat.string_types) and quote_strings:
  184. if compat.PY3:
  185. fmt = u("'{thing}'")
  186. else:
  187. fmt = u("u'{thing}'")
  188. result = fmt.format(thing=as_escaped_unicode(thing))
  189. else:
  190. result = as_escaped_unicode(thing)
  191. return compat.text_type(result) # always unicode
  192. def pprint_thing_encoded(object, encoding='utf-8', errors='replace', **kwds):
  193. value = pprint_thing(object) # get unicode representation of object
  194. return value.encode(encoding, errors, **kwds)
  195. def _enable_data_resource_formatter(enable):
  196. if 'IPython' not in sys.modules:
  197. # definitely not in IPython
  198. return
  199. from IPython import get_ipython
  200. ip = get_ipython()
  201. if ip is None:
  202. # still not in IPython
  203. return
  204. formatters = ip.display_formatter.formatters
  205. mimetype = "application/vnd.dataresource+json"
  206. if enable:
  207. if mimetype not in formatters:
  208. # define tableschema formatter
  209. from IPython.core.formatters import BaseFormatter
  210. class TableSchemaFormatter(BaseFormatter):
  211. print_method = '_repr_data_resource_'
  212. _return_type = (dict,)
  213. # register it:
  214. formatters[mimetype] = TableSchemaFormatter()
  215. # enable it if it's been disabled:
  216. formatters[mimetype].enabled = True
  217. else:
  218. # unregister tableschema mime-type
  219. if mimetype in formatters:
  220. formatters[mimetype].enabled = False
  221. default_pprint = lambda x, max_seq_items=None: \
  222. pprint_thing(x, escape_chars=('\t', '\r', '\n'), quote_strings=True,
  223. max_seq_items=max_seq_items)
  224. def format_object_summary(obj, formatter, is_justify=True, name=None,
  225. indent_for_name=True):
  226. """
  227. Return the formatted obj as a unicode string
  228. Parameters
  229. ----------
  230. obj : object
  231. must be iterable and support __getitem__
  232. formatter : callable
  233. string formatter for an element
  234. is_justify : boolean
  235. should justify the display
  236. name : name, optional
  237. defaults to the class name of the obj
  238. indent_for_name : bool, default True
  239. Whether subsequent lines should be be indented to
  240. align with the name.
  241. Returns
  242. -------
  243. summary string
  244. """
  245. from pandas.io.formats.console import get_console_size
  246. from pandas.io.formats.format import _get_adjustment
  247. display_width, _ = get_console_size()
  248. if display_width is None:
  249. display_width = get_option('display.width') or 80
  250. if name is None:
  251. name = obj.__class__.__name__
  252. if indent_for_name:
  253. name_len = len(name)
  254. space1 = "\n%s" % (' ' * (name_len + 1))
  255. space2 = "\n%s" % (' ' * (name_len + 2))
  256. else:
  257. space1 = "\n"
  258. space2 = "\n " # space for the opening '['
  259. n = len(obj)
  260. sep = ','
  261. max_seq_items = get_option('display.max_seq_items') or n
  262. # are we a truncated display
  263. is_truncated = n > max_seq_items
  264. # adj can optionally handle unicode eastern asian width
  265. adj = _get_adjustment()
  266. def _extend_line(s, line, value, display_width, next_line_prefix):
  267. if (adj.len(line.rstrip()) + adj.len(value.rstrip()) >=
  268. display_width):
  269. s += line.rstrip()
  270. line = next_line_prefix
  271. line += value
  272. return s, line
  273. def best_len(values):
  274. if values:
  275. return max(adj.len(x) for x in values)
  276. else:
  277. return 0
  278. close = u', '
  279. if n == 0:
  280. summary = u'[]{}'.format(close)
  281. elif n == 1:
  282. first = formatter(obj[0])
  283. summary = u'[{}]{}'.format(first, close)
  284. elif n == 2:
  285. first = formatter(obj[0])
  286. last = formatter(obj[-1])
  287. summary = u'[{}, {}]{}'.format(first, last, close)
  288. else:
  289. if n > max_seq_items:
  290. n = min(max_seq_items // 2, 10)
  291. head = [formatter(x) for x in obj[:n]]
  292. tail = [formatter(x) for x in obj[-n:]]
  293. else:
  294. head = []
  295. tail = [formatter(x) for x in obj]
  296. # adjust all values to max length if needed
  297. if is_justify:
  298. # however, if we are not truncated and we are only a single
  299. # line, then don't justify
  300. if (is_truncated or
  301. not (len(', '.join(head)) < display_width and
  302. len(', '.join(tail)) < display_width)):
  303. max_len = max(best_len(head), best_len(tail))
  304. head = [x.rjust(max_len) for x in head]
  305. tail = [x.rjust(max_len) for x in tail]
  306. summary = ""
  307. line = space2
  308. for i in range(len(head)):
  309. word = head[i] + sep + ' '
  310. summary, line = _extend_line(summary, line, word,
  311. display_width, space2)
  312. if is_truncated:
  313. # remove trailing space of last line
  314. summary += line.rstrip() + space2 + '...'
  315. line = space2
  316. for i in range(len(tail) - 1):
  317. word = tail[i] + sep + ' '
  318. summary, line = _extend_line(summary, line, word,
  319. display_width, space2)
  320. # last value: no sep added + 1 space of width used for trailing ','
  321. summary, line = _extend_line(summary, line, tail[-1],
  322. display_width - 2, space2)
  323. summary += line
  324. # right now close is either '' or ', '
  325. # Now we want to include the ']', but not the maybe space.
  326. close = ']' + close.rstrip(' ')
  327. summary += close
  328. if len(summary) > (display_width):
  329. summary += space1
  330. else: # one row
  331. summary += ' '
  332. # remove initial space
  333. summary = '[' + summary[len(space2):]
  334. return summary
  335. def format_object_attrs(obj):
  336. """
  337. Return a list of tuples of the (attr, formatted_value)
  338. for common attrs, including dtype, name, length
  339. Parameters
  340. ----------
  341. obj : object
  342. must be iterable
  343. Returns
  344. -------
  345. list
  346. """
  347. attrs = []
  348. if hasattr(obj, 'dtype'):
  349. attrs.append(('dtype', "'{}'".format(obj.dtype)))
  350. if getattr(obj, 'name', None) is not None:
  351. attrs.append(('name', default_pprint(obj.name)))
  352. max_seq_items = get_option('display.max_seq_items') or len(obj)
  353. if len(obj) > max_seq_items:
  354. attrs.append(('length', len(obj)))
  355. return attrs