tabulate.py 57 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514
  1. # -*- coding: utf-8 -*-
  2. """Pretty-print tabular data."""
  3. from __future__ import print_function
  4. from __future__ import unicode_literals
  5. from collections import namedtuple, Iterable
  6. from platform import python_version_tuple
  7. import re
  8. import math
  9. if python_version_tuple()[0] < "3":
  10. from itertools import izip_longest
  11. from functools import partial
  12. _none_type = type(None)
  13. _bool_type = bool
  14. _int_type = int
  15. _long_type = long
  16. _float_type = float
  17. _text_type = unicode
  18. _binary_type = str
  19. def _is_file(f):
  20. return isinstance(f, file)
  21. else:
  22. from itertools import zip_longest as izip_longest
  23. from functools import reduce, partial
  24. _none_type = type(None)
  25. _bool_type = bool
  26. _int_type = int
  27. _long_type = int
  28. _float_type = float
  29. _text_type = str
  30. _binary_type = bytes
  31. basestring = str
  32. import io
  33. def _is_file(f):
  34. return isinstance(f, io.IOBase)
  35. try:
  36. import wcwidth # optional wide-character (CJK) support
  37. except ImportError:
  38. wcwidth = None
# Names exported by ``from tabulate import *``.
__all__ = ["tabulate", "tabulate_formats", "simple_separated_format"]
__version__ = "0.8.2"


# minimum extra space in headers
MIN_PADDING = 2

# Whether or not to preserve leading/trailing whitespace in data.
PRESERVE_WHITESPACE = False

# Module-level defaults for the float format spec and the missing-value text.
_DEFAULT_FLOATFMT="g"
_DEFAULT_MISSINGVAL=""


# if True, enable wide-character (CJK) support
# (defaults to True only when the optional ``wcwidth`` import succeeded)
WIDE_CHARS_MODE = wcwidth is not None


# Description of a horizontal line: opening text, fill, column separator,
# closing text.
Line = namedtuple("Line", ["begin", "hline", "sep", "end"])


# Description of a row of cells: opening text, column separator, closing text.
DataRow = namedtuple("DataRow", ["begin", "sep", "end"])
# A table structure is supposed to be:
#
#     --- lineabove ---------
#         headerrow
#     --- linebelowheader ---
#         datarow
#     --- linebetweenrows ---
#     ... (more datarows) ...
#     --- linebetweenrows ---
#         last datarow
#     --- linebelow ---------
  62. #
  63. # TableFormat's line* elements can be
  64. #
  65. # - either None, if the element is not used,
  66. # - or a Line tuple,
  67. # - or a function: [col_widths], [col_alignments] -> string.
  68. #
  69. # TableFormat's *row elements can be
  70. #
  71. # - either None, if the element is not used,
  72. # - or a DataRow tuple,
  73. # - or a function: [cell_values], [col_widths], [col_alignments] -> string.
  74. #
  75. # padding (an integer) is the amount of white space around data values.
  76. #
  77. # with_header_hide:
  78. #
  79. # - either None, to display all table elements unconditionally,
  80. # - or a list of elements not to be displayed if the table has column headers.
  81. #
# Field order: the four horizontal-line slots, the two row slots, then the
# integer ``padding`` and the ``with_header_hide`` list (or None).
TableFormat = namedtuple("TableFormat", ["lineabove", "linebelowheader",
                                         "linebetweenrows", "linebelow",
                                         "headerrow", "datarow",
                                         "padding", "with_header_hide"])
  86. def _pipe_segment_with_colons(align, colwidth):
  87. """Return a segment of a horizontal line with optional colons which
  88. indicate column's alignment (as in `pipe` output format)."""
  89. w = colwidth
  90. if align in ["right", "decimal"]:
  91. return ('-' * (w - 1)) + ":"
  92. elif align == "center":
  93. return ":" + ('-' * (w - 2)) + ":"
  94. elif align == "left":
  95. return ":" + ('-' * (w - 1))
  96. else:
  97. return '-' * w
  98. def _pipe_line_with_colons(colwidths, colaligns):
  99. """Return a horizontal line with optional colons to indicate column's
  100. alignment (as in `pipe` output format)."""
  101. segments = [_pipe_segment_with_colons(a, w) for a, w in zip(colaligns, colwidths)]
  102. return "|" + "|".join(segments) + "|"
  103. def _mediawiki_row_with_attrs(separator, cell_values, colwidths, colaligns):
  104. alignment = { "left": '',
  105. "right": 'align="right"| ',
  106. "center": 'align="center"| ',
  107. "decimal": 'align="right"| ' }
  108. # hard-coded padding _around_ align attribute and value together
  109. # rather than padding parameter which affects only the value
  110. values_with_attrs = [' ' + alignment.get(a, '') + c + ' '
  111. for c, a in zip(cell_values, colaligns)]
  112. colsep = separator*2
  113. return (separator + colsep.join(values_with_attrs)).rstrip()
  114. def _textile_row_with_attrs(cell_values, colwidths, colaligns):
  115. cell_values[0] += ' '
  116. alignment = { "left": "<.", "right": ">.", "center": "=.", "decimal": ">." }
  117. values = (alignment.get(a, '') + v for a, v in zip(colaligns, cell_values))
  118. return '|' + '|'.join(values) + '|'
  119. def _html_begin_table_without_header(colwidths_ignore, colaligns_ignore):
  120. # this table header will be suppressed if there is a header row
  121. return "\n".join(["<table>", "<tbody>"])
  122. def _html_row_with_attrs(celltag, cell_values, colwidths, colaligns):
  123. alignment = { "left": '',
  124. "right": ' style="text-align: right;"',
  125. "center": ' style="text-align: center;"',
  126. "decimal": ' style="text-align: right;"' }
  127. values_with_attrs = ["<{0}{1}>{2}</{0}>".format(celltag, alignment.get(a, ''), c)
  128. for c, a in zip(cell_values, colaligns)]
  129. rowhtml = "<tr>" + "".join(values_with_attrs).rstrip() + "</tr>"
  130. if celltag == "th": # it's a header row, create a new table header
  131. rowhtml = "\n".join(["<table>",
  132. "<thead>",
  133. rowhtml,
  134. "</thead>",
  135. "<tbody>"])
  136. return rowhtml
  137. def _moin_row_with_attrs(celltag, cell_values, colwidths, colaligns, header=''):
  138. alignment = { "left": '',
  139. "right": '<style="text-align: right;">',
  140. "center": '<style="text-align: center;">',
  141. "decimal": '<style="text-align: right;">' }
  142. values_with_attrs = ["{0}{1} {2} ".format(celltag,
  143. alignment.get(a, ''),
  144. header+c+header)
  145. for c, a in zip(cell_values, colaligns)]
  146. return "".join(values_with_attrs)+"||"
  147. def _latex_line_begin_tabular(colwidths, colaligns, booktabs=False):
  148. alignment = { "left": "l", "right": "r", "center": "c", "decimal": "r" }
  149. tabular_columns_fmt = "".join([alignment.get(a, "l") for a in colaligns])
  150. return "\n".join(["\\begin{tabular}{" + tabular_columns_fmt + "}",
  151. "\\toprule" if booktabs else "\hline"])
  152. LATEX_ESCAPE_RULES = {r"&": r"\&", r"%": r"\%", r"$": r"\$", r"#": r"\#",
  153. r"_": r"\_", r"^": r"\^{}", r"{": r"\{", r"}": r"\}",
  154. r"~": r"\textasciitilde{}", "\\": r"\textbackslash{}",
  155. r"<": r"\ensuremath{<}", r">": r"\ensuremath{>}"}
  156. def _latex_row(cell_values, colwidths, colaligns, escrules=LATEX_ESCAPE_RULES):
  157. def escape_char(c):
  158. return escrules.get(c, c)
  159. escaped_values = ["".join(map(escape_char, cell)) for cell in cell_values]
  160. rowfmt = DataRow("", "&", "\\\\")
  161. return _build_simple_row(escaped_values, rowfmt)
  162. def _rst_escape_first_column(rows, headers):
  163. def escape_empty(val):
  164. if isinstance(val, (_text_type, _binary_type)) and not val.strip():
  165. return ".."
  166. else:
  167. return val
  168. new_headers = list(headers)
  169. new_rows = []
  170. if headers:
  171. new_headers[0] = escape_empty(headers[0])
  172. for row in rows:
  173. new_row = list(row)
  174. if new_row:
  175. new_row[0] = escape_empty(row[0])
  176. new_rows.append(new_row)
  177. return new_rows, new_headers
# Registry of the built-in table formats, keyed by the name accepted at the
# API. Each value is a TableFormat whose line slots are None, a Line tuple or
# a callable, and whose row slots are a DataRow tuple or a callable.
# NOTE(review): this text appears to have had runs of whitespace collapsed
# during extraction; verify the separator strings (e.g. in "simple", "plain"
# and "rst") against the upstream source before relying on them.
_table_formats = {"simple":
                  TableFormat(lineabove=Line("", "-", " ", ""),
                              linebelowheader=Line("", "-", " ", ""),
                              linebetweenrows=None,
                              linebelow=Line("", "-", " ", ""),
                              headerrow=DataRow("", " ", ""),
                              datarow=DataRow("", " ", ""),
                              padding=0,
                              # top and bottom rules are dropped when headers exist
                              with_header_hide=["lineabove", "linebelow"]),
                  # no rules at all, only separated cells
                  "plain":
                  TableFormat(lineabove=None, linebelowheader=None,
                              linebetweenrows=None, linebelow=None,
                              headerrow=DataRow("", " ", ""),
                              datarow=DataRow("", " ", ""),
                              padding=0, with_header_hide=None),
                  # ASCII box; the rule under the header is drawn with "="
                  "grid":
                  TableFormat(lineabove=Line("+", "-", "+", "+"),
                              linebelowheader=Line("+", "=", "+", "+"),
                              linebetweenrows=Line("+", "-", "+", "+"),
                              linebelow=Line("+", "-", "+", "+"),
                              headerrow=DataRow("|", "|", "|"),
                              datarow=DataRow("|", "|", "|"),
                              padding=1, with_header_hide=None),
                  # same layout as "grid", drawn with Unicode box characters
                  "fancy_grid":
                  TableFormat(lineabove=Line("╒", "═", "╤", "╕"),
                              linebelowheader=Line("╞", "═", "╪", "╡"),
                              linebetweenrows=Line("├", "─", "┼", "┤"),
                              linebelow=Line("╘", "═", "╧", "╛"),
                              headerrow=DataRow("│", "│", "│"),
                              datarow=DataRow("│", "│", "│"),
                              padding=1, with_header_hide=None),
                  # rules carry alignment colons (see _pipe_line_with_colons)
                  "pipe":
                  TableFormat(lineabove=_pipe_line_with_colons,
                              linebelowheader=_pipe_line_with_colons,
                              linebetweenrows=None,
                              linebelow=None,
                              headerrow=DataRow("|", "|", "|"),
                              datarow=DataRow("|", "|", "|"),
                              padding=1,
                              with_header_hide=["lineabove"]),
                  "orgtbl":
                  TableFormat(lineabove=None,
                              linebelowheader=Line("|", "-", "+", "|"),
                              linebetweenrows=None,
                              linebelow=None,
                              headerrow=DataRow("|", "|", "|"),
                              datarow=DataRow("|", "|", "|"),
                              padding=1, with_header_hide=None),
                  # header cells use "||", data cells use "|"
                  "jira":
                  TableFormat(lineabove=None,
                              linebelowheader=None,
                              linebetweenrows=None,
                              linebelow=None,
                              headerrow=DataRow("||", "||", "||"),
                              datarow=DataRow("|", "|", "|"),
                              padding=1, with_header_hide=None),
                  "presto":
                  TableFormat(lineabove=None,
                              linebelowheader=Line("", "-", "+", ""),
                              linebetweenrows=None,
                              linebelow=None,
                              headerrow=DataRow("", "|", ""),
                              datarow=DataRow("", "|", ""),
                              padding=1, with_header_hide=None),
                  "psql":
                  TableFormat(lineabove=Line("+", "-", "+", "+"),
                              linebelowheader=Line("|", "-", "+", "|"),
                              linebetweenrows=None,
                              linebelow=Line("+", "-", "+", "+"),
                              headerrow=DataRow("|", "|", "|"),
                              datarow=DataRow("|", "|", "|"),
                              padding=1, with_header_hide=None),
                  "rst":
                  TableFormat(lineabove=Line("", "=", " ", ""),
                              linebelowheader=Line("", "=", " ", ""),
                              linebetweenrows=None,
                              linebelow=Line("", "=", " ", ""),
                              headerrow=DataRow("", " ", ""),
                              datarow=DataRow("", " ", ""),
                              padding=0, with_header_hide=None),
                  # rows are rendered by _mediawiki_row_with_attrs; "!" starts
                  # header rows and "|" starts data rows
                  "mediawiki":
                  TableFormat(lineabove=Line("{| class=\"wikitable\" style=\"text-align: left;\"",
                                             "", "", "\n|+ <!-- caption -->\n|-"),
                              linebelowheader=Line("|-", "", "", ""),
                              linebetweenrows=Line("|-", "", "", ""),
                              linebelow=Line("|}", "", "", ""),
                              headerrow=partial(_mediawiki_row_with_attrs, "!"),
                              datarow=partial(_mediawiki_row_with_attrs, "|"),
                              padding=0, with_header_hide=None),
                  # header values are wrapped in ''' by _moin_row_with_attrs
                  "moinmoin":
                  TableFormat(lineabove=None,
                              linebelowheader=None,
                              linebetweenrows=None,
                              linebelow=None,
                              headerrow=partial(_moin_row_with_attrs,"||",header="'''"),
                              datarow=partial(_moin_row_with_attrs,"||"),
                              padding=1, with_header_hide=None),
                  "youtrack":
                  TableFormat(lineabove=None,
                              linebelowheader=None,
                              linebetweenrows=None,
                              linebelow=None,
                              headerrow=DataRow("|| ", " || ", " || "),
                              datarow=DataRow("| ", " | ", " |"),
                              padding=1, with_header_hide=None),
                  # with headers, the <table> opening comes from the header row
                  # (_html_row_with_attrs with "th"), so lineabove is hidden
                  "html":
                  TableFormat(lineabove=_html_begin_table_without_header,
                              linebelowheader="",
                              linebetweenrows=None,
                              linebelow=Line("</tbody>\n</table>", "", "", ""),
                              headerrow=partial(_html_row_with_attrs, "th"),
                              datarow=partial(_html_row_with_attrs, "td"),
                              padding=0, with_header_hide=["lineabove"]),
                  "latex":
                  TableFormat(lineabove=_latex_line_begin_tabular,
                              linebelowheader=Line("\\hline", "", "", ""),
                              linebetweenrows=None,
                              linebelow=Line("\\hline\n\\end{tabular}", "", "", ""),
                              headerrow=_latex_row,
                              datarow=_latex_row,
                              padding=1, with_header_hide=None),
                  # same as "latex" but with an empty escape table, so cell
                  # text is passed through unescaped
                  "latex_raw":
                  TableFormat(lineabove=_latex_line_begin_tabular,
                              linebelowheader=Line("\\hline", "", "", ""),
                              linebetweenrows=None,
                              linebelow=Line("\\hline\n\\end{tabular}", "", "", ""),
                              headerrow=partial(_latex_row, escrules={}),
                              datarow=partial(_latex_row, escrules={}),
                              padding=1, with_header_hide=None),
                  # uses \toprule / \midrule / \bottomrule instead of \hline
                  "latex_booktabs":
                  TableFormat(lineabove=partial(_latex_line_begin_tabular, booktabs=True),
                              linebelowheader=Line("\\midrule", "", "", ""),
                              linebetweenrows=None,
                              linebelow=Line("\\bottomrule\n\\end{tabular}", "", "", ""),
                              headerrow=_latex_row,
                              datarow=_latex_row,
                              padding=1, with_header_hide=None),
                  # tab-separated cells, no rules
                  "tsv":
                  TableFormat(lineabove=None, linebelowheader=None,
                              linebetweenrows=None, linebelow=None,
                              headerrow=DataRow("", "\t", ""),
                              datarow=DataRow("", "\t", ""),
                              padding=0, with_header_hide=None),
                  "textile":
                  TableFormat(lineabove=None, linebelowheader=None,
                              linebetweenrows=None, linebelow=None,
                              headerrow=DataRow("|_. ", "|_.", "|"),
                              datarow=_textile_row_with_attrs,
                              padding=1, with_header_hide=None)}


# Sorted list of the format names defined in the registry.
tabulate_formats = list(sorted(_table_formats.keys()))
  328. # The table formats for which multiline cells will be folded into subsequent
  329. # table rows. The key is the original format specified at the API. The value is
  330. # the format that will be used to represent the original format.
  331. multiline_formats = {
  332. "plain": "plain",
  333. "simple": "simple",
  334. "grid": "grid",
  335. "fancy_grid": "fancy_grid",
  336. "pipe": "pipe",
  337. "orgtbl": "orgtbl",
  338. "jira": "jira",
  339. "presto": "presto",
  340. "psql": "psql",
  341. "rst": "rst",
  342. }
  343. # TODO: Add multiline support for the remaining table formats:
  344. # - mediawiki: Replace \n with <br>
  345. # - moinmoin: TBD
  346. # - youtrack: TBD
  347. # - html: Replace \n with <br>
  348. # - latex*: Use "makecell" package: In header, replace X\nY with
  349. # \thead{X\\Y} and in data row, replace X\nY with \makecell{X\\Y}
  350. # - tsv: TBD
  351. # - textile: Replace \n with <br/> (must be well-formed XML)
  352. _multiline_codes = re.compile(r"\r|\n|\r\n")
  353. _multiline_codes_bytes = re.compile(b"\r|\n|\r\n")
  354. _invisible_codes = re.compile(r"\x1b\[\d+[;\d]*m|\x1b\[\d*\;\d*\;\d*m") # ANSI color codes
  355. _invisible_codes_bytes = re.compile(b"\x1b\[\d+[;\d]*m|\x1b\[\d*\;\d*\;\d*m") # ANSI color codes
  356. def simple_separated_format(separator):
  357. """Construct a simple TableFormat with columns separated by a separator.
  358. >>> tsv = simple_separated_format("\\t") ; \
  359. tabulate([["foo", 1], ["spam", 23]], tablefmt=tsv) == 'foo \\t 1\\nspam\\t23'
  360. True
  361. """
  362. return TableFormat(None, None, None, None,
  363. headerrow=DataRow('', separator, ''),
  364. datarow=DataRow('', separator, ''),
  365. padding=0, with_header_hide=None)
  366. def _isconvertible(conv, string):
  367. try:
  368. n = conv(string)
  369. return True
  370. except (ValueError, TypeError):
  371. return False
  372. def _isnumber(string):
  373. """
  374. >>> _isnumber("123.45")
  375. True
  376. >>> _isnumber("123")
  377. True
  378. >>> _isnumber("spam")
  379. False
  380. >>> _isnumber("123e45678")
  381. False
  382. >>> _isnumber("inf")
  383. True
  384. """
  385. if not _isconvertible(float, string):
  386. return False
  387. elif isinstance(string, (_text_type, _binary_type)) and (
  388. math.isinf(float(string)) or math.isnan(float(string))):
  389. return string.lower() in ['inf', '-inf', 'nan']
  390. return True
  391. def _isint(string, inttype=int):
  392. """
  393. >>> _isint("123")
  394. True
  395. >>> _isint("123.45")
  396. False
  397. """
  398. return type(string) is inttype or\
  399. (isinstance(string, _binary_type) or isinstance(string, _text_type))\
  400. and\
  401. _isconvertible(inttype, string)
  402. def _isbool(string):
  403. """
  404. >>> _isbool(True)
  405. True
  406. >>> _isbool("False")
  407. True
  408. >>> _isbool(1)
  409. False
  410. """
  411. return type(string) is _bool_type or\
  412. (isinstance(string, (_binary_type, _text_type))\
  413. and\
  414. string in ("True", "False"))
  415. def _type(string, has_invisible=True, numparse=True):
  416. """The least generic type (type(None), int, float, str, unicode).
  417. >>> _type(None) is type(None)
  418. True
  419. >>> _type("foo") is type("")
  420. True
  421. >>> _type("1") is type(1)
  422. True
  423. >>> _type('\x1b[31m42\x1b[0m') is type(42)
  424. True
  425. >>> _type('\x1b[31m42\x1b[0m') is type(42)
  426. True
  427. """
  428. if has_invisible and \
  429. (isinstance(string, _text_type) or isinstance(string, _binary_type)):
  430. string = _strip_invisible(string)
  431. if string is None:
  432. return _none_type
  433. elif hasattr(string, "isoformat"): # datetime.datetime, date, and time
  434. return _text_type
  435. elif _isbool(string):
  436. return _bool_type
  437. elif _isint(string) and numparse:
  438. return int
  439. elif _isint(string, _long_type) and numparse:
  440. return int
  441. elif _isnumber(string) and numparse:
  442. return float
  443. elif isinstance(string, _binary_type):
  444. return _binary_type
  445. else:
  446. return _text_type
  447. def _afterpoint(string):
  448. """Symbols after a decimal point, -1 if the string lacks the decimal point.
  449. >>> _afterpoint("123.45")
  450. 2
  451. >>> _afterpoint("1001")
  452. -1
  453. >>> _afterpoint("eggs")
  454. -1
  455. >>> _afterpoint("123e45")
  456. 2
  457. """
  458. if _isnumber(string):
  459. if _isint(string):
  460. return -1
  461. else:
  462. pos = string.rfind(".")
  463. pos = string.lower().rfind("e") if pos < 0 else pos
  464. if pos >= 0:
  465. return len(string) - pos - 1
  466. else:
  467. return -1 # no point
  468. else:
  469. return -1 # not a number
  470. def _padleft(width, s):
  471. """Flush right.
  472. >>> _padleft(6, '\u044f\u0439\u0446\u0430') == ' \u044f\u0439\u0446\u0430'
  473. True
  474. """
  475. fmt = "{0:>%ds}" % width
  476. return fmt.format(s)
  477. def _padright(width, s):
  478. """Flush left.
  479. >>> _padright(6, '\u044f\u0439\u0446\u0430') == '\u044f\u0439\u0446\u0430 '
  480. True
  481. """
  482. fmt = "{0:<%ds}" % width
  483. return fmt.format(s)
  484. def _padboth(width, s):
  485. """Center string.
  486. >>> _padboth(6, '\u044f\u0439\u0446\u0430') == ' \u044f\u0439\u0446\u0430 '
  487. True
  488. """
  489. fmt = "{0:^%ds}" % width
  490. return fmt.format(s)
  491. def _padnone(ignore_width, s):
  492. return s
  493. def _strip_invisible(s):
  494. "Remove invisible ANSI color codes."
  495. if isinstance(s, _text_type):
  496. return re.sub(_invisible_codes, "", s)
  497. else: # a bytestring
  498. return re.sub(_invisible_codes_bytes, "", s)
  499. def _visible_width(s):
  500. """Visible width of a printed string. ANSI color codes are removed.
  501. >>> _visible_width('\x1b[31mhello\x1b[0m'), _visible_width("world")
  502. (5, 5)
  503. """
  504. # optional wide-character support
  505. if wcwidth is not None and WIDE_CHARS_MODE:
  506. len_fn = wcwidth.wcswidth
  507. else:
  508. len_fn = len
  509. if isinstance(s, _text_type) or isinstance(s, _binary_type):
  510. return len_fn(_strip_invisible(s))
  511. else:
  512. return len_fn(_text_type(s))
  513. def _is_multiline(s):
  514. if isinstance(s, _text_type):
  515. return bool(re.search(_multiline_codes, s))
  516. else: # a bytestring
  517. return bool(re.search(_multiline_codes_bytes, s))
  518. def _multiline_width(multiline_s, line_width_fn=len):
  519. """Visible width of a potentially multiline content."""
  520. return max(map(line_width_fn, re.split("[\r\n]", multiline_s)))
  521. def _choose_width_fn(has_invisible, enable_widechars, is_multiline):
  522. """Return a function to calculate visible cell width."""
  523. if has_invisible:
  524. line_width_fn = _visible_width
  525. elif enable_widechars: # optional wide-character support if available
  526. line_width_fn = wcwidth.wcswidth
  527. else:
  528. line_width_fn = len
  529. if is_multiline:
  530. width_fn = lambda s: _multiline_width(s, line_width_fn)
  531. else:
  532. width_fn = line_width_fn
  533. return width_fn
  534. def _align_column_choose_padfn(strings, alignment, has_invisible):
  535. if alignment == "right":
  536. if not PRESERVE_WHITESPACE:
  537. strings = [s.strip() for s in strings]
  538. padfn = _padleft
  539. elif alignment == "center":
  540. if not PRESERVE_WHITESPACE:
  541. strings = [s.strip() for s in strings]
  542. padfn = _padboth
  543. elif alignment == "decimal":
  544. if has_invisible:
  545. decimals = [_afterpoint(_strip_invisible(s)) for s in strings]
  546. else:
  547. decimals = [_afterpoint(s) for s in strings]
  548. maxdecimals = max(decimals)
  549. strings = [s + (maxdecimals - decs) * " "
  550. for s, decs in zip(strings, decimals)]
  551. padfn = _padleft
  552. elif not alignment:
  553. padfn = _padnone
  554. else:
  555. if not PRESERVE_WHITESPACE:
  556. strings = [s.strip() for s in strings]
  557. padfn = _padright
  558. return strings, padfn
def _align_column(strings, alignment, minwidth=0,
                  has_invisible=True, enable_widechars=False, is_multiline=False):
    """[string] -> [padded_string]

    Pad every string of one column to a common width.

    The target width is the larger of `minwidth` and the widest string as
    measured by the function chosen by `_choose_width_fn`. Padding is done by
    the function chosen by `_align_column_choose_padfn`. In multiline mode
    each line of a cell is padded separately and the lines are re-joined
    with "\\n".
    """
    strings, padfn = _align_column_choose_padfn(strings, alignment, has_invisible)
    width_fn = _choose_width_fn(has_invisible, enable_widechars, is_multiline)

    s_widths = list(map(width_fn, strings))
    # NOTE(review): max() over an empty list raises ValueError, so this
    # presumably expects at least one string -- confirm against callers.
    maxwidth = max(max(s_widths), minwidth)
    # TODO: refactor column alignment in single-line and multiline modes
    if is_multiline:
        if not enable_widechars and not has_invisible:
            # len() equals the visible width here, so pad straight to maxwidth
            padded_strings = [
                "\n".join([padfn(maxwidth, s) for s in ms.splitlines()])
                for ms in strings]
        else:
            # enable wide-character width corrections
            # s_lens: length in characters of the longest line of each cell
            s_lens = [max((len(s) for s in re.split("[\r\n]", ms))) for ms in strings]
            # padfn counts characters, so shift the target width by the
            # difference between the measured width and the character count
            visible_widths = [maxwidth - (w - l) for w, l in zip(s_widths, s_lens)]
            # wcswidth and _visible_width don't count invisible characters;
            # padfn doesn't need to apply another correction
            padded_strings = ["\n".join([padfn(w, s) for s in (ms.splitlines() or ms)])
                              for ms, w in zip(strings, visible_widths)]
    else:  # single-line cell values
        if not enable_widechars and not has_invisible:
            padded_strings = [padfn(maxwidth, s) for s in strings]
        else:
            # enable wide-character width corrections
            s_lens = list(map(len, strings))
            visible_widths = [maxwidth - (w - l) for w, l in zip(s_widths, s_lens)]
            # wcswidth and _visible_width don't count invisible characters;
            # padfn doesn't need to apply another correction
            padded_strings = [padfn(w, s) for s, w in zip(strings, visible_widths)]
    return padded_strings
  591. def _more_generic(type1, type2):
  592. types = { _none_type: 0, _bool_type: 1, int: 2, float: 3, _binary_type: 4, _text_type: 5 }
  593. invtypes = { 5: _text_type, 4: _binary_type, 3: float, 2: int, 1: _bool_type, 0: _none_type }
  594. moregeneric = max(types.get(type1, 5), types.get(type2, 5))
  595. return invtypes[moregeneric]
  596. def _column_type(strings, has_invisible=True, numparse=True):
  597. """The least generic type all column values are convertible to.
  598. >>> _column_type([True, False]) is _bool_type
  599. True
  600. >>> _column_type(["1", "2"]) is _int_type
  601. True
  602. >>> _column_type(["1", "2.3"]) is _float_type
  603. True
  604. >>> _column_type(["1", "2.3", "four"]) is _text_type
  605. True
  606. >>> _column_type(["four", '\u043f\u044f\u0442\u044c']) is _text_type
  607. True
  608. >>> _column_type([None, "brux"]) is _text_type
  609. True
  610. >>> _column_type([1, 2, None]) is _int_type
  611. True
  612. >>> import datetime as dt
  613. >>> _column_type([dt.datetime(1991,2,19), dt.time(17,35)]) is _text_type
  614. True
  615. """
  616. types = [_type(s, has_invisible, numparse) for s in strings ]
  617. return reduce(_more_generic, types, _bool_type)
  618. def _format(val, valtype, floatfmt, missingval="", has_invisible=True):
  619. """Format a value accoding to its type.
  620. Unicode is supported:
  621. >>> hrow = ['\u0431\u0443\u043a\u0432\u0430', '\u0446\u0438\u0444\u0440\u0430'] ; \
  622. tbl = [['\u0430\u0437', 2], ['\u0431\u0443\u043a\u0438', 4]] ; \
  623. good_result = '\\u0431\\u0443\\u043a\\u0432\\u0430 \\u0446\\u0438\\u0444\\u0440\\u0430\\n------- -------\\n\\u0430\\u0437 2\\n\\u0431\\u0443\\u043a\\u0438 4' ; \
  624. tabulate(tbl, headers=hrow) == good_result
  625. True
  626. """
  627. if val is None:
  628. return missingval
  629. if valtype in [int, _text_type]:
  630. return "{0}".format(val)
  631. elif valtype is _binary_type:
  632. try:
  633. return _text_type(val, "ascii")
  634. except TypeError:
  635. return _text_type(val)
  636. elif valtype is float:
  637. is_a_colored_number = has_invisible and isinstance(val, (_text_type, _binary_type))
  638. if is_a_colored_number:
  639. raw_val = _strip_invisible(val)
  640. formatted_val = format(float(raw_val), floatfmt)
  641. return val.replace(raw_val, formatted_val)
  642. else:
  643. return format(float(val), floatfmt)
  644. else:
  645. return "{0}".format(val)
  646. def _align_header(header, alignment, width, visible_width, is_multiline=False, width_fn=None):
  647. "Pad string header to width chars given known visible_width of the header."
  648. if is_multiline:
  649. header_lines = re.split(_multiline_codes, header)
  650. padded_lines = [_align_header(h, alignment, width, width_fn(h)) for h in header_lines]
  651. return "\n".join(padded_lines)
  652. # else: not multiline
  653. ninvisible = len(header) - visible_width
  654. width += ninvisible
  655. if alignment == "left":
  656. return _padright(width, header)
  657. elif alignment == "center":
  658. return _padboth(width, header)
  659. elif not alignment:
  660. return "{0}".format(header)
  661. else:
  662. return _padleft(width, header)
  663. def _prepend_row_index(rows, index):
  664. """Add a left-most index column."""
  665. if index is None or index is False:
  666. return rows
  667. if len(index) != len(rows):
  668. print('index=', index)
  669. print('rows=', rows)
  670. raise ValueError('index must be as long as the number of data rows')
  671. rows = [[v]+list(row) for v,row in zip(index, rows)]
  672. return rows
  673. def _bool(val):
  674. "A wrapper around standard bool() which doesn't throw on NumPy arrays"
  675. try:
  676. return bool(val)
  677. except ValueError: # val is likely to be a numpy array with many elements
  678. return False
def _normalize_tabular_data(tabular_data, headers, showindex="default"):
    """Transform a supported data type to a list of lists, and a list of headers.

    Supported tabular data types:

    * list-of-lists or another iterable of iterables

    * list of named tuples (usually used with headers="keys")

    * list of dicts (usually used with headers="keys")

    * list of OrderedDicts (usually used with headers="keys")

    * 2D NumPy arrays

    * NumPy record arrays (usually used with headers="keys")

    * dict of iterables (usually used with headers="keys")

    * pandas.DataFrame (usually used with headers="keys")

    The first row can be used as headers if headers="firstrow",
    column indices can be used as headers if headers="keys".

    If showindex="default", show row indices of the pandas.DataFrame.
    If showindex="always", show row indices for all types of data.
    If showindex="never", don't show row indices for all types of data.
    If showindex is an iterable, show its values as row indices.

    Returns a `(rows, headers)` tuple: `rows` is a list of lists and
    `headers` a list of strings (left-padded with empty strings when there
    are fewer headers than columns in the first row).

    Raises ValueError when dict-like data is neither a plain dict nor has an
    `.index`, when `headers` for a list of dicts is truthy but neither a dict
    nor a keyword, or (via `_prepend_row_index`) when the index length does
    not match the number of rows.
    """

    # Array-like headers raise ValueError on bool(); turn them into a plain
    # list so the `headers == "keys"` style comparisons below are safe.
    try:
        bool(headers)
        is_headers2bool_broken = False
    except ValueError: # numpy.ndarray, pandas.core.index.Index, ...
        # NOTE(review): this flag is assigned but never read again within
        # this function.
        is_headers2bool_broken = True
        headers = list(headers)

    index = None
    if hasattr(tabular_data, "keys") and hasattr(tabular_data, "values"):
        # dict-like and pandas.DataFrame?
        if hasattr(tabular_data.values, "__call__"):
            # likely a conventional dict
            keys = tabular_data.keys()
            rows = list(izip_longest(*tabular_data.values()))  # columns have to be transposed
        elif hasattr(tabular_data, "index"):
            # values is a property, has .index => it's likely a pandas.DataFrame (pandas 0.11.0)
            keys = list(tabular_data)
            if tabular_data.index.name is not None:
                # a named index contributes its name(s) as leading header(s)
                if isinstance(tabular_data.index.name, list):
                    keys[:0] = tabular_data.index.name
                else:
                    keys[:0] = [tabular_data.index.name]
            vals = tabular_data.values  # values matrix doesn't need to be transposed
            # for DataFrames add an index per default
            index = list(tabular_data.index)
            rows = [list(row) for row in vals]
        else:
            raise ValueError("tabular data doesn't appear to be a dict or a DataFrame")

        if headers == "keys":
            headers = list(map(_text_type,keys))  # headers should be strings

    else:  # it's a usual an iterable of iterables, or a NumPy array
        rows = list(tabular_data)

        if (headers == "keys" and not rows):
            # an empty table (issue #81)
            headers = []
        elif (headers == "keys" and
              hasattr(tabular_data, "dtype") and
              getattr(tabular_data.dtype, "names")):
            # numpy record array
            headers = tabular_data.dtype.names
        elif (headers == "keys"
              and len(rows) > 0
              and isinstance(rows[0], tuple)
              and hasattr(rows[0], "_fields")):
            # namedtuple
            headers = list(map(_text_type, rows[0]._fields))
        elif (len(rows) > 0
              and isinstance(rows[0], dict)):
            # dict or OrderedDict
            uniq_keys = set() # implements hashed lookup
            keys = [] # storage for set
            if headers == "firstrow":
                # the first dict supplies the header labels and is removed
                # from the data rows
                firstdict = rows[0] if len(rows) > 0 else {}
                keys.extend(firstdict.keys())
                uniq_keys.update(keys)
                rows = rows[1:]
            for row in rows:
                for k in row.keys():
                    #Save unique items in input order
                    if k not in uniq_keys:
                        keys.append(k)
                        uniq_keys.add(k)
            if headers == 'keys':
                headers = keys
            elif isinstance(headers, dict):
                # a dict of headers for a list of dicts
                headers = [headers.get(k, k) for k in keys]
                headers = list(map(_text_type, headers))
            elif headers == "firstrow":
                # NOTE(review): `rows` was already stripped of the first dict
                # above, so when that dict was the only row the headers end up
                # empty here -- confirm this is intended.
                if len(rows) > 0:
                    headers = [firstdict.get(k, k) for k in keys]
                    headers = list(map(_text_type, headers))
                else:
                    headers = []
            elif headers:
                raise ValueError('headers for a list of dicts is not a dict or a keyword')
            # keys missing from a row yield None cells
            rows = [[row.get(k) for k in keys] for row in rows]

        elif (headers == "keys"
              and hasattr(tabular_data, "description")
              and hasattr(tabular_data, "fetchone")
              and hasattr(tabular_data, "rowcount")):
            # Python Database API cursor object (PEP 0249)
            # print tabulate(cursor, headers='keys')
            headers = [column[0] for column in tabular_data.description]

        elif headers == "keys" and len(rows) > 0:
            # keys are column indices
            headers = list(map(_text_type, range(len(rows[0]))))

    # take headers from the first row if necessary
    if headers == "firstrow" and len(rows) > 0:
        if index is not None:
            # the first index label becomes the header of the index column
            headers = [index[0]] + list(rows[0])
            index = index[1:]
        else:
            headers = rows[0]
        headers = list(map(_text_type, headers)) # headers should be strings
        rows = rows[1:]

    headers = list(map(_text_type,headers))
    rows = list(map(list,rows))

    # add or remove an index column
    showindex_is_a_str = type(showindex) in [_text_type, _binary_type]
    if showindex == "default" and index is not None:
        rows = _prepend_row_index(rows, index)
    elif isinstance(showindex, Iterable) and not showindex_is_a_str:
        # a custom iterable of row labels
        rows = _prepend_row_index(rows, list(showindex))
    elif showindex == "always" or (_bool(showindex) and not showindex_is_a_str):
        if index is None:
            index = list(range(len(rows)))
        rows = _prepend_row_index(rows, index)
    elif showindex == "never" or (not _bool(showindex) and not showindex_is_a_str):
        # explicit no-op: rows are left without an index column
        pass

    # pad with empty headers for initial columns if necessary
    if headers and len(rows) > 0:
        nhs = len(headers)
        ncols = len(rows[0])
        if nhs < ncols:
            headers = [""]*(ncols - nhs) + headers

    return rows, headers
def tabulate(tabular_data, headers=(), tablefmt="simple",
             floatfmt=_DEFAULT_FLOATFMT, numalign="decimal", stralign="left",
             missingval=_DEFAULT_MISSINGVAL, showindex="default", disable_numparse=False):
    """Format a fixed width table for pretty printing.

    >>> print(tabulate([[1, 2.34], [-56, "8.999"], ["2", "10001"]]))
    ---  ---------
      1      2.34
    -56      8.999
      2  10001
    ---  ---------

    The first required argument (`tabular_data`) can be a
    list-of-lists (or another iterable of iterables), a list of named
    tuples, a dictionary of iterables, an iterable of dictionaries,
    a two-dimensional NumPy array, NumPy record array, or a Pandas'
    dataframe.


    Table headers
    -------------

    To print nice column headers, supply the second argument (`headers`):

      - `headers` can be an explicit list of column headers
      - if `headers="firstrow"`, then the first row of data is used
      - if `headers="keys"`, then dictionary keys or column indices are used

    Otherwise a headerless table is produced.

    If the number of headers is less than the number of columns, they
    are supposed to be names of the last columns. This is consistent
    with the plain-text format of R and Pandas' dataframes.

    >>> print(tabulate([["sex","age"],["Alice","F",24],["Bob","M",19]],
    ...       headers="firstrow"))
           sex      age
    -----  -----  -----
    Alice  F         24
    Bob    M         19

    By default, pandas.DataFrame data have an additional column called
    row index. To add a similar column to all other types of data,
    use `showindex="always"` or `showindex=True`. To suppress row indices
    for all types of data, pass `showindex="never"` or `showindex=False`.
    To add a custom row index column, pass `showindex=some_iterable`.

    >>> print(tabulate([["F",24],["M",19]], showindex="always"))
    -  -  --
    0  F  24
    1  M  19
    -  -  --


    Column alignment
    ----------------

    `tabulate` tries to detect column types automatically, and aligns
    the values properly. By default it aligns decimal points of the
    numbers (or flushes integer numbers to the right), and flushes
    everything else to the left. Possible column alignments
    (`numalign`, `stralign`) are: "right", "center", "left", "decimal"
    (only for `numalign`), and None (to disable alignment).


    Table formats
    -------------

    `floatfmt` is a format specification used for columns which
    contain numeric data with a decimal point. This can also be
    a list or tuple of format strings, one per column.

    `None` values are replaced with a `missingval` string (like
    `floatfmt`, this can also be a list of values for different
    columns):

    >>> print(tabulate([["spam", 1, None],
    ...                 ["eggs", 42, 3.14],
    ...                 ["other", None, 2.7]], missingval="?"))
    -----  --  ----
    spam    1  ?
    eggs   42  3.14
    other   ?  2.7
    -----  --  ----

    Various plain-text table formats (`tablefmt`) are supported:
    'plain', 'simple', 'grid', 'pipe', 'orgtbl', 'rst', 'mediawiki',
    'latex', 'latex_raw' and 'latex_booktabs'. Variable `tabulate_formats`
    contains the list of currently supported formats.

    "plain" format doesn't use any pseudographics to draw tables,
    it separates columns with a double space:

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
    ...                 ["strings", "numbers"], "plain"))
    strings      numbers
    spam         41.9999
    eggs        451

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="plain"))
    spam   41.9999
    eggs  451

    "simple" format is like Pandoc simple_tables:

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
    ...                 ["strings", "numbers"], "simple"))
    strings      numbers
    ---------  ---------
    spam         41.9999
    eggs        451

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="simple"))
    ----  --------
    spam   41.9999
    eggs  451
    ----  --------

    "grid" is similar to tables produced by Emacs table.el package or
    Pandoc grid_tables:

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
    ...                ["strings", "numbers"], "grid"))
    +-----------+-----------+
    | strings   |   numbers |
    +===========+===========+
    | spam      |   41.9999 |
    +-----------+-----------+
    | eggs      |  451      |
    +-----------+-----------+

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="grid"))
    +------+----------+
    | spam |  41.9999 |
    +------+----------+
    | eggs | 451      |
    +------+----------+

    "fancy_grid" draws a grid using box-drawing characters:

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
    ...                ["strings", "numbers"], "fancy_grid"))
    ╒═══════════╤═══════════╕
    │ strings   │   numbers │
    ╞═══════════╪═══════════╡
    │ spam      │   41.9999 │
    ├───────────┼───────────┤
    │ eggs      │  451      │
    ╘═══════════╧═══════════╛

    "pipe" is like tables in PHP Markdown Extra extension or Pandoc
    pipe_tables:

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
    ...                ["strings", "numbers"], "pipe"))
    | strings   |   numbers |
    |:----------|----------:|
    | spam      |   41.9999 |
    | eggs      |  451      |

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="pipe"))
    |:-----|---------:|
    | spam |  41.9999 |
    | eggs | 451      |

    "presto" is like tables produced by the Presto CLI:

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
    ...                ["strings", "numbers"], "presto"))
     strings   |   numbers
    -----------+-----------
     spam      |   41.9999
     eggs      |  451

    "orgtbl" is like tables in Emacs org-mode and orgtbl-mode. They
    are slightly different from "pipe" format by not using colons to
    define column alignment, and using a "+" sign to indicate line
    intersections:

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
    ...                ["strings", "numbers"], "orgtbl"))
    | strings   |   numbers |
    |-----------+-----------|
    | spam      |   41.9999 |
    | eggs      |  451      |


    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="orgtbl"))
    | spam |  41.9999 |
    | eggs | 451      |

    "rst" is like a simple table format from reStructuredText; please
    note that reStructuredText accepts also "grid" tables:

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
    ...                ["strings", "numbers"], "rst"))
    =========  =========
    strings      numbers
    =========  =========
    spam         41.9999
    eggs        451
    =========  =========

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="rst"))
    ====  ========
    spam   41.9999
    eggs  451
    ====  ========

    "mediawiki" produces a table markup used in Wikipedia and on other
    MediaWiki-based sites:

    >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], ["eggs", "451.0"]],
    ...                headers="firstrow", tablefmt="mediawiki"))
    {| class="wikitable" style="text-align: left;"
    |+ <!-- caption -->
    |-
    ! strings   !! align="right"|   numbers
    |-
    | spam      || align="right"|   41.9999
    |-
    | eggs      || align="right"|  451
    |}

    "html" produces HTML markup:

    >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], ["eggs", "451.0"]],
    ...                headers="firstrow", tablefmt="html"))
    <table>
    <thead>
    <tr><th>strings  </th><th style="text-align: right;">  numbers</th></tr>
    </thead>
    <tbody>
    <tr><td>spam     </td><td style="text-align: right;">  41.9999</td></tr>
    <tr><td>eggs     </td><td style="text-align: right;"> 451     </td></tr>
    </tbody>
    </table>

    "latex" produces a tabular environment of LaTeX document markup:

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex"))
    \\begin{tabular}{lr}
    \\hline
     spam &  41.9999 \\\\
     eggs & 451      \\\\
    \\hline
    \\end{tabular}

    "latex_raw" is similar to "latex", but doesn't escape special characters,
    such as backslash and underscore, so LaTeX commands may be embedded into
    cells' values:

    >>> print(tabulate([["spam$_9$", 41.9999], ["\\\\emph{eggs}", "451.0"]], tablefmt="latex_raw"))
    \\begin{tabular}{lr}
    \\hline
     spam$_9$    &  41.9999 \\\\
     \\emph{eggs} & 451      \\\\
    \\hline
    \\end{tabular}

    "latex_booktabs" produces a tabular environment of LaTeX document markup
    using the booktabs.sty package:

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex_booktabs"))
    \\begin{tabular}{lr}
    \\toprule
     spam &  41.9999 \\\\
     eggs & 451      \\\\
    \\bottomrule
    \\end{tabular}

    Number parsing
    --------------
    By default, anything which can be parsed as a number is a number.
    This ensures numbers represented as strings are aligned properly.
    This can lead to weird results for particular strings such as
    specific git SHAs e.g. "42992e1" will be parsed into the number
    429920 and aligned as such.

    To completely disable number parsing (and alignment), use
    `disable_numparse=True`. For more fine grained control, a list of column
    indices can be passed to disable number parsing only on those columns
    e.g. `disable_numparse=[0, 2]` would disable number parsing only on the
    first and third columns.
    """
    # None is treated as an empty table
    if tabular_data is None:
        tabular_data = []
    list_of_lists, headers = _normalize_tabular_data(
            tabular_data, headers, showindex=showindex)

    # empty values in the first column of RST tables should be escaped (issue #82)
    # "" should be escaped as "\\ " or ".."
    if tablefmt == 'rst':
        list_of_lists, headers = _rst_escape_first_column(list_of_lists, headers)

    # optimization: look for ANSI control codes once,
    # enable smart width functions only if a control code is found
    plain_text = '\t'.join(['\t'.join(map(_text_type, headers))] + \
                            ['\t'.join(map(_text_type, row)) for row in list_of_lists])

    has_invisible = re.search(_invisible_codes, plain_text)
    enable_widechars = wcwidth is not None and WIDE_CHARS_MODE
    # switch to the multiline variant of the format only when the table text
    # actually contains multiline cells
    if tablefmt in multiline_formats and _is_multiline(plain_text):
        tablefmt = multiline_formats.get(tablefmt, tablefmt)
        is_multiline = True
    else:
        is_multiline = False
    width_fn = _choose_width_fn(has_invisible, enable_widechars, is_multiline)

    # format rows and columns, convert numeric values to strings
    cols = list(izip_longest(*list_of_lists))
    numparses = _expand_numparse(disable_numparse, len(cols))
    coltypes = [_column_type(col, numparse=np) for col, np in
                zip(cols, numparses)]
    if isinstance(floatfmt, basestring): #old version
        float_formats = len(cols) * [floatfmt] # just duplicate the string to use in each column
    else: # if floatfmt is list, tuple etc we have one per column
        float_formats = list(floatfmt)
        # columns without an explicit format fall back to the default one
        if len(float_formats) < len(cols):
            float_formats.extend( (len(cols)-len(float_formats)) * [_DEFAULT_FLOATFMT] )
    if isinstance(missingval, basestring):
        missing_vals = len(cols) * [missingval]
    else:
        missing_vals = list(missingval)
        # columns without an explicit placeholder fall back to the default one
        if len(missing_vals) < len(cols):
            missing_vals.extend( (len(cols)-len(missing_vals)) * [_DEFAULT_MISSINGVAL] )
    cols = [[_format(v, ct, fl_fmt, miss_v, has_invisible) for v in c]
             for c, ct, fl_fmt, miss_v in zip(cols, coltypes, float_formats, missing_vals)]

    # align columns
    aligns = [numalign if ct in [int,float] else stralign for ct in coltypes]
    minwidths = [width_fn(h) + MIN_PADDING for h in headers] if headers else [0]*len(cols)
    cols = [_align_column(c, a, minw, has_invisible, enable_widechars, is_multiline)
            for c, a, minw in zip(cols, aligns, minwidths)]

    if headers:
        # align headers and add headers
        # (a table with headers but no data rows gets one empty cell per header)
        t_cols = cols or [['']] * len(headers)
        t_aligns = aligns or [stralign] * len(headers)
        minwidths = [max(minw, max(width_fn(cl) for cl in c)) for minw, c in zip(minwidths, t_cols)]
        headers = [_align_header(h, a, minw, width_fn(h), is_multiline, width_fn)
                   for h, a, minw in zip(headers, t_aligns, minwidths)]
        rows = list(zip(*cols))
    else:
        minwidths = [max(width_fn(cl) for cl in c) for c in cols]
        rows = list(zip(*cols))

    # an unknown format name silently falls back to the "simple" format
    if not isinstance(tablefmt, TableFormat):
        tablefmt = _table_formats.get(tablefmt, _table_formats["simple"])

    return _format_table(tablefmt, headers, rows, minwidths, aligns, is_multiline)
  1101. def _expand_numparse(disable_numparse, column_count):
  1102. """
  1103. Return a list of bools of length `column_count` which indicates whether
  1104. number parsing should be used on each column.
  1105. If `disable_numparse` is a list of indices, each of those indices are False,
  1106. and everything else is True.
  1107. If `disable_numparse` is a bool, then the returned list is all the same.
  1108. """
  1109. if isinstance(disable_numparse, Iterable):
  1110. numparses = [True] * column_count
  1111. for index in disable_numparse:
  1112. numparses[index] = False
  1113. return numparses
  1114. else:
  1115. return [not disable_numparse] * column_count
  1116. def _pad_row(cells, padding):
  1117. if cells:
  1118. pad = " "*padding
  1119. padded_cells = [pad + cell + pad for cell in cells]
  1120. return padded_cells
  1121. else:
  1122. return cells
  1123. def _build_simple_row(padded_cells, rowfmt):
  1124. "Format row according to DataRow format without padding."
  1125. begin, sep, end = rowfmt
  1126. return (begin + sep.join(padded_cells) + end).rstrip()
  1127. def _build_row(padded_cells, colwidths, colaligns, rowfmt):
  1128. "Return a string which represents a row of data cells."
  1129. if not rowfmt:
  1130. return None
  1131. if hasattr(rowfmt, "__call__"):
  1132. return rowfmt(padded_cells, colwidths, colaligns)
  1133. else:
  1134. return _build_simple_row(padded_cells, rowfmt)
  1135. def _append_basic_row(lines, padded_cells, colwidths, colaligns, rowfmt):
  1136. lines.append(_build_row(padded_cells, colwidths, colaligns, rowfmt))
  1137. return lines
  1138. def _append_multiline_row(lines, padded_multiline_cells, padded_widths, colaligns, rowfmt, pad):
  1139. colwidths = [w - 2*pad for w in padded_widths]
  1140. cells_lines = [c.splitlines() for c in padded_multiline_cells]
  1141. nlines = max(map(len, cells_lines)) # number of lines in the row
  1142. # vertically pad cells where some lines are missing
  1143. cells_lines = [(cl + [' '*w]*(nlines - len(cl))) for cl, w in zip(cells_lines, colwidths)]
  1144. lines_cells = [[cl[i] for cl in cells_lines] for i in range(nlines)]
  1145. for ln in lines_cells:
  1146. padded_ln = _pad_row(ln, pad)
  1147. _append_basic_row(lines, padded_ln, colwidths, colaligns, rowfmt)
  1148. return lines
  1149. def _build_line(colwidths, colaligns, linefmt):
  1150. "Return a string which represents a horizontal line."
  1151. if not linefmt:
  1152. return None
  1153. if hasattr(linefmt, "__call__"):
  1154. return linefmt(colwidths, colaligns)
  1155. else:
  1156. begin, fill, sep, end = linefmt
  1157. cells = [fill*w for w in colwidths]
  1158. return _build_simple_row(cells, (begin, sep, end))
  1159. def _append_line(lines, colwidths, colaligns, linefmt):
  1160. lines.append(_build_line(colwidths, colaligns, linefmt))
  1161. return lines
  1162. def _format_table(fmt, headers, rows, colwidths, colaligns, is_multiline):
  1163. """Produce a plain-text representation of the table."""
  1164. lines = []
  1165. hidden = fmt.with_header_hide if (headers and fmt.with_header_hide) else []
  1166. pad = fmt.padding
  1167. headerrow = fmt.headerrow
  1168. padded_widths = [(w + 2*pad) for w in colwidths]
  1169. if is_multiline:
  1170. pad_row = lambda row, _: row # do it later, in _append_multiline_row
  1171. append_row = partial(_append_multiline_row, pad=pad)
  1172. else:
  1173. pad_row = _pad_row
  1174. append_row = _append_basic_row
  1175. padded_headers = pad_row(headers, pad)
  1176. padded_rows = [pad_row(row, pad) for row in rows]
  1177. if fmt.lineabove and "lineabove" not in hidden:
  1178. _append_line(lines, padded_widths, colaligns, fmt.lineabove)
  1179. if padded_headers:
  1180. append_row(lines, padded_headers, padded_widths, colaligns, headerrow)
  1181. if fmt.linebelowheader and "linebelowheader" not in hidden:
  1182. _append_line(lines, padded_widths, colaligns, fmt.linebelowheader)
  1183. if padded_rows and fmt.linebetweenrows and "linebetweenrows" not in hidden:
  1184. # initial rows with a line below
  1185. for row in padded_rows[:-1]:
  1186. append_row(lines, row, padded_widths, colaligns, fmt.datarow)
  1187. _append_line(lines, padded_widths, colaligns, fmt.linebetweenrows)
  1188. # the last row without a line below
  1189. append_row(lines, padded_rows[-1], padded_widths, colaligns, fmt.datarow)
  1190. else:
  1191. for row in padded_rows:
  1192. append_row(lines, row, padded_widths, colaligns, fmt.datarow)
  1193. if fmt.linebelow and "linebelow" not in hidden:
  1194. _append_line(lines, padded_widths, colaligns, fmt.linebelow)
  1195. if headers or rows:
  1196. return "\n".join(lines)
  1197. else: # a completely empty table
  1198. return ""
  1199. def _main():
  1200. """\
  1201. Usage: tabulate [options] [FILE ...]
  1202. Pretty-print tabular data.
  1203. See also https://bitbucket.org/astanin/python-tabulate
  1204. FILE a filename of the file with tabular data;
  1205. if "-" or missing, read data from stdin.
  1206. Options:
  1207. -h, --help show this message
  1208. -1, --header use the first row of data as a table header
  1209. -o FILE, --output FILE print table to FILE (default: stdout)
  1210. -s REGEXP, --sep REGEXP use a custom column separator (default: whitespace)
  1211. -F FPFMT, --float FPFMT floating point number format (default: g)
  1212. -f FMT, --format FMT set output table format; supported formats:
  1213. plain, simple, grid, fancy_grid, pipe, orgtbl,
  1214. rst, mediawiki, html, latex, latex_raw,
  1215. latex_booktabs, tsv
  1216. (default: simple)
  1217. """
  1218. import getopt
  1219. import sys
  1220. import textwrap
  1221. usage = textwrap.dedent(_main.__doc__)
  1222. try:
  1223. opts, args = getopt.getopt(sys.argv[1:],
  1224. "h1o:s:F:f:",
  1225. ["help", "header", "output", "sep=", "float=", "format="])
  1226. except getopt.GetoptError as e:
  1227. print(e)
  1228. print(usage)
  1229. sys.exit(2)
  1230. headers = []
  1231. floatfmt = _DEFAULT_FLOATFMT
  1232. tablefmt = "simple"
  1233. sep = r"\s+"
  1234. outfile = "-"
  1235. for opt, value in opts:
  1236. if opt in ["-1", "--header"]:
  1237. headers = "firstrow"
  1238. elif opt in ["-o", "--output"]:
  1239. outfile = value
  1240. elif opt in ["-F", "--float"]:
  1241. floatfmt = value
  1242. elif opt in ["-f", "--format"]:
  1243. if value not in tabulate_formats:
  1244. print("%s is not a supported table format" % value)
  1245. print(usage)
  1246. sys.exit(3)
  1247. tablefmt = value
  1248. elif opt in ["-s", "--sep"]:
  1249. sep = value
  1250. elif opt in ["-h", "--help"]:
  1251. print(usage)
  1252. sys.exit(0)
  1253. files = [sys.stdin] if not args else args
  1254. with (sys.stdout if outfile == "-" else open(outfile, "w")) as out:
  1255. for f in files:
  1256. if f == "-":
  1257. f = sys.stdin
  1258. if _is_file(f):
  1259. _pprint_file(f, headers=headers, tablefmt=tablefmt,
  1260. sep=sep, floatfmt=floatfmt, file=out)
  1261. else:
  1262. with open(f) as fobj:
  1263. _pprint_file(fobj, headers=headers, tablefmt=tablefmt,
  1264. sep=sep, floatfmt=floatfmt, file=out)
  1265. def _pprint_file(fobject, headers, tablefmt, sep, floatfmt, file):
  1266. rows = fobject.readlines()
  1267. table = [re.split(sep, r.rstrip()) for r in rows if r.strip()]
  1268. print(tabulate(table, headers, tablefmt, floatfmt=floatfmt), file=file)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    _main()