latex.py 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. # -*- coding: utf-8 -*-
  2. """
  3. Module for formatting output data in Latex.
  4. """
  5. from __future__ import print_function
  6. import numpy as np
  7. from pandas.compat import map, range, u, zip
  8. from pandas.core.dtypes.generic import ABCMultiIndex
  9. from pandas import compat
  10. from pandas.io.formats.format import TableFormatter
  11. class LatexFormatter(TableFormatter):
  12. """ Used to render a DataFrame to a LaTeX tabular/longtable environment
  13. output.
  14. Parameters
  15. ----------
  16. formatter : `DataFrameFormatter`
  17. column_format : str, default None
  18. The columns format as specified in `LaTeX table format
  19. <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g 'rcl' for 3 columns
  20. longtable : boolean, default False
  21. Use a longtable environment instead of tabular.
  22. See Also
  23. --------
  24. HTMLFormatter
  25. """
  26. def __init__(self, formatter, column_format=None, longtable=False,
  27. multicolumn=False, multicolumn_format=None, multirow=False):
  28. self.fmt = formatter
  29. self.frame = self.fmt.frame
  30. self.bold_rows = self.fmt.kwds.get('bold_rows', False)
  31. self.column_format = column_format
  32. self.longtable = longtable
  33. self.multicolumn = multicolumn
  34. self.multicolumn_format = multicolumn_format
  35. self.multirow = multirow
  36. def write_result(self, buf):
  37. """
  38. Render a DataFrame to a LaTeX tabular/longtable environment output.
  39. """
  40. # string representation of the columns
  41. if len(self.frame.columns) == 0 or len(self.frame.index) == 0:
  42. info_line = (u('Empty {name}\nColumns: {col}\nIndex: {idx}')
  43. .format(name=type(self.frame).__name__,
  44. col=self.frame.columns,
  45. idx=self.frame.index))
  46. strcols = [[info_line]]
  47. else:
  48. strcols = self.fmt._to_str_columns()
  49. def get_col_type(dtype):
  50. if issubclass(dtype.type, np.number):
  51. return 'r'
  52. else:
  53. return 'l'
  54. # reestablish the MultiIndex that has been joined by _to_str_column
  55. if self.fmt.index and isinstance(self.frame.index, ABCMultiIndex):
  56. out = self.frame.index.format(
  57. adjoin=False, sparsify=self.fmt.sparsify,
  58. names=self.fmt.has_index_names, na_rep=self.fmt.na_rep
  59. )
  60. # index.format will sparsify repeated entries with empty strings
  61. # so pad these with some empty space
  62. def pad_empties(x):
  63. for pad in reversed(x):
  64. if pad:
  65. break
  66. return [x[0]] + [i if i else ' ' * len(pad) for i in x[1:]]
  67. out = (pad_empties(i) for i in out)
  68. # Add empty spaces for each column level
  69. clevels = self.frame.columns.nlevels
  70. out = [[' ' * len(i[-1])] * clevels + i for i in out]
  71. # Add the column names to the last index column
  72. cnames = self.frame.columns.names
  73. if any(cnames):
  74. new_names = [i if i else '{}' for i in cnames]
  75. out[self.frame.index.nlevels - 1][:clevels] = new_names
  76. # Get rid of old multiindex column and add new ones
  77. strcols = out + strcols[1:]
  78. column_format = self.column_format
  79. if column_format is None:
  80. dtypes = self.frame.dtypes._values
  81. column_format = ''.join(map(get_col_type, dtypes))
  82. if self.fmt.index:
  83. index_format = 'l' * self.frame.index.nlevels
  84. column_format = index_format + column_format
  85. elif not isinstance(column_format,
  86. compat.string_types): # pragma: no cover
  87. raise AssertionError('column_format must be str or unicode, '
  88. 'not {typ}'.format(typ=type(column_format)))
  89. if not self.longtable:
  90. buf.write('\\begin{{tabular}}{{{fmt}}}\n'
  91. .format(fmt=column_format))
  92. buf.write('\\toprule\n')
  93. else:
  94. buf.write('\\begin{{longtable}}{{{fmt}}}\n'
  95. .format(fmt=column_format))
  96. buf.write('\\toprule\n')
  97. ilevels = self.frame.index.nlevels
  98. clevels = self.frame.columns.nlevels
  99. nlevels = clevels
  100. if self.fmt.has_index_names and self.fmt.show_index_names:
  101. nlevels += 1
  102. strrows = list(zip(*strcols))
  103. self.clinebuf = []
  104. for i, row in enumerate(strrows):
  105. if i == nlevels and self.fmt.header:
  106. buf.write('\\midrule\n') # End of header
  107. if self.longtable:
  108. buf.write('\\endhead\n')
  109. buf.write('\\midrule\n')
  110. buf.write('\\multicolumn{{{n}}}{{r}}{{{{Continued on next '
  111. 'page}}}} \\\\\n'.format(n=len(row)))
  112. buf.write('\\midrule\n')
  113. buf.write('\\endfoot\n\n')
  114. buf.write('\\bottomrule\n')
  115. buf.write('\\endlastfoot\n')
  116. if self.fmt.kwds.get('escape', True):
  117. # escape backslashes first
  118. crow = [(x.replace('\\', '\\textbackslash ')
  119. .replace('_', '\\_')
  120. .replace('%', '\\%').replace('$', '\\$')
  121. .replace('#', '\\#').replace('{', '\\{')
  122. .replace('}', '\\}').replace('~', '\\textasciitilde ')
  123. .replace('^', '\\textasciicircum ')
  124. .replace('&', '\\&')
  125. if (x and x != '{}') else '{}') for x in row]
  126. else:
  127. crow = [x if x else '{}' for x in row]
  128. if self.bold_rows and self.fmt.index:
  129. # bold row labels
  130. crow = ['\\textbf{{{x}}}'.format(x=x)
  131. if j < ilevels and x.strip() not in ['', '{}'] else x
  132. for j, x in enumerate(crow)]
  133. if i < clevels and self.fmt.header and self.multicolumn:
  134. # sum up columns to multicolumns
  135. crow = self._format_multicolumn(crow, ilevels)
  136. if (i >= nlevels and self.fmt.index and self.multirow and
  137. ilevels > 1):
  138. # sum up rows to multirows
  139. crow = self._format_multirow(crow, ilevels, i, strrows)
  140. buf.write(' & '.join(crow))
  141. buf.write(' \\\\\n')
  142. if self.multirow and i < len(strrows) - 1:
  143. self._print_cline(buf, i, len(strcols))
  144. if not self.longtable:
  145. buf.write('\\bottomrule\n')
  146. buf.write('\\end{tabular}\n')
  147. else:
  148. buf.write('\\end{longtable}\n')
  149. def _format_multicolumn(self, row, ilevels):
  150. r"""
  151. Combine columns belonging to a group to a single multicolumn entry
  152. according to self.multicolumn_format
  153. e.g.:
  154. a & & & b & c &
  155. will become
  156. \multicolumn{3}{l}{a} & b & \multicolumn{2}{l}{c}
  157. """
  158. row2 = list(row[:ilevels])
  159. ncol = 1
  160. coltext = ''
  161. def append_col():
  162. # write multicolumn if needed
  163. if ncol > 1:
  164. row2.append('\\multicolumn{{{ncol:d}}}{{{fmt:s}}}{{{txt:s}}}'
  165. .format(ncol=ncol, fmt=self.multicolumn_format,
  166. txt=coltext.strip()))
  167. # don't modify where not needed
  168. else:
  169. row2.append(coltext)
  170. for c in row[ilevels:]:
  171. # if next col has text, write the previous
  172. if c.strip():
  173. if coltext:
  174. append_col()
  175. coltext = c
  176. ncol = 1
  177. # if not, add it to the previous multicolumn
  178. else:
  179. ncol += 1
  180. # write last column name
  181. if coltext:
  182. append_col()
  183. return row2
  184. def _format_multirow(self, row, ilevels, i, rows):
  185. r"""
  186. Check following rows, whether row should be a multirow
  187. e.g.: becomes:
  188. a & 0 & \multirow{2}{*}{a} & 0 &
  189. & 1 & & 1 &
  190. b & 0 & \cline{1-2}
  191. b & 0 &
  192. """
  193. for j in range(ilevels):
  194. if row[j].strip():
  195. nrow = 1
  196. for r in rows[i + 1:]:
  197. if not r[j].strip():
  198. nrow += 1
  199. else:
  200. break
  201. if nrow > 1:
  202. # overwrite non-multirow entry
  203. row[j] = '\\multirow{{{nrow:d}}}{{*}}{{{row:s}}}'.format(
  204. nrow=nrow, row=row[j].strip())
  205. # save when to end the current block with \cline
  206. self.clinebuf.append([i + nrow - 1, j + 1])
  207. return row
  208. def _print_cline(self, buf, i, icol):
  209. """
  210. Print clines after multirow-blocks are finished
  211. """
  212. for cl in self.clinebuf:
  213. if cl[0] == i:
  214. buf.write('\\cline{{{cl:d}-{icol:d}}}\n'
  215. .format(cl=cl[1], icol=icol))
  216. # remove entries that have been written to buffer
  217. self.clinebuf = [x for x in self.clinebuf if x[0] != i]