excel.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664
  1. """Utilities for conversion to writer-agnostic Excel representation
  2. """
  3. import itertools
  4. import re
  5. import warnings
  6. import numpy as np
  7. from pandas.compat import reduce
  8. from pandas.core.dtypes import missing
  9. from pandas.core.dtypes.common import is_float, is_scalar
  10. from pandas.core.dtypes.generic import ABCMultiIndex, ABCPeriodIndex
  11. from pandas import Index
  12. import pandas.core.common as com
  13. from pandas.io.formats.css import CSSResolver, CSSWarning
  14. from pandas.io.formats.format import get_level_lengths
  15. from pandas.io.formats.printing import pprint_thing
  16. class ExcelCell(object):
  17. __fields__ = ('row', 'col', 'val', 'style', 'mergestart', 'mergeend')
  18. __slots__ = __fields__
  19. def __init__(self, row, col, val, style=None, mergestart=None,
  20. mergeend=None):
  21. self.row = row
  22. self.col = col
  23. self.val = val
  24. self.style = style
  25. self.mergestart = mergestart
  26. self.mergeend = mergeend
  27. class CSSToExcelConverter(object):
  28. """A callable for converting CSS declarations to ExcelWriter styles
  29. Supports parts of CSS 2.2, with minimal CSS 3.0 support (e.g. text-shadow),
  30. focusing on font styling, backgrounds, borders and alignment.
  31. Operates by first computing CSS styles in a fairly generic
  32. way (see :meth:`compute_css`) then determining Excel style
  33. properties from CSS properties (see :meth:`build_xlstyle`).
  34. Parameters
  35. ----------
  36. inherited : str, optional
  37. CSS declarations understood to be the containing scope for the
  38. CSS processed by :meth:`__call__`.
  39. """
  40. # NB: Most of the methods here could be classmethods, as only __init__
  41. # and __call__ make use of instance attributes. We leave them as
  42. # instancemethods so that users can easily experiment with extensions
  43. # without monkey-patching.
  44. def __init__(self, inherited=None):
  45. if inherited is not None:
  46. inherited = self.compute_css(inherited,
  47. self.compute_css.INITIAL_STYLE)
  48. self.inherited = inherited
  49. compute_css = CSSResolver()
  50. def __call__(self, declarations_str):
  51. """Convert CSS declarations to ExcelWriter style
  52. Parameters
  53. ----------
  54. declarations_str : str
  55. List of CSS declarations.
  56. e.g. "font-weight: bold; background: blue"
  57. Returns
  58. -------
  59. xlstyle : dict
  60. A style as interpreted by ExcelWriter when found in
  61. ExcelCell.style.
  62. """
  63. # TODO: memoize?
  64. properties = self.compute_css(declarations_str, self.inherited)
  65. return self.build_xlstyle(properties)
  66. def build_xlstyle(self, props):
  67. out = {
  68. 'alignment': self.build_alignment(props),
  69. 'border': self.build_border(props),
  70. 'fill': self.build_fill(props),
  71. 'font': self.build_font(props),
  72. 'number_format': self.build_number_format(props),
  73. }
  74. # TODO: handle cell width and height: needs support in pandas.io.excel
  75. def remove_none(d):
  76. """Remove key where value is None, through nested dicts"""
  77. for k, v in list(d.items()):
  78. if v is None:
  79. del d[k]
  80. elif isinstance(v, dict):
  81. remove_none(v)
  82. if not v:
  83. del d[k]
  84. remove_none(out)
  85. return out
  86. VERTICAL_MAP = {
  87. 'top': 'top',
  88. 'text-top': 'top',
  89. 'middle': 'center',
  90. 'baseline': 'bottom',
  91. 'bottom': 'bottom',
  92. 'text-bottom': 'bottom',
  93. # OpenXML also has 'justify', 'distributed'
  94. }
  95. def build_alignment(self, props):
  96. # TODO: text-indent, padding-left -> alignment.indent
  97. return {'horizontal': props.get('text-align'),
  98. 'vertical': self.VERTICAL_MAP.get(props.get('vertical-align')),
  99. 'wrap_text': (None if props.get('white-space') is None else
  100. props['white-space'] not in
  101. ('nowrap', 'pre', 'pre-line'))
  102. }
  103. def build_border(self, props):
  104. return {side: {
  105. 'style': self._border_style(props.get('border-{side}-style'
  106. .format(side=side)),
  107. props.get('border-{side}-width'
  108. .format(side=side))),
  109. 'color': self.color_to_excel(
  110. props.get('border-{side}-color'.format(side=side))),
  111. } for side in ['top', 'right', 'bottom', 'left']}
  112. def _border_style(self, style, width):
  113. # convert styles and widths to openxml, one of:
  114. # 'dashDot'
  115. # 'dashDotDot'
  116. # 'dashed'
  117. # 'dotted'
  118. # 'double'
  119. # 'hair'
  120. # 'medium'
  121. # 'mediumDashDot'
  122. # 'mediumDashDotDot'
  123. # 'mediumDashed'
  124. # 'slantDashDot'
  125. # 'thick'
  126. # 'thin'
  127. if width is None and style is None:
  128. return None
  129. if style == 'none' or style == 'hidden':
  130. return None
  131. if width is None:
  132. width = '2pt'
  133. width = float(width[:-2])
  134. if width < 1e-5:
  135. return None
  136. elif width < 1.3:
  137. width_name = 'thin'
  138. elif width < 2.8:
  139. width_name = 'medium'
  140. else:
  141. width_name = 'thick'
  142. if style in (None, 'groove', 'ridge', 'inset', 'outset'):
  143. # not handled
  144. style = 'solid'
  145. if style == 'double':
  146. return 'double'
  147. if style == 'solid':
  148. return width_name
  149. if style == 'dotted':
  150. if width_name in ('hair', 'thin'):
  151. return 'dotted'
  152. return 'mediumDashDotDot'
  153. if style == 'dashed':
  154. if width_name in ('hair', 'thin'):
  155. return 'dashed'
  156. return 'mediumDashed'
  157. def build_fill(self, props):
  158. # TODO: perhaps allow for special properties
  159. # -excel-pattern-bgcolor and -excel-pattern-type
  160. fill_color = props.get('background-color')
  161. if fill_color not in (None, 'transparent', 'none'):
  162. return {
  163. 'fgColor': self.color_to_excel(fill_color),
  164. 'patternType': 'solid',
  165. }
  166. BOLD_MAP = {'bold': True, 'bolder': True, '600': True, '700': True,
  167. '800': True, '900': True,
  168. 'normal': False, 'lighter': False, '100': False, '200': False,
  169. '300': False, '400': False, '500': False}
  170. ITALIC_MAP = {'normal': False, 'italic': True, 'oblique': True}
  171. def build_font(self, props):
  172. size = props.get('font-size')
  173. if size is not None:
  174. assert size.endswith('pt')
  175. size = float(size[:-2])
  176. font_names_tmp = re.findall(r'''(?x)
  177. (
  178. "(?:[^"]|\\")+"
  179. |
  180. '(?:[^']|\\')+'
  181. |
  182. [^'",]+
  183. )(?=,|\s*$)
  184. ''', props.get('font-family', ''))
  185. font_names = []
  186. for name in font_names_tmp:
  187. if name[:1] == '"':
  188. name = name[1:-1].replace('\\"', '"')
  189. elif name[:1] == '\'':
  190. name = name[1:-1].replace('\\\'', '\'')
  191. else:
  192. name = name.strip()
  193. if name:
  194. font_names.append(name)
  195. family = None
  196. for name in font_names:
  197. if name == 'serif':
  198. family = 1 # roman
  199. break
  200. elif name == 'sans-serif':
  201. family = 2 # swiss
  202. break
  203. elif name == 'cursive':
  204. family = 4 # script
  205. break
  206. elif name == 'fantasy':
  207. family = 5 # decorative
  208. break
  209. decoration = props.get('text-decoration')
  210. if decoration is not None:
  211. decoration = decoration.split()
  212. else:
  213. decoration = ()
  214. return {
  215. 'name': font_names[0] if font_names else None,
  216. 'family': family,
  217. 'size': size,
  218. 'bold': self.BOLD_MAP.get(props.get('font-weight')),
  219. 'italic': self.ITALIC_MAP.get(props.get('font-style')),
  220. 'underline': ('single' if
  221. 'underline' in decoration
  222. else None),
  223. 'strike': ('line-through' in decoration) or None,
  224. 'color': self.color_to_excel(props.get('color')),
  225. # shadow if nonzero digit before shadow color
  226. 'shadow': (bool(re.search('^[^#(]*[1-9]',
  227. props['text-shadow']))
  228. if 'text-shadow' in props else None),
  229. # 'vertAlign':,
  230. # 'charset': ,
  231. # 'scheme': ,
  232. # 'outline': ,
  233. # 'condense': ,
  234. }
  235. NAMED_COLORS = {
  236. 'maroon': '800000',
  237. 'brown': 'A52A2A',
  238. 'red': 'FF0000',
  239. 'pink': 'FFC0CB',
  240. 'orange': 'FFA500',
  241. 'yellow': 'FFFF00',
  242. 'olive': '808000',
  243. 'green': '008000',
  244. 'purple': '800080',
  245. 'fuchsia': 'FF00FF',
  246. 'lime': '00FF00',
  247. 'teal': '008080',
  248. 'aqua': '00FFFF',
  249. 'blue': '0000FF',
  250. 'navy': '000080',
  251. 'black': '000000',
  252. 'gray': '808080',
  253. 'grey': '808080',
  254. 'silver': 'C0C0C0',
  255. 'white': 'FFFFFF',
  256. }
  257. def color_to_excel(self, val):
  258. if val is None:
  259. return None
  260. if val.startswith('#') and len(val) == 7:
  261. return val[1:].upper()
  262. if val.startswith('#') and len(val) == 4:
  263. return (val[1] * 2 + val[2] * 2 + val[3] * 2).upper()
  264. try:
  265. return self.NAMED_COLORS[val]
  266. except KeyError:
  267. warnings.warn('Unhandled color format: {val!r}'.format(val=val),
  268. CSSWarning)
  269. def build_number_format(self, props):
  270. return {'format_code': props.get('number-format')}
  271. class ExcelFormatter(object):
  272. """
  273. Class for formatting a DataFrame to a list of ExcelCells,
  274. Parameters
  275. ----------
  276. df : DataFrame or Styler
  277. na_rep: na representation
  278. float_format : string, default None
  279. Format string for floating point numbers
  280. cols : sequence, optional
  281. Columns to write
  282. header : boolean or list of string, default True
  283. Write out column names. If a list of string is given it is
  284. assumed to be aliases for the column names
  285. index : boolean, default True
  286. output row names (index)
  287. index_label : string or sequence, default None
  288. Column label for index column(s) if desired. If None is given, and
  289. `header` and `index` are True, then the index names are used. A
  290. sequence should be given if the DataFrame uses MultiIndex.
  291. merge_cells : boolean, default False
  292. Format MultiIndex and Hierarchical Rows as merged cells.
  293. inf_rep : string, default `'inf'`
  294. representation for np.inf values (which aren't representable in Excel)
  295. A `'-'` sign will be added in front of -inf.
  296. style_converter : callable, optional
  297. This translates Styler styles (CSS) into ExcelWriter styles.
  298. Defaults to ``CSSToExcelConverter()``.
  299. It should have signature css_declarations string -> excel style.
  300. This is only called for body cells.
  301. """
  302. def __init__(self, df, na_rep='', float_format=None, cols=None,
  303. header=True, index=True, index_label=None, merge_cells=False,
  304. inf_rep='inf', style_converter=None):
  305. self.rowcounter = 0
  306. self.na_rep = na_rep
  307. if hasattr(df, 'render'):
  308. self.styler = df
  309. df = df.data
  310. if style_converter is None:
  311. style_converter = CSSToExcelConverter()
  312. self.style_converter = style_converter
  313. else:
  314. self.styler = None
  315. self.df = df
  316. if cols is not None:
  317. # all missing, raise
  318. if not len(Index(cols) & df.columns):
  319. raise KeyError(
  320. "passes columns are not ALL present dataframe")
  321. # deprecatedin gh-17295
  322. # 1 missing is ok (for now)
  323. if len(Index(cols) & df.columns) != len(cols):
  324. warnings.warn(
  325. "Not all names specified in 'columns' are found; "
  326. "this will raise a KeyError in the future",
  327. FutureWarning)
  328. self.df = df.reindex(columns=cols)
  329. self.columns = self.df.columns
  330. self.float_format = float_format
  331. self.index = index
  332. self.index_label = index_label
  333. self.header = header
  334. self.merge_cells = merge_cells
  335. self.inf_rep = inf_rep
  336. @property
  337. def header_style(self):
  338. return {"font": {"bold": True},
  339. "borders": {"top": "thin",
  340. "right": "thin",
  341. "bottom": "thin",
  342. "left": "thin"},
  343. "alignment": {"horizontal": "center",
  344. "vertical": "top"}}
  345. def _format_value(self, val):
  346. if is_scalar(val) and missing.isna(val):
  347. val = self.na_rep
  348. elif is_float(val):
  349. if missing.isposinf_scalar(val):
  350. val = self.inf_rep
  351. elif missing.isneginf_scalar(val):
  352. val = '-{inf}'.format(inf=self.inf_rep)
  353. elif self.float_format is not None:
  354. val = float(self.float_format % val)
  355. return val
  356. def _format_header_mi(self):
  357. if self.columns.nlevels > 1:
  358. if not self.index:
  359. raise NotImplementedError("Writing to Excel with MultiIndex"
  360. " columns and no index "
  361. "('index'=False) is not yet "
  362. "implemented.")
  363. has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index))
  364. if not (has_aliases or self.header):
  365. return
  366. columns = self.columns
  367. level_strs = columns.format(sparsify=self.merge_cells, adjoin=False,
  368. names=False)
  369. level_lengths = get_level_lengths(level_strs)
  370. coloffset = 0
  371. lnum = 0
  372. if self.index and isinstance(self.df.index, ABCMultiIndex):
  373. coloffset = len(self.df.index[0]) - 1
  374. if self.merge_cells:
  375. # Format multi-index as a merged cells.
  376. for lnum in range(len(level_lengths)):
  377. name = columns.names[lnum]
  378. yield ExcelCell(lnum, coloffset, name, self.header_style)
  379. for lnum, (spans, levels, level_codes) in enumerate(zip(
  380. level_lengths, columns.levels, columns.codes)):
  381. values = levels.take(level_codes)
  382. for i in spans:
  383. if spans[i] > 1:
  384. yield ExcelCell(lnum, coloffset + i + 1, values[i],
  385. self.header_style, lnum,
  386. coloffset + i + spans[i])
  387. else:
  388. yield ExcelCell(lnum, coloffset + i + 1, values[i],
  389. self.header_style)
  390. else:
  391. # Format in legacy format with dots to indicate levels.
  392. for i, values in enumerate(zip(*level_strs)):
  393. v = ".".join(map(pprint_thing, values))
  394. yield ExcelCell(lnum, coloffset + i + 1, v, self.header_style)
  395. self.rowcounter = lnum
  396. def _format_header_regular(self):
  397. has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index))
  398. if has_aliases or self.header:
  399. coloffset = 0
  400. if self.index:
  401. coloffset = 1
  402. if isinstance(self.df.index, ABCMultiIndex):
  403. coloffset = len(self.df.index[0])
  404. colnames = self.columns
  405. if has_aliases:
  406. if len(self.header) != len(self.columns):
  407. raise ValueError('Writing {cols} cols but got {alias} '
  408. 'aliases'.format(cols=len(self.columns),
  409. alias=len(self.header)))
  410. else:
  411. colnames = self.header
  412. for colindex, colname in enumerate(colnames):
  413. yield ExcelCell(self.rowcounter, colindex + coloffset, colname,
  414. self.header_style)
  415. def _format_header(self):
  416. if isinstance(self.columns, ABCMultiIndex):
  417. gen = self._format_header_mi()
  418. else:
  419. gen = self._format_header_regular()
  420. gen2 = ()
  421. if self.df.index.names:
  422. row = [x if x is not None else ''
  423. for x in self.df.index.names] + [''] * len(self.columns)
  424. if reduce(lambda x, y: x and y, map(lambda x: x != '', row)):
  425. gen2 = (ExcelCell(self.rowcounter, colindex, val,
  426. self.header_style)
  427. for colindex, val in enumerate(row))
  428. self.rowcounter += 1
  429. return itertools.chain(gen, gen2)
  430. def _format_body(self):
  431. if isinstance(self.df.index, ABCMultiIndex):
  432. return self._format_hierarchical_rows()
  433. else:
  434. return self._format_regular_rows()
  435. def _format_regular_rows(self):
  436. has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index))
  437. if has_aliases or self.header:
  438. self.rowcounter += 1
  439. # output index and index_label?
  440. if self.index:
  441. # check aliases
  442. # if list only take first as this is not a MultiIndex
  443. if (self.index_label and
  444. isinstance(self.index_label, (list, tuple, np.ndarray,
  445. Index))):
  446. index_label = self.index_label[0]
  447. # if string good to go
  448. elif self.index_label and isinstance(self.index_label, str):
  449. index_label = self.index_label
  450. else:
  451. index_label = self.df.index.names[0]
  452. if isinstance(self.columns, ABCMultiIndex):
  453. self.rowcounter += 1
  454. if index_label and self.header is not False:
  455. yield ExcelCell(self.rowcounter - 1, 0, index_label,
  456. self.header_style)
  457. # write index_values
  458. index_values = self.df.index
  459. if isinstance(self.df.index, ABCPeriodIndex):
  460. index_values = self.df.index.to_timestamp()
  461. for idx, idxval in enumerate(index_values):
  462. yield ExcelCell(self.rowcounter + idx, 0, idxval,
  463. self.header_style)
  464. coloffset = 1
  465. else:
  466. coloffset = 0
  467. for cell in self._generate_body(coloffset):
  468. yield cell
  469. def _format_hierarchical_rows(self):
  470. has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index))
  471. if has_aliases or self.header:
  472. self.rowcounter += 1
  473. gcolidx = 0
  474. if self.index:
  475. index_labels = self.df.index.names
  476. # check for aliases
  477. if (self.index_label and
  478. isinstance(self.index_label, (list, tuple, np.ndarray,
  479. Index))):
  480. index_labels = self.index_label
  481. # MultiIndex columns require an extra row
  482. # with index names (blank if None) for
  483. # unambigous round-trip, unless not merging,
  484. # in which case the names all go on one row Issue #11328
  485. if isinstance(self.columns, ABCMultiIndex) and self.merge_cells:
  486. self.rowcounter += 1
  487. # if index labels are not empty go ahead and dump
  488. if com._any_not_none(*index_labels) and self.header is not False:
  489. for cidx, name in enumerate(index_labels):
  490. yield ExcelCell(self.rowcounter - 1, cidx, name,
  491. self.header_style)
  492. if self.merge_cells:
  493. # Format hierarchical rows as merged cells.
  494. level_strs = self.df.index.format(sparsify=True, adjoin=False,
  495. names=False)
  496. level_lengths = get_level_lengths(level_strs)
  497. for spans, levels, level_codes in zip(level_lengths,
  498. self.df.index.levels,
  499. self.df.index.codes):
  500. values = levels.take(level_codes,
  501. allow_fill=levels._can_hold_na,
  502. fill_value=True)
  503. for i in spans:
  504. if spans[i] > 1:
  505. yield ExcelCell(self.rowcounter + i, gcolidx,
  506. values[i], self.header_style,
  507. self.rowcounter + i + spans[i] - 1,
  508. gcolidx)
  509. else:
  510. yield ExcelCell(self.rowcounter + i, gcolidx,
  511. values[i], self.header_style)
  512. gcolidx += 1
  513. else:
  514. # Format hierarchical rows with non-merged values.
  515. for indexcolvals in zip(*self.df.index):
  516. for idx, indexcolval in enumerate(indexcolvals):
  517. yield ExcelCell(self.rowcounter + idx, gcolidx,
  518. indexcolval, self.header_style)
  519. gcolidx += 1
  520. for cell in self._generate_body(gcolidx):
  521. yield cell
  522. def _generate_body(self, coloffset):
  523. if self.styler is None:
  524. styles = None
  525. else:
  526. styles = self.styler._compute().ctx
  527. if not styles:
  528. styles = None
  529. xlstyle = None
  530. # Write the body of the frame data series by series.
  531. for colidx in range(len(self.columns)):
  532. series = self.df.iloc[:, colidx]
  533. for i, val in enumerate(series):
  534. if styles is not None:
  535. xlstyle = self.style_converter(';'.join(styles[i, colidx]))
  536. yield ExcelCell(self.rowcounter + i, colidx + coloffset, val,
  537. xlstyle)
  538. def get_formatted_cells(self):
  539. for cell in itertools.chain(self._format_header(),
  540. self._format_body()):
  541. cell.val = self._format_value(cell.val)
  542. yield cell
  543. def write(self, writer, sheet_name='Sheet1', startrow=0,
  544. startcol=0, freeze_panes=None, engine=None):
  545. """
  546. writer : string or ExcelWriter object
  547. File path or existing ExcelWriter
  548. sheet_name : string, default 'Sheet1'
  549. Name of sheet which will contain DataFrame
  550. startrow :
  551. upper left cell row to dump data frame
  552. startcol :
  553. upper left cell column to dump data frame
  554. freeze_panes : tuple of integer (length 2), default None
  555. Specifies the one-based bottommost row and rightmost column that
  556. is to be frozen
  557. engine : string, default None
  558. write engine to use if writer is a path - you can also set this
  559. via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``,
  560. and ``io.excel.xlsm.writer``.
  561. """
  562. from pandas.io.excel import ExcelWriter
  563. from pandas.io.common import _stringify_path
  564. if isinstance(writer, ExcelWriter):
  565. need_save = False
  566. else:
  567. writer = ExcelWriter(_stringify_path(writer), engine=engine)
  568. need_save = True
  569. formatted_cells = self.get_formatted_cells()
  570. writer.write_cells(formatted_cells, sheet_name,
  571. startrow=startrow, startcol=startcol,
  572. freeze_panes=freeze_panes)
  573. if need_save:
  574. writer.save()