123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664 |
- """Utilities for conversion to writer-agnostic Excel representation
- """
- import itertools
- import re
- import warnings
- import numpy as np
- from pandas.compat import reduce
- from pandas.core.dtypes import missing
- from pandas.core.dtypes.common import is_float, is_scalar
- from pandas.core.dtypes.generic import ABCMultiIndex, ABCPeriodIndex
- from pandas import Index
- import pandas.core.common as com
- from pandas.io.formats.css import CSSResolver, CSSWarning
- from pandas.io.formats.format import get_level_lengths
- from pandas.io.formats.printing import pprint_thing
- class ExcelCell(object):
- __fields__ = ('row', 'col', 'val', 'style', 'mergestart', 'mergeend')
- __slots__ = __fields__
- def __init__(self, row, col, val, style=None, mergestart=None,
- mergeend=None):
- self.row = row
- self.col = col
- self.val = val
- self.style = style
- self.mergestart = mergestart
- self.mergeend = mergeend
- class CSSToExcelConverter(object):
- """A callable for converting CSS declarations to ExcelWriter styles
- Supports parts of CSS 2.2, with minimal CSS 3.0 support (e.g. text-shadow),
- focusing on font styling, backgrounds, borders and alignment.
- Operates by first computing CSS styles in a fairly generic
- way (see :meth:`compute_css`) then determining Excel style
- properties from CSS properties (see :meth:`build_xlstyle`).
- Parameters
- ----------
- inherited : str, optional
- CSS declarations understood to be the containing scope for the
- CSS processed by :meth:`__call__`.
- """
- # NB: Most of the methods here could be classmethods, as only __init__
- # and __call__ make use of instance attributes. We leave them as
- # instancemethods so that users can easily experiment with extensions
- # without monkey-patching.
- def __init__(self, inherited=None):
- if inherited is not None:
- inherited = self.compute_css(inherited,
- self.compute_css.INITIAL_STYLE)
- self.inherited = inherited
- compute_css = CSSResolver()
- def __call__(self, declarations_str):
- """Convert CSS declarations to ExcelWriter style
- Parameters
- ----------
- declarations_str : str
- List of CSS declarations.
- e.g. "font-weight: bold; background: blue"
- Returns
- -------
- xlstyle : dict
- A style as interpreted by ExcelWriter when found in
- ExcelCell.style.
- """
- # TODO: memoize?
- properties = self.compute_css(declarations_str, self.inherited)
- return self.build_xlstyle(properties)
- def build_xlstyle(self, props):
- out = {
- 'alignment': self.build_alignment(props),
- 'border': self.build_border(props),
- 'fill': self.build_fill(props),
- 'font': self.build_font(props),
- 'number_format': self.build_number_format(props),
- }
- # TODO: handle cell width and height: needs support in pandas.io.excel
- def remove_none(d):
- """Remove key where value is None, through nested dicts"""
- for k, v in list(d.items()):
- if v is None:
- del d[k]
- elif isinstance(v, dict):
- remove_none(v)
- if not v:
- del d[k]
- remove_none(out)
- return out
- VERTICAL_MAP = {
- 'top': 'top',
- 'text-top': 'top',
- 'middle': 'center',
- 'baseline': 'bottom',
- 'bottom': 'bottom',
- 'text-bottom': 'bottom',
- # OpenXML also has 'justify', 'distributed'
- }
- def build_alignment(self, props):
- # TODO: text-indent, padding-left -> alignment.indent
- return {'horizontal': props.get('text-align'),
- 'vertical': self.VERTICAL_MAP.get(props.get('vertical-align')),
- 'wrap_text': (None if props.get('white-space') is None else
- props['white-space'] not in
- ('nowrap', 'pre', 'pre-line'))
- }
- def build_border(self, props):
- return {side: {
- 'style': self._border_style(props.get('border-{side}-style'
- .format(side=side)),
- props.get('border-{side}-width'
- .format(side=side))),
- 'color': self.color_to_excel(
- props.get('border-{side}-color'.format(side=side))),
- } for side in ['top', 'right', 'bottom', 'left']}
- def _border_style(self, style, width):
- # convert styles and widths to openxml, one of:
- # 'dashDot'
- # 'dashDotDot'
- # 'dashed'
- # 'dotted'
- # 'double'
- # 'hair'
- # 'medium'
- # 'mediumDashDot'
- # 'mediumDashDotDot'
- # 'mediumDashed'
- # 'slantDashDot'
- # 'thick'
- # 'thin'
- if width is None and style is None:
- return None
- if style == 'none' or style == 'hidden':
- return None
- if width is None:
- width = '2pt'
- width = float(width[:-2])
- if width < 1e-5:
- return None
- elif width < 1.3:
- width_name = 'thin'
- elif width < 2.8:
- width_name = 'medium'
- else:
- width_name = 'thick'
- if style in (None, 'groove', 'ridge', 'inset', 'outset'):
- # not handled
- style = 'solid'
- if style == 'double':
- return 'double'
- if style == 'solid':
- return width_name
- if style == 'dotted':
- if width_name in ('hair', 'thin'):
- return 'dotted'
- return 'mediumDashDotDot'
- if style == 'dashed':
- if width_name in ('hair', 'thin'):
- return 'dashed'
- return 'mediumDashed'
- def build_fill(self, props):
- # TODO: perhaps allow for special properties
- # -excel-pattern-bgcolor and -excel-pattern-type
- fill_color = props.get('background-color')
- if fill_color not in (None, 'transparent', 'none'):
- return {
- 'fgColor': self.color_to_excel(fill_color),
- 'patternType': 'solid',
- }
- BOLD_MAP = {'bold': True, 'bolder': True, '600': True, '700': True,
- '800': True, '900': True,
- 'normal': False, 'lighter': False, '100': False, '200': False,
- '300': False, '400': False, '500': False}
- ITALIC_MAP = {'normal': False, 'italic': True, 'oblique': True}
- def build_font(self, props):
- size = props.get('font-size')
- if size is not None:
- assert size.endswith('pt')
- size = float(size[:-2])
- font_names_tmp = re.findall(r'''(?x)
- (
- "(?:[^"]|\\")+"
- |
- '(?:[^']|\\')+'
- |
- [^'",]+
- )(?=,|\s*$)
- ''', props.get('font-family', ''))
- font_names = []
- for name in font_names_tmp:
- if name[:1] == '"':
- name = name[1:-1].replace('\\"', '"')
- elif name[:1] == '\'':
- name = name[1:-1].replace('\\\'', '\'')
- else:
- name = name.strip()
- if name:
- font_names.append(name)
- family = None
- for name in font_names:
- if name == 'serif':
- family = 1 # roman
- break
- elif name == 'sans-serif':
- family = 2 # swiss
- break
- elif name == 'cursive':
- family = 4 # script
- break
- elif name == 'fantasy':
- family = 5 # decorative
- break
- decoration = props.get('text-decoration')
- if decoration is not None:
- decoration = decoration.split()
- else:
- decoration = ()
- return {
- 'name': font_names[0] if font_names else None,
- 'family': family,
- 'size': size,
- 'bold': self.BOLD_MAP.get(props.get('font-weight')),
- 'italic': self.ITALIC_MAP.get(props.get('font-style')),
- 'underline': ('single' if
- 'underline' in decoration
- else None),
- 'strike': ('line-through' in decoration) or None,
- 'color': self.color_to_excel(props.get('color')),
- # shadow if nonzero digit before shadow color
- 'shadow': (bool(re.search('^[^#(]*[1-9]',
- props['text-shadow']))
- if 'text-shadow' in props else None),
- # 'vertAlign':,
- # 'charset': ,
- # 'scheme': ,
- # 'outline': ,
- # 'condense': ,
- }
- NAMED_COLORS = {
- 'maroon': '800000',
- 'brown': 'A52A2A',
- 'red': 'FF0000',
- 'pink': 'FFC0CB',
- 'orange': 'FFA500',
- 'yellow': 'FFFF00',
- 'olive': '808000',
- 'green': '008000',
- 'purple': '800080',
- 'fuchsia': 'FF00FF',
- 'lime': '00FF00',
- 'teal': '008080',
- 'aqua': '00FFFF',
- 'blue': '0000FF',
- 'navy': '000080',
- 'black': '000000',
- 'gray': '808080',
- 'grey': '808080',
- 'silver': 'C0C0C0',
- 'white': 'FFFFFF',
- }
- def color_to_excel(self, val):
- if val is None:
- return None
- if val.startswith('#') and len(val) == 7:
- return val[1:].upper()
- if val.startswith('#') and len(val) == 4:
- return (val[1] * 2 + val[2] * 2 + val[3] * 2).upper()
- try:
- return self.NAMED_COLORS[val]
- except KeyError:
- warnings.warn('Unhandled color format: {val!r}'.format(val=val),
- CSSWarning)
- def build_number_format(self, props):
- return {'format_code': props.get('number-format')}
- class ExcelFormatter(object):
- """
- Class for formatting a DataFrame to a list of ExcelCells,
- Parameters
- ----------
- df : DataFrame or Styler
- na_rep: na representation
- float_format : string, default None
- Format string for floating point numbers
- cols : sequence, optional
- Columns to write
- header : boolean or list of string, default True
- Write out column names. If a list of string is given it is
- assumed to be aliases for the column names
- index : boolean, default True
- output row names (index)
- index_label : string or sequence, default None
- Column label for index column(s) if desired. If None is given, and
- `header` and `index` are True, then the index names are used. A
- sequence should be given if the DataFrame uses MultiIndex.
- merge_cells : boolean, default False
- Format MultiIndex and Hierarchical Rows as merged cells.
- inf_rep : string, default `'inf'`
- representation for np.inf values (which aren't representable in Excel)
- A `'-'` sign will be added in front of -inf.
- style_converter : callable, optional
- This translates Styler styles (CSS) into ExcelWriter styles.
- Defaults to ``CSSToExcelConverter()``.
- It should have signature css_declarations string -> excel style.
- This is only called for body cells.
- """
- def __init__(self, df, na_rep='', float_format=None, cols=None,
- header=True, index=True, index_label=None, merge_cells=False,
- inf_rep='inf', style_converter=None):
- self.rowcounter = 0
- self.na_rep = na_rep
- if hasattr(df, 'render'):
- self.styler = df
- df = df.data
- if style_converter is None:
- style_converter = CSSToExcelConverter()
- self.style_converter = style_converter
- else:
- self.styler = None
- self.df = df
- if cols is not None:
- # all missing, raise
- if not len(Index(cols) & df.columns):
- raise KeyError(
- "passes columns are not ALL present dataframe")
- # deprecatedin gh-17295
- # 1 missing is ok (for now)
- if len(Index(cols) & df.columns) != len(cols):
- warnings.warn(
- "Not all names specified in 'columns' are found; "
- "this will raise a KeyError in the future",
- FutureWarning)
- self.df = df.reindex(columns=cols)
- self.columns = self.df.columns
- self.float_format = float_format
- self.index = index
- self.index_label = index_label
- self.header = header
- self.merge_cells = merge_cells
- self.inf_rep = inf_rep
- @property
- def header_style(self):
- return {"font": {"bold": True},
- "borders": {"top": "thin",
- "right": "thin",
- "bottom": "thin",
- "left": "thin"},
- "alignment": {"horizontal": "center",
- "vertical": "top"}}
- def _format_value(self, val):
- if is_scalar(val) and missing.isna(val):
- val = self.na_rep
- elif is_float(val):
- if missing.isposinf_scalar(val):
- val = self.inf_rep
- elif missing.isneginf_scalar(val):
- val = '-{inf}'.format(inf=self.inf_rep)
- elif self.float_format is not None:
- val = float(self.float_format % val)
- return val
- def _format_header_mi(self):
- if self.columns.nlevels > 1:
- if not self.index:
- raise NotImplementedError("Writing to Excel with MultiIndex"
- " columns and no index "
- "('index'=False) is not yet "
- "implemented.")
- has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index))
- if not (has_aliases or self.header):
- return
- columns = self.columns
- level_strs = columns.format(sparsify=self.merge_cells, adjoin=False,
- names=False)
- level_lengths = get_level_lengths(level_strs)
- coloffset = 0
- lnum = 0
- if self.index and isinstance(self.df.index, ABCMultiIndex):
- coloffset = len(self.df.index[0]) - 1
- if self.merge_cells:
- # Format multi-index as a merged cells.
- for lnum in range(len(level_lengths)):
- name = columns.names[lnum]
- yield ExcelCell(lnum, coloffset, name, self.header_style)
- for lnum, (spans, levels, level_codes) in enumerate(zip(
- level_lengths, columns.levels, columns.codes)):
- values = levels.take(level_codes)
- for i in spans:
- if spans[i] > 1:
- yield ExcelCell(lnum, coloffset + i + 1, values[i],
- self.header_style, lnum,
- coloffset + i + spans[i])
- else:
- yield ExcelCell(lnum, coloffset + i + 1, values[i],
- self.header_style)
- else:
- # Format in legacy format with dots to indicate levels.
- for i, values in enumerate(zip(*level_strs)):
- v = ".".join(map(pprint_thing, values))
- yield ExcelCell(lnum, coloffset + i + 1, v, self.header_style)
- self.rowcounter = lnum
- def _format_header_regular(self):
- has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index))
- if has_aliases or self.header:
- coloffset = 0
- if self.index:
- coloffset = 1
- if isinstance(self.df.index, ABCMultiIndex):
- coloffset = len(self.df.index[0])
- colnames = self.columns
- if has_aliases:
- if len(self.header) != len(self.columns):
- raise ValueError('Writing {cols} cols but got {alias} '
- 'aliases'.format(cols=len(self.columns),
- alias=len(self.header)))
- else:
- colnames = self.header
- for colindex, colname in enumerate(colnames):
- yield ExcelCell(self.rowcounter, colindex + coloffset, colname,
- self.header_style)
- def _format_header(self):
- if isinstance(self.columns, ABCMultiIndex):
- gen = self._format_header_mi()
- else:
- gen = self._format_header_regular()
- gen2 = ()
- if self.df.index.names:
- row = [x if x is not None else ''
- for x in self.df.index.names] + [''] * len(self.columns)
- if reduce(lambda x, y: x and y, map(lambda x: x != '', row)):
- gen2 = (ExcelCell(self.rowcounter, colindex, val,
- self.header_style)
- for colindex, val in enumerate(row))
- self.rowcounter += 1
- return itertools.chain(gen, gen2)
- def _format_body(self):
- if isinstance(self.df.index, ABCMultiIndex):
- return self._format_hierarchical_rows()
- else:
- return self._format_regular_rows()
- def _format_regular_rows(self):
- has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index))
- if has_aliases or self.header:
- self.rowcounter += 1
- # output index and index_label?
- if self.index:
- # check aliases
- # if list only take first as this is not a MultiIndex
- if (self.index_label and
- isinstance(self.index_label, (list, tuple, np.ndarray,
- Index))):
- index_label = self.index_label[0]
- # if string good to go
- elif self.index_label and isinstance(self.index_label, str):
- index_label = self.index_label
- else:
- index_label = self.df.index.names[0]
- if isinstance(self.columns, ABCMultiIndex):
- self.rowcounter += 1
- if index_label and self.header is not False:
- yield ExcelCell(self.rowcounter - 1, 0, index_label,
- self.header_style)
- # write index_values
- index_values = self.df.index
- if isinstance(self.df.index, ABCPeriodIndex):
- index_values = self.df.index.to_timestamp()
- for idx, idxval in enumerate(index_values):
- yield ExcelCell(self.rowcounter + idx, 0, idxval,
- self.header_style)
- coloffset = 1
- else:
- coloffset = 0
- for cell in self._generate_body(coloffset):
- yield cell
- def _format_hierarchical_rows(self):
- has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index))
- if has_aliases or self.header:
- self.rowcounter += 1
- gcolidx = 0
- if self.index:
- index_labels = self.df.index.names
- # check for aliases
- if (self.index_label and
- isinstance(self.index_label, (list, tuple, np.ndarray,
- Index))):
- index_labels = self.index_label
- # MultiIndex columns require an extra row
- # with index names (blank if None) for
- # unambigous round-trip, unless not merging,
- # in which case the names all go on one row Issue #11328
- if isinstance(self.columns, ABCMultiIndex) and self.merge_cells:
- self.rowcounter += 1
- # if index labels are not empty go ahead and dump
- if com._any_not_none(*index_labels) and self.header is not False:
- for cidx, name in enumerate(index_labels):
- yield ExcelCell(self.rowcounter - 1, cidx, name,
- self.header_style)
- if self.merge_cells:
- # Format hierarchical rows as merged cells.
- level_strs = self.df.index.format(sparsify=True, adjoin=False,
- names=False)
- level_lengths = get_level_lengths(level_strs)
- for spans, levels, level_codes in zip(level_lengths,
- self.df.index.levels,
- self.df.index.codes):
- values = levels.take(level_codes,
- allow_fill=levels._can_hold_na,
- fill_value=True)
- for i in spans:
- if spans[i] > 1:
- yield ExcelCell(self.rowcounter + i, gcolidx,
- values[i], self.header_style,
- self.rowcounter + i + spans[i] - 1,
- gcolidx)
- else:
- yield ExcelCell(self.rowcounter + i, gcolidx,
- values[i], self.header_style)
- gcolidx += 1
- else:
- # Format hierarchical rows with non-merged values.
- for indexcolvals in zip(*self.df.index):
- for idx, indexcolval in enumerate(indexcolvals):
- yield ExcelCell(self.rowcounter + idx, gcolidx,
- indexcolval, self.header_style)
- gcolidx += 1
- for cell in self._generate_body(gcolidx):
- yield cell
- def _generate_body(self, coloffset):
- if self.styler is None:
- styles = None
- else:
- styles = self.styler._compute().ctx
- if not styles:
- styles = None
- xlstyle = None
- # Write the body of the frame data series by series.
- for colidx in range(len(self.columns)):
- series = self.df.iloc[:, colidx]
- for i, val in enumerate(series):
- if styles is not None:
- xlstyle = self.style_converter(';'.join(styles[i, colidx]))
- yield ExcelCell(self.rowcounter + i, colidx + coloffset, val,
- xlstyle)
- def get_formatted_cells(self):
- for cell in itertools.chain(self._format_header(),
- self._format_body()):
- cell.val = self._format_value(cell.val)
- yield cell
- def write(self, writer, sheet_name='Sheet1', startrow=0,
- startcol=0, freeze_panes=None, engine=None):
- """
- writer : string or ExcelWriter object
- File path or existing ExcelWriter
- sheet_name : string, default 'Sheet1'
- Name of sheet which will contain DataFrame
- startrow :
- upper left cell row to dump data frame
- startcol :
- upper left cell column to dump data frame
- freeze_panes : tuple of integer (length 2), default None
- Specifies the one-based bottommost row and rightmost column that
- is to be frozen
- engine : string, default None
- write engine to use if writer is a path - you can also set this
- via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``,
- and ``io.excel.xlsm.writer``.
- """
- from pandas.io.excel import ExcelWriter
- from pandas.io.common import _stringify_path
- if isinstance(writer, ExcelWriter):
- need_save = False
- else:
- writer = ExcelWriter(_stringify_path(writer), engine=engine)
- need_save = True
- formatted_cells = self.get_formatted_cells()
- writer.write_cells(formatted_cells, sheet_name,
- startrow=startrow, startcol=startcol,
- freeze_panes=freeze_panes)
- if need_save:
- writer.save()
|