dataframe.py 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. from __future__ import absolute_import
  2. # Copyright (c) 2010-2019 openpyxl
  3. import operator
  4. from openpyxl.compat import accumulate
  5. def dataframe_to_rows(df, index=True, header=True):
  6. """
  7. Convert a Pandas dataframe into something suitable for passing into a worksheet.
  8. If index is True then the index will be included, starting one row below the header.
  9. If header is True then column headers will be included starting one column to the right.
  10. Formatting should be done by client code.
  11. """
  12. import numpy
  13. from pandas import Timestamp
  14. blocks = df._data.blocks
  15. ncols = sum(b.shape[0] for b in blocks)
  16. data = [None] * ncols
  17. for b in blocks:
  18. values = b.values
  19. if b.dtype.type == numpy.datetime64:
  20. values = numpy.array([Timestamp(v) for v in values.ravel()])
  21. values = values.reshape(b.shape)
  22. result = values.tolist()
  23. for col_loc, col in zip(b.mgr_locs, result):
  24. data[col_loc] = col
  25. if header:
  26. if df.columns.nlevels > 1:
  27. rows = expand_levels(df.columns.levels, df.columns.labels)
  28. else:
  29. rows = [list(df.columns.values)]
  30. for row in rows:
  31. n = []
  32. for v in row:
  33. if isinstance(v, numpy.datetime64):
  34. v = Timestamp(v)
  35. n.append(v)
  36. row = n
  37. if index:
  38. row = [None]*df.index.nlevels + row
  39. yield row
  40. if index:
  41. yield df.index.names
  42. for idx, v in enumerate(df.index):
  43. row = [data[j][idx] for j in range(ncols)]
  44. if index:
  45. row = [v] + row
  46. yield row
  47. def expand_levels(levels, labels):
  48. """
  49. Multiindexes need expanding so that subtitles repeat
  50. """
  51. for label, order in zip(levels, labels):
  52. current = None
  53. row = []
  54. for idx in order:
  55. if current == idx:
  56. row.append(None)
  57. else:
  58. row.append(label[idx])
  59. current = idx
  60. yield row