align.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. """Core eval alignment algorithms
  2. """
  3. from functools import partial, wraps
  4. import warnings
  5. import numpy as np
  6. from pandas.compat import range, zip
  7. from pandas.errors import PerformanceWarning
  8. import pandas as pd
  9. from pandas import compat
  10. import pandas.core.common as com
  11. from pandas.core.computation.common import _result_type_many
  12. def _align_core_single_unary_op(term):
  13. if isinstance(term.value, np.ndarray):
  14. typ = partial(np.asanyarray, dtype=term.value.dtype)
  15. else:
  16. typ = type(term.value)
  17. ret = typ,
  18. if not hasattr(term.value, 'axes'):
  19. ret += None,
  20. else:
  21. ret += _zip_axes_from_type(typ, term.value.axes),
  22. return ret
  23. def _zip_axes_from_type(typ, new_axes):
  24. axes = {ax_name: new_axes[ax_ind]
  25. for ax_ind, ax_name in compat.iteritems(typ._AXIS_NAMES)}
  26. return axes
  27. def _any_pandas_objects(terms):
  28. """Check a sequence of terms for instances of PandasObject."""
  29. return any(isinstance(term.value, pd.core.generic.PandasObject)
  30. for term in terms)
  31. def _filter_special_cases(f):
  32. @wraps(f)
  33. def wrapper(terms):
  34. # single unary operand
  35. if len(terms) == 1:
  36. return _align_core_single_unary_op(terms[0])
  37. term_values = (term.value for term in terms)
  38. # we don't have any pandas objects
  39. if not _any_pandas_objects(terms):
  40. return _result_type_many(*term_values), None
  41. return f(terms)
  42. return wrapper
  43. @_filter_special_cases
  44. def _align_core(terms):
  45. term_index = [i for i, term in enumerate(terms)
  46. if hasattr(term.value, 'axes')]
  47. term_dims = [terms[i].value.ndim for i in term_index]
  48. ndims = pd.Series(dict(zip(term_index, term_dims)))
  49. # initial axes are the axes of the largest-axis'd term
  50. biggest = terms[ndims.idxmax()].value
  51. typ = biggest._constructor
  52. axes = biggest.axes
  53. naxes = len(axes)
  54. gt_than_one_axis = naxes > 1
  55. for value in (terms[i].value for i in term_index):
  56. is_series = isinstance(value, pd.Series)
  57. is_series_and_gt_one_axis = is_series and gt_than_one_axis
  58. for axis, items in enumerate(value.axes):
  59. if is_series_and_gt_one_axis:
  60. ax, itm = naxes - 1, value.index
  61. else:
  62. ax, itm = axis, items
  63. if not axes[ax].is_(itm):
  64. axes[ax] = axes[ax].join(itm, how='outer')
  65. for i, ndim in compat.iteritems(ndims):
  66. for axis, items in zip(range(ndim), axes):
  67. ti = terms[i].value
  68. if hasattr(ti, 'reindex'):
  69. transpose = isinstance(ti, pd.Series) and naxes > 1
  70. reindexer = axes[naxes - 1] if transpose else items
  71. term_axis_size = len(ti.axes[axis])
  72. reindexer_size = len(reindexer)
  73. ordm = np.log10(max(1, abs(reindexer_size - term_axis_size)))
  74. if ordm >= 1 and reindexer_size >= 10000:
  75. w = ('Alignment difference on axis {axis} is larger '
  76. 'than an order of magnitude on term {term!r}, by '
  77. 'more than {ordm:.4g}; performance may suffer'
  78. ).format(axis=axis, term=terms[i].name, ordm=ordm)
  79. warnings.warn(w, category=PerformanceWarning, stacklevel=6)
  80. f = partial(ti.reindex, reindexer, axis=axis, copy=False)
  81. terms[i].update(f())
  82. terms[i].update(terms[i].value.values)
  83. return typ, _zip_axes_from_type(typ, axes)
  84. def _align(terms):
  85. """Align a set of terms"""
  86. try:
  87. # flatten the parse tree (a nested list, really)
  88. terms = list(com.flatten(terms))
  89. except TypeError:
  90. # can't iterate so it must just be a constant or single variable
  91. if isinstance(terms.value, pd.core.generic.NDFrame):
  92. typ = type(terms.value)
  93. return typ, _zip_axes_from_type(typ, terms.value.axes)
  94. return np.result_type(terms.type), None
  95. # if all resolved variables are numeric scalars
  96. if all(term.is_scalar for term in terms):
  97. return _result_type_many(*(term.value for term in terms)).type, None
  98. # perform the main alignment
  99. typ, axes = _align_core(terms)
  100. return typ, axes
  101. def _reconstruct_object(typ, obj, axes, dtype):
  102. """Reconstruct an object given its type, raw value, and possibly empty
  103. (None) axes.
  104. Parameters
  105. ----------
  106. typ : object
  107. A type
  108. obj : object
  109. The value to use in the type constructor
  110. axes : dict
  111. The axes to use to construct the resulting pandas object
  112. Returns
  113. -------
  114. ret : typ
  115. An object of type ``typ`` with the value `obj` and possible axes
  116. `axes`.
  117. """
  118. try:
  119. typ = typ.type
  120. except AttributeError:
  121. pass
  122. res_t = np.result_type(obj.dtype, dtype)
  123. if (not isinstance(typ, partial) and
  124. issubclass(typ, pd.core.generic.PandasObject)):
  125. return typ(obj, dtype=res_t, **axes)
  126. # special case for pathological things like ~True/~False
  127. if hasattr(res_t, 'type') and typ == np.bool_ and res_t != np.bool_:
  128. ret_value = res_t.type(obj)
  129. else:
  130. ret_value = typ(obj).astype(res_t)
  131. # The condition is to distinguish 0-dim array (returned in case of
  132. # scalar) and 1 element array
  133. # e.g. np.array(0) and np.array([0])
  134. if len(obj.shape) == 1 and len(obj) == 1:
  135. if not isinstance(ret_value, np.ndarray):
  136. ret_value = np.array([ret_value]).astype(res_t)
  137. return ret_value