ops.py
  1. """
  2. Arithmetic operations for PandasObjects
  3. This is not a public API.
  4. """
  5. # necessary to enforce truediv in Python 2.X
  6. from __future__ import division
  7. import datetime
  8. import operator
  9. import textwrap
  10. import warnings
  11. import numpy as np
  12. from pandas._libs import algos as libalgos, lib, ops as libops
  13. import pandas.compat as compat
  14. from pandas.compat import bind_method
  15. from pandas.errors import NullFrequencyError
  16. from pandas.util._decorators import Appender
  17. from pandas.core.dtypes.cast import (
  18. construct_1d_object_array_from_listlike, find_common_type,
  19. maybe_upcast_putmask)
  20. from pandas.core.dtypes.common import (
  21. ensure_object, is_bool_dtype, is_categorical_dtype, is_datetime64_dtype,
  22. is_datetime64tz_dtype, is_datetimelike_v_numeric, is_extension_array_dtype,
  23. is_integer_dtype, is_list_like, is_object_dtype, is_period_dtype,
  24. is_scalar, is_timedelta64_dtype, needs_i8_conversion)
  25. from pandas.core.dtypes.generic import (
  26. ABCDataFrame, ABCIndex, ABCIndexClass, ABCPanel, ABCSeries, ABCSparseArray,
  27. ABCSparseSeries)
  28. from pandas.core.dtypes.missing import isna, notna
  29. import pandas as pd
  30. import pandas.core.common as com
  31. import pandas.core.missing as missing
  32. # -----------------------------------------------------------------------------
  33. # Ops Wrapping Utilities
  34. def get_op_result_name(left, right):
  35. """
  36. Find the appropriate name to pin to an operation result. This result
  37. should always be either an Index or a Series.
  38. Parameters
  39. ----------
  40. left : {Series, Index}
  41. right : object
  42. Returns
  43. -------
  44. name : object
  45. Usually a string
  46. """
  47. # `left` is always a pd.Series when called from within ops
  48. if isinstance(right, (ABCSeries, pd.Index)):
  49. name = _maybe_match_name(left, right)
  50. else:
  51. name = left.name
  52. return name
  53. def _maybe_match_name(a, b):
  54. """
  55. Try to find a name to attach to the result of an operation between
  56. a and b. If only one of these has a `name` attribute, return that
57. name. Otherwise return a consensus name if they match or None if
  58. they have different names.
  59. Parameters
  60. ----------
  61. a : object
  62. b : object
  63. Returns
  64. -------
  65. name : str or None
  66. See Also
  67. --------
  68. pandas.core.common.consensus_name_attr
  69. """
  70. a_has = hasattr(a, 'name')
  71. b_has = hasattr(b, 'name')
  72. if a_has and b_has:
  73. if a.name == b.name:
  74. return a.name
  75. else:
  76. # TODO: what if they both have np.nan for their names?
  77. return None
  78. elif a_has:
  79. return a.name
  80. elif b_has:
  81. return b.name
  82. return None
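# Illustrative usage of the helpers above (not part of the original module):
# the result name is kept only when both operands agree on it.
#     >>> s1 = pd.Series([1, 2], name='x')
#     >>> s2 = pd.Series([3, 4], name='x')
#     >>> get_op_result_name(s1, s2)
#     'x'
#     >>> get_op_result_name(s1, pd.Series([3, 4], name='y')) is None
#     True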
  83. def maybe_upcast_for_op(obj):
  84. """
  85. Cast non-pandas objects to pandas types to unify behavior of arithmetic
  86. and comparison operations.
  87. Parameters
  88. ----------
  89. obj: object
  90. Returns
  91. -------
  92. out : object
  93. Notes
  94. -----
  95. Be careful to call this *after* determining the `name` attribute to be
  96. attached to the result of the arithmetic operation.
  97. """
  98. if type(obj) is datetime.timedelta:
  99. # GH#22390 cast up to Timedelta to rely on Timedelta
  100. # implementation; otherwise operation against numeric-dtype
  101. # raises TypeError
  102. return pd.Timedelta(obj)
  103. elif isinstance(obj, np.timedelta64) and not isna(obj):
  104. # In particular non-nanosecond timedelta64 needs to be cast to
  105. # nanoseconds, or else we get undesired behavior like
  106. # np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D')
  107. # The isna check is to avoid casting timedelta64("NaT"), which would
  108. # return NaT and incorrectly be treated as a datetime-NaT.
  109. return pd.Timedelta(obj)
  110. elif isinstance(obj, np.ndarray) and is_timedelta64_dtype(obj):
  111. # GH#22390 Unfortunately we need to special-case right-hand
  112. # timedelta64 dtypes because numpy casts integer dtypes to
  113. # timedelta64 when operating with timedelta64
  114. return pd.TimedeltaIndex(obj)
  115. return obj
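# Illustrative usage (not part of the original module): a raw np.timedelta64
# is upcast to Timedelta so that scalar division keeps sub-day resolution,
#     >>> maybe_upcast_for_op(np.timedelta64(3, 'D')) / 2
#     Timedelta('1 days 12:00:00')
# whereas the bare np.timedelta64 would truncate to whole days, as noted in
# the comment above.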
  116. # -----------------------------------------------------------------------------
  117. # Reversed Operations not available in the stdlib operator module.
  118. # Defining these instead of using lambdas allows us to reference them by name.
  119. def radd(left, right):
  120. return right + left
  121. def rsub(left, right):
  122. return right - left
  123. def rmul(left, right):
  124. return right * left
  125. def rdiv(left, right):
  126. return right / left
  127. def rtruediv(left, right):
  128. return right / left
  129. def rfloordiv(left, right):
  130. return right // left
  131. def rmod(left, right):
132. # check if right is a string, as % is the string
133. # formatting operation; raise a TypeError in that case,
134. # otherwise perform the op
  135. if isinstance(right, compat.string_types):
  136. raise TypeError("{typ} cannot perform the operation mod".format(
  137. typ=type(left).__name__))
  138. return right % left
  139. def rdivmod(left, right):
  140. return divmod(right, left)
  141. def rpow(left, right):
  142. return right ** left
  143. def rand_(left, right):
  144. return operator.and_(right, left)
  145. def ror_(left, right):
  146. return operator.or_(right, left)
  147. def rxor(left, right):
  148. return operator.xor(right, left)
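# Illustrative usage (not part of the original module): the reversed ops just
# swap their arguments, which matters for the non-commutative operators.
#     >>> rsub(2, 10)       # computes 10 - 2
#     8
#     >>> rtruediv(2, 10)   # computes 10 / 2
#     5.0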
  149. # -----------------------------------------------------------------------------
  150. def make_invalid_op(name):
  151. """
  152. Return a binary method that always raises a TypeError.
  153. Parameters
  154. ----------
  155. name : str
  156. Returns
  157. -------
  158. invalid_op : function
  159. """
  160. def invalid_op(self, other=None):
  161. raise TypeError("cannot perform {name} with this index type: "
  162. "{typ}".format(name=name, typ=type(self).__name__))
  163. invalid_op.__name__ = name
  164. return invalid_op
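# Illustrative usage (not part of the original module):
#     >>> bad_add = make_invalid_op('__add__')
#     >>> bad_add(pd.Index([1, 2]))
#     Traceback (most recent call last):
#         ...
#     TypeError: cannot perform __add__ with this index type: Int64Index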
  165. def _gen_eval_kwargs(name):
  166. """
  167. Find the keyword arguments to pass to numexpr for the given operation.
  168. Parameters
  169. ----------
  170. name : str
  171. Returns
  172. -------
  173. eval_kwargs : dict
  174. Examples
  175. --------
  176. >>> _gen_eval_kwargs("__add__")
  177. {}
  178. >>> _gen_eval_kwargs("rtruediv")
  179. {'reversed': True, 'truediv': True}
  180. """
  181. kwargs = {}
  182. # Series and Panel appear to only pass __add__, __radd__, ...
  183. # but DataFrame gets both these dunder names _and_ non-dunder names
  184. # add, radd, ...
  185. name = name.replace('__', '')
  186. if name.startswith('r'):
  187. if name not in ['radd', 'rand', 'ror', 'rxor']:
  188. # Exclude commutative operations
  189. kwargs['reversed'] = True
  190. if name in ['truediv', 'rtruediv']:
  191. kwargs['truediv'] = True
  192. if name in ['ne']:
  193. kwargs['masker'] = True
  194. return kwargs
  195. def _gen_fill_zeros(name):
  196. """
  197. Find the appropriate fill value to use when filling in undefined values
  198. in the results of the given operation caused by operating on
  199. (generally dividing by) zero.
  200. Parameters
  201. ----------
  202. name : str
  203. Returns
  204. -------
  205. fill_value : {None, np.nan, np.inf}
  206. """
  207. name = name.strip('__')
  208. if 'div' in name:
  209. # truediv, floordiv, div, and reversed variants
  210. fill_value = np.inf
  211. elif 'mod' in name:
  212. # mod, rmod
  213. fill_value = np.nan
  214. else:
  215. fill_value = None
  216. return fill_value
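# Illustrative usage (not part of the original module):
#     >>> _gen_fill_zeros('__floordiv__')
#     inf
#     >>> _gen_fill_zeros('rmod')
#     nan
#     >>> _gen_fill_zeros('__add__') is None
#     True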
  217. def _get_frame_op_default_axis(name):
  218. """
  219. Only DataFrame cares about default_axis, specifically:
  220. special methods have default_axis=None and flex methods
  221. have default_axis='columns'.
  222. Parameters
  223. ----------
  224. name : str
  225. Returns
  226. -------
  227. default_axis: str or None
  228. """
  229. if name.replace('__r', '__') in ['__and__', '__or__', '__xor__']:
  230. # bool methods
  231. return 'columns'
  232. elif name.startswith('__'):
  233. # __add__, __mul__, ...
  234. return None
  235. else:
  236. # add, mul, ...
  237. return 'columns'
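# Illustrative usage (not part of the original module):
#     >>> _get_frame_op_default_axis('__add__') is None
#     True
#     >>> _get_frame_op_default_axis('add')
#     'columns'
#     >>> _get_frame_op_default_axis('__rand__')
#     'columns'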
  238. def _get_opstr(op, cls):
  239. """
  240. Find the operation string, if any, to pass to numexpr for this
  241. operation.
  242. Parameters
  243. ----------
  244. op : binary operator
  245. cls : class
  246. Returns
  247. -------
  248. op_str : string or None
  249. """
  250. # numexpr is available for non-sparse classes
  251. subtyp = getattr(cls, '_subtyp', '')
  252. use_numexpr = 'sparse' not in subtyp
  253. if not use_numexpr:
  254. # if we're not using numexpr, then don't pass a str_rep
  255. return None
  256. return {operator.add: '+',
  257. radd: '+',
  258. operator.mul: '*',
  259. rmul: '*',
  260. operator.sub: '-',
  261. rsub: '-',
  262. operator.truediv: '/',
  263. rtruediv: '/',
  264. operator.floordiv: '//',
  265. rfloordiv: '//',
  266. operator.mod: None, # TODO: Why None for mod but '%' for rmod?
  267. rmod: '%',
  268. operator.pow: '**',
  269. rpow: '**',
  270. operator.eq: '==',
  271. operator.ne: '!=',
  272. operator.le: '<=',
  273. operator.lt: '<',
  274. operator.ge: '>=',
  275. operator.gt: '>',
  276. operator.and_: '&',
  277. rand_: '&',
  278. operator.or_: '|',
  279. ror_: '|',
  280. operator.xor: '^',
  281. rxor: '^',
  282. divmod: None,
  283. rdivmod: None}[op]
  284. def _get_op_name(op, special):
  285. """
  286. Find the name to attach to this method according to conventions
  287. for special and non-special methods.
  288. Parameters
  289. ----------
  290. op : binary operator
  291. special : bool
  292. Returns
  293. -------
  294. op_name : str
  295. """
  296. opname = op.__name__.strip('_')
  297. if special:
  298. opname = '__{opname}__'.format(opname=opname)
  299. return opname
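# Illustrative usage (not part of the original module):
#     >>> _get_op_name(operator.add, special=True)
#     '__add__'
#     >>> _get_op_name(rtruediv, special=False)
#     'rtruediv'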
  300. # -----------------------------------------------------------------------------
  301. # Docstring Generation and Templates
  302. _op_descriptions = {
  303. # Arithmetic Operators
  304. 'add': {'op': '+',
  305. 'desc': 'Addition',
  306. 'reverse': 'radd'},
  307. 'sub': {'op': '-',
  308. 'desc': 'Subtraction',
  309. 'reverse': 'rsub'},
  310. 'mul': {'op': '*',
  311. 'desc': 'Multiplication',
  312. 'reverse': 'rmul',
  313. 'df_examples': None},
  314. 'mod': {'op': '%',
  315. 'desc': 'Modulo',
  316. 'reverse': 'rmod'},
  317. 'pow': {'op': '**',
  318. 'desc': 'Exponential power',
  319. 'reverse': 'rpow',
  320. 'df_examples': None},
  321. 'truediv': {'op': '/',
  322. 'desc': 'Floating division',
  323. 'reverse': 'rtruediv',
  324. 'df_examples': None},
  325. 'floordiv': {'op': '//',
  326. 'desc': 'Integer division',
  327. 'reverse': 'rfloordiv',
  328. 'df_examples': None},
  329. 'divmod': {'op': 'divmod',
  330. 'desc': 'Integer division and modulo',
  331. 'reverse': 'rdivmod',
  332. 'df_examples': None},
  333. # Comparison Operators
  334. 'eq': {'op': '==',
  335. 'desc': 'Equal to',
  336. 'reverse': None},
  337. 'ne': {'op': '!=',
  338. 'desc': 'Not equal to',
  339. 'reverse': None},
  340. 'lt': {'op': '<',
  341. 'desc': 'Less than',
  342. 'reverse': None},
  343. 'le': {'op': '<=',
  344. 'desc': 'Less than or equal to',
  345. 'reverse': None},
  346. 'gt': {'op': '>',
  347. 'desc': 'Greater than',
  348. 'reverse': None},
  349. 'ge': {'op': '>=',
  350. 'desc': 'Greater than or equal to',
  351. 'reverse': None}
  352. }
  353. _op_names = list(_op_descriptions.keys())
  354. for key in _op_names:
  355. _op_descriptions[key]['reversed'] = False
  356. reverse_op = _op_descriptions[key]['reverse']
  357. if reverse_op is not None:
  358. _op_descriptions[reverse_op] = _op_descriptions[key].copy()
  359. _op_descriptions[reverse_op]['reversed'] = True
  360. _op_descriptions[reverse_op]['reverse'] = key
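# Illustrative result of the loop above (not part of the original module):
# each reversed operation gets a copied entry pointing back at its pair.
#     >>> _op_descriptions['radd'] == {'op': '+', 'desc': 'Addition',
#     ...                              'reversed': True, 'reverse': 'add'}
#     True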
  361. _flex_doc_SERIES = """
  362. {desc} of series and other, element-wise (binary operator `{op_name}`).
  363. Equivalent to ``{equiv}``, but with support to substitute a fill_value for
  364. missing data in one of the inputs.
  365. Parameters
  366. ----------
  367. other : Series or scalar value
  368. fill_value : None or float value, default None (NaN)
  369. Fill existing missing (NaN) values, and any new element needed for
  370. successful Series alignment, with this value before computation.
  371. If data in both corresponding Series locations is missing
  372. the result will be missing
  373. level : int or name
  374. Broadcast across a level, matching Index values on the
  375. passed MultiIndex level
  376. Returns
  377. -------
  378. result : Series
  379. See Also
  380. --------
  381. Series.{reverse}
  382. Examples
  383. --------
  384. >>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd'])
  385. >>> a
  386. a 1.0
  387. b 1.0
  388. c 1.0
  389. d NaN
  390. dtype: float64
  391. >>> b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e'])
  392. >>> b
  393. a 1.0
  394. b NaN
  395. d 1.0
  396. e NaN
  397. dtype: float64
  398. >>> a.add(b, fill_value=0)
  399. a 2.0
  400. b 1.0
  401. c 1.0
  402. d 1.0
  403. e NaN
  404. dtype: float64
  405. """
  406. _arith_doc_FRAME = """
  407. Binary operator %s with support to substitute a fill_value for missing data in
  408. one of the inputs
  409. Parameters
  410. ----------
  411. other : Series, DataFrame, or constant
  412. axis : {0, 1, 'index', 'columns'}
  413. For Series input, axis to match Series index on
  414. fill_value : None or float value, default None
  415. Fill existing missing (NaN) values, and any new element needed for
  416. successful DataFrame alignment, with this value before computation.
  417. If data in both corresponding DataFrame locations is missing
  418. the result will be missing
  419. level : int or name
  420. Broadcast across a level, matching Index values on the
  421. passed MultiIndex level
  422. Returns
  423. -------
  424. result : DataFrame
  425. Notes
  426. -----
  427. Mismatched indices will be unioned together
  428. """
  429. _flex_doc_FRAME = """
  430. {desc} of dataframe and other, element-wise (binary operator `{op_name}`).
  431. Equivalent to ``{equiv}``, but with support to substitute a fill_value
  432. for missing data in one of the inputs. With reverse version, `{reverse}`.
  433. Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to
  434. arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`.
  435. Parameters
  436. ----------
  437. other : scalar, sequence, Series, or DataFrame
  438. Any single or multiple element data structure, or list-like object.
  439. axis : {{0 or 'index', 1 or 'columns'}}
  440. Whether to compare by the index (0 or 'index') or columns
  441. (1 or 'columns'). For Series input, axis to match Series index on.
  442. level : int or label
  443. Broadcast across a level, matching Index values on the
  444. passed MultiIndex level.
  445. fill_value : float or None, default None
  446. Fill existing missing (NaN) values, and any new element needed for
  447. successful DataFrame alignment, with this value before computation.
  448. If data in both corresponding DataFrame locations is missing
  449. the result will be missing.
  450. Returns
  451. -------
  452. DataFrame
  453. Result of the arithmetic operation.
  454. See Also
  455. --------
  456. DataFrame.add : Add DataFrames.
  457. DataFrame.sub : Subtract DataFrames.
  458. DataFrame.mul : Multiply DataFrames.
  459. DataFrame.div : Divide DataFrames (float division).
  460. DataFrame.truediv : Divide DataFrames (float division).
  461. DataFrame.floordiv : Divide DataFrames (integer division).
  462. DataFrame.mod : Calculate modulo (remainder after division).
  463. DataFrame.pow : Calculate exponential power.
  464. Notes
  465. -----
  466. Mismatched indices will be unioned together.
  467. Examples
  468. --------
  469. >>> df = pd.DataFrame({{'angles': [0, 3, 4],
  470. ... 'degrees': [360, 180, 360]}},
  471. ... index=['circle', 'triangle', 'rectangle'])
  472. >>> df
  473. angles degrees
  474. circle 0 360
  475. triangle 3 180
  476. rectangle 4 360
477. Add a scalar using the operator version, which returns the same
478. results.
  479. >>> df + 1
  480. angles degrees
  481. circle 1 361
  482. triangle 4 181
  483. rectangle 5 361
  484. >>> df.add(1)
  485. angles degrees
  486. circle 1 361
  487. triangle 4 181
  488. rectangle 5 361
489. Divide by a constant with the reverse version.
  490. >>> df.div(10)
  491. angles degrees
  492. circle 0.0 36.0
  493. triangle 0.3 18.0
  494. rectangle 0.4 36.0
  495. >>> df.rdiv(10)
  496. angles degrees
  497. circle inf 0.027778
  498. triangle 3.333333 0.055556
  499. rectangle 2.500000 0.027778
  500. Subtract a list and Series by axis with operator version.
  501. >>> df - [1, 2]
  502. angles degrees
  503. circle -1 358
  504. triangle 2 178
  505. rectangle 3 358
  506. >>> df.sub([1, 2], axis='columns')
  507. angles degrees
  508. circle -1 358
  509. triangle 2 178
  510. rectangle 3 358
  511. >>> df.sub(pd.Series([1, 1, 1], index=['circle', 'triangle', 'rectangle']),
  512. ... axis='index')
  513. angles degrees
  514. circle -1 359
  515. triangle 2 179
  516. rectangle 3 359
  517. Multiply a DataFrame of different shape with operator version.
  518. >>> other = pd.DataFrame({{'angles': [0, 3, 4]}},
  519. ... index=['circle', 'triangle', 'rectangle'])
  520. >>> other
  521. angles
  522. circle 0
  523. triangle 3
  524. rectangle 4
  525. >>> df * other
  526. angles degrees
  527. circle 0 NaN
  528. triangle 9 NaN
  529. rectangle 16 NaN
  530. >>> df.mul(other, fill_value=0)
  531. angles degrees
  532. circle 0 0.0
  533. triangle 9 0.0
  534. rectangle 16 0.0
535. Divide by a DataFrame with a MultiIndex, by level.
  536. >>> df_multindex = pd.DataFrame({{'angles': [0, 3, 4, 4, 5, 6],
  537. ... 'degrees': [360, 180, 360, 360, 540, 720]}},
  538. ... index=[['A', 'A', 'A', 'B', 'B', 'B'],
  539. ... ['circle', 'triangle', 'rectangle',
  540. ... 'square', 'pentagon', 'hexagon']])
  541. >>> df_multindex
  542. angles degrees
  543. A circle 0 360
  544. triangle 3 180
  545. rectangle 4 360
  546. B square 4 360
  547. pentagon 5 540
  548. hexagon 6 720
  549. >>> df.div(df_multindex, level=1, fill_value=0)
  550. angles degrees
  551. A circle NaN 1.0
  552. triangle 1.0 1.0
  553. rectangle 1.0 1.0
  554. B square 0.0 0.0
  555. pentagon 0.0 0.0
  556. hexagon 0.0 0.0
  557. """
  558. _flex_comp_doc_FRAME = """
  559. {desc} of dataframe and other, element-wise (binary operator `{op_name}`).
  560. Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison
  561. operators.
562. Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis
  563. (rows or columns) and level for comparison.
  564. Parameters
  565. ----------
  566. other : scalar, sequence, Series, or DataFrame
  567. Any single or multiple element data structure, or list-like object.
  568. axis : {{0 or 'index', 1 or 'columns'}}, default 'columns'
  569. Whether to compare by the index (0 or 'index') or columns
  570. (1 or 'columns').
  571. level : int or label
  572. Broadcast across a level, matching Index values on the passed
  573. MultiIndex level.
  574. Returns
  575. -------
  576. DataFrame of bool
  577. Result of the comparison.
  578. See Also
  579. --------
  580. DataFrame.eq : Compare DataFrames for equality elementwise.
  581. DataFrame.ne : Compare DataFrames for inequality elementwise.
  582. DataFrame.le : Compare DataFrames for less than inequality
  583. or equality elementwise.
  584. DataFrame.lt : Compare DataFrames for strictly less than
  585. inequality elementwise.
  586. DataFrame.ge : Compare DataFrames for greater than inequality
  587. or equality elementwise.
  588. DataFrame.gt : Compare DataFrames for strictly greater than
  589. inequality elementwise.
  590. Notes
591. -----
  592. Mismatched indices will be unioned together.
  593. `NaN` values are considered different (i.e. `NaN` != `NaN`).
  594. Examples
  595. --------
  596. >>> df = pd.DataFrame({{'cost': [250, 150, 100],
  597. ... 'revenue': [100, 250, 300]}},
  598. ... index=['A', 'B', 'C'])
  599. >>> df
  600. cost revenue
  601. A 250 100
  602. B 150 250
  603. C 100 300
  604. Comparison with a scalar, using either the operator or method:
  605. >>> df == 100
  606. cost revenue
  607. A False True
  608. B False False
  609. C True False
  610. >>> df.eq(100)
  611. cost revenue
  612. A False True
  613. B False False
  614. C True False
  615. When `other` is a :class:`Series`, the columns of a DataFrame are aligned
  616. with the index of `other` and broadcast:
  617. >>> df != pd.Series([100, 250], index=["cost", "revenue"])
  618. cost revenue
  619. A True True
  620. B True False
  621. C False True
  622. Use the method to control the broadcast axis:
  623. >>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis='index')
  624. cost revenue
  625. A True False
  626. B True True
  627. C True True
  628. D True True
  629. When comparing to an arbitrary sequence, the number of columns must
630. match the number of elements in `other`:
  631. >>> df == [250, 100]
  632. cost revenue
  633. A True True
  634. B False False
  635. C False False
  636. Use the method to control the axis:
  637. >>> df.eq([250, 250, 100], axis='index')
  638. cost revenue
  639. A True False
  640. B False True
  641. C True False
  642. Compare to a DataFrame of different shape.
  643. >>> other = pd.DataFrame({{'revenue': [300, 250, 100, 150]}},
  644. ... index=['A', 'B', 'C', 'D'])
  645. >>> other
  646. revenue
  647. A 300
  648. B 250
  649. C 100
  650. D 150
  651. >>> df.gt(other)
  652. cost revenue
  653. A False False
  654. B False False
  655. C False True
  656. D False False
657. Compare to a DataFrame with a MultiIndex, by level.
  658. >>> df_multindex = pd.DataFrame({{'cost': [250, 150, 100, 150, 300, 220],
  659. ... 'revenue': [100, 250, 300, 200, 175, 225]}},
  660. ... index=[['Q1', 'Q1', 'Q1', 'Q2', 'Q2', 'Q2'],
  661. ... ['A', 'B', 'C', 'A', 'B', 'C']])
  662. >>> df_multindex
  663. cost revenue
  664. Q1 A 250 100
  665. B 150 250
  666. C 100 300
  667. Q2 A 150 200
  668. B 300 175
  669. C 220 225
  670. >>> df.le(df_multindex, level=1)
  671. cost revenue
  672. Q1 A True True
  673. B True True
  674. C True True
  675. Q2 A False True
  676. B True False
  677. C True False
  678. """
  679. _flex_doc_PANEL = """
  680. {desc} of series and other, element-wise (binary operator `{op_name}`).
  681. Equivalent to ``{equiv}``.
  682. Parameters
  683. ----------
  684. other : DataFrame or Panel
  685. axis : {{items, major_axis, minor_axis}}
  686. Axis to broadcast over
  687. Returns
  688. -------
  689. Panel
  690. See Also
  691. --------
  692. Panel.{reverse}
  693. """
  694. _agg_doc_PANEL = """
  695. Wrapper method for {op_name}
  696. Parameters
  697. ----------
  698. other : DataFrame or Panel
  699. axis : {{items, major_axis, minor_axis}}
  700. Axis to broadcast over
  701. Returns
  702. -------
  703. Panel
  704. """
  705. def _make_flex_doc(op_name, typ):
  706. """
  707. Make the appropriate substitutions for the given operation and class-typ
  708. into either _flex_doc_SERIES or _flex_doc_FRAME to return the docstring
  709. to attach to a generated method.
  710. Parameters
  711. ----------
  712. op_name : str {'__add__', '__sub__', ... '__eq__', '__ne__', ...}
713. typ : str {'series', 'dataframe', 'panel'}
  714. Returns
  715. -------
  716. doc : str
  717. """
  718. op_name = op_name.replace('__', '')
  719. op_desc = _op_descriptions[op_name]
  720. if op_desc['reversed']:
  721. equiv = 'other ' + op_desc['op'] + ' ' + typ
  722. else:
  723. equiv = typ + ' ' + op_desc['op'] + ' other'
  724. if typ == 'series':
  725. base_doc = _flex_doc_SERIES
  726. doc = base_doc.format(desc=op_desc['desc'], op_name=op_name,
  727. equiv=equiv, reverse=op_desc['reverse'])
  728. elif typ == 'dataframe':
  729. base_doc = _flex_doc_FRAME
  730. doc = base_doc.format(desc=op_desc['desc'], op_name=op_name,
  731. equiv=equiv, reverse=op_desc['reverse'])
  732. elif typ == 'panel':
  733. base_doc = _flex_doc_PANEL
  734. doc = base_doc.format(desc=op_desc['desc'], op_name=op_name,
  735. equiv=equiv, reverse=op_desc['reverse'])
  736. else:
  737. raise AssertionError('Invalid typ argument.')
  738. return doc
  739. # -----------------------------------------------------------------------------
  740. # Masking NA values and fallbacks for operations numpy does not support
  741. def fill_binop(left, right, fill_value):
  742. """
  743. If a non-None fill_value is given, replace null entries in left and right
  744. with this value, but only in positions where _one_ of left/right is null,
  745. not both.
  746. Parameters
  747. ----------
  748. left : array-like
  749. right : array-like
  750. fill_value : object
  751. Returns
  752. -------
  753. left : array-like
  754. right : array-like
  755. Notes
  756. -----
  757. Makes copies if fill_value is not None
  758. """
  759. # TODO: can we make a no-copy implementation?
  760. if fill_value is not None:
  761. left_mask = isna(left)
  762. right_mask = isna(right)
  763. left = left.copy()
  764. right = right.copy()
  765. # one but not both
  766. mask = left_mask ^ right_mask
  767. left[left_mask & mask] = fill_value
  768. right[right_mask & mask] = fill_value
  769. return left, right
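# Illustrative usage (not part of the original module): only positions where
# exactly one side is null are filled; positions where both are null stay NaN.
#     >>> left = pd.Series([1.0, np.nan, 3.0])
#     >>> right = pd.Series([np.nan, np.nan, 6.0])
#     >>> filled_left, filled_right = fill_binop(left, right, fill_value=0)
#     >>> filled_right.tolist()
#     [0.0, nan, 6.0]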
  770. def mask_cmp_op(x, y, op, allowed_types):
  771. """
  772. Apply the function `op` to only non-null points in x and y.
  773. Parameters
  774. ----------
  775. x : array-like
  776. y : array-like
  777. op : binary operation
  778. allowed_types : class or tuple of classes
  779. Returns
  780. -------
  781. result : ndarray[bool]
  782. """
  783. # TODO: Can we make the allowed_types arg unnecessary?
  784. xrav = x.ravel()
  785. result = np.empty(x.size, dtype=bool)
  786. if isinstance(y, allowed_types):
  787. yrav = y.ravel()
  788. mask = notna(xrav) & notna(yrav)
  789. result[mask] = op(np.array(list(xrav[mask])),
  790. np.array(list(yrav[mask])))
  791. else:
  792. mask = notna(xrav)
  793. result[mask] = op(np.array(list(xrav[mask])), y)
  794. if op == operator.ne: # pragma: no cover
  795. np.putmask(result, ~mask, True)
  796. else:
  797. np.putmask(result, ~mask, False)
  798. result = result.reshape(x.shape)
  799. return result
  800. def masked_arith_op(x, y, op):
  801. """
  802. If the given arithmetic operation fails, attempt it again on
  803. only the non-null elements of the input array(s).
  804. Parameters
  805. ----------
  806. x : np.ndarray
  807. y : np.ndarray, Series, Index
  808. op : binary operator
  809. """
  810. # For Series `x` is 1D so ravel() is a no-op; calling it anyway makes
  811. # the logic valid for both Series and DataFrame ops.
  812. xrav = x.ravel()
  813. assert isinstance(x, (np.ndarray, ABCSeries)), type(x)
  814. if isinstance(y, (np.ndarray, ABCSeries, ABCIndexClass)):
  815. dtype = find_common_type([x.dtype, y.dtype])
  816. result = np.empty(x.size, dtype=dtype)
  817. # PeriodIndex.ravel() returns int64 dtype, so we have
  818. # to work around that case. See GH#19956
  819. yrav = y if is_period_dtype(y) else y.ravel()
  820. mask = notna(xrav) & notna(yrav)
  821. if yrav.shape != mask.shape:
  822. # FIXME: GH#5284, GH#5035, GH#19448
  823. # Without specifically raising here we get mismatched
  824. # errors in Py3 (TypeError) vs Py2 (ValueError)
825. # Note: Only an issue in the DataFrame case
  826. raise ValueError('Cannot broadcast operands together.')
  827. if mask.any():
  828. with np.errstate(all='ignore'):
  829. result[mask] = op(xrav[mask],
  830. com.values_from_object(yrav[mask]))
  831. else:
  832. assert is_scalar(y), type(y)
  833. assert isinstance(x, np.ndarray), type(x)
  834. # mask is only meaningful for x
  835. result = np.empty(x.size, dtype=x.dtype)
  836. mask = notna(xrav)
  837. # 1 ** np.nan is 1. So we have to unmask those.
  838. if op == pow:
  839. mask = np.where(x == 1, False, mask)
  840. elif op == rpow:
  841. mask = np.where(y == 1, False, mask)
  842. if mask.any():
  843. with np.errstate(all='ignore'):
  844. result[mask] = op(xrav[mask], y)
  845. result, changed = maybe_upcast_putmask(result, ~mask, np.nan)
  846. result = result.reshape(x.shape) # 2D compat
  847. return result
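# Illustrative usage (not part of the original module): null positions are
# skipped during evaluation and re-filled with NaN afterwards.
#     >>> masked_arith_op(np.array([1.0, np.nan]),
#     ...                 np.array([2.0, 2.0]), operator.add).tolist()
#     [3.0, nan]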
  848. def invalid_comparison(left, right, op):
  849. """
  850. If a comparison has mismatched types and is not necessarily meaningful,
  851. follow python3 conventions by:
  852. - returning all-False for equality
  853. - returning all-True for inequality
  854. - raising TypeError otherwise
  855. Parameters
  856. ----------
  857. left : array-like
  858. right : scalar, array-like
859. op : operator.{eq, ne, lt, le, gt, ge}
  860. Raises
  861. ------
  862. TypeError : on inequality comparisons
  863. """
  864. if op is operator.eq:
  865. res_values = np.zeros(left.shape, dtype=bool)
  866. elif op is operator.ne:
  867. res_values = np.ones(left.shape, dtype=bool)
  868. else:
  869. raise TypeError("Invalid comparison between dtype={dtype} and {typ}"
  870. .format(dtype=left.dtype, typ=type(right).__name__))
  871. return res_values
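# Illustrative usage (not part of the original module):
#     >>> invalid_comparison(np.array([1.0, 2.0]), 'foo', operator.eq)
#     array([False, False])
#     >>> invalid_comparison(np.array([1.0, 2.0]), 'foo', operator.lt)
#     Traceback (most recent call last):
#         ...
#     TypeError: Invalid comparison between dtype=float64 and str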
  872. # -----------------------------------------------------------------------------
  873. # Dispatch logic
  874. def should_series_dispatch(left, right, op):
  875. """
  876. Identify cases where a DataFrame operation should dispatch to its
  877. Series counterpart.
  878. Parameters
  879. ----------
  880. left : DataFrame
  881. right : DataFrame
  882. op : binary operator
  883. Returns
  884. -------
  885. override : bool
  886. """
  887. if left._is_mixed_type or right._is_mixed_type:
  888. return True
  889. if not len(left.columns) or not len(right.columns):
  890. # ensure obj.dtypes[0] exists for each obj
  891. return False
  892. ldtype = left.dtypes.iloc[0]
  893. rdtype = right.dtypes.iloc[0]
  894. if ((is_timedelta64_dtype(ldtype) and is_integer_dtype(rdtype)) or
  895. (is_timedelta64_dtype(rdtype) and is_integer_dtype(ldtype))):
896. # numpy treats integer dtypes as timedelta64 dtypes in this scenario
  897. return True
  898. if is_datetime64_dtype(ldtype) and is_object_dtype(rdtype):
  899. # in particular case where right is an array of DateOffsets
  900. return True
  901. return False
  902. def dispatch_to_series(left, right, func, str_rep=None, axis=None):
  903. """
  904. Evaluate the frame operation func(left, right) by evaluating
  905. column-by-column, dispatching to the Series implementation.
  906. Parameters
  907. ----------
  908. left : DataFrame
  909. right : scalar or DataFrame
  910. func : arithmetic or comparison operator
  911. str_rep : str or None, default None
  912. axis : {None, 0, 1, "index", "columns"}
  913. Returns
  914. -------
  915. DataFrame
  916. """
  917. # Note: we use iloc to access columns for compat with cases
  918. # with non-unique columns.
  919. import pandas.core.computation.expressions as expressions
  920. right = lib.item_from_zerodim(right)
  921. if lib.is_scalar(right) or np.ndim(right) == 0:
  922. def column_op(a, b):
  923. return {i: func(a.iloc[:, i], b)
  924. for i in range(len(a.columns))}
  925. elif isinstance(right, ABCDataFrame):
  926. assert right._indexed_same(left)
  927. def column_op(a, b):
  928. return {i: func(a.iloc[:, i], b.iloc[:, i])
  929. for i in range(len(a.columns))}
  930. elif isinstance(right, ABCSeries) and axis == "columns":
  931. # We only get here if called via left._combine_match_columns,
  932. # in which case we specifically want to operate row-by-row
  933. assert right.index.equals(left.columns)
  934. def column_op(a, b):
  935. return {i: func(a.iloc[:, i], b.iloc[i])
  936. for i in range(len(a.columns))}
  937. elif isinstance(right, ABCSeries):
  938. assert right.index.equals(left.index) # Handle other cases later
  939. def column_op(a, b):
  940. return {i: func(a.iloc[:, i], b)
  941. for i in range(len(a.columns))}
  942. else:
  943. # Remaining cases have less-obvious dispatch rules
  944. raise NotImplementedError(right)
  945. new_data = expressions.evaluate(column_op, str_rep, left, right)
  946. result = left._constructor(new_data, index=left.index, copy=False)
  947. # Pin columns instead of passing to constructor for compat with
  948. # non-unique columns case
  949. result.columns = left.columns
  950. return result
  951. def dispatch_to_index_op(op, left, right, index_class):
  952. """
  953. Wrap Series left in the given index_class to delegate the operation op
  954. to the index implementation. DatetimeIndex and TimedeltaIndex perform
  955. type checking, timezone handling, overflow checks, etc.
  956. Parameters
  957. ----------
  958. op : binary operator (operator.add, operator.sub, ...)
  959. left : Series
  960. right : object
  961. index_class : DatetimeIndex or TimedeltaIndex
  962. Returns
  963. -------
  964. result : object, usually DatetimeIndex, TimedeltaIndex, or Series
  965. """
  966. left_idx = index_class(left)
  967. # avoid accidentally allowing integer add/sub. For datetime64[tz] dtypes,
  968. # left_idx may inherit a freq from a cached DatetimeIndex.
  969. # See discussion in GH#19147.
  970. if getattr(left_idx, 'freq', None) is not None:
  971. left_idx = left_idx._shallow_copy(freq=None)
  972. try:
  973. result = op(left_idx, right)
  974. except NullFrequencyError:
  975. # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError
  976. # on add/sub of integers (or int-like). We re-raise as a TypeError.
  977. raise TypeError('incompatible type for a datetime/timedelta '
  978. 'operation [{name}]'.format(name=op.__name__))
  979. return result
  980. def dispatch_to_extension_op(op, left, right):
  981. """
  982. Assume that left or right is a Series backed by an ExtensionArray,
  983. apply the operator defined by op.
  984. """
  985. # The op calls will raise TypeError if the op is not defined
  986. # on the ExtensionArray
  987. # unbox Series and Index to arrays
  988. if isinstance(left, (ABCSeries, ABCIndexClass)):
  989. new_left = left._values
  990. else:
  991. new_left = left
  992. if isinstance(right, (ABCSeries, ABCIndexClass)):
  993. new_right = right._values
  994. else:
  995. new_right = right
  996. res_values = op(new_left, new_right)
  997. res_name = get_op_result_name(left, right)
  998. if op.__name__ in ['divmod', 'rdivmod']:
  999. return _construct_divmod_result(
  1000. left, res_values, left.index, res_name)
  1001. return _construct_result(left, res_values, left.index, res_name)
  1002. # -----------------------------------------------------------------------------
  1003. # Functions that add arithmetic methods to objects, given arithmetic factory
  1004. # methods
  1005. def _get_method_wrappers(cls):
  1006. """
  1007. Find the appropriate operation-wrappers to use when defining flex/special
  1008. arithmetic, boolean, and comparison operations with the given class.
  1009. Parameters
  1010. ----------
  1011. cls : class
  1012. Returns
  1013. -------
  1014. arith_flex : function or None
  1015. comp_flex : function or None
  1016. arith_special : function
  1017. comp_special : function
  1018. bool_special : function
  1019. Notes
  1020. -----
  1021. None is only returned for SparseArray
  1022. """
  1023. if issubclass(cls, ABCSparseSeries):
  1024. # Be sure to catch this before ABCSeries and ABCSparseArray,
1025. # as they would both match SparseSeries as a subclass
  1026. arith_flex = _flex_method_SERIES
  1027. comp_flex = _flex_method_SERIES
  1028. arith_special = _arith_method_SPARSE_SERIES
  1029. comp_special = _arith_method_SPARSE_SERIES
  1030. bool_special = _bool_method_SERIES
  1031. # TODO: I don't think the functions defined by bool_method are tested
  1032. elif issubclass(cls, ABCSeries):
  1033. # Just Series; SparseSeries is caught above
  1034. arith_flex = _flex_method_SERIES
  1035. comp_flex = _flex_method_SERIES
  1036. arith_special = _arith_method_SERIES
  1037. comp_special = _comp_method_SERIES
  1038. bool_special = _bool_method_SERIES
  1039. elif issubclass(cls, ABCSparseArray):
  1040. arith_flex = None
  1041. comp_flex = None
  1042. arith_special = _arith_method_SPARSE_ARRAY
  1043. comp_special = _arith_method_SPARSE_ARRAY
  1044. bool_special = _arith_method_SPARSE_ARRAY
  1045. elif issubclass(cls, ABCPanel):
  1046. arith_flex = _flex_method_PANEL
  1047. comp_flex = _comp_method_PANEL
  1048. arith_special = _arith_method_PANEL
  1049. comp_special = _comp_method_PANEL
  1050. bool_special = _arith_method_PANEL
  1051. elif issubclass(cls, ABCDataFrame):
  1052. # Same for DataFrame and SparseDataFrame
  1053. arith_flex = _arith_method_FRAME
  1054. comp_flex = _flex_comp_method_FRAME
  1055. arith_special = _arith_method_FRAME
  1056. comp_special = _comp_method_FRAME
  1057. bool_special = _arith_method_FRAME
  1058. return arith_flex, comp_flex, arith_special, comp_special, bool_special
  1059. def _create_methods(cls, arith_method, comp_method, bool_method, special):
  1060. # creates actual methods based upon arithmetic, comp and bool method
  1061. # constructors.
  1062. have_divmod = issubclass(cls, ABCSeries)
  1063. # divmod is available for Series and SparseSeries
  1064. # yapf: disable
  1065. new_methods = dict(
  1066. add=arith_method(cls, operator.add, special),
  1067. radd=arith_method(cls, radd, special),
  1068. sub=arith_method(cls, operator.sub, special),
  1069. mul=arith_method(cls, operator.mul, special),
  1070. truediv=arith_method(cls, operator.truediv, special),
  1071. floordiv=arith_method(cls, operator.floordiv, special),
  1072. # Causes a floating point exception in the tests when numexpr enabled,
  1073. # so for now no speedup
  1074. mod=arith_method(cls, operator.mod, special),
  1075. pow=arith_method(cls, operator.pow, special),
  1076. # not entirely sure why this is necessary, but previously was included
  1077. # so it's here to maintain compatibility
  1078. rmul=arith_method(cls, rmul, special),
  1079. rsub=arith_method(cls, rsub, special),
  1080. rtruediv=arith_method(cls, rtruediv, special),
  1081. rfloordiv=arith_method(cls, rfloordiv, special),
  1082. rpow=arith_method(cls, rpow, special),
  1083. rmod=arith_method(cls, rmod, special))
  1084. # yapf: enable
  1085. new_methods['div'] = new_methods['truediv']
  1086. new_methods['rdiv'] = new_methods['rtruediv']
  1087. if have_divmod:
  1088. # divmod doesn't have an op that is supported by numexpr
  1089. new_methods['divmod'] = arith_method(cls, divmod, special)
  1090. new_methods['rdivmod'] = arith_method(cls, rdivmod, special)
  1091. new_methods.update(dict(
  1092. eq=comp_method(cls, operator.eq, special),
  1093. ne=comp_method(cls, operator.ne, special),
  1094. lt=comp_method(cls, operator.lt, special),
  1095. gt=comp_method(cls, operator.gt, special),
  1096. le=comp_method(cls, operator.le, special),
  1097. ge=comp_method(cls, operator.ge, special)))
  1098. if bool_method:
  1099. new_methods.update(
  1100. dict(and_=bool_method(cls, operator.and_, special),
  1101. or_=bool_method(cls, operator.or_, special),
  1102. # For some reason ``^`` wasn't used in original.
  1103. xor=bool_method(cls, operator.xor, special),
  1104. rand_=bool_method(cls, rand_, special),
  1105. ror_=bool_method(cls, ror_, special),
  1106. rxor=bool_method(cls, rxor, special)))
  1107. if special:
  1108. dunderize = lambda x: '__{name}__'.format(name=x.strip('_'))
  1109. else:
  1110. dunderize = lambda x: x
  1111. new_methods = {dunderize(k): v for k, v in new_methods.items()}
  1112. return new_methods
  1113. def add_methods(cls, new_methods):
  1114. for name, method in new_methods.items():
  1115. # For most methods, if we find that the class already has a method
  1116. # of the same name, it is OK to over-write it. The exception is
  1117. # inplace methods (__iadd__, __isub__, ...) for SparseArray, which
  1118. # retain the np.ndarray versions.
  1119. force = not (issubclass(cls, ABCSparseArray) and
  1120. name.startswith('__i'))
  1121. if force or name not in cls.__dict__:
  1122. bind_method(cls, name, method)
  1123. # ----------------------------------------------------------------------
  1124. # Arithmetic
  1125. def add_special_arithmetic_methods(cls):
  1126. """
  1127. Adds the full suite of special arithmetic methods (``__add__``,
  1128. ``__sub__``, etc.) to the class.
  1129. Parameters
  1130. ----------
  1131. cls : class
  1132. special methods will be defined and pinned to this class
  1133. """
  1134. _, _, arith_method, comp_method, bool_method = _get_method_wrappers(cls)
  1135. new_methods = _create_methods(cls, arith_method, comp_method, bool_method,
  1136. special=True)
  1137. # inplace operators (I feel like these should get passed an `inplace=True`
1138. # or just be removed)
  1139. def _wrap_inplace_method(method):
  1140. """
  1141. return an inplace wrapper for this method
  1142. """
  1143. def f(self, other):
  1144. result = method(self, other)
  1145. # this makes sure that we are aligned like the input
  1146. # we are updating inplace so we want to ignore is_copy
  1147. self._update_inplace(result.reindex_like(self, copy=False)._data,
  1148. verify_is_copy=False)
  1149. return self
  1150. f.__name__ = "__i{name}__".format(name=method.__name__.strip("__"))
  1151. return f
  1152. new_methods.update(
  1153. dict(__iadd__=_wrap_inplace_method(new_methods["__add__"]),
  1154. __isub__=_wrap_inplace_method(new_methods["__sub__"]),
  1155. __imul__=_wrap_inplace_method(new_methods["__mul__"]),
  1156. __itruediv__=_wrap_inplace_method(new_methods["__truediv__"]),
  1157. __ifloordiv__=_wrap_inplace_method(new_methods["__floordiv__"]),
  1158. __imod__=_wrap_inplace_method(new_methods["__mod__"]),
  1159. __ipow__=_wrap_inplace_method(new_methods["__pow__"])))
  1160. if not compat.PY3:
  1161. new_methods["__idiv__"] = _wrap_inplace_method(new_methods["__div__"])
  1162. new_methods.update(
  1163. dict(__iand__=_wrap_inplace_method(new_methods["__and__"]),
  1164. __ior__=_wrap_inplace_method(new_methods["__or__"]),
  1165. __ixor__=_wrap_inplace_method(new_methods["__xor__"])))
  1166. add_methods(cls, new_methods=new_methods)
  1167. def add_flex_arithmetic_methods(cls):
  1168. """
  1169. Adds the full suite of flex arithmetic methods (``pow``, ``mul``, ``add``)
  1170. to the class.
  1171. Parameters
  1172. ----------
  1173. cls : class
  1174. flex methods will be defined and pinned to this class
  1175. """
  1176. flex_arith_method, flex_comp_method, _, _, _ = _get_method_wrappers(cls)
  1177. new_methods = _create_methods(cls, flex_arith_method,
  1178. flex_comp_method, bool_method=None,
  1179. special=False)
  1180. new_methods.update(dict(multiply=new_methods['mul'],
  1181. subtract=new_methods['sub'],
  1182. divide=new_methods['div']))
  1183. # opt out of bool flex methods for now
  1184. assert not any(kname in new_methods for kname in ('ror_', 'rxor', 'rand_'))
  1185. add_methods(cls, new_methods=new_methods)
  1186. # -----------------------------------------------------------------------------
  1187. # Series
  1188. def _align_method_SERIES(left, right, align_asobject=False):
  1189. """ align lhs and rhs Series """
1190. # TODO: Different from _align_method_FRAME, list, tuple and ndarray
  1191. # are not coerced here
  1192. # because Series has inconsistencies described in #13637
  1193. if isinstance(right, ABCSeries):
  1194. # avoid repeated alignment
  1195. if not left.index.equals(right.index):
  1196. if align_asobject:
  1197. # to keep original value's dtype for bool ops
  1198. left = left.astype(object)
  1199. right = right.astype(object)
  1200. left, right = left.align(right, copy=False)
  1201. return left, right
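# Illustrative usage (not part of the original module): operands with
# different indexes are aligned to the union of the two indexes.
#     >>> a = pd.Series([1, 2], index=['a', 'b'])
#     >>> b = pd.Series([3], index=['b'])
#     >>> aligned_a, aligned_b = _align_method_SERIES(a, b)
#     >>> aligned_b
#     a    NaN
#     b    3.0
#     dtype: float64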
  1202. def _construct_result(left, result, index, name, dtype=None):
  1203. """
  1204. If the raw op result has a non-None name (e.g. it is an Index object) and
  1205. the name argument is None, then passing name to the constructor will
  1206. not be enough; we still need to override the name attribute.
  1207. """
  1208. out = left._constructor(result, index=index, dtype=dtype)
  1209. out.name = name
  1210. return out
  1211. def _construct_divmod_result(left, result, index, name, dtype=None):
  1212. """divmod returns a tuple of like indexed series instead of a single series.
  1213. """
  1214. constructor = left._constructor
  1215. return (
  1216. constructor(result[0], index=index, name=name, dtype=dtype),
  1217. constructor(result[1], index=index, name=name, dtype=dtype),
  1218. )
  1219. def _arith_method_SERIES(cls, op, special):
  1220. """
  1221. Wrapper function for Series arithmetic operations, to avoid
  1222. code duplication.
  1223. """
  1224. str_rep = _get_opstr(op, cls)
  1225. op_name = _get_op_name(op, special)
  1226. eval_kwargs = _gen_eval_kwargs(op_name)
  1227. fill_zeros = _gen_fill_zeros(op_name)
  1228. construct_result = (_construct_divmod_result
  1229. if op in [divmod, rdivmod] else _construct_result)
  1230. def na_op(x, y):
  1231. import pandas.core.computation.expressions as expressions
  1232. try:
  1233. result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
  1234. except TypeError:
  1235. result = masked_arith_op(x, y, op)
  1236. result = missing.fill_zeros(result, x, y, op_name, fill_zeros)
  1237. return result
  1238. def safe_na_op(lvalues, rvalues):
  1239. """
  1240. return the result of evaluating na_op on the passed in values
  1241. try coercion to object type if the native types are not compatible
  1242. Parameters
  1243. ----------
  1244. lvalues : array-like
  1245. rvalues : array-like
  1246. Raises
  1247. ------
  1248. TypeError: invalid operation
  1249. """
  1250. try:
  1251. with np.errstate(all='ignore'):
  1252. return na_op(lvalues, rvalues)
  1253. except Exception:
  1254. if is_object_dtype(lvalues):
  1255. return libalgos.arrmap_object(lvalues,
  1256. lambda x: op(x, rvalues))
  1257. raise
  1258. def wrapper(left, right):
  1259. if isinstance(right, ABCDataFrame):
  1260. return NotImplemented
  1261. left, right = _align_method_SERIES(left, right)
  1262. res_name = get_op_result_name(left, right)
  1263. right = maybe_upcast_for_op(right)
  1264. if is_categorical_dtype(left):
  1265. raise TypeError("{typ} cannot perform the operation "
  1266. "{op}".format(typ=type(left).__name__, op=str_rep))
  1267. elif is_datetime64_dtype(left) or is_datetime64tz_dtype(left):
  1268. # Give dispatch_to_index_op a chance for tests like
  1269. # test_dt64_series_add_intlike, which the index dispatching handles
  1270. # specifically.
  1271. result = dispatch_to_index_op(op, left, right, pd.DatetimeIndex)
  1272. return construct_result(left, result,
  1273. index=left.index, name=res_name,
  1274. dtype=result.dtype)
  1275. elif (is_extension_array_dtype(left) or
  1276. (is_extension_array_dtype(right) and not is_scalar(right))):
  1277. # GH#22378 disallow scalar to exclude e.g. "category", "Int64"
  1278. return dispatch_to_extension_op(op, left, right)
  1279. elif is_timedelta64_dtype(left):
  1280. result = dispatch_to_index_op(op, left, right, pd.TimedeltaIndex)
  1281. return construct_result(left, result,
  1282. index=left.index, name=res_name)
  1283. elif is_timedelta64_dtype(right):
  1284. # We should only get here with non-scalar or timedelta64('NaT')
  1285. # values for right
  1286. # Note: we cannot use dispatch_to_index_op because
  1287. # that may incorrectly raise TypeError when we
  1288. # should get NullFrequencyError
  1289. result = op(pd.Index(left), right)
  1290. return construct_result(left, result,
  1291. index=left.index, name=res_name,
  1292. dtype=result.dtype)
  1293. lvalues = left.values
  1294. rvalues = right
  1295. if isinstance(rvalues, ABCSeries):
  1296. rvalues = rvalues.values
  1297. result = safe_na_op(lvalues, rvalues)
  1298. return construct_result(left, result,
  1299. index=left.index, name=res_name, dtype=None)
  1300. wrapper.__name__ = op_name
  1301. return wrapper


def _comp_method_OBJECT_ARRAY(op, x, y):
    if isinstance(y, list):
        y = construct_1d_object_array_from_listlike(y)
    if isinstance(y, (np.ndarray, ABCSeries, ABCIndex)):
        if not is_object_dtype(y.dtype):
            y = y.astype(np.object_)

        if isinstance(y, (ABCSeries, ABCIndex)):
            y = y.values

        result = libops.vec_compare(x, y, op)
    else:
        result = libops.scalar_compare(x, y, op)
    return result
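

# Illustrative sketch, added for exposition and not part of the original
# module (the helper name is hypothetical): object-dtype comparisons funnel
# through ``libops.vec_compare``/``libops.scalar_compare`` above; through
# the public API this is plain elementwise comparison on object arrays.
def _sketch_object_array_comparison():
    import pandas as pd

    s = pd.Series(['a', 'b', None], dtype=object)
    result = s == 'a'  # scalar path -> libops.scalar_compare
    assert result.tolist() == [True, False, False]
    return result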


def _comp_method_SERIES(cls, op, special):
    """
    Wrapper function for Series comparison operations, to avoid
    code duplication.
    """
    op_name = _get_op_name(op, special)
    masker = _gen_eval_kwargs(op_name).get('masker', False)

    def na_op(x, y):
        # TODO:
        # should have guarantees on what x, y can be type-wise
        # Extension Dtypes are not called here

        # Checking that cases that were once handled here are no longer
        # reachable.
        assert not (is_categorical_dtype(y) and not is_scalar(y))

        if is_object_dtype(x.dtype):
            result = _comp_method_OBJECT_ARRAY(op, x, y)

        elif is_datetimelike_v_numeric(x, y):
            return invalid_comparison(x, y, op)

        else:
            # we want to compare like types
            # we only want to convert to integer like if
            # we are not NotImplemented, otherwise
            # we would allow datetime64 (but viewed as i8) against
            # integer comparisons

            # we have a datetime/timedelta and may need to convert
            assert not needs_i8_conversion(x)
            mask = None
            if not is_scalar(y) and needs_i8_conversion(y):
                mask = isna(x) | isna(y)
                y = y.view('i8')
                x = x.view('i8')

            method = getattr(x, op_name, None)
            if method is not None:
                with np.errstate(all='ignore'):
                    result = method(y)
                if result is NotImplemented:
                    return invalid_comparison(x, y, op)
            else:
                result = op(x, y)

            if mask is not None and mask.any():
                result[mask] = masker

        return result

    def wrapper(self, other, axis=None):
        # Validate the axis parameter
        if axis is not None:
            self._get_axis_number(axis)

        res_name = get_op_result_name(self, other)

        if isinstance(other, list):
            # TODO: same for tuples?
            other = np.asarray(other)

        if isinstance(other, ABCDataFrame):  # pragma: no cover
            # Defer to DataFrame implementation; fail early
            return NotImplemented

        elif isinstance(other, ABCSeries) and not self._indexed_same(other):
            raise ValueError("Can only compare identically-labeled "
                             "Series objects")

        elif is_categorical_dtype(self):
            # Dispatch to Categorical implementation; pd.CategoricalIndex
            # behavior is non-canonical GH#19513
            res_values = dispatch_to_index_op(op, self, other, pd.Categorical)
            return self._constructor(res_values, index=self.index,
                                     name=res_name)

        elif is_datetime64_dtype(self) or is_datetime64tz_dtype(self):
            # Dispatch to DatetimeIndex to ensure identical
            # Series/Index behavior
            if (isinstance(other, datetime.date) and
                    not isinstance(other, datetime.datetime)):
                # https://github.com/pandas-dev/pandas/issues/21152
                # Compatibility for difference between Series comparison w/
                # datetime and date
                msg = (
                    "Comparing Series of datetimes with 'datetime.date'. "
                    "Currently, the 'datetime.date' is coerced to a "
                    "datetime. In the future pandas will not coerce, "
                    "and {future}. "
                    "To retain the current behavior, "
                    "convert the 'datetime.date' to a datetime with "
                    "'pd.Timestamp'."
                )

                if op in {operator.lt, operator.le, operator.gt, operator.ge}:
                    future = "a TypeError will be raised"
                else:
                    future = (
                        "the values will not compare equal to the "
                        "'datetime.date'"
                    )
                msg = '\n'.join(textwrap.wrap(msg.format(future=future)))
                warnings.warn(msg, FutureWarning, stacklevel=2)
                other = pd.Timestamp(other)

            res_values = dispatch_to_index_op(op, self, other,
                                              pd.DatetimeIndex)

            return self._constructor(res_values, index=self.index,
                                     name=res_name)

        elif is_timedelta64_dtype(self):
            res_values = dispatch_to_index_op(op, self, other,
                                              pd.TimedeltaIndex)
            return self._constructor(res_values, index=self.index,
                                     name=res_name)

        elif (is_extension_array_dtype(self) or
              (is_extension_array_dtype(other) and not is_scalar(other))):
            # Note: the `not is_scalar(other)` condition rules out
            # e.g. other == "category"
            return dispatch_to_extension_op(op, self, other)

        elif isinstance(other, ABCSeries):
            # By this point we have checked that self._indexed_same(other)
            res_values = na_op(self.values, other.values)
            # rename is needed in case res_name is None and res_values.name
            # is not.
            return self._constructor(res_values, index=self.index,
                                     name=res_name).rename(res_name)

        elif isinstance(other, (np.ndarray, pd.Index)):
            # do not check length of zerodim array
            # as it will broadcast
            if other.ndim != 0 and len(self) != len(other):
                raise ValueError('Lengths must match to compare')

            res_values = na_op(self.values, np.asarray(other))
            result = self._constructor(res_values, index=self.index)
            # rename is needed in case res_name is None and self.name
            # is not.
            return result.__finalize__(self).rename(res_name)

        elif is_scalar(other) and isna(other):
            # numpy does not like comparisons vs None
            if op is operator.ne:
                res_values = np.ones(len(self), dtype=bool)
            else:
                res_values = np.zeros(len(self), dtype=bool)
            return self._constructor(res_values, index=self.index,
                                     name=res_name, dtype='bool')

        else:
            values = self.get_values()

            with np.errstate(all='ignore'):
                res = na_op(values, other)
            if is_scalar(res):
                raise TypeError('Could not compare {typ} type with Series'
                                .format(typ=type(other)))

            # always return a full value series here
            res_values = com.values_from_object(res)
            return self._constructor(res_values, index=self.index,
                                     name=res_name, dtype='bool')

    wrapper.__name__ = op_name
    return wrapper
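

# Illustrative sketch, added for exposition and not part of the original
# module (the helper name is hypothetical): two branches of ``wrapper``
# above, seen through the public API -- NaN scalars compare unequal to
# everything, and differently-labeled Series refuse to compare.
def _sketch_series_comparison():
    import numpy as np
    import pandas as pd

    s = pd.Series([1.0, np.nan])
    assert (s == np.nan).tolist() == [False, False]  # isna(other) branch
    assert (s != np.nan).tolist() == [True, True]

    other = pd.Series([1.0, 2.0], index=[5, 6])
    try:
        s == other
    except ValueError:
        # "Can only compare identically-labeled Series objects"
        pass
    return s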


def _bool_method_SERIES(cls, op, special):
    """
    Wrapper function for Series boolean (logical) operations, to avoid
    code duplication.
    """
    op_name = _get_op_name(op, special)

    def na_op(x, y):
        try:
            result = op(x, y)
        except TypeError:
            assert not isinstance(y, (list, ABCSeries, ABCIndexClass))
            if isinstance(y, np.ndarray):
                # bool-bool dtype operations should be OK, should not get here
                assert not (is_bool_dtype(x) and is_bool_dtype(y))
                x = ensure_object(x)
                y = ensure_object(y)
                result = libops.vec_binop(x, y, op)
            else:
                # let null fall thru
                assert lib.is_scalar(y)
                if not isna(y):
                    y = bool(y)
                try:
                    result = libops.scalar_binop(x, y, op)
                except (TypeError, ValueError, AttributeError,
                        OverflowError, NotImplementedError):
                    raise TypeError("cannot compare a dtyped [{dtype}] array "
                                    "with a scalar of type [{typ}]"
                                    .format(dtype=x.dtype,
                                            typ=type(y).__name__))

        return result

    fill_int = lambda x: x.fillna(0)
    fill_bool = lambda x: x.fillna(False).astype(bool)

    def wrapper(self, other):
        is_self_int_dtype = is_integer_dtype(self.dtype)

        self, other = _align_method_SERIES(self, other, align_asobject=True)
        res_name = get_op_result_name(self, other)

        if isinstance(other, ABCDataFrame):
            # Defer to DataFrame implementation; fail early
            return NotImplemented

        elif isinstance(other, (ABCSeries, ABCIndexClass)):
            is_other_int_dtype = is_integer_dtype(other.dtype)
            other = fill_int(other) if is_other_int_dtype else fill_bool(other)

            ovalues = other.values
            finalizer = lambda x: x

        else:
            # scalars, list, tuple, np.array
            is_other_int_dtype = is_integer_dtype(np.asarray(other))
            if is_list_like(other) and not isinstance(other, np.ndarray):
                # TODO: Can we do this before the is_integer_dtype check?
                # could the is_integer_dtype check be checking the wrong
                # thing? e.g. other = [[0, 1], [2, 3], [4, 5]]?
                other = construct_1d_object_array_from_listlike(other)

            ovalues = other
            finalizer = lambda x: x.__finalize__(self)

        # For int vs int `^`, `|`, `&` are bitwise operators and return
        # integer dtypes. Otherwise these are boolean ops
        filler = (fill_int if is_self_int_dtype and is_other_int_dtype
                  else fill_bool)

        res_values = na_op(self.values, ovalues)
        unfilled = self._constructor(res_values,
                                     index=self.index, name=res_name)
        filled = filler(unfilled)
        return finalizer(filled)

    wrapper.__name__ = op_name
    return wrapper
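

# Illustrative sketch, added for exposition and not part of the original
# module (the helper name is hypothetical): for int/int operands ``&``,
# ``|`` and ``^`` stay bitwise and keep an integer dtype (fill_int), while
# other combinations are filled and cast to bool (fill_bool), matching the
# ``filler`` selection above.
def _sketch_series_bool_ops():
    import pandas as pd

    ints = pd.Series([1, 2, 3]) & pd.Series([1, 0, 2])
    assert ints.tolist() == [1, 0, 2]  # bitwise, integer dtype

    bools = pd.Series([True, False]) & pd.Series([True, True])
    assert bools.tolist() == [True, False]  # boolean dtype
    return ints, bools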


def _flex_method_SERIES(cls, op, special):
    name = _get_op_name(op, special)
    doc = _make_flex_doc(name, 'series')

    @Appender(doc)
    def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
        # validate axis
        if axis is not None:
            self._get_axis_number(axis)
        if isinstance(other, ABCSeries):
            return self._binop(other, op, level=level, fill_value=fill_value)
        elif isinstance(other, (np.ndarray, list, tuple)):
            if len(other) != len(self):
                raise ValueError('Lengths must be equal')
            other = self._constructor(other, self.index)
            return self._binop(other, op, level=level, fill_value=fill_value)
        else:
            if fill_value is not None:
                self = self.fillna(fill_value)

            return self._constructor(op(self, other),
                                     self.index).__finalize__(self)

    flex_wrapper.__name__ = name
    return flex_wrapper
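

# Illustrative sketch, added for exposition and not part of the original
# module (the helper name is hypothetical): the flex wrapper above is what
# ``Series.add``/``Series.sub``/... call, layering ``fill_value`` and
# ``level`` on top of the plain operator.
def _sketch_series_flex_add():
    import pandas as pd

    a = pd.Series([1, 2], index=['x', 'y'])
    b = pd.Series([10], index=['x'])
    result = a.add(b, fill_value=0)
    # 'y' is missing from ``b`` and is filled with 0 before adding
    assert result['x'] == 11 and result['y'] == 2
    return result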


# -----------------------------------------------------------------------------
# DataFrame


def _combine_series_frame(self, other, func, fill_value=None, axis=None,
                          level=None):
    """
    Apply binary operator `func` to self, other using alignment and fill
    conventions determined by the fill_value, axis, and level kwargs.

    Parameters
    ----------
    self : DataFrame
    other : Series
    func : binary operator
    fill_value : object, default None
    axis : {0, 1, 'columns', 'index', None}, default None
    level : int or None, default None

    Returns
    -------
    result : DataFrame
    """
    if fill_value is not None:
        raise NotImplementedError("fill_value {fill} not supported."
                                  .format(fill=fill_value))

    if axis is not None:
        axis = self._get_axis_number(axis)
        if axis == 0:
            return self._combine_match_index(other, func, level=level)
        else:
            return self._combine_match_columns(other, func, level=level)
    else:
        if not len(other):
            return self * np.nan

        if not len(self):
            # Ambiguous case, use _series so works with DataFrame
            return self._constructor(data=self._series, index=self.index,
                                     columns=self.columns)

        # default axis is columns
        return self._combine_match_columns(other, func, level=level)
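

# Illustrative sketch, added for exposition and not part of the original
# module (the helper name is hypothetical): with the default axis, a
# DataFrame/Series op matches the Series index against the frame's columns
# (``_combine_match_columns``); ``axis=0`` matches the row index instead.
def _sketch_frame_series_broadcast():
    import pandas as pd

    df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
    s = pd.Series({'A': 10, 'B': 100})
    result = df + s  # broadcast along columns by default
    assert result['A'].tolist() == [11, 12]
    assert result['B'].tolist() == [103, 104]
    return result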


def _align_method_FRAME(left, right, axis):
    """ convert rhs to match lhs dims if the input is a list, tuple
    or np.ndarray """

    def to_series(right):
        msg = ('Unable to coerce to Series, length must be {req_len}: '
               'given {given_len}')
        if axis is not None and left._get_axis_name(axis) == 'index':
            if len(left.index) != len(right):
                raise ValueError(msg.format(req_len=len(left.index),
                                            given_len=len(right)))
            right = left._constructor_sliced(right, index=left.index)
        else:
            if len(left.columns) != len(right):
                raise ValueError(msg.format(req_len=len(left.columns),
                                            given_len=len(right)))
            right = left._constructor_sliced(right, index=left.columns)
        return right

    if isinstance(right, np.ndarray):

        if right.ndim == 1:
            right = to_series(right)

        elif right.ndim == 2:
            if right.shape == left.shape:
                right = left._constructor(right, index=left.index,
                                          columns=left.columns)

            elif right.shape[0] == left.shape[0] and right.shape[1] == 1:
                # Broadcast across columns
                right = np.broadcast_to(right, left.shape)
                right = left._constructor(right,
                                          index=left.index,
                                          columns=left.columns)

            elif right.shape[1] == left.shape[1] and right.shape[0] == 1:
                # Broadcast along rows
                right = to_series(right[0, :])

            else:
                raise ValueError("Unable to coerce to DataFrame, shape "
                                 "must be {req_shape}: given {given_shape}"
                                 .format(req_shape=left.shape,
                                         given_shape=right.shape))

        elif right.ndim > 2:
            raise ValueError('Unable to coerce to Series/DataFrame, dim '
                             'must be <= 2: {dim}'.format(dim=right.shape))

    elif (is_list_like(right) and
          not isinstance(right, (ABCSeries, ABCDataFrame))):
        # GH17901
        right = to_series(right)

    return right
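

# Illustrative sketch, added for exposition and not part of the original
# module (the helper name is hypothetical): list and ndarray right-hand
# sides are coerced by ``_align_method_FRAME`` to a Series/DataFrame of
# matching shape, and a length mismatch raises the "Unable to coerce to
# Series" error above.
def _sketch_frame_list_alignment():
    import pandas as pd

    df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
    result = df + [10, 100]  # length == n_columns -> one value per column
    assert result['A'].tolist() == [11, 12]
    assert result['B'].tolist() == [103, 104]

    try:
        df + [1, 2, 3]  # wrong length
    except ValueError:
        pass
    return result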


def _arith_method_FRAME(cls, op, special):
    str_rep = _get_opstr(op, cls)
    op_name = _get_op_name(op, special)
    eval_kwargs = _gen_eval_kwargs(op_name)
    fill_zeros = _gen_fill_zeros(op_name)
    default_axis = _get_frame_op_default_axis(op_name)

    def na_op(x, y):
        import pandas.core.computation.expressions as expressions

        try:
            result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
        except TypeError:
            result = masked_arith_op(x, y, op)

        result = missing.fill_zeros(result, x, y, op_name, fill_zeros)
        return result

    if op_name in _op_descriptions:
        # i.e. include "add" but not "__add__"
        doc = _make_flex_doc(op_name, 'dataframe')
    else:
        doc = _arith_doc_FRAME % op_name

    @Appender(doc)
    def f(self, other, axis=default_axis, level=None, fill_value=None):

        other = _align_method_FRAME(self, other, axis)

        if isinstance(other, ABCDataFrame):
            # Another DataFrame
            pass_op = op if should_series_dispatch(self, other, op) else na_op
            return self._combine_frame(other, pass_op, fill_value, level)
        elif isinstance(other, ABCSeries):
            # For these values of `axis`, we end up dispatching to Series op,
            # so do not want the masked op.
            pass_op = op if axis in [0, "columns", None] else na_op
            return _combine_series_frame(self, other, pass_op,
                                         fill_value=fill_value, axis=axis,
                                         level=level)
        else:
            if fill_value is not None:
                self = self.fillna(fill_value)

            assert np.ndim(other) == 0
            return self._combine_const(other, op)

    f.__name__ = op_name
    return f
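

# Illustrative sketch, added for exposition and not part of the original
# module (the helper name is hypothetical): the flex DataFrame method
# generated here (``DataFrame.add`` etc.) applies ``fill_value`` before
# combining two frames.
def _sketch_frame_flex_add():
    import numpy as np
    import pandas as pd

    df1 = pd.DataFrame({'A': [1, 2]})
    df2 = pd.DataFrame({'A': [10, np.nan], 'B': [3, 4]})
    result = df1.add(df2, fill_value=0)
    assert result['A'].tolist() == [11.0, 2.0]
    assert result['B'].tolist() == [3.0, 4.0]
    return result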


def _flex_comp_method_FRAME(cls, op, special):
    str_rep = _get_opstr(op, cls)
    op_name = _get_op_name(op, special)
    default_axis = _get_frame_op_default_axis(op_name)

    def na_op(x, y):
        try:
            with np.errstate(invalid='ignore'):
                result = op(x, y)
        except TypeError:
            result = mask_cmp_op(x, y, op, (np.ndarray, ABCSeries))
        return result

    doc = _flex_comp_doc_FRAME.format(op_name=op_name,
                                      desc=_op_descriptions[op_name]['desc'])

    @Appender(doc)
    def f(self, other, axis=default_axis, level=None):

        other = _align_method_FRAME(self, other, axis)

        if isinstance(other, ABCDataFrame):
            # Another DataFrame
            if not self._indexed_same(other):
                self, other = self.align(other, 'outer',
                                         level=level, copy=False)
            return dispatch_to_series(self, other, na_op, str_rep)

        elif isinstance(other, ABCSeries):
            return _combine_series_frame(self, other, na_op,
                                         fill_value=None, axis=axis,
                                         level=level)
        else:
            assert np.ndim(other) == 0, other
            return self._combine_const(other, na_op)

    f.__name__ = op_name
    return f
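

# Illustrative sketch, added for exposition and not part of the original
# module (the helper name is hypothetical): the flex comparison generated
# here (``DataFrame.gt`` etc.) accepts an ``axis`` keyword, so a Series can
# be compared against the row index instead of the columns.
def _sketch_frame_flex_compare():
    import pandas as pd

    df = pd.DataFrame({'A': [1, 5], 'B': [3, 1]})
    s = pd.Series([2, 2])
    result = df.gt(s, axis=0)  # compare each column against ``s`` by row
    assert result['A'].tolist() == [False, True]
    assert result['B'].tolist() == [True, False]
    return result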


def _comp_method_FRAME(cls, func, special):
    str_rep = _get_opstr(func, cls)
    op_name = _get_op_name(func, special)

    @Appender('Wrapper for comparison method {name}'.format(name=op_name))
    def f(self, other):

        other = _align_method_FRAME(self, other, axis=None)

        if isinstance(other, ABCDataFrame):
            # Another DataFrame
            if not self._indexed_same(other):
                raise ValueError('Can only compare identically-labeled '
                                 'DataFrame objects')
            return dispatch_to_series(self, other, func, str_rep)

        elif isinstance(other, ABCSeries):
            return _combine_series_frame(self, other, func,
                                         fill_value=None, axis=None,
                                         level=None)
        else:
            # straight boolean comparisons we want to allow all columns
            # (regardless of dtype to pass thru) See #4537 for discussion.
            res = self._combine_const(other, func)
            return res.fillna(True).astype(bool)

    f.__name__ = op_name
    return f
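

# Illustrative sketch, added for exposition and not part of the original
# module (the helper name is hypothetical): the ``==``/``!=`` special
# methods generated here require identically-labeled frames, while scalar
# comparisons simply broadcast.
def _sketch_frame_comparison():
    import pandas as pd

    df = pd.DataFrame({'A': [1, 2]})
    assert (df == 1)['A'].tolist() == [True, False]

    shifted = pd.DataFrame({'A': [1, 2]}, index=[1, 2])
    try:
        df == shifted
    except ValueError:
        # "Can only compare identically-labeled DataFrame objects"
        pass
    return df == 1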


# -----------------------------------------------------------------------------
# Panel


def _arith_method_PANEL(cls, op, special):
    # work only for scalars
    op_name = _get_op_name(op, special)

    def f(self, other):
        if not is_scalar(other):
            raise ValueError('Simple arithmetic with {name} can only be '
                             'done with scalar values'
                             .format(name=self._constructor.__name__))

        return self._combine(other, op)

    f.__name__ = op_name
    return f


def _comp_method_PANEL(cls, op, special):
    str_rep = _get_opstr(op, cls)
    op_name = _get_op_name(op, special)

    def na_op(x, y):
        import pandas.core.computation.expressions as expressions

        try:
            result = expressions.evaluate(op, str_rep, x, y)
        except TypeError:
            result = mask_cmp_op(x, y, op, np.ndarray)
        return result

    @Appender('Wrapper for comparison method {name}'.format(name=op_name))
    def f(self, other, axis=None):
        # Validate the axis parameter
        if axis is not None:
            self._get_axis_number(axis)

        if isinstance(other, self._constructor):
            return self._compare_constructor(other, na_op)
        elif isinstance(other, (self._constructor_sliced, ABCDataFrame,
                                ABCSeries)):
            raise Exception("input needs alignment for this object [{object}]"
                            .format(object=self._constructor))
        else:
            return self._combine_const(other, na_op)

    f.__name__ = op_name
    return f


def _flex_method_PANEL(cls, op, special):
    str_rep = _get_opstr(op, cls)
    op_name = _get_op_name(op, special)
    eval_kwargs = _gen_eval_kwargs(op_name)
    fill_zeros = _gen_fill_zeros(op_name)

    def na_op(x, y):
        import pandas.core.computation.expressions as expressions

        try:
            result = expressions.evaluate(op, str_rep, x, y,
                                          errors='raise',
                                          **eval_kwargs)
        except TypeError:
            result = op(x, y)

        # handles the discrepancy between numpy and numexpr on division/mod
        # by 0; given that these are generally (always?) non-scalars, it is
        # not clear whether this is worth it at the moment
        result = missing.fill_zeros(result, x, y, op_name, fill_zeros)
        return result

    if op_name in _op_descriptions:
        doc = _make_flex_doc(op_name, 'panel')
    else:
        # doc string substitutions
        doc = _agg_doc_PANEL.format(op_name=op_name)

    @Appender(doc)
    def f(self, other, axis=0):
        return self._combine(other, na_op, axis=axis)

    f.__name__ = op_name
    return f


# -----------------------------------------------------------------------------
# Sparse


def _cast_sparse_series_op(left, right, opname):
    """
    For SparseSeries operation, coerce to float64 if the result is expected
    to have NaN or inf values

    Parameters
    ----------
    left : SparseArray
    right : SparseArray
    opname : str

    Returns
    -------
    left : SparseArray
    right : SparseArray
    """
    from pandas.core.sparse.api import SparseDtype

    opname = opname.strip('_')

    # TODO: This should be moved to the array?
    if is_integer_dtype(left) and is_integer_dtype(right):
        # series coerces to float64 if result should have NaN/inf
        if opname in ('floordiv', 'mod') and (right.values == 0).any():
            left = left.astype(SparseDtype(np.float64, left.fill_value))
            right = right.astype(SparseDtype(np.float64, right.fill_value))
        elif opname in ('rfloordiv', 'rmod') and (left.values == 0).any():
            left = left.astype(SparseDtype(np.float64, left.fill_value))
            right = right.astype(SparseDtype(np.float64, right.fill_value))

    return left, right
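

# Illustrative sketch, added for exposition and not part of the original
# module (the helper name is hypothetical): the float64 coercion above
# exists because integer floordiv/mod by zero must be able to produce
# inf/NaN; dense Series show the target semantics.
def _sketch_floordiv_by_zero_needs_float():
    import numpy as np
    import pandas as pd

    result = pd.Series([1, 2]) // pd.Series([0, 1])
    # division by zero yields inf, which only a float dtype can hold
    assert np.isinf(result.iloc[0]) and result.iloc[1] == 2.0
    return result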


def _arith_method_SPARSE_SERIES(cls, op, special):
    """
    Wrapper function for SparseSeries arithmetic operations, to avoid
    code duplication.
    """
    op_name = _get_op_name(op, special)

    def wrapper(self, other):
        if isinstance(other, ABCDataFrame):
            return NotImplemented
        elif isinstance(other, ABCSeries):
            if not isinstance(other, ABCSparseSeries):
                other = other.to_sparse(fill_value=self.fill_value)
            return _sparse_series_op(self, other, op, op_name)
        elif is_scalar(other):
            with np.errstate(all='ignore'):
                new_values = op(self.values, other)
            return self._constructor(new_values,
                                     index=self.index,
                                     name=self.name)
        else:  # pragma: no cover
            raise TypeError('operation with {other} not supported'
                            .format(other=type(other)))

    wrapper.__name__ = op_name
    return wrapper


def _sparse_series_op(left, right, op, name):
    left, right = left.align(right, join='outer', copy=False)
    new_index = left.index
    new_name = get_op_result_name(left, right)

    from pandas.core.arrays.sparse import _sparse_array_op
    lvalues, rvalues = _cast_sparse_series_op(left.values, right.values, name)
    result = _sparse_array_op(lvalues, rvalues, op, name)
    return left._constructor(result, index=new_index, name=new_name)


def _arith_method_SPARSE_ARRAY(cls, op, special):
    """
    Wrapper function for SparseArray arithmetic operations, to avoid
    code duplication.
    """
    op_name = _get_op_name(op, special)

    def wrapper(self, other):
        from pandas.core.arrays.sparse.array import (
            SparseArray, _sparse_array_op, _wrap_result, _get_fill)
        if isinstance(other, np.ndarray):
            if len(self) != len(other):
                raise AssertionError("length mismatch: {self} vs. {other}"
                                     .format(self=len(self),
                                             other=len(other)))
            if not isinstance(other, SparseArray):
                dtype = getattr(other, 'dtype', None)
                other = SparseArray(other, fill_value=self.fill_value,
                                    dtype=dtype)
            return _sparse_array_op(self, other, op, op_name)
        elif is_scalar(other):
            with np.errstate(all='ignore'):
                fill = op(_get_fill(self), np.asarray(other))
                result = op(self.sp_values, other)

            return _wrap_result(op_name, result, self.sp_index, fill)
        else:  # pragma: no cover
            raise TypeError('operation with {other} not supported'
                            .format(other=type(other)))

    wrapper.__name__ = op_name
    return wrapper