window.py 81 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649
  1. """
  2. Provide a generic structure to support window functions,
  3. similar to how we have a Groupby object.
  4. """
  5. from __future__ import division
  6. from collections import defaultdict
  7. from datetime import timedelta
  8. from textwrap import dedent
  9. import warnings
  10. import numpy as np
  11. import pandas._libs.window as libwindow
  12. import pandas.compat as compat
  13. from pandas.compat.numpy import function as nv
  14. from pandas.util._decorators import Appender, Substitution, cache_readonly
  15. from pandas.core.dtypes.common import (
  16. ensure_float64, is_bool, is_float_dtype, is_integer, is_integer_dtype,
  17. is_list_like, is_scalar, is_timedelta64_dtype, needs_i8_conversion)
  18. from pandas.core.dtypes.generic import (
  19. ABCDataFrame, ABCDateOffset, ABCDatetimeIndex, ABCPeriodIndex, ABCSeries,
  20. ABCTimedeltaIndex)
  21. from pandas.core.base import PandasObject, SelectionMixin
  22. import pandas.core.common as com
  23. from pandas.core.generic import _shared_docs
  24. from pandas.core.groupby.base import GroupByMixin
# Work on a module-local copy of the shared docstring table, so entries
# assigned in this module are not written into the dict imported from
# pandas.core.generic.
_shared_docs = dict(**_shared_docs)
# Generic "Returns" / "See Also" docstring fragment. ``%(name)s`` is a
# %-style placeholder that the consumer of the template must fill in.
_doc_template = """
Returns
-------
Series or DataFrame
Return type is determined by the caller.
See Also
--------
Series.%(name)s : Series %(name)s.
DataFrame.%(name)s : DataFrame %(name)s.
"""
  36. class _Window(PandasObject, SelectionMixin):
  37. _attributes = ['window', 'min_periods', 'center', 'win_type',
  38. 'axis', 'on', 'closed']
  39. exclusions = set()
  40. def __init__(self, obj, window=None, min_periods=None,
  41. center=False, win_type=None, axis=0, on=None, closed=None,
  42. **kwargs):
  43. self.__dict__.update(kwargs)
  44. self.blocks = []
  45. self.obj = obj
  46. self.on = on
  47. self.closed = closed
  48. self.window = window
  49. self.min_periods = min_periods
  50. self.center = center
  51. self.win_type = win_type
  52. self.win_freq = None
  53. self.axis = obj._get_axis_number(axis) if axis is not None else None
  54. self.validate()
  55. @property
  56. def _constructor(self):
  57. return Window
  58. @property
  59. def is_datetimelike(self):
  60. return None
  61. @property
  62. def _on(self):
  63. return None
  64. @property
  65. def is_freq_type(self):
  66. return self.win_type == 'freq'
  67. def validate(self):
  68. if self.center is not None and not is_bool(self.center):
  69. raise ValueError("center must be a boolean")
  70. if (self.min_periods is not None and
  71. not is_integer(self.min_periods)):
  72. raise ValueError("min_periods must be an integer")
  73. if (self.closed is not None and
  74. self.closed not in ['right', 'both', 'left', 'neither']):
  75. raise ValueError("closed must be 'right', 'left', 'both' or "
  76. "'neither'")
  77. def _convert_freq(self):
  78. """
  79. Resample according to the how, return a new object.
  80. """
  81. obj = self._selected_obj
  82. index = None
  83. return obj, index
  84. def _create_blocks(self):
  85. """
  86. Split data into blocks & return conformed data.
  87. """
  88. obj, index = self._convert_freq()
  89. if index is not None:
  90. index = self._on
  91. # filter out the on from the object
  92. if self.on is not None:
  93. if obj.ndim == 2:
  94. obj = obj.reindex(columns=obj.columns.difference([self.on]),
  95. copy=False)
  96. blocks = obj._to_dict_of_blocks(copy=False).values()
  97. return blocks, obj, index
  98. def _gotitem(self, key, ndim, subset=None):
  99. """
  100. Sub-classes to define. Return a sliced object.
  101. Parameters
  102. ----------
  103. key : str / list of selections
  104. ndim : 1,2
  105. requested ndim of result
  106. subset : object, default None
  107. subset to act on
  108. """
  109. # create a new object to prevent aliasing
  110. if subset is None:
  111. subset = self.obj
  112. self = self._shallow_copy(subset)
  113. self._reset_cache()
  114. if subset.ndim == 2:
  115. if is_scalar(key) and key in subset or is_list_like(key):
  116. self._selection = key
  117. return self
  118. def __getattr__(self, attr):
  119. if attr in self._internal_names_set:
  120. return object.__getattribute__(self, attr)
  121. if attr in self.obj:
  122. return self[attr]
  123. raise AttributeError("%r object has no attribute %r" %
  124. (type(self).__name__, attr))
  125. def _dir_additions(self):
  126. return self.obj._dir_additions()
  127. def _get_window(self, other=None):
  128. return self.window
  129. @property
  130. def _window_type(self):
  131. return self.__class__.__name__
  132. def __unicode__(self):
  133. """
  134. Provide a nice str repr of our rolling object.
  135. """
  136. attrs = ["{k}={v}".format(k=k, v=getattr(self, k))
  137. for k in self._attributes
  138. if getattr(self, k, None) is not None]
  139. return "{klass} [{attrs}]".format(klass=self._window_type,
  140. attrs=','.join(attrs))
  141. def __iter__(self):
  142. url = 'https://github.com/pandas-dev/pandas/issues/11704'
  143. raise NotImplementedError('See issue #11704 {url}'.format(url=url))
  144. def _get_index(self, index=None):
  145. """
  146. Return index as ndarrays.
  147. Returns
  148. -------
  149. tuple of (index, index_as_ndarray)
  150. """
  151. if self.is_freq_type:
  152. if index is None:
  153. index = self._on
  154. return index, index.asi8
  155. return index, index
  156. def _prep_values(self, values=None, kill_inf=True):
  157. if values is None:
  158. values = getattr(self._selected_obj, 'values', self._selected_obj)
  159. # GH #12373 : rolling functions error on float32 data
  160. # make sure the data is coerced to float64
  161. if is_float_dtype(values.dtype):
  162. values = ensure_float64(values)
  163. elif is_integer_dtype(values.dtype):
  164. values = ensure_float64(values)
  165. elif needs_i8_conversion(values.dtype):
  166. raise NotImplementedError("ops for {action} for this "
  167. "dtype {dtype} are not "
  168. "implemented".format(
  169. action=self._window_type,
  170. dtype=values.dtype))
  171. else:
  172. try:
  173. values = ensure_float64(values)
  174. except (ValueError, TypeError):
  175. raise TypeError("cannot handle this type -> {0}"
  176. "".format(values.dtype))
  177. if kill_inf:
  178. values = values.copy()
  179. values[np.isinf(values)] = np.NaN
  180. return values
  181. def _wrap_result(self, result, block=None, obj=None):
  182. """
  183. Wrap a single result.
  184. """
  185. if obj is None:
  186. obj = self._selected_obj
  187. index = obj.index
  188. if isinstance(result, np.ndarray):
  189. # coerce if necessary
  190. if block is not None:
  191. if is_timedelta64_dtype(block.values.dtype):
  192. from pandas import to_timedelta
  193. result = to_timedelta(
  194. result.ravel(), unit='ns').values.reshape(result.shape)
  195. if result.ndim == 1:
  196. from pandas import Series
  197. return Series(result, index, name=obj.name)
  198. return type(obj)(result, index=index, columns=block.columns)
  199. return result
  200. def _wrap_results(self, results, blocks, obj):
  201. """
  202. Wrap the results.
  203. Parameters
  204. ----------
  205. results : list of ndarrays
  206. blocks : list of blocks
  207. obj : conformed data (may be resampled)
  208. """
  209. from pandas import Series, concat
  210. from pandas.core.index import ensure_index
  211. final = []
  212. for result, block in zip(results, blocks):
  213. result = self._wrap_result(result, block=block, obj=obj)
  214. if result.ndim == 1:
  215. return result
  216. final.append(result)
  217. # if we have an 'on' column
  218. # we want to put it back into the results
  219. # in the same location
  220. columns = self._selected_obj.columns
  221. if self.on is not None and not self._on.equals(obj.index):
  222. name = self._on.name
  223. final.append(Series(self._on, index=obj.index, name=name))
  224. if self._selection is not None:
  225. selection = ensure_index(self._selection)
  226. # need to reorder to include original location of
  227. # the on column (if its not already there)
  228. if name not in selection:
  229. columns = self.obj.columns
  230. indexer = columns.get_indexer(selection.tolist() + [name])
  231. columns = columns.take(sorted(indexer))
  232. if not len(final):
  233. return obj.astype('float64')
  234. return concat(final, axis=1).reindex(columns=columns, copy=False)
  235. def _center_window(self, result, window):
  236. """
  237. Center the result in the window.
  238. """
  239. if self.axis > result.ndim - 1:
  240. raise ValueError("Requested axis is larger then no. of argument "
  241. "dimensions")
  242. offset = _offset(window, True)
  243. if offset > 0:
  244. if isinstance(result, (ABCSeries, ABCDataFrame)):
  245. result = result.slice_shift(-offset, axis=self.axis)
  246. else:
  247. lead_indexer = [slice(None)] * result.ndim
  248. lead_indexer[self.axis] = slice(offset, None)
  249. result = np.copy(result[tuple(lead_indexer)])
  250. return result
  251. def aggregate(self, arg, *args, **kwargs):
  252. result, how = self._aggregate(arg, *args, **kwargs)
  253. if result is None:
  254. return self.apply(arg, raw=False, args=args, kwargs=kwargs)
  255. return result
  256. agg = aggregate
# Docstring template for the window ``sum`` methods. ``%(name)s`` is a
# %-style placeholder; the text is stored under the 'sum' key of the
# module-local ``_shared_docs`` table.
_shared_docs['sum'] = dedent("""
Calculate %(name)s sum of given DataFrame or Series.
Parameters
----------
*args, **kwargs
For compatibility with other %(name)s methods. Has no effect
on the computed value.
Returns
-------
Series or DataFrame
Same type as the input, with the same index, containing the
%(name)s sum.
See Also
--------
Series.sum : Reducing sum for Series.
DataFrame.sum : Reducing sum for DataFrame.
Examples
--------
>>> s = pd.Series([1, 2, 3, 4, 5])
>>> s
0 1
1 2
2 3
3 4
4 5
dtype: int64
>>> s.rolling(3).sum()
0 NaN
1 NaN
2 6.0
3 9.0
4 12.0
dtype: float64
>>> s.expanding(3).sum()
0 NaN
1 NaN
2 6.0
3 10.0
4 15.0
dtype: float64
>>> s.rolling(3, center=True).sum()
0 NaN
1 6.0
2 9.0
3 12.0
4 NaN
dtype: float64
For DataFrame, each %(name)s sum is computed column-wise.
>>> df = pd.DataFrame({"A": s, "B": s ** 2})
>>> df
A B
0 1 1
1 2 4
2 3 9
3 4 16
4 5 25
>>> df.rolling(3).sum()
A B
0 NaN NaN
1 NaN NaN
2 6.0 14.0
3 9.0 29.0
4 12.0 50.0
""")
# Docstring template for the window ``mean`` methods. ``%(name)s`` is a
# %-style placeholder; the text is stored under the 'mean' key of the
# module-local ``_shared_docs`` table.
_shared_docs['mean'] = dedent("""
Calculate the %(name)s mean of the values.
Parameters
----------
*args
Under Review.
**kwargs
Under Review.
Returns
-------
Series or DataFrame
Returned object type is determined by the caller of the %(name)s
calculation.
See Also
--------
Series.%(name)s : Calling object with Series data.
DataFrame.%(name)s : Calling object with DataFrames.
Series.mean : Equivalent method for Series.
DataFrame.mean : Equivalent method for DataFrame.
Examples
--------
The below examples will show rolling mean calculations with window sizes of
two and three, respectively.
>>> s = pd.Series([1, 2, 3, 4])
>>> s.rolling(2).mean()
0 NaN
1 1.5
2 2.5
3 3.5
dtype: float64
>>> s.rolling(3).mean()
0 NaN
1 NaN
2 2.0
3 3.0
dtype: float64
""")
  358. class Window(_Window):
  359. """
  360. Provides rolling window calculations.
  361. .. versionadded:: 0.18.0
  362. Parameters
  363. ----------
  364. window : int, or offset
  365. Size of the moving window. This is the number of observations used for
  366. calculating the statistic. Each window will be a fixed size.
  367. If its an offset then this will be the time period of each window. Each
  368. window will be a variable sized based on the observations included in
  369. the time-period. This is only valid for datetimelike indexes. This is
  370. new in 0.19.0
  371. min_periods : int, default None
  372. Minimum number of observations in window required to have a value
  373. (otherwise result is NA). For a window that is specified by an offset,
  374. `min_periods` will default to 1. Otherwise, `min_periods` will default
  375. to the size of the window.
  376. center : bool, default False
  377. Set the labels at the center of the window.
  378. win_type : str, default None
  379. Provide a window type. If ``None``, all points are evenly weighted.
  380. See the notes below for further information.
  381. on : str, optional
  382. For a DataFrame, column on which to calculate
  383. the rolling window, rather than the index
  384. axis : int or str, default 0
  385. closed : str, default None
  386. Make the interval closed on the 'right', 'left', 'both' or
  387. 'neither' endpoints.
  388. For offset-based windows, it defaults to 'right'.
  389. For fixed windows, defaults to 'both'. Remaining cases not implemented
  390. for fixed windows.
  391. .. versionadded:: 0.20.0
  392. Returns
  393. -------
  394. a Window or Rolling sub-classed for the particular operation
  395. See Also
  396. --------
  397. expanding : Provides expanding transformations.
  398. ewm : Provides exponential weighted functions.
  399. Notes
  400. -----
  401. By default, the result is set to the right edge of the window. This can be
  402. changed to the center of the window by setting ``center=True``.
  403. To learn more about the offsets & frequency strings, please see `this link
  404. <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
  405. The recognized win_types are:
  406. * ``boxcar``
  407. * ``triang``
  408. * ``blackman``
  409. * ``hamming``
  410. * ``bartlett``
  411. * ``parzen``
  412. * ``bohman``
  413. * ``blackmanharris``
  414. * ``nuttall``
  415. * ``barthann``
  416. * ``kaiser`` (needs beta)
  417. * ``gaussian`` (needs std)
  418. * ``general_gaussian`` (needs power, width)
  419. * ``slepian`` (needs width).
  420. If ``win_type=None`` all points are evenly weighted. To learn more about
  421. different window types see `scipy.signal window functions
  422. <https://docs.scipy.org/doc/scipy/reference/signal.html#window-functions>`__.
  423. Examples
  424. --------
  425. >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
  426. >>> df
  427. B
  428. 0 0.0
  429. 1 1.0
  430. 2 2.0
  431. 3 NaN
  432. 4 4.0
  433. Rolling sum with a window length of 2, using the 'triang'
  434. window type.
  435. >>> df.rolling(2, win_type='triang').sum()
  436. B
  437. 0 NaN
  438. 1 1.0
  439. 2 2.5
  440. 3 NaN
  441. 4 NaN
  442. Rolling sum with a window length of 2, min_periods defaults
  443. to the window length.
  444. >>> df.rolling(2).sum()
  445. B
  446. 0 NaN
  447. 1 1.0
  448. 2 3.0
  449. 3 NaN
  450. 4 NaN
  451. Same as above, but explicitly set the min_periods
  452. >>> df.rolling(2, min_periods=1).sum()
  453. B
  454. 0 0.0
  455. 1 1.0
  456. 2 3.0
  457. 3 2.0
  458. 4 4.0
  459. A ragged (meaning not-a-regular frequency), time-indexed DataFrame
  460. >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]},
  461. ... index = [pd.Timestamp('20130101 09:00:00'),
  462. ... pd.Timestamp('20130101 09:00:02'),
  463. ... pd.Timestamp('20130101 09:00:03'),
  464. ... pd.Timestamp('20130101 09:00:05'),
  465. ... pd.Timestamp('20130101 09:00:06')])
  466. >>> df
  467. B
  468. 2013-01-01 09:00:00 0.0
  469. 2013-01-01 09:00:02 1.0
  470. 2013-01-01 09:00:03 2.0
  471. 2013-01-01 09:00:05 NaN
  472. 2013-01-01 09:00:06 4.0
  473. Contrasting to an integer rolling window, this will roll a variable
  474. length window corresponding to the time period.
  475. The default for min_periods is 1.
  476. >>> df.rolling('2s').sum()
  477. B
  478. 2013-01-01 09:00:00 0.0
  479. 2013-01-01 09:00:02 1.0
  480. 2013-01-01 09:00:03 3.0
  481. 2013-01-01 09:00:05 NaN
  482. 2013-01-01 09:00:06 4.0
  483. """
  484. def validate(self):
  485. super(Window, self).validate()
  486. window = self.window
  487. if isinstance(window, (list, tuple, np.ndarray)):
  488. pass
  489. elif is_integer(window):
  490. if window <= 0:
  491. raise ValueError("window must be > 0 ")
  492. try:
  493. import scipy.signal as sig
  494. except ImportError:
  495. raise ImportError('Please install scipy to generate window '
  496. 'weight')
  497. if not isinstance(self.win_type, compat.string_types):
  498. raise ValueError('Invalid win_type {0}'.format(self.win_type))
  499. if getattr(sig, self.win_type, None) is None:
  500. raise ValueError('Invalid win_type {0}'.format(self.win_type))
  501. else:
  502. raise ValueError('Invalid window {0}'.format(window))
  503. def _prep_window(self, **kwargs):
  504. """
  505. Provide validation for our window type, return the window
  506. we have already been validated.
  507. """
  508. window = self._get_window()
  509. if isinstance(window, (list, tuple, np.ndarray)):
  510. return com.asarray_tuplesafe(window).astype(float)
  511. elif is_integer(window):
  512. import scipy.signal as sig
  513. # the below may pop from kwargs
  514. def _validate_win_type(win_type, kwargs):
  515. arg_map = {'kaiser': ['beta'],
  516. 'gaussian': ['std'],
  517. 'general_gaussian': ['power', 'width'],
  518. 'slepian': ['width']}
  519. if win_type in arg_map:
  520. return tuple([win_type] + _pop_args(win_type,
  521. arg_map[win_type],
  522. kwargs))
  523. return win_type
  524. def _pop_args(win_type, arg_names, kwargs):
  525. msg = '%s window requires %%s' % win_type
  526. all_args = []
  527. for n in arg_names:
  528. if n not in kwargs:
  529. raise ValueError(msg % n)
  530. all_args.append(kwargs.pop(n))
  531. return all_args
  532. win_type = _validate_win_type(self.win_type, kwargs)
  533. # GH #15662. `False` makes symmetric window, rather than periodic.
  534. return sig.get_window(win_type, window, False).astype(float)
  535. def _apply_window(self, mean=True, **kwargs):
  536. """
  537. Applies a moving window of type ``window_type`` on the data.
  538. Parameters
  539. ----------
  540. mean : bool, default True
  541. If True computes weighted mean, else weighted sum
  542. Returns
  543. -------
  544. y : same type as input argument
  545. """
  546. window = self._prep_window(**kwargs)
  547. center = self.center
  548. blocks, obj, index = self._create_blocks()
  549. results = []
  550. for b in blocks:
  551. try:
  552. values = self._prep_values(b.values)
  553. except TypeError:
  554. results.append(b.values.copy())
  555. continue
  556. if values.size == 0:
  557. results.append(values.copy())
  558. continue
  559. offset = _offset(window, center)
  560. additional_nans = np.array([np.NaN] * offset)
  561. def f(arg, *args, **kwargs):
  562. minp = _use_window(self.min_periods, len(window))
  563. return libwindow.roll_window(np.concatenate((arg,
  564. additional_nans))
  565. if center else arg, window, minp,
  566. avg=mean)
  567. result = np.apply_along_axis(f, self.axis, values)
  568. if center:
  569. result = self._center_window(result, window)
  570. results.append(result)
  571. return self._wrap_results(results, blocks, obj)
  572. _agg_see_also_doc = dedent("""
  573. See Also
  574. --------
  575. pandas.DataFrame.rolling.aggregate
  576. pandas.DataFrame.aggregate
  577. """)
  578. _agg_examples_doc = dedent("""
  579. Examples
  580. --------
  581. >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'])
  582. >>> df
  583. A B C
  584. 0 -2.385977 -0.102758 0.438822
  585. 1 -1.004295 0.905829 -0.954544
  586. 2 0.735167 -0.165272 -1.619346
  587. 3 -0.702657 -1.340923 -0.706334
  588. 4 -0.246845 0.211596 -0.901819
  589. 5 2.463718 3.157577 -1.380906
  590. 6 -1.142255 2.340594 -0.039875
  591. 7 1.396598 -1.647453 1.677227
  592. 8 -0.543425 1.761277 -0.220481
  593. 9 -0.640505 0.289374 -1.550670
  594. >>> df.rolling(3, win_type='boxcar').agg('mean')
  595. A B C
  596. 0 NaN NaN NaN
  597. 1 NaN NaN NaN
  598. 2 -0.885035 0.212600 -0.711689
  599. 3 -0.323928 -0.200122 -1.093408
  600. 4 -0.071445 -0.431533 -1.075833
  601. 5 0.504739 0.676083 -0.996353
  602. 6 0.358206 1.903256 -0.774200
  603. 7 0.906020 1.283573 0.085482
  604. 8 -0.096361 0.818139 0.472290
  605. 9 0.070889 0.134399 -0.031308
  606. """)
  607. @Substitution(see_also=_agg_see_also_doc,
  608. examples=_agg_examples_doc,
  609. versionadded='',
  610. klass='Series/DataFrame',
  611. axis='')
  612. @Appender(_shared_docs['aggregate'])
  613. def aggregate(self, arg, *args, **kwargs):
  614. result, how = self._aggregate(arg, *args, **kwargs)
  615. if result is None:
  616. # these must apply directly
  617. result = arg(self)
  618. return result
  619. agg = aggregate
  620. @Substitution(name='window')
  621. @Appender(_shared_docs['sum'])
  622. def sum(self, *args, **kwargs):
  623. nv.validate_window_func('sum', args, kwargs)
  624. return self._apply_window(mean=False, **kwargs)
  625. @Substitution(name='window')
  626. @Appender(_shared_docs['mean'])
  627. def mean(self, *args, **kwargs):
  628. nv.validate_window_func('mean', args, kwargs)
  629. return self._apply_window(mean=True, **kwargs)
  630. class _GroupByMixin(GroupByMixin):
  631. """
  632. Provide the groupby facilities.
  633. """
  634. def __init__(self, obj, *args, **kwargs):
  635. parent = kwargs.pop('parent', None) # noqa
  636. groupby = kwargs.pop('groupby', None)
  637. if groupby is None:
  638. groupby, obj = obj, obj.obj
  639. self._groupby = groupby
  640. self._groupby.mutated = True
  641. self._groupby.grouper.mutated = True
  642. super(GroupByMixin, self).__init__(obj, *args, **kwargs)
  643. count = GroupByMixin._dispatch('count')
  644. corr = GroupByMixin._dispatch('corr', other=None, pairwise=None)
  645. cov = GroupByMixin._dispatch('cov', other=None, pairwise=None)
  646. def _apply(self, func, name, window=None, center=None,
  647. check_minp=None, **kwargs):
  648. """
  649. Dispatch to apply; we are stripping all of the _apply kwargs and
  650. performing the original function call on the grouped object.
  651. """
  652. def f(x, name=name, *args):
  653. x = self._shallow_copy(x)
  654. if isinstance(name, compat.string_types):
  655. return getattr(x, name)(*args, **kwargs)
  656. return x.apply(name, *args, **kwargs)
  657. return self._groupby.apply(f)
  658. class _Rolling(_Window):
  659. @property
  660. def _constructor(self):
  661. return Rolling
  662. def _apply(self, func, name=None, window=None, center=None,
  663. check_minp=None, **kwargs):
  664. """
  665. Rolling statistical measure using supplied function.
  666. Designed to be used with passed-in Cython array-based functions.
  667. Parameters
  668. ----------
  669. func : str/callable to apply
  670. name : str, optional
  671. name of this function
  672. window : int/array, default to _get_window()
  673. center : bool, default to self.center
  674. check_minp : function, default to _use_window
  675. Returns
  676. -------
  677. y : type of input
  678. """
  679. if center is None:
  680. center = self.center
  681. if window is None:
  682. window = self._get_window()
  683. if check_minp is None:
  684. check_minp = _use_window
  685. blocks, obj, index = self._create_blocks()
  686. index, indexi = self._get_index(index=index)
  687. results = []
  688. for b in blocks:
  689. values = self._prep_values(b.values)
  690. if values.size == 0:
  691. results.append(values.copy())
  692. continue
  693. # if we have a string function name, wrap it
  694. if isinstance(func, compat.string_types):
  695. cfunc = getattr(libwindow, func, None)
  696. if cfunc is None:
  697. raise ValueError("we do not support this function "
  698. "in libwindow.{func}".format(func=func))
  699. def func(arg, window, min_periods=None, closed=None):
  700. minp = check_minp(min_periods, window)
  701. # ensure we are only rolling on floats
  702. arg = ensure_float64(arg)
  703. return cfunc(arg,
  704. window, minp, indexi, closed, **kwargs)
  705. # calculation function
  706. if center:
  707. offset = _offset(window, center)
  708. additional_nans = np.array([np.NaN] * offset)
  709. def calc(x):
  710. return func(np.concatenate((x, additional_nans)),
  711. window, min_periods=self.min_periods,
  712. closed=self.closed)
  713. else:
  714. def calc(x):
  715. return func(x, window, min_periods=self.min_periods,
  716. closed=self.closed)
  717. with np.errstate(all='ignore'):
  718. if values.ndim > 1:
  719. result = np.apply_along_axis(calc, self.axis, values)
  720. else:
  721. result = calc(values)
  722. if center:
  723. result = self._center_window(result, window)
  724. results.append(result)
  725. return self._wrap_results(results, blocks, obj)
  726. class _Rolling_and_Expanding(_Rolling):
  727. _shared_docs['count'] = dedent(r"""
  728. The %(name)s count of any non-NaN observations inside the window.
  729. Returns
  730. -------
  731. Series or DataFrame
  732. Returned object type is determined by the caller of the %(name)s
  733. calculation.
  734. See Also
  735. --------
  736. pandas.Series.%(name)s : Calling object with Series data.
  737. pandas.DataFrame.%(name)s : Calling object with DataFrames.
  738. pandas.DataFrame.count : Count of the full DataFrame.
  739. Examples
  740. --------
  741. >>> s = pd.Series([2, 3, np.nan, 10])
  742. >>> s.rolling(2).count()
  743. 0 1.0
  744. 1 2.0
  745. 2 1.0
  746. 3 1.0
  747. dtype: float64
  748. >>> s.rolling(3).count()
  749. 0 1.0
  750. 1 2.0
  751. 2 2.0
  752. 3 2.0
  753. dtype: float64
  754. >>> s.rolling(4).count()
  755. 0 1.0
  756. 1 2.0
  757. 2 2.0
  758. 3 3.0
  759. dtype: float64
  760. """)
  761. def count(self):
  762. blocks, obj, index = self._create_blocks()
  763. # Validate the index
  764. self._get_index(index=index)
  765. window = self._get_window()
  766. window = min(window, len(obj)) if not self.center else window
  767. results = []
  768. for b in blocks:
  769. result = b.notna().astype(int)
  770. result = self._constructor(result, window=window, min_periods=0,
  771. center=self.center,
  772. closed=self.closed).sum()
  773. results.append(result)
  774. return self._wrap_results(results, blocks, obj)
  775. _shared_docs['apply'] = dedent(r"""
  776. The %(name)s function's apply function.
  777. Parameters
  778. ----------
  779. func : function
  780. Must produce a single value from an ndarray input if ``raw=True``
  781. or a Series if ``raw=False``.
  782. raw : bool, default None
  783. * ``False`` : passes each row or column as a Series to the
  784. function.
  785. * ``True`` or ``None`` : the passed function will receive ndarray
  786. objects instead.
  787. If you are just applying a NumPy reduction function this will
  788. achieve much better performance.
  789. The `raw` parameter is required and will show a FutureWarning if
  790. not passed. In the future `raw` will default to False.
  791. .. versionadded:: 0.23.0
  792. *args, **kwargs
  793. Arguments and keyword arguments to be passed into func.
  794. Returns
  795. -------
  796. Series or DataFrame
  797. Return type is determined by the caller.
  798. See Also
  799. --------
  800. Series.%(name)s : Series %(name)s.
  801. DataFrame.%(name)s : DataFrame %(name)s.
  802. """)
  803. def apply(self, func, raw=None, args=(), kwargs={}):
  804. from pandas import Series
  805. # TODO: _level is unused?
  806. _level = kwargs.pop('_level', None) # noqa
  807. window = self._get_window()
  808. offset = _offset(window, self.center)
  809. index, indexi = self._get_index()
  810. # TODO: default is for backward compat
  811. # change to False in the future
  812. if raw is None:
  813. warnings.warn(
  814. "Currently, 'apply' passes the values as ndarrays to the "
  815. "applied function. In the future, this will change to passing "
  816. "it as Series objects. You need to specify 'raw=True' to keep "
  817. "the current behaviour, and you can pass 'raw=False' to "
  818. "silence this warning", FutureWarning, stacklevel=3)
  819. raw = True
  820. def f(arg, window, min_periods, closed):
  821. minp = _use_window(min_periods, window)
  822. if not raw:
  823. arg = Series(arg, index=self.obj.index)
  824. return libwindow.roll_generic(
  825. arg, window, minp, indexi,
  826. closed, offset, func, raw, args, kwargs)
  827. return self._apply(f, func, args=args, kwargs=kwargs,
  828. center=False, raw=raw)
  829. def sum(self, *args, **kwargs):
  830. nv.validate_window_func('sum', args, kwargs)
  831. return self._apply('roll_sum', 'sum', **kwargs)
  832. _shared_docs['max'] = dedent("""
  833. Calculate the %(name)s maximum.
  834. Parameters
  835. ----------
  836. *args, **kwargs
  837. Arguments and keyword arguments to be passed into func.
  838. """)
  839. def max(self, *args, **kwargs):
  840. nv.validate_window_func('max', args, kwargs)
  841. return self._apply('roll_max', 'max', **kwargs)
  842. _shared_docs['min'] = dedent("""
  843. Calculate the %(name)s minimum.
  844. Parameters
  845. ----------
  846. **kwargs
  847. Under Review.
  848. Returns
  849. -------
  850. Series or DataFrame
  851. Returned object type is determined by the caller of the %(name)s
  852. calculation.
  853. See Also
  854. --------
  855. Series.%(name)s : Calling object with a Series.
  856. DataFrame.%(name)s : Calling object with a DataFrame.
  857. Series.min : Similar method for Series.
  858. DataFrame.min : Similar method for DataFrame.
  859. Examples
  860. --------
  861. Performing a rolling minimum with a window size of 3.
  862. >>> s = pd.Series([4, 3, 5, 2, 6])
  863. >>> s.rolling(3).min()
  864. 0 NaN
  865. 1 NaN
  866. 2 3.0
  867. 3 2.0
  868. 4 2.0
  869. dtype: float64
  870. """)
  871. def min(self, *args, **kwargs):
  872. nv.validate_window_func('min', args, kwargs)
  873. return self._apply('roll_min', 'min', **kwargs)
  874. def mean(self, *args, **kwargs):
  875. nv.validate_window_func('mean', args, kwargs)
  876. return self._apply('roll_mean', 'mean', **kwargs)
  877. _shared_docs['median'] = dedent("""
  878. Calculate the %(name)s median.
  879. Parameters
  880. ----------
  881. **kwargs
  882. For compatibility with other %(name)s methods. Has no effect
  883. on the computed median.
  884. Returns
  885. -------
  886. Series or DataFrame
  887. Returned type is the same as the original object.
  888. See Also
  889. --------
  890. Series.%(name)s : Calling object with Series data.
  891. DataFrame.%(name)s : Calling object with DataFrames.
  892. Series.median : Equivalent method for Series.
  893. DataFrame.median : Equivalent method for DataFrame.
  894. Examples
  895. --------
  896. Compute the rolling median of a series with a window size of 3.
  897. >>> s = pd.Series([0, 1, 2, 3, 4])
  898. >>> s.rolling(3).median()
  899. 0 NaN
  900. 1 NaN
  901. 2 1.0
  902. 3 2.0
  903. 4 3.0
  904. dtype: float64
  905. """)
  906. def median(self, **kwargs):
  907. return self._apply('roll_median_c', 'median', **kwargs)
  908. _shared_docs['std'] = dedent("""
  909. Calculate %(name)s standard deviation.
  910. Normalized by N-1 by default. This can be changed using the `ddof`
  911. argument.
  912. Parameters
  913. ----------
  914. ddof : int, default 1
  915. Delta Degrees of Freedom. The divisor used in calculations
  916. is ``N - ddof``, where ``N`` represents the number of elements.
  917. *args, **kwargs
  918. For NumPy compatibility. No additional arguments are used.
  919. Returns
  920. -------
  921. Series or DataFrame
  922. Returns the same object type as the caller of the %(name)s calculation.
  923. See Also
  924. --------
  925. Series.%(name)s : Calling object with Series data.
  926. DataFrame.%(name)s : Calling object with DataFrames.
  927. Series.std : Equivalent method for Series.
  928. DataFrame.std : Equivalent method for DataFrame.
  929. numpy.std : Equivalent method for Numpy array.
  930. Notes
  931. -----
  932. The default `ddof` of 1 used in Series.std is different than the default
  933. `ddof` of 0 in numpy.std.
  934. A minimum of one period is required for the rolling calculation.
  935. Examples
  936. --------
  937. >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
  938. >>> s.rolling(3).std()
  939. 0 NaN
  940. 1 NaN
  941. 2 0.577350
  942. 3 1.000000
  943. 4 1.000000
  944. 5 1.154701
  945. 6 0.000000
  946. dtype: float64
  947. >>> s.expanding(3).std()
  948. 0 NaN
  949. 1 NaN
  950. 2 0.577350
  951. 3 0.957427
  952. 4 0.894427
  953. 5 0.836660
  954. 6 0.786796
  955. dtype: float64
  956. """)
  957. def std(self, ddof=1, *args, **kwargs):
  958. nv.validate_window_func('std', args, kwargs)
  959. window = self._get_window()
  960. index, indexi = self._get_index()
  961. def f(arg, *args, **kwargs):
  962. minp = _require_min_periods(1)(self.min_periods, window)
  963. return _zsqrt(libwindow.roll_var(arg, window, minp, indexi,
  964. self.closed, ddof))
  965. return self._apply(f, 'std', check_minp=_require_min_periods(1),
  966. ddof=ddof, **kwargs)
  967. _shared_docs['var'] = dedent("""
  968. Calculate unbiased %(name)s variance.
  969. Normalized by N-1 by default. This can be changed using the `ddof`
  970. argument.
  971. Parameters
  972. ----------
  973. ddof : int, default 1
  974. Delta Degrees of Freedom. The divisor used in calculations
  975. is ``N - ddof``, where ``N`` represents the number of elements.
  976. *args, **kwargs
  977. For NumPy compatibility. No additional arguments are used.
  978. Returns
  979. -------
  980. Series or DataFrame
  981. Returns the same object type as the caller of the %(name)s calculation.
  982. See Also
  983. --------
  984. Series.%(name)s : Calling object with Series data.
  985. DataFrame.%(name)s : Calling object with DataFrames.
  986. Series.var : Equivalent method for Series.
  987. DataFrame.var : Equivalent method for DataFrame.
  988. numpy.var : Equivalent method for Numpy array.
  989. Notes
  990. -----
  991. The default `ddof` of 1 used in :meth:`Series.var` is different than the
  992. default `ddof` of 0 in :func:`numpy.var`.
  993. A minimum of 1 period is required for the rolling calculation.
  994. Examples
  995. --------
  996. >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
  997. >>> s.rolling(3).var()
  998. 0 NaN
  999. 1 NaN
  1000. 2 0.333333
  1001. 3 1.000000
  1002. 4 1.000000
  1003. 5 1.333333
  1004. 6 0.000000
  1005. dtype: float64
  1006. >>> s.expanding(3).var()
  1007. 0 NaN
  1008. 1 NaN
  1009. 2 0.333333
  1010. 3 0.916667
  1011. 4 0.800000
  1012. 5 0.700000
  1013. 6 0.619048
  1014. dtype: float64
  1015. """)
  1016. def var(self, ddof=1, *args, **kwargs):
  1017. nv.validate_window_func('var', args, kwargs)
  1018. return self._apply('roll_var', 'var',
  1019. check_minp=_require_min_periods(1), ddof=ddof,
  1020. **kwargs)
  1021. _shared_docs['skew'] = """
  1022. Unbiased %(name)s skewness.
  1023. Parameters
  1024. ----------
  1025. **kwargs
  1026. Keyword arguments to be passed into func.
  1027. """
  1028. def skew(self, **kwargs):
  1029. return self._apply('roll_skew', 'skew',
  1030. check_minp=_require_min_periods(3), **kwargs)
  1031. _shared_docs['kurt'] = dedent("""
  1032. Calculate unbiased %(name)s kurtosis.
  1033. This function uses Fisher's definition of kurtosis without bias.
  1034. Parameters
  1035. ----------
  1036. **kwargs
  1037. Under Review.
  1038. Returns
  1039. -------
  1040. Series or DataFrame
  1041. Returned object type is determined by the caller of the %(name)s
  1042. calculation
  1043. See Also
  1044. --------
  1045. Series.%(name)s : Calling object with Series data.
  1046. DataFrame.%(name)s : Calling object with DataFrames.
  1047. Series.kurt : Equivalent method for Series.
  1048. DataFrame.kurt : Equivalent method for DataFrame.
  1049. scipy.stats.skew : Third moment of a probability density.
  1050. scipy.stats.kurtosis : Reference SciPy method.
  1051. Notes
  1052. -----
  1053. A minimum of 4 periods is required for the %(name)s calculation.
  1054. """)
  1055. def kurt(self, **kwargs):
  1056. return self._apply('roll_kurt', 'kurt',
  1057. check_minp=_require_min_periods(4), **kwargs)
  1058. _shared_docs['quantile'] = dedent("""
  1059. Calculate the %(name)s quantile.
  1060. Parameters
  1061. ----------
  1062. quantile : float
  1063. Quantile to compute. 0 <= quantile <= 1.
  1064. interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
  1065. .. versionadded:: 0.23.0
  1066. This optional parameter specifies the interpolation method to use,
  1067. when the desired quantile lies between two data points `i` and `j`:
  1068. * linear: `i + (j - i) * fraction`, where `fraction` is the
  1069. fractional part of the index surrounded by `i` and `j`.
  1070. * lower: `i`.
  1071. * higher: `j`.
  1072. * nearest: `i` or `j` whichever is nearest.
  1073. * midpoint: (`i` + `j`) / 2.
  1074. **kwargs:
  1075. For compatibility with other %(name)s methods. Has no effect on
  1076. the result.
  1077. Returns
  1078. -------
  1079. Series or DataFrame
  1080. Returned object type is determined by the caller of the %(name)s
  1081. calculation.
  1082. See Also
  1083. --------
  1084. pandas.Series.quantile : Computes value at the given quantile over all data
  1085. in Series.
  1086. pandas.DataFrame.quantile : Computes values at the given quantile over
  1087. requested axis in DataFrame.
  1088. Examples
  1089. --------
  1090. >>> s = pd.Series([1, 2, 3, 4])
  1091. >>> s.rolling(2).quantile(.4, interpolation='lower')
  1092. 0 NaN
  1093. 1 1.0
  1094. 2 2.0
  1095. 3 3.0
  1096. dtype: float64
  1097. >>> s.rolling(2).quantile(.4, interpolation='midpoint')
  1098. 0 NaN
  1099. 1 1.5
  1100. 2 2.5
  1101. 3 3.5
  1102. dtype: float64
  1103. """)
  1104. def quantile(self, quantile, interpolation='linear', **kwargs):
  1105. window = self._get_window()
  1106. index, indexi = self._get_index()
  1107. def f(arg, *args, **kwargs):
  1108. minp = _use_window(self.min_periods, window)
  1109. if quantile == 1.0:
  1110. return libwindow.roll_max(arg, window, minp, indexi,
  1111. self.closed)
  1112. elif quantile == 0.0:
  1113. return libwindow.roll_min(arg, window, minp, indexi,
  1114. self.closed)
  1115. else:
  1116. return libwindow.roll_quantile(arg, window, minp, indexi,
  1117. self.closed, quantile,
  1118. interpolation)
  1119. return self._apply(f, 'quantile', quantile=quantile,
  1120. **kwargs)
  1121. _shared_docs['cov'] = """
  1122. Calculate the %(name)s sample covariance.
  1123. Parameters
  1124. ----------
  1125. other : Series, DataFrame, or ndarray, optional
  1126. If not supplied then will default to self and produce pairwise
  1127. output.
  1128. pairwise : bool, default None
  1129. If False then only matching columns between self and other will be
  1130. used and the output will be a DataFrame.
  1131. If True then all pairwise combinations will be calculated and the
  1132. output will be a MultiIndexed DataFrame in the case of DataFrame
  1133. inputs. In the case of missing elements, only complete pairwise
  1134. observations will be used.
  1135. ddof : int, default 1
  1136. Delta Degrees of Freedom. The divisor used in calculations
  1137. is ``N - ddof``, where ``N`` represents the number of elements.
  1138. **kwargs
  1139. Keyword arguments to be passed into func.
  1140. """
  1141. def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
  1142. if other is None:
  1143. other = self._selected_obj
  1144. # only default unset
  1145. pairwise = True if pairwise is None else pairwise
  1146. other = self._shallow_copy(other)
  1147. # GH 16058: offset window
  1148. if self.is_freq_type:
  1149. window = self.win_freq
  1150. else:
  1151. window = self._get_window(other)
  1152. def _get_cov(X, Y):
  1153. # GH #12373 : rolling functions error on float32 data
  1154. # to avoid potential overflow, cast the data to float64
  1155. X = X.astype('float64')
  1156. Y = Y.astype('float64')
  1157. mean = lambda x: x.rolling(window, self.min_periods,
  1158. center=self.center).mean(**kwargs)
  1159. count = (X + Y).rolling(window=window,
  1160. center=self.center).count(**kwargs)
  1161. bias_adj = count / (count - ddof)
  1162. return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj
  1163. return _flex_binary_moment(self._selected_obj, other._selected_obj,
  1164. _get_cov, pairwise=bool(pairwise))
  1165. _shared_docs['corr'] = dedent("""
  1166. Calculate %(name)s correlation.
  1167. Parameters
  1168. ----------
  1169. other : Series, DataFrame, or ndarray, optional
  1170. If not supplied then will default to self.
  1171. pairwise : bool, default None
  1172. Calculate pairwise combinations of columns within a
  1173. DataFrame. If `other` is not specified, defaults to `True`,
  1174. otherwise defaults to `False`.
  1175. Not relevant for :class:`~pandas.Series`.
  1176. **kwargs
  1177. Unused.
  1178. Returns
  1179. -------
  1180. Series or DataFrame
  1181. Returned object type is determined by the caller of the
  1182. %(name)s calculation.
  1183. See Also
  1184. --------
  1185. Series.%(name)s : Calling object with Series data.
  1186. DataFrame.%(name)s : Calling object with DataFrames.
  1187. Series.corr : Equivalent method for Series.
  1188. DataFrame.corr : Equivalent method for DataFrame.
  1189. %(name)s.cov : Similar method to calculate covariance.
  1190. numpy.corrcoef : NumPy Pearson's correlation calculation.
  1191. Notes
  1192. -----
  1193. This function uses Pearson's definition of correlation
  1194. (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient).
  1195. When `other` is not specified, the output will be self correlation (e.g.
  1196. all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise`
  1197. set to `True`.
  1198. Function will return ``NaN`` for correlations of equal valued sequences;
  1199. this is the result of a 0/0 division error.
  1200. When `pairwise` is set to `False`, only matching columns between `self` and
  1201. `other` will be used.
  1202. When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame
  1203. with the original index on the first level, and the `other` DataFrame
  1204. columns on the second level.
  1205. In the case of missing elements, only complete pairwise observations
  1206. will be used.
  1207. Examples
  1208. --------
  1209. The below example shows a rolling calculation with a window size of
  1210. four matching the equivalent function call using :meth:`numpy.corrcoef`.
  1211. >>> v1 = [3, 3, 3, 5, 8]
  1212. >>> v2 = [3, 4, 4, 4, 8]
  1213. >>> fmt = "{0:.6f}" # limit the printed precision to 6 digits
  1214. >>> # numpy returns a 2X2 array, the correlation coefficient
  1215. >>> # is the number at entry [0][1]
  1216. >>> print(fmt.format(np.corrcoef(v1[:-1], v2[:-1])[0][1]))
  1217. 0.333333
  1218. >>> print(fmt.format(np.corrcoef(v1[1:], v2[1:])[0][1]))
  1219. 0.916949
  1220. >>> s1 = pd.Series(v1)
  1221. >>> s2 = pd.Series(v2)
  1222. >>> s1.rolling(4).corr(s2)
  1223. 0 NaN
  1224. 1 NaN
  1225. 2 NaN
  1226. 3 0.333333
  1227. 4 0.916949
  1228. dtype: float64
  1229. The below example shows a similar rolling calculation on a
  1230. DataFrame using the pairwise option.
  1231. >>> matrix = np.array([[51., 35.], [49., 30.], [47., 32.],\
  1232. [46., 31.], [50., 36.]])
  1233. >>> print(np.corrcoef(matrix[:-1,0], matrix[:-1,1]).round(7))
  1234. [[1. 0.6263001]
  1235. [0.6263001 1. ]]
  1236. >>> print(np.corrcoef(matrix[1:,0], matrix[1:,1]).round(7))
  1237. [[1. 0.5553681]
  1238. [0.5553681 1. ]]
  1239. >>> df = pd.DataFrame(matrix, columns=['X','Y'])
  1240. >>> df
  1241. X Y
  1242. 0 51.0 35.0
  1243. 1 49.0 30.0
  1244. 2 47.0 32.0
  1245. 3 46.0 31.0
  1246. 4 50.0 36.0
  1247. >>> df.rolling(4).corr(pairwise=True)
  1248. X Y
  1249. 0 X NaN NaN
  1250. Y NaN NaN
  1251. 1 X NaN NaN
  1252. Y NaN NaN
  1253. 2 X NaN NaN
  1254. Y NaN NaN
  1255. 3 X 1.000000 0.626300
  1256. Y 0.626300 1.000000
  1257. 4 X 1.000000 0.555368
  1258. Y 0.555368 1.000000
  1259. """)
  1260. def corr(self, other=None, pairwise=None, **kwargs):
  1261. if other is None:
  1262. other = self._selected_obj
  1263. # only default unset
  1264. pairwise = True if pairwise is None else pairwise
  1265. other = self._shallow_copy(other)
  1266. window = self._get_window(other)
  1267. def _get_corr(a, b):
  1268. a = a.rolling(window=window, min_periods=self.min_periods,
  1269. center=self.center)
  1270. b = b.rolling(window=window, min_periods=self.min_periods,
  1271. center=self.center)
  1272. return a.cov(b, **kwargs) / (a.std(**kwargs) * b.std(**kwargs))
  1273. return _flex_binary_moment(self._selected_obj, other._selected_obj,
  1274. _get_corr, pairwise=bool(pairwise))
  1275. class Rolling(_Rolling_and_Expanding):
  1276. @cache_readonly
  1277. def is_datetimelike(self):
  1278. return isinstance(self._on,
  1279. (ABCDatetimeIndex,
  1280. ABCTimedeltaIndex,
  1281. ABCPeriodIndex))
  1282. @cache_readonly
  1283. def _on(self):
  1284. if self.on is None:
  1285. return self.obj.index
  1286. elif (isinstance(self.obj, ABCDataFrame) and
  1287. self.on in self.obj.columns):
  1288. from pandas import Index
  1289. return Index(self.obj[self.on])
  1290. else:
  1291. raise ValueError("invalid on specified as {0}, "
  1292. "must be a column (if DataFrame) "
  1293. "or None".format(self.on))
  1294. def validate(self):
  1295. super(Rolling, self).validate()
  1296. # we allow rolling on a datetimelike index
  1297. if ((self.obj.empty or self.is_datetimelike) and
  1298. isinstance(self.window, (compat.string_types, ABCDateOffset,
  1299. timedelta))):
  1300. self._validate_monotonic()
  1301. freq = self._validate_freq()
  1302. # we don't allow center
  1303. if self.center:
  1304. raise NotImplementedError("center is not implemented "
  1305. "for datetimelike and offset "
  1306. "based windows")
  1307. # this will raise ValueError on non-fixed freqs
  1308. self.win_freq = self.window
  1309. self.window = freq.nanos
  1310. self.win_type = 'freq'
  1311. # min_periods must be an integer
  1312. if self.min_periods is None:
  1313. self.min_periods = 1
  1314. elif not is_integer(self.window):
  1315. raise ValueError("window must be an integer")
  1316. elif self.window < 0:
  1317. raise ValueError("window must be non-negative")
  1318. if not self.is_datetimelike and self.closed is not None:
  1319. raise ValueError("closed only implemented for datetimelike "
  1320. "and offset based windows")
  1321. def _validate_monotonic(self):
  1322. """
  1323. Validate on is_monotonic.
  1324. """
  1325. if not self._on.is_monotonic:
  1326. formatted = self.on or 'index'
  1327. raise ValueError("{0} must be "
  1328. "monotonic".format(formatted))
  1329. def _validate_freq(self):
  1330. """
  1331. Validate & return window frequency.
  1332. """
  1333. from pandas.tseries.frequencies import to_offset
  1334. try:
  1335. return to_offset(self.window)
  1336. except (TypeError, ValueError):
  1337. raise ValueError("passed window {0} is not "
  1338. "compatible with a datetimelike "
  1339. "index".format(self.window))
  1340. _agg_see_also_doc = dedent("""
  1341. See Also
  1342. --------
  1343. pandas.Series.rolling
  1344. pandas.DataFrame.rolling
  1345. """)
  1346. _agg_examples_doc = dedent("""
  1347. Examples
  1348. --------
  1349. >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'])
  1350. >>> df
  1351. A B C
  1352. 0 -2.385977 -0.102758 0.438822
  1353. 1 -1.004295 0.905829 -0.954544
  1354. 2 0.735167 -0.165272 -1.619346
  1355. 3 -0.702657 -1.340923 -0.706334
  1356. 4 -0.246845 0.211596 -0.901819
  1357. 5 2.463718 3.157577 -1.380906
  1358. 6 -1.142255 2.340594 -0.039875
  1359. 7 1.396598 -1.647453 1.677227
  1360. 8 -0.543425 1.761277 -0.220481
  1361. 9 -0.640505 0.289374 -1.550670
  1362. >>> df.rolling(3).sum()
  1363. A B C
  1364. 0 NaN NaN NaN
  1365. 1 NaN NaN NaN
  1366. 2 -2.655105 0.637799 -2.135068
  1367. 3 -0.971785 -0.600366 -3.280224
  1368. 4 -0.214334 -1.294599 -3.227500
  1369. 5 1.514216 2.028250 -2.989060
  1370. 6 1.074618 5.709767 -2.322600
  1371. 7 2.718061 3.850718 0.256446
  1372. 8 -0.289082 2.454418 1.416871
  1373. 9 0.212668 0.403198 -0.093924
  1374. >>> df.rolling(3).agg({'A':'sum', 'B':'min'})
  1375. A B
  1376. 0 NaN NaN
  1377. 1 NaN NaN
  1378. 2 -2.655105 -0.165272
  1379. 3 -0.971785 -1.340923
  1380. 4 -0.214334 -1.340923
  1381. 5 1.514216 -1.340923
  1382. 6 1.074618 0.211596
  1383. 7 2.718061 -1.647453
  1384. 8 -0.289082 -1.647453
  1385. 9 0.212668 -1.647453
  1386. """)
  1387. @Substitution(see_also=_agg_see_also_doc,
  1388. examples=_agg_examples_doc,
  1389. versionadded='',
  1390. klass='Series/Dataframe',
  1391. axis='')
  1392. @Appender(_shared_docs['aggregate'])
  1393. def aggregate(self, arg, *args, **kwargs):
  1394. return super(Rolling, self).aggregate(arg, *args, **kwargs)
  1395. agg = aggregate
  1396. @Substitution(name='rolling')
  1397. @Appender(_shared_docs['count'])
  1398. def count(self):
  1399. # different impl for freq counting
  1400. if self.is_freq_type:
  1401. return self._apply('roll_count', 'count')
  1402. return super(Rolling, self).count()
  1403. @Substitution(name='rolling')
  1404. @Appender(_shared_docs['apply'])
  1405. def apply(self, func, raw=None, args=(), kwargs={}):
  1406. return super(Rolling, self).apply(
  1407. func, raw=raw, args=args, kwargs=kwargs)
  1408. @Substitution(name='rolling')
  1409. @Appender(_shared_docs['sum'])
  1410. def sum(self, *args, **kwargs):
  1411. nv.validate_rolling_func('sum', args, kwargs)
  1412. return super(Rolling, self).sum(*args, **kwargs)
  1413. @Substitution(name='rolling')
  1414. @Appender(_doc_template)
  1415. @Appender(_shared_docs['max'])
  1416. def max(self, *args, **kwargs):
  1417. nv.validate_rolling_func('max', args, kwargs)
  1418. return super(Rolling, self).max(*args, **kwargs)
  1419. @Substitution(name='rolling')
  1420. @Appender(_shared_docs['min'])
  1421. def min(self, *args, **kwargs):
  1422. nv.validate_rolling_func('min', args, kwargs)
  1423. return super(Rolling, self).min(*args, **kwargs)
  1424. @Substitution(name='rolling')
  1425. @Appender(_shared_docs['mean'])
  1426. def mean(self, *args, **kwargs):
  1427. nv.validate_rolling_func('mean', args, kwargs)
  1428. return super(Rolling, self).mean(*args, **kwargs)
  1429. @Substitution(name='rolling')
  1430. @Appender(_shared_docs['median'])
  1431. def median(self, **kwargs):
  1432. return super(Rolling, self).median(**kwargs)
  1433. @Substitution(name='rolling')
  1434. @Appender(_shared_docs['std'])
  1435. def std(self, ddof=1, *args, **kwargs):
  1436. nv.validate_rolling_func('std', args, kwargs)
  1437. return super(Rolling, self).std(ddof=ddof, **kwargs)
  1438. @Substitution(name='rolling')
  1439. @Appender(_shared_docs['var'])
  1440. def var(self, ddof=1, *args, **kwargs):
  1441. nv.validate_rolling_func('var', args, kwargs)
  1442. return super(Rolling, self).var(ddof=ddof, **kwargs)
  1443. @Substitution(name='rolling')
  1444. @Appender(_doc_template)
  1445. @Appender(_shared_docs['skew'])
  1446. def skew(self, **kwargs):
  1447. return super(Rolling, self).skew(**kwargs)
  1448. _agg_doc = dedent("""
  1449. Examples
  1450. --------
  1451. The example below will show a rolling calculation with a window size of
  1452. four matching the equivalent function call using `scipy.stats`.
  1453. >>> arr = [1, 2, 3, 4, 999]
  1454. >>> fmt = "{0:.6f}" # limit the printed precision to 6 digits
  1455. >>> import scipy.stats
  1456. >>> print(fmt.format(scipy.stats.kurtosis(arr[:-1], bias=False)))
  1457. -1.200000
  1458. >>> print(fmt.format(scipy.stats.kurtosis(arr[1:], bias=False)))
  1459. 3.999946
  1460. >>> s = pd.Series(arr)
  1461. >>> s.rolling(4).kurt()
  1462. 0 NaN
  1463. 1 NaN
  1464. 2 NaN
  1465. 3 -1.200000
  1466. 4 3.999946
  1467. dtype: float64
  1468. """)
  1469. @Appender(_agg_doc)
  1470. @Substitution(name='rolling')
  1471. @Appender(_shared_docs['kurt'])
  1472. def kurt(self, **kwargs):
  1473. return super(Rolling, self).kurt(**kwargs)
  1474. @Substitution(name='rolling')
  1475. @Appender(_shared_docs['quantile'])
  1476. def quantile(self, quantile, interpolation='linear', **kwargs):
  1477. return super(Rolling, self).quantile(quantile=quantile,
  1478. interpolation=interpolation,
  1479. **kwargs)
  1480. @Substitution(name='rolling')
  1481. @Appender(_doc_template)
  1482. @Appender(_shared_docs['cov'])
  1483. def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
  1484. return super(Rolling, self).cov(other=other, pairwise=pairwise,
  1485. ddof=ddof, **kwargs)
  1486. @Substitution(name='rolling')
  1487. @Appender(_shared_docs['corr'])
  1488. def corr(self, other=None, pairwise=None, **kwargs):
  1489. return super(Rolling, self).corr(other=other, pairwise=pairwise,
  1490. **kwargs)
  1491. class RollingGroupby(_GroupByMixin, Rolling):
  1492. """
  1493. Provides a rolling groupby implementation.
  1494. .. versionadded:: 0.18.1
  1495. """
  1496. @property
  1497. def _constructor(self):
  1498. return Rolling
  1499. def _gotitem(self, key, ndim, subset=None):
  1500. # we are setting the index on the actual object
  1501. # here so our index is carried thru to the selected obj
  1502. # when we do the splitting for the groupby
  1503. if self.on is not None:
  1504. self._groupby.obj = self._groupby.obj.set_index(self._on)
  1505. self.on = None
  1506. return super(RollingGroupby, self)._gotitem(key, ndim, subset=subset)
  1507. def _validate_monotonic(self):
  1508. """
  1509. Validate that on is monotonic;
  1510. we don't care for groupby.rolling
  1511. because we have already validated at a higher
  1512. level.
  1513. """
  1514. pass
  class Expanding(_Rolling_and_Expanding):
      """
      Provides expanding transformations.

      .. versionadded:: 0.18.0

      Parameters
      ----------
      min_periods : int, default 1
          Minimum number of observations in window required to have a value
          (otherwise result is NA).
      center : bool, default False
          Set the labels at the center of the window.
      axis : int or str, default 0

      Returns
      -------
      a Window sub-classed for the particular operation

      See Also
      --------
      rolling : Provides rolling window calculations.
      ewm : Provides exponential weighted functions.

      Notes
      -----
      By default, the result is set to the right edge of the window. This can
      be changed to the center of the window by setting ``center=True``.

      Examples
      --------
      >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
           B
      0  0.0
      1  1.0
      2  2.0
      3  NaN
      4  4.0

      >>> df.expanding(2).sum()
           B
      0  NaN
      1  1.0
      2  3.0
      3  3.0
      4  7.0
      """

      _attributes = ['min_periods', 'center', 'axis']

      def __init__(self, obj, min_periods=1, center=False, axis=0,
                   **kwargs):
          # Extra keyword arguments are accepted but not forwarded.
          super(Expanding, self).__init__(obj=obj, min_periods=min_periods,
                                          center=center, axis=axis)

      @property
      def _constructor(self):
          """Return the ``Expanding`` class."""
          return Expanding

      def _get_window(self, other=None):
          """
          Get the window length over which to perform some operation.

          Parameters
          ----------
          other : object, default None
              The other object that is involved in the operation.
              Such an object is involved for operations like covariance.

          Returns
          -------
          window : int
              The window length.
          """
          axis = self.obj._get_axis(self.axis)
          # The axis length is counted twice when ``other`` is supplied.
          length = len(axis) + (other is not None) * len(axis)

          # A falsy min_periods (None or 0) never wins the max() below.
          min_periods = self.min_periods or -1
          return max(length, min_periods)

      _agg_see_also_doc = dedent("""
      See Also
      --------
      pandas.DataFrame.expanding.aggregate
      pandas.DataFrame.rolling.aggregate
      pandas.DataFrame.aggregate
      """)

      # NOTE(review): the example below calls ``df.ewm(alpha=0.5).mean()``
      # rather than an expanding operation; the text is the same as the EWM
      # aggregate example. Left unchanged because replacing it needs
      # recomputed output values -- TODO confirm and regenerate.
      _agg_examples_doc = dedent("""
      Examples
      --------
      >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'])
      >>> df
      A B C
      0 -2.385977 -0.102758 0.438822
      1 -1.004295 0.905829 -0.954544
      2 0.735167 -0.165272 -1.619346
      3 -0.702657 -1.340923 -0.706334
      4 -0.246845 0.211596 -0.901819
      5 2.463718 3.157577 -1.380906
      6 -1.142255 2.340594 -0.039875
      7 1.396598 -1.647453 1.677227
      8 -0.543425 1.761277 -0.220481
      9 -0.640505 0.289374 -1.550670
      >>> df.ewm(alpha=0.5).mean()
      A B C
      0 -2.385977 -0.102758 0.438822
      1 -1.464856 0.569633 -0.490089
      2 -0.207700 0.149687 -1.135379
      3 -0.471677 -0.645305 -0.906555
      4 -0.355635 -0.203033 -0.904111
      5 1.076417 1.503943 -1.146293
      6 -0.041654 1.925562 -0.588728
      7 0.680292 0.132049 0.548693
      8 0.067236 0.948257 0.163353
      9 -0.286980 0.618493 -0.694496
      """)

      @Substitution(see_also=_agg_see_also_doc,
                    examples=_agg_examples_doc,
                    versionadded='',
                    klass='Series/Dataframe',
                    axis='')
      @Appender(_shared_docs['aggregate'])
      def aggregate(self, arg, *args, **kwargs):
          return super(Expanding, self).aggregate(arg, *args, **kwargs)

      # Short alias for ``aggregate``.
      agg = aggregate

      @Substitution(name='expanding')
      @Appender(_shared_docs['count'])
      def count(self, **kwargs):
          return super(Expanding, self).count(**kwargs)

      @Substitution(name='expanding')
      @Appender(_shared_docs['apply'])
      def apply(self, func, raw=None, args=(), kwargs=None):
          # ``kwargs`` used to default to a literal ``{}``: one dict object
          # created at definition time and handed to the parent on every
          # call, so any mutation downstream would leak between calls.
          # A ``None`` sentinel gives each call its own fresh dict.
          if kwargs is None:
              kwargs = {}
          return super(Expanding, self).apply(
              func, raw=raw, args=args, kwargs=kwargs)

      @Substitution(name='expanding')
      @Appender(_shared_docs['sum'])
      def sum(self, *args, **kwargs):
          nv.validate_expanding_func('sum', args, kwargs)
          return super(Expanding, self).sum(*args, **kwargs)

      @Substitution(name='expanding')
      @Appender(_doc_template)
      @Appender(_shared_docs['max'])
      def max(self, *args, **kwargs):
          nv.validate_expanding_func('max', args, kwargs)
          return super(Expanding, self).max(*args, **kwargs)

      @Substitution(name='expanding')
      @Appender(_shared_docs['min'])
      def min(self, *args, **kwargs):
          nv.validate_expanding_func('min', args, kwargs)
          return super(Expanding, self).min(*args, **kwargs)

      @Substitution(name='expanding')
      @Appender(_shared_docs['mean'])
      def mean(self, *args, **kwargs):
          nv.validate_expanding_func('mean', args, kwargs)
          return super(Expanding, self).mean(*args, **kwargs)

      @Substitution(name='expanding')
      @Appender(_shared_docs['median'])
      def median(self, **kwargs):
          return super(Expanding, self).median(**kwargs)

      @Substitution(name='expanding')
      @Appender(_shared_docs['std'])
      def std(self, ddof=1, *args, **kwargs):
          # Positional extras are validated only; they are not forwarded.
          nv.validate_expanding_func('std', args, kwargs)
          return super(Expanding, self).std(ddof=ddof, **kwargs)

      @Substitution(name='expanding')
      @Appender(_shared_docs['var'])
      def var(self, ddof=1, *args, **kwargs):
          # Positional extras are validated only; they are not forwarded.
          nv.validate_expanding_func('var', args, kwargs)
          return super(Expanding, self).var(ddof=ddof, **kwargs)

      @Substitution(name='expanding')
      @Appender(_doc_template)
      @Appender(_shared_docs['skew'])
      def skew(self, **kwargs):
          return super(Expanding, self).skew(**kwargs)

      # Example text attached to ``kurt`` by the Appender decorator below.
      _agg_doc = dedent("""
      Examples
      --------
      The example below will show an expanding calculation with a window size of
      four matching the equivalent function call using `scipy.stats`.
      >>> arr = [1, 2, 3, 4, 999]
      >>> import scipy.stats
      >>> fmt = "{0:.6f}" # limit the printed precision to 6 digits
      >>> print(fmt.format(scipy.stats.kurtosis(arr[:-1], bias=False)))
      -1.200000
      >>> print(fmt.format(scipy.stats.kurtosis(arr, bias=False)))
      4.999874
      >>> s = pd.Series(arr)
      >>> s.expanding(4).kurt()
      0 NaN
      1 NaN
      2 NaN
      3 -1.200000
      4 4.999874
      dtype: float64
      """)

      @Appender(_agg_doc)
      @Substitution(name='expanding')
      @Appender(_shared_docs['kurt'])
      def kurt(self, **kwargs):
          return super(Expanding, self).kurt(**kwargs)

      @Substitution(name='expanding')
      @Appender(_shared_docs['quantile'])
      def quantile(self, quantile, interpolation='linear', **kwargs):
          return super(Expanding, self).quantile(quantile=quantile,
                                                 interpolation=interpolation,
                                                 **kwargs)

      @Substitution(name='expanding')
      @Appender(_doc_template)
      @Appender(_shared_docs['cov'])
      def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
          return super(Expanding, self).cov(other=other, pairwise=pairwise,
                                            ddof=ddof, **kwargs)

      @Substitution(name='expanding')
      @Appender(_shared_docs['corr'])
      def corr(self, other=None, pairwise=None, **kwargs):
          return super(Expanding, self).corr(other=other, pairwise=pairwise,
                                             **kwargs)
  class ExpandingGroupby(_GroupByMixin, Expanding):
      """
      Provides an expanding groupby implementation.

      .. versionadded:: 0.18.1
      """

      @property
      def _constructor(self):
          """Return the plain ``Expanding`` class."""
          return Expanding
  # Parameter documentation passed to ``Appender`` on ``EWM.std`` and
  # ``EWM.var``.
  _bias_template = """
      Parameters
      ----------
      bias : bool, default False
          Use a standard estimation bias correction.
      *args, **kwargs
          Arguments and keyword arguments to be passed into func.
  """

  # Parameter documentation passed to ``Appender`` on ``EWM.cov`` and
  # ``EWM.corr``.
  # NOTE(review): ``bias`` is listed here, but ``EWM.corr`` has no explicit
  # ``bias`` parameter -- confirm the text is intended for both methods.
  _pairwise_template = """
      Parameters
      ----------
      other : Series, DataFrame, or ndarray, optional
          If not supplied then will default to self and produce pairwise
          output.
      pairwise : bool, default None
          If False then only matching columns between self and other will be
          used and the output will be a DataFrame.
          If True then all pairwise combinations will be calculated and the
          output will be a MultiIndex DataFrame in the case of DataFrame
          inputs. In the case of missing elements, only complete pairwise
          observations will be used.
      bias : bool, default False
          Use a standard estimation bias correction.
      **kwargs
          Keyword arguments to be passed into func.
  """
  class EWM(_Rolling):
      r"""
      Provides exponential weighted functions.

      .. versionadded:: 0.18.0

      Parameters
      ----------
      com : float, optional
          Specify decay in terms of center of mass,
          :math:`\alpha = 1 / (1 + com),\text{ for } com \geq 0`
      span : float, optional
          Specify decay in terms of span,
          :math:`\alpha = 2 / (span + 1),\text{ for } span \geq 1`
      halflife : float, optional
          Specify decay in terms of half-life,
          :math:`\alpha = 1 - exp(log(0.5) / halflife),\text{ for } halflife > 0`
      alpha : float, optional
          Specify smoothing factor :math:`\alpha` directly,
          :math:`0 < \alpha \leq 1`

          .. versionadded:: 0.18.0

      min_periods : int, default 0
          Minimum number of observations in window required to have a value
          (otherwise result is NA).
      adjust : bool, default True
          Divide by decaying adjustment factor in beginning periods to account
          for imbalance in relative weightings (viewing EWMA as a moving average)
      ignore_na : bool, default False
          Ignore missing values when calculating weights;
          specify True to reproduce pre-0.15.0 behavior

      Returns
      -------
      a Window sub-classed for the particular operation

      See Also
      --------
      rolling : Provides rolling window calculations.
      expanding : Provides expanding transformations.

      Notes
      -----
      Exactly one of center of mass, span, half-life, and alpha must be provided.
      Allowed values and relationship between the parameters are specified in the
      parameter descriptions above; see the link at the end of this section for
      a detailed explanation.

      When adjust is True (default), weighted averages are calculated using
      weights (1-alpha)**(n-1), (1-alpha)**(n-2), ..., 1-alpha, 1.

      When adjust is False, weighted averages are calculated recursively as:
         weighted_average[0] = arg[0];
         weighted_average[i] = (1-alpha)*weighted_average[i-1] + alpha*arg[i].

      When ignore_na is False (default), weights are based on absolute positions.
      For example, the weights of x and y used in calculating the final weighted
      average of [x, None, y] are (1-alpha)**2 and 1 (if adjust is True), and
      (1-alpha)**2 and alpha (if adjust is False).

      When ignore_na is True (reproducing pre-0.15.0 behavior), weights are based
      on relative positions. For example, the weights of x and y used in
      calculating the final weighted average of [x, None, y] are 1-alpha and 1
      (if adjust is True), and 1-alpha and alpha (if adjust is False).

      More details can be found at
      http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-windows

      Examples
      --------
      >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
           B
      0  0.0
      1  1.0
      2  2.0
      3  NaN
      4  4.0

      >>> df.ewm(com=0.5).mean()
                B
      0  0.000000
      1  0.750000
      2  1.615385
      3  1.615385
      4  3.670213
      """
      _attributes = ['com', 'min_periods', 'adjust', 'ignore_na', 'axis']

      def __init__(self, obj, com=None, span=None, halflife=None, alpha=None,
                   min_periods=0, adjust=True, ignore_na=False,
                   axis=0):
          # Attributes are assigned directly; the parent __init__ is not
          # called here.
          self.obj = obj
          # com / span / halflife / alpha are reduced to one center-of-mass
          # value; only that value is stored.
          self.com = _get_center_of_mass(com, span, halflife, alpha)
          self.min_periods = min_periods
          self.adjust = adjust
          self.ignore_na = ignore_na
          self.axis = axis
          self.on = None

      @property
      def _constructor(self):
          # Return the EWM class itself.
          return EWM

      _agg_see_also_doc = dedent("""
      See Also
      --------
      pandas.DataFrame.rolling.aggregate
      """)

      _agg_examples_doc = dedent("""
      Examples
      --------
      >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'])
      >>> df
      A B C
      0 -2.385977 -0.102758 0.438822
      1 -1.004295 0.905829 -0.954544
      2 0.735167 -0.165272 -1.619346
      3 -0.702657 -1.340923 -0.706334
      4 -0.246845 0.211596 -0.901819
      5 2.463718 3.157577 -1.380906
      6 -1.142255 2.340594 -0.039875
      7 1.396598 -1.647453 1.677227
      8 -0.543425 1.761277 -0.220481
      9 -0.640505 0.289374 -1.550670
      >>> df.ewm(alpha=0.5).mean()
      A B C
      0 -2.385977 -0.102758 0.438822
      1 -1.464856 0.569633 -0.490089
      2 -0.207700 0.149687 -1.135379
      3 -0.471677 -0.645305 -0.906555
      4 -0.355635 -0.203033 -0.904111
      5 1.076417 1.503943 -1.146293
      6 -0.041654 1.925562 -0.588728
      7 0.680292 0.132049 0.548693
      8 0.067236 0.948257 0.163353
      9 -0.286980 0.618493 -0.694496
      """)

      @Substitution(see_also=_agg_see_also_doc,
                    examples=_agg_examples_doc,
                    versionadded='',
                    klass='Series/Dataframe',
                    axis='')
      @Appender(_shared_docs['aggregate'])
      def aggregate(self, arg, *args, **kwargs):
          # Plain delegation to the parent implementation.
          return super(EWM, self).aggregate(arg, *args, **kwargs)

      # Short alias for ``aggregate``.
      agg = aggregate

      def _apply(self, func, **kwargs):
          """
          Rolling statistical measure using supplied function. Designed to be
          used with passed-in Cython array-based functions.

          Parameters
          ----------
          func : str/callable to apply
              A string is looked up as an attribute of ``libwindow``.
          **kwargs
              Accepted but not used in this method.

          Returns
          -------
          y : same type as input argument

          Raises
          ------
          ValueError
              If ``func`` is a string that ``libwindow`` does not provide.
          """
          blocks, obj, index = self._create_blocks()
          results = []
          for b in blocks:
              try:
                  values = self._prep_values(b.values)
              except TypeError:
                  # values that cannot be prepared are passed through as a
                  # copy
                  results.append(b.values.copy())
                  continue

              if values.size == 0:
                  results.append(values.copy())
                  continue

              # if we have a string function name, wrap it
              if isinstance(func, compat.string_types):
                  cfunc = getattr(libwindow, func, None)
                  if cfunc is None:
                      raise ValueError("we do not support this function "
                                       "in libwindow.{func}".format(func=func))

                  # ``func`` is rebound from the name to this wrapper, so the
                  # lookup above happens only for the first block that
                  # reaches this point.
                  def func(arg):
                      return cfunc(arg, self.com, int(self.adjust),
                                   int(self.ignore_na), int(self.min_periods))

              results.append(np.apply_along_axis(func, self.axis, values))

          return self._wrap_results(results, blocks, obj)

      @Substitution(name='ewm')
      @Appender(_doc_template)
      def mean(self, *args, **kwargs):
          """
          Exponential weighted moving average.

          Parameters
          ----------
          *args, **kwargs
              Arguments and keyword arguments to be passed into func.
          """
          nv.validate_window_func('mean', args, kwargs)
          return self._apply('ewma', **kwargs)

      @Substitution(name='ewm')
      @Appender(_doc_template)
      @Appender(_bias_template)
      def std(self, bias=False, *args, **kwargs):
          """
          Exponential weighted moving stddev.
          """
          nv.validate_window_func('std', args, kwargs)
          # square root of the variance, taken with _zsqrt
          return _zsqrt(self.var(bias=bias, **kwargs))

      # ``vol`` is an alias of ``std``.
      vol = std

      @Substitution(name='ewm')
      @Appender(_doc_template)
      @Appender(_bias_template)
      def var(self, bias=False, *args, **kwargs):
          """
          Exponential weighted moving variance.
          """
          nv.validate_window_func('var', args, kwargs)

          # variance is computed as the covariance of the values with
          # themselves
          def f(arg):
              return libwindow.ewmcov(arg, arg, self.com, int(self.adjust),
                                      int(self.ignore_na), int(self.min_periods),
                                      int(bias))

          return self._apply(f, **kwargs)

      @Substitution(name='ewm')
      @Appender(_doc_template)
      @Appender(_pairwise_template)
      def cov(self, other=None, pairwise=None, bias=False, **kwargs):
          """
          Exponential weighted sample covariance.
          """
          if other is None:
              other = self._selected_obj
              # only default unset
              pairwise = True if pairwise is None else pairwise
          other = self._shallow_copy(other)

          # Called by _flex_binary_moment with the pair of objects to combine.
          def _get_cov(X, Y):
              X = self._shallow_copy(X)
              Y = self._shallow_copy(Y)
              cov = libwindow.ewmcov(X._prep_values(), Y._prep_values(),
                                     self.com, int(self.adjust),
                                     int(self.ignore_na), int(self.min_periods),
                                     int(bias))
              return X._wrap_result(cov)

          # pairwise may still be None here (``other`` given explicitly);
          # bool() turns that into False.
          return _flex_binary_moment(self._selected_obj, other._selected_obj,
                                     _get_cov, pairwise=bool(pairwise))

      @Substitution(name='ewm')
      @Appender(_doc_template)
      @Appender(_pairwise_template)
      def corr(self, other=None, pairwise=None, **kwargs):
          """
          Exponential weighted sample correlation.
          """
          if other is None:
              other = self._selected_obj
              # only default unset
              pairwise = True if pairwise is None else pairwise
          other = self._shallow_copy(other)

          # Called by _flex_binary_moment with the pair of objects to combine.
          def _get_corr(X, Y):
              X = self._shallow_copy(X)
              Y = self._shallow_copy(Y)

              # the last ewmcov argument is fixed to 1 here; in ``cov`` and
              # ``var`` that position receives int(bias)
              def _cov(x, y):
                  return libwindow.ewmcov(x, y, self.com, int(self.adjust),
                                          int(self.ignore_na),
                                          int(self.min_periods),
                                          1)

              x_values = X._prep_values()
              y_values = Y._prep_values()
              with np.errstate(all='ignore'):
                  cov = _cov(x_values, y_values)
                  x_var = _cov(x_values, x_values)
                  y_var = _cov(y_values, y_values)
                  corr = cov / _zsqrt(x_var * y_var)
              return X._wrap_result(corr)

          # pairwise may still be None here (``other`` given explicitly);
          # bool() turns that into False.
          return _flex_binary_moment(self._selected_obj, other._selected_obj,
                                     _get_corr, pairwise=bool(pairwise))
  2001. # Helper Funcs
  def _flex_binary_moment(arg1, arg2, f, pairwise=False):
      """
      Apply the binary function ``f`` to two array-like inputs.

      Dispatch depends on the input types:

      * both ndarray/Series: the inputs go through ``_prep_binary`` and
        ``f`` is called once;
      * DataFrame and DataFrame with ``pairwise is False``: ``f`` is called
        per column (by position when both are the same object, otherwise
        by label after an outer ``align``);
      * DataFrame and DataFrame with ``pairwise is True``: ``f`` is called
        for every column pair and the results are assembled into a frame
        with a MultiIndex;
      * DataFrame and ndarray/Series: ``f`` is called for each column of
        ``arg1`` against ``arg2``;
      * otherwise (``arg1`` is not a DataFrame but ``arg2`` is): the
        function calls itself with the two arguments swapped.

      Parameters
      ----------
      arg1, arg2 : np.ndarray, Series or DataFrame
      f : callable
          Called with two positional arguments.
      pairwise : bool, default False
          Only consulted when both inputs are DataFrames.

      Raises
      ------
      TypeError
          If either input is not an ndarray, Series or DataFrame.
      ValueError
          If both inputs are distinct DataFrames, ``pairwise`` is False and
          either has non-unique columns; or if ``pairwise`` is neither
          ``True`` nor ``False`` for two DataFrames.
      """
      if not (isinstance(arg1, (np.ndarray, ABCSeries, ABCDataFrame)) and
              isinstance(arg2, (np.ndarray, ABCSeries, ABCDataFrame))):
          raise TypeError("arguments to moment function must be of type "
                          "np.ndarray/Series/DataFrame")

      if (isinstance(arg1, (np.ndarray, ABCSeries)) and
              isinstance(arg2, (np.ndarray, ABCSeries))):
          X, Y = _prep_binary(arg1, arg2)
          return f(X, Y)

      elif isinstance(arg1, ABCDataFrame):
          from pandas import DataFrame

          # Build a frame from results keyed by column position, then map
          # the positions back to the template's column labels.
          def dataframe_from_int_dict(data, frame_template):
              result = DataFrame(data, index=frame_template.index)
              if len(result.columns) > 0:
                  result.columns = frame_template.columns[result.columns]
              return result

          results = {}
          if isinstance(arg2, ABCDataFrame):
              if pairwise is False:
                  if arg1 is arg2:
                      # special case in order to handle duplicate column names
                      for i, col in enumerate(arg1.columns):
                          results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i])
                      return dataframe_from_int_dict(results, arg1)
                  else:
                      if not arg1.columns.is_unique:
                          raise ValueError("'arg1' columns are not unique")
                      if not arg2.columns.is_unique:
                          raise ValueError("'arg2' columns are not unique")
                      with warnings.catch_warnings(record=True):
                          warnings.simplefilter("ignore", RuntimeWarning)
                          X, Y = arg1.align(arg2, join='outer')
                      # same masking arithmetic as _prep_binary
                      X = X + 0 * Y
                      Y = Y + 0 * X

                      with warnings.catch_warnings(record=True):
                          warnings.simplefilter("ignore", RuntimeWarning)
                          res_columns = arg1.columns.union(arg2.columns)
                      for col in res_columns:
                          if col in X and col in Y:
                              results[col] = f(X[col], Y[col])
                      return DataFrame(results, index=X.index,
                                       columns=res_columns)
              elif pairwise is True:
                  # results[i][j] holds f(column i of arg1, column j of arg2)
                  results = defaultdict(dict)
                  for i, k1 in enumerate(arg1.columns):
                      for j, k2 in enumerate(arg2.columns):
                          if j < i and arg2 is arg1:
                              # Symmetric case
                              results[i][j] = results[j][i]
                          else:
                              results[i][j] = f(*_prep_binary(arg1.iloc[:, i],
                                                              arg2.iloc[:, j]))

                  from pandas import MultiIndex, concat

                  result_index = arg1.index.union(arg2.index)
                  if len(result_index):

                      # construct result frame
                      result = concat(
                          [concat([results[i][j]
                                   for j, c in enumerate(arg2.columns)],
                                  ignore_index=True)
                           for i, c in enumerate(arg1.columns)],
                          ignore_index=True,
                          axis=1)
                      result.columns = arg1.columns

                      # set the index and reorder
                      if arg2.columns.nlevels > 1:
                          result.index = MultiIndex.from_product(
                              arg2.columns.levels + [result_index])
                          result = result.reorder_levels([2, 0, 1]).sort_index()
                      else:
                          # positional index first, swapped and sorted, then
                          # replaced by the real labels
                          result.index = MultiIndex.from_product(
                              [range(len(arg2.columns)),
                               range(len(result_index))])
                          result = result.swaplevel(1, 0).sort_index()
                          result.index = MultiIndex.from_product(
                              [result_index] + [arg2.columns])
                  else:

                      # empty result
                      result = DataFrame(
                          index=MultiIndex(levels=[arg1.index, arg2.columns],
                                           codes=[[], []]),
                          columns=arg2.columns,
                          dtype='float64')

                  # reset our index names to arg1 names
                  # reset our column names to arg2 names
                  # careful not to mutate the original names
                  result.columns = result.columns.set_names(
                      arg1.columns.names)
                  result.index = result.index.set_names(
                      result_index.names + arg2.columns.names)

                  return result

              else:
                  raise ValueError("'pairwise' is not True/False")
          else:
              results = {i: f(*_prep_binary(arg1.iloc[:, i], arg2))
                         for i, col in enumerate(arg1.columns)}
              return dataframe_from_int_dict(results, arg1)

      else:
          # arg1 is an ndarray/Series and arg2 a DataFrame: swap so the
          # DataFrame comes first (``pairwise`` falls back to its default).
          return _flex_binary_moment(arg2, arg1, f)
  def _get_center_of_mass(comass, span, halflife, alpha):
      """
      Convert one decay specification into a center-of-mass value.

      Parameters
      ----------
      comass, span, halflife, alpha : numeric or None
          At most one may be non-None; the first non-None one, in this
          order, is used.

      Returns
      -------
      float
          The center of mass.

      Raises
      ------
      ValueError
          If more than one argument is given, none is given, or the given
          value is outside its allowed domain.
      """
      if com.count_not_none(comass, span, halflife, alpha) > 1:
          raise ValueError("comass, span, halflife, and alpha "
                           "are mutually exclusive")

      # Convert to center of mass; domain checks ensure 0 < alpha <= 1
      if comass is not None:
          if comass < 0:
              raise ValueError("comass must satisfy: comass >= 0")
          return float(comass)

      if span is not None:
          if span < 1:
              raise ValueError("span must satisfy: span >= 1")
          return float((span - 1) / 2.)

      if halflife is not None:
          if halflife <= 0:
              raise ValueError("halflife must satisfy: halflife > 0")
          decay = 1 - np.exp(np.log(0.5) / halflife)
          return float(1 / decay - 1)

      if alpha is not None:
          if alpha <= 0 or alpha > 1:
              raise ValueError("alpha must satisfy: 0 < alpha <= 1")
          return float((1.0 - alpha) / alpha)

      raise ValueError("Must pass one of comass, span, halflife, or alpha")
  def _offset(window, center):
      """
      Return the integer offset for a window: half of ``size - 1``
      (truncated) when ``center`` is truthy, otherwise 0.

      A ``window`` that is not an integer is measured with ``len()``.
      """
      size = window if is_integer(window) else len(window)
      if center:
          offset = (size - 1) / 2.
      else:
          offset = 0
      try:
          return int(offset)
      except TypeError:
          # fallback for an offset that int() rejects but that provides
          # ``astype``
          return offset.astype(int)
  def _require_min_periods(p):
      """
      Build a checker that enforces a floor of ``p`` on ``minp``.

      The returned function takes ``(minp, window)`` and returns ``window``
      when ``minp`` is None, otherwise the larger of ``p`` and ``minp``.
      """
      def _check_func(minp, window):
          return window if minp is None else max(p, minp)

      return _check_func
  def _use_window(minp, window):
      """Return ``window`` when ``minp`` is None, otherwise ``minp``."""
      return window if minp is None else minp
  def _zsqrt(x):
      """
      Element-wise square root with negative inputs mapped to 0.

      NumPy floating-point warnings are suppressed while the root and the
      negativity mask are computed.
      """
      with np.errstate(all='ignore'):
          root = np.sqrt(x)
          negative = x < 0

      # A DataFrame mask is reduced through its underlying values.
      if isinstance(x, ABCDataFrame):
          has_negative = negative.values.any()
      else:
          has_negative = negative.any()

      if has_negative:
          root[negative] = 0
      return root
  def _prep_binary(arg1, arg2):
      """
      Return ``(arg1 + 0 * arg2, arg2 + 0 * arg1)``.

      Raises ``Exception`` when ``arg2`` is not an instance of ``arg1``'s
      type.
      """
      if not isinstance(arg2, type(arg1)):
          raise Exception('Input arrays must be of the same type!')

      # mask out values, this also makes a common index...
      masked_first = arg1 + 0 * arg2
      masked_second = arg2 + 0 * arg1
      return masked_first, masked_second
  2164. # Top-level exports
  def rolling(obj, win_type=None, **kwds):
      # Only Series / DataFrame inputs are accepted.
      if not isinstance(obj, (ABCSeries, ABCDataFrame)):
          raise TypeError('invalid type: %s' % type(obj))

      # No window type -> plain Rolling; otherwise a Window of that type.
      if win_type is None:
          return Rolling(obj, **kwds)
      return Window(obj, win_type=win_type, **kwds)
  # Reuse the Window class docstring as the documentation of ``rolling``.
  rolling.__doc__ = Window.__doc__
  def expanding(obj, **kwds):
      # Only Series / DataFrame inputs are accepted.
      if isinstance(obj, (ABCSeries, ABCDataFrame)):
          return Expanding(obj, **kwds)
      raise TypeError('invalid type: %s' % type(obj))
  # Reuse the Expanding class docstring as the documentation of ``expanding``.
  expanding.__doc__ = Expanding.__doc__
  def ewm(obj, **kwds):
      # Only Series / DataFrame inputs are accepted.
      if isinstance(obj, (ABCSeries, ABCDataFrame)):
          return EWM(obj, **kwds)
      raise TypeError('invalid type: %s' % type(obj))
  # Reuse the EWM class docstring as the documentation of ``ewm``.
  ewm.__doc__ = EWM.__doc__