# datetimelike.py
  1. # -*- coding: utf-8 -*-
  2. """
  3. Base and utility classes for tseries type pandas objects.
  4. """
  5. import operator
  6. import warnings
  7. import numpy as np
  8. from pandas._libs import NaT, iNaT, lib
  9. from pandas.compat.numpy import function as nv
  10. from pandas.errors import AbstractMethodError
  11. from pandas.util._decorators import Appender, cache_readonly, deprecate_kwarg
  12. from pandas.core.dtypes.common import (
  13. ensure_int64, is_dtype_equal, is_float, is_integer, is_list_like,
  14. is_period_dtype, is_scalar)
  15. from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
  16. from pandas.core import algorithms, ops
  17. from pandas.core.accessor import PandasDelegate
  18. from pandas.core.arrays import ExtensionOpsMixin
  19. from pandas.core.arrays.datetimelike import (
  20. DatetimeLikeArrayMixin, _ensure_datetimelike_to_i8)
  21. import pandas.core.indexes.base as ibase
  22. from pandas.core.indexes.base import Index, _index_shared_docs
  23. from pandas.core.tools.timedeltas import to_timedelta
  24. import pandas.io.formats.printing as printing
# Module-local copy of the base Index doc kwargs; it is interpolated into the
# shared docstring templates used by the ``Appender`` decorators below.
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
  26. def ea_passthrough(array_method):
  27. """
  28. Make an alias for a method of the underlying ExtensionArray.
  29. Parameters
  30. ----------
  31. array_method : method on an Array class
  32. Returns
  33. -------
  34. method
  35. """
  36. def method(self, *args, **kwargs):
  37. return array_method(self._data, *args, **kwargs)
  38. method.__name__ = array_method.__name__
  39. method.__doc__ = array_method.__doc__
  40. return method
  41. class DatetimeIndexOpsMixin(ExtensionOpsMixin):
  42. """
  43. common ops mixin to support a unified interface datetimelike Index
  44. """
  45. _data = None # type: DatetimeLikeArrayMixin
  46. # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are
  47. # properties there. They can be made into cache_readonly for Index
  48. # subclasses bc they are immutable
  49. inferred_freq = cache_readonly(DatetimeLikeArrayMixin.inferred_freq.fget)
  50. _isnan = cache_readonly(DatetimeLikeArrayMixin._isnan.fget)
  51. hasnans = cache_readonly(DatetimeLikeArrayMixin._hasnans.fget)
  52. _hasnans = hasnans # for index / array -agnostic code
  53. _resolution = cache_readonly(DatetimeLikeArrayMixin._resolution.fget)
  54. resolution = cache_readonly(DatetimeLikeArrayMixin.resolution.fget)
  55. _box_values = ea_passthrough(DatetimeLikeArrayMixin._box_values)
  56. _maybe_mask_results = ea_passthrough(
  57. DatetimeLikeArrayMixin._maybe_mask_results)
  58. __iter__ = ea_passthrough(DatetimeLikeArrayMixin.__iter__)
  59. @property
  60. def freq(self):
  61. """
  62. Return the frequency object if it is set, otherwise None.
  63. """
  64. return self._data.freq
  65. @freq.setter
  66. def freq(self, value):
  67. # validation is handled by _data setter
  68. self._data.freq = value
  69. @property
  70. def freqstr(self):
  71. """
  72. Return the frequency object as a string if it is set, otherwise None.
  73. """
  74. return self._data.freqstr
  75. def unique(self, level=None):
  76. if level is not None:
  77. self._validate_index_level(level)
  78. result = self._data.unique()
  79. # Note: if `self` is already unique, then self.unique() should share
  80. # a `freq` with self. If not already unique, then self.freq must be
  81. # None, so again sharing freq is correct.
  82. return self._shallow_copy(result._data)
  83. @classmethod
  84. def _create_comparison_method(cls, op):
  85. """
  86. Create a comparison method that dispatches to ``cls.values``.
  87. """
  88. def wrapper(self, other):
  89. if isinstance(other, ABCSeries):
  90. # the arrays defer to Series for comparison ops but the indexes
  91. # don't, so we have to unwrap here.
  92. other = other._values
  93. result = op(self._data, maybe_unwrap_index(other))
  94. return result
  95. wrapper.__doc__ = op.__doc__
  96. wrapper.__name__ = '__{}__'.format(op.__name__)
  97. return wrapper
  98. @property
  99. def _ndarray_values(self):
  100. return self._data._ndarray_values
  101. # ------------------------------------------------------------------------
  102. # Abstract data attributes
  103. @property
  104. def values(self):
  105. # type: () -> np.ndarray
  106. # Note: PeriodArray overrides this to return an ndarray of objects.
  107. return self._data._data
  108. @property
  109. @Appender(DatetimeLikeArrayMixin.asi8.__doc__)
  110. def asi8(self):
  111. return self._data.asi8
  112. # ------------------------------------------------------------------------
  113. def equals(self, other):
  114. """
  115. Determines if two Index objects contain the same elements.
  116. """
  117. if self.is_(other):
  118. return True
  119. if not isinstance(other, ABCIndexClass):
  120. return False
  121. elif not isinstance(other, type(self)):
  122. try:
  123. other = type(self)(other)
  124. except Exception:
  125. return False
  126. if not is_dtype_equal(self.dtype, other.dtype):
  127. # have different timezone
  128. return False
  129. elif is_period_dtype(self):
  130. if not is_period_dtype(other):
  131. return False
  132. if self.freq != other.freq:
  133. return False
  134. return np.array_equal(self.asi8, other.asi8)
  135. @staticmethod
  136. def _join_i8_wrapper(joinf, dtype, with_indexers=True):
  137. """
  138. Create the join wrapper methods.
  139. """
  140. from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
  141. @staticmethod
  142. def wrapper(left, right):
  143. if isinstance(left, (np.ndarray, ABCIndex, ABCSeries,
  144. DatetimeLikeArrayMixin)):
  145. left = left.view('i8')
  146. if isinstance(right, (np.ndarray, ABCIndex, ABCSeries,
  147. DatetimeLikeArrayMixin)):
  148. right = right.view('i8')
  149. results = joinf(left, right)
  150. if with_indexers:
  151. join_index, left_indexer, right_indexer = results
  152. join_index = join_index.view(dtype)
  153. return join_index, left_indexer, right_indexer
  154. return results
  155. return wrapper
  156. def _ensure_localized(self, arg, ambiguous='raise', nonexistent='raise',
  157. from_utc=False):
  158. # See DatetimeLikeArrayMixin._ensure_localized.__doc__
  159. if getattr(self, 'tz', None):
  160. # ensure_localized is only relevant for tz-aware DTI
  161. result = self._data._ensure_localized(arg,
  162. ambiguous=ambiguous,
  163. nonexistent=nonexistent,
  164. from_utc=from_utc)
  165. return type(self)._simple_new(result, name=self.name)
  166. return arg
  167. def _box_values(self, values):
  168. return self._data._box_values(values)
  169. @Appender(_index_shared_docs['contains'] % _index_doc_kwargs)
  170. def __contains__(self, key):
  171. try:
  172. res = self.get_loc(key)
  173. return (is_scalar(res) or isinstance(res, slice) or
  174. (is_list_like(res) and len(res)))
  175. except (KeyError, TypeError, ValueError):
  176. return False
  177. contains = __contains__
  178. # Try to run function on index first, and then on elements of index
  179. # Especially important for group-by functionality
  180. def map(self, f):
  181. try:
  182. result = f(self)
  183. # Try to use this result if we can
  184. if isinstance(result, np.ndarray):
  185. result = Index(result)
  186. if not isinstance(result, Index):
  187. raise TypeError('The map function must return an Index object')
  188. return result
  189. except Exception:
  190. return self.astype(object).map(f)
  191. def sort_values(self, return_indexer=False, ascending=True):
  192. """
  193. Return sorted copy of Index.
  194. """
  195. if return_indexer:
  196. _as = self.argsort()
  197. if not ascending:
  198. _as = _as[::-1]
  199. sorted_index = self.take(_as)
  200. return sorted_index, _as
  201. else:
  202. sorted_values = np.sort(self._ndarray_values)
  203. attribs = self._get_attributes_dict()
  204. freq = attribs['freq']
  205. if freq is not None and not is_period_dtype(self):
  206. if freq.n > 0 and not ascending:
  207. freq = freq * -1
  208. elif freq.n < 0 and ascending:
  209. freq = freq * -1
  210. attribs['freq'] = freq
  211. if not ascending:
  212. sorted_values = sorted_values[::-1]
  213. return self._simple_new(sorted_values, **attribs)
  214. @Appender(_index_shared_docs['take'] % _index_doc_kwargs)
  215. def take(self, indices, axis=0, allow_fill=True,
  216. fill_value=None, **kwargs):
  217. nv.validate_take(tuple(), kwargs)
  218. indices = ensure_int64(indices)
  219. maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
  220. if isinstance(maybe_slice, slice):
  221. return self[maybe_slice]
  222. taken = self._assert_take_fillable(self.asi8, indices,
  223. allow_fill=allow_fill,
  224. fill_value=fill_value,
  225. na_value=iNaT)
  226. # keep freq in PeriodArray/Index, reset otherwise
  227. freq = self.freq if is_period_dtype(self) else None
  228. return self._shallow_copy(taken, freq=freq)
  229. _can_hold_na = True
  230. _na_value = NaT
  231. """The expected NA value to use with this index."""
  232. @property
  233. def asobject(self):
  234. """
  235. Return object Index which contains boxed values.
  236. .. deprecated:: 0.23.0
  237. Use ``astype(object)`` instead.
  238. *this is an internal non-public method*
  239. """
  240. warnings.warn("'asobject' is deprecated. Use 'astype(object)'"
  241. " instead", FutureWarning, stacklevel=2)
  242. return self.astype(object)
  243. def _convert_tolerance(self, tolerance, target):
  244. tolerance = np.asarray(to_timedelta(tolerance, box=False))
  245. if target.size != tolerance.size and tolerance.size > 1:
  246. raise ValueError('list-like tolerance size must match '
  247. 'target index size')
  248. return tolerance
  249. def tolist(self):
  250. """
  251. Return a list of the underlying data.
  252. """
  253. return list(self.astype(object))
  254. def min(self, axis=None, skipna=True, *args, **kwargs):
  255. """
  256. Return the minimum value of the Index or minimum along
  257. an axis.
  258. See Also
  259. --------
  260. numpy.ndarray.min
  261. Series.min : Return the minimum value in a Series.
  262. """
  263. nv.validate_min(args, kwargs)
  264. nv.validate_minmax_axis(axis)
  265. if not len(self):
  266. return self._na_value
  267. i8 = self.asi8
  268. try:
  269. # quick check
  270. if len(i8) and self.is_monotonic:
  271. if i8[0] != iNaT:
  272. return self._box_func(i8[0])
  273. if self.hasnans:
  274. if skipna:
  275. min_stamp = self[~self._isnan].asi8.min()
  276. else:
  277. return self._na_value
  278. else:
  279. min_stamp = i8.min()
  280. return self._box_func(min_stamp)
  281. except ValueError:
  282. return self._na_value
  283. def argmin(self, axis=None, skipna=True, *args, **kwargs):
  284. """
  285. Returns the indices of the minimum values along an axis.
  286. See `numpy.ndarray.argmin` for more information on the
  287. `axis` parameter.
  288. See Also
  289. --------
  290. numpy.ndarray.argmin
  291. """
  292. nv.validate_argmin(args, kwargs)
  293. nv.validate_minmax_axis(axis)
  294. i8 = self.asi8
  295. if self.hasnans:
  296. mask = self._isnan
  297. if mask.all() or not skipna:
  298. return -1
  299. i8 = i8.copy()
  300. i8[mask] = np.iinfo('int64').max
  301. return i8.argmin()
  302. def max(self, axis=None, skipna=True, *args, **kwargs):
  303. """
  304. Return the maximum value of the Index or maximum along
  305. an axis.
  306. See Also
  307. --------
  308. numpy.ndarray.max
  309. Series.max : Return the maximum value in a Series.
  310. """
  311. nv.validate_max(args, kwargs)
  312. nv.validate_minmax_axis(axis)
  313. if not len(self):
  314. return self._na_value
  315. i8 = self.asi8
  316. try:
  317. # quick check
  318. if len(i8) and self.is_monotonic:
  319. if i8[-1] != iNaT:
  320. return self._box_func(i8[-1])
  321. if self.hasnans:
  322. if skipna:
  323. max_stamp = self[~self._isnan].asi8.max()
  324. else:
  325. return self._na_value
  326. else:
  327. max_stamp = i8.max()
  328. return self._box_func(max_stamp)
  329. except ValueError:
  330. return self._na_value
  331. def argmax(self, axis=None, skipna=True, *args, **kwargs):
  332. """
  333. Returns the indices of the maximum values along an axis.
  334. See `numpy.ndarray.argmax` for more information on the
  335. `axis` parameter.
  336. See Also
  337. --------
  338. numpy.ndarray.argmax
  339. """
  340. nv.validate_argmax(args, kwargs)
  341. nv.validate_minmax_axis(axis)
  342. i8 = self.asi8
  343. if self.hasnans:
  344. mask = self._isnan
  345. if mask.all() or not skipna:
  346. return -1
  347. i8 = i8.copy()
  348. i8[mask] = 0
  349. return i8.argmax()
  350. # --------------------------------------------------------------------
  351. # Rendering Methods
  352. def _format_with_header(self, header, **kwargs):
  353. return header + list(self._format_native_types(**kwargs))
  354. @property
  355. def _formatter_func(self):
  356. raise AbstractMethodError(self)
  357. def _format_attrs(self):
  358. """
  359. Return a list of tuples of the (attr,formatted_value).
  360. """
  361. attrs = super(DatetimeIndexOpsMixin, self)._format_attrs()
  362. for attrib in self._attributes:
  363. if attrib == 'freq':
  364. freq = self.freqstr
  365. if freq is not None:
  366. freq = "'%s'" % freq
  367. attrs.append(('freq', freq))
  368. return attrs
  369. # --------------------------------------------------------------------
  370. def _convert_scalar_indexer(self, key, kind=None):
  371. """
  372. We don't allow integer or float indexing on datetime-like when using
  373. loc.
  374. Parameters
  375. ----------
  376. key : label of the slice bound
  377. kind : {'ix', 'loc', 'getitem', 'iloc'} or None
  378. """
  379. assert kind in ['ix', 'loc', 'getitem', 'iloc', None]
  380. # we don't allow integer/float indexing for loc
  381. # we don't allow float indexing for ix/getitem
  382. if is_scalar(key):
  383. is_int = is_integer(key)
  384. is_flt = is_float(key)
  385. if kind in ['loc'] and (is_int or is_flt):
  386. self._invalid_indexer('index', key)
  387. elif kind in ['ix', 'getitem'] and is_flt:
  388. self._invalid_indexer('index', key)
  389. return (super(DatetimeIndexOpsMixin, self)
  390. ._convert_scalar_indexer(key, kind=kind))
  391. @classmethod
  392. def _add_datetimelike_methods(cls):
  393. """
  394. Add in the datetimelike methods (as we may have to override the
  395. superclass).
  396. """
  397. def __add__(self, other):
  398. # dispatch to ExtensionArray implementation
  399. result = self._data.__add__(maybe_unwrap_index(other))
  400. return wrap_arithmetic_op(self, other, result)
  401. cls.__add__ = __add__
  402. def __radd__(self, other):
  403. # alias for __add__
  404. return self.__add__(other)
  405. cls.__radd__ = __radd__
  406. def __sub__(self, other):
  407. # dispatch to ExtensionArray implementation
  408. result = self._data.__sub__(maybe_unwrap_index(other))
  409. return wrap_arithmetic_op(self, other, result)
  410. cls.__sub__ = __sub__
  411. def __rsub__(self, other):
  412. result = self._data.__rsub__(maybe_unwrap_index(other))
  413. return wrap_arithmetic_op(self, other, result)
  414. cls.__rsub__ = __rsub__
  415. def isin(self, values):
  416. """
  417. Compute boolean array of whether each index value is found in the
  418. passed set of values.
  419. Parameters
  420. ----------
  421. values : set or sequence of values
  422. Returns
  423. -------
  424. is_contained : ndarray (boolean dtype)
  425. """
  426. if not isinstance(values, type(self)):
  427. try:
  428. values = type(self)(values)
  429. except ValueError:
  430. return self.astype(object).isin(values)
  431. return algorithms.isin(self.asi8, values.asi8)
  432. @Appender(_index_shared_docs['repeat'] % _index_doc_kwargs)
  433. def repeat(self, repeats, axis=None):
  434. nv.validate_repeat(tuple(), dict(axis=axis))
  435. freq = self.freq if is_period_dtype(self) else None
  436. return self._shallow_copy(self.asi8.repeat(repeats), freq=freq)
  437. @Appender(_index_shared_docs['where'] % _index_doc_kwargs)
  438. def where(self, cond, other=None):
  439. other = _ensure_datetimelike_to_i8(other, to_utc=True)
  440. values = _ensure_datetimelike_to_i8(self, to_utc=True)
  441. result = np.where(cond, values, other).astype('i8')
  442. result = self._ensure_localized(result, from_utc=True)
  443. return self._shallow_copy(result)
  444. def _summary(self, name=None):
  445. """
  446. Return a summarized representation.
  447. Parameters
  448. ----------
  449. name : str
  450. name to use in the summary representation
  451. Returns
  452. -------
  453. String with a summarized representation of the index
  454. """
  455. formatter = self._formatter_func
  456. if len(self) > 0:
  457. index_summary = ', %s to %s' % (formatter(self[0]),
  458. formatter(self[-1]))
  459. else:
  460. index_summary = ''
  461. if name is None:
  462. name = type(self).__name__
  463. result = '%s: %s entries%s' % (printing.pprint_thing(name),
  464. len(self), index_summary)
  465. if self.freq:
  466. result += '\nFreq: %s' % self.freqstr
  467. # display as values, not quoted
  468. result = result.replace("'", "")
  469. return result
  470. def _concat_same_dtype(self, to_concat, name):
  471. """
  472. Concatenate to_concat which has the same class.
  473. """
  474. attribs = self._get_attributes_dict()
  475. attribs['name'] = name
  476. # do not pass tz to set because tzlocal cannot be hashed
  477. if len({str(x.dtype) for x in to_concat}) != 1:
  478. raise ValueError('to_concat must have the same tz')
  479. if not is_period_dtype(self):
  480. # reset freq
  481. attribs['freq'] = None
  482. new_data = type(self._values)._concat_same_type(to_concat).asi8
  483. return self._simple_new(new_data, **attribs)
  484. @Appender(_index_shared_docs['astype'])
  485. def astype(self, dtype, copy=True):
  486. if is_dtype_equal(self.dtype, dtype) and copy is False:
  487. # Ensure that self.astype(self.dtype) is self
  488. return self
  489. new_values = self._data.astype(dtype, copy=copy)
  490. # pass copy=False because any copying will be done in the
  491. # _data.astype call above
  492. return Index(new_values,
  493. dtype=new_values.dtype, name=self.name, copy=False)
  494. @deprecate_kwarg(old_arg_name='n', new_arg_name='periods')
  495. def shift(self, periods, freq=None):
  496. """
  497. Shift index by desired number of time frequency increments.
  498. This method is for shifting the values of datetime-like indexes
  499. by a specified time increment a given number of times.
  500. Parameters
  501. ----------
  502. periods : int
  503. Number of periods (or increments) to shift by,
  504. can be positive or negative.
  505. .. versionchanged:: 0.24.0
  506. freq : pandas.DateOffset, pandas.Timedelta or string, optional
  507. Frequency increment to shift by.
  508. If None, the index is shifted by its own `freq` attribute.
  509. Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.
  510. Returns
  511. -------
  512. pandas.DatetimeIndex
  513. Shifted index.
  514. See Also
  515. --------
  516. Index.shift : Shift values of Index.
  517. PeriodIndex.shift : Shift values of PeriodIndex.
  518. """
  519. result = self._data._time_shift(periods, freq=freq)
  520. return type(self)(result, name=self.name)
  521. def wrap_arithmetic_op(self, other, result):
  522. if result is NotImplemented:
  523. return NotImplemented
  524. if isinstance(result, tuple):
  525. # divmod, rdivmod
  526. assert len(result) == 2
  527. return (wrap_arithmetic_op(self, other, result[0]),
  528. wrap_arithmetic_op(self, other, result[1]))
  529. if not isinstance(result, Index):
  530. # Index.__new__ will choose appropriate subclass for dtype
  531. result = Index(result)
  532. res_name = ops.get_op_result_name(self, other)
  533. result.name = res_name
  534. return result
  535. def maybe_unwrap_index(obj):
  536. """
  537. If operating against another Index object, we need to unwrap the underlying
  538. data before deferring to the DatetimeArray/TimedeltaArray/PeriodArray
  539. implementation, otherwise we will incorrectly return NotImplemented.
  540. Parameters
  541. ----------
  542. obj : object
  543. Returns
  544. -------
  545. unwrapped object
  546. """
  547. if isinstance(obj, ABCIndexClass):
  548. return obj._data
  549. return obj
  550. class DatetimelikeDelegateMixin(PandasDelegate):
  551. """
  552. Delegation mechanism, specific for Datetime, Timedelta, and Period types.
  553. Functionality is delegated from the Index class to an Array class. A
  554. few things can be customized
  555. * _delegate_class : type
  556. The class being delegated to.
  557. * _delegated_methods, delegated_properties : List
  558. The list of property / method names being delagated.
  559. * raw_methods : Set
  560. The set of methods whose results should should *not* be
  561. boxed in an index, after being returned from the array
  562. * raw_properties : Set
  563. The set of properties whose results should should *not* be
  564. boxed in an index, after being returned from the array
  565. """
  566. # raw_methods : dispatch methods that shouldn't be boxed in an Index
  567. _raw_methods = set()
  568. # raw_properties : dispatch properties that shouldn't be boxed in an Index
  569. _raw_properties = set()
  570. name = None
  571. _data = None
  572. @property
  573. def _delegate_class(self):
  574. raise AbstractMethodError
  575. def _delegate_property_get(self, name, *args, **kwargs):
  576. result = getattr(self._data, name)
  577. if name not in self._raw_properties:
  578. result = Index(result, name=self.name)
  579. return result
  580. def _delegate_property_set(self, name, value, *args, **kwargs):
  581. setattr(self._data, name, value)
  582. def _delegate_method(self, name, *args, **kwargs):
  583. result = operator.methodcaller(name, *args, **kwargs)(self._data)
  584. if name not in self._raw_methods:
  585. result = Index(result, name=self.name)
  586. return result