timedeltas.py 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069
  1. # -*- coding: utf-8 -*-
  2. from __future__ import division
  3. from datetime import timedelta
  4. import textwrap
  5. import warnings
  6. import numpy as np
  7. from pandas._libs import lib, tslibs
  8. from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT
  9. from pandas._libs.tslibs.fields import get_timedelta_field
  10. from pandas._libs.tslibs.timedeltas import (
  11. array_to_timedelta64, parse_timedelta_unit, precision_from_unit)
  12. import pandas.compat as compat
  13. from pandas.util._decorators import Appender
  14. from pandas.core.dtypes.common import (
  15. _NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_dtype_equal,
  16. is_float_dtype, is_integer_dtype, is_list_like, is_object_dtype, is_scalar,
  17. is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype,
  18. pandas_dtype)
  19. from pandas.core.dtypes.dtypes import DatetimeTZDtype
  20. from pandas.core.dtypes.generic import (
  21. ABCDataFrame, ABCIndexClass, ABCSeries, ABCTimedeltaIndex)
  22. from pandas.core.dtypes.missing import isna
  23. from pandas.core import ops
  24. from pandas.core.algorithms import checked_add_with_arr
  25. import pandas.core.common as com
  26. from pandas.tseries.frequencies import to_offset
  27. from pandas.tseries.offsets import Tick
  28. from . import datetimelike as dtl
# Error-message template for input whose dtype cannot be interpreted as
# timedelta data; fill the ``{dtype}`` placeholder with ``str.format``.
_BAD_DTYPE = "dtype {dtype} cannot be converted to timedelta64[ns]"
  30. def _is_convertible_to_td(key):
  31. return isinstance(key, (Tick, timedelta,
  32. np.timedelta64, compat.string_types))
  33. def _field_accessor(name, alias, docstring=None):
  34. def f(self):
  35. values = self.asi8
  36. result = get_timedelta_field(values, alias)
  37. if self._hasnans:
  38. result = self._maybe_mask_results(result, fill_value=None,
  39. convert='float64')
  40. return result
  41. f.__name__ = name
  42. f.__doc__ = "\n{}\n".format(docstring)
  43. return property(f)
  44. def _td_array_cmp(cls, op):
  45. """
  46. Wrap comparison operations to convert timedelta-like to timedelta64
  47. """
  48. opname = '__{name}__'.format(name=op.__name__)
  49. nat_result = True if opname == '__ne__' else False
  50. def wrapper(self, other):
  51. if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
  52. return NotImplemented
  53. if _is_convertible_to_td(other) or other is NaT:
  54. try:
  55. other = Timedelta(other)
  56. except ValueError:
  57. # failed to parse as timedelta
  58. return ops.invalid_comparison(self, other, op)
  59. result = op(self.view('i8'), other.value)
  60. if isna(other):
  61. result.fill(nat_result)
  62. elif not is_list_like(other):
  63. return ops.invalid_comparison(self, other, op)
  64. elif len(other) != len(self):
  65. raise ValueError("Lengths must match")
  66. else:
  67. try:
  68. other = type(self)._from_sequence(other)._data
  69. except (ValueError, TypeError):
  70. return ops.invalid_comparison(self, other, op)
  71. result = op(self.view('i8'), other.view('i8'))
  72. result = com.values_from_object(result)
  73. o_mask = np.array(isna(other))
  74. if o_mask.any():
  75. result[o_mask] = nat_result
  76. if self._hasnans:
  77. result[self._isnan] = nat_result
  78. return result
  79. return compat.set_function_name(wrapper, opname, cls)
  80. class TimedeltaArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps):
  81. """
  82. Pandas ExtensionArray for timedelta data.
  83. .. versionadded:: 0.24.0
  84. .. warning::
  85. TimedeltaArray is currently experimental, and its API may change
  86. without warning. In particular, :attr:`TimedeltaArray.dtype` is
  87. expected to change to be an instance of an ``ExtensionDtype``
  88. subclass.
  89. Parameters
  90. ----------
  91. values : array-like
  92. The timedelta data.
  93. dtype : numpy.dtype
  94. Currently, only ``numpy.dtype("timedelta64[ns]")`` is accepted.
  95. freq : Offset, optional
  96. copy : bool, default False
  97. Whether to copy the underlying array of data.
  98. """
  99. _typ = "timedeltaarray"
  100. _scalar_type = Timedelta
  101. __array_priority__ = 1000
  102. # define my properties & methods for delegation
  103. _other_ops = []
  104. _bool_ops = []
  105. _object_ops = ['freq']
  106. _field_ops = ['days', 'seconds', 'microseconds', 'nanoseconds']
  107. _datetimelike_ops = _field_ops + _object_ops + _bool_ops
  108. _datetimelike_methods = ["to_pytimedelta", "total_seconds",
  109. "round", "floor", "ceil"]
  110. # Needed so that NaT.__richcmp__(DateTimeArray) operates pointwise
  111. ndim = 1
  112. @property
  113. def _box_func(self):
  114. return lambda x: Timedelta(x, unit='ns')
  115. @property
  116. def dtype(self):
  117. """
  118. The dtype for the TimedeltaArray.
  119. .. warning::
  120. A future version of pandas will change dtype to be an instance
  121. of a :class:`pandas.api.extensions.ExtensionDtype` subclass,
  122. not a ``numpy.dtype``.
  123. Returns
  124. -------
  125. numpy.dtype
  126. """
  127. return _TD_DTYPE
  128. # ----------------------------------------------------------------
  129. # Constructors
  130. _attributes = ["freq"]
  131. def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False):
  132. if isinstance(values, (ABCSeries, ABCIndexClass)):
  133. values = values._values
  134. inferred_freq = getattr(values, "_freq", None)
  135. if isinstance(values, type(self)):
  136. if freq is None:
  137. freq = values.freq
  138. elif freq and values.freq:
  139. freq = to_offset(freq)
  140. freq, _ = dtl.validate_inferred_freq(freq, values.freq, False)
  141. values = values._data
  142. if not isinstance(values, np.ndarray):
  143. msg = (
  144. "Unexpected type '{}'. 'values' must be a TimedeltaArray "
  145. "ndarray, or Series or Index containing one of those."
  146. )
  147. raise ValueError(msg.format(type(values).__name__))
  148. if values.ndim != 1:
  149. raise ValueError("Only 1-dimensional input arrays are supported.")
  150. if values.dtype == 'i8':
  151. # for compat with datetime/timedelta/period shared methods,
  152. # we can sometimes get here with int64 values. These represent
  153. # nanosecond UTC (or tz-naive) unix timestamps
  154. values = values.view(_TD_DTYPE)
  155. _validate_td64_dtype(values.dtype)
  156. dtype = _validate_td64_dtype(dtype)
  157. if freq == "infer":
  158. msg = (
  159. "Frequency inference not allowed in TimedeltaArray.__init__. "
  160. "Use 'pd.array()' instead."
  161. )
  162. raise ValueError(msg)
  163. if copy:
  164. values = values.copy()
  165. if freq:
  166. freq = to_offset(freq)
  167. self._data = values
  168. self._dtype = dtype
  169. self._freq = freq
  170. if inferred_freq is None and freq is not None:
  171. type(self)._validate_frequency(self, freq)
  172. @classmethod
  173. def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE):
  174. assert dtype == _TD_DTYPE, dtype
  175. assert isinstance(values, np.ndarray), type(values)
  176. result = object.__new__(cls)
  177. result._data = values.view(_TD_DTYPE)
  178. result._freq = to_offset(freq)
  179. result._dtype = _TD_DTYPE
  180. return result
  181. @classmethod
  182. def _from_sequence(cls, data, dtype=_TD_DTYPE, copy=False,
  183. freq=None, unit=None):
  184. if dtype:
  185. _validate_td64_dtype(dtype)
  186. freq, freq_infer = dtl.maybe_infer_freq(freq)
  187. data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)
  188. freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq,
  189. freq_infer)
  190. result = cls._simple_new(data, freq=freq)
  191. if inferred_freq is None and freq is not None:
  192. # this condition precludes `freq_infer`
  193. cls._validate_frequency(result, freq)
  194. elif freq_infer:
  195. # Set _freq directly to bypass duplicative _validate_frequency
  196. # check.
  197. result._freq = to_offset(result.inferred_freq)
  198. return result
  199. @classmethod
  200. def _generate_range(cls, start, end, periods, freq, closed=None):
  201. periods = dtl.validate_periods(periods)
  202. if freq is None and any(x is None for x in [periods, start, end]):
  203. raise ValueError('Must provide freq argument if no data is '
  204. 'supplied')
  205. if com.count_not_none(start, end, periods, freq) != 3:
  206. raise ValueError('Of the four parameters: start, end, periods, '
  207. 'and freq, exactly three must be specified')
  208. if start is not None:
  209. start = Timedelta(start)
  210. if end is not None:
  211. end = Timedelta(end)
  212. if start is None and end is None:
  213. if closed is not None:
  214. raise ValueError("Closed has to be None if not both of start"
  215. "and end are defined")
  216. left_closed, right_closed = dtl.validate_endpoints(closed)
  217. if freq is not None:
  218. index = _generate_regular_range(start, end, periods, freq)
  219. else:
  220. index = np.linspace(start.value, end.value, periods).astype('i8')
  221. if not left_closed:
  222. index = index[1:]
  223. if not right_closed:
  224. index = index[:-1]
  225. return cls._simple_new(index, freq=freq)
  226. # ----------------------------------------------------------------
  227. # DatetimeLike Interface
  228. def _unbox_scalar(self, value):
  229. if not isinstance(value, self._scalar_type) and value is not NaT:
  230. raise ValueError("'value' should be a Timedelta.")
  231. self._check_compatible_with(value)
  232. return value.value
  233. def _scalar_from_string(self, value):
  234. return Timedelta(value)
  235. def _check_compatible_with(self, other):
  236. # we don't have anything to validate.
  237. pass
  238. def _maybe_clear_freq(self):
  239. self._freq = None
  240. # ----------------------------------------------------------------
  241. # Array-Like / EA-Interface Methods
  242. @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__)
  243. def _validate_fill_value(self, fill_value):
  244. if isna(fill_value):
  245. fill_value = iNaT
  246. elif isinstance(fill_value, (timedelta, np.timedelta64, Tick)):
  247. fill_value = Timedelta(fill_value).value
  248. else:
  249. raise ValueError("'fill_value' should be a Timedelta. "
  250. "Got '{got}'.".format(got=fill_value))
  251. return fill_value
  252. def astype(self, dtype, copy=True):
  253. # We handle
  254. # --> timedelta64[ns]
  255. # --> timedelta64
  256. # DatetimeLikeArrayMixin super call handles other cases
  257. dtype = pandas_dtype(dtype)
  258. if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype):
  259. # by pandas convention, converting to non-nano timedelta64
  260. # returns an int64-dtyped array with ints representing multiples
  261. # of the desired timedelta unit. This is essentially division
  262. if self._hasnans:
  263. # avoid double-copying
  264. result = self._data.astype(dtype, copy=False)
  265. values = self._maybe_mask_results(result,
  266. fill_value=None,
  267. convert='float64')
  268. return values
  269. result = self._data.astype(dtype, copy=copy)
  270. return result.astype('i8')
  271. elif is_timedelta64_ns_dtype(dtype):
  272. if copy:
  273. return self.copy()
  274. return self
  275. return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy)
  276. # ----------------------------------------------------------------
  277. # Rendering Methods
  278. def _formatter(self, boxed=False):
  279. from pandas.io.formats.format import _get_format_timedelta64
  280. return _get_format_timedelta64(self, box=True)
  281. def _format_native_types(self, na_rep='NaT', date_format=None):
  282. from pandas.io.formats.format import _get_format_timedelta64
  283. formatter = _get_format_timedelta64(self._data, na_rep)
  284. return np.array([formatter(x) for x in self._data])
  285. # ----------------------------------------------------------------
  286. # Arithmetic Methods
  287. _create_comparison_method = classmethod(_td_array_cmp)
  288. def _add_offset(self, other):
  289. assert not isinstance(other, Tick)
  290. raise TypeError("cannot add the type {typ} to a {cls}"
  291. .format(typ=type(other).__name__,
  292. cls=type(self).__name__))
  293. def _add_delta(self, delta):
  294. """
  295. Add a timedelta-like, Tick, or TimedeltaIndex-like object
  296. to self, yielding a new TimedeltaArray.
  297. Parameters
  298. ----------
  299. other : {timedelta, np.timedelta64, Tick,
  300. TimedeltaIndex, ndarray[timedelta64]}
  301. Returns
  302. -------
  303. result : TimedeltaArray
  304. """
  305. new_values = super(TimedeltaArray, self)._add_delta(delta)
  306. return type(self)._from_sequence(new_values, freq='infer')
  307. def _add_datetime_arraylike(self, other):
  308. """
  309. Add DatetimeArray/Index or ndarray[datetime64] to TimedeltaArray.
  310. """
  311. if isinstance(other, np.ndarray):
  312. # At this point we have already checked that dtype is datetime64
  313. from pandas.core.arrays import DatetimeArray
  314. other = DatetimeArray(other)
  315. # defer to implementation in DatetimeArray
  316. return other + self
  317. def _add_datetimelike_scalar(self, other):
  318. # adding a timedeltaindex to a datetimelike
  319. from pandas.core.arrays import DatetimeArray
  320. assert other is not NaT
  321. other = Timestamp(other)
  322. if other is NaT:
  323. # In this case we specifically interpret NaT as a datetime, not
  324. # the timedelta interpretation we would get by returning self + NaT
  325. result = self.asi8.view('m8[ms]') + NaT.to_datetime64()
  326. return DatetimeArray(result)
  327. i8 = self.asi8
  328. result = checked_add_with_arr(i8, other.value,
  329. arr_mask=self._isnan)
  330. result = self._maybe_mask_results(result)
  331. dtype = DatetimeTZDtype(tz=other.tz) if other.tz else _NS_DTYPE
  332. return DatetimeArray(result, dtype=dtype, freq=self.freq)
  333. def _addsub_offset_array(self, other, op):
  334. # Add or subtract Array-like of DateOffset objects
  335. try:
  336. # TimedeltaIndex can only operate with a subset of DateOffset
  337. # subclasses. Incompatible classes will raise AttributeError,
  338. # which we re-raise as TypeError
  339. return super(TimedeltaArray, self)._addsub_offset_array(
  340. other, op
  341. )
  342. except AttributeError:
  343. raise TypeError("Cannot add/subtract non-tick DateOffset to {cls}"
  344. .format(cls=type(self).__name__))
  345. def __mul__(self, other):
  346. other = lib.item_from_zerodim(other)
  347. if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
  348. return NotImplemented
  349. if is_scalar(other):
  350. # numpy will accept float and int, raise TypeError for others
  351. result = self._data * other
  352. freq = None
  353. if self.freq is not None and not isna(other):
  354. freq = self.freq * other
  355. return type(self)(result, freq=freq)
  356. if not hasattr(other, "dtype"):
  357. # list, tuple
  358. other = np.array(other)
  359. if len(other) != len(self) and not is_timedelta64_dtype(other):
  360. # Exclude timedelta64 here so we correctly raise TypeError
  361. # for that instead of ValueError
  362. raise ValueError("Cannot multiply with unequal lengths")
  363. if is_object_dtype(other):
  364. # this multiplication will succeed only if all elements of other
  365. # are int or float scalars, so we will end up with
  366. # timedelta64[ns]-dtyped result
  367. result = [self[n] * other[n] for n in range(len(self))]
  368. result = np.array(result)
  369. return type(self)(result)
  370. # numpy will accept float or int dtype, raise TypeError for others
  371. result = self._data * other
  372. return type(self)(result)
  373. __rmul__ = __mul__
  374. def __truediv__(self, other):
  375. # timedelta / X is well-defined for timedelta-like or numeric X
  376. other = lib.item_from_zerodim(other)
  377. if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
  378. return NotImplemented
  379. if isinstance(other, (timedelta, np.timedelta64, Tick)):
  380. other = Timedelta(other)
  381. if other is NaT:
  382. # specifically timedelta64-NaT
  383. result = np.empty(self.shape, dtype=np.float64)
  384. result.fill(np.nan)
  385. return result
  386. # otherwise, dispatch to Timedelta implementation
  387. return self._data / other
  388. elif lib.is_scalar(other):
  389. # assume it is numeric
  390. result = self._data / other
  391. freq = None
  392. if self.freq is not None:
  393. # Tick division is not implemented, so operate on Timedelta
  394. freq = self.freq.delta / other
  395. return type(self)(result, freq=freq)
  396. if not hasattr(other, "dtype"):
  397. # e.g. list, tuple
  398. other = np.array(other)
  399. if len(other) != len(self):
  400. raise ValueError("Cannot divide vectors with unequal lengths")
  401. elif is_timedelta64_dtype(other):
  402. # let numpy handle it
  403. return self._data / other
  404. elif is_object_dtype(other):
  405. # Note: we do not do type inference on the result, so either
  406. # an object array or numeric-dtyped (if numpy does inference)
  407. # will be returned. GH#23829
  408. result = [self[n] / other[n] for n in range(len(self))]
  409. result = np.array(result)
  410. return result
  411. else:
  412. result = self._data / other
  413. return type(self)(result)
  414. def __rtruediv__(self, other):
  415. # X / timedelta is defined only for timedelta-like X
  416. other = lib.item_from_zerodim(other)
  417. if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
  418. return NotImplemented
  419. if isinstance(other, (timedelta, np.timedelta64, Tick)):
  420. other = Timedelta(other)
  421. if other is NaT:
  422. # specifically timedelta64-NaT
  423. result = np.empty(self.shape, dtype=np.float64)
  424. result.fill(np.nan)
  425. return result
  426. # otherwise, dispatch to Timedelta implementation
  427. return other / self._data
  428. elif lib.is_scalar(other):
  429. raise TypeError("Cannot divide {typ} by {cls}"
  430. .format(typ=type(other).__name__,
  431. cls=type(self).__name__))
  432. if not hasattr(other, "dtype"):
  433. # e.g. list, tuple
  434. other = np.array(other)
  435. if len(other) != len(self):
  436. raise ValueError("Cannot divide vectors with unequal lengths")
  437. elif is_timedelta64_dtype(other):
  438. # let numpy handle it
  439. return other / self._data
  440. elif is_object_dtype(other):
  441. # Note: unlike in __truediv__, we do not _need_ to do type#
  442. # inference on the result. It does not raise, a numeric array
  443. # is returned. GH#23829
  444. result = [other[n] / self[n] for n in range(len(self))]
  445. return np.array(result)
  446. else:
  447. raise TypeError("Cannot divide {dtype} data by {cls}"
  448. .format(dtype=other.dtype,
  449. cls=type(self).__name__))
  450. if compat.PY2:
  451. __div__ = __truediv__
  452. __rdiv__ = __rtruediv__
  453. def __floordiv__(self, other):
  454. if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
  455. return NotImplemented
  456. other = lib.item_from_zerodim(other)
  457. if is_scalar(other):
  458. if isinstance(other, (timedelta, np.timedelta64, Tick)):
  459. other = Timedelta(other)
  460. if other is NaT:
  461. # treat this specifically as timedelta-NaT
  462. result = np.empty(self.shape, dtype=np.float64)
  463. result.fill(np.nan)
  464. return result
  465. # dispatch to Timedelta implementation
  466. result = other.__rfloordiv__(self._data)
  467. return result
  468. # at this point we should only have numeric scalars; anything
  469. # else will raise
  470. result = self.asi8 // other
  471. result[self._isnan] = iNaT
  472. freq = None
  473. if self.freq is not None:
  474. # Note: freq gets division, not floor-division
  475. freq = self.freq / other
  476. return type(self)(result.view('m8[ns]'), freq=freq)
  477. if not hasattr(other, "dtype"):
  478. # list, tuple
  479. other = np.array(other)
  480. if len(other) != len(self):
  481. raise ValueError("Cannot divide with unequal lengths")
  482. elif is_timedelta64_dtype(other):
  483. other = type(self)(other)
  484. # numpy timedelta64 does not natively support floordiv, so operate
  485. # on the i8 values
  486. result = self.asi8 // other.asi8
  487. mask = self._isnan | other._isnan
  488. if mask.any():
  489. result = result.astype(np.int64)
  490. result[mask] = np.nan
  491. return result
  492. elif is_object_dtype(other):
  493. result = [self[n] // other[n] for n in range(len(self))]
  494. result = np.array(result)
  495. if lib.infer_dtype(result, skipna=False) == 'timedelta':
  496. result, _ = sequence_to_td64ns(result)
  497. return type(self)(result)
  498. return result
  499. elif is_integer_dtype(other) or is_float_dtype(other):
  500. result = self._data // other
  501. return type(self)(result)
  502. else:
  503. dtype = getattr(other, "dtype", type(other).__name__)
  504. raise TypeError("Cannot divide {typ} by {cls}"
  505. .format(typ=dtype, cls=type(self).__name__))
  506. def __rfloordiv__(self, other):
  507. if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
  508. return NotImplemented
  509. other = lib.item_from_zerodim(other)
  510. if is_scalar(other):
  511. if isinstance(other, (timedelta, np.timedelta64, Tick)):
  512. other = Timedelta(other)
  513. if other is NaT:
  514. # treat this specifically as timedelta-NaT
  515. result = np.empty(self.shape, dtype=np.float64)
  516. result.fill(np.nan)
  517. return result
  518. # dispatch to Timedelta implementation
  519. result = other.__floordiv__(self._data)
  520. return result
  521. raise TypeError("Cannot divide {typ} by {cls}"
  522. .format(typ=type(other).__name__,
  523. cls=type(self).__name__))
  524. if not hasattr(other, "dtype"):
  525. # list, tuple
  526. other = np.array(other)
  527. if len(other) != len(self):
  528. raise ValueError("Cannot divide with unequal lengths")
  529. elif is_timedelta64_dtype(other):
  530. other = type(self)(other)
  531. # numpy timedelta64 does not natively support floordiv, so operate
  532. # on the i8 values
  533. result = other.asi8 // self.asi8
  534. mask = self._isnan | other._isnan
  535. if mask.any():
  536. result = result.astype(np.int64)
  537. result[mask] = np.nan
  538. return result
  539. elif is_object_dtype(other):
  540. result = [other[n] // self[n] for n in range(len(self))]
  541. result = np.array(result)
  542. return result
  543. else:
  544. dtype = getattr(other, "dtype", type(other).__name__)
  545. raise TypeError("Cannot divide {typ} by {cls}"
  546. .format(typ=dtype, cls=type(self).__name__))
  547. def __mod__(self, other):
  548. # Note: This is a naive implementation, can likely be optimized
  549. if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
  550. return NotImplemented
  551. other = lib.item_from_zerodim(other)
  552. if isinstance(other, (timedelta, np.timedelta64, Tick)):
  553. other = Timedelta(other)
  554. return self - (self // other) * other
  555. def __rmod__(self, other):
  556. # Note: This is a naive implementation, can likely be optimized
  557. if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
  558. return NotImplemented
  559. other = lib.item_from_zerodim(other)
  560. if isinstance(other, (timedelta, np.timedelta64, Tick)):
  561. other = Timedelta(other)
  562. return other - (other // self) * self
  563. def __divmod__(self, other):
  564. # Note: This is a naive implementation, can likely be optimized
  565. if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
  566. return NotImplemented
  567. other = lib.item_from_zerodim(other)
  568. if isinstance(other, (timedelta, np.timedelta64, Tick)):
  569. other = Timedelta(other)
  570. res1 = self // other
  571. res2 = self - res1 * other
  572. return res1, res2
  573. def __rdivmod__(self, other):
  574. # Note: This is a naive implementation, can likely be optimized
  575. if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
  576. return NotImplemented
  577. other = lib.item_from_zerodim(other)
  578. if isinstance(other, (timedelta, np.timedelta64, Tick)):
  579. other = Timedelta(other)
  580. res1 = other // self
  581. res2 = other - res1 * self
  582. return res1, res2
  583. # Note: TimedeltaIndex overrides this in call to cls._add_numeric_methods
  584. def __neg__(self):
  585. if self.freq is not None:
  586. return type(self)(-self._data, freq=-self.freq)
  587. return type(self)(-self._data)
  588. def __abs__(self):
  589. # Note: freq is not preserved
  590. return type(self)(np.abs(self._data))
  591. # ----------------------------------------------------------------
  592. # Conversion Methods - Vectorized analogues of Timedelta methods
  593. def total_seconds(self):
  594. """
  595. Return total duration of each element expressed in seconds.
  596. This method is available directly on TimedeltaArray, TimedeltaIndex
  597. and on Series containing timedelta values under the ``.dt`` namespace.
  598. Returns
  599. -------
  600. seconds : [ndarray, Float64Index, Series]
  601. When the calling object is a TimedeltaArray, the return type
  602. is ndarray. When the calling object is a TimedeltaIndex,
  603. the return type is a Float64Index. When the calling object
  604. is a Series, the return type is Series of type `float64` whose
  605. index is the same as the original.
  606. See Also
  607. --------
  608. datetime.timedelta.total_seconds : Standard library version
  609. of this method.
  610. TimedeltaIndex.components : Return a DataFrame with components of
  611. each Timedelta.
  612. Examples
  613. --------
  614. **Series**
  615. >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='d'))
  616. >>> s
  617. 0 0 days
  618. 1 1 days
  619. 2 2 days
  620. 3 3 days
  621. 4 4 days
  622. dtype: timedelta64[ns]
  623. >>> s.dt.total_seconds()
  624. 0 0.0
  625. 1 86400.0
  626. 2 172800.0
  627. 3 259200.0
  628. 4 345600.0
  629. dtype: float64
  630. **TimedeltaIndex**
  631. >>> idx = pd.to_timedelta(np.arange(5), unit='d')
  632. >>> idx
  633. TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
  634. dtype='timedelta64[ns]', freq=None)
  635. >>> idx.total_seconds()
  636. Float64Index([0.0, 86400.0, 172800.0, 259200.00000000003, 345600.0],
  637. dtype='float64')
  638. """
  639. return self._maybe_mask_results(1e-9 * self.asi8, fill_value=None)
  640. def to_pytimedelta(self):
  641. """
  642. Return Timedelta Array/Index as object ndarray of datetime.timedelta
  643. objects.
  644. Returns
  645. -------
  646. datetimes : ndarray
  647. """
  648. return tslibs.ints_to_pytimedelta(self.asi8)
  649. days = _field_accessor("days", "days",
  650. "Number of days for each element.")
  651. seconds = _field_accessor("seconds", "seconds",
  652. "Number of seconds (>= 0 and less than 1 day) "
  653. "for each element.")
  654. microseconds = _field_accessor("microseconds", "microseconds",
  655. "Number of microseconds (>= 0 and less "
  656. "than 1 second) for each element.")
  657. nanoseconds = _field_accessor("nanoseconds", "nanoseconds",
  658. "Number of nanoseconds (>= 0 and less "
  659. "than 1 microsecond) for each element.")
  660. @property
  661. def components(self):
  662. """
  663. Return a dataframe of the components (days, hours, minutes,
  664. seconds, milliseconds, microseconds, nanoseconds) of the Timedeltas.
  665. Returns
  666. -------
  667. a DataFrame
  668. """
  669. from pandas import DataFrame
  670. columns = ['days', 'hours', 'minutes', 'seconds',
  671. 'milliseconds', 'microseconds', 'nanoseconds']
  672. hasnans = self._hasnans
  673. if hasnans:
  674. def f(x):
  675. if isna(x):
  676. return [np.nan] * len(columns)
  677. return x.components
  678. else:
  679. def f(x):
  680. return x.components
  681. result = DataFrame([f(x) for x in self], columns=columns)
  682. if not hasnans:
  683. result = result.astype('int64')
  684. return result
# Attach the comparison methods to TimedeltaArray at import time.
# NOTE(review): ``_add_comparison_ops`` is not defined in this file;
# presumably it is inherited and builds each method through
# ``_create_comparison_method`` (i.e. ``_td_array_cmp``) -- confirm.
TimedeltaArray._add_comparison_ops()
  686. # ---------------------------------------------------------------------
  687. # Constructor Helpers
  688. def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
  689. """
  690. Parameters
  691. ----------
  692. array : list-like
  693. copy : bool, default False
  694. unit : str, default "ns"
  695. The timedelta unit to treat integers as multiples of.
  696. errors : {"raise", "coerce", "ignore"}, default "raise"
  697. How to handle elements that cannot be converted to timedelta64[ns].
  698. See ``pandas.to_timedelta`` for details.
  699. Returns
  700. -------
  701. converted : numpy.ndarray
  702. The sequence converted to a numpy array with dtype ``timedelta64[ns]``.
  703. inferred_freq : Tick or None
  704. The inferred frequency of the sequence.
  705. Raises
  706. ------
  707. ValueError : Data cannot be converted to timedelta64[ns].
  708. Notes
  709. -----
  710. Unlike `pandas.to_timedelta`, if setting ``errors=ignore`` will not cause
  711. errors to be ignored; they are caught and subsequently ignored at a
  712. higher level.
  713. """
  714. inferred_freq = None
  715. unit = parse_timedelta_unit(unit)
  716. # Unwrap whatever we have into a np.ndarray
  717. if not hasattr(data, 'dtype'):
  718. # e.g. list, tuple
  719. if np.ndim(data) == 0:
  720. # i.e. generator
  721. data = list(data)
  722. data = np.array(data, copy=False)
  723. elif isinstance(data, ABCSeries):
  724. data = data._values
  725. elif isinstance(data, (ABCTimedeltaIndex, TimedeltaArray)):
  726. inferred_freq = data.freq
  727. data = data._data
  728. # Convert whatever we have into timedelta64[ns] dtype
  729. if is_object_dtype(data.dtype) or is_string_dtype(data.dtype):
  730. # no need to make a copy, need to convert if string-dtyped
  731. data = objects_to_td64ns(data, unit=unit, errors=errors)
  732. copy = False
  733. elif is_integer_dtype(data.dtype):
  734. # treat as multiples of the given unit
  735. data, copy_made = ints_to_td64ns(data, unit=unit)
  736. copy = copy and not copy_made
  737. elif is_float_dtype(data.dtype):
  738. # cast the unit, multiply base/frace separately
  739. # to avoid precision issues from float -> int
  740. mask = np.isnan(data)
  741. m, p = precision_from_unit(unit)
  742. base = data.astype(np.int64)
  743. frac = data - base
  744. if p:
  745. frac = np.round(frac, p)
  746. data = (base * m + (frac * m).astype(np.int64)).view('timedelta64[ns]')
  747. data[mask] = iNaT
  748. copy = False
  749. elif is_timedelta64_dtype(data.dtype):
  750. if data.dtype != _TD_DTYPE:
  751. # non-nano unit
  752. # TODO: watch out for overflows
  753. data = data.astype(_TD_DTYPE)
  754. copy = False
  755. elif is_datetime64_dtype(data):
  756. # GH#23539
  757. warnings.warn("Passing datetime64-dtype data to TimedeltaIndex is "
  758. "deprecated, will raise a TypeError in a future "
  759. "version",
  760. FutureWarning, stacklevel=4)
  761. data = ensure_int64(data).view(_TD_DTYPE)
  762. else:
  763. raise TypeError("dtype {dtype} cannot be converted to timedelta64[ns]"
  764. .format(dtype=data.dtype))
  765. data = np.array(data, copy=copy)
  766. if data.ndim != 1:
  767. raise ValueError("Only 1-dimensional input arrays are supported.")
  768. assert data.dtype == 'm8[ns]', data
  769. return data, inferred_freq
  770. def ints_to_td64ns(data, unit="ns"):
  771. """
  772. Convert an ndarray with integer-dtype to timedelta64[ns] dtype, treating
  773. the integers as multiples of the given timedelta unit.
  774. Parameters
  775. ----------
  776. data : numpy.ndarray with integer-dtype
  777. unit : str, default "ns"
  778. The timedelta unit to treat integers as multiples of.
  779. Returns
  780. -------
  781. numpy.ndarray : timedelta64[ns] array converted from data
  782. bool : whether a copy was made
  783. """
  784. copy_made = False
  785. unit = unit if unit is not None else "ns"
  786. if data.dtype != np.int64:
  787. # converting to int64 makes a copy, so we can avoid
  788. # re-copying later
  789. data = data.astype(np.int64)
  790. copy_made = True
  791. if unit != "ns":
  792. dtype_str = "timedelta64[{unit}]".format(unit=unit)
  793. data = data.view(dtype_str)
  794. # TODO: watch out for overflows when converting from lower-resolution
  795. data = data.astype("timedelta64[ns]")
  796. # the astype conversion makes a copy, so we can avoid re-copying later
  797. copy_made = True
  798. else:
  799. data = data.view("timedelta64[ns]")
  800. return data, copy_made
  801. def objects_to_td64ns(data, unit="ns", errors="raise"):
  802. """
  803. Convert a object-dtyped or string-dtyped array into an
  804. timedelta64[ns]-dtyped array.
  805. Parameters
  806. ----------
  807. data : ndarray or Index
  808. unit : str, default "ns"
  809. The timedelta unit to treat integers as multiples of.
  810. errors : {"raise", "coerce", "ignore"}, default "raise"
  811. How to handle elements that cannot be converted to timedelta64[ns].
  812. See ``pandas.to_timedelta`` for details.
  813. Returns
  814. -------
  815. numpy.ndarray : timedelta64[ns] array converted from data
  816. Raises
  817. ------
  818. ValueError : Data cannot be converted to timedelta64[ns].
  819. Notes
  820. -----
  821. Unlike `pandas.to_timedelta`, if setting `errors=ignore` will not cause
  822. errors to be ignored; they are caught and subsequently ignored at a
  823. higher level.
  824. """
  825. # coerce Index to np.ndarray, converting string-dtype if necessary
  826. values = np.array(data, dtype=np.object_, copy=False)
  827. result = array_to_timedelta64(values,
  828. unit=unit, errors=errors)
  829. return result.view('timedelta64[ns]')
  830. def _validate_td64_dtype(dtype):
  831. dtype = pandas_dtype(dtype)
  832. if is_dtype_equal(dtype, np.dtype("timedelta64")):
  833. dtype = _TD_DTYPE
  834. msg = textwrap.dedent("""\
  835. Passing in 'timedelta' dtype with no precision is deprecated
  836. and will raise in a future version. Please pass in
  837. 'timedelta64[ns]' instead.""")
  838. warnings.warn(msg, FutureWarning, stacklevel=4)
  839. if not is_dtype_equal(dtype, _TD_DTYPE):
  840. raise ValueError(_BAD_DTYPE.format(dtype=dtype))
  841. return dtype
  842. def _generate_regular_range(start, end, periods, offset):
  843. stride = offset.nanos
  844. if periods is None:
  845. b = Timedelta(start).value
  846. e = Timedelta(end).value
  847. e += stride - e % stride
  848. elif start is not None:
  849. b = Timedelta(start).value
  850. e = b + periods * stride
  851. elif end is not None:
  852. e = Timedelta(end).value + stride
  853. b = e - periods * stride
  854. else:
  855. raise ValueError("at least 'start' or 'end' should be specified "
  856. "if a 'period' is given.")
  857. data = np.arange(b, e, stride, dtype=np.int64)
  858. return data