datetimelike.py 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598
  1. # -*- coding: utf-8 -*-
  2. from datetime import datetime, timedelta
  3. import operator
  4. import warnings
  5. import numpy as np
  6. from pandas._libs import NaT, algos, iNaT, lib
  7. from pandas._libs.tslibs.period import (
  8. DIFFERENT_FREQ, IncompatibleFrequency, Period)
  9. from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds
  10. from pandas._libs.tslibs.timestamps import (
  11. RoundTo, maybe_integer_op_deprecated, round_nsint64)
  12. import pandas.compat as compat
  13. from pandas.compat.numpy import function as nv
  14. from pandas.errors import (
  15. AbstractMethodError, NullFrequencyError, PerformanceWarning)
  16. from pandas.util._decorators import Appender, Substitution
  17. from pandas.util._validators import validate_fillna_kwargs
  18. from pandas.core.dtypes.common import (
  19. is_categorical_dtype, is_datetime64_any_dtype, is_datetime64_dtype,
  20. is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, is_dtype_equal,
  21. is_extension_array_dtype, is_float_dtype, is_integer_dtype, is_list_like,
  22. is_object_dtype, is_offsetlike, is_period_dtype, is_string_dtype,
  23. is_timedelta64_dtype, is_unsigned_integer_dtype, pandas_dtype)
  24. from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
  25. from pandas.core.dtypes.inference import is_array_like
  26. from pandas.core.dtypes.missing import isna
  27. from pandas.core import missing, nanops
  28. from pandas.core.algorithms import (
  29. checked_add_with_arr, take, unique1d, value_counts)
  30. import pandas.core.common as com
  31. from pandas.tseries import frequencies
  32. from pandas.tseries.offsets import DateOffset, Tick
  33. from .base import ExtensionArray, ExtensionOpsMixin
  34. class AttributesMixin(object):
  35. @property
  36. def _attributes(self):
  37. # Inheriting subclass should implement _attributes as a list of strings
  38. raise AbstractMethodError(self)
  39. @classmethod
  40. def _simple_new(cls, values, **kwargs):
  41. raise AbstractMethodError(cls)
  42. def _get_attributes_dict(self):
  43. """
  44. return an attributes dict for my class
  45. """
  46. return {k: getattr(self, k, None) for k in self._attributes}
  47. @property
  48. def _scalar_type(self):
  49. # type: () -> Union[type, Tuple[type]]
  50. """The scalar associated with this datelike
  51. * PeriodArray : Period
  52. * DatetimeArray : Timestamp
  53. * TimedeltaArray : Timedelta
  54. """
  55. raise AbstractMethodError(self)
  56. def _scalar_from_string(self, value):
  57. # type: (str) -> Union[Period, Timestamp, Timedelta, NaTType]
  58. """
  59. Construct a scalar type from a string.
  60. Parameters
  61. ----------
  62. value : str
  63. Returns
  64. -------
  65. Period, Timestamp, or Timedelta, or NaT
  66. Whatever the type of ``self._scalar_type`` is.
  67. Notes
  68. -----
  69. This should call ``self._check_compatible_with`` before
  70. unboxing the result.
  71. """
  72. raise AbstractMethodError(self)
  73. def _unbox_scalar(self, value):
  74. # type: (Union[Period, Timestamp, Timedelta, NaTType]) -> int
  75. """
  76. Unbox the integer value of a scalar `value`.
  77. Parameters
  78. ----------
  79. value : Union[Period, Timestamp, Timedelta]
  80. Returns
  81. -------
  82. int
  83. Examples
  84. --------
  85. >>> self._unbox_scalar(Timedelta('10s')) # DOCTEST: +SKIP
  86. 10000000000
  87. """
  88. raise AbstractMethodError(self)
  89. def _check_compatible_with(self, other):
  90. # type: (Union[Period, Timestamp, Timedelta, NaTType]) -> None
  91. """
  92. Verify that `self` and `other` are compatible.
  93. * DatetimeArray verifies that the timezones (if any) match
  94. * PeriodArray verifies that the freq matches
  95. * Timedelta has no verification
  96. In each case, NaT is considered compatible.
  97. Parameters
  98. ----------
  99. other
  100. Raises
  101. ------
  102. Exception
  103. """
  104. raise AbstractMethodError(self)
  105. class DatelikeOps(object):
  106. """
  107. Common ops for DatetimeIndex/PeriodIndex, but not TimedeltaIndex.
  108. """
  109. @Substitution(URL="https://docs.python.org/3/library/datetime.html"
  110. "#strftime-and-strptime-behavior")
  111. def strftime(self, date_format):
  112. """
  113. Convert to Index using specified date_format.
  114. Return an Index of formatted strings specified by date_format, which
  115. supports the same string format as the python standard library. Details
  116. of the string format can be found in `python string format
  117. doc <%(URL)s>`__
  118. Parameters
  119. ----------
  120. date_format : str
  121. Date format string (e.g. "%%Y-%%m-%%d").
  122. Returns
  123. -------
  124. Index
  125. Index of formatted strings
  126. See Also
  127. --------
  128. to_datetime : Convert the given argument to datetime.
  129. DatetimeIndex.normalize : Return DatetimeIndex with times to midnight.
  130. DatetimeIndex.round : Round the DatetimeIndex to the specified freq.
  131. DatetimeIndex.floor : Floor the DatetimeIndex to the specified freq.
  132. Examples
  133. --------
  134. >>> rng = pd.date_range(pd.Timestamp("2018-03-10 09:00"),
  135. ... periods=3, freq='s')
  136. >>> rng.strftime('%%B %%d, %%Y, %%r')
  137. Index(['March 10, 2018, 09:00:00 AM', 'March 10, 2018, 09:00:01 AM',
  138. 'March 10, 2018, 09:00:02 AM'],
  139. dtype='object')
  140. """
  141. from pandas import Index
  142. return Index(self._format_native_types(date_format=date_format))
  143. class TimelikeOps(object):
  144. """
  145. Common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex.
  146. """
  147. _round_doc = (
  148. """
  149. Perform {op} operation on the data to the specified `freq`.
  150. Parameters
  151. ----------
  152. freq : str or Offset
  153. The frequency level to {op} the index to. Must be a fixed
  154. frequency like 'S' (second) not 'ME' (month end). See
  155. :ref:`frequency aliases <timeseries.offset_aliases>` for
  156. a list of possible `freq` values.
  157. ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
  158. Only relevant for DatetimeIndex:
  159. - 'infer' will attempt to infer fall dst-transition hours based on
  160. order
  161. - bool-ndarray where True signifies a DST time, False designates
  162. a non-DST time (note that this flag is only applicable for
  163. ambiguous times)
  164. - 'NaT' will return NaT where there are ambiguous times
  165. - 'raise' will raise an AmbiguousTimeError if there are ambiguous
  166. times
  167. .. versionadded:: 0.24.0
  168. nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta,
  169. default 'raise'
  170. A nonexistent time does not exist in a particular timezone
  171. where clocks moved forward due to DST.
  172. - 'shift_forward' will shift the nonexistent time forward to the
  173. closest existing time
  174. - 'shift_backward' will shift the nonexistent time backward to the
  175. closest existing time
  176. - 'NaT' will return NaT where there are nonexistent times
  177. - timedelta objects will shift nonexistent times by the timedelta
  178. - 'raise' will raise an NonExistentTimeError if there are
  179. nonexistent times
  180. .. versionadded:: 0.24.0
  181. Returns
  182. -------
  183. DatetimeIndex, TimedeltaIndex, or Series
  184. Index of the same type for a DatetimeIndex or TimedeltaIndex,
  185. or a Series with the same index for a Series.
  186. Raises
  187. ------
  188. ValueError if the `freq` cannot be converted.
  189. Examples
  190. --------
  191. **DatetimeIndex**
  192. >>> rng = pd.date_range('1/1/2018 11:59:00', periods=3, freq='min')
  193. >>> rng
  194. DatetimeIndex(['2018-01-01 11:59:00', '2018-01-01 12:00:00',
  195. '2018-01-01 12:01:00'],
  196. dtype='datetime64[ns]', freq='T')
  197. """)
  198. _round_example = (
  199. """>>> rng.round('H')
  200. DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00',
  201. '2018-01-01 12:00:00'],
  202. dtype='datetime64[ns]', freq=None)
  203. **Series**
  204. >>> pd.Series(rng).dt.round("H")
  205. 0 2018-01-01 12:00:00
  206. 1 2018-01-01 12:00:00
  207. 2 2018-01-01 12:00:00
  208. dtype: datetime64[ns]
  209. """)
  210. _floor_example = (
  211. """>>> rng.floor('H')
  212. DatetimeIndex(['2018-01-01 11:00:00', '2018-01-01 12:00:00',
  213. '2018-01-01 12:00:00'],
  214. dtype='datetime64[ns]', freq=None)
  215. **Series**
  216. >>> pd.Series(rng).dt.floor("H")
  217. 0 2018-01-01 11:00:00
  218. 1 2018-01-01 12:00:00
  219. 2 2018-01-01 12:00:00
  220. dtype: datetime64[ns]
  221. """
  222. )
  223. _ceil_example = (
  224. """>>> rng.ceil('H')
  225. DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00',
  226. '2018-01-01 13:00:00'],
  227. dtype='datetime64[ns]', freq=None)
  228. **Series**
  229. >>> pd.Series(rng).dt.ceil("H")
  230. 0 2018-01-01 12:00:00
  231. 1 2018-01-01 12:00:00
  232. 2 2018-01-01 13:00:00
  233. dtype: datetime64[ns]
  234. """
  235. )
  236. def _round(self, freq, mode, ambiguous, nonexistent):
  237. # round the local times
  238. values = _ensure_datetimelike_to_i8(self)
  239. result = round_nsint64(values, mode, freq)
  240. result = self._maybe_mask_results(result, fill_value=NaT)
  241. dtype = self.dtype
  242. if is_datetime64tz_dtype(self):
  243. dtype = None
  244. return self._ensure_localized(
  245. self._simple_new(result, dtype=dtype), ambiguous, nonexistent
  246. )
  247. @Appender((_round_doc + _round_example).format(op="round"))
  248. def round(self, freq, ambiguous='raise', nonexistent='raise'):
  249. return self._round(
  250. freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent
  251. )
  252. @Appender((_round_doc + _floor_example).format(op="floor"))
  253. def floor(self, freq, ambiguous='raise', nonexistent='raise'):
  254. return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent)
  255. @Appender((_round_doc + _ceil_example).format(op="ceil"))
  256. def ceil(self, freq, ambiguous='raise', nonexistent='raise'):
  257. return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent)
  258. class DatetimeLikeArrayMixin(ExtensionOpsMixin,
  259. AttributesMixin,
  260. ExtensionArray):
  261. """
  262. Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray
  263. Assumes that __new__/__init__ defines:
  264. _data
  265. _freq
  266. and that the inheriting class has methods:
  267. _generate_range
  268. """
  269. @property
  270. def _box_func(self):
  271. """
  272. box function to get object from internal representation
  273. """
  274. raise AbstractMethodError(self)
  275. def _box_values(self, values):
  276. """
  277. apply box func to passed values
  278. """
  279. return lib.map_infer(values, self._box_func)
  280. def __iter__(self):
  281. return (self._box_func(v) for v in self.asi8)
  282. @property
  283. def asi8(self):
  284. # type: () -> ndarray
  285. """
  286. Integer representation of the values.
  287. Returns
  288. -------
  289. ndarray
  290. An ndarray with int64 dtype.
  291. """
  292. # do not cache or you'll create a memory leak
  293. return self._data.view('i8')
  294. @property
  295. def _ndarray_values(self):
  296. return self._data
  297. # ----------------------------------------------------------------
  298. # Rendering Methods
  299. def _format_native_types(self, na_rep='NaT', date_format=None):
  300. """
  301. Helper method for astype when converting to strings.
  302. Returns
  303. -------
  304. ndarray[str]
  305. """
  306. raise AbstractMethodError(self)
  307. def _formatter(self, boxed=False):
  308. # TODO: Remove Datetime & DatetimeTZ formatters.
  309. return "'{}'".format
  310. # ----------------------------------------------------------------
  311. # Array-Like / EA-Interface Methods
  312. @property
  313. def nbytes(self):
  314. return self._data.nbytes
  315. def __array__(self, dtype=None):
  316. # used for Timedelta/DatetimeArray, overwritten by PeriodArray
  317. if is_object_dtype(dtype):
  318. return np.array(list(self), dtype=object)
  319. return self._data
  320. @property
  321. def shape(self):
  322. return (len(self),)
  323. @property
  324. def size(self):
  325. # type: () -> int
  326. """The number of elements in this array."""
  327. return np.prod(self.shape)
  328. def __len__(self):
  329. return len(self._data)
    def __getitem__(self, key):
        """
        This getitem defers to the underlying array, which by-definition can
        only handle list-likes, slices, and integer scalars

        An integer key returns ``self._box_func`` applied to the selected
        element.  Any other accepted key returns a new array built with
        ``self._simple_new``, except that a non-period array whose indexing
        result has more than one dimension returns that raw result.

        Raises
        ------
        IndexError
            If `key` is a scalar that is not an integer.
        """
        is_int = lib.is_integer(key)
        if lib.is_scalar(key) and not is_int:
            raise IndexError("only integers, slices (`:`), ellipsis (`...`), "
                             "numpy.newaxis (`None`) and integer or boolean "
                             "arrays are valid indices")

        getitem = self._data.__getitem__
        if is_int:
            # scalar access: box the single raw value
            val = getitem(key)
            return self._box_func(val)

        if com.is_bool_indexer(key):
            key = np.asarray(key, dtype=bool)
            if key.all():
                # an all-True mask is replaced by a full slice
                key = slice(0, None, None)
            else:
                # presumably converts a contiguous mask into a slice and
                # otherwise hands the mask back -- TODO confirm against
                # lib.maybe_booleans_to_slice
                key = lib.maybe_booleans_to_slice(key.view(np.uint8))

        is_period = is_period_dtype(self)
        if is_period:
            # period arrays always carry their freq over
            freq = self.freq
        else:
            # otherwise freq survives only slicing / Ellipsis indexing
            freq = None
            if isinstance(key, slice):
                if self.freq is not None and key.step is not None:
                    # a stepped slice scales the freq by the step
                    freq = key.step * self.freq
                else:
                    freq = self.freq
            elif key is Ellipsis:
                # GH#21282 indexing with Ellipsis is similar to a full slice,
                # should preserve `freq` attribute
                freq = self.freq

        result = getitem(key)
        if result.ndim > 1:
            # To support MPL which performs slicing with 2 dim
            # even though it only has 1 dim by definition
            if is_period:
                return self._simple_new(result, dtype=self.dtype, freq=freq)
            return result

        return self._simple_new(result, dtype=self.dtype, freq=freq)
  372. def __setitem__(
  373. self,
  374. key, # type: Union[int, Sequence[int], Sequence[bool], slice]
  375. value, # type: Union[NaTType, Scalar, Sequence[Scalar]]
  376. ):
  377. # type: (...) -> None
  378. # I'm fudging the types a bit here. The "Scalar" above really depends
  379. # on type(self). For PeriodArray, it's Period (or stuff coercible
  380. # to a period in from_sequence). For DatetimeArray, it's Timestamp...
  381. # I don't know if mypy can do that, possibly with Generics.
  382. # https://mypy.readthedocs.io/en/latest/generics.html
  383. if is_list_like(value):
  384. is_slice = isinstance(key, slice)
  385. if lib.is_scalar(key):
  386. raise ValueError("setting an array element with a sequence.")
  387. if (not is_slice
  388. and len(key) != len(value)
  389. and not com.is_bool_indexer(key)):
  390. msg = ("shape mismatch: value array of length '{}' does not "
  391. "match indexing result of length '{}'.")
  392. raise ValueError(msg.format(len(key), len(value)))
  393. if not is_slice and len(key) == 0:
  394. return
  395. value = type(self)._from_sequence(value, dtype=self.dtype)
  396. self._check_compatible_with(value)
  397. value = value.asi8
  398. elif isinstance(value, self._scalar_type):
  399. self._check_compatible_with(value)
  400. value = self._unbox_scalar(value)
  401. elif isna(value) or value == iNaT:
  402. value = iNaT
  403. else:
  404. msg = (
  405. "'value' should be a '{scalar}', 'NaT', or array of those. "
  406. "Got '{typ}' instead."
  407. )
  408. raise TypeError(msg.format(scalar=self._scalar_type.__name__,
  409. typ=type(value).__name__))
  410. self._data[key] = value
  411. self._maybe_clear_freq()
  412. def _maybe_clear_freq(self):
  413. # inplace operations like __setitem__ may invalidate the freq of
  414. # DatetimeArray and TimedeltaArray
  415. pass
  416. def astype(self, dtype, copy=True):
  417. # Some notes on cases we don't have to handle here in the base class:
  418. # 1. PeriodArray.astype handles period -> period
  419. # 2. DatetimeArray.astype handles conversion between tz.
  420. # 3. DatetimeArray.astype handles datetime -> period
  421. from pandas import Categorical
  422. dtype = pandas_dtype(dtype)
  423. if is_object_dtype(dtype):
  424. return self._box_values(self.asi8)
  425. elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
  426. return self._format_native_types()
  427. elif is_integer_dtype(dtype):
  428. # we deliberately ignore int32 vs. int64 here.
  429. # See https://github.com/pandas-dev/pandas/issues/24381 for more.
  430. values = self.asi8
  431. if is_unsigned_integer_dtype(dtype):
  432. # Again, we ignore int32 vs. int64
  433. values = values.view("uint64")
  434. if copy:
  435. values = values.copy()
  436. return values
  437. elif (is_datetime_or_timedelta_dtype(dtype) and
  438. not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
  439. # disallow conversion between datetime/timedelta,
  440. # and conversions for any datetimelike to float
  441. msg = 'Cannot cast {name} to dtype {dtype}'
  442. raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
  443. elif is_categorical_dtype(dtype):
  444. return Categorical(self, dtype=dtype)
  445. else:
  446. return np.asarray(self, dtype=dtype)
  447. def view(self, dtype=None):
  448. """
  449. New view on this array with the same data.
  450. Parameters
  451. ----------
  452. dtype : numpy dtype, optional
  453. Returns
  454. -------
  455. ndarray
  456. With the specified `dtype`.
  457. """
  458. return self._data.view(dtype=dtype)
  459. # ------------------------------------------------------------------
  460. # ExtensionArray Interface
  461. def unique(self):
  462. result = unique1d(self.asi8)
  463. return type(self)(result, dtype=self.dtype)
  464. def _validate_fill_value(self, fill_value):
  465. """
  466. If a fill_value is passed to `take` convert it to an i8 representation,
  467. raising ValueError if this is not possible.
  468. Parameters
  469. ----------
  470. fill_value : object
  471. Returns
  472. -------
  473. fill_value : np.int64
  474. Raises
  475. ------
  476. ValueError
  477. """
  478. raise AbstractMethodError(self)
  479. def take(self, indices, allow_fill=False, fill_value=None):
  480. if allow_fill:
  481. fill_value = self._validate_fill_value(fill_value)
  482. new_values = take(self.asi8,
  483. indices,
  484. allow_fill=allow_fill,
  485. fill_value=fill_value)
  486. return type(self)(new_values, dtype=self.dtype)
  487. @classmethod
  488. def _concat_same_type(cls, to_concat):
  489. dtypes = {x.dtype for x in to_concat}
  490. assert len(dtypes) == 1
  491. dtype = list(dtypes)[0]
  492. values = np.concatenate([x.asi8 for x in to_concat])
  493. return cls(values, dtype=dtype)
  494. def copy(self, deep=False):
  495. values = self.asi8.copy()
  496. return type(self)._simple_new(values, dtype=self.dtype, freq=self.freq)
  497. def _values_for_factorize(self):
  498. return self.asi8, iNaT
  499. @classmethod
  500. def _from_factorized(cls, values, original):
  501. return cls(values, dtype=original.dtype)
  502. def _values_for_argsort(self):
  503. return self._data
  504. # ------------------------------------------------------------------
  505. # Additional array methods
  506. # These are not part of the EA API, but we implement them because
  507. # pandas assumes they're there.
  508. def searchsorted(self, value, side='left', sorter=None):
  509. """
  510. Find indices where elements should be inserted to maintain order.
  511. Find the indices into a sorted array `self` such that, if the
  512. corresponding elements in `value` were inserted before the indices,
  513. the order of `self` would be preserved.
  514. Parameters
  515. ----------
  516. value : array_like
  517. Values to insert into `self`.
  518. side : {'left', 'right'}, optional
  519. If 'left', the index of the first suitable location found is given.
  520. If 'right', return the last such index. If there is no suitable
  521. index, return either 0 or N (where N is the length of `self`).
  522. sorter : 1-D array_like, optional
  523. Optional array of integer indices that sort `self` into ascending
  524. order. They are typically the result of ``np.argsort``.
  525. Returns
  526. -------
  527. indices : array of ints
  528. Array of insertion points with the same shape as `value`.
  529. """
  530. if isinstance(value, compat.string_types):
  531. value = self._scalar_from_string(value)
  532. if not (isinstance(value, (self._scalar_type, type(self)))
  533. or isna(value)):
  534. raise ValueError("Unexpected type for 'value': {valtype}"
  535. .format(valtype=type(value)))
  536. self._check_compatible_with(value)
  537. if isinstance(value, type(self)):
  538. value = value.asi8
  539. else:
  540. value = self._unbox_scalar(value)
  541. return self.asi8.searchsorted(value, side=side, sorter=sorter)
  542. def repeat(self, repeats, *args, **kwargs):
  543. """
  544. Repeat elements of an array.
  545. See Also
  546. --------
  547. numpy.ndarray.repeat
  548. """
  549. nv.validate_repeat(args, kwargs)
  550. values = self._data.repeat(repeats)
  551. return type(self)(values.view('i8'), dtype=self.dtype)
  552. def value_counts(self, dropna=False):
  553. """
  554. Return a Series containing counts of unique values.
  555. Parameters
  556. ----------
  557. dropna : boolean, default True
  558. Don't include counts of NaT values.
  559. Returns
  560. -------
  561. Series
  562. """
  563. from pandas import Series, Index
  564. if dropna:
  565. values = self[~self.isna()]._data
  566. else:
  567. values = self._data
  568. cls = type(self)
  569. result = value_counts(values, sort=False, dropna=dropna)
  570. index = Index(cls(result.index.view('i8'), dtype=self.dtype),
  571. name=result.index.name)
  572. return Series(result.values, index=index, name=result.name)
  573. def map(self, mapper):
  574. # TODO(GH-23179): Add ExtensionArray.map
  575. # Need to figure out if we want ExtensionArray.map first.
  576. # If so, then we can refactor IndexOpsMixin._map_values to
  577. # a standalone function and call from here..
  578. # Else, just rewrite _map_infer_values to do the right thing.
  579. from pandas import Index
  580. return Index(self).map(mapper).array
  581. # ------------------------------------------------------------------
  582. # Null Handling
  583. def isna(self):
  584. return self._isnan
  585. @property # NB: override with cache_readonly in immutable subclasses
  586. def _isnan(self):
  587. """
  588. return if each value is nan
  589. """
  590. return (self.asi8 == iNaT)
  591. @property # NB: override with cache_readonly in immutable subclasses
  592. def _hasnans(self):
  593. """
  594. return if I have any nans; enables various perf speedups
  595. """
  596. return bool(self._isnan.any())
  597. def _maybe_mask_results(self, result, fill_value=iNaT, convert=None):
  598. """
  599. Parameters
  600. ----------
  601. result : a ndarray
  602. fill_value : object, default iNaT
  603. convert : string/dtype or None
  604. Returns
  605. -------
  606. result : ndarray with values replace by the fill_value
  607. mask the result if needed, convert to the provided dtype if its not
  608. None
  609. This is an internal routine
  610. """
  611. if self._hasnans:
  612. if convert:
  613. result = result.astype(convert)
  614. if fill_value is None:
  615. fill_value = np.nan
  616. result[self._isnan] = fill_value
  617. return result
    def fillna(self, value=None, method=None, limit=None):
        """
        Return a new array with missing entries filled.

        Parameters
        ----------
        value : scalar, array-like or Series, optional
            Replacement value(s).  A Series is replaced by its ``.array``;
            an array-like must have the same length as ``self``.
        method : str, optional
            When not None, ``'pad'`` selects ``missing.pad_1d`` and any
            other value selects ``missing.backfill_1d``.
        limit : int, optional
            Forwarded to the pad/backfill routine.

        Raises
        ------
        ValueError
            If an array-like `value` has a different length than ``self``.
        """
        # TODO(GH-20300): remove this
        # Just overriding to ensure that we avoid an astype(object).
        # Either 20300 or a `_values_for_fillna` would avoid this duplication.
        if isinstance(value, ABCSeries):
            value = value.array

        value, method = validate_fillna_kwargs(value, method)

        mask = self.isna()

        if is_array_like(value):
            if len(value) != len(self):
                raise ValueError("Length of 'value' does not match. Got ({}) "
                                 " expected {}".format(len(value), len(self)))
            # keep only the replacement values at the missing positions
            value = value[mask]

        if mask.any():
            if method is not None:
                if method == 'pad':
                    func = missing.pad_1d
                else:
                    func = missing.backfill_1d

                values = self._data
                if not is_period_dtype(self):
                    # For PeriodArray self._data is i8, which gets copied
                    # by `func`. Otherwise we need to make a copy manually
                    # to avoid modifying `self` in-place.
                    values = values.copy()

                new_values = func(values, limit=limit,
                                  mask=mask)
                if is_datetime64tz_dtype(self):
                    # we need to pass int64 values to the constructor to avoid
                    # re-localizing incorrectly
                    new_values = new_values.view("i8")
                new_values = type(self)(new_values, dtype=self.dtype)
            else:
                # fill with value
                new_values = self.copy()
                new_values[mask] = value
        else:
            # nothing is missing: just hand back a copy
            new_values = self.copy()
        return new_values
  657. # ------------------------------------------------------------------
  658. # Frequency Properties/Methods
  659. @property
  660. def freq(self):
  661. """
  662. Return the frequency object if it is set, otherwise None.
  663. """
  664. return self._freq
  665. @freq.setter
  666. def freq(self, value):
  667. if value is not None:
  668. value = frequencies.to_offset(value)
  669. self._validate_frequency(self, value)
  670. self._freq = value
  671. @property
  672. def freqstr(self):
  673. """
  674. Return the frequency object as a string if its set, otherwise None
  675. """
  676. if self.freq is None:
  677. return None
  678. return self.freq.freqstr
  679. @property # NB: override with cache_readonly in immutable subclasses
  680. def inferred_freq(self):
  681. """
  682. Tryies to return a string representing a frequency guess,
  683. generated by infer_freq. Returns None if it can't autodetect the
  684. frequency.
  685. """
  686. try:
  687. return frequencies.infer_freq(self)
  688. except ValueError:
  689. return None
  690. @property # NB: override with cache_readonly in immutable subclasses
  691. def _resolution(self):
  692. return frequencies.Resolution.get_reso_from_freq(self.freqstr)
  693. @property # NB: override with cache_readonly in immutable subclasses
  694. def resolution(self):
  695. """
  696. Returns day, hour, minute, second, millisecond or microsecond
  697. """
  698. return frequencies.Resolution.get_str(self._resolution)
    @classmethod
    def _validate_frequency(cls, index, freq, **kwargs):
        """
        Validate that a frequency is compatible with the values of a given
        Datetime Array/Index or Timedelta Array/Index

        Parameters
        ----------
        index : DatetimeIndex or TimedeltaIndex
            The index on which to determine if the given frequency is valid
        freq : DateOffset
            The frequency to validate
        **kwargs
            Forwarded to ``cls._generate_range``.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If the values of `index` do not match a range generated from its
            first element with `freq`.
        """
        if is_period_dtype(cls):
            # Frequency validation is not meaningful for Period Array/Index
            return None

        inferred = index.inferred_freq
        if index.size == 0 or inferred == freq.freqstr:
            # empty input, or the inferred freq already agrees: nothing to do
            return None

        try:
            # regenerate a range from the first element and compare the
            # int64 values against the ones actually passed
            on_freq = cls._generate_range(start=index[0], end=None,
                                          periods=len(index), freq=freq,
                                          **kwargs)
            if not np.array_equal(index.asi8, on_freq.asi8):
                # deliberately caught by the handler below
                raise ValueError

        except ValueError as e:
            if "non-fixed" in str(e):
                # non-fixed frequencies are not meaningful for timedelta64;
                # we retain that error message
                raise e
            # GH#11587 the main way this is reached is if the `np.array_equal`
            # check above is False.  This can also be reached if index[0]
            # is `NaT`, in which case the call to `cls._generate_range` will
            # raise a ValueError, which we re-raise with a more targeted
            # message.
            raise ValueError('Inferred frequency {infer} from passed values '
                             'does not conform to passed frequency {passed}'
                             .format(infer=inferred, passed=freq.freqstr))
  736. # monotonicity/uniqueness properties are called via frequencies.infer_freq,
  737. # see GH#23789
  @property
  def _is_monotonic_increasing(self):
      # First element of the result tuple returned by algos.is_monotonic.
      flags = algos.is_monotonic(self.asi8, timelike=True)
      return flags[0]
  @property
  def _is_monotonic_decreasing(self):
      # Second element of the result tuple returned by algos.is_monotonic.
      flags = algos.is_monotonic(self.asi8, timelike=True)
      return flags[1]
  @property
  def _is_unique(self):
      # Unique iff de-duplicating the i8 values leaves the length unchanged.
      distinct = unique1d(self.asi8)
      return len(distinct) == len(self)
  747. # ------------------------------------------------------------------
  748. # Arithmetic Methods
  749. def _add_datetimelike_scalar(self, other):
  750. # Overriden by TimedeltaArray
  751. raise TypeError("cannot add {cls} and {typ}"
  752. .format(cls=type(self).__name__,
  753. typ=type(other).__name__))
  754. _add_datetime_arraylike = _add_datetimelike_scalar
  755. def _sub_datetimelike_scalar(self, other):
  756. # Overridden by DatetimeArray
  757. assert other is not NaT
  758. raise TypeError("cannot subtract a datelike from a {cls}"
  759. .format(cls=type(self).__name__))
  760. _sub_datetime_arraylike = _sub_datetimelike_scalar
  761. def _sub_period(self, other):
  762. # Overriden by PeriodArray
  763. raise TypeError("cannot subtract Period from a {cls}"
  764. .format(cls=type(self).__name__))
  765. def _add_offset(self, offset):
  766. raise AbstractMethodError(self)
  767. def _add_delta(self, other):
  768. """
  769. Add a timedelta-like, Tick or TimedeltaIndex-like object
  770. to self, yielding an int64 numpy array
  771. Parameters
  772. ----------
  773. delta : {timedelta, np.timedelta64, Tick,
  774. TimedeltaIndex, ndarray[timedelta64]}
  775. Returns
  776. -------
  777. result : ndarray[int64]
  778. Notes
  779. -----
  780. The result's name is set outside of _add_delta by the calling
  781. method (__add__ or __sub__), if necessary (i.e. for Indexes).
  782. """
  783. if isinstance(other, (Tick, timedelta, np.timedelta64)):
  784. new_values = self._add_timedeltalike_scalar(other)
  785. elif is_timedelta64_dtype(other):
  786. # ndarray[timedelta64] or TimedeltaArray/index
  787. new_values = self._add_delta_tdi(other)
  788. return new_values
  789. def _add_timedeltalike_scalar(self, other):
  790. """
  791. Add a delta of a timedeltalike
  792. return the i8 result view
  793. """
  794. if isna(other):
  795. # i.e np.timedelta64("NaT"), not recognized by delta_to_nanoseconds
  796. new_values = np.empty(len(self), dtype='i8')
  797. new_values[:] = iNaT
  798. return new_values
  799. inc = delta_to_nanoseconds(other)
  800. new_values = checked_add_with_arr(self.asi8, inc,
  801. arr_mask=self._isnan).view('i8')
  802. new_values = self._maybe_mask_results(new_values)
  803. return new_values.view('i8')
  804. def _add_delta_tdi(self, other):
  805. """
  806. Add a delta of a TimedeltaIndex
  807. return the i8 result view
  808. """
  809. if len(self) != len(other):
  810. raise ValueError("cannot add indices of unequal length")
  811. if isinstance(other, np.ndarray):
  812. # ndarray[timedelta64]; wrap in TimedeltaIndex for op
  813. from pandas import TimedeltaIndex
  814. other = TimedeltaIndex(other)
  815. self_i8 = self.asi8
  816. other_i8 = other.asi8
  817. new_values = checked_add_with_arr(self_i8, other_i8,
  818. arr_mask=self._isnan,
  819. b_mask=other._isnan)
  820. if self._hasnans or other._hasnans:
  821. mask = (self._isnan) | (other._isnan)
  822. new_values[mask] = iNaT
  823. return new_values.view('i8')
  def _add_nat(self):
      """
      Add pd.NaT to self

      Returns
      -------
      Same class as self, with every element missing and no freq.

      Raises
      ------
      TypeError
          If self has period dtype.
      """
      if is_period_dtype(self):
          message = 'Cannot add {cls} and {typ}'.format(
              cls=type(self).__name__,
              typ=type(NaT).__name__,
          )
          raise TypeError(message)

      # GH#19124 pd.NaT is treated like a timedelta for both timedelta
      # and datetime dtypes
      all_missing = np.full(len(self), iNaT, dtype=np.int64)
      return type(self)(all_missing, dtype=self.dtype, freq=None)
  837. def _sub_nat(self):
  838. """
  839. Subtract pd.NaT from self
  840. """
  841. # GH#19124 Timedelta - datetime is not in general well-defined.
  842. # We make an exception for pd.NaT, which in this case quacks
  843. # like a timedelta.
  844. # For datetime64 dtypes by convention we treat NaT as a datetime, so
  845. # this subtraction returns a timedelta64 dtype.
  846. # For period dtype, timedelta64 is a close-enough return dtype.
  847. result = np.zeros(len(self), dtype=np.int64)
  848. result.fill(iNaT)
  849. return result.view('timedelta64[ns]')
  def _sub_period_array(self, other):
      """
      Subtract a Period Array/Index from self. This is only valid if self
      is itself a Period Array/Index, raises otherwise. Both objects must
      have the same frequency.

      Parameters
      ----------
      other : PeriodIndex or PeriodArray

      Returns
      -------
      result : np.ndarray[object]
          Array of DateOffset objects; nulls represented by NaT

      Raises
      ------
      TypeError
          If self is not period-dtyped.
      ValueError
          If the operands differ in length.
      IncompatibleFrequency
          If the operands have different frequencies.
      """
      if not is_period_dtype(self):
          message = "cannot subtract {dtype}-dtype from {cls}".format(
              dtype=other.dtype,
              cls=type(self).__name__,
          )
          raise TypeError(message)

      if len(self) != len(other):
          raise ValueError("cannot subtract arrays/indices of "
                           "unequal length")

      if self.freq != other.freq:
          raise IncompatibleFrequency(
              DIFFERENT_FREQ.format(cls=type(self).__name__,
                                    own_freq=self.freqstr,
                                    other_freq=other.freqstr)
          )

      ordinal_diffs = checked_add_with_arr(
          self.asi8,
          -other.asi8,
          arr_mask=self._isnan,
          b_mask=other._isnan,
      )

      # Scale the base offset by each integer difference.
      base = self.freq.base
      result = np.array([base * diff for diff in ordinal_diffs])
      if self._hasnans or other._hasnans:
          either_missing = (self._isnan) | (other._isnan)
          result[either_missing] = NaT
      return result
  def _addsub_int_array(self, other, op):
      """
      Add or subtract array-like of integers equivalent to applying
      `_time_shift` pointwise.

      Parameters
      ----------
      other : Index, ExtensionArray, np.ndarray
          integer-dtype
      op : {operator.add, operator.sub}

      Returns
      -------
      result : same class as self

      Raises
      ------
      NullFrequencyError
          If self has no freq.
      """
      # _addsub_int_array is overridden by PeriodArray
      assert not is_period_dtype(self)
      assert op in (operator.add, operator.sub)

      own_freq = self.freq
      if own_freq is None:
          # GH#19123
          raise NullFrequencyError("Cannot shift with no freq")

      if isinstance(own_freq, Tick):
          # easy case where we can convert to timedelta64 operation
          step = Timedelta(own_freq)
          return op(self, step * other)

      # We should only get here with DatetimeIndex; dispatch
      # to _addsub_offset_array
      assert not is_timedelta64_dtype(self)
      offsets = np.array(other) * self.freq
      return op(self, offsets)
  910. def _addsub_offset_array(self, other, op):
  911. """
  912. Add or subtract array-like of DateOffset objects
  913. Parameters
  914. ----------
  915. other : Index, np.ndarray
  916. object-dtype containing pd.DateOffset objects
  917. op : {operator.add, operator.sub}
  918. Returns
  919. -------
  920. result : same class as self
  921. """
  922. assert op in [operator.add, operator.sub]
  923. if len(other) == 1:
  924. return op(self, other[0])
  925. warnings.warn("Adding/subtracting array of DateOffsets to "
  926. "{cls} not vectorized"
  927. .format(cls=type(self).__name__), PerformanceWarning)
  928. # For EA self.astype('O') returns a numpy array, not an Index
  929. left = lib.values_from_object(self.astype('O'))
  930. res_values = op(left, np.array(other))
  931. kwargs = {}
  932. if not is_period_dtype(self):
  933. kwargs['freq'] = 'infer'
  934. return self._from_sequence(res_values, **kwargs)
  935. def _time_shift(self, periods, freq=None):
  936. """
  937. Shift each value by `periods`.
  938. Note this is different from ExtensionArray.shift, which
  939. shifts the *position* of each element, padding the end with
  940. missing values.
  941. Parameters
  942. ----------
  943. periods : int
  944. Number of periods to shift by.
  945. freq : pandas.DateOffset, pandas.Timedelta, or string
  946. Frequency increment to shift by.
  947. """
  948. if freq is not None and freq != self.freq:
  949. if isinstance(freq, compat.string_types):
  950. freq = frequencies.to_offset(freq)
  951. offset = periods * freq
  952. result = self + offset
  953. return result
  954. if periods == 0:
  955. # immutable so OK
  956. return self.copy()
  957. if self.freq is None:
  958. raise NullFrequencyError("Cannot shift with no freq")
  959. start = self[0] + periods * self.freq
  960. end = self[-1] + periods * self.freq
  961. # Note: in the DatetimeTZ case, _generate_range will infer the
  962. # appropriate timezone from `start` and `end`, so tz does not need
  963. # to be passed explicitly.
  964. return self._generate_range(start=start, end=end, periods=None,
  965. freq=self.freq)
  def __add__(self, other):
      """
      Dispatch ``self + other`` on the type/dtype of `other`.

      Returns NotImplemented for operands this class defers on, so that
      Python can try the other operand's reflected method.
      """
      other = lib.item_from_zerodim(other)
      if isinstance(other, (ABCSeries, ABCDataFrame)):
          return NotImplemented

      # ---- scalar operands ----
      if other is NaT:
          result = self._add_nat()
      elif isinstance(other, (Tick, timedelta, np.timedelta64)):
          result = self._add_delta(other)
      elif isinstance(other, DateOffset):
          # specifically _not_ a Tick
          result = self._add_offset(other)
      elif isinstance(other, (datetime, np.datetime64)):
          result = self._add_datetimelike_scalar(other)
      elif lib.is_integer(other):
          # This check must come after the check for np.timedelta64
          # as is_integer returns True for these
          if not is_period_dtype(self):
              maybe_integer_op_deprecated(self)
          result = self._time_shift(other)

      # ---- array-like operands ----
      elif is_timedelta64_dtype(other):
          # TimedeltaIndex, ndarray[timedelta64]
          result = self._add_delta(other)
      elif is_offsetlike(other):
          # Array/Index of DateOffset objects
          result = self._addsub_offset_array(other, operator.add)
      elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
          # DatetimeIndex, ndarray[datetime64]
          return self._add_datetime_arraylike(other)
      elif is_integer_dtype(other):
          if not is_period_dtype(self):
              maybe_integer_op_deprecated(self)
          result = self._addsub_int_array(other, operator.add)
      elif is_float_dtype(other):
          # Explicitly catch invalid dtypes
          message = "cannot add {dtype}-dtype to {cls}".format(
              dtype=other.dtype,
              cls=type(self).__name__,
          )
          raise TypeError(message)
      elif is_period_dtype(other):
          # if self is a TimedeltaArray and other is a PeriodArray with
          # a timedelta-like (i.e. Tick) freq, this operation is valid.
          # Defer to the PeriodArray implementation.
          # In remaining cases, this will end up raising TypeError.
          return NotImplemented
      elif is_extension_array_dtype(other):
          # Categorical op will raise; defer explicitly
          return NotImplemented
      else:  # pragma: no cover
          return NotImplemented

      # Raw timedelta64 ndarrays are wrapped before being handed back.
      is_raw_td64 = (is_timedelta64_dtype(result) and
                     isinstance(result, np.ndarray))
      if is_raw_td64:
          from pandas.core.arrays import TimedeltaArray
          # TODO: infer freq?
          return TimedeltaArray(result)
      return result
  1021. def __radd__(self, other):
  1022. # alias for __add__
  1023. return self.__add__(other)
  def __sub__(self, other):
      """
      Dispatch ``self - other`` on the type/dtype of `other`.

      Returns NotImplemented for operands this class defers on, so that
      Python can try the other operand's reflected method.
      """
      other = lib.item_from_zerodim(other)
      if isinstance(other, (ABCSeries, ABCDataFrame)):
          return NotImplemented

      # ---- scalar operands ----
      if other is NaT:
          result = self._sub_nat()
      elif isinstance(other, (Tick, timedelta, np.timedelta64)):
          result = self._add_delta(-other)
      elif isinstance(other, DateOffset):
          # specifically _not_ a Tick
          result = self._add_offset(-other)
      elif isinstance(other, (datetime, np.datetime64)):
          result = self._sub_datetimelike_scalar(other)
      elif lib.is_integer(other):
          # This check must come after the check for np.timedelta64
          # as is_integer returns True for these
          if not is_period_dtype(self):
              maybe_integer_op_deprecated(self)
          result = self._time_shift(-other)
      elif isinstance(other, Period):
          result = self._sub_period(other)

      # ---- array-like operands ----
      elif is_timedelta64_dtype(other):
          # TimedeltaIndex, ndarray[timedelta64]
          result = self._add_delta(-other)
      elif is_offsetlike(other):
          # Array/Index of DateOffset objects
          result = self._addsub_offset_array(other, operator.sub)
      elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
          # DatetimeIndex, ndarray[datetime64]
          result = self._sub_datetime_arraylike(other)
      elif is_period_dtype(other):
          # PeriodIndex
          result = self._sub_period_array(other)
      elif is_integer_dtype(other):
          if not is_period_dtype(self):
              maybe_integer_op_deprecated(self)
          result = self._addsub_int_array(other, operator.sub)
      elif isinstance(other, ABCIndexClass):
          message = "cannot subtract {cls} and {typ}".format(
              cls=type(self).__name__,
              typ=type(other).__name__,
          )
          raise TypeError(message)
      elif is_float_dtype(other):
          # Explicitly catch invalid dtypes
          message = "cannot subtract {dtype}-dtype from {cls}".format(
              dtype=other.dtype,
              cls=type(self).__name__,
          )
          raise TypeError(message)
      elif is_extension_array_dtype(other):
          # Categorical op will raise; defer explicitly
          return NotImplemented
      else:  # pragma: no cover
          return NotImplemented

      # Raw timedelta64 ndarrays are wrapped before being handed back.
      is_raw_td64 = (is_timedelta64_dtype(result) and
                     isinstance(result, np.ndarray))
      if is_raw_td64:
          from pandas.core.arrays import TimedeltaArray
          # TODO: infer freq?
          return TimedeltaArray(result)
      return result
  def __rsub__(self, other):
      """
      Reflected subtraction: compute ``other - self``.

      Raises
      ------
      TypeError
          For operand combinations rejected by the checks below.
      """
      if is_datetime64_dtype(other) and is_timedelta64_dtype(self):
          # ndarray[datetime64] cannot be subtracted from self, so
          # we need to wrap in DatetimeArray/Index and flip the operation
          if not isinstance(other, DatetimeLikeArrayMixin):
              # Avoid down-casting DatetimeIndex
              from pandas.core.arrays import DatetimeArray
              other = DatetimeArray(other)
          return other - self

      if (is_datetime64_any_dtype(self) and hasattr(other, 'dtype') and
              not is_datetime64_any_dtype(other)):
          # GH#19959 datetime - datetime is well-defined as timedelta,
          # but any other type - datetime is not well-defined.
          message = "cannot subtract {cls} from {typ}".format(
              cls=type(self).__name__,
              typ=type(other).__name__,
          )
          raise TypeError(message)

      if is_period_dtype(self) and is_timedelta64_dtype(other):
          # TODO: Can we simplify/generalize these cases at all?
          message = "cannot subtract {cls} from {dtype}".format(
              cls=type(self).__name__,
              dtype=other.dtype,
          )
          raise TypeError(message)

      # General case: other - self == -(self - other)
      difference = self - other
      return -difference
  1104. # FIXME: DTA/TDA/PA inplace methods should actually be inplace, GH#24115
  1105. def __iadd__(self, other):
  1106. # alias for __add__
  1107. return self.__add__(other)
  1108. def __isub__(self, other):
  1109. # alias for __sub__
  1110. return self.__sub__(other)
  1111. # --------------------------------------------------------------
  1112. # Comparison Methods
  1113. def _ensure_localized(self, arg, ambiguous='raise', nonexistent='raise',
  1114. from_utc=False):
  1115. """
  1116. Ensure that we are re-localized.
  1117. This is for compat as we can then call this on all datetimelike
  1118. arrays generally (ignored for Period/Timedelta)
  1119. Parameters
  1120. ----------
  1121. arg : Union[DatetimeLikeArray, DatetimeIndexOpsMixin, ndarray]
  1122. ambiguous : str, bool, or bool-ndarray, default 'raise'
  1123. nonexistent : str, default 'raise'
  1124. from_utc : bool, default False
  1125. If True, localize the i8 ndarray to UTC first before converting to
  1126. the appropriate tz. If False, localize directly to the tz.
  1127. Returns
  1128. -------
  1129. localized array
  1130. """
  1131. # reconvert to local tz
  1132. tz = getattr(self, 'tz', None)
  1133. if tz is not None:
  1134. if not isinstance(arg, type(self)):
  1135. arg = self._simple_new(arg)
  1136. if from_utc:
  1137. arg = arg.tz_localize('UTC').tz_convert(self.tz)
  1138. else:
  1139. arg = arg.tz_localize(
  1140. self.tz, ambiguous=ambiguous, nonexistent=nonexistent
  1141. )
  1142. return arg
  1143. # --------------------------------------------------------------
  1144. # Reductions
  1145. def _reduce(self, name, axis=0, skipna=True, **kwargs):
  1146. op = getattr(self, name, None)
  1147. if op:
  1148. return op(axis=axis, skipna=skipna, **kwargs)
  1149. else:
  1150. return super(DatetimeLikeArrayMixin, self)._reduce(
  1151. name, skipna, **kwargs
  1152. )
  def min(self, axis=None, skipna=True, *args, **kwargs):
      """
      Return the minimum value of the Array or minimum along
      an axis.

      See Also
      --------
      numpy.ndarray.min
      Index.min : Return the minimum value in an Index.
      Series.min : Return the minimum value in a Series.
      """
      nv.validate_min(args, kwargs)
      nv.validate_minmax_axis(axis)

      smallest = nanops.nanmin(self.asi8, skipna=skipna, mask=self.isna())
      if not isna(smallest):
          return self._box_func(smallest)
      # Period._from_ordinal does not handle np.nan gracefully
      return NaT
  def max(self, axis=None, skipna=True, *args, **kwargs):
      """
      Return the maximum value of the Array or maximum along
      an axis.

      See Also
      --------
      numpy.ndarray.max
      Index.max : Return the maximum value in an Index.
      Series.max : Return the maximum value in a Series.
      """
      # TODO: skipna is broken with max.
      # See https://github.com/pandas-dev/pandas/issues/24265
      nv.validate_max(args, kwargs)
      nv.validate_minmax_axis(axis)

      missing = self.isna()
      if skipna:
          # Drop missing entries before reducing.
          candidates = self[~missing].asi8
      elif missing.any():
          # Not skipping and something is missing: the result is missing.
          return NaT
      else:
          candidates = self.asi8

      if len(candidates) == 0:
          # short-circuit for empty max / min
          return NaT

      largest = nanops.nanmax(candidates, skipna=skipna)
      # Don't have to worry about NA `result`, since no NA went in.
      return self._box_func(largest)
  1197. # -------------------------------------------------------------------
  1198. # Shared Constructor Helpers
  def validate_periods(periods):
      """
      If a `periods` argument is passed to the Datetime/Timedelta Array/Index
      constructor, cast it to an integer.

      Parameters
      ----------
      periods : None, float, int

      Returns
      -------
      periods : None or int

      Raises
      ------
      TypeError
          if periods is not None, float, or int
      """
      if periods is None:
          return None
      if lib.is_float(periods):
          # Floats are cast with int().
          return int(periods)
      if lib.is_integer(periods):
          return periods
      raise TypeError('periods must be a number, got {periods}'
                      .format(periods=periods))
  def validate_endpoints(closed):
      """
      Check that the `closed` argument is among [None, "left", "right"]

      Parameters
      ----------
      closed : {None, "left", "right"}

      Returns
      -------
      left_closed : bool
      right_closed : bool

      Raises
      ------
      ValueError : if argument is not among valid values
      """
      if closed is None:
          # Both endpoints are included by default.
          return True, True
      if closed == "left":
          return True, False
      if closed == "right":
          return False, True
      raise ValueError("Closed has to be either 'left', 'right' or None")
  def validate_inferred_freq(freq, inferred_freq, freq_infer):
      """
      If the user passes a freq and another freq is inferred from passed data,
      require that they match.

      Parameters
      ----------
      freq : DateOffset or None
      inferred_freq : DateOffset or None
      freq_infer : bool

      Returns
      -------
      freq : DateOffset or None
      freq_infer : bool

      Raises
      ------
      ValueError
          If both `freq` and `inferred_freq` are given and they differ.

      Notes
      -----
      We assume at this point that `maybe_infer_freq` has been called, so
      `freq` is either a DateOffset object or None.
      """
      if inferred_freq is None:
          # Nothing was inferred, so there is nothing to reconcile.
          return freq, freq_infer

      if freq is None:
          freq = inferred_freq
      elif freq != inferred_freq:
          message = ('Inferred frequency {inferred} from passed '
                     'values does not conform to passed frequency '
                     '{passed}'
                     .format(inferred=inferred_freq,
                             passed=freq.freqstr))
          raise ValueError(message)

      # A frequency is settled, so the infer flag is switched off.
      return freq, False
  def maybe_infer_freq(freq):
      """
      Comparing a DateOffset to the string "infer" raises, so we need to
      be careful about comparisons. Make a dummy variable `freq_infer` to
      signify the case where the given freq is "infer" and set freq to None
      to avoid comparison trouble later on.

      Parameters
      ----------
      freq : {DateOffset, None, str}

      Returns
      -------
      freq : {DateOffset, None}
      freq_infer : bool
      """
      if isinstance(freq, DateOffset):
          # Already an offset object: pass it through as-is.
          return freq, False

      if freq != 'infer':
          # if a passed freq is None, don't infer automatically
          return frequencies.to_offset(freq), False

      # The caller asked for inference; signal it through the flag.
      return None, True
  def _ensure_datetimelike_to_i8(other, to_utc=False):
      """
      Helper for coercing an input scalar or array to i8.

      Parameters
      ----------
      other : 1d array
      to_utc : bool, default False
          If True, convert the values to UTC before extracting the i8 values
          If False, extract the i8 values directly.

      Returns
      -------
      i8 1d array
      """
      from pandas import Index
      from pandas.core.arrays import PeriodArray

      if lib.is_scalar(other) and isna(other):
          return iNaT

      datetimelike_types = (PeriodArray, ABCIndexClass,
                            DatetimeLikeArrayMixin)
      if not isinstance(other, datetimelike_types):
          try:
              return np.array(other, copy=False).view('i8')
          except TypeError:
              # period array cannot be coerced to int
              other = Index(other)
      elif getattr(other, 'tz', None) is not None:
          # convert tz if needed
          if to_utc:
              other = other.tz_convert('UTC')
          else:
              other = other.tz_localize(None)
      return other.asi8