period.py 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956
  1. # -*- coding: utf-8 -*-
  2. from datetime import timedelta
  3. import operator
  4. import numpy as np
  5. from pandas._libs.tslibs import (
  6. NaT, frequencies as libfrequencies, iNaT, period as libperiod)
  7. from pandas._libs.tslibs.fields import isleapyear_arr
  8. from pandas._libs.tslibs.period import (
  9. DIFFERENT_FREQ, IncompatibleFrequency, Period, get_period_field_arr,
  10. period_asfreq_arr)
  11. from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds
  12. import pandas.compat as compat
  13. from pandas.util._decorators import Appender, cache_readonly
  14. from pandas.core.dtypes.common import (
  15. _TD_DTYPE, ensure_object, is_datetime64_dtype, is_float_dtype,
  16. is_list_like, is_period_dtype, pandas_dtype)
  17. from pandas.core.dtypes.dtypes import PeriodDtype
  18. from pandas.core.dtypes.generic import (
  19. ABCDataFrame, ABCIndexClass, ABCPeriodIndex, ABCSeries)
  20. from pandas.core.dtypes.missing import isna, notna
  21. import pandas.core.algorithms as algos
  22. from pandas.core.arrays import datetimelike as dtl
  23. import pandas.core.common as com
  24. from pandas.tseries import frequencies
  25. from pandas.tseries.offsets import DateOffset, Tick, _delta_to_tick
  26. def _field_accessor(name, alias, docstring=None):
  27. def f(self):
  28. base, mult = libfrequencies.get_freq_code(self.freq)
  29. result = get_period_field_arr(alias, self.asi8, base)
  30. return result
  31. f.__name__ = name
  32. f.__doc__ = docstring
  33. return property(f)
  34. def _period_array_cmp(cls, op):
  35. """
  36. Wrap comparison operations to convert Period-like to PeriodDtype
  37. """
  38. opname = '__{name}__'.format(name=op.__name__)
  39. nat_result = True if opname == '__ne__' else False
  40. def wrapper(self, other):
  41. op = getattr(self.asi8, opname)
  42. if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
  43. return NotImplemented
  44. if is_list_like(other) and len(other) != len(self):
  45. raise ValueError("Lengths must match")
  46. if isinstance(other, Period):
  47. self._check_compatible_with(other)
  48. result = op(other.ordinal)
  49. elif isinstance(other, cls):
  50. self._check_compatible_with(other)
  51. result = op(other.asi8)
  52. mask = self._isnan | other._isnan
  53. if mask.any():
  54. result[mask] = nat_result
  55. return result
  56. elif other is NaT:
  57. result = np.empty(len(self.asi8), dtype=bool)
  58. result.fill(nat_result)
  59. else:
  60. other = Period(other, freq=self.freq)
  61. result = op(other.ordinal)
  62. if self._hasnans:
  63. result[self._isnan] = nat_result
  64. return result
  65. return compat.set_function_name(wrapper, opname, cls)
  66. class PeriodArray(dtl.DatetimeLikeArrayMixin, dtl.DatelikeOps):
  67. """
  68. Pandas ExtensionArray for storing Period data.
  69. Users should use :func:`period_array` to create new instances.
  70. Parameters
  71. ----------
  72. values : Union[PeriodArray, Series[period], ndarary[int], PeriodIndex]
  73. The data to store. These should be arrays that can be directly
  74. converted to ordinals without inference or copy (PeriodArray,
  75. ndarray[int64]), or a box around such an array (Series[period],
  76. PeriodIndex).
  77. freq : str or DateOffset
  78. The `freq` to use for the array. Mostly applicable when `values`
  79. is an ndarray of integers, when `freq` is required. When `values`
  80. is a PeriodArray (or box around), it's checked that ``values.freq``
  81. matches `freq`.
  82. copy : bool, default False
  83. Whether to copy the ordinals before storing.
  84. See Also
  85. --------
  86. period_array : Create a new PeriodArray.
  87. pandas.PeriodIndex : Immutable Index for period data.
  88. Notes
  89. -----
  90. There are two components to a PeriodArray
  91. - ordinals : integer ndarray
  92. - freq : pd.tseries.offsets.Offset
  93. The values are physically stored as a 1-D ndarray of integers. These are
  94. called "ordinals" and represent some kind of offset from a base.
  95. The `freq` indicates the span covered by each element of the array.
  96. All elements in the PeriodArray have the same `freq`.
  97. """
  98. # array priority higher than numpy scalars
  99. __array_priority__ = 1000
  100. _attributes = ["freq"]
  101. _typ = "periodarray" # ABCPeriodArray
  102. _scalar_type = Period
  103. # Names others delegate to us
  104. _other_ops = []
  105. _bool_ops = ['is_leap_year']
  106. _object_ops = ['start_time', 'end_time', 'freq']
  107. _field_ops = ['year', 'month', 'day', 'hour', 'minute', 'second',
  108. 'weekofyear', 'weekday', 'week', 'dayofweek',
  109. 'dayofyear', 'quarter', 'qyear',
  110. 'days_in_month', 'daysinmonth']
  111. _datetimelike_ops = _field_ops + _object_ops + _bool_ops
  112. _datetimelike_methods = ['strftime', 'to_timestamp', 'asfreq']
  113. # --------------------------------------------------------------------
  114. # Constructors
  115. def __init__(self, values, freq=None, dtype=None, copy=False):
  116. freq = validate_dtype_freq(dtype, freq)
  117. if freq is not None:
  118. freq = Period._maybe_convert_freq(freq)
  119. if isinstance(values, ABCSeries):
  120. values = values._values
  121. if not isinstance(values, type(self)):
  122. raise TypeError("Incorrect dtype")
  123. elif isinstance(values, ABCPeriodIndex):
  124. values = values._values
  125. if isinstance(values, type(self)):
  126. if freq is not None and freq != values.freq:
  127. msg = DIFFERENT_FREQ.format(cls=type(self).__name__,
  128. own_freq=values.freq.freqstr,
  129. other_freq=freq.freqstr)
  130. raise IncompatibleFrequency(msg)
  131. values, freq = values._data, values.freq
  132. values = np.array(values, dtype='int64', copy=copy)
  133. self._data = values
  134. if freq is None:
  135. raise ValueError('freq is not specified and cannot be inferred')
  136. self._dtype = PeriodDtype(freq)
  137. @classmethod
  138. def _simple_new(cls, values, freq=None, **kwargs):
  139. # alias for PeriodArray.__init__
  140. return cls(values, freq=freq, **kwargs)
  141. @classmethod
  142. def _from_sequence(cls, scalars, dtype=None, copy=False):
  143. # type: (Sequence[Optional[Period]], PeriodDtype, bool) -> PeriodArray
  144. if dtype:
  145. freq = dtype.freq
  146. else:
  147. freq = None
  148. if isinstance(scalars, cls):
  149. validate_dtype_freq(scalars.dtype, freq)
  150. if copy:
  151. scalars = scalars.copy()
  152. return scalars
  153. periods = np.asarray(scalars, dtype=object)
  154. if copy:
  155. periods = periods.copy()
  156. freq = freq or libperiod.extract_freq(periods)
  157. ordinals = libperiod.extract_ordinals(periods, freq)
  158. return cls(ordinals, freq=freq)
  159. @classmethod
  160. def _from_datetime64(cls, data, freq, tz=None):
  161. """
  162. Construct a PeriodArray from a datetime64 array
  163. Parameters
  164. ----------
  165. data : ndarray[datetime64[ns], datetime64[ns, tz]]
  166. freq : str or Tick
  167. tz : tzinfo, optional
  168. Returns
  169. -------
  170. PeriodArray[freq]
  171. """
  172. data, freq = dt64arr_to_periodarr(data, freq, tz)
  173. return cls(data, freq=freq)
  174. @classmethod
  175. def _generate_range(cls, start, end, periods, freq, fields):
  176. periods = dtl.validate_periods(periods)
  177. if freq is not None:
  178. freq = Period._maybe_convert_freq(freq)
  179. field_count = len(fields)
  180. if start is not None or end is not None:
  181. if field_count > 0:
  182. raise ValueError('Can either instantiate from fields '
  183. 'or endpoints, but not both')
  184. subarr, freq = _get_ordinal_range(start, end, periods, freq)
  185. elif field_count > 0:
  186. subarr, freq = _range_from_fields(freq=freq, **fields)
  187. else:
  188. raise ValueError('Not enough parameters to construct '
  189. 'Period range')
  190. return subarr, freq
  191. # -----------------------------------------------------------------
  192. # DatetimeLike Interface
  193. def _unbox_scalar(self, value):
  194. # type: (Union[Period, NaTType]) -> int
  195. if value is NaT:
  196. return value.value
  197. elif isinstance(value, self._scalar_type):
  198. if not isna(value):
  199. self._check_compatible_with(value)
  200. return value.ordinal
  201. else:
  202. raise ValueError("'value' should be a Period. Got '{val}' instead."
  203. .format(val=value))
  204. def _scalar_from_string(self, value):
  205. # type: (str) -> Period
  206. return Period(value, freq=self.freq)
  207. def _check_compatible_with(self, other):
  208. if other is NaT:
  209. return
  210. if self.freqstr != other.freqstr:
  211. _raise_on_incompatible(self, other)
  212. # --------------------------------------------------------------------
  213. # Data / Attributes
  214. @cache_readonly
  215. def dtype(self):
  216. return self._dtype
  217. @property
  218. def freq(self):
  219. """
  220. Return the frequency object for this PeriodArray.
  221. """
  222. return self.dtype.freq
  223. def __array__(self, dtype=None):
  224. # overriding DatetimelikeArray
  225. return np.array(list(self), dtype=object)
  226. # --------------------------------------------------------------------
  227. # Vectorized analogues of Period properties
  228. year = _field_accessor('year', 0, "The year of the period")
  229. month = _field_accessor('month', 3, "The month as January=1, December=12")
  230. day = _field_accessor('day', 4, "The days of the period")
  231. hour = _field_accessor('hour', 5, "The hour of the period")
  232. minute = _field_accessor('minute', 6, "The minute of the period")
  233. second = _field_accessor('second', 7, "The second of the period")
  234. weekofyear = _field_accessor('week', 8, "The week ordinal of the year")
  235. week = weekofyear
  236. dayofweek = _field_accessor('dayofweek', 10,
  237. "The day of the week with Monday=0, Sunday=6")
  238. weekday = dayofweek
  239. dayofyear = day_of_year = _field_accessor('dayofyear', 9,
  240. "The ordinal day of the year")
  241. quarter = _field_accessor('quarter', 2, "The quarter of the date")
  242. qyear = _field_accessor('qyear', 1)
  243. days_in_month = _field_accessor('days_in_month', 11,
  244. "The number of days in the month")
  245. daysinmonth = days_in_month
  246. @property
  247. def is_leap_year(self):
  248. """
  249. Logical indicating if the date belongs to a leap year
  250. """
  251. return isleapyear_arr(np.asarray(self.year))
  252. @property
  253. def start_time(self):
  254. return self.to_timestamp(how='start')
  255. @property
  256. def end_time(self):
  257. return self.to_timestamp(how='end')
  258. def to_timestamp(self, freq=None, how='start'):
  259. """
  260. Cast to DatetimeArray/Index.
  261. Parameters
  262. ----------
  263. freq : string or DateOffset, optional
  264. Target frequency. The default is 'D' for week or longer,
  265. 'S' otherwise
  266. how : {'s', 'e', 'start', 'end'}
  267. Returns
  268. -------
  269. DatetimeArray/Index
  270. """
  271. from pandas.core.arrays import DatetimeArray
  272. how = libperiod._validate_end_alias(how)
  273. end = how == 'E'
  274. if end:
  275. if freq == 'B':
  276. # roll forward to ensure we land on B date
  277. adjust = Timedelta(1, 'D') - Timedelta(1, 'ns')
  278. return self.to_timestamp(how='start') + adjust
  279. else:
  280. adjust = Timedelta(1, 'ns')
  281. return (self + self.freq).to_timestamp(how='start') - adjust
  282. if freq is None:
  283. base, mult = libfrequencies.get_freq_code(self.freq)
  284. freq = libfrequencies.get_to_timestamp_base(base)
  285. else:
  286. freq = Period._maybe_convert_freq(freq)
  287. base, mult = libfrequencies.get_freq_code(freq)
  288. new_data = self.asfreq(freq, how=how)
  289. new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base)
  290. return DatetimeArray._from_sequence(new_data, freq='infer')
  291. # --------------------------------------------------------------------
  292. # Array-like / EA-Interface Methods
  293. def _formatter(self, boxed=False):
  294. if boxed:
  295. return str
  296. return "'{}'".format
  297. @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__)
  298. def _validate_fill_value(self, fill_value):
  299. if isna(fill_value):
  300. fill_value = iNaT
  301. elif isinstance(fill_value, Period):
  302. self._check_compatible_with(fill_value)
  303. fill_value = fill_value.ordinal
  304. else:
  305. raise ValueError("'fill_value' should be a Period. "
  306. "Got '{got}'.".format(got=fill_value))
  307. return fill_value
  308. # --------------------------------------------------------------------
  309. def _time_shift(self, periods, freq=None):
  310. """
  311. Shift each value by `periods`.
  312. Note this is different from ExtensionArray.shift, which
  313. shifts the *position* of each element, padding the end with
  314. missing values.
  315. Parameters
  316. ----------
  317. periods : int
  318. Number of periods to shift by.
  319. freq : pandas.DateOffset, pandas.Timedelta, or string
  320. Frequency increment to shift by.
  321. """
  322. if freq is not None:
  323. raise TypeError("`freq` argument is not supported for "
  324. "{cls}._time_shift"
  325. .format(cls=type(self).__name__))
  326. values = self.asi8 + periods * self.freq.n
  327. if self._hasnans:
  328. values[self._isnan] = iNaT
  329. return type(self)(values, freq=self.freq)
  330. @property
  331. def _box_func(self):
  332. return lambda x: Period._from_ordinal(ordinal=x, freq=self.freq)
  333. def asfreq(self, freq=None, how='E'):
  334. """
  335. Convert the Period Array/Index to the specified frequency `freq`.
  336. Parameters
  337. ----------
  338. freq : str
  339. a frequency
  340. how : str {'E', 'S'}
  341. 'E', 'END', or 'FINISH' for end,
  342. 'S', 'START', or 'BEGIN' for start.
  343. Whether the elements should be aligned to the end
  344. or start within pa period. January 31st ('END') vs.
  345. January 1st ('START') for example.
  346. Returns
  347. -------
  348. new : Period Array/Index with the new frequency
  349. Examples
  350. --------
  351. >>> pidx = pd.period_range('2010-01-01', '2015-01-01', freq='A')
  352. >>> pidx
  353. <class 'pandas.core.indexes.period.PeriodIndex'>
  354. [2010, ..., 2015]
  355. Length: 6, Freq: A-DEC
  356. >>> pidx.asfreq('M')
  357. <class 'pandas.core.indexes.period.PeriodIndex'>
  358. [2010-12, ..., 2015-12]
  359. Length: 6, Freq: M
  360. >>> pidx.asfreq('M', how='S')
  361. <class 'pandas.core.indexes.period.PeriodIndex'>
  362. [2010-01, ..., 2015-01]
  363. Length: 6, Freq: M
  364. """
  365. how = libperiod._validate_end_alias(how)
  366. freq = Period._maybe_convert_freq(freq)
  367. base1, mult1 = libfrequencies.get_freq_code(self.freq)
  368. base2, mult2 = libfrequencies.get_freq_code(freq)
  369. asi8 = self.asi8
  370. # mult1 can't be negative or 0
  371. end = how == 'E'
  372. if end:
  373. ordinal = asi8 + mult1 - 1
  374. else:
  375. ordinal = asi8
  376. new_data = period_asfreq_arr(ordinal, base1, base2, end)
  377. if self._hasnans:
  378. new_data[self._isnan] = iNaT
  379. return type(self)(new_data, freq=freq)
  380. # ------------------------------------------------------------------
  381. # Rendering Methods
  382. def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):
  383. """
  384. actually format my specific types
  385. """
  386. values = self.astype(object)
  387. if date_format:
  388. formatter = lambda dt: dt.strftime(date_format)
  389. else:
  390. formatter = lambda dt: u'%s' % dt
  391. if self._hasnans:
  392. mask = self._isnan
  393. values[mask] = na_rep
  394. imask = ~mask
  395. values[imask] = np.array([formatter(dt) for dt
  396. in values[imask]])
  397. else:
  398. values = np.array([formatter(dt) for dt in values])
  399. return values
  400. # ------------------------------------------------------------------
  401. def astype(self, dtype, copy=True):
  402. # We handle Period[T] -> Period[U]
  403. # Our parent handles everything else.
  404. dtype = pandas_dtype(dtype)
  405. if is_period_dtype(dtype):
  406. return self.asfreq(dtype.freq)
  407. return super(PeriodArray, self).astype(dtype, copy=copy)
  408. @property
  409. def flags(self):
  410. # TODO: remove
  411. # We need this since reduction.SeriesBinGrouper uses values.flags
  412. # Ideally, we wouldn't be passing objects down there in the first
  413. # place.
  414. return self._data.flags
  415. # ------------------------------------------------------------------
  416. # Arithmetic Methods
  417. _create_comparison_method = classmethod(_period_array_cmp)
  418. def _sub_datelike(self, other):
  419. assert other is not NaT
  420. return NotImplemented
  421. def _sub_period(self, other):
  422. # If the operation is well-defined, we return an object-Index
  423. # of DateOffsets. Null entries are filled with pd.NaT
  424. self._check_compatible_with(other)
  425. asi8 = self.asi8
  426. new_data = asi8 - other.ordinal
  427. new_data = np.array([self.freq * x for x in new_data])
  428. if self._hasnans:
  429. new_data[self._isnan] = NaT
  430. return new_data
  431. @Appender(dtl.DatetimeLikeArrayMixin._addsub_int_array.__doc__)
  432. def _addsub_int_array(
  433. self,
  434. other, # type: Union[Index, ExtensionArray, np.ndarray[int]]
  435. op # type: Callable[Any, Any]
  436. ):
  437. # type: (...) -> PeriodArray
  438. assert op in [operator.add, operator.sub]
  439. if op is operator.sub:
  440. other = -other
  441. res_values = algos.checked_add_with_arr(self.asi8, other,
  442. arr_mask=self._isnan)
  443. res_values = res_values.view('i8')
  444. res_values[self._isnan] = iNaT
  445. return type(self)(res_values, freq=self.freq)
  446. def _add_offset(self, other):
  447. assert not isinstance(other, Tick)
  448. base = libfrequencies.get_base_alias(other.rule_code)
  449. if base != self.freq.rule_code:
  450. _raise_on_incompatible(self, other)
  451. # Note: when calling parent class's _add_timedeltalike_scalar,
  452. # it will call delta_to_nanoseconds(delta). Because delta here
  453. # is an integer, delta_to_nanoseconds will return it unchanged.
  454. result = super(PeriodArray, self)._add_timedeltalike_scalar(other.n)
  455. return type(self)(result, freq=self.freq)
  456. def _add_timedeltalike_scalar(self, other):
  457. """
  458. Parameters
  459. ----------
  460. other : timedelta, Tick, np.timedelta64
  461. Returns
  462. -------
  463. result : ndarray[int64]
  464. """
  465. assert isinstance(self.freq, Tick) # checked by calling function
  466. assert isinstance(other, (timedelta, np.timedelta64, Tick))
  467. if notna(other):
  468. # special handling for np.timedelta64("NaT"), avoid calling
  469. # _check_timedeltalike_freq_compat as that would raise TypeError
  470. other = self._check_timedeltalike_freq_compat(other)
  471. # Note: when calling parent class's _add_timedeltalike_scalar,
  472. # it will call delta_to_nanoseconds(delta). Because delta here
  473. # is an integer, delta_to_nanoseconds will return it unchanged.
  474. ordinals = super(PeriodArray, self)._add_timedeltalike_scalar(other)
  475. return ordinals
  476. def _add_delta_tdi(self, other):
  477. """
  478. Parameters
  479. ----------
  480. other : TimedeltaArray or ndarray[timedelta64]
  481. Returns
  482. -------
  483. result : ndarray[int64]
  484. """
  485. assert isinstance(self.freq, Tick) # checked by calling function
  486. delta = self._check_timedeltalike_freq_compat(other)
  487. return self._addsub_int_array(delta, operator.add).asi8
  488. def _add_delta(self, other):
  489. """
  490. Add a timedelta-like, Tick, or TimedeltaIndex-like object
  491. to self, yielding a new PeriodArray
  492. Parameters
  493. ----------
  494. other : {timedelta, np.timedelta64, Tick,
  495. TimedeltaIndex, ndarray[timedelta64]}
  496. Returns
  497. -------
  498. result : PeriodArray
  499. """
  500. if not isinstance(self.freq, Tick):
  501. # We cannot add timedelta-like to non-tick PeriodArray
  502. _raise_on_incompatible(self, other)
  503. new_ordinals = super(PeriodArray, self)._add_delta(other)
  504. return type(self)(new_ordinals, freq=self.freq)
  505. def _check_timedeltalike_freq_compat(self, other):
  506. """
  507. Arithmetic operations with timedelta-like scalars or array `other`
  508. are only valid if `other` is an integer multiple of `self.freq`.
  509. If the operation is valid, find that integer multiple. Otherwise,
  510. raise because the operation is invalid.
  511. Parameters
  512. ----------
  513. other : timedelta, np.timedelta64, Tick,
  514. ndarray[timedelta64], TimedeltaArray, TimedeltaIndex
  515. Returns
  516. -------
  517. multiple : int or ndarray[int64]
  518. Raises
  519. ------
  520. IncompatibleFrequency
  521. """
  522. assert isinstance(self.freq, Tick) # checked by calling function
  523. own_offset = frequencies.to_offset(self.freq.rule_code)
  524. base_nanos = delta_to_nanoseconds(own_offset)
  525. if isinstance(other, (timedelta, np.timedelta64, Tick)):
  526. nanos = delta_to_nanoseconds(other)
  527. elif isinstance(other, np.ndarray):
  528. # numpy timedelta64 array; all entries must be compatible
  529. assert other.dtype.kind == 'm'
  530. if other.dtype != _TD_DTYPE:
  531. # i.e. non-nano unit
  532. # TODO: disallow unit-less timedelta64
  533. other = other.astype(_TD_DTYPE)
  534. nanos = other.view('i8')
  535. else:
  536. # TimedeltaArray/Index
  537. nanos = other.asi8
  538. if np.all(nanos % base_nanos == 0):
  539. # nanos being added is an integer multiple of the
  540. # base-frequency to self.freq
  541. delta = nanos // base_nanos
  542. # delta is the integer (or integer-array) number of periods
  543. # by which will be added to self.
  544. return delta
  545. _raise_on_incompatible(self, other)
  546. def _values_for_argsort(self):
  547. return self._data
  548. PeriodArray._add_comparison_ops()
  549. def _raise_on_incompatible(left, right):
  550. """
  551. Helper function to render a consistent error message when raising
  552. IncompatibleFrequency.
  553. Parameters
  554. ----------
  555. left : PeriodArray
  556. right : DateOffset, Period, ndarray, or timedelta-like
  557. Raises
  558. ------
  559. IncompatibleFrequency
  560. """
  561. # GH#24283 error message format depends on whether right is scalar
  562. if isinstance(right, np.ndarray):
  563. other_freq = None
  564. elif isinstance(right, (ABCPeriodIndex, PeriodArray, Period, DateOffset)):
  565. other_freq = right.freqstr
  566. else:
  567. other_freq = _delta_to_tick(Timedelta(right)).freqstr
  568. msg = DIFFERENT_FREQ.format(cls=type(left).__name__,
  569. own_freq=left.freqstr,
  570. other_freq=other_freq)
  571. raise IncompatibleFrequency(msg)
  572. # -------------------------------------------------------------------
  573. # Constructor Helpers
  574. def period_array(data, freq=None, copy=False):
  575. # type: (Sequence[Optional[Period]], Optional[Tick]) -> PeriodArray
  576. """
  577. Construct a new PeriodArray from a sequence of Period scalars.
  578. Parameters
  579. ----------
  580. data : Sequence of Period objects
  581. A sequence of Period objects. These are required to all have
  582. the same ``freq.`` Missing values can be indicated by ``None``
  583. or ``pandas.NaT``.
  584. freq : str, Tick, or Offset
  585. The frequency of every element of the array. This can be specified
  586. to avoid inferring the `freq` from `data`.
  587. copy : bool, default False
  588. Whether to ensure a copy of the data is made.
  589. Returns
  590. -------
  591. PeriodArray
  592. See Also
  593. --------
  594. PeriodArray
  595. pandas.PeriodIndex
  596. Examples
  597. --------
  598. >>> period_array([pd.Period('2017', freq='A'),
  599. ... pd.Period('2018', freq='A')])
  600. <PeriodArray>
  601. ['2017', '2018']
  602. Length: 2, dtype: period[A-DEC]
  603. >>> period_array([pd.Period('2017', freq='A'),
  604. ... pd.Period('2018', freq='A'),
  605. ... pd.NaT])
  606. <PeriodArray>
  607. ['2017', '2018', 'NaT']
  608. Length: 3, dtype: period[A-DEC]
  609. Integers that look like years are handled
  610. >>> period_array([2000, 2001, 2002], freq='D')
  611. ['2000-01-01', '2001-01-01', '2002-01-01']
  612. Length: 3, dtype: period[D]
  613. Datetime-like strings may also be passed
  614. >>> period_array(['2000-Q1', '2000-Q2', '2000-Q3', '2000-Q4'], freq='Q')
  615. <PeriodArray>
  616. ['2000Q1', '2000Q2', '2000Q3', '2000Q4']
  617. Length: 4, dtype: period[Q-DEC]
  618. """
  619. if is_datetime64_dtype(data):
  620. return PeriodArray._from_datetime64(data, freq)
  621. if isinstance(data, (ABCPeriodIndex, ABCSeries, PeriodArray)):
  622. return PeriodArray(data, freq)
  623. # other iterable of some kind
  624. if not isinstance(data, (np.ndarray, list, tuple)):
  625. data = list(data)
  626. data = np.asarray(data)
  627. if freq:
  628. dtype = PeriodDtype(freq)
  629. else:
  630. dtype = None
  631. if is_float_dtype(data) and len(data) > 0:
  632. raise TypeError("PeriodIndex does not allow "
  633. "floating point in construction")
  634. data = ensure_object(data)
  635. return PeriodArray._from_sequence(data, dtype=dtype)
  636. def validate_dtype_freq(dtype, freq):
  637. """
  638. If both a dtype and a freq are available, ensure they match. If only
  639. dtype is available, extract the implied freq.
  640. Parameters
  641. ----------
  642. dtype : dtype
  643. freq : DateOffset or None
  644. Returns
  645. -------
  646. freq : DateOffset
  647. Raises
  648. ------
  649. ValueError : non-period dtype
  650. IncompatibleFrequency : mismatch between dtype and freq
  651. """
  652. if freq is not None:
  653. freq = frequencies.to_offset(freq)
  654. if dtype is not None:
  655. dtype = pandas_dtype(dtype)
  656. if not is_period_dtype(dtype):
  657. raise ValueError('dtype must be PeriodDtype')
  658. if freq is None:
  659. freq = dtype.freq
  660. elif freq != dtype.freq:
  661. raise IncompatibleFrequency('specified freq and dtype '
  662. 'are different')
  663. return freq
  664. def dt64arr_to_periodarr(data, freq, tz=None):
  665. """
  666. Convert an datetime-like array to values Period ordinals.
  667. Parameters
  668. ----------
  669. data : Union[Series[datetime64[ns]], DatetimeIndex, ndarray[datetime64ns]]
  670. freq : Optional[Union[str, Tick]]
  671. Must match the `freq` on the `data` if `data` is a DatetimeIndex
  672. or Series.
  673. tz : Optional[tzinfo]
  674. Returns
  675. -------
  676. ordinals : ndarray[int]
  677. freq : Tick
  678. The frequencey extracted from the Series or DatetimeIndex if that's
  679. used.
  680. """
  681. if data.dtype != np.dtype('M8[ns]'):
  682. raise ValueError('Wrong dtype: {dtype}'.format(dtype=data.dtype))
  683. if freq is None:
  684. if isinstance(data, ABCIndexClass):
  685. data, freq = data._values, data.freq
  686. elif isinstance(data, ABCSeries):
  687. data, freq = data._values, data.dt.freq
  688. freq = Period._maybe_convert_freq(freq)
  689. if isinstance(data, (ABCIndexClass, ABCSeries)):
  690. data = data._values
  691. base, mult = libfrequencies.get_freq_code(freq)
  692. return libperiod.dt64arr_to_periodarr(data.view('i8'), base, tz), freq
  693. def _get_ordinal_range(start, end, periods, freq, mult=1):
  694. if com.count_not_none(start, end, periods) != 2:
  695. raise ValueError('Of the three parameters: start, end, and periods, '
  696. 'exactly two must be specified')
  697. if freq is not None:
  698. _, mult = libfrequencies.get_freq_code(freq)
  699. if start is not None:
  700. start = Period(start, freq)
  701. if end is not None:
  702. end = Period(end, freq)
  703. is_start_per = isinstance(start, Period)
  704. is_end_per = isinstance(end, Period)
  705. if is_start_per and is_end_per and start.freq != end.freq:
  706. raise ValueError('start and end must have same freq')
  707. if (start is NaT or end is NaT):
  708. raise ValueError('start and end must not be NaT')
  709. if freq is None:
  710. if is_start_per:
  711. freq = start.freq
  712. elif is_end_per:
  713. freq = end.freq
  714. else: # pragma: no cover
  715. raise ValueError('Could not infer freq from start/end')
  716. if periods is not None:
  717. periods = periods * mult
  718. if start is None:
  719. data = np.arange(end.ordinal - periods + mult,
  720. end.ordinal + 1, mult,
  721. dtype=np.int64)
  722. else:
  723. data = np.arange(start.ordinal, start.ordinal + periods, mult,
  724. dtype=np.int64)
  725. else:
  726. data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64)
  727. return data, freq
  728. def _range_from_fields(year=None, month=None, quarter=None, day=None,
  729. hour=None, minute=None, second=None, freq=None):
  730. if hour is None:
  731. hour = 0
  732. if minute is None:
  733. minute = 0
  734. if second is None:
  735. second = 0
  736. if day is None:
  737. day = 1
  738. ordinals = []
  739. if quarter is not None:
  740. if freq is None:
  741. freq = 'Q'
  742. base = libfrequencies.FreqGroup.FR_QTR
  743. else:
  744. base, mult = libfrequencies.get_freq_code(freq)
  745. if base != libfrequencies.FreqGroup.FR_QTR:
  746. raise AssertionError("base must equal FR_QTR")
  747. year, quarter = _make_field_arrays(year, quarter)
  748. for y, q in compat.zip(year, quarter):
  749. y, m = libperiod.quarter_to_myear(y, q, freq)
  750. val = libperiod.period_ordinal(y, m, 1, 1, 1, 1, 0, 0, base)
  751. ordinals.append(val)
  752. else:
  753. base, mult = libfrequencies.get_freq_code(freq)
  754. arrays = _make_field_arrays(year, month, day, hour, minute, second)
  755. for y, mth, d, h, mn, s in compat.zip(*arrays):
  756. ordinals.append(libperiod.period_ordinal(
  757. y, mth, d, h, mn, s, 0, 0, base))
  758. return np.array(ordinals, dtype=np.int64), freq
  759. def _make_field_arrays(*fields):
  760. length = None
  761. for x in fields:
  762. if isinstance(x, (list, np.ndarray, ABCSeries)):
  763. if length is not None and len(x) != length:
  764. raise ValueError('Mismatched Period array lengths')
  765. elif length is None:
  766. length = len(x)
  767. arrays = [np.asarray(x) if isinstance(x, (np.ndarray, list, ABCSeries))
  768. else np.repeat(x, length) for x in fields]
  769. return arrays