period.py 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966
  1. # pylint: disable=E1101,E1103,W0232
  2. from datetime import datetime, timedelta
  3. import warnings
  4. import numpy as np
  5. from pandas._libs import index as libindex
  6. from pandas._libs.tslibs import (
  7. NaT, frequencies as libfrequencies, iNaT, resolution)
  8. from pandas._libs.tslibs.period import (
  9. DIFFERENT_FREQ, IncompatibleFrequency, Period)
  10. from pandas.util._decorators import Appender, Substitution, cache_readonly
  11. from pandas.core.dtypes.common import (
  12. is_bool_dtype, is_datetime64_any_dtype, is_float, is_float_dtype,
  13. is_integer, is_integer_dtype, pandas_dtype)
  14. from pandas import compat
  15. from pandas.core import common as com
  16. from pandas.core.accessor import delegate_names
  17. from pandas.core.algorithms import unique1d
  18. from pandas.core.arrays.period import (
  19. PeriodArray, period_array, validate_dtype_freq)
  20. from pandas.core.base import _shared_docs
  21. import pandas.core.indexes.base as ibase
  22. from pandas.core.indexes.base import _index_shared_docs, ensure_index
  23. from pandas.core.indexes.datetimelike import (
  24. DatetimeIndexOpsMixin, DatetimelikeDelegateMixin)
  25. from pandas.core.indexes.datetimes import DatetimeIndex, Index, Int64Index
  26. from pandas.core.missing import isna
  27. from pandas.core.ops import get_op_result_name
  28. from pandas.core.tools.datetimes import DateParseError, parse_time_string
  29. from pandas.tseries import frequencies
  30. from pandas.tseries.offsets import DateOffset, Tick
  31. _index_doc_kwargs = dict(ibase._index_doc_kwargs)
  32. _index_doc_kwargs.update(
  33. dict(target_klass='PeriodIndex or list of Periods'))
  34. # --- Period index sketch
  35. def _new_PeriodIndex(cls, **d):
  36. # GH13277 for unpickling
  37. values = d.pop('data')
  38. if values.dtype == 'int64':
  39. freq = d.pop('freq', None)
  40. values = PeriodArray(values, freq=freq)
  41. return cls._simple_new(values, **d)
  42. else:
  43. return cls(values, **d)
  44. class PeriodDelegateMixin(DatetimelikeDelegateMixin):
  45. """
  46. Delegate from PeriodIndex to PeriodArray.
  47. """
  48. _delegate_class = PeriodArray
  49. _delegated_properties = PeriodArray._datetimelike_ops
  50. _delegated_methods = (
  51. set(PeriodArray._datetimelike_methods) | {'_addsub_int_array'}
  52. )
  53. _raw_properties = {'is_leap_year'}
@delegate_names(PeriodArray,
                PeriodDelegateMixin._delegated_properties,
                typ='property')
@delegate_names(PeriodArray,
                PeriodDelegateMixin._delegated_methods,
                typ="method",
                overwrite=True)
class PeriodIndex(DatetimeIndexOpsMixin, Int64Index, PeriodDelegateMixin):
    """
    Immutable ndarray holding ordinal values indicating regular periods in
    time such as particular years, quarters, months, etc.

    Index keys are boxed to Period objects which carry the metadata (eg,
    frequency information).

    Parameters
    ----------
    data : array-like (1-dimensional), optional
        Optional period-like data to construct index with
    copy : bool
        Make a copy of input ndarray
    freq : string or period object, optional
        One of pandas period strings or corresponding objects
    start : starting value, period-like, optional
        If data is None, used as the start point in generating regular
        period data.

        .. deprecated:: 0.24.0

    periods : int, optional, > 0
        Number of periods to generate, if generating index. Takes precedence
        over end argument

        .. deprecated:: 0.24.0

    end : end value, period-like, optional
        If periods is none, generated index will extend to first conforming
        period on or just past end argument

        .. deprecated:: 0.24.0

    year : int, array, or Series, default None
    month : int, array, or Series, default None
    quarter : int, array, or Series, default None
    day : int, array, or Series, default None
    hour : int, array, or Series, default None
    minute : int, array, or Series, default None
    second : int, array, or Series, default None
    tz : object, default None
        Timezone for converting datetime64 data to Periods
    dtype : str or PeriodDtype, default None

    Attributes
    ----------
    day
    dayofweek
    dayofyear
    days_in_month
    daysinmonth
    end_time
    freq
    freqstr
    hour
    is_leap_year
    minute
    month
    quarter
    qyear
    second
    start_time
    week
    weekday
    weekofyear
    year

    Methods
    -------
    asfreq
    strftime
    to_timestamp

    Notes
    -----
    Creating a PeriodIndex based on `start`, `periods`, and `end` has
    been deprecated in favor of :func:`period_range`.

    Examples
    --------
    >>> idx = pd.PeriodIndex(year=year_arr, quarter=q_arr)

    See Also
    --------
    Index : The base pandas Index type.
    Period : Represents a period of time.
    DatetimeIndex : Index with datetime64 data.
    TimedeltaIndex : Index of timedelta64 data.
    period_range : Create a fixed-frequency PeriodIndex.
    """
    _typ = 'periodindex'
    # Attribute names; ``_apply_meta`` and ``_shallow_copy`` below pass
    # name/freq through when building new instances.
    _attributes = ['name', 'freq']

    # define my properties & methods for delegation
    _is_numeric_dtype = False
    _infer_as_myclass = True

    # Backing storage; assigned per instance in ``_simple_new``.
    _data = None  # type: PeriodArray

    _engine_type = libindex.PeriodEngine
  146. # ------------------------------------------------------------------------
  147. # Index Constructors
  148. def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
  149. periods=None, tz=None, dtype=None, copy=False, name=None,
  150. **fields):
  151. valid_field_set = {'year', 'month', 'day', 'quarter',
  152. 'hour', 'minute', 'second'}
  153. if not set(fields).issubset(valid_field_set):
  154. raise TypeError('__new__() got an unexpected keyword argument {}'.
  155. format(list(set(fields) - valid_field_set)[0]))
  156. if name is None and hasattr(data, 'name'):
  157. name = data.name
  158. if data is None and ordinal is None:
  159. # range-based.
  160. data, freq2 = PeriodArray._generate_range(start, end, periods,
  161. freq, fields)
  162. # PeriodArray._generate range does validate that fields is
  163. # empty when really using the range-based constructor.
  164. if not fields:
  165. msg = ("Creating a PeriodIndex by passing range "
  166. "endpoints is deprecated. Use "
  167. "`pandas.period_range` instead.")
  168. # period_range differs from PeriodIndex for cases like
  169. # start="2000", periods=4
  170. # PeriodIndex interprets that as A-DEC freq.
  171. # period_range interprets it as 'D' freq.
  172. cond = (
  173. freq is None and (
  174. (start and not isinstance(start, Period)) or
  175. (end and not isinstance(end, Period))
  176. )
  177. )
  178. if cond:
  179. msg += (
  180. " Note that the default `freq` may differ. Pass "
  181. "'freq=\"{}\"' to ensure the same output."
  182. ).format(freq2.freqstr)
  183. warnings.warn(msg, FutureWarning, stacklevel=2)
  184. freq = freq2
  185. data = PeriodArray(data, freq=freq)
  186. else:
  187. freq = validate_dtype_freq(dtype, freq)
  188. # PeriodIndex allow PeriodIndex(period_index, freq=different)
  189. # Let's not encourage that kind of behavior in PeriodArray.
  190. if freq and isinstance(data, cls) and data.freq != freq:
  191. # TODO: We can do some of these with no-copy / coercion?
  192. # e.g. D -> 2D seems to be OK
  193. data = data.asfreq(freq)
  194. if data is None and ordinal is not None:
  195. # we strangely ignore `ordinal` if data is passed.
  196. ordinal = np.asarray(ordinal, dtype=np.int64)
  197. data = PeriodArray(ordinal, freq)
  198. else:
  199. # don't pass copy here, since we copy later.
  200. data = period_array(data=data, freq=freq)
  201. if copy:
  202. data = data.copy()
  203. return cls._simple_new(data, name=name)
  204. @classmethod
  205. def _simple_new(cls, values, name=None, freq=None, **kwargs):
  206. """
  207. Create a new PeriodIndex.
  208. Parameters
  209. ----------
  210. values : PeriodArray, PeriodIndex, Index[int64], ndarray[int64]
  211. Values that can be converted to a PeriodArray without inference
  212. or coercion.
  213. """
  214. # TODO: raising on floats is tested, but maybe not useful.
  215. # Should the callers know not to pass floats?
  216. # At the very least, I think we can ensure that lists aren't passed.
  217. if isinstance(values, list):
  218. values = np.asarray(values)
  219. if is_float_dtype(values):
  220. raise TypeError("PeriodIndex._simple_new does not accept floats.")
  221. if freq:
  222. freq = Period._maybe_convert_freq(freq)
  223. values = PeriodArray(values, freq=freq)
  224. if not isinstance(values, PeriodArray):
  225. raise TypeError("PeriodIndex._simple_new only accepts PeriodArray")
  226. result = object.__new__(cls)
  227. result._data = values
  228. # For groupby perf. See note in indexes/base about _index_data
  229. result._index_data = values._data
  230. result.name = name
  231. result._reset_identity()
  232. return result
  233. # ------------------------------------------------------------------------
  234. # Data
  235. @property
  236. def values(self):
  237. return np.asarray(self)
  238. @property
  239. def freq(self):
  240. return self._data.freq
  241. @freq.setter
  242. def freq(self, value):
  243. value = Period._maybe_convert_freq(value)
  244. # TODO: When this deprecation is enforced, PeriodIndex.freq can
  245. # be removed entirely, and we'll just inherit.
  246. msg = ('Setting {cls}.freq has been deprecated and will be '
  247. 'removed in a future version; use {cls}.asfreq instead. '
  248. 'The {cls}.freq setter is not guaranteed to work.')
  249. warnings.warn(msg.format(cls=type(self).__name__),
  250. FutureWarning, stacklevel=2)
  251. # PeriodArray._freq isn't actually mutable. We set the private _freq
  252. # here, but people shouldn't be doing this anyway.
  253. self._data._freq = value
  254. def _shallow_copy(self, values=None, **kwargs):
  255. # TODO: simplify, figure out type of values
  256. if values is None:
  257. values = self._data
  258. if isinstance(values, type(self)):
  259. values = values._values
  260. if not isinstance(values, PeriodArray):
  261. if (isinstance(values, np.ndarray) and
  262. is_integer_dtype(values.dtype)):
  263. values = PeriodArray(values, freq=self.freq)
  264. else:
  265. # in particular, I would like to avoid period_array here.
  266. # Some people seem to be calling use with unexpected types
  267. # Index.difference -> ndarray[Period]
  268. # DatetimelikeIndexOpsMixin.repeat -> ndarray[ordinal]
  269. # I think that once all of Datetime* are EAs, we can simplify
  270. # this quite a bit.
  271. values = period_array(values, freq=self.freq)
  272. # We don't allow changing `freq` in _shallow_copy.
  273. validate_dtype_freq(self.dtype, kwargs.get('freq'))
  274. attributes = self._get_attributes_dict()
  275. attributes.update(kwargs)
  276. if not len(values) and 'dtype' not in kwargs:
  277. attributes['dtype'] = self.dtype
  278. return self._simple_new(values, **attributes)
  279. def _shallow_copy_with_infer(self, values=None, **kwargs):
  280. """ we always want to return a PeriodIndex """
  281. return self._shallow_copy(values=values, **kwargs)
  282. @property
  283. def _box_func(self):
  284. """Maybe box an ordinal or Period"""
  285. # TODO(DatetimeArray): Avoid double-boxing
  286. # PeriodArray takes care of boxing already, so we need to check
  287. # whether we're given an ordinal or a Period. It seems like some
  288. # places outside of indexes/period.py are calling this _box_func,
  289. # but passing data that's already boxed.
  290. def func(x):
  291. if isinstance(x, Period) or x is NaT:
  292. return x
  293. else:
  294. return Period._from_ordinal(ordinal=x, freq=self.freq)
  295. return func
  296. def _maybe_convert_timedelta(self, other):
  297. """
  298. Convert timedelta-like input to an integer multiple of self.freq
  299. Parameters
  300. ----------
  301. other : timedelta, np.timedelta64, DateOffset, int, np.ndarray
  302. Returns
  303. -------
  304. converted : int, np.ndarray[int64]
  305. Raises
  306. ------
  307. IncompatibleFrequency : if the input cannot be written as a multiple
  308. of self.freq. Note IncompatibleFrequency subclasses ValueError.
  309. """
  310. if isinstance(
  311. other, (timedelta, np.timedelta64, Tick, np.ndarray)):
  312. offset = frequencies.to_offset(self.freq.rule_code)
  313. if isinstance(offset, Tick):
  314. # _check_timedeltalike_freq_compat will raise if incompatible
  315. delta = self._data._check_timedeltalike_freq_compat(other)
  316. return delta
  317. elif isinstance(other, DateOffset):
  318. freqstr = other.rule_code
  319. base = libfrequencies.get_base_alias(freqstr)
  320. if base == self.freq.rule_code:
  321. return other.n
  322. msg = DIFFERENT_FREQ.format(cls=type(self).__name__,
  323. own_freq=self.freqstr,
  324. other_freq=other.freqstr)
  325. raise IncompatibleFrequency(msg)
  326. elif is_integer(other):
  327. # integer is passed to .shift via
  328. # _add_datetimelike_methods basically
  329. # but ufunc may pass integer to _add_delta
  330. return other
  331. # raise when input doesn't have freq
  332. msg = DIFFERENT_FREQ.format(cls=type(self).__name__,
  333. own_freq=self.freqstr,
  334. other_freq=None)
  335. raise IncompatibleFrequency(msg)
  336. # ------------------------------------------------------------------------
  337. # Rendering Methods
  338. def _format_native_types(self, na_rep=u'NaT', quoting=None, **kwargs):
  339. # just dispatch, return ndarray
  340. return self._data._format_native_types(na_rep=na_rep,
  341. quoting=quoting,
  342. **kwargs)
  343. def _mpl_repr(self):
  344. # how to represent ourselves to matplotlib
  345. return self.astype(object).values
  346. @property
  347. def _formatter_func(self):
  348. return self.array._formatter(boxed=False)
  349. # ------------------------------------------------------------------------
  350. # Indexing
  351. @cache_readonly
  352. def _engine(self):
  353. return self._engine_type(lambda: self, len(self))
  354. @Appender(_index_shared_docs['contains'])
  355. def __contains__(self, key):
  356. if isinstance(key, Period):
  357. if key.freq != self.freq:
  358. return False
  359. else:
  360. return key.ordinal in self._engine
  361. else:
  362. try:
  363. self.get_loc(key)
  364. return True
  365. except Exception:
  366. return False
  367. contains = __contains__
  368. @cache_readonly
  369. def _int64index(self):
  370. return Int64Index._simple_new(self.asi8, name=self.name)
  371. # ------------------------------------------------------------------------
  372. # Index Methods
  373. def _coerce_scalar_to_index(self, item):
  374. """
  375. we need to coerce a scalar to a compat for our index type
  376. Parameters
  377. ----------
  378. item : scalar item to coerce
  379. """
  380. return PeriodIndex([item], **self._get_attributes_dict())
  381. def __array__(self, dtype=None):
  382. if is_integer_dtype(dtype):
  383. return self.asi8
  384. else:
  385. return self.astype(object).values
  386. def __array_wrap__(self, result, context=None):
  387. """
  388. Gets called after a ufunc. Needs additional handling as
  389. PeriodIndex stores internal data as int dtype
  390. Replace this to __numpy_ufunc__ in future version
  391. """
  392. if isinstance(context, tuple) and len(context) > 0:
  393. func = context[0]
  394. if func is np.add:
  395. pass
  396. elif func is np.subtract:
  397. name = self.name
  398. left = context[1][0]
  399. right = context[1][1]
  400. if (isinstance(left, PeriodIndex) and
  401. isinstance(right, PeriodIndex)):
  402. name = left.name if left.name == right.name else None
  403. return Index(result, name=name)
  404. elif isinstance(left, Period) or isinstance(right, Period):
  405. return Index(result, name=name)
  406. elif isinstance(func, np.ufunc):
  407. if 'M->M' not in func.types:
  408. msg = "ufunc '{0}' not supported for the PeriodIndex"
  409. # This should be TypeError, but TypeError cannot be raised
  410. # from here because numpy catches.
  411. raise ValueError(msg.format(func.__name__))
  412. if is_bool_dtype(result):
  413. return result
  414. # the result is object dtype array of Period
  415. # cannot pass _simple_new as it is
  416. return type(self)(result, freq=self.freq, name=self.name)
  417. def asof_locs(self, where, mask):
  418. """
  419. where : array of timestamps
  420. mask : array of booleans where data is not NA
  421. """
  422. where_idx = where
  423. if isinstance(where_idx, DatetimeIndex):
  424. where_idx = PeriodIndex(where_idx.values, freq=self.freq)
  425. locs = self._ndarray_values[mask].searchsorted(
  426. where_idx._ndarray_values, side='right')
  427. locs = np.where(locs > 0, locs - 1, 0)
  428. result = np.arange(len(self))[mask].take(locs)
  429. first = mask.argmax()
  430. result[(locs == 0) & (where_idx._ndarray_values <
  431. self._ndarray_values[first])] = -1
  432. return result
  433. @Appender(_index_shared_docs['astype'])
  434. def astype(self, dtype, copy=True, how='start'):
  435. dtype = pandas_dtype(dtype)
  436. if is_datetime64_any_dtype(dtype):
  437. # 'how' is index-specific, isn't part of the EA interface.
  438. tz = getattr(dtype, 'tz', None)
  439. return self.to_timestamp(how=how).tz_localize(tz)
  440. # TODO: should probably raise on `how` here, so we don't ignore it.
  441. return super(PeriodIndex, self).astype(dtype, copy=copy)
  442. @Substitution(klass='PeriodIndex')
  443. @Appender(_shared_docs['searchsorted'])
  444. def searchsorted(self, value, side='left', sorter=None):
  445. if isinstance(value, Period):
  446. if value.freq != self.freq:
  447. msg = DIFFERENT_FREQ.format(cls=type(self).__name__,
  448. own_freq=self.freqstr,
  449. other_freq=value.freqstr)
  450. raise IncompatibleFrequency(msg)
  451. value = value.ordinal
  452. elif isinstance(value, compat.string_types):
  453. try:
  454. value = Period(value, freq=self.freq).ordinal
  455. except DateParseError:
  456. raise KeyError("Cannot interpret '{}' as period".format(value))
  457. return self._ndarray_values.searchsorted(value, side=side,
  458. sorter=sorter)
  459. @property
  460. def is_all_dates(self):
  461. return True
  462. @property
  463. def is_full(self):
  464. """
  465. Returns True if this PeriodIndex is range-like in that all Periods
  466. between start and end are present, in order.
  467. """
  468. if len(self) == 0:
  469. return True
  470. if not self.is_monotonic:
  471. raise ValueError('Index is not monotonic')
  472. values = self.asi8
  473. return ((values[1:] - values[:-1]) < 2).all()
  474. @property
  475. def inferred_type(self):
  476. # b/c data is represented as ints make sure we can't have ambiguous
  477. # indexing
  478. return 'period'
  479. def get_value(self, series, key):
  480. """
  481. Fast lookup of value from 1-dimensional ndarray. Only use this if you
  482. know what you're doing
  483. """
  484. s = com.values_from_object(series)
  485. try:
  486. return com.maybe_box(self,
  487. super(PeriodIndex, self).get_value(s, key),
  488. series, key)
  489. except (KeyError, IndexError):
  490. try:
  491. asdt, parsed, reso = parse_time_string(key, self.freq)
  492. grp = resolution.Resolution.get_freq_group(reso)
  493. freqn = resolution.get_freq_group(self.freq)
  494. vals = self._ndarray_values
  495. # if our data is higher resolution than requested key, slice
  496. if grp < freqn:
  497. iv = Period(asdt, freq=(grp, 1))
  498. ord1 = iv.asfreq(self.freq, how='S').ordinal
  499. ord2 = iv.asfreq(self.freq, how='E').ordinal
  500. if ord2 < vals[0] or ord1 > vals[-1]:
  501. raise KeyError(key)
  502. pos = np.searchsorted(self._ndarray_values, [ord1, ord2])
  503. key = slice(pos[0], pos[1] + 1)
  504. return series[key]
  505. elif grp == freqn:
  506. key = Period(asdt, freq=self.freq).ordinal
  507. return com.maybe_box(self, self._engine.get_value(s, key),
  508. series, key)
  509. else:
  510. raise KeyError(key)
  511. except TypeError:
  512. pass
  513. period = Period(key, self.freq)
  514. key = period.value if isna(period) else period.ordinal
  515. return com.maybe_box(self, self._engine.get_value(s, key),
  516. series, key)
  517. @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs)
  518. def get_indexer(self, target, method=None, limit=None, tolerance=None):
  519. target = ensure_index(target)
  520. if hasattr(target, 'freq') and target.freq != self.freq:
  521. msg = DIFFERENT_FREQ.format(cls=type(self).__name__,
  522. own_freq=self.freqstr,
  523. other_freq=target.freqstr)
  524. raise IncompatibleFrequency(msg)
  525. if isinstance(target, PeriodIndex):
  526. target = target.asi8
  527. if tolerance is not None:
  528. tolerance = self._convert_tolerance(tolerance, target)
  529. return Index.get_indexer(self._int64index, target, method,
  530. limit, tolerance)
  531. def _get_unique_index(self, dropna=False):
  532. """
  533. wrap Index._get_unique_index to handle NaT
  534. """
  535. res = super(PeriodIndex, self)._get_unique_index(dropna=dropna)
  536. if dropna:
  537. res = res.dropna()
  538. return res
  539. @Appender(Index.unique.__doc__)
  540. def unique(self, level=None):
  541. # override the Index.unique method for performance GH#23083
  542. if level is not None:
  543. # this should never occur, but is retained to make the signature
  544. # match Index.unique
  545. self._validate_index_level(level)
  546. values = self._ndarray_values
  547. result = unique1d(values)
  548. return self._shallow_copy(result)
  549. def get_loc(self, key, method=None, tolerance=None):
  550. """
  551. Get integer location for requested label
  552. Returns
  553. -------
  554. loc : int
  555. """
  556. try:
  557. return self._engine.get_loc(key)
  558. except KeyError:
  559. if is_integer(key):
  560. raise
  561. try:
  562. asdt, parsed, reso = parse_time_string(key, self.freq)
  563. key = asdt
  564. except TypeError:
  565. pass
  566. except DateParseError:
  567. # A string with invalid format
  568. raise KeyError("Cannot interpret '{}' as period".format(key))
  569. try:
  570. key = Period(key, freq=self.freq)
  571. except ValueError:
  572. # we cannot construct the Period
  573. # as we have an invalid type
  574. raise KeyError(key)
  575. try:
  576. ordinal = iNaT if key is NaT else key.ordinal
  577. if tolerance is not None:
  578. tolerance = self._convert_tolerance(tolerance,
  579. np.asarray(key))
  580. return self._int64index.get_loc(ordinal, method, tolerance)
  581. except KeyError:
  582. raise KeyError(key)
  583. def _maybe_cast_slice_bound(self, label, side, kind):
  584. """
  585. If label is a string or a datetime, cast it to Period.ordinal according
  586. to resolution.
  587. Parameters
  588. ----------
  589. label : object
  590. side : {'left', 'right'}
  591. kind : {'ix', 'loc', 'getitem'}
  592. Returns
  593. -------
  594. bound : Period or object
  595. Notes
  596. -----
  597. Value of `side` parameter should be validated in caller.
  598. """
  599. assert kind in ['ix', 'loc', 'getitem']
  600. if isinstance(label, datetime):
  601. return Period(label, freq=self.freq)
  602. elif isinstance(label, compat.string_types):
  603. try:
  604. _, parsed, reso = parse_time_string(label, self.freq)
  605. bounds = self._parsed_string_to_bounds(reso, parsed)
  606. return bounds[0 if side == 'left' else 1]
  607. except Exception:
  608. raise KeyError(label)
  609. elif is_integer(label) or is_float(label):
  610. self._invalid_indexer('slice', label)
  611. return label
  612. def _parsed_string_to_bounds(self, reso, parsed):
  613. if reso == 'year':
  614. t1 = Period(year=parsed.year, freq='A')
  615. elif reso == 'month':
  616. t1 = Period(year=parsed.year, month=parsed.month, freq='M')
  617. elif reso == 'quarter':
  618. q = (parsed.month - 1) // 3 + 1
  619. t1 = Period(year=parsed.year, quarter=q, freq='Q-DEC')
  620. elif reso == 'day':
  621. t1 = Period(year=parsed.year, month=parsed.month, day=parsed.day,
  622. freq='D')
  623. elif reso == 'hour':
  624. t1 = Period(year=parsed.year, month=parsed.month, day=parsed.day,
  625. hour=parsed.hour, freq='H')
  626. elif reso == 'minute':
  627. t1 = Period(year=parsed.year, month=parsed.month, day=parsed.day,
  628. hour=parsed.hour, minute=parsed.minute, freq='T')
  629. elif reso == 'second':
  630. t1 = Period(year=parsed.year, month=parsed.month, day=parsed.day,
  631. hour=parsed.hour, minute=parsed.minute,
  632. second=parsed.second, freq='S')
  633. else:
  634. raise KeyError(reso)
  635. return (t1.asfreq(self.freq, how='start'),
  636. t1.asfreq(self.freq, how='end'))
  637. def _get_string_slice(self, key):
  638. if not self.is_monotonic:
  639. raise ValueError('Partial indexing only valid for '
  640. 'ordered time series')
  641. key, parsed, reso = parse_time_string(key, self.freq)
  642. grp = resolution.Resolution.get_freq_group(reso)
  643. freqn = resolution.get_freq_group(self.freq)
  644. if reso in ['day', 'hour', 'minute', 'second'] and not grp < freqn:
  645. raise KeyError(key)
  646. t1, t2 = self._parsed_string_to_bounds(reso, parsed)
  647. return slice(self.searchsorted(t1.ordinal, side='left'),
  648. self.searchsorted(t2.ordinal, side='right'))
  649. def _convert_tolerance(self, tolerance, target):
  650. tolerance = DatetimeIndexOpsMixin._convert_tolerance(self, tolerance,
  651. target)
  652. if target.size != tolerance.size and tolerance.size > 1:
  653. raise ValueError('list-like tolerance size must match '
  654. 'target index size')
  655. return self._maybe_convert_timedelta(tolerance)
  656. def insert(self, loc, item):
  657. if not isinstance(item, Period) or self.freq != item.freq:
  658. return self.astype(object).insert(loc, item)
  659. idx = np.concatenate((self[:loc].asi8, np.array([item.ordinal]),
  660. self[loc:].asi8))
  661. return self._shallow_copy(idx)
  662. def join(self, other, how='left', level=None, return_indexers=False,
  663. sort=False):
  664. """
  665. See Index.join
  666. """
  667. self._assert_can_do_setop(other)
  668. result = Int64Index.join(self, other, how=how, level=level,
  669. return_indexers=return_indexers,
  670. sort=sort)
  671. if return_indexers:
  672. result, lidx, ridx = result
  673. return self._apply_meta(result), lidx, ridx
  674. return self._apply_meta(result)
  675. def _assert_can_do_setop(self, other):
  676. super(PeriodIndex, self)._assert_can_do_setop(other)
  677. if not isinstance(other, PeriodIndex):
  678. raise ValueError('can only call with other PeriodIndex-ed objects')
  679. if self.freq != other.freq:
  680. msg = DIFFERENT_FREQ.format(cls=type(self).__name__,
  681. own_freq=self.freqstr,
  682. other_freq=other.freqstr)
  683. raise IncompatibleFrequency(msg)
  684. def _wrap_setop_result(self, other, result):
  685. name = get_op_result_name(self, other)
  686. result = self._apply_meta(result)
  687. result.name = name
  688. return result
  689. def _apply_meta(self, rawarr):
  690. if not isinstance(rawarr, PeriodIndex):
  691. rawarr = PeriodIndex._simple_new(rawarr, freq=self.freq,
  692. name=self.name)
  693. return rawarr
  694. def __setstate__(self, state):
  695. """Necessary for making this object picklable"""
  696. if isinstance(state, dict):
  697. super(PeriodIndex, self).__setstate__(state)
  698. elif isinstance(state, tuple):
  699. # < 0.15 compat
  700. if len(state) == 2:
  701. nd_state, own_state = state
  702. data = np.empty(nd_state[1], dtype=nd_state[2])
  703. np.ndarray.__setstate__(data, nd_state)
  704. # backcompat
  705. freq = Period._maybe_convert_freq(own_state[1])
  706. else: # pragma: no cover
  707. data = np.empty(state)
  708. np.ndarray.__setstate__(self, state)
  709. freq = None # ?
  710. data = PeriodArray(data, freq=freq)
  711. self._data = data
  712. else:
  713. raise Exception("invalid pickle state")
  714. _unpickle_compat = __setstate__
  715. @property
  716. def flags(self):
  717. """ return the ndarray.flags for the underlying data """
  718. warnings.warn("{obj}.flags is deprecated and will be removed "
  719. "in a future version".format(obj=type(self).__name__),
  720. FutureWarning, stacklevel=2)
  721. return self._ndarray_values.flags
  722. def item(self):
  723. """
  724. return the first element of the underlying data as a python
  725. scalar
  726. """
  727. # TODO(DatetimeArray): remove
  728. if len(self) == 1:
  729. return self[0]
  730. else:
  731. # copy numpy's message here because Py26 raises an IndexError
  732. raise ValueError('can only convert an array of size 1 to a '
  733. 'Python scalar')
  734. @property
  735. def data(self):
  736. """ return the data pointer of the underlying data """
  737. warnings.warn("{obj}.data is deprecated and will be removed "
  738. "in a future version".format(obj=type(self).__name__),
  739. FutureWarning, stacklevel=2)
  740. return np.asarray(self._data).data
  741. @property
  742. def base(self):
  743. """ return the base object if the memory of the underlying data is
  744. shared
  745. """
  746. warnings.warn("{obj}.base is deprecated and will be removed "
  747. "in a future version".format(obj=type(self).__name__),
  748. FutureWarning, stacklevel=2)
  749. return np.asarray(self._data)
# Class-level setup, run once at import time. These hooks are not defined
# in this class body. Going by their names they install comparison
# operators, disable numeric and logical methods, and add the shared
# datetimelike methods -- NOTE(review): confirm against the base classes.
PeriodIndex._add_comparison_ops()
PeriodIndex._add_numeric_methods_disabled()
PeriodIndex._add_logical_methods_disabled()
PeriodIndex._add_datetimelike_methods()
  754. def period_range(start=None, end=None, periods=None, freq=None, name=None):
  755. """
  756. Return a fixed frequency PeriodIndex, with day (calendar) as the default
  757. frequency
  758. Parameters
  759. ----------
  760. start : string or period-like, default None
  761. Left bound for generating periods
  762. end : string or period-like, default None
  763. Right bound for generating periods
  764. periods : integer, default None
  765. Number of periods to generate
  766. freq : string or DateOffset, optional
  767. Frequency alias. By default the freq is taken from `start` or `end`
  768. if those are Period objects. Otherwise, the default is ``"D"`` for
  769. daily frequency.
  770. name : string, default None
  771. Name of the resulting PeriodIndex
  772. Returns
  773. -------
  774. prng : PeriodIndex
  775. Notes
  776. -----
  777. Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
  778. must be specified.
  779. To learn more about the frequency strings, please see `this link
  780. <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
  781. Examples
  782. --------
  783. >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')
  784. PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05',
  785. '2017-06', '2017-06', '2017-07', '2017-08', '2017-09',
  786. '2017-10', '2017-11', '2017-12', '2018-01'],
  787. dtype='period[M]', freq='M')
  788. If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor
  789. endpoints for a ``PeriodIndex`` with frequency matching that of the
  790. ``period_range`` constructor.
  791. >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'),
  792. ... end=pd.Period('2017Q2', freq='Q'), freq='M')
  793. PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'],
  794. dtype='period[M]', freq='M')
  795. """
  796. if com.count_not_none(start, end, periods) != 2:
  797. raise ValueError('Of the three parameters: start, end, and periods, '
  798. 'exactly two must be specified')
  799. if freq is None and (not isinstance(start, Period)
  800. and not isinstance(end, Period)):
  801. freq = 'D'
  802. data, freq = PeriodArray._generate_range(start, end, periods, freq,
  803. fields={})
  804. data = PeriodArray(data, freq=freq)
  805. return PeriodIndex(data, name=name)