# interval.py (45 KB)
  1. """ define the IntervalIndex """
  2. import textwrap
  3. import warnings
  4. import numpy as np
  5. from pandas._libs import Timedelta, Timestamp
  6. from pandas._libs.interval import Interval, IntervalMixin, IntervalTree
  7. from pandas.compat import add_metaclass
  8. from pandas.util._decorators import Appender, cache_readonly
  9. from pandas.util._doctools import _WritableDoc
  10. from pandas.util._exceptions import rewrite_exception
  11. from pandas.core.dtypes.cast import (
  12. find_common_type, infer_dtype_from_scalar, maybe_downcast_to_dtype)
  13. from pandas.core.dtypes.common import (
  14. ensure_platform_int, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype,
  15. is_dtype_equal, is_float, is_float_dtype, is_integer, is_integer_dtype,
  16. is_interval_dtype, is_list_like, is_number, is_object_dtype, is_scalar)
  17. from pandas.core.dtypes.missing import isna
  18. from pandas.core.arrays.interval import IntervalArray, _interval_shared_docs
  19. import pandas.core.common as com
  20. from pandas.core.config import get_option
  21. import pandas.core.indexes.base as ibase
  22. from pandas.core.indexes.base import (
  23. Index, _index_shared_docs, default_pprint, ensure_index)
  24. from pandas.core.indexes.datetimes import DatetimeIndex, date_range
  25. from pandas.core.indexes.multi import MultiIndex
  26. from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range
  27. from pandas.core.ops import get_op_result_name
  28. from pandas.tseries.frequencies import to_offset
  29. from pandas.tseries.offsets import DateOffset
  30. _VALID_CLOSED = {'left', 'right', 'both', 'neither'}
  31. _index_doc_kwargs = dict(ibase._index_doc_kwargs)
  32. _index_doc_kwargs.update(
  33. dict(klass='IntervalIndex',
  34. qualname="IntervalIndex",
  35. target_klass='IntervalIndex or list of Intervals',
  36. name=textwrap.dedent("""\
  37. name : object, optional
  38. Name to be stored in the index.
  39. """),
  40. ))
  41. def _get_next_label(label):
  42. dtype = getattr(label, 'dtype', type(label))
  43. if isinstance(label, (Timestamp, Timedelta)):
  44. dtype = 'datetime64'
  45. if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype):
  46. return label + np.timedelta64(1, 'ns')
  47. elif is_integer_dtype(dtype):
  48. return label + 1
  49. elif is_float_dtype(dtype):
  50. return np.nextafter(label, np.infty)
  51. else:
  52. raise TypeError('cannot determine next label for type {typ!r}'
  53. .format(typ=type(label)))
  54. def _get_prev_label(label):
  55. dtype = getattr(label, 'dtype', type(label))
  56. if isinstance(label, (Timestamp, Timedelta)):
  57. dtype = 'datetime64'
  58. if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype):
  59. return label - np.timedelta64(1, 'ns')
  60. elif is_integer_dtype(dtype):
  61. return label - 1
  62. elif is_float_dtype(dtype):
  63. return np.nextafter(label, -np.infty)
  64. else:
  65. raise TypeError('cannot determine next label for type {typ!r}'
  66. .format(typ=type(label)))
  67. def _get_interval_closed_bounds(interval):
  68. """
  69. Given an Interval or IntervalIndex, return the corresponding interval with
  70. closed bounds.
  71. """
  72. left, right = interval.left, interval.right
  73. if interval.open_left:
  74. left = _get_next_label(left)
  75. if interval.open_right:
  76. right = _get_prev_label(right)
  77. return left, right
  78. def _new_IntervalIndex(cls, d):
  79. """
  80. This is called upon unpickling, rather than the default which doesn't have
  81. arguments and breaks __new__
  82. """
  83. return cls.from_arrays(**d)
  84. @Appender(_interval_shared_docs['class'] % dict(
  85. klass="IntervalIndex",
  86. summary="Immutable index of intervals that are closed on the same side.",
  87. name=_index_doc_kwargs['name'],
  88. versionadded="0.20.0",
  89. extra_attributes="is_overlapping\nvalues\n",
  90. extra_methods="contains\n",
  91. examples=textwrap.dedent("""\
  92. Examples
  93. --------
  94. A new ``IntervalIndex`` is typically constructed using
  95. :func:`interval_range`:
  96. >>> pd.interval_range(start=0, end=5)
  97. IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
  98. closed='right',
  99. dtype='interval[int64]')
  100. It may also be constructed using one of the constructor
  101. methods: :meth:`IntervalIndex.from_arrays`,
  102. :meth:`IntervalIndex.from_breaks`, and :meth:`IntervalIndex.from_tuples`.
  103. See further examples in the doc strings of ``interval_range`` and the
  104. mentioned constructor methods.
  105. """),
  106. ))
  107. @add_metaclass(_WritableDoc)
  108. class IntervalIndex(IntervalMixin, Index):
    # String tag identifying this index type.
    _typ = 'intervalindex'
    # NOTE(review): presumably the attributes compared by base-class identity
    # checks -- confirm against Index.
    _comparables = ['name']
    # NOTE(review): presumably the attribute names returned by
    # ``_get_attributes_dict`` (used by copy/_shallow_copy/__reduce__) --
    # confirm against Index.
    _attributes = ['name', 'closed']

    # we would like our indexing holder to defer to us
    _defer_to_indexing = True

    # Immutable, so we are able to cache computations like isna in '_mask'
    _mask = None
  116. # --------------------------------------------------------------------
  117. # Constructors
  118. def __new__(cls, data, closed=None, dtype=None, copy=False,
  119. name=None, verify_integrity=True):
  120. if name is None and hasattr(data, 'name'):
  121. name = data.name
  122. with rewrite_exception("IntervalArray", cls.__name__):
  123. array = IntervalArray(data, closed=closed, copy=copy, dtype=dtype,
  124. verify_integrity=verify_integrity)
  125. return cls._simple_new(array, name)
  126. @classmethod
  127. def _simple_new(cls, array, name, closed=None):
  128. """
  129. Construct from an IntervalArray
  130. Parameters
  131. ----------
  132. array : IntervalArray
  133. name : str
  134. Attached as result.name
  135. closed : Any
  136. Ignored.
  137. """
  138. result = IntervalMixin.__new__(cls)
  139. result._data = array
  140. result.name = name
  141. result._reset_identity()
  142. return result
  143. @classmethod
  144. @Appender(_interval_shared_docs['from_breaks'] % _index_doc_kwargs)
  145. def from_breaks(cls, breaks, closed='right', name=None, copy=False,
  146. dtype=None):
  147. with rewrite_exception("IntervalArray", cls.__name__):
  148. array = IntervalArray.from_breaks(breaks, closed=closed, copy=copy,
  149. dtype=dtype)
  150. return cls._simple_new(array, name=name)
  151. @classmethod
  152. @Appender(_interval_shared_docs['from_arrays'] % _index_doc_kwargs)
  153. def from_arrays(cls, left, right, closed='right', name=None, copy=False,
  154. dtype=None):
  155. with rewrite_exception("IntervalArray", cls.__name__):
  156. array = IntervalArray.from_arrays(left, right, closed, copy=copy,
  157. dtype=dtype)
  158. return cls._simple_new(array, name=name)
  159. @classmethod
  160. @Appender(_interval_shared_docs['from_intervals'] % _index_doc_kwargs)
  161. def from_intervals(cls, data, closed=None, name=None, copy=False,
  162. dtype=None):
  163. msg = ('IntervalIndex.from_intervals is deprecated and will be '
  164. 'removed in a future version; Use IntervalIndex(...) instead')
  165. warnings.warn(msg, FutureWarning, stacklevel=2)
  166. with rewrite_exception("IntervalArray", cls.__name__):
  167. array = IntervalArray(data, closed=closed, copy=copy, dtype=dtype)
  168. if name is None and isinstance(data, cls):
  169. name = data.name
  170. return cls._simple_new(array, name=name)
  171. @classmethod
  172. @Appender(_interval_shared_docs['from_tuples'] % _index_doc_kwargs)
  173. def from_tuples(cls, data, closed='right', name=None, copy=False,
  174. dtype=None):
  175. with rewrite_exception("IntervalArray", cls.__name__):
  176. arr = IntervalArray.from_tuples(data, closed=closed, copy=copy,
  177. dtype=dtype)
  178. return cls._simple_new(arr, name=name)
  179. # --------------------------------------------------------------------
  180. @Appender(_index_shared_docs['_shallow_copy'])
  181. def _shallow_copy(self, left=None, right=None, **kwargs):
  182. result = self._data._shallow_copy(left=left, right=right)
  183. attributes = self._get_attributes_dict()
  184. attributes.update(kwargs)
  185. return self._simple_new(result, **attributes)
  186. @cache_readonly
  187. def _isnan(self):
  188. """Return a mask indicating if each value is NA"""
  189. if self._mask is None:
  190. self._mask = isna(self.left)
  191. return self._mask
  192. @cache_readonly
  193. def _engine(self):
  194. left = self._maybe_convert_i8(self.left)
  195. right = self._maybe_convert_i8(self.right)
  196. return IntervalTree(left, right, closed=self.closed)
  197. def __contains__(self, key):
  198. """
  199. return a boolean if this key is IN the index
  200. We *only* accept an Interval
  201. Parameters
  202. ----------
  203. key : Interval
  204. Returns
  205. -------
  206. boolean
  207. """
  208. if not isinstance(key, Interval):
  209. return False
  210. try:
  211. self.get_loc(key)
  212. return True
  213. except KeyError:
  214. return False
  215. def contains(self, key):
  216. """
  217. Return a boolean indicating if the key is IN the index
  218. We accept / allow keys to be not *just* actual
  219. objects.
  220. Parameters
  221. ----------
  222. key : int, float, Interval
  223. Returns
  224. -------
  225. boolean
  226. """
  227. try:
  228. self.get_loc(key)
  229. return True
  230. except KeyError:
  231. return False
  232. @Appender(_interval_shared_docs['to_tuples'] % dict(
  233. return_type="Index",
  234. examples="""
  235. Examples
  236. --------
  237. >>> idx = pd.IntervalIndex.from_arrays([0, np.nan, 2], [1, np.nan, 3])
  238. >>> idx.to_tuples()
  239. Index([(0.0, 1.0), (nan, nan), (2.0, 3.0)], dtype='object')
  240. >>> idx.to_tuples(na_tuple=False)
  241. Index([(0.0, 1.0), nan, (2.0, 3.0)], dtype='object')""",
  242. ))
  243. def to_tuples(self, na_tuple=True):
  244. tuples = self._data.to_tuples(na_tuple=na_tuple)
  245. return Index(tuples)
  246. @cache_readonly
  247. def _multiindex(self):
  248. return MultiIndex.from_arrays([self.left, self.right],
  249. names=['left', 'right'])
  250. @property
  251. def left(self):
  252. """
  253. Return the left endpoints of each Interval in the IntervalIndex as
  254. an Index
  255. """
  256. return self._data._left
  257. @property
  258. def right(self):
  259. """
  260. Return the right endpoints of each Interval in the IntervalIndex as
  261. an Index
  262. """
  263. return self._data._right
  264. @property
  265. def closed(self):
  266. """
  267. Whether the intervals are closed on the left-side, right-side, both or
  268. neither
  269. """
  270. return self._data._closed
  271. @Appender(_interval_shared_docs['set_closed'] % _index_doc_kwargs)
  272. def set_closed(self, closed):
  273. if closed not in _VALID_CLOSED:
  274. msg = "invalid option for 'closed': {closed}"
  275. raise ValueError(msg.format(closed=closed))
  276. # return self._shallow_copy(closed=closed)
  277. array = self._data.set_closed(closed)
  278. return self._simple_new(array, self.name)
  279. @property
  280. def length(self):
  281. """
  282. Return an Index with entries denoting the length of each Interval in
  283. the IntervalIndex
  284. """
  285. return self._data.length
  286. @property
  287. def size(self):
  288. # Avoid materializing ndarray[Interval]
  289. return self._data.size
  290. @property
  291. def shape(self):
  292. # Avoid materializing ndarray[Interval]
  293. return self._data.shape
  294. @property
  295. def itemsize(self):
  296. msg = ('IntervalIndex.itemsize is deprecated and will be removed in '
  297. 'a future version')
  298. warnings.warn(msg, FutureWarning, stacklevel=2)
  299. # supress the warning from the underlying left/right itemsize
  300. with warnings.catch_warnings():
  301. warnings.simplefilter('ignore')
  302. return self.left.itemsize + self.right.itemsize
  303. def __len__(self):
  304. return len(self.left)
  305. @cache_readonly
  306. def values(self):
  307. """
  308. Return the IntervalIndex's data as an IntervalArray.
  309. """
  310. return self._data
  311. @cache_readonly
  312. def _values(self):
  313. return self._data
  314. @cache_readonly
  315. def _ndarray_values(self):
  316. return np.array(self._data)
  317. def __array__(self, result=None):
  318. """ the array interface, return my values """
  319. return self._ndarray_values
  320. def __array_wrap__(self, result, context=None):
  321. # we don't want the superclass implementation
  322. return result
  323. def __reduce__(self):
  324. d = dict(left=self.left,
  325. right=self.right)
  326. d.update(self._get_attributes_dict())
  327. return _new_IntervalIndex, (self.__class__, d), None
  328. @Appender(_index_shared_docs['copy'])
  329. def copy(self, deep=False, name=None):
  330. array = self._data.copy(deep=deep)
  331. attributes = self._get_attributes_dict()
  332. if name is not None:
  333. attributes.update(name=name)
  334. return self._simple_new(array, **attributes)
  335. @Appender(_index_shared_docs['astype'])
  336. def astype(self, dtype, copy=True):
  337. with rewrite_exception('IntervalArray', self.__class__.__name__):
  338. new_values = self.values.astype(dtype, copy=copy)
  339. if is_interval_dtype(new_values):
  340. return self._shallow_copy(new_values.left, new_values.right)
  341. return super(IntervalIndex, self).astype(dtype, copy=copy)
  342. @cache_readonly
  343. def dtype(self):
  344. """Return the dtype object of the underlying data"""
  345. return self._data.dtype
  346. @property
  347. def inferred_type(self):
  348. """Return a string of the type inferred from the values"""
  349. return 'interval'
  350. @Appender(Index.memory_usage.__doc__)
  351. def memory_usage(self, deep=False):
  352. # we don't use an explicit engine
  353. # so return the bytes here
  354. return (self.left.memory_usage(deep=deep) +
  355. self.right.memory_usage(deep=deep))
  356. @cache_readonly
  357. def mid(self):
  358. """
  359. Return the midpoint of each Interval in the IntervalIndex as an Index
  360. """
  361. return self._data.mid
  362. @cache_readonly
  363. def is_monotonic(self):
  364. """
  365. Return True if the IntervalIndex is monotonic increasing (only equal or
  366. increasing values), else False
  367. """
  368. return self._multiindex.is_monotonic
  369. @cache_readonly
  370. def is_monotonic_increasing(self):
  371. """
  372. Return True if the IntervalIndex is monotonic increasing (only equal or
  373. increasing values), else False
  374. """
  375. return self._multiindex.is_monotonic_increasing
  376. @cache_readonly
  377. def is_monotonic_decreasing(self):
  378. """
  379. Return True if the IntervalIndex is monotonic decreasing (only equal or
  380. decreasing values), else False
  381. """
  382. return self._multiindex.is_monotonic_decreasing
  383. @cache_readonly
  384. def is_unique(self):
  385. """
  386. Return True if the IntervalIndex contains unique elements, else False
  387. """
  388. return self._multiindex.is_unique
  389. @cache_readonly
  390. @Appender(_interval_shared_docs['is_non_overlapping_monotonic']
  391. % _index_doc_kwargs)
  392. def is_non_overlapping_monotonic(self):
  393. return self._data.is_non_overlapping_monotonic
  394. @property
  395. def is_overlapping(self):
  396. """
  397. Return True if the IntervalIndex has overlapping intervals, else False.
  398. Two intervals overlap if they share a common point, including closed
  399. endpoints. Intervals that only have an open endpoint in common do not
  400. overlap.
  401. .. versionadded:: 0.24.0
  402. Returns
  403. -------
  404. bool
  405. Boolean indicating if the IntervalIndex has overlapping intervals.
  406. See Also
  407. --------
  408. Interval.overlaps : Check whether two Interval objects overlap.
  409. IntervalIndex.overlaps : Check an IntervalIndex elementwise for
  410. overlaps.
  411. Examples
  412. --------
  413. >>> index = pd.IntervalIndex.from_tuples([(0, 2), (1, 3), (4, 5)])
  414. >>> index
  415. IntervalIndex([(0, 2], (1, 3], (4, 5]],
  416. closed='right',
  417. dtype='interval[int64]')
  418. >>> index.is_overlapping
  419. True
  420. Intervals that share closed endpoints overlap:
  421. >>> index = pd.interval_range(0, 3, closed='both')
  422. >>> index
  423. IntervalIndex([[0, 1], [1, 2], [2, 3]],
  424. closed='both',
  425. dtype='interval[int64]')
  426. >>> index.is_overlapping
  427. True
  428. Intervals that only have an open endpoint in common do not overlap:
  429. >>> index = pd.interval_range(0, 3, closed='left')
  430. >>> index
  431. IntervalIndex([[0, 1), [1, 2), [2, 3)],
  432. closed='left',
  433. dtype='interval[int64]')
  434. >>> index.is_overlapping
  435. False
  436. """
  437. # GH 23309
  438. return self._engine.is_overlapping
  439. @Appender(_index_shared_docs['_convert_scalar_indexer'])
  440. def _convert_scalar_indexer(self, key, kind=None):
  441. if kind == 'iloc':
  442. return super(IntervalIndex, self)._convert_scalar_indexer(
  443. key, kind=kind)
  444. return key
  445. def _maybe_cast_slice_bound(self, label, side, kind):
  446. return getattr(self, side)._maybe_cast_slice_bound(label, side, kind)
  447. @Appender(_index_shared_docs['_convert_list_indexer'])
  448. def _convert_list_indexer(self, keyarr, kind=None):
  449. """
  450. we are passed a list-like indexer. Return the
  451. indexer for matching intervals.
  452. """
  453. locs = self.get_indexer_for(keyarr)
  454. # we have missing values
  455. if (locs == -1).any():
  456. raise KeyError
  457. return locs
  458. def _maybe_cast_indexed(self, key):
  459. """
  460. we need to cast the key, which could be a scalar
  461. or an array-like to the type of our subtype
  462. """
  463. if isinstance(key, IntervalIndex):
  464. return key
  465. subtype = self.dtype.subtype
  466. if is_float_dtype(subtype):
  467. if is_integer(key):
  468. key = float(key)
  469. elif isinstance(key, (np.ndarray, Index)):
  470. key = key.astype('float64')
  471. elif is_integer_dtype(subtype):
  472. if is_integer(key):
  473. key = int(key)
  474. return key
  475. def _needs_i8_conversion(self, key):
  476. """
  477. Check if a given key needs i8 conversion. Conversion is necessary for
  478. Timestamp, Timedelta, DatetimeIndex, and TimedeltaIndex keys. An
  479. Interval-like requires conversion if it's endpoints are one of the
  480. aforementioned types.
  481. Assumes that any list-like data has already been cast to an Index.
  482. Parameters
  483. ----------
  484. key : scalar or Index-like
  485. The key that should be checked for i8 conversion
  486. Returns
  487. -------
  488. boolean
  489. """
  490. if is_interval_dtype(key) or isinstance(key, Interval):
  491. return self._needs_i8_conversion(key.left)
  492. i8_types = (Timestamp, Timedelta, DatetimeIndex, TimedeltaIndex)
  493. return isinstance(key, i8_types)
    def _maybe_convert_i8(self, key):
        """
        Maybe convert a given key to its equivalent i8 value(s). Used as a
        preprocessing step prior to IntervalTree queries (self._engine), which
        expects numeric data.

        Parameters
        ----------
        key : scalar or list-like
            The key that should maybe be converted to i8.

        Returns
        -------
        key: scalar or list-like
            The original key if no conversion occurred, int if converted
            scalar, Int64Index if converted list-like.

        Raises
        ------
        ValueError
            If the dtype of a converted key does not equal the subtype of
            this index.
        """
        # keep the caller's object so it can be handed back untouched
        original = key
        if is_list_like(key):
            key = ensure_index(key)

        if not self._needs_i8_conversion(key):
            return original

        scalar = is_scalar(key)
        if is_interval_dtype(key) or isinstance(key, Interval):
            # convert left/right and reconstruct
            left = self._maybe_convert_i8(key.left)
            right = self._maybe_convert_i8(key.right)
            constructor = Interval if scalar else IntervalIndex.from_arrays
            # note: the rebuilt object takes self.closed, not key.closed
            return constructor(left, right, closed=self.closed)

        if scalar:
            # Timestamp/Timedelta
            key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True)
        else:
            # DatetimeIndex/TimedeltaIndex
            key_dtype, key_i8 = key.dtype, Index(key.asi8)
            if key.hasnans:
                # convert NaT from its i8 value to np.nan so it's not viewed
                # as a valid value, maybe causing errors (e.g. is_overlapping)
                key_i8 = key_i8.where(~key._isnan)

        # ensure consistency with IntervalIndex subtype
        subtype = self.dtype.subtype
        msg = ('Cannot index an IntervalIndex of subtype {subtype} with '
               'values of dtype {other}')
        if not is_dtype_equal(subtype, key_dtype):
            raise ValueError(msg.format(subtype=subtype, other=key_dtype))

        return key_i8
  538. def _check_method(self, method):
  539. if method is None:
  540. return
  541. if method in ['bfill', 'backfill', 'pad', 'ffill', 'nearest']:
  542. msg = 'method {method} not yet implemented for IntervalIndex'
  543. raise NotImplementedError(msg.format(method=method))
  544. raise ValueError("Invalid fill method")
    def _searchsorted_monotonic(self, label, side, exclude_label=False):
        """
        Locate ``label`` in one of the endpoint indexes of a non-overlapping
        monotonic IntervalIndex.

        Parameters
        ----------
        label : scalar
            Value to search for; interval-like labels are not supported.
        side : {'left', 'right'}
            Together with the direction of ``self.left``, selects which
            endpoint index is searched; also passed on to that search.
        exclude_label : bool, default False
            If True, ``label`` is shifted by one step even when the
            searched endpoint is closed.

        Raises
        ------
        KeyError
            If the index is not non-overlapping monotonic.
        NotImplementedError
            If ``label`` is an ``IntervalMixin`` instance.
        """
        if not self.is_non_overlapping_monotonic:
            raise KeyError('can only get slices from an IntervalIndex if '
                           'bounds are non-overlapping and all monotonic '
                           'increasing or decreasing')

        if isinstance(label, IntervalMixin):
            raise NotImplementedError

        # GH 20921: "not is_monotonic_increasing" for the second condition
        # instead of "is_monotonic_decreasing" to account for single element
        # indexes being both increasing and decreasing
        if ((side == 'left' and self.left.is_monotonic_increasing) or
                (side == 'right' and not self.left.is_monotonic_increasing)):
            # search the right endpoints, shifting the label forward when
            # the right side is open or the label itself must be excluded
            sub_idx = self.right
            if self.open_right or exclude_label:
                label = _get_next_label(label)
        else:
            # search the left endpoints, shifting the label backward when
            # the left side is open or the label itself must be excluded
            sub_idx = self.left
            if self.open_left or exclude_label:
                label = _get_prev_label(label)

        return sub_idx._searchsorted_monotonic(label, side)
  565. def _get_loc_only_exact_matches(self, key):
  566. if isinstance(key, Interval):
  567. if not self.is_unique:
  568. raise ValueError("cannot index with a slice Interval"
  569. " and a non-unique index")
  570. # TODO: this expands to a tuple index, see if we can
  571. # do better
  572. return Index(self._multiindex.values).get_loc(key)
  573. raise KeyError
  574. def _find_non_overlapping_monotonic_bounds(self, key):
  575. if isinstance(key, IntervalMixin):
  576. start = self._searchsorted_monotonic(
  577. key.left, 'left', exclude_label=key.open_left)
  578. stop = self._searchsorted_monotonic(
  579. key.right, 'right', exclude_label=key.open_right)
  580. elif isinstance(key, slice):
  581. # slice
  582. start, stop = key.start, key.stop
  583. if (key.step or 1) != 1:
  584. raise NotImplementedError("cannot slice with a slice step")
  585. if start is None:
  586. start = 0
  587. else:
  588. start = self._searchsorted_monotonic(start, 'left')
  589. if stop is None:
  590. stop = len(self)
  591. else:
  592. stop = self._searchsorted_monotonic(stop, 'right')
  593. else:
  594. # scalar or index-like
  595. start = self._searchsorted_monotonic(key, 'left')
  596. stop = self._searchsorted_monotonic(key, 'right')
  597. return start, stop
    def get_loc(self, key, method=None):
        """Get integer location, slice or boolean mask for requested label.

        Parameters
        ----------
        key : label
        method : {None}, optional
            * default: matches where the label is within an interval only.

        Returns
        -------
        loc : int if unique index, slice if monotonic index, else mask

        Raises
        ------
        KeyError
            On the non-overlapping monotonic path, when no interval matches.

        Examples
        --------
        >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2)
        >>> index = pd.IntervalIndex([i1, i2])
        >>> index.get_loc(1)
        0

        You can also supply an interval or a location for a point inside an
        interval.

        >>> index.get_loc(pd.Interval(0, 2))
        array([0, 1], dtype=int64)
        >>> index.get_loc(1.5)
        1

        If a label is in several intervals, you get the locations of all the
        relevant intervals.

        >>> i3 = pd.Interval(0, 2)
        >>> overlapping_index = pd.IntervalIndex([i2, i3])
        >>> overlapping_index.get_loc(1.5)
        array([0, 1], dtype=int64)
        """
        self._check_method(method)

        # keep the caller's key for the KeyError raised below
        original_key = key
        key = self._maybe_cast_indexed(key)

        if self.is_non_overlapping_monotonic:
            # fast path: locate the bounds by searching the endpoints
            if isinstance(key, Interval):
                left = self._maybe_cast_slice_bound(key.left, 'left', None)
                right = self._maybe_cast_slice_bound(key.right, 'right', None)
                key = Interval(left, right, key.closed)
            else:
                key = self._maybe_cast_slice_bound(key, 'left', None)

            start, stop = self._find_non_overlapping_monotonic_bounds(key)

            if start is None or stop is None:
                return slice(start, stop)
            elif start + 1 == stop:
                # exactly one matching position
                return start
            elif start < stop:
                return slice(start, stop)
            else:
                raise KeyError(original_key)

        else:
            # use the interval tree
            key = self._maybe_convert_i8(key)
            if isinstance(key, Interval):
                left, right = _get_interval_closed_bounds(key)
                return self._engine.get_loc_interval(left, right)
            else:
                return self._engine.get_loc(key)
  654. def get_value(self, series, key):
  655. if com.is_bool_indexer(key):
  656. loc = key
  657. elif is_list_like(key):
  658. loc = self.get_indexer(key)
  659. elif isinstance(key, slice):
  660. if not (key.step is None or key.step == 1):
  661. raise ValueError("cannot support not-default step in a slice")
  662. try:
  663. loc = self.get_loc(key)
  664. except TypeError:
  665. # we didn't find exact intervals or are non-unique
  666. msg = "unable to slice with this key: {key}".format(key=key)
  667. raise ValueError(msg)
  668. else:
  669. loc = self.get_loc(key)
  670. return series.iloc[loc]
    @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs)
    def get_indexer(self, target, method=None, limit=None, tolerance=None):
        # NOTE: ``limit`` and ``tolerance`` are accepted but not used here.

        self._check_method(method)
        target = ensure_index(target)
        target = self._maybe_cast_indexed(target)

        # identical target: every element maps to its own position
        if self.equals(target):
            return np.arange(len(self), dtype='intp')

        if self.is_non_overlapping_monotonic:
            start, stop = self._find_non_overlapping_monotonic_bounds(target)

            # when no target spans more than one position, each target is
            # either a single match (start + 1 == stop) or missing (-1);
            # otherwise fall through to the general handling below
            start_plus_one = start + 1
            if not ((start_plus_one < stop).any()):
                return np.where(start_plus_one == stop, start, -1)

        if not self.is_unique:
            raise ValueError("cannot handle non-unique indices")

        # IntervalIndex
        if isinstance(target, IntervalIndex):
            indexer = self._get_reindexer(target)

        # non IntervalIndex
        else:
            # NOTE(review): np.concatenate needs array-like pieces, but
            # get_loc can return an int or a slice on the non-overlapping
            # monotonic path -- confirm this branch is only reached when
            # get_loc returns arrays.
            indexer = np.concatenate([self.get_loc(i) for i in target])

        return ensure_platform_int(indexer)
    def _get_reindexer(self, target):
        """
        Return an indexer for a target IntervalIndex with self

        Parameters
        ----------
        target : IntervalIndex

        Returns
        -------
        ndarray
            Concatenation of the per-target position ranges; a target with
            no match contributes a single -1.
        """

        # find the left and right indexers
        left = self._maybe_convert_i8(target.left)
        right = self._maybe_convert_i8(target.right)
        lindexer = self._engine.get_indexer(left.values)
        rindexer = self._engine.get_indexer(right.values)

        # we want to return an indexer on the intervals
        # however, our keys could provide overlapping of multiple
        # intervals, so we iterate thru the indexers and construct
        # a set of indexers

        indexer = []
        n = len(self)

        for i, (lhs, rhs) in enumerate(zip(lindexer, rindexer)):

            target_value = target[i]

            # matching on the lhs bound
            if (lhs != -1 and
                    self.closed == 'right' and
                    target_value.left == self[lhs].right):
                lhs += 1

            # matching on the rhs bound
            if (rhs != -1 and
                    self.closed == 'left' and
                    target_value.right == self[rhs].left):
                rhs -= 1

            # not found
            if lhs == -1 and rhs == -1:
                indexer.append(np.array([-1]))

            elif rhs == -1:
                # only the left endpoint matched: run to the end of self
                indexer.append(np.arange(lhs, n))

            elif lhs == -1:

                # care about left/right closed here
                # NOTE(review): ``i`` is a position in ``target`` but is used
                # here to index ``self`` -- confirm this is intended.
                value = self[i]

                # target.closed same as self.closed
                if self.closed == target.closed:
                    if target_value.left < value.left:
                        indexer.append(np.array([-1]))
                        continue

                # target.closed == 'left'
                elif self.closed == 'right':
                    if target_value.left <= value.left:
                        indexer.append(np.array([-1]))
                        continue

                # target.closed == 'right'
                elif self.closed == 'left':
                    if target_value.left <= value.left:
                        indexer.append(np.array([-1]))
                        continue

                # only the right endpoint matched: run from the start of self
                indexer.append(np.arange(0, rhs + 1))

            else:
                # both endpoints matched: inclusive range between them
                indexer.append(np.arange(lhs, rhs + 1))

        return np.concatenate(indexer)
  746. @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs)
  747. def get_indexer_non_unique(self, target):
  748. target = self._maybe_cast_indexed(ensure_index(target))
  749. return super(IntervalIndex, self).get_indexer_non_unique(target)
  750. @Appender(_index_shared_docs['where'])
  751. def where(self, cond, other=None):
  752. if other is None:
  753. other = self._na_value
  754. values = np.where(cond, self.values, other)
  755. return self._shallow_copy(values)
  756. def delete(self, loc):
  757. """
  758. Return a new IntervalIndex with passed location(-s) deleted
  759. Returns
  760. -------
  761. new_index : IntervalIndex
  762. """
  763. new_left = self.left.delete(loc)
  764. new_right = self.right.delete(loc)
  765. return self._shallow_copy(new_left, new_right)
  766. def insert(self, loc, item):
  767. """
  768. Return a new IntervalIndex inserting new item at location. Follows
  769. Python list.append semantics for negative values. Only Interval
  770. objects and NA can be inserted into an IntervalIndex
  771. Parameters
  772. ----------
  773. loc : int
  774. item : object
  775. Returns
  776. -------
  777. new_index : IntervalIndex
  778. """
  779. if isinstance(item, Interval):
  780. if item.closed != self.closed:
  781. raise ValueError('inserted item must be closed on the same '
  782. 'side as the index')
  783. left_insert = item.left
  784. right_insert = item.right
  785. elif is_scalar(item) and isna(item):
  786. # GH 18295
  787. left_insert = right_insert = item
  788. else:
  789. raise ValueError('can only insert Interval objects and NA into '
  790. 'an IntervalIndex')
  791. new_left = self.left.insert(loc, left_insert)
  792. new_right = self.right.insert(loc, right_insert)
  793. return self._shallow_copy(new_left, new_right)
  794. def _as_like_interval_index(self, other):
  795. self._assert_can_do_setop(other)
  796. other = ensure_index(other)
  797. if not isinstance(other, IntervalIndex):
  798. msg = ('the other index needs to be an IntervalIndex too, but '
  799. 'was type {}').format(other.__class__.__name__)
  800. raise TypeError(msg)
  801. elif self.closed != other.closed:
  802. msg = ('can only do set operations between two IntervalIndex '
  803. 'objects that are closed on the same side')
  804. raise ValueError(msg)
  805. return other
  806. def _concat_same_dtype(self, to_concat, name):
  807. """
  808. assert that we all have the same .closed
  809. we allow a 0-len index here as well
  810. """
  811. if not len({i.closed for i in to_concat if len(i)}) == 1:
  812. msg = ('can only append two IntervalIndex objects '
  813. 'that are closed on the same side')
  814. raise ValueError(msg)
  815. return super(IntervalIndex, self)._concat_same_dtype(to_concat, name)
  816. @Appender(_index_shared_docs['take'] % _index_doc_kwargs)
  817. def take(self, indices, axis=0, allow_fill=True,
  818. fill_value=None, **kwargs):
  819. result = self._data.take(indices, axis=axis, allow_fill=allow_fill,
  820. fill_value=fill_value, **kwargs)
  821. attributes = self._get_attributes_dict()
  822. return self._simple_new(result, **attributes)
  823. def __getitem__(self, value):
  824. result = self._data[value]
  825. if isinstance(result, IntervalArray):
  826. return self._shallow_copy(result)
  827. else:
  828. # scalar
  829. return result
  830. # --------------------------------------------------------------------
  831. # Rendering Methods
  832. # __repr__ associated methods are based on MultiIndex
  833. def _format_with_header(self, header, **kwargs):
  834. return header + list(self._format_native_types(**kwargs))
  835. def _format_native_types(self, na_rep='', quoting=None, **kwargs):
  836. """ actually format my specific types """
  837. from pandas.io.formats.format import ExtensionArrayFormatter
  838. return ExtensionArrayFormatter(values=self,
  839. na_rep=na_rep,
  840. justify='all',
  841. leading_space=False).get_result()
  842. def _format_data(self, name=None):
  843. # TODO: integrate with categorical and make generic
  844. # name argument is unused here; just for compat with base / categorical
  845. n = len(self)
  846. max_seq_items = min((get_option(
  847. 'display.max_seq_items') or n) // 10, 10)
  848. formatter = str
  849. if n == 0:
  850. summary = '[]'
  851. elif n == 1:
  852. first = formatter(self[0])
  853. summary = '[{first}]'.format(first=first)
  854. elif n == 2:
  855. first = formatter(self[0])
  856. last = formatter(self[-1])
  857. summary = '[{first}, {last}]'.format(first=first, last=last)
  858. else:
  859. if n > max_seq_items:
  860. n = min(max_seq_items // 2, 10)
  861. head = [formatter(x) for x in self[:n]]
  862. tail = [formatter(x) for x in self[-n:]]
  863. summary = '[{head} ... {tail}]'.format(
  864. head=', '.join(head), tail=', '.join(tail))
  865. else:
  866. tail = [formatter(x) for x in self]
  867. summary = '[{tail}]'.format(tail=', '.join(tail))
  868. return summary + ',' + self._format_space()
  869. def _format_attrs(self):
  870. attrs = [('closed', repr(self.closed))]
  871. if self.name is not None:
  872. attrs.append(('name', default_pprint(self.name)))
  873. attrs.append(('dtype', "'{dtype}'".format(dtype=self.dtype)))
  874. return attrs
  875. def _format_space(self):
  876. space = ' ' * (len(self.__class__.__name__) + 1)
  877. return "\n{space}".format(space=space)
  878. # --------------------------------------------------------------------
  879. def argsort(self, *args, **kwargs):
  880. return np.lexsort((self.right, self.left))
  881. def equals(self, other):
  882. """
  883. Determines if two IntervalIndex objects contain the same elements
  884. """
  885. if self.is_(other):
  886. return True
  887. # if we can coerce to an II
  888. # then we can compare
  889. if not isinstance(other, IntervalIndex):
  890. if not is_interval_dtype(other):
  891. return False
  892. other = Index(getattr(other, '.values', other))
  893. return (self.left.equals(other.left) and
  894. self.right.equals(other.right) and
  895. self.closed == other.closed)
  896. @Appender(_interval_shared_docs['overlaps'] % _index_doc_kwargs)
  897. def overlaps(self, other):
  898. return self._data.overlaps(other)
  899. def _setop(op_name, sort=None):
  900. def func(self, other, sort=sort):
  901. other = self._as_like_interval_index(other)
  902. # GH 19016: ensure set op will not return a prohibited dtype
  903. subtypes = [self.dtype.subtype, other.dtype.subtype]
  904. common_subtype = find_common_type(subtypes)
  905. if is_object_dtype(common_subtype):
  906. msg = ('can only do {op} between two IntervalIndex '
  907. 'objects that have compatible dtypes')
  908. raise TypeError(msg.format(op=op_name))
  909. result = getattr(self._multiindex, op_name)(other._multiindex,
  910. sort=sort)
  911. result_name = get_op_result_name(self, other)
  912. # GH 19101: ensure empty results have correct dtype
  913. if result.empty:
  914. result = result.values.astype(self.dtype.subtype)
  915. else:
  916. result = result.values
  917. return type(self).from_tuples(result, closed=self.closed,
  918. name=result_name)
  919. return func
  920. @property
  921. def is_all_dates(self):
  922. """
  923. This is False even when left/right contain datetime-like objects,
  924. as the check is done on the Interval itself
  925. """
  926. return False
# set operations, each generated by the _setop factory from the method
# name; intersection is the only one created with sort=False, the others
# keep the factory default (sort=None)
union = _setop('union')
intersection = _setop('intersection', sort=False)
difference = _setop('difference')
symmetric_difference = _setop('symmetric_difference')
# TODO: arithmetic operations

# called once on the class itself; presumably installs disabled logical
# methods on IntervalIndex, going by the name -- confirm against the
# base class
IntervalIndex._add_logical_methods_disabled()
  933. def _is_valid_endpoint(endpoint):
  934. """helper for interval_range to check if start/end are valid types"""
  935. return any([is_number(endpoint),
  936. isinstance(endpoint, Timestamp),
  937. isinstance(endpoint, Timedelta),
  938. endpoint is None])
  939. def _is_type_compatible(a, b):
  940. """helper for interval_range to check type compat of start/end/freq"""
  941. is_ts_compat = lambda x: isinstance(x, (Timestamp, DateOffset))
  942. is_td_compat = lambda x: isinstance(x, (Timedelta, DateOffset))
  943. return ((is_number(a) and is_number(b)) or
  944. (is_ts_compat(a) and is_ts_compat(b)) or
  945. (is_td_compat(a) and is_td_compat(b)) or
  946. com._any_none(a, b))
def interval_range(start=None, end=None, periods=None, freq=None,
                   name=None, closed='right'):
    """
    Return a fixed frequency IntervalIndex

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals
    end : numeric or datetime-like, default None
        Right bound for generating intervals
    periods : integer, default None
        Number of periods to generate
    freq : numeric, string, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like.  Default is 1
        for numeric and 'D' for datetime-like.
    name : string, default None
        Name of the resulting IntervalIndex
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    rng : IntervalIndex

    Raises
    ------
    ValueError
        If not exactly three of ``start``, ``end``, ``periods`` and ``freq``
        are specified (after a default ``freq`` has been filled in), if
        ``start`` or ``end`` is not numeric or datetime-like, or if a
        non-numeric ``freq`` cannot be converted to a DateOffset.
    TypeError
        If ``periods`` is not a number, or if ``start``, ``end`` and ``freq``
        are not type compatible.

    See Also
    --------
    IntervalIndex : An Index of intervals that are all closed on the same side.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Numeric ``start`` and  ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                  closed='right', dtype='interval[int64]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]],
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``freq`` parameter specifies the frequency between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
                  closed='right', dtype='interval[float64]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]],
                  closed='right', dtype='interval[datetime64[ns]]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
              closed='right',
              dtype='interval[float64]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
                  closed='both', dtype='interval[int64]')
    """
    start = com.maybe_box_datetimelike(start)
    end = com.maybe_box_datetimelike(end)
    # whichever endpoint is available decides numeric vs datetime-like
    endpoint = start if start is not None else end

    # fill in a default freq (1 for numeric, 'D' otherwise) unless freq was
    # given or the com._any_none check on periods/start/end fails
    if freq is None and com._any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else 'D'

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError('Of the four parameters: start, end, periods, and '
                         'freq, exactly three must be specified')

    if not _is_valid_endpoint(start):
        msg = 'start must be numeric or datetime-like, got {start}'
        raise ValueError(msg.format(start=start))
    elif not _is_valid_endpoint(end):
        msg = 'end must be numeric or datetime-like, got {end}'
        raise ValueError(msg.format(end=end))

    # float periods are truncated to int; other non-integers are rejected
    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods) and periods is not None:
        msg = 'periods must be a number, got {periods}'
        raise TypeError(msg.format(periods=periods))

    # a non-numeric freq must be convertible by to_offset
    if freq is not None and not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError:
            raise ValueError('freq must be numeric or convertible to '
                             'DateOffset, got {freq}'.format(freq=freq))

    # verify type compatibility
    if not all([_is_type_compatible(start, end),
                _is_type_compatible(start, freq),
                _is_type_compatible(end, freq)]):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    if periods is not None:
        periods += 1

    if is_number(endpoint):
        # force consistency between start/end/freq (lower end if freq skips it)
        if com._all_not_none(start, end, freq):
            end -= (end - start) % freq

        # compute the period/start/end if unspecified (at most one)
        if periods is None:
            periods = int((end - start) // freq) + 1
        elif start is None:
            start = end - (periods - 1) * freq
        elif end is None:
            end = start + (periods - 1) * freq

        breaks = np.linspace(start, end, periods)
        if all(is_integer(x) for x in com._not_none(start, end, freq)):
            # np.linspace always produces float output
            breaks = maybe_downcast_to_dtype(breaks, 'int64')
    else:
        # delegate to the appropriate range function
        if isinstance(endpoint, Timestamp):
            range_func = date_range
        else:
            range_func = timedelta_range

        breaks = range_func(start=start, end=end, periods=periods, freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)