interval.py 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104
  1. from operator import le, lt
  2. import textwrap
  3. import numpy as np
  4. from pandas._libs.interval import (
  5. Interval, IntervalMixin, intervals_to_interval_bounds)
  6. from pandas.compat import add_metaclass
  7. from pandas.compat.numpy import function as nv
  8. from pandas.util._decorators import Appender
  9. from pandas.util._doctools import _WritableDoc
  10. from pandas.core.dtypes.cast import maybe_convert_platform
  11. from pandas.core.dtypes.common import (
  12. is_categorical_dtype, is_datetime64_any_dtype, is_float_dtype,
  13. is_integer_dtype, is_interval, is_interval_dtype, is_scalar,
  14. is_string_dtype, is_timedelta64_dtype, pandas_dtype)
  15. from pandas.core.dtypes.dtypes import IntervalDtype
  16. from pandas.core.dtypes.generic import (
  17. ABCDatetimeIndex, ABCInterval, ABCIntervalIndex, ABCPeriodIndex, ABCSeries)
  18. from pandas.core.dtypes.missing import isna, notna
  19. from pandas.core.arrays.base import (
  20. ExtensionArray, _extension_array_shared_docs)
  21. from pandas.core.arrays.categorical import Categorical
  22. import pandas.core.common as com
  23. from pandas.core.config import get_option
  24. from pandas.core.indexes.base import Index, ensure_index
  # Accepted values for the ``closed`` option of an interval array.
  _VALID_CLOSED = {'left', 'right', 'both', 'neither'}

  # Registry of docstring templates, filled in further down this module.
  _interval_shared_docs = {}

  # Substitutions applied to the shared templates when they are attached to
  # IntervalArray methods.
  _shared_docs_kwargs = {
      'klass': 'IntervalArray',
      'qualname': 'arrays.IntervalArray',
      'name': '',
  }
  32. _interval_shared_docs['class'] = """
  33. %(summary)s
  34. .. versionadded:: %(versionadded)s
  35. .. warning::
  36. The indexing behaviors are provisional and may change in
  37. a future version of pandas.
  38. Parameters
  39. ----------
  40. data : array-like (1-dimensional)
  41. Array-like containing Interval objects from which to build the
  42. %(klass)s.
  43. closed : {'left', 'right', 'both', 'neither'}, default 'right'
  44. Whether the intervals are closed on the left-side, right-side, both or
  45. neither.
  46. dtype : dtype or None, default None
  47. If None, dtype will be inferred.
  48. .. versionadded:: 0.23.0
  49. copy : bool, default False
  50. Copy the input data.
  51. %(name)s\
  52. verify_integrity : bool, default True
  53. Verify that the %(klass)s is valid.
  54. Attributes
  55. ----------
  56. left
  57. right
  58. closed
  59. mid
  60. length
  61. is_non_overlapping_monotonic
  62. %(extra_attributes)s\
  63. Methods
  64. -------
  65. from_arrays
  66. from_tuples
  67. from_breaks
  68. overlaps
  69. set_closed
  70. to_tuples
  71. %(extra_methods)s\
  72. See Also
  73. --------
  74. Index : The base pandas Index type.
  75. Interval : A bounded slice-like interval; the elements of an %(klass)s.
  76. interval_range : Function to create a fixed frequency IntervalIndex.
  77. cut : Bin values into discrete Intervals.
  78. qcut : Bin values into equal-sized Intervals based on rank or sample quantiles.
  79. Notes
  80. -----
  81. See the `user guide
  82. <http://pandas.pydata.org/pandas-docs/stable/advanced.html#intervalindex>`_
  83. for more.
  84. %(examples)s\
  85. """
  86. @Appender(_interval_shared_docs['class'] % dict(
  87. klass="IntervalArray",
  88. summary="Pandas array for interval data that are closed on the same side.",
  89. versionadded="0.24.0",
  90. name='',
  91. extra_attributes='',
  92. extra_methods='',
  93. examples=textwrap.dedent("""\
  94. Examples
  95. --------
  96. A new ``IntervalArray`` can be constructed directly from an array-like of
  97. ``Interval`` objects:
  98. >>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
  99. IntervalArray([(0, 1], (1, 5]],
  100. closed='right',
  101. dtype='interval[int64]')
  102. It may also be constructed using one of the constructor
  103. methods: :meth:`IntervalArray.from_arrays`,
  104. :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`.
  105. """),
  106. ))
  107. @add_metaclass(_WritableDoc)
  108. class IntervalArray(IntervalMixin, ExtensionArray):
  109. dtype = IntervalDtype()
  110. ndim = 1
  111. can_hold_na = True
  112. _na_value = _fill_value = np.nan
  def __new__(cls, data, closed=None, dtype=None, copy=False,
              verify_integrity=True):
      """
      Create an instance from interval-like data.

      Parameters
      ----------
      data : array-like
          Collection of intervals. An interval-dtype Series is replaced by
          its ``.values``; an instance of this class or of an IntervalIndex
          contributes its ``left``/``right`` bounds directly.
      closed : str, optional
          Closed side. When falsy, it is taken from ``data`` or from the
          bounds extracted from ``data``.
      dtype, copy, verify_integrity
          Forwarded unchanged to ``_simple_new``.

      Raises
      ------
      TypeError
          If ``data`` is a scalar.
      """
      if isinstance(data, ABCSeries) and is_interval_dtype(data):
          data = data.values

      if isinstance(data, (cls, ABCIntervalIndex)):
          # the bounds are already available separately; reuse them
          left, right = data.left, data.right
          closed = closed or data.closed
          return cls._simple_new(left, right, closed, copy=copy,
                                 dtype=dtype,
                                 verify_integrity=verify_integrity)

      # don't allow scalars
      if is_scalar(data):
          template = ("{}(...) must be called with a collection of some "
                      "kind, {} was passed")
          raise TypeError(template.format(cls.__name__, data))

      # might need to convert empty or purely na data
      data = maybe_convert_platform_interval(data)
      left, right, inferred_closed = intervals_to_interval_bounds(
          data, validate_closed=closed is None)
      closed = closed or inferred_closed

      return cls._simple_new(left, right, closed, copy=copy, dtype=dtype,
                             verify_integrity=verify_integrity)
  @classmethod
  def _simple_new(cls, left, right, closed=None,
                  copy=False, dtype=None, verify_integrity=True):
      """
      Assemble an instance from separate left and right bounds.

      Parameters
      ----------
      left, right : array-like
          Bounds of the intervals; each is passed through ``ensure_index``.
      closed : str, optional
          Closed side; a falsy value means ``'right'``.
      copy : bool, default False
          Forwarded to ``ensure_index``.
      dtype : dtype, optional
          Must resolve to an interval dtype. When its subtype is set, both
          sides are cast to that subtype.
      verify_integrity : bool, default True
          Whether to run ``_validate`` on the result.

      Raises
      ------
      TypeError
          If ``dtype`` is not an interval dtype, or the left side has a
          categorical or string dtype.
      ValueError
          If the two sides are of different types, are PeriodIndex, or are
          DatetimeIndex with differing time zones.
      """
      closed = closed or 'right'
      left = ensure_index(left, copy=copy)
      right = ensure_index(right, copy=copy)

      if dtype is not None:
          # GH 19262: dtype must be an IntervalDtype to override inferred
          dtype = pandas_dtype(dtype)
          if not is_interval_dtype(dtype):
              template = 'dtype must be an IntervalDtype, got {dtype}'
              raise TypeError(template.format(dtype=dtype))
          subtype = dtype.subtype
          if subtype is not None:
              left = left.astype(subtype)
              right = right.astype(subtype)

      # coerce dtypes to match if needed: an integer side is cast to the
      # dtype of a float counterpart
      if is_float_dtype(left) and is_integer_dtype(right):
          right = right.astype(left.dtype)
      elif is_float_dtype(right) and is_integer_dtype(left):
          left = left.astype(right.dtype)

      # Each check below raises, so they are written as independent guards.
      if type(left) != type(right):
          template = ('must not have differing left [{ltype}] and right '
                      '[{rtype}] types')
          raise ValueError(template.format(ltype=type(left).__name__,
                                           rtype=type(right).__name__))
      if is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype):
          # GH 19016
          raise TypeError('category, object, and string subtypes are not '
                          'supported for IntervalArray')
      if isinstance(left, ABCPeriodIndex):
          raise ValueError('Period dtypes are not supported, '
                           'use a PeriodIndex instead')
      if (isinstance(left, ABCDatetimeIndex) and
              str(left.tz) != str(right.tz)):
          template = ("left and right must have the same time zone, got "
                      "'{left_tz}' and '{right_tz}'")
          raise ValueError(template.format(left_tz=left.tz,
                                           right_tz=right.tz))

      result = IntervalMixin.__new__(cls)
      result._left = left
      result._right = right
      result._closed = closed
      if verify_integrity:
          result._validate()
      return result
  @classmethod
  def _from_sequence(cls, scalars, dtype=None, copy=False):
      """
      Build an instance from a sequence of scalars.

      Delegates to the regular constructor, forwarding ``dtype`` and
      ``copy``.
      """
      instance = cls(scalars, dtype=dtype, copy=copy)
      return instance
  @classmethod
  def _from_factorized(cls, values, original):
      """
      Rebuild an instance from factorized ``values``.

      Parameters
      ----------
      values : array
          The values to wrap.
      original : IntervalArray
          Source of the ``closed`` side and, for empty ``values``, of the
          subtype to cast to.
      """
      is_empty = len(values) == 0
      if is_empty:
          # An empty array returns object-dtype here. We can't create
          # a new IA from an (empty) object-dtype array, so turn it into
          # the correct dtype.
          values = values.astype(original.dtype.subtype)
      return cls(values, closed=original.closed)
  190. _interval_shared_docs['from_breaks'] = """
  191. Construct an %(klass)s from an array of splits.
  192. Parameters
  193. ----------
  194. breaks : array-like (1-dimensional)
  195. Left and right bounds for each interval.
  196. closed : {'left', 'right', 'both', 'neither'}, default 'right'
  197. Whether the intervals are closed on the left-side, right-side, both
  198. or neither.
  199. copy : boolean, default False
  200. copy the data
  201. dtype : dtype or None, default None
  202. If None, dtype will be inferred
  203. .. versionadded:: 0.23.0
  204. See Also
  205. --------
  206. interval_range : Function to create a fixed frequency IntervalIndex.
  207. %(klass)s.from_arrays : Construct from a left and right array.
  208. %(klass)s.from_tuples : Construct from a sequence of tuples.
  209. Examples
  210. --------
  211. >>> pd.%(qualname)s.from_breaks([0, 1, 2, 3])
  212. %(klass)s([(0, 1], (1, 2], (2, 3]],
  213. closed='right',
  214. dtype='interval[int64]')
  215. """
  @classmethod
  @Appender(_interval_shared_docs['from_breaks'] % _shared_docs_kwargs)
  def from_breaks(cls, breaks, closed='right', copy=False, dtype=None):
      # No docstring here on purpose: the documentation text comes from the
      # shared 'from_breaks' template handed to Appender above.
      breaks = maybe_convert_platform_interval(breaks)
      # every break except the last is a left bound, and every break except
      # the first is a right bound
      lower = breaks[:-1]
      upper = breaks[1:]
      return cls.from_arrays(lower, upper, closed, copy=copy, dtype=dtype)
  222. _interval_shared_docs['from_arrays'] = """
  223. Construct from two arrays defining the left and right bounds.
  224. Parameters
  225. ----------
  226. left : array-like (1-dimensional)
  227. Left bounds for each interval.
  228. right : array-like (1-dimensional)
  229. Right bounds for each interval.
  230. closed : {'left', 'right', 'both', 'neither'}, default 'right'
  231. Whether the intervals are closed on the left-side, right-side, both
  232. or neither.
  233. copy : boolean, default False
  234. Copy the data.
  235. dtype : dtype, optional
  236. If None, dtype will be inferred.
  237. .. versionadded:: 0.23.0
  238. Returns
  239. -------
  240. %(klass)s
  241. Raises
  242. ------
  243. ValueError
  244. When a value is missing in only one of `left` or `right`.
  245. When a value in `left` is greater than the corresponding value
  246. in `right`.
  247. See Also
  248. --------
  249. interval_range : Function to create a fixed frequency IntervalIndex.
  250. %(klass)s.from_breaks : Construct an %(klass)s from an array of
  251. splits.
  252. %(klass)s.from_tuples : Construct an %(klass)s from an
  253. array-like of tuples.
  254. Notes
  255. -----
  256. Each element of `left` must be less than or equal to the `right`
  257. element at the same position. If an element is missing, it must be
  258. missing in both `left` and `right`. A TypeError is raised when
  259. using an unsupported type for `left` or `right`. At the moment,
  260. 'category', 'object', and 'string' subtypes are not supported.
  261. Examples
  262. --------
  263. >>> %(klass)s.from_arrays([0, 1, 2], [1, 2, 3])
  264. %(klass)s([(0, 1], (1, 2], (2, 3]],
  265. closed='right',
  266. dtype='interval[int64]')
  267. """
  @classmethod
  @Appender(_interval_shared_docs['from_arrays'] % _shared_docs_kwargs)
  def from_arrays(cls, left, right, closed='right', copy=False, dtype=None):
      # No docstring here on purpose: the documentation text comes from the
      # shared 'from_arrays' template handed to Appender above.
      converted_left = maybe_convert_platform_interval(left)
      converted_right = maybe_convert_platform_interval(right)

      # integrity is always verified for bounds supplied by the caller
      return cls._simple_new(converted_left, converted_right, closed,
                             copy=copy, dtype=dtype, verify_integrity=True)
  275. _interval_shared_docs['from_intervals'] = """
  276. Construct an %(klass)s from a 1d array of Interval objects
  277. .. deprecated:: 0.23.0
  278. Parameters
  279. ----------
  280. data : array-like (1-dimensional)
  281. Array of Interval objects. All intervals must be closed on the same
  282. sides.
  283. copy : boolean, default False
  284. by-default copy the data, this is compat only and ignored
  285. dtype : dtype or None, default None
  286. If None, dtype will be inferred
  287. .. versionadded:: 0.23.0
  288. See Also
  289. --------
  290. interval_range : Function to create a fixed frequency IntervalIndex.
  291. %(klass)s.from_arrays : Construct an %(klass)s from a left and
  292. right array.
  293. %(klass)s.from_breaks : Construct an %(klass)s from an array of
  294. splits.
  295. %(klass)s.from_tuples : Construct an %(klass)s from an
  296. array-like of tuples.
  297. Examples
  298. --------
  299. >>> pd.%(qualname)s.from_intervals([pd.Interval(0, 1),
  300. ... pd.Interval(1, 2)])
  301. %(klass)s([(0, 1], (1, 2]],
  302. closed='right', dtype='interval[int64]')
  303. The generic Index constructor works identically when it infers an array
  304. of all intervals:
  305. >>> pd.Index([pd.Interval(0, 1), pd.Interval(1, 2)])
  306. %(klass)s([(0, 1], (1, 2]],
  307. closed='right', dtype='interval[int64]')
  308. """
  309. _interval_shared_docs['from_tuples'] = """
  310. Construct an %(klass)s from an array-like of tuples
  311. Parameters
  312. ----------
  313. data : array-like (1-dimensional)
  314. Array of tuples
  315. closed : {'left', 'right', 'both', 'neither'}, default 'right'
  316. Whether the intervals are closed on the left-side, right-side, both
  317. or neither.
  318. copy : boolean, default False
  319. by-default copy the data, this is compat only and ignored
  320. dtype : dtype or None, default None
  321. If None, dtype will be inferred
  322. .. versionadded:: 0.23.0
  323. See Also
  324. --------
  325. interval_range : Function to create a fixed frequency IntervalIndex.
  326. %(klass)s.from_arrays : Construct an %(klass)s from a left and
  327. right array.
  328. %(klass)s.from_breaks : Construct an %(klass)s from an array of
  329. splits.
  330. Examples
  331. --------
  332. >>> pd.%(qualname)s.from_tuples([(0, 1), (1, 2)])
  333. %(klass)s([(0, 1], (1, 2]],
  334. closed='right', dtype='interval[int64]')
  335. """
  @classmethod
  @Appender(_interval_shared_docs['from_tuples'] % _shared_docs_kwargs)
  def from_tuples(cls, data, closed='right', copy=False, dtype=None):
      # No docstring here on purpose: the documentation text comes from the
      # shared 'from_tuples' template handed to Appender above.
      if len(data) == 0:
          # ensure that empty data keeps input dtype
          left = right = data
      else:
          left, right = [], []

      for item in data:
          if isna(item):
              # a missing entry is missing on both sides
              lower = upper = np.nan
          else:
              try:
                  # need list of length 2 tuples, e.g. [(0, 1), (1, 2), ...]
                  lower, upper = item
              except ValueError:
                  msg = ('{name}.from_tuples requires tuples of '
                         'length 2, got {tpl}').format(name=cls.__name__,
                                                       tpl=item)
                  raise ValueError(msg)
              except TypeError:
                  msg = ('{name}.from_tuples received an invalid '
                         'item, {tpl}').format(name=cls.__name__, tpl=item)
                  raise TypeError(msg)
          left.append(lower)
          right.append(upper)

      # ``copy`` is accepted for compatibility only; False is always passed on
      return cls.from_arrays(left, right, closed, copy=False,
                             dtype=dtype)
  def _validate(self):
      """
      Check the internal consistency of the array.

      The checks run in this order, and the first failure raises:

      * ``closed`` is one of the valid options
      * left and right have the same length
      * left and right are missing at the same positions
      * every non-missing left bound is <= its right bound

      Raises
      ------
      ValueError
          If any of the checks fails.
      """
      closed = self.closed
      if closed not in _VALID_CLOSED:
          raise ValueError("invalid option for 'closed': {closed}"
                           .format(closed=closed))

      left, right = self.left, self.right
      if len(left) != len(right):
          raise ValueError('left and right must have the same length')

      left_present = notna(left)
      right_present = notna(right)
      if not (left_present == right_present).all():
          raise ValueError('missing values must be missing in the same '
                           'location both left and right sides')

      # the masks agree at this point, so one mask selects matching pairs
      ordered = left[left_present] <= right[left_present]
      if not ordered.all():
          raise ValueError('left side of interval must be <= right side')
  384. # ---------
  385. # Interface
  386. # ---------
  def __iter__(self):
      """Iterate over the elements of ``np.asarray(self)``."""
      materialized = np.asarray(self)
      return iter(materialized)
  def __len__(self):
      """Return the number of intervals, measured on the left bounds."""
      left = self.left
      return len(left)
  def __getitem__(self, value):
      """
      Select by ``value``, applied to both the left and the right bounds.

      Returns
      -------
      IntervalArray, Interval or NA
          A new array when the selection on the left bounds is an Index.
          Otherwise the fill value when the left bound is missing, or a
          single Interval built from the selected pair of bounds.
      """
      left = self.left[value]
      right = self.right[value]

      if isinstance(left, Index):
          return self._shallow_copy(left, right)

      # scalar
      if isna(left):
          return self._fill_value
      return Interval(left, right, self.closed)
  def __setitem__(self, key, value):
      """
      Set the intervals at ``key`` to ``value``.

      Parameters
      ----------
      key : indexer
          Applied to the ``.values`` of copies of both bounds.
      value : NA scalar, Interval, interval-dtype object or list-like
          A scalar NA sets both bounds to missing. An Interval or
          interval-dtype object must pass ``_check_closed_matches``. Anything
          else is first converted with ``IntervalArray(value)``.

      Raises
      ------
      TypeError
          If ``value`` cannot be converted to an IntervalArray.

      Notes
      -----
      Both bounds are written on copies first. ``_left`` and ``_right`` are
      rebound only after both writes have succeeded, so a failure while
      writing either side leaves the array unchanged.
      """
      # na value: need special casing to set directly on numpy arrays
      needs_float_conversion = False
      if is_scalar(value) and isna(value):
          subtype = self.dtype.subtype
          if is_integer_dtype(subtype):
              # can't set NaN on a numpy integer array
              needs_float_conversion = True
          elif is_datetime64_any_dtype(subtype):
              # need proper NaT to set directly on the numpy array
              value = np.datetime64('NaT')
          elif is_timedelta64_dtype(subtype):
              # need proper NaT to set directly on the numpy array
              value = np.timedelta64('NaT')
          value_left, value_right = value, value

      # scalar interval
      elif is_interval_dtype(value) or isinstance(value, ABCInterval):
          self._check_closed_matches(value, name="value")
          value_left, value_right = value.left, value.right

      else:
          # list-like of intervals
          try:
              array = IntervalArray(value)
              value_left, value_right = array.left, array.right
          except TypeError:
              # wrong type: not interval or NA
              msg = "'value' should be an interval type, got {} instead."
              raise TypeError(msg.format(type(value)))

      # Left and right must be updated atomically: work on deep copies and
      # swap them in only once both writes have gone through.
      new_left = self.left.copy(deep=True)
      new_right = self.right.copy(deep=True)
      if needs_float_conversion:
          new_left = new_left.astype('float')
          new_right = new_right.astype('float')

      new_left.values[key] = value_left
      new_right.values[key] = value_right

      self._left = new_left
      self._right = new_right
  def fillna(self, value=None, method=None, limit=None):
      """
      Fill missing intervals with a scalar Interval.

      Parameters
      ----------
      value : Interval
          Its ``left`` and ``right`` fill the missing left and right
          bounds respectively. Anything that is not an Interval is
          rejected.
      method : None
          Not supported for IntervalArray; must be left as None.
      limit : None
          Not supported for IntervalArray; must be left as None.

      Returns
      -------
      filled : IntervalArray with NA/NaN filled

      Raises
      ------
      TypeError
          If ``method`` or ``limit`` is given, or ``value`` is not an
          Interval.
      """
      if method is not None:
          raise TypeError('Filling by method is not supported for '
                          'IntervalArray.')
      if limit is not None:
          raise TypeError('limit is not supported for IntervalArray.')

      if not isinstance(value, ABCInterval):
          template = ("'IntervalArray.fillna' only supports filling with a "
                      "scalar 'pandas.Interval'. Got a '{}' instead.")
          raise TypeError(template.format(type(value).__name__))

      # use the wrapped ``_values`` when the object exposes them
      value = getattr(value, '_values', value)
      self._check_closed_matches(value, name="value")

      filled_left = self.left.fillna(value=value.left)
      filled_right = self.right.fillna(value=value.right)
      return self._shallow_copy(filled_left, filled_right)
  @property
  def dtype(self):
      """Return an IntervalDtype built from the dtype of the left bounds."""
      subtype = self.left.dtype
      return IntervalDtype(subtype)
  def astype(self, dtype, copy=True):
      """
      Cast to an ExtensionArray or NumPy array with dtype 'dtype'.

      Parameters
      ----------
      dtype : str or dtype
          Typecode or data-type to which the array is cast.
      copy : bool, default True
          Whether to copy the data, even if not necessary. If False,
          a copy is made only if the old dtype does not match the
          new dtype.

      Returns
      -------
      array : ExtensionArray or ndarray
          ExtensionArray or NumPy ndarray with 'dtype' for its dtype.

      Raises
      ------
      TypeError
          If the bounds cannot be cast to the subtype of an interval
          ``dtype``, or the array cannot be cast to ``dtype`` at all.
      """
      dtype = pandas_dtype(dtype)

      if is_interval_dtype(dtype):
          if dtype == self.dtype:
              if copy:
                  return self.copy()
              return self

          # need to cast to different subtype
          target_subtype = dtype.subtype
          try:
              new_left = self.left.astype(target_subtype)
              new_right = self.right.astype(target_subtype)
          except TypeError:
              template = ('Cannot convert {dtype} to {new_dtype}; subtypes '
                          'are incompatible')
              raise TypeError(template.format(dtype=self.dtype,
                                              new_dtype=dtype))
          return self._shallow_copy(new_left, new_right)

      if is_categorical_dtype(dtype):
          return Categorical(np.asarray(self))

      # TODO: This try/except will be repeated.
      try:
          return np.asarray(self).astype(dtype, copy=copy)
      except (TypeError, ValueError):
          template = 'Cannot cast {name} to dtype {dtype}'
          raise TypeError(template.format(name=type(self).__name__,
                                          dtype=dtype))
  @classmethod
  def _concat_same_type(cls, to_concat):
      """
      Concatenate multiple IntervalArray

      Parameters
      ----------
      to_concat : sequence of IntervalArray

      Returns
      -------
      IntervalArray

      Raises
      ------
      ValueError
          If the arrays do not all share a single ``closed`` value.
      """
      distinct_closed = set(array.closed for array in to_concat)
      if len(distinct_closed) != 1:
          raise ValueError("Intervals must all be closed on the same side.")
      closed = distinct_closed.pop()

      left_parts = [array.left for array in to_concat]
      right_parts = [array.right for array in to_concat]
      left = np.concatenate(left_parts)
      right = np.concatenate(right_parts)
      return cls._simple_new(left, right, closed=closed, copy=False)
  def _shallow_copy(self, left=None, right=None, closed=None):
      """
      Return a new IntervalArray with the replacement attributes

      Parameters
      ----------
      left : array-like
          Values to be used for the left-side of the intervals.
          If None, the existing left and right values will be used.
      right : array-like
          Values to be used for the right-side of the intervals.
          If None and left is IntervalArray-like, the left and right
          of the IntervalArray-like will be used.
      closed : {'left', 'right', 'both', 'neither'}, optional
          Whether the intervals are closed on the left-side, right-side,
          both or neither. If None, the existing closed will be used.

      Notes
      -----
      The result is built with ``verify_integrity=False``.
      """
      if left is None:
          # no values passed
          left, right = self.left, self.right
      elif right is None:
          # only single value passed, could be an IntervalArray
          # or array of Intervals
          interval_like = left
          if not isinstance(interval_like, (type(self), ABCIntervalIndex)):
              interval_like = type(self)(interval_like)
          left, right = interval_like.left, interval_like.right
      # otherwise both left and right were given and are used as they are

      return self._simple_new(
          left, right, closed=closed or self.closed, verify_integrity=False)
  def copy(self, deep=False):
      """
      Return a copy of the array.

      Parameters
      ----------
      deep : bool, default False
          Also copy the underlying data backing this array.

      Returns
      -------
      IntervalArray
      """
      if deep:
          left = self.left.copy(deep=True)
          right = self.right.copy(deep=True)
      else:
          left, right = self.left, self.right

      # TODO: Could skip verify_integrity here.
      return type(self).from_arrays(left, right, closed=self.closed)
  def isna(self):
      """Return the result of the module-level ``isna`` on the left bounds."""
      left = self.left
      return isna(left)
  @property
  def nbytes(self):
      """Return the sum of ``nbytes`` of the left and right bounds."""
      left_bytes = self.left.nbytes
      right_bytes = self.right.nbytes
      return left_bytes + right_bytes
  @property
  def size(self):
      """Return the ``size`` of the left bounds."""
      # Avoid materializing self.values
      left = self.left
      return left.size
  @property
  def shape(self):
      """Return the ``shape`` of the left bounds."""
      left = self.left
      return left.shape
  def take(self, indices, allow_fill=False, fill_value=None, axis=None,
           **kwargs):
      """
      Take elements from the IntervalArray.

      Parameters
      ----------
      indices : sequence of integers
          Indices to be taken.

      allow_fill : bool, default False
          How to handle negative values in `indices`.

          * False: negative values in `indices` indicate positional indices
            from the right (the default). This is similar to
            :func:`numpy.take`.

          * True: negative values in `indices` indicate
            missing values. These values are set to `fill_value`. Any
            other negative values raise a ``ValueError``.

      fill_value : Interval or NA, optional
          Fill value to use for NA-indices when `allow_fill` is True.
          This may be ``None``, in which case the NA value of the left
          bounds (``self.left._na_value``) is used on both sides.

          `fill_value` should be the user-facing "boxed" scalar; an
          Interval is split into its left and right parts before the take
          is carried out on each side.

      axis : any, default None
          Present for compat with IntervalIndex; does nothing.

      Returns
      -------
      IntervalArray

      Raises
      ------
      IndexError
          When the indices are out of bounds for the array.
      ValueError
          When `indices` contains negative values other than ``-1``
          and `allow_fill` is True, or when `fill_value` is a non-scalar
          that is not NA.
      """
      from pandas.core.algorithms import take

      nv.validate_take((), kwargs)

      # by default the same fill value goes to both sides
      fill_left = fill_right = fill_value
      if allow_fill:
          if fill_value is None:
              fill_left = fill_right = self.left._na_value
          elif is_interval(fill_value):
              self._check_closed_matches(fill_value, name='fill_value')
              fill_left = fill_value.left
              fill_right = fill_value.right
          elif not is_scalar(fill_value) and notna(fill_value):
              # NOTE(review): the message names 'fillna' although this is
              # 'take'; the text is kept unchanged for compatibility.
              template = ("'IntervalArray.fillna' only supports filling "
                          "with a 'scalar pandas.Interval or NA'. "
                          "Got a '{}' instead.")
              raise ValueError(template.format(type(fill_value).__name__))

      taken_left = take(self.left, indices,
                        allow_fill=allow_fill, fill_value=fill_left)
      taken_right = take(self.right, indices,
                         allow_fill=allow_fill, fill_value=fill_right)

      return self._shallow_copy(taken_left, taken_right)
  def value_counts(self, dropna=True):
      """
      Returns a Series containing counts of each interval.

      Parameters
      ----------
      dropna : boolean, default True
          Don't include counts of NaN.

      Returns
      -------
      counts : Series

      See Also
      --------
      Series.value_counts
      """
      # TODO: implement this is a non-naive way!
      from pandas.core.algorithms import value_counts

      # counting is done on the materialized array of intervals
      materialized = np.asarray(self)
      return value_counts(materialized, dropna=dropna)
  670. # Formatting
  def _format_data(self):
      """
      Return the bracketed, comma-separated text of the intervals.

      Arrays longer than a limit derived from the
      ``display.max_seq_items`` option are shortened to a head and a tail
      joined by ``' ... '``.
      """
      # TODO: integrate with categorical and make generic
      n = len(self)
      max_seq_items = min((get_option(
          'display.max_seq_items') or n) // 10, 10)

      if n == 0:
          return '[]'
      if n == 1:
          return '[{first}]'.format(first=str(self[0]))
      if n == 2:
          return '[{first}, {last}]'.format(first=str(self[0]),
                                            last=str(self[-1]))

      if n > max_seq_items:
          # NOTE(review): ``shown`` can be 0 for small option values, in
          # which case ``self[-0:]`` selects the whole array - confirm
          # this is intended.
          shown = min(max_seq_items // 2, 10)
          head = [str(x) for x in self[:shown]]
          tail = [str(x) for x in self[-shown:]]
          return '[{head} ... {tail}]'.format(
              head=', '.join(head), tail=', '.join(tail))

      everything = [str(x) for x in self]
      return '[{tail}]'.format(tail=', '.join(everything))
  def __repr__(self):
      """
      Return a three-line representation: class name with the formatted
      data, then ``closed`` and ``dtype`` on lines of their own, indented
      to line up after the opening parenthesis.
      """
      class_name = self.__class__.__name__
      template = textwrap.dedent("""\
          {cls}({data},
          {lead}closed='{closed}',
          {lead}dtype='{dtype}')""")
      return template.format(cls=class_name,
                             data=self._format_data(),
                             # one extra space stands for the "("
                             lead=' ' * (len(class_name) + 1),
                             closed=self.closed, dtype=self.dtype)
  def _format_space(self):
      """
      Return a newline followed by as many spaces as the class name is
      long, plus one.
      """
      width = len(self.__class__.__name__) + 1
      return "\n{space}".format(space=' ' * width)
  @property
  def left(self):
      """
      Return the left endpoints of each Interval in the IntervalArray as
      an Index
      """
      # read-only view of the private attribute
      left_bounds = self._left
      return left_bounds
  @property
  def right(self):
      """
      Return the right endpoints of each Interval in the IntervalArray as
      an Index
      """
      # read-only view of the private attribute
      right_bounds = self._right
      return right_bounds
  @property
  def closed(self):
      """
      Which side(s) of the intervals are closed: the left-side, the
      right-side, both or neither
      """
      return self._closed
  _interval_shared_docs['set_closed'] = """
      Return an %(klass)s identical to the current one, but closed on the
      specified side
      .. versionadded:: 0.24.0
      Parameters
      ----------
      closed : {'left', 'right', 'both', 'neither'}
          Whether the intervals are closed on the left-side, right-side, both
          or neither.
      Returns
      -------
      new_index : %(klass)s
      Examples
      --------
      >>> index = pd.interval_range(0, 3)
      >>> index
      IntervalIndex([(0, 1], (1, 2], (2, 3]],
                    closed='right',
                    dtype='interval[int64]')
      >>> index.set_closed('both')
      IntervalIndex([[0, 1], [1, 2], [2, 3]],
                    closed='both',
                    dtype='interval[int64]')
      """

  @Appender(_interval_shared_docs['set_closed'] % _shared_docs_kwargs)
  def set_closed(self, closed):
      # The docstring is supplied by the Appender decorator above.
      # A recognised option yields a shallow copy that differs only in its
      # ``closed`` value; anything else is rejected with a ValueError.
      if closed in _VALID_CLOSED:
          return self._shallow_copy(closed=closed)
      raise ValueError(
          "invalid option for 'closed': {closed}".format(closed=closed))
  @property
  def length(self):
      """
      The length of each Interval in the IntervalArray (right endpoint
      minus left endpoint), returned as an Index

      Raises
      ------
      TypeError
          If subtraction is not defined for the endpoints.
      """
      try:
          span = self.right - self.left
      except TypeError:
          # subtraction is undefined for some endpoint types, e.g. strings;
          # re-raise with a message that names the actual problem
          raise TypeError(
              'IntervalArray contains Intervals without defined length, '
              'e.g. Intervals with string endpoints')
      else:
          return span
  @property
  def mid(self):
      """
      The midpoint of each Interval in the IntervalArray, returned as an
      Index
      """
      try:
          endpoint_sum = self.left + self.right
          return 0.5 * endpoint_sum
      except TypeError:
          # datetime safe version: endpoints that cannot be added or scaled
          # are offset from the left endpoint by half the length instead
          half_length = 0.5 * self.length
          return self.left + half_length
  _interval_shared_docs['is_non_overlapping_monotonic'] = """
      Return True if the %(klass)s is non-overlapping (no Intervals share
      points) and is either monotonic increasing or monotonic decreasing,
      else False
      """

  @property
  @Appender(_interval_shared_docs['is_non_overlapping_monotonic']
            % _shared_docs_kwargs)
  def is_non_overlapping_monotonic(self):
      # The docstring is supplied by the Appender decorator above.
      # Neighbouring intervals must be ordered either
      #   increasing, e.g. [0, 1), [1, 2), [2, 3), ...
      #   or decreasing, e.g. [-1, 0), [-2, -1), [-3, -2), ...
      # (left <= right is already required of every interval).
      if self.closed != 'both':
          # At least one side is excluded from the intervals, so touching
          # endpoints do not overlap and non-strict inequality suffices.
          return bool((self.right[:-1] <= self.left[1:]).all() or
                      (self.left[:-1] >= self.right[1:]).all())

      # Both sides are included: equal endpoints overlap at a point, so
      # the inequality has to be strict.
      return bool((self.right[:-1] < self.left[1:]).all() or
                  (self.left[:-1] > self.right[1:]).all())
  805. # Conversion
  def __array__(self, dtype=None):
      """
      Return the IntervalArray's data as an object-dtype numpy array that
      holds one Interval per element, with ``np.nan`` at the positions
      reported missing by ``isna()``.

      The ``dtype`` argument is ignored.
      """
      left, right = self.left, self.right
      is_missing = self.isna()
      closed = self._closed

      size = len(left)
      out = np.empty(size, dtype=object)
      # positional indexing (rather than iteration) is kept on purpose so
      # the endpoint scalars are exactly what ``left[i]``/``right[i]`` give
      for pos in range(size):
          out[pos] = (np.nan if is_missing[pos]
                      else Interval(left[pos], right[pos], closed))
      return out
  _interval_shared_docs['to_tuples'] = """\
      Return an %(return_type)s of tuples of the form (left, right)
      Parameters
      ----------
      na_tuple : boolean, default True
          Returns NA as a tuple if True, ``(nan, nan)``, or just as the NA
          value itself if False, ``nan``.
          .. versionadded:: 0.23.0
      Returns
      -------
      tuples: %(return_type)s
      %(examples)s\
      """

  @Appender(_interval_shared_docs['to_tuples'] % dict(
      return_type='ndarray',
      examples='',
  ))
  def to_tuples(self, na_tuple=True):
      # The docstring is supplied by the Appender decorator above.
      pairs = com.asarray_tuplesafe(zip(self.left, self.right))
      if na_tuple:
          return pairs
      # GH 18756: where isna() is True, substitute a bare nan for the tuple
      return np.where(~self.isna(), pairs, np.nan)
  @Appender(_extension_array_shared_docs['repeat'] % _shared_docs_kwargs)
  def repeat(self, repeats, axis=None):
      # The docstring is supplied by the Appender decorator above.
      # ``axis`` is only handed to the validator; it is not used otherwise.
      nv.validate_repeat((), {'axis': axis})
      # repeat both endpoint sides with the same ``repeats`` so that the
      # (left, right) pairs stay aligned
      return self._shallow_copy(left=self.left.repeat(repeats),
                                right=self.right.repeat(repeats))
  _interval_shared_docs['overlaps'] = """
      Check elementwise if an Interval overlaps the values in the %(klass)s.
      Two intervals overlap if they share a common point, including closed
      endpoints. Intervals that only have an open endpoint in common do not
      overlap.
      .. versionadded:: 0.24.0
      Parameters
      ----------
      other : Interval
          Interval to check against for an overlap.
      Returns
      -------
      ndarray
          Boolean array positionally indicating where an overlap occurs.
      See Also
      --------
      Interval.overlaps : Check whether two Interval objects overlap.
      Examples
      --------
      >>> intervals = pd.%(qualname)s.from_tuples([(0, 1), (1, 3), (2, 4)])
      >>> intervals
      %(klass)s([(0, 1], (1, 3], (2, 4]],
          closed='right',
          dtype='interval[int64]')
      >>> intervals.overlaps(pd.Interval(0.5, 1.5))
      array([ True, True, False])
      Intervals that share closed endpoints overlap:
      >>> intervals.overlaps(pd.Interval(1, 3, closed='left'))
      array([ True, True, True])
      Intervals that only have an open endpoint in common do not overlap:
      >>> intervals.overlaps(pd.Interval(1, 2, closed='right'))
      array([False, True, False])
      """

  @Appender(_interval_shared_docs['overlaps'] % _shared_docs_kwargs)
  def overlaps(self, other):
      # The docstring is supplied by the Appender decorator above.
      if isinstance(other, (IntervalArray, ABCIntervalIndex)):
          # array-like ``other`` is not supported
          raise NotImplementedError
      if not isinstance(other, Interval):
          raise TypeError(
              '`other` must be Interval-like, got {other}'.format(
                  other=type(other).__name__))

      # A touching endpoint counts as an overlap only when both of the
      # sides that meet there are closed; otherwise compare strictly.
      if self.closed_left and other.closed_right:
          self_starts_before = le
      else:
          self_starts_before = lt

      if other.closed_left and self.closed_right:
          other_starts_before = le
      else:
          other_starts_before = lt

      # Overlapping is the negation of being disjoint, where
      #   disjoint = (A.left > B.right) or (B.left > A.right)
      # Writing the negation out directly needs fewer operations.
      return (self_starts_before(self.left, other.right) &
              other_starts_before(other.left, self.right))
  def maybe_convert_platform_interval(values):
      """
      Platform conversion with special casing for IntervalArray.

      Wraps maybe_convert_platform, changing the dtype that comes back in
      the cases where the default is not compatible with IntervalArray.
      For example, an empty list is returned with integer dtype rather than
      object dtype, because object dtype is prohibited for IntervalArray.

      Parameters
      ----------
      values : array-like

      Returns
      -------
      array
      """
      if isinstance(values, (list, tuple)) and len(values) == 0:
          # GH 19016
          # an empty list/tuple would get object dtype by default, which is
          # prohibited for IntervalArray, so hand back an empty int64 array
          return np.array([], dtype=np.int64)

      if is_categorical_dtype(values):
          # convert categorical input to a plain ndarray before the
          # platform conversion
          values = np.asarray(values)

      return maybe_convert_platform(values)