range.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
  1. from datetime import timedelta
  2. import operator
  3. from sys import getsizeof
  4. import warnings
  5. import numpy as np
  6. from pandas._libs import index as libindex, lib
  7. import pandas.compat as compat
  8. from pandas.compat import get_range_parameters, lrange, range
  9. from pandas.compat.numpy import function as nv
  10. from pandas.util._decorators import Appender, cache_readonly
  11. from pandas.core.dtypes import concat as _concat
  12. from pandas.core.dtypes.common import (
  13. is_int64_dtype, is_integer, is_scalar, is_timedelta64_dtype)
  14. from pandas.core.dtypes.generic import (
  15. ABCDataFrame, ABCSeries, ABCTimedeltaIndex)
  16. from pandas.core import ops
  17. import pandas.core.common as com
  18. import pandas.core.indexes.base as ibase
  19. from pandas.core.indexes.base import Index, _index_shared_docs
  20. from pandas.core.indexes.numeric import Int64Index
  21. class RangeIndex(Int64Index):
  22. """
  23. Immutable Index implementing a monotonic integer range.
  24. RangeIndex is a memory-saving special case of Int64Index limited to
  25. representing monotonic ranges. Using RangeIndex may in some instances
  26. improve computing speed.
  27. This is the default index type used
  28. by DataFrame and Series when no explicit index is provided by the user.
  29. Parameters
  30. ----------
  31. start : int (default: 0), or other RangeIndex instance
  32. If int and "stop" is not given, interpreted as "stop" instead.
  33. stop : int (default: 0)
  34. step : int (default: 1)
  35. name : object, optional
  36. Name to be stored in the index
  37. copy : bool, default False
  38. Unused, accepted for homogeneity with other index types.
  39. Attributes
  40. ----------
  41. None
  42. Methods
  43. -------
  44. from_range
  45. See Also
  46. --------
  47. Index : The base pandas Index type.
  48. Int64Index : Index of int64 data.
  49. """
  50. _typ = 'rangeindex'
  51. _engine_type = libindex.Int64Engine
  52. # --------------------------------------------------------------------
  53. # Constructors
  54. def __new__(cls, start=None, stop=None, step=None,
  55. dtype=None, copy=False, name=None, fastpath=None):
  56. if fastpath is not None:
  57. warnings.warn("The 'fastpath' keyword is deprecated, and will be "
  58. "removed in a future version.",
  59. FutureWarning, stacklevel=2)
  60. if fastpath:
  61. return cls._simple_new(start, stop, step, name=name)
  62. cls._validate_dtype(dtype)
  63. # RangeIndex
  64. if isinstance(start, RangeIndex):
  65. if name is None:
  66. name = start.name
  67. return cls._simple_new(name=name,
  68. **dict(start._get_data_as_items()))
  69. # validate the arguments
  70. def ensure_int(value, field):
  71. msg = ("RangeIndex(...) must be called with integers,"
  72. " {value} was passed for {field}")
  73. if not is_scalar(value):
  74. raise TypeError(msg.format(value=type(value).__name__,
  75. field=field))
  76. try:
  77. new_value = int(value)
  78. assert(new_value == value)
  79. except (TypeError, ValueError, AssertionError):
  80. raise TypeError(msg.format(value=type(value).__name__,
  81. field=field))
  82. return new_value
  83. if com._all_none(start, stop, step):
  84. msg = "RangeIndex(...) must be called with integers"
  85. raise TypeError(msg)
  86. elif start is None:
  87. start = 0
  88. else:
  89. start = ensure_int(start, 'start')
  90. if stop is None:
  91. stop = start
  92. start = 0
  93. else:
  94. stop = ensure_int(stop, 'stop')
  95. if step is None:
  96. step = 1
  97. elif step == 0:
  98. raise ValueError("Step must not be zero")
  99. else:
  100. step = ensure_int(step, 'step')
  101. return cls._simple_new(start, stop, step, name)
  102. @classmethod
  103. def from_range(cls, data, name=None, dtype=None, **kwargs):
  104. """ Create RangeIndex from a range (py3), or xrange (py2) object. """
  105. if not isinstance(data, range):
  106. raise TypeError(
  107. '{0}(...) must be called with object coercible to a '
  108. 'range, {1} was passed'.format(cls.__name__, repr(data)))
  109. start, stop, step = get_range_parameters(data)
  110. return RangeIndex(start, stop, step, dtype=dtype, name=name, **kwargs)
  111. @classmethod
  112. def _simple_new(cls, start, stop=None, step=None, name=None,
  113. dtype=None, **kwargs):
  114. result = object.__new__(cls)
  115. # handle passed None, non-integers
  116. if start is None and stop is None:
  117. # empty
  118. start, stop, step = 0, 0, 1
  119. if start is None or not is_integer(start):
  120. try:
  121. return RangeIndex(start, stop, step, name=name, **kwargs)
  122. except TypeError:
  123. return Index(start, stop, step, name=name, **kwargs)
  124. result._start = start
  125. result._stop = stop or 0
  126. result._step = step or 1
  127. result.name = name
  128. for k, v in compat.iteritems(kwargs):
  129. setattr(result, k, v)
  130. result._reset_identity()
  131. return result
  132. # --------------------------------------------------------------------
  133. @staticmethod
  134. def _validate_dtype(dtype):
  135. """ require dtype to be None or int64 """
  136. if not (dtype is None or is_int64_dtype(dtype)):
  137. raise TypeError('Invalid to pass a non-int64 dtype to RangeIndex')
  138. @cache_readonly
  139. def _constructor(self):
  140. """ return the class to use for construction """
  141. return Int64Index
  142. @cache_readonly
  143. def _data(self):
  144. return np.arange(self._start, self._stop, self._step, dtype=np.int64)
  145. @cache_readonly
  146. def _int64index(self):
  147. return Int64Index._simple_new(self._data, name=self.name)
  148. def _get_data_as_items(self):
  149. """ return a list of tuples of start, stop, step """
  150. return [('start', self._start),
  151. ('stop', self._stop),
  152. ('step', self._step)]
  153. def __reduce__(self):
  154. d = self._get_attributes_dict()
  155. d.update(dict(self._get_data_as_items()))
  156. return ibase._new_Index, (self.__class__, d), None
  157. # --------------------------------------------------------------------
  158. # Rendering Methods
  159. def _format_attrs(self):
  160. """
  161. Return a list of tuples of the (attr, formatted_value)
  162. """
  163. attrs = self._get_data_as_items()
  164. if self.name is not None:
  165. attrs.append(('name', ibase.default_pprint(self.name)))
  166. return attrs
  167. def _format_data(self, name=None):
  168. # we are formatting thru the attributes
  169. return None
  170. # --------------------------------------------------------------------
  171. @cache_readonly
  172. def nbytes(self):
  173. """
  174. Return the number of bytes in the underlying data
  175. On implementations where this is undetermined (PyPy)
  176. assume 24 bytes for each value
  177. """
  178. return sum(getsizeof(getattr(self, v), 24) for v in
  179. ['_start', '_stop', '_step'])
  180. def memory_usage(self, deep=False):
  181. """
  182. Memory usage of my values
  183. Parameters
  184. ----------
  185. deep : bool
  186. Introspect the data deeply, interrogate
  187. `object` dtypes for system-level memory consumption
  188. Returns
  189. -------
  190. bytes used
  191. Notes
  192. -----
  193. Memory usage does not include memory consumed by elements that
  194. are not components of the array if deep=False
  195. See Also
  196. --------
  197. numpy.ndarray.nbytes
  198. """
  199. return self.nbytes
  200. @property
  201. def dtype(self):
  202. return np.dtype(np.int64)
  203. @property
  204. def is_unique(self):
  205. """ return if the index has unique values """
  206. return True
  207. @cache_readonly
  208. def is_monotonic_increasing(self):
  209. return self._step > 0 or len(self) <= 1
  210. @cache_readonly
  211. def is_monotonic_decreasing(self):
  212. return self._step < 0 or len(self) <= 1
  213. @property
  214. def has_duplicates(self):
  215. return False
  216. def tolist(self):
  217. return lrange(self._start, self._stop, self._step)
  218. @Appender(_index_shared_docs['_shallow_copy'])
  219. def _shallow_copy(self, values=None, **kwargs):
  220. if values is None:
  221. name = kwargs.get("name", self.name)
  222. return RangeIndex._simple_new(
  223. name=name, **dict(self._get_data_as_items()))
  224. else:
  225. kwargs.setdefault('name', self.name)
  226. return self._int64index._shallow_copy(values, **kwargs)
  227. @Appender(ibase._index_shared_docs['copy'])
  228. def copy(self, name=None, deep=False, dtype=None, **kwargs):
  229. self._validate_dtype(dtype)
  230. if name is None:
  231. name = self.name
  232. return RangeIndex._simple_new(
  233. name=name, **dict(self._get_data_as_items()))
  234. def _minmax(self, meth):
  235. no_steps = len(self) - 1
  236. if no_steps == -1:
  237. return np.nan
  238. elif ((meth == 'min' and self._step > 0) or
  239. (meth == 'max' and self._step < 0)):
  240. return self._start
  241. return self._start + self._step * no_steps
  242. def min(self, axis=None, skipna=True):
  243. """The minimum value of the RangeIndex"""
  244. nv.validate_minmax_axis(axis)
  245. return self._minmax('min')
  246. def max(self, axis=None, skipna=True):
  247. """The maximum value of the RangeIndex"""
  248. nv.validate_minmax_axis(axis)
  249. return self._minmax('max')
  250. def argsort(self, *args, **kwargs):
  251. """
  252. Returns the indices that would sort the index and its
  253. underlying data.
  254. Returns
  255. -------
  256. argsorted : numpy array
  257. See Also
  258. --------
  259. numpy.ndarray.argsort
  260. """
  261. nv.validate_argsort(args, kwargs)
  262. if self._step > 0:
  263. return np.arange(len(self))
  264. else:
  265. return np.arange(len(self) - 1, -1, -1)
  266. def equals(self, other):
  267. """
  268. Determines if two Index objects contain the same elements.
  269. """
  270. if isinstance(other, RangeIndex):
  271. ls = len(self)
  272. lo = len(other)
  273. return (ls == lo == 0 or
  274. ls == lo == 1 and
  275. self._start == other._start or
  276. ls == lo and
  277. self._start == other._start and
  278. self._step == other._step)
  279. return super(RangeIndex, self).equals(other)
  280. def intersection(self, other, sort=False):
  281. """
  282. Form the intersection of two Index objects.
  283. Parameters
  284. ----------
  285. other : Index or array-like
  286. sort : False or None, default False
  287. Sort the resulting index if possible
  288. .. versionadded:: 0.24.0
  289. .. versionchanged:: 0.24.1
  290. Changed the default to ``False`` to match the behaviour
  291. from before 0.24.0.
  292. Returns
  293. -------
  294. intersection : Index
  295. """
  296. self._validate_sort_keyword(sort)
  297. if self.equals(other):
  298. return self._get_reconciled_name_object(other)
  299. if not isinstance(other, RangeIndex):
  300. return super(RangeIndex, self).intersection(other, sort=sort)
  301. if not len(self) or not len(other):
  302. return RangeIndex._simple_new(None)
  303. first = self[::-1] if self._step < 0 else self
  304. second = other[::-1] if other._step < 0 else other
  305. # check whether intervals intersect
  306. # deals with in- and decreasing ranges
  307. int_low = max(first._start, second._start)
  308. int_high = min(first._stop, second._stop)
  309. if int_high <= int_low:
  310. return RangeIndex._simple_new(None)
  311. # Method hint: linear Diophantine equation
  312. # solve intersection problem
  313. # performance hint: for identical step sizes, could use
  314. # cheaper alternative
  315. gcd, s, t = first._extended_gcd(first._step, second._step)
  316. # check whether element sets intersect
  317. if (first._start - second._start) % gcd:
  318. return RangeIndex._simple_new(None)
  319. # calculate parameters for the RangeIndex describing the
  320. # intersection disregarding the lower bounds
  321. tmp_start = first._start + (second._start - first._start) * \
  322. first._step // gcd * s
  323. new_step = first._step * second._step // gcd
  324. new_index = RangeIndex._simple_new(tmp_start, int_high, new_step)
  325. # adjust index to limiting interval
  326. new_index._start = new_index._min_fitting_element(int_low)
  327. if (self._step < 0 and other._step < 0) is not (new_index._step < 0):
  328. new_index = new_index[::-1]
  329. if sort is None:
  330. new_index = new_index.sort_values()
  331. return new_index
  332. def _min_fitting_element(self, lower_limit):
  333. """Returns the smallest element greater than or equal to the limit"""
  334. no_steps = -(-(lower_limit - self._start) // abs(self._step))
  335. return self._start + abs(self._step) * no_steps
  336. def _max_fitting_element(self, upper_limit):
  337. """Returns the largest element smaller than or equal to the limit"""
  338. no_steps = (upper_limit - self._start) // abs(self._step)
  339. return self._start + abs(self._step) * no_steps
  340. def _extended_gcd(self, a, b):
  341. """
  342. Extended Euclidean algorithms to solve Bezout's identity:
  343. a*x + b*y = gcd(x, y)
  344. Finds one particular solution for x, y: s, t
  345. Returns: gcd, s, t
  346. """
  347. s, old_s = 0, 1
  348. t, old_t = 1, 0
  349. r, old_r = b, a
  350. while r:
  351. quotient = old_r // r
  352. old_r, r = r, old_r - quotient * r
  353. old_s, s = s, old_s - quotient * s
  354. old_t, t = t, old_t - quotient * t
  355. return old_r, old_s, old_t
  356. def union(self, other):
  357. """
  358. Form the union of two Index objects and sorts if possible
  359. Parameters
  360. ----------
  361. other : Index or array-like
  362. Returns
  363. -------
  364. union : Index
  365. """
  366. self._assert_can_do_setop(other)
  367. if len(other) == 0 or self.equals(other) or len(self) == 0:
  368. return super(RangeIndex, self).union(other)
  369. if isinstance(other, RangeIndex):
  370. start_s, step_s = self._start, self._step
  371. end_s = self._start + self._step * (len(self) - 1)
  372. start_o, step_o = other._start, other._step
  373. end_o = other._start + other._step * (len(other) - 1)
  374. if self._step < 0:
  375. start_s, step_s, end_s = end_s, -step_s, start_s
  376. if other._step < 0:
  377. start_o, step_o, end_o = end_o, -step_o, start_o
  378. if len(self) == 1 and len(other) == 1:
  379. step_s = step_o = abs(self._start - other._start)
  380. elif len(self) == 1:
  381. step_s = step_o
  382. elif len(other) == 1:
  383. step_o = step_s
  384. start_r = min(start_s, start_o)
  385. end_r = max(end_s, end_o)
  386. if step_o == step_s:
  387. if ((start_s - start_o) % step_s == 0 and
  388. (start_s - end_o) <= step_s and
  389. (start_o - end_s) <= step_s):
  390. return RangeIndex(start_r, end_r + step_s, step_s)
  391. if ((step_s % 2 == 0) and
  392. (abs(start_s - start_o) <= step_s / 2) and
  393. (abs(end_s - end_o) <= step_s / 2)):
  394. return RangeIndex(start_r, end_r + step_s / 2, step_s / 2)
  395. elif step_o % step_s == 0:
  396. if ((start_o - start_s) % step_s == 0 and
  397. (start_o + step_s >= start_s) and
  398. (end_o - step_s <= end_s)):
  399. return RangeIndex(start_r, end_r + step_s, step_s)
  400. elif step_s % step_o == 0:
  401. if ((start_s - start_o) % step_o == 0 and
  402. (start_s + step_o >= start_o) and
  403. (end_s - step_o <= end_o)):
  404. return RangeIndex(start_r, end_r + step_o, step_o)
  405. return self._int64index.union(other)
  406. @Appender(_index_shared_docs['join'])
  407. def join(self, other, how='left', level=None, return_indexers=False,
  408. sort=False):
  409. if how == 'outer' and self is not other:
  410. # note: could return RangeIndex in more circumstances
  411. return self._int64index.join(other, how, level, return_indexers,
  412. sort)
  413. return super(RangeIndex, self).join(other, how, level, return_indexers,
  414. sort)
  415. def _concat_same_dtype(self, indexes, name):
  416. return _concat._concat_rangeindex_same_dtype(indexes).rename(name)
  417. def __len__(self):
  418. """
  419. return the length of the RangeIndex
  420. """
  421. return max(0, -(-(self._stop - self._start) // self._step))
  422. @property
  423. def size(self):
  424. return len(self)
  425. def __getitem__(self, key):
  426. """
  427. Conserve RangeIndex type for scalar and slice keys.
  428. """
  429. super_getitem = super(RangeIndex, self).__getitem__
  430. if is_scalar(key):
  431. if not lib.is_integer(key):
  432. raise IndexError("only integers, slices (`:`), "
  433. "ellipsis (`...`), numpy.newaxis (`None`) "
  434. "and integer or boolean "
  435. "arrays are valid indices")
  436. n = com.cast_scalar_indexer(key)
  437. if n != key:
  438. return super_getitem(key)
  439. if n < 0:
  440. n = len(self) + key
  441. if n < 0 or n > len(self) - 1:
  442. raise IndexError("index {key} is out of bounds for axis 0 "
  443. "with size {size}".format(key=key,
  444. size=len(self)))
  445. return self._start + n * self._step
  446. if isinstance(key, slice):
  447. # This is basically PySlice_GetIndicesEx, but delegation to our
  448. # super routines if we don't have integers
  449. length = len(self)
  450. # complete missing slice information
  451. step = 1 if key.step is None else key.step
  452. if key.start is None:
  453. start = length - 1 if step < 0 else 0
  454. else:
  455. start = key.start
  456. if start < 0:
  457. start += length
  458. if start < 0:
  459. start = -1 if step < 0 else 0
  460. if start >= length:
  461. start = length - 1 if step < 0 else length
  462. if key.stop is None:
  463. stop = -1 if step < 0 else length
  464. else:
  465. stop = key.stop
  466. if stop < 0:
  467. stop += length
  468. if stop < 0:
  469. stop = -1
  470. if stop > length:
  471. stop = length
  472. # delegate non-integer slices
  473. if (start != int(start) or
  474. stop != int(stop) or
  475. step != int(step)):
  476. return super_getitem(key)
  477. # convert indexes to values
  478. start = self._start + self._step * start
  479. stop = self._start + self._step * stop
  480. step = self._step * step
  481. return RangeIndex._simple_new(start, stop, step, name=self.name)
  482. # fall back to Int64Index
  483. return super_getitem(key)
  484. def __floordiv__(self, other):
  485. if isinstance(other, (ABCSeries, ABCDataFrame)):
  486. return NotImplemented
  487. if is_integer(other) and other != 0:
  488. if (len(self) == 0 or
  489. self._start % other == 0 and
  490. self._step % other == 0):
  491. start = self._start // other
  492. step = self._step // other
  493. stop = start + len(self) * step
  494. return RangeIndex._simple_new(
  495. start, stop, step, name=self.name)
  496. if len(self) == 1:
  497. start = self._start // other
  498. return RangeIndex._simple_new(
  499. start, start + 1, 1, name=self.name)
  500. return self._int64index // other
  501. @classmethod
  502. def _add_numeric_methods_binary(cls):
  503. """ add in numeric methods, specialized to RangeIndex """
  504. def _make_evaluate_binop(op, step=False):
  505. """
  506. Parameters
  507. ----------
  508. op : callable that accepts 2 parms
  509. perform the binary op
  510. step : callable, optional, default to False
  511. op to apply to the step parm if not None
  512. if False, use the existing step
  513. """
  514. def _evaluate_numeric_binop(self, other):
  515. if isinstance(other, (ABCSeries, ABCDataFrame)):
  516. return NotImplemented
  517. elif isinstance(other, ABCTimedeltaIndex):
  518. # Defer to TimedeltaIndex implementation
  519. return NotImplemented
  520. elif isinstance(other, (timedelta, np.timedelta64)):
  521. # GH#19333 is_integer evaluated True on timedelta64,
  522. # so we need to catch these explicitly
  523. return op(self._int64index, other)
  524. elif is_timedelta64_dtype(other):
  525. # Must be an np.ndarray; GH#22390
  526. return op(self._int64index, other)
  527. other = self._validate_for_numeric_binop(other, op)
  528. attrs = self._get_attributes_dict()
  529. attrs = self._maybe_update_attributes(attrs)
  530. left, right = self, other
  531. try:
  532. # apply if we have an override
  533. if step:
  534. with np.errstate(all='ignore'):
  535. rstep = step(left._step, right)
  536. # we don't have a representable op
  537. # so return a base index
  538. if not is_integer(rstep) or not rstep:
  539. raise ValueError
  540. else:
  541. rstep = left._step
  542. with np.errstate(all='ignore'):
  543. rstart = op(left._start, right)
  544. rstop = op(left._stop, right)
  545. result = RangeIndex(rstart,
  546. rstop,
  547. rstep,
  548. **attrs)
  549. # for compat with numpy / Int64Index
  550. # even if we can represent as a RangeIndex, return
  551. # as a Float64Index if we have float-like descriptors
  552. if not all(is_integer(x) for x in
  553. [rstart, rstop, rstep]):
  554. result = result.astype('float64')
  555. return result
  556. except (ValueError, TypeError, ZeroDivisionError):
  557. # Defer to Int64Index implementation
  558. return op(self._int64index, other)
  559. # TODO: Do attrs get handled reliably?
  560. name = '__{name}__'.format(name=op.__name__)
  561. return compat.set_function_name(_evaluate_numeric_binop, name, cls)
  562. cls.__add__ = _make_evaluate_binop(operator.add)
  563. cls.__radd__ = _make_evaluate_binop(ops.radd)
  564. cls.__sub__ = _make_evaluate_binop(operator.sub)
  565. cls.__rsub__ = _make_evaluate_binop(ops.rsub)
  566. cls.__mul__ = _make_evaluate_binop(operator.mul, step=operator.mul)
  567. cls.__rmul__ = _make_evaluate_binop(ops.rmul, step=ops.rmul)
  568. cls.__truediv__ = _make_evaluate_binop(operator.truediv,
  569. step=operator.truediv)
  570. cls.__rtruediv__ = _make_evaluate_binop(ops.rtruediv,
  571. step=ops.rtruediv)
  572. if not compat.PY3:
  573. cls.__div__ = _make_evaluate_binop(operator.div, step=operator.div)
  574. cls.__rdiv__ = _make_evaluate_binop(ops.rdiv, step=ops.rdiv)
  575. RangeIndex._add_numeric_methods()
  576. RangeIndex._add_logical_methods()