numeric.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450
  1. import warnings
  2. import numpy as np
  3. from pandas._libs import index as libindex
  4. import pandas.compat as compat
  5. from pandas.util._decorators import Appender, cache_readonly
  6. from pandas.core.dtypes.common import (
  7. is_bool, is_bool_dtype, is_dtype_equal, is_extension_array_dtype, is_float,
  8. is_integer_dtype, is_scalar, needs_i8_conversion, pandas_dtype)
  9. import pandas.core.dtypes.concat as _concat
  10. from pandas.core.dtypes.missing import isna
  11. from pandas.core import algorithms
  12. import pandas.core.common as com
  13. import pandas.core.indexes.base as ibase
  14. from pandas.core.indexes.base import (
  15. Index, InvalidIndexError, _index_shared_docs)
  16. from pandas.core.ops import get_op_result_name
  17. _num_index_shared_docs = dict()
  18. class NumericIndex(Index):
  19. """
  20. Provide numeric type operations
  21. This is an abstract class
  22. """
  23. _is_numeric_dtype = True
  24. def __new__(cls, data=None, dtype=None, copy=False, name=None,
  25. fastpath=None):
  26. if fastpath is not None:
  27. warnings.warn("The 'fastpath' keyword is deprecated, and will be "
  28. "removed in a future version.",
  29. FutureWarning, stacklevel=2)
  30. if fastpath:
  31. return cls._simple_new(data, name=name)
  32. # is_scalar, generators handled in coerce_to_ndarray
  33. data = cls._coerce_to_ndarray(data)
  34. if issubclass(data.dtype.type, compat.string_types):
  35. cls._string_data_error(data)
  36. if copy or not is_dtype_equal(data.dtype, cls._default_dtype):
  37. subarr = np.array(data, dtype=cls._default_dtype, copy=copy)
  38. cls._assert_safe_casting(data, subarr)
  39. else:
  40. subarr = data
  41. if name is None and hasattr(data, 'name'):
  42. name = data.name
  43. return cls._simple_new(subarr, name=name)
  44. @Appender(_index_shared_docs['_maybe_cast_slice_bound'])
  45. def _maybe_cast_slice_bound(self, label, side, kind):
  46. assert kind in ['ix', 'loc', 'getitem', None]
  47. # we will try to coerce to integers
  48. return self._maybe_cast_indexer(label)
  49. @Appender(_index_shared_docs['_shallow_copy'])
  50. def _shallow_copy(self, values=None, **kwargs):
  51. if values is not None and not self._can_hold_na:
  52. # Ensure we are not returning an Int64Index with float data:
  53. return self._shallow_copy_with_infer(values=values, **kwargs)
  54. return (super(NumericIndex, self)._shallow_copy(values=values,
  55. **kwargs))
  56. def _convert_for_op(self, value):
  57. """ Convert value to be insertable to ndarray """
  58. if is_bool(value) or is_bool_dtype(value):
  59. # force conversion to object
  60. # so we don't lose the bools
  61. raise TypeError
  62. return value
  63. def _convert_tolerance(self, tolerance, target):
  64. tolerance = np.asarray(tolerance)
  65. if target.size != tolerance.size and tolerance.size > 1:
  66. raise ValueError('list-like tolerance size must match '
  67. 'target index size')
  68. if not np.issubdtype(tolerance.dtype, np.number):
  69. if tolerance.ndim > 0:
  70. raise ValueError(('tolerance argument for %s must contain '
  71. 'numeric elements if it is list type') %
  72. (type(self).__name__,))
  73. else:
  74. raise ValueError(('tolerance argument for %s must be numeric '
  75. 'if it is a scalar: %r') %
  76. (type(self).__name__, tolerance))
  77. return tolerance
  78. @classmethod
  79. def _assert_safe_casting(cls, data, subarr):
  80. """
  81. Subclasses need to override this only if the process of casting data
  82. from some accepted dtype to the internal dtype(s) bears the risk of
  83. truncation (e.g. float to int).
  84. """
  85. pass
  86. def _concat_same_dtype(self, indexes, name):
  87. return _concat._concat_index_same_dtype(indexes).rename(name)
  88. @property
  89. def is_all_dates(self):
  90. """
  91. Checks that all the labels are datetime objects
  92. """
  93. return False
  94. @Appender(Index.insert.__doc__)
  95. def insert(self, loc, item):
  96. # treat NA values as nans:
  97. if is_scalar(item) and isna(item):
  98. item = self._na_value
  99. return super(NumericIndex, self).insert(loc, item)
  100. _num_index_shared_docs['class_descr'] = """
  101. Immutable ndarray implementing an ordered, sliceable set. The basic object
  102. storing axis labels for all pandas objects. %(klass)s is a special case
  103. of `Index` with purely %(ltype)s labels. %(extra)s
  104. Parameters
  105. ----------
  106. data : array-like (1-dimensional)
  107. dtype : NumPy dtype (default: %(dtype)s)
  108. copy : bool
  109. Make a copy of input ndarray
  110. name : object
  111. Name to be stored in the index
  112. Attributes
  113. ----------
  114. None
  115. Methods
  116. -------
  117. None
  118. See Also
  119. --------
  120. Index : The base pandas Index type.
  121. Notes
  122. -----
  123. An Index instance can **only** contain hashable objects.
  124. """
  125. _int64_descr_args = dict(
  126. klass='Int64Index',
  127. ltype='integer',
  128. dtype='int64',
  129. extra=''
  130. )
  131. class IntegerIndex(NumericIndex):
  132. """
  133. This is an abstract class for Int64Index, UInt64Index.
  134. """
  135. def __contains__(self, key):
  136. """
  137. Check if key is a float and has a decimal. If it has, return False.
  138. """
  139. hash(key)
  140. try:
  141. if is_float(key) and int(key) != key:
  142. return False
  143. return key in self._engine
  144. except (OverflowError, TypeError, ValueError):
  145. return False
  146. class Int64Index(IntegerIndex):
  147. __doc__ = _num_index_shared_docs['class_descr'] % _int64_descr_args
  148. _typ = 'int64index'
  149. _can_hold_na = False
  150. _engine_type = libindex.Int64Engine
  151. _default_dtype = np.int64
  152. @property
  153. def inferred_type(self):
  154. """Always 'integer' for ``Int64Index``"""
  155. return 'integer'
  156. @property
  157. def asi8(self):
  158. # do not cache or you'll create a memory leak
  159. return self.values.view('i8')
  160. @Appender(_index_shared_docs['_convert_scalar_indexer'])
  161. def _convert_scalar_indexer(self, key, kind=None):
  162. assert kind in ['ix', 'loc', 'getitem', 'iloc', None]
  163. # don't coerce ilocs to integers
  164. if kind != 'iloc':
  165. key = self._maybe_cast_indexer(key)
  166. return (super(Int64Index, self)
  167. ._convert_scalar_indexer(key, kind=kind))
  168. def _wrap_joined_index(self, joined, other):
  169. name = get_op_result_name(self, other)
  170. return Int64Index(joined, name=name)
  171. @classmethod
  172. def _assert_safe_casting(cls, data, subarr):
  173. """
  174. Ensure incoming data can be represented as ints.
  175. """
  176. if not issubclass(data.dtype.type, np.signedinteger):
  177. if not np.array_equal(data, subarr):
  178. raise TypeError('Unsafe NumPy casting, you must '
  179. 'explicitly cast')
  180. Int64Index._add_numeric_methods()
  181. Int64Index._add_logical_methods()
  182. _uint64_descr_args = dict(
  183. klass='UInt64Index',
  184. ltype='unsigned integer',
  185. dtype='uint64',
  186. extra=''
  187. )
  188. class UInt64Index(IntegerIndex):
  189. __doc__ = _num_index_shared_docs['class_descr'] % _uint64_descr_args
  190. _typ = 'uint64index'
  191. _can_hold_na = False
  192. _engine_type = libindex.UInt64Engine
  193. _default_dtype = np.uint64
  194. @property
  195. def inferred_type(self):
  196. """Always 'integer' for ``UInt64Index``"""
  197. return 'integer'
  198. @property
  199. def asi8(self):
  200. # do not cache or you'll create a memory leak
  201. return self.values.view('u8')
  202. @Appender(_index_shared_docs['_convert_scalar_indexer'])
  203. def _convert_scalar_indexer(self, key, kind=None):
  204. assert kind in ['ix', 'loc', 'getitem', 'iloc', None]
  205. # don't coerce ilocs to integers
  206. if kind != 'iloc':
  207. key = self._maybe_cast_indexer(key)
  208. return (super(UInt64Index, self)
  209. ._convert_scalar_indexer(key, kind=kind))
  210. @Appender(_index_shared_docs['_convert_arr_indexer'])
  211. def _convert_arr_indexer(self, keyarr):
  212. # Cast the indexer to uint64 if possible so
  213. # that the values returned from indexing are
  214. # also uint64.
  215. keyarr = com.asarray_tuplesafe(keyarr)
  216. if is_integer_dtype(keyarr):
  217. return com.asarray_tuplesafe(keyarr, dtype=np.uint64)
  218. return keyarr
  219. @Appender(_index_shared_docs['_convert_index_indexer'])
  220. def _convert_index_indexer(self, keyarr):
  221. # Cast the indexer to uint64 if possible so
  222. # that the values returned from indexing are
  223. # also uint64.
  224. if keyarr.is_integer():
  225. return keyarr.astype(np.uint64)
  226. return keyarr
  227. def _wrap_joined_index(self, joined, other):
  228. name = get_op_result_name(self, other)
  229. return UInt64Index(joined, name=name)
  230. @classmethod
  231. def _assert_safe_casting(cls, data, subarr):
  232. """
  233. Ensure incoming data can be represented as uints.
  234. """
  235. if not issubclass(data.dtype.type, np.unsignedinteger):
  236. if not np.array_equal(data, subarr):
  237. raise TypeError('Unsafe NumPy casting, you must '
  238. 'explicitly cast')
  239. UInt64Index._add_numeric_methods()
  240. UInt64Index._add_logical_methods()
  241. _float64_descr_args = dict(
  242. klass='Float64Index',
  243. dtype='float64',
  244. ltype='float',
  245. extra=''
  246. )
  247. class Float64Index(NumericIndex):
  248. __doc__ = _num_index_shared_docs['class_descr'] % _float64_descr_args
  249. _typ = 'float64index'
  250. _engine_type = libindex.Float64Engine
  251. _default_dtype = np.float64
  252. @property
  253. def inferred_type(self):
  254. """Always 'floating' for ``Float64Index``"""
  255. return 'floating'
  256. @Appender(_index_shared_docs['astype'])
  257. def astype(self, dtype, copy=True):
  258. dtype = pandas_dtype(dtype)
  259. if needs_i8_conversion(dtype):
  260. msg = ('Cannot convert Float64Index to dtype {dtype}; integer '
  261. 'values are required for conversion').format(dtype=dtype)
  262. raise TypeError(msg)
  263. elif (is_integer_dtype(dtype) and
  264. not is_extension_array_dtype(dtype)) and self.hasnans:
  265. # TODO(jreback); this can change once we have an EA Index type
  266. # GH 13149
  267. raise ValueError('Cannot convert NA to integer')
  268. return super(Float64Index, self).astype(dtype, copy=copy)
  269. @Appender(_index_shared_docs['_convert_scalar_indexer'])
  270. def _convert_scalar_indexer(self, key, kind=None):
  271. assert kind in ['ix', 'loc', 'getitem', 'iloc', None]
  272. if kind == 'iloc':
  273. return self._validate_indexer('positional', key, kind)
  274. return key
  275. @Appender(_index_shared_docs['_convert_slice_indexer'])
  276. def _convert_slice_indexer(self, key, kind=None):
  277. # if we are not a slice, then we are done
  278. if not isinstance(key, slice):
  279. return key
  280. if kind == 'iloc':
  281. return super(Float64Index, self)._convert_slice_indexer(key,
  282. kind=kind)
  283. # translate to locations
  284. return self.slice_indexer(key.start, key.stop, key.step, kind=kind)
  285. def _format_native_types(self, na_rep='', float_format=None, decimal='.',
  286. quoting=None, **kwargs):
  287. from pandas.io.formats.format import FloatArrayFormatter
  288. formatter = FloatArrayFormatter(self.values, na_rep=na_rep,
  289. float_format=float_format,
  290. decimal=decimal, quoting=quoting,
  291. fixed_width=False)
  292. return formatter.get_result_as_array()
  293. def get_value(self, series, key):
  294. """ we always want to get an index value, never a value """
  295. if not is_scalar(key):
  296. raise InvalidIndexError
  297. k = com.values_from_object(key)
  298. loc = self.get_loc(k)
  299. new_values = com.values_from_object(series)[loc]
  300. return new_values
  301. def equals(self, other):
  302. """
  303. Determines if two Index objects contain the same elements.
  304. """
  305. if self is other:
  306. return True
  307. if not isinstance(other, Index):
  308. return False
  309. # need to compare nans locations and make sure that they are the same
  310. # since nans don't compare equal this is a bit tricky
  311. try:
  312. if not isinstance(other, Float64Index):
  313. other = self._constructor(other)
  314. if (not is_dtype_equal(self.dtype, other.dtype) or
  315. self.shape != other.shape):
  316. return False
  317. left, right = self._ndarray_values, other._ndarray_values
  318. return ((left == right) | (self._isnan & other._isnan)).all()
  319. except (TypeError, ValueError):
  320. return False
  321. def __contains__(self, other):
  322. if super(Float64Index, self).__contains__(other):
  323. return True
  324. try:
  325. # if other is a sequence this throws a ValueError
  326. return np.isnan(other) and self.hasnans
  327. except ValueError:
  328. try:
  329. return len(other) <= 1 and ibase._try_get_item(other) in self
  330. except TypeError:
  331. pass
  332. except TypeError:
  333. pass
  334. return False
  335. @Appender(_index_shared_docs['get_loc'])
  336. def get_loc(self, key, method=None, tolerance=None):
  337. try:
  338. if np.all(np.isnan(key)) or is_bool(key):
  339. nan_idxs = self._nan_idxs
  340. try:
  341. return nan_idxs.item()
  342. except (ValueError, IndexError):
  343. # should only need to catch ValueError here but on numpy
  344. # 1.7 .item() can raise IndexError when NaNs are present
  345. if not len(nan_idxs):
  346. raise KeyError(key)
  347. return nan_idxs
  348. except (TypeError, NotImplementedError):
  349. pass
  350. return super(Float64Index, self).get_loc(key, method=method,
  351. tolerance=tolerance)
  352. @cache_readonly
  353. def is_unique(self):
  354. return super(Float64Index, self).is_unique and self._nan_idxs.size < 2
  355. @Appender(Index.isin.__doc__)
  356. def isin(self, values, level=None):
  357. if level is not None:
  358. self._validate_index_level(level)
  359. return algorithms.isin(np.array(self), values)
  360. Float64Index._add_numeric_methods()
  361. Float64Index._add_logical_methods_disabled()