  1. """
  2. Base and utility classes for pandas objects.
  3. """
  4. import textwrap
  5. import warnings
  6. import numpy as np
  7. import pandas._libs.lib as lib
  8. import pandas.compat as compat
  9. from pandas.compat import PYPY, OrderedDict, builtins, map, range
  10. from pandas.compat.numpy import function as nv
  11. from pandas.errors import AbstractMethodError
  12. from pandas.util._decorators import Appender, Substitution, cache_readonly
  13. from pandas.util._validators import validate_bool_kwarg
  14. from pandas.core.dtypes.common import (
  15. is_datetime64_ns_dtype, is_datetime64tz_dtype, is_datetimelike,
  16. is_extension_array_dtype, is_extension_type, is_list_like, is_object_dtype,
  17. is_scalar, is_timedelta64_ns_dtype)
  18. from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
  19. from pandas.core.dtypes.missing import isna
  20. from pandas.core import algorithms, common as com
  21. from pandas.core.accessor import DirNamesMixin
  22. import pandas.core.nanops as nanops
  23. _shared_docs = dict()
  24. _indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='',
  25. unique='IndexOpsMixin', duplicated='IndexOpsMixin')
  26. class StringMixin(object):
  27. """implements string methods so long as object defines a `__unicode__`
  28. method.
  29. Handles Python2/3 compatibility transparently.
  30. """
  31. # side note - this could be made into a metaclass if more than one
  32. # object needs
  33. # ----------------------------------------------------------------------
  34. # Formatting
  35. def __unicode__(self):
  36. raise AbstractMethodError(self)
  37. def __str__(self):
  38. """
  39. Return a string representation for a particular Object
  40. Invoked by str(df) in both py2/py3.
  41. Yields Bytestring in Py2, Unicode String in py3.
  42. """
  43. if compat.PY3:
  44. return self.__unicode__()
  45. return self.__bytes__()
  46. def __bytes__(self):
  47. """
  48. Return a string representation for a particular object.
  49. Invoked by bytes(obj) in py3 only.
  50. Yields a bytestring in both py2/py3.
  51. """
  52. from pandas.core.config import get_option
  53. encoding = get_option("display.encoding")
  54. return self.__unicode__().encode(encoding, 'replace')
  55. def __repr__(self):
  56. """
  57. Return a string representation for a particular object.
  58. Yields Bytestring in Py2, Unicode String in py3.
  59. """
  60. return str(self)
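
# Illustrative sketch (not part of this module): a subclass only needs to
# provide ``__unicode__``; ``__str__``/``__bytes__``/``__repr__`` then behave
# consistently on both Python 2 and 3. The ``Point`` class is hypothetical.
#
# >>> class Point(StringMixin):
# ...     def __init__(self, x, y):
# ...         self.x, self.y = x, y
# ...     def __unicode__(self):
# ...         return u'Point({0}, {1})'.format(self.x, self.y)
# >>> str(Point(1, 2))
# 'Point(1, 2)'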


class PandasObject(StringMixin, DirNamesMixin):
    """Baseclass for various pandas objects."""

    @property
    def _constructor(self):
        """Class constructor (for this class it's just `__class__`)."""
        return self.__class__

    def __unicode__(self):
        """
        Return a string representation for a particular object.

        Invoked by unicode(obj) in py2 only. Yields a unicode string in both
        py2/py3.
        """
        # Should be overwritten by base classes
        return object.__repr__(self)

    def _reset_cache(self, key=None):
        """
        Reset cached properties. If ``key`` is passed, only clears that key.
        """
        if getattr(self, '_cache', None) is None:
            return
        if key is None:
            self._cache.clear()
        else:
            self._cache.pop(key, None)

    def __sizeof__(self):
        """
        Generates the total memory usage for an object that returns
        either a value or Series of values.
        """
        if hasattr(self, 'memory_usage'):
            mem = self.memory_usage(deep=True)
            if not is_scalar(mem):
                mem = mem.sum()
            return int(mem)

        # no memory_usage attribute, so fall back to object's 'sizeof'
        return super(PandasObject, self).__sizeof__()


class NoNewAttributesMixin(object):
    """Mixin which prevents adding new attributes.

    Prevents additional attributes via xxx.attribute = "something" after a
    call to `self.__freeze()`. Mainly used to prevent the user from using
    wrong attributes on an accessor (`Series.cat/.str/.dt`).

    If you really want to add a new attribute at a later time, you need to use
    `object.__setattr__(self, key, value)`.
    """

    def _freeze(self):
        """Prevents setting additional attributes."""
        object.__setattr__(self, "__frozen", True)

    # prevent adding any attribute via s.xxx.new_attribute = ...
    def __setattr__(self, key, value):
        # _cache is used by a decorator
        # We need to check both 1.) cls.__dict__ and 2.) getattr(self, key)
        # because
        # 1.) getattr is false for attributes that raise errors
        # 2.) cls.__dict__ doesn't traverse into base classes
        if (getattr(self, "__frozen", False) and not
                (key == "_cache" or
                 key in type(self).__dict__ or
                 getattr(self, key, None) is not None)):
            raise AttributeError("You cannot add any new attribute '{key}'".
                                 format(key=key))
        object.__setattr__(self, key, value)
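
    # Illustrative sketch (hypothetical ``Flags`` class, not in this module):
    # after ``_freeze()``, assigning an unknown attribute raises, which
    # surfaces typos instead of silently creating new attributes. Existing
    # attributes can still be reassigned.
    #
    # >>> class Flags(NoNewAttributesMixin):
    # ...     def __init__(self):
    # ...         self.allows_duplicates = True
    # ...         self._freeze()
    # >>> f = Flags()
    # >>> f.allows_duplicates = False   # existing attribute: allowed
    # >>> f.new_flag = 1                # raises AttributeError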


class GroupByError(Exception):
    pass


class DataError(GroupByError):
    pass


class SpecificationError(GroupByError):
    pass


class SelectionMixin(object):
    """
    Mixin implementing the selection & aggregation interface on a group-like
    object; sub-classes need to define: obj, exclusions
    """
    _selection = None
    _internal_names = ['_cache', '__setstate__']
    _internal_names_set = set(_internal_names)

    _builtin_table = OrderedDict((
        (builtins.sum, np.sum),
        (builtins.max, np.max),
        (builtins.min, np.min),
    ))

    _cython_table = OrderedDict((
        (builtins.sum, 'sum'),
        (builtins.max, 'max'),
        (builtins.min, 'min'),
        (np.all, 'all'),
        (np.any, 'any'),
        (np.sum, 'sum'),
        (np.nansum, 'sum'),
        (np.mean, 'mean'),
        (np.nanmean, 'mean'),
        (np.prod, 'prod'),
        (np.nanprod, 'prod'),
        (np.std, 'std'),
        (np.nanstd, 'std'),
        (np.var, 'var'),
        (np.nanvar, 'var'),
        (np.median, 'median'),
        (np.nanmedian, 'median'),
        (np.max, 'max'),
        (np.nanmax, 'max'),
        (np.min, 'min'),
        (np.nanmin, 'min'),
        (np.cumprod, 'cumprod'),
        (np.nancumprod, 'cumprod'),
        (np.cumsum, 'cumsum'),
        (np.nancumsum, 'cumsum'),
    ))

    @property
    def _selection_name(self):
        """
        Return a name for myself; this would ideally be called
        the 'name' property, but we cannot conflict with the
        Series.name property which can be set.
        """
        if self._selection is None:
            return None  # 'result'
        else:
            return self._selection

    @property
    def _selection_list(self):
        if not isinstance(self._selection, (list, tuple, ABCSeries,
                                            ABCIndexClass, np.ndarray)):
            return [self._selection]
        return self._selection

    @cache_readonly
    def _selected_obj(self):
        if self._selection is None or isinstance(self.obj, ABCSeries):
            return self.obj
        else:
            return self.obj[self._selection]

    @cache_readonly
    def ndim(self):
        return self._selected_obj.ndim

    @cache_readonly
    def _obj_with_exclusions(self):
        if self._selection is not None and isinstance(self.obj,
                                                      ABCDataFrame):
            return self.obj.reindex(columns=self._selection_list)

        if len(self.exclusions) > 0:
            return self.obj.drop(self.exclusions, axis=1)
        else:
            return self.obj

    def __getitem__(self, key):
        if self._selection is not None:
            raise IndexError('Column(s) {selection} already selected'
                             .format(selection=self._selection))

        if isinstance(key, (list, tuple, ABCSeries, ABCIndexClass,
                            np.ndarray)):
            if len(self.obj.columns.intersection(key)) != len(key):
                bad_keys = list(set(key).difference(self.obj.columns))
                raise KeyError("Columns not found: {missing}"
                               .format(missing=str(bad_keys)[1:-1]))
            return self._gotitem(list(key), ndim=2)

        elif not getattr(self, 'as_index', False):
            if key not in self.obj.columns:
                raise KeyError("Column not found: {key}".format(key=key))
            return self._gotitem(key, ndim=2)

        else:
            if key not in self.obj:
                raise KeyError("Column not found: {key}".format(key=key))
            return self._gotitem(key, ndim=1)

    def _gotitem(self, key, ndim, subset=None):
        """
        Sub-classes to define; return a sliced object.

        Parameters
        ----------
        key : string / list of selections
        ndim : 1, 2
            requested ndim of result
        subset : object, default None
            subset to act on
        """
        raise AbstractMethodError(self)

    def aggregate(self, func, *args, **kwargs):
        raise AbstractMethodError(self)

    agg = aggregate

    def _try_aggregate_string_function(self, arg, *args, **kwargs):
        """
        If arg is a string, then try to operate on it:
        - try to find a function (or attribute) on ourselves
        - try to find a numpy function
        - raise
        """
        assert isinstance(arg, compat.string_types)

        f = getattr(self, arg, None)
        if f is not None:
            if callable(f):
                return f(*args, **kwargs)

            # people may try to aggregate on a non-callable attribute
            # but don't let them think they can pass args to it
            assert len(args) == 0
            assert len([kwarg for kwarg in kwargs
                        if kwarg not in ['axis', '_level']]) == 0
            return f

        f = getattr(np, arg, None)
        if f is not None:
            return f(self, *args, **kwargs)

        raise ValueError("{arg} is an unknown string function".format(arg=arg))
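
    # Illustrative sketch: string aggregation names resolve first to a
    # method/attribute on the object itself, then fall back to a numpy
    # function of the same name (assuming a groupby-like object ``g`` that
    # mixes in SelectionMixin).
    #
    # >>> g.aggregate('mean')    # found via getattr(g, 'mean')
    # >>> g.aggregate('ptp')     # not a method here; falls back to np.ptp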

    def _aggregate(self, arg, *args, **kwargs):
        """
        Provide an implementation for the aggregators.

        Parameters
        ----------
        arg : string, dict, function
        *args : args to pass on to the function
        **kwargs : kwargs to pass on to the function

        Returns
        -------
        tuple of result, how

        Notes
        -----
        how can be a string describing the required post-processing, or
        None if not required.
        """
        is_aggregator = lambda x: isinstance(x, (list, tuple, dict))
        is_nested_renamer = False

        _axis = kwargs.pop('_axis', None)
        if _axis is None:
            _axis = getattr(self, 'axis', 0)
        _level = kwargs.pop('_level', None)

        if isinstance(arg, compat.string_types):
            return self._try_aggregate_string_function(arg, *args,
                                                       **kwargs), None

        if isinstance(arg, dict):

            # aggregate based on the passed dict
            if _axis != 0:  # pragma: no cover
                raise ValueError('Can only pass dict with axis=0')

            obj = self._selected_obj

            def nested_renaming_depr(level=4):
                # deprecation of nested renaming
                # GH 15931
                warnings.warn(
                    ("using a dict with renaming "
                     "is deprecated and will be removed in a future "
                     "version"),
                    FutureWarning, stacklevel=level)

            # if we have a dict of any non-scalars
            # eg. {'A' : ['mean']}, normalize all to
            # be list-likes
            if any(is_aggregator(x) for x in compat.itervalues(arg)):
                new_arg = compat.OrderedDict()
                for k, v in compat.iteritems(arg):
                    if not isinstance(v, (tuple, list, dict)):
                        new_arg[k] = [v]
                    else:
                        new_arg[k] = v

                    # the keys must be in the columns
                    # for ndim=2, or renamers for ndim=1

                    # ok for now, but deprecated
                    # {'A': { 'ra': 'mean' }}
                    # {'A': { 'ra': ['mean'] }}
                    # {'ra': ['mean']}

                    # not ok
                    # {'ra' : { 'A' : 'mean' }}
                    if isinstance(v, dict):
                        is_nested_renamer = True

                        if k not in obj.columns:
                            msg = ('cannot perform renaming for {key} with a '
                                   'nested dictionary').format(key=k)
                            raise SpecificationError(msg)
                        nested_renaming_depr(4 + (_level or 0))
                    elif isinstance(obj, ABCSeries):
                        nested_renaming_depr()
                    elif (isinstance(obj, ABCDataFrame) and
                          k not in obj.columns):
                        raise KeyError(
                            "Column '{col}' does not exist!".format(col=k))

                arg = new_arg

            else:
                # deprecation of renaming keys
                # GH 15931
                keys = list(compat.iterkeys(arg))
                if (isinstance(obj, ABCDataFrame) and
                        len(obj.columns.intersection(keys)) != len(keys)):
                    nested_renaming_depr()

            from pandas.core.reshape.concat import concat

            def _agg_1dim(name, how, subset=None):
                """
                aggregate a 1-dim with how
                """
                colg = self._gotitem(name, ndim=1, subset=subset)
                if colg.ndim != 1:
                    raise SpecificationError("nested dictionary is ambiguous "
                                             "in aggregation")
                return colg.aggregate(how, _level=(_level or 0) + 1)

            def _agg_2dim(name, how):
                """
                aggregate a 2-dim with how
                """
                colg = self._gotitem(self._selection, ndim=2,
                                     subset=obj)
                return colg.aggregate(how, _level=None)

            def _agg(arg, func):
                """
                run the aggregations over the arg with func
                return an OrderedDict
                """
                result = compat.OrderedDict()
                for fname, agg_how in compat.iteritems(arg):
                    result[fname] = func(fname, agg_how)
                return result

            # set the final keys
            keys = list(compat.iterkeys(arg))
            result = compat.OrderedDict()

            # nested renamer
            if is_nested_renamer:
                result = list(_agg(arg, _agg_1dim).values())

                if all(isinstance(r, dict) for r in result):

                    result, results = compat.OrderedDict(), result
                    for r in results:
                        result.update(r)
                    keys = list(compat.iterkeys(result))

                else:

                    if self._selection is not None:
                        keys = None

            # some selection on the object
            elif self._selection is not None:

                sl = set(self._selection_list)

                # we are a Series like object,
                # but may have multiple aggregations
                if len(sl) == 1:

                    result = _agg(arg, lambda fname,
                                  agg_how: _agg_1dim(self._selection, agg_how))

                # we are selecting the same set as we are aggregating
                elif not len(sl - set(keys)):

                    result = _agg(arg, _agg_1dim)

                # we are a DataFrame, with possibly multiple aggregations
                else:

                    result = _agg(arg, _agg_2dim)

            # no selection
            else:

                try:
                    result = _agg(arg, _agg_1dim)
                except SpecificationError:

                    # we are aggregating expecting all 1d-returns
                    # but we have 2d
                    result = _agg(arg, _agg_2dim)

            # combine results

            def is_any_series():
                # return a boolean if we have *any* nested series
                return any(isinstance(r, ABCSeries)
                           for r in compat.itervalues(result))

            def is_any_frame():
                # return a boolean if we have *any* nested frames
                return any(isinstance(r, ABCDataFrame)
                           for r in compat.itervalues(result))

            if isinstance(result, list):
                return concat(result, keys=keys, axis=1, sort=True), True

            elif is_any_frame():
                # we have a dict of DataFrames
                # return a MI DataFrame
                return concat([result[k] for k in keys],
                              keys=keys, axis=1), True

            elif isinstance(self, ABCSeries) and is_any_series():

                # we have a dict of Series
                # return a MI Series
                try:
                    result = concat(result)
                except TypeError:
                    # we want to give a nice error here if
                    # we have non-same sized objects, so
                    # we don't automatically broadcast
                    raise ValueError("cannot perform both aggregation "
                                     "and transformation operations "
                                     "simultaneously")

                return result, True

            # fall thru
            from pandas import DataFrame, Series
            try:
                result = DataFrame(result)
            except ValueError:

                # we have a dict of scalars
                result = Series(result,
                                name=getattr(self, 'name', None))

            return result, True
        elif is_list_like(arg):
            # we require a list, but not an 'str'
            # (strings were already handled and returned above)
            return self._aggregate_multiple_funcs(arg,
                                                  _level=_level,
                                                  _axis=_axis), None
        else:
            result = None

        f = self._is_cython_func(arg)
        if f and not args and not kwargs:
            return getattr(self, f)(), None

        # caller can react
        return result, True
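
    # Illustrative sketch of the dict path (hypothetical frame ``df``): a
    # dict of column -> function aggregates each column independently, and
    # the return value is a ``(result, how)`` tuple consumed by the public
    # ``agg``/``aggregate`` wrappers.
    #
    # >>> df.groupby('key').agg({'A': 'mean', 'B': 'sum'})
    # >>> df.groupby('key').agg({'A': ['mean', 'max']})   # list-likes ok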

    def _aggregate_multiple_funcs(self, arg, _level, _axis):
        from pandas.core.reshape.concat import concat

        if _axis != 0:
            raise NotImplementedError("axis other than 0 is not supported")

        if self._selected_obj.ndim == 1:
            obj = self._selected_obj
        else:
            obj = self._obj_with_exclusions

        results = []
        keys = []

        # degenerate case
        if obj.ndim == 1:
            for a in arg:
                try:
                    colg = self._gotitem(obj.name, ndim=1, subset=obj)
                    results.append(colg.aggregate(a))

                    # make sure we find a good name
                    name = com.get_callable_name(a) or a
                    keys.append(name)
                except (TypeError, DataError):
                    pass
                except SpecificationError:
                    raise

        # multiples
        else:
            for index, col in enumerate(obj):
                try:
                    colg = self._gotitem(col, ndim=1,
                                         subset=obj.iloc[:, index])
                    results.append(colg.aggregate(arg))
                    keys.append(col)
                except (TypeError, DataError):
                    pass
                except ValueError:
                    # cannot aggregate
                    continue
                except SpecificationError:
                    raise

        # if we are empty
        if not len(results):
            raise ValueError("no results")

        try:
            return concat(results, keys=keys, axis=1, sort=False)
        except TypeError:

            # we are concatting non-NDFrame objects,
            # e.g. a list of scalars
            from pandas.core.dtypes.cast import is_nested_object
            from pandas import Series
            result = Series(results, index=keys, name=self.name)
            if is_nested_object(result):
                raise ValueError("cannot combine transform and "
                                 "aggregation operations")
            return result
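
    # Illustrative sketch (hypothetical Series ``s``): a list of functions
    # produces one entry per function, keyed by the function's name as
    # resolved by ``com.get_callable_name``.
    #
    # >>> s.agg(['min', 'max', np.mean])
    # min     ...
    # max     ...
    # mean    ...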

    def _shallow_copy(self, obj=None, obj_type=None, **kwargs):
        """
        Return a new object with the replacement attributes.
        """
        if obj is None:
            obj = self._selected_obj.copy()
        if obj_type is None:
            obj_type = self._constructor
        if isinstance(obj, obj_type):
            obj = obj.obj
        for attr in self._attributes:
            if attr not in kwargs:
                kwargs[attr] = getattr(self, attr)
        return obj_type(obj, **kwargs)

    def _is_cython_func(self, arg):
        """
        If we define an internal function for this argument, return it.
        """
        return self._cython_table.get(arg)

    def _is_builtin_func(self, arg):
        """
        If we define a builtin function for this argument, return it,
        otherwise return the arg.
        """
        return self._builtin_table.get(arg, arg)
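

# Illustrative sketch: the lookup tables above translate python builtins and
# numpy functions into fast cython kernels, so ``agg(sum)``, ``agg(np.sum)``
# and ``agg('sum')`` all end up in the same code path.
#
# >>> SelectionMixin._builtin_table[sum] is np.sum
# True
# >>> SelectionMixin._cython_table[np.nanmean]
# 'mean'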


class IndexOpsMixin(object):
    """
    Common ops mixin to support a unified interface / docs for Series /
    Index.
    """

    # ndarray compatibility
    __array_priority__ = 1000

    def transpose(self, *args, **kwargs):
        """
        Return the transpose, which is by definition self.
        """
        nv.validate_transpose(args, kwargs)
        return self

    T = property(transpose, doc="Return the transpose, which is by "
                                "definition self.")

    @property
    def _is_homogeneous_type(self):
        """
        Whether the object has a single dtype.

        By definition, Series and Index are always considered homogeneous.
        A MultiIndex may or may not be homogeneous, depending on the
        dtypes of the levels.

        See Also
        --------
        DataFrame._is_homogeneous_type
        MultiIndex._is_homogeneous_type
        """
        return True

    @property
    def shape(self):
        """
        Return a tuple of the shape of the underlying data.
        """
        return self._values.shape

    @property
    def ndim(self):
        """
        Number of dimensions of the underlying data, by definition 1.
        """
        return 1

    def item(self):
        """
        Return the first element of the underlying data as a python scalar.
        """
        try:
            return self.values.item()
        except IndexError:
            # copy numpy's message here because Py26 raises an IndexError
            raise ValueError('can only convert an array of size 1 to a '
                             'Python scalar')

    @property
    def data(self):
        """
        Return the data pointer of the underlying data.
        """
        warnings.warn("{obj}.data is deprecated and will be removed "
                      "in a future version".format(obj=type(self).__name__),
                      FutureWarning, stacklevel=2)
        return self.values.data

    @property
    def itemsize(self):
        """
        Return the size of the dtype of the item of the underlying data.
        """
        warnings.warn("{obj}.itemsize is deprecated and will be removed "
                      "in a future version".format(obj=type(self).__name__),
                      FutureWarning, stacklevel=2)
        return self._ndarray_values.itemsize

    @property
    def nbytes(self):
        """
        Return the number of bytes in the underlying data.
        """
        return self._values.nbytes

    @property
    def strides(self):
        """
        Return the strides of the underlying data.
        """
        warnings.warn("{obj}.strides is deprecated and will be removed "
                      "in a future version".format(obj=type(self).__name__),
                      FutureWarning, stacklevel=2)
        return self._ndarray_values.strides

    @property
    def size(self):
        """
        Return the number of elements in the underlying data.
        """
        return len(self._values)

    @property
    def flags(self):
        """
        Return the ndarray.flags for the underlying data.
        """
        warnings.warn("{obj}.flags is deprecated and will be removed "
                      "in a future version".format(obj=type(self).__name__),
                      FutureWarning, stacklevel=2)
        return self.values.flags

    @property
    def base(self):
        """
        Return the base object if the memory of the underlying data is shared.
        """
        warnings.warn("{obj}.base is deprecated and will be removed "
                      "in a future version".format(obj=type(self).__name__),
                      FutureWarning, stacklevel=2)
        return self.values.base

    @property
    def array(self):
        # type: () -> ExtensionArray
        """
        The ExtensionArray of the data backing this Series or Index.

        .. versionadded:: 0.24.0

        Returns
        -------
        array : ExtensionArray
            An ExtensionArray of the values stored within. For extension
            types, this is the actual array. For NumPy native types, this
            is a thin (no copy) wrapper around :class:`numpy.ndarray`.

            ``.array`` differs from ``.values``, which may require converting
            the data to a different form.

        See Also
        --------
        Index.to_numpy : Similar method that always returns a NumPy array.
        Series.to_numpy : Similar method that always returns a NumPy array.

        Notes
        -----
        This table lays out the different array types for each extension
        dtype within pandas.

        ================== =============================
        dtype              array type
        ================== =============================
        category           Categorical
        period             PeriodArray
        interval           IntervalArray
        IntegerNA          IntegerArray
        datetime64[ns, tz] DatetimeArray
        ================== =============================

        For any 3rd-party extension types, the array type will be an
        ExtensionArray.

        For all remaining dtypes ``.array`` will be a
        :class:`arrays.PandasArray` wrapping the actual ndarray
        stored within. If you absolutely need a NumPy array (possibly with
        copying / coercing data), then use :meth:`Series.to_numpy` instead.

        Examples
        --------
        For regular NumPy types like int, and float, a PandasArray
        is returned.

        >>> pd.Series([1, 2, 3]).array
        <PandasArray>
        [1, 2, 3]
        Length: 3, dtype: int64

        For extension types, like Categorical, the actual ExtensionArray
        is returned.

        >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a']))
        >>> ser.array
        [a, b, a]
        Categories (2, object): [a, b]
        """
        result = self._values

        if is_datetime64_ns_dtype(result.dtype):
            from pandas.arrays import DatetimeArray
            result = DatetimeArray(result)
        elif is_timedelta64_ns_dtype(result.dtype):
            from pandas.arrays import TimedeltaArray
            result = TimedeltaArray(result)
        elif not is_extension_array_dtype(result.dtype):
            from pandas.core.arrays.numpy_ import PandasArray
            result = PandasArray(result)

        return result

    def to_numpy(self, dtype=None, copy=False):
        """
        A NumPy ndarray representing the values in this Series or Index.

        .. versionadded:: 0.24.0

        Parameters
        ----------
        dtype : str or numpy.dtype, optional
            The dtype to pass to :meth:`numpy.asarray`.
        copy : bool, default False
            Whether to ensure that the returned value is not a view on
            another array. Note that ``copy=False`` does not *ensure* that
            ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
            a copy is made, even if not strictly necessary.

        Returns
        -------
        numpy.ndarray

        See Also
        --------
        Series.array : Get the actual data stored within.
        Index.array : Get the actual data stored within.
        DataFrame.to_numpy : Similar method for DataFrame.

        Notes
        -----
        The returned array will be the same up to equality (values equal
        in `self` will be equal in the returned array; likewise for values
        that are not equal). When `self` contains an ExtensionArray, the
        dtype may be different. For example, for a category-dtype Series,
        ``to_numpy()`` will return a NumPy array and the categorical dtype
        will be lost.

        For NumPy dtypes, this will be a reference to the actual data stored
        in this Series or Index (assuming ``copy=False``). Modifying the
        result in place will modify the data stored in the Series or Index
        (not that we recommend doing that).

        For extension types, ``to_numpy()`` *may* require copying data and
        coercing the result to a NumPy type (possibly object), which may be
        expensive. When you need a no-copy reference to the underlying data,
        :attr:`Series.array` should be used instead.

        This table lays out the different dtypes and default return types of
        ``to_numpy()`` for various dtypes within pandas.

        ================== ================================
        dtype              array type
        ================== ================================
        category[T]        ndarray[T] (same dtype as input)
        period             ndarray[object] (Periods)
        interval           ndarray[object] (Intervals)
        IntegerNA          ndarray[object]
        datetime64[ns]     datetime64[ns]
        datetime64[ns, tz] ndarray[object] (Timestamps)
        ================== ================================

        Examples
        --------
        >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a']))
        >>> ser.to_numpy()
        array(['a', 'b', 'a'], dtype=object)

        Specify the `dtype` to control how datetime-aware data is
        represented. Use ``dtype=object`` to return an ndarray of pandas
        :class:`Timestamp` objects, each with the correct ``tz``.

        >>> ser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
        >>> ser.to_numpy(dtype=object)
        array([Timestamp('2000-01-01 00:00:00+0100', tz='CET', freq='D'),
               Timestamp('2000-01-02 00:00:00+0100', tz='CET', freq='D')],
              dtype=object)

        Or ``dtype='datetime64[ns]'`` to return an ndarray of native
        datetime64 values. The values are converted to UTC and the timezone
        info is dropped.

        >>> ser.to_numpy(dtype="datetime64[ns]")
        ... # doctest: +ELLIPSIS
        array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00...'],
              dtype='datetime64[ns]')
        """
        if is_datetime64tz_dtype(self.dtype) and dtype is None:
            # note: this is going to change very soon.
            # I have a WIP PR making this unnecessary, but it's
            # a bit out of scope for the DatetimeArray PR.
            dtype = "object"

        result = np.asarray(self._values, dtype=dtype)
        # TODO(GH-24345): Avoid potential double copy
        if copy:
            result = result.copy()
        return result

    @property
    def _ndarray_values(self):
        # type: () -> np.ndarray
        """
        The data as an ndarray, possibly losing information.

        The expectation is that this is cheap to compute, and is primarily
        used for interacting with our indexers.

        - categorical -> codes
        """
        if is_extension_array_dtype(self):
            return self.array._ndarray_values
        return self.values

    @property
    def empty(self):
        return not self.size

    def max(self, axis=None, skipna=True):
        """
        Return the maximum value of the Index.

        Parameters
        ----------
        axis : int, optional
            For compatibility with NumPy. Only 0 or None are allowed.
        skipna : bool, default True

        Returns
        -------
        scalar
            Maximum value.

        See Also
        --------
        Index.min : Return the minimum value in an Index.
        Series.max : Return the maximum value in a Series.
        DataFrame.max : Return the maximum values in a DataFrame.

        Examples
        --------
        >>> idx = pd.Index([3, 2, 1])
        >>> idx.max()
        3

        >>> idx = pd.Index(['c', 'b', 'a'])
        >>> idx.max()
        'c'

        For a MultiIndex, the maximum is determined lexicographically.

        >>> idx = pd.MultiIndex.from_product([('a', 'b'), (2, 1)])
        >>> idx.max()
        ('b', 2)
        """
        nv.validate_minmax_axis(axis)
        return nanops.nanmax(self._values, skipna=skipna)

    def argmax(self, axis=None, skipna=True):
        """
        Return a ndarray of the maximum argument indexer.

        Parameters
        ----------
        axis : {None}
            Dummy argument for consistency with Series.
        skipna : bool, default True

        See Also
        --------
        numpy.ndarray.argmax
        """
        nv.validate_minmax_axis(axis)
        return nanops.nanargmax(self._values, skipna=skipna)

    def min(self, axis=None, skipna=True):
        """
        Return the minimum value of the Index.

        Parameters
        ----------
        axis : {None}
            Dummy argument for consistency with Series.
        skipna : bool, default True

        Returns
        -------
        scalar
            Minimum value.

        See Also
        --------
        Index.max : Return the maximum value of the object.
        Series.min : Return the minimum value in a Series.
        DataFrame.min : Return the minimum values in a DataFrame.

        Examples
        --------
        >>> idx = pd.Index([3, 2, 1])
        >>> idx.min()
        1

        >>> idx = pd.Index(['c', 'b', 'a'])
        >>> idx.min()
        'a'

        For a MultiIndex, the minimum is determined lexicographically.

        >>> idx = pd.MultiIndex.from_product([('a', 'b'), (2, 1)])
        >>> idx.min()
        ('a', 1)
        """
        nv.validate_minmax_axis(axis)
        return nanops.nanmin(self._values, skipna=skipna)

    def argmin(self, axis=None, skipna=True):
        """
        Return a ndarray of the minimum argument indexer.

        Parameters
        ----------
        axis : {None}
            Dummy argument for consistency with Series.
        skipna : bool, default True

        See Also
        --------
        numpy.ndarray.argmin
        """
        nv.validate_minmax_axis(axis)
        return nanops.nanargmin(self._values, skipna=skipna)

    def tolist(self):
        """
        Return a list of the values.

        These are each a scalar type, which is a Python scalar
        (for str, int, float) or a pandas scalar
        (for Timestamp/Timedelta/Interval/Period).

        See Also
        --------
        numpy.ndarray.tolist
        """
        if is_datetimelike(self._values):
            return [com.maybe_box_datetimelike(x) for x in self._values]
        elif is_extension_array_dtype(self._values):
            return list(self._values)
        else:
            return self._values.tolist()

    to_list = tolist

    def __iter__(self):
        """
        Return an iterator of the values.

        These are each a scalar type, which is a Python scalar
        (for str, int, float) or a pandas scalar
        (for Timestamp/Timedelta/Interval/Period).
        """
        # We are explicitly making element iterators.
        if is_datetimelike(self._values):
            return map(com.maybe_box_datetimelike, self._values)
        elif is_extension_array_dtype(self._values):
            return iter(self._values)
        else:
            return map(self._values.item, range(self._values.size))

    @cache_readonly
    def hasnans(self):
        """
        Return True if there are any NaNs; enables various perf speedups.
        """
        return bool(isna(self).any())

    def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
                filter_type=None, **kwds):
        """Perform the reduction type operation if we can."""
        func = getattr(self, name, None)
        if func is None:
            raise TypeError("{klass} cannot perform the operation {op}".format(
                klass=self.__class__.__name__, op=name))
        return func(skipna=skipna, **kwds)

    def _map_values(self, mapper, na_action=None):
        """
        An internal function that maps values using the input
        correspondence (which can be a dict, Series, or function).

        Parameters
        ----------
        mapper : function, dict, or Series
            The input correspondence object
        na_action : {None, 'ignore'}
            If 'ignore', propagate NA values, without passing them to the
            mapping function

        Returns
        -------
        applied : Union[Index, MultiIndex], inferred
            The output of the mapping function applied to the index.
            If the function returns a tuple with more than one element
            a MultiIndex will be returned.
        """
        # we can fastpath dict/Series to an efficient map
        # as we know that we are not going to have to yield
        # python types
        if isinstance(mapper, dict):
            if hasattr(mapper, '__missing__'):
                # If a dictionary subclass defines a default value method,
                # convert mapper to a lookup function (GH #15999).
                dict_with_default = mapper
                mapper = lambda x: dict_with_default[x]
            else:
                # Dictionary does not have a default. Thus it's safe to
                # convert to a Series for efficiency.
                # we specify the keys here to handle the
                # possibility that they are tuples
                from pandas import Series
                mapper = Series(mapper)

        if isinstance(mapper, ABCSeries):
            # Since values were input this means we came from either
            # a dict or a series and mapper should be an index
            if is_extension_type(self.dtype):
                values = self._values
            else:
                values = self.values

            indexer = mapper.index.get_indexer(values)
            new_values = algorithms.take_1d(mapper._values, indexer)

            return new_values

        # we must convert to python types
        if is_extension_type(self.dtype):
            values = self._values
            if na_action is not None:
                raise NotImplementedError
            map_f = lambda values, f: values.map(f)
        else:
            values = self.astype(object)
            values = getattr(values, 'values', values)
            if na_action == 'ignore':
                def map_f(values, f):
                    return lib.map_infer_mask(values, f,
                                              isna(values).view(np.uint8))
            else:
                map_f = lib.map_infer

        # mapper is a function
        new_values = map_f(values, mapper)

        return new_values
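
    # Illustrative sketch (hypothetical Series ``s``): dict and Series
    # mappers take the indexer fastpath above, while callables go through
    # ``lib.map_infer``.
    #
    # >>> s = pd.Series(['cat', 'dog', 'cat'])
    # >>> s.map({'cat': 'kitten'})   # dict -> Series, indexer fastpath
    # >>> s.map(str.upper)           # function -> map_infer path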

    def value_counts(self, normalize=False, sort=True, ascending=False,
                     bins=None, dropna=True):
        """
        Return a Series containing counts of unique values.

        The resulting object will be in descending order so that the
        first element is the most frequently-occurring element.
        Excludes NA values by default.

        Parameters
        ----------
        normalize : boolean, default False
            If True then the object returned will contain the relative
            frequencies of the unique values.
        sort : boolean, default True
            Sort by values.
        ascending : boolean, default False
            Sort in ascending order.
        bins : integer, optional
            Rather than count values, group them into half-open bins,
            a convenience for ``pd.cut``, only works with numeric data.
        dropna : boolean, default True
            Don't include counts of NaN.

        Returns
        -------
        counts : Series

        See Also
        --------
        Series.count: Number of non-NA elements in a Series.
        DataFrame.count: Number of non-NA elements in a DataFrame.

        Examples
        --------
        >>> index = pd.Index([3, 1, 2, 3, 4, np.nan])
        >>> index.value_counts()
        3.0    2
        4.0    1
        2.0    1
        1.0    1
        dtype: int64

        With `normalize` set to `True`, returns the relative frequency by
        dividing all values by the sum of values.

        >>> s = pd.Series([3, 1, 2, 3, 4, np.nan])
        >>> s.value_counts(normalize=True)
        3.0    0.4
        4.0    0.2
        2.0    0.2
        1.0    0.2
        dtype: float64

        **bins**

        Bins can be useful for going from a continuous variable to a
        categorical variable; instead of counting unique
        occurrences of values, divide the index in the specified
        number of half-open bins.

        >>> s.value_counts(bins=3)
        (2.0, 3.0]      2
        (0.996, 2.0]    2
        (3.0, 4.0]      1
        dtype: int64

        **dropna**

        With `dropna` set to `False` we can also see NaN index values.

        >>> s.value_counts(dropna=False)
        3.0    2
        NaN    1
        4.0    1
        2.0    1
        1.0    1
        dtype: int64
        """
        from pandas.core.algorithms import value_counts
        result = value_counts(self, sort=sort, ascending=ascending,
                              normalize=normalize, bins=bins, dropna=dropna)
        return result

    def unique(self):
        values = self._values

        if hasattr(values, 'unique'):
            result = values.unique()
        else:
            from pandas.core.algorithms import unique1d
            result = unique1d(values)
        return result

    def nunique(self, dropna=True):
        """
        Return number of unique elements in the object.

        Excludes NA values by default.

        Parameters
        ----------
        dropna : boolean, default True
            Don't include NaN in the count.

        Returns
        -------
        nunique : int
        """
        uniqs = self.unique()
        n = len(uniqs)
        if dropna and isna(uniqs).any():
            n -= 1
        return n
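
    # Illustrative sketch: ``unique()`` reports NaN as at most one "unique"
    # value, so with ``dropna=True`` it is subtracted exactly once.
    #
    # >>> pd.Series([1, 1, np.nan, np.nan]).nunique()
    # 1
    # >>> pd.Series([1, 1, np.nan, np.nan]).nunique(dropna=False)
    # 2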

    @property
    def is_unique(self):
        """
        Return boolean if values in the object are unique.

        Returns
        -------
        is_unique : boolean
        """
        return self.nunique(dropna=False) == len(self)

    @property
    def is_monotonic(self):
        """
        Return boolean if values in the object are
        monotonic_increasing.

        .. versionadded:: 0.19.0

        Returns
        -------
        is_monotonic : boolean
        """
        from pandas import Index
        return Index(self).is_monotonic

    is_monotonic_increasing = is_monotonic

    @property
    def is_monotonic_decreasing(self):
        """
        Return boolean if values in the object are
        monotonic_decreasing.

        .. versionadded:: 0.19.0

        Returns
        -------
        is_monotonic_decreasing : boolean
        """
        from pandas import Index
        return Index(self).is_monotonic_decreasing

    def memory_usage(self, deep=False):
        """
        Memory usage of the values.

        Parameters
        ----------
        deep : bool
            Introspect the data deeply, interrogate
            `object` dtypes for system-level memory consumption.

        Returns
        -------
        bytes used

        See Also
        --------
        numpy.ndarray.nbytes

        Notes
        -----
        Memory usage does not include memory consumed by elements that
        are not components of the array if deep=False or if used on PyPy.
        """
        if hasattr(self.array, 'memory_usage'):
            return self.array.memory_usage(deep=deep)

        v = self.array.nbytes
        if deep and is_object_dtype(self) and not PYPY:
            v += lib.memory_usage_of_objects(self.array)
        return v

    @Substitution(
        values='', order='', size_hint='',
        sort=textwrap.dedent("""\
        sort : boolean, default False
            Sort `uniques` and shuffle `labels` to maintain the
            relationship.
        """))
    @Appender(algorithms._shared_docs['factorize'])
    def factorize(self, sort=False, na_sentinel=-1):
        return algorithms.factorize(self, sort=sort, na_sentinel=na_sentinel)

    _shared_docs['searchsorted'] = (
        """
        Find indices where elements should be inserted to maintain order.

        Find the indices into a sorted %(klass)s `self` such that, if the
        corresponding elements in `value` were inserted before the indices,
        the order of `self` would be preserved.

        Parameters
        ----------
        value : array_like
            Values to insert into `self`.
        side : {'left', 'right'}, optional
            If 'left', the index of the first suitable location found is
            given. If 'right', return the last such index. If there is no
            suitable index, return either 0 or N (where N is the length of
            `self`).
        sorter : 1-D array_like, optional
            Optional array of integer indices that sort `self` into ascending
            order. They are typically the result of ``np.argsort``.

        Returns
        -------
        int or array of int
            A scalar or array of insertion points with the
            same shape as `value`.

            .. versionchanged:: 0.24.0
                If `value` is a scalar, an int is now always returned.
                Previously, scalar inputs returned an 1-item array for
                :class:`Series` and :class:`Categorical`.

        See Also
        --------
        numpy.searchsorted

        Notes
        -----
        Binary search is used to find the required insertion points.

        Examples
        --------
        >>> x = pd.Series([1, 2, 3])
        >>> x
        0    1
        1    2
        2    3
        dtype: int64

        >>> x.searchsorted(4)
        3

        >>> x.searchsorted([0, 4])
        array([0, 3])

        >>> x.searchsorted([1, 3], side='left')
        array([0, 2])

        >>> x.searchsorted([1, 3], side='right')
        array([1, 3])

        >>> x = pd.Categorical(['apple', 'bread', 'bread',
        ...                     'cheese', 'milk'], ordered=True)
        >>> x
        [apple, bread, bread, cheese, milk]
        Categories (4, object): [apple < bread < cheese < milk]

        >>> x.searchsorted('bread')
        1

        >>> x.searchsorted(['bread'], side='right')
        array([3])
        """)

    @Substitution(klass='IndexOpsMixin')
    @Appender(_shared_docs['searchsorted'])
    def searchsorted(self, value, side='left', sorter=None):
        # needs coercion on the key (DatetimeIndex does already)
        return self._values.searchsorted(value, side=side, sorter=sorter)

    def drop_duplicates(self, keep='first', inplace=False):
        inplace = validate_bool_kwarg(inplace, 'inplace')
        if isinstance(self, ABCIndexClass):
            if self.is_unique:
                return self._shallow_copy()

        duplicated = self.duplicated(keep=keep)
        result = self[np.logical_not(duplicated)]
        if inplace:
            return self._update_inplace(result)
        else:
            return result

    def duplicated(self, keep='first'):
        from pandas.core.algorithms import duplicated
        if isinstance(self, ABCIndexClass):
            if self.is_unique:
                return np.zeros(len(self), dtype=np.bool)
            return duplicated(self, keep=keep)
        else:
            return self._constructor(duplicated(self, keep=keep),
                                     index=self.index).__finalize__(self)
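
    # Illustrative sketch: ``keep`` chooses which occurrence survives.
    #
    # >>> s = pd.Series(['a', 'b', 'a'])
    # >>> s.duplicated(keep='first').tolist()
    # [False, False, True]
    # >>> s.drop_duplicates(keep='last').tolist()
    # ['b', 'a']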

    # ----------------------------------------------------------------------
    # abstracts

    def _update_inplace(self, result, **kwargs):
        raise AbstractMethodError(self)