indexing.py 90 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766
  1. # pylint: disable=W0223
  2. import textwrap
  3. import warnings
  4. import numpy as np
  5. from pandas._libs.indexing import _NDFrameIndexerBase
  6. import pandas.compat as compat
  7. from pandas.compat import range, zip
  8. from pandas.errors import AbstractMethodError
  9. from pandas.util._decorators import Appender
  10. from pandas.core.dtypes.common import (
  11. ensure_platform_int, is_float, is_integer, is_integer_dtype, is_iterator,
  12. is_list_like, is_scalar, is_sequence, is_sparse)
  13. from pandas.core.dtypes.generic import ABCDataFrame, ABCPanel, ABCSeries
  14. from pandas.core.dtypes.missing import _infer_fill_value, isna
  15. import pandas.core.common as com
  16. from pandas.core.index import Index, MultiIndex
  # the supported indexers
  def get_indexers_list():
      """
      List the indexer accessors together with their implementing classes.

      Returns
      -------
      list of tuple
          ``(accessor name, indexer class)`` pairs for 'ix', 'iloc', 'loc',
          'at' and 'iat', in that order.
      """
      return [
          ('ix', _IXIndexer),
          ('iloc', _iLocIndexer),
          ('loc', _LocIndexer),
          ('at', _AtIndexer),
          ('iat', _iAtIndexer),
      ]
  26. # "null slice"
  27. _NS = slice(None, None)
  28. # the public IndexSlicerMaker
  29. class _IndexSlice(object):
  30. """
  31. Create an object to more easily perform multi-index slicing
  32. See Also
  33. --------
  34. MultiIndex.remove_unused_levels : New MultiIndex with no unused levels.
  35. Notes
  36. -----
  37. See :ref:`Defined Levels <advanced.shown_levels>`
  38. for further info on slicing a MultiIndex.
  39. Examples
  40. --------
  41. >>> midx = pd.MultiIndex.from_product([['A0','A1'], ['B0','B1','B2','B3']])
  42. >>> columns = ['foo', 'bar']
  43. >>> dfmi = pd.DataFrame(np.arange(16).reshape((len(midx), len(columns))),
  44. index=midx, columns=columns)
  45. Using the default slice command:
  46. >>> dfmi.loc[(slice(None), slice('B0', 'B1')), :]
  47. foo bar
  48. A0 B0 0 1
  49. B1 2 3
  50. A1 B0 8 9
  51. B1 10 11
  52. Using the IndexSlice class for a more intuitive command:
  53. >>> idx = pd.IndexSlice
  54. >>> dfmi.loc[idx[:, 'B0':'B1'], :]
  55. foo bar
  56. A0 B0 0 1
  57. B1 2 3
  58. A1 B0 8 9
  59. B1 10 11
  60. """
  61. def __getitem__(self, arg):
  62. return arg
  63. IndexSlice = _IndexSlice()
  class IndexingError(Exception):
      """
      Error raised by the indexers in this module for an unusable indexer,
      for example when a key has more entries than the object has axes
      ('Too many indexers').
      """
  66. class _NDFrameIndexer(_NDFrameIndexerBase):
  67. _valid_types = None
  68. _exception = KeyError
  69. axis = None
  70. def __call__(self, axis=None):
  71. # we need to return a copy of ourselves
  72. new_self = self.__class__(self.name, self.obj)
  73. if axis is not None:
  74. axis = self.obj._get_axis_number(axis)
  75. new_self.axis = axis
  76. return new_self
  77. def __iter__(self):
  78. raise NotImplementedError('ix is not iterable')
  79. def __getitem__(self, key):
  80. if type(key) is tuple:
  81. key = tuple(com.apply_if_callable(x, self.obj)
  82. for x in key)
  83. try:
  84. values = self.obj._get_value(*key)
  85. if is_scalar(values):
  86. return values
  87. except Exception:
  88. pass
  89. return self._getitem_tuple(key)
  90. else:
  91. # we by definition only have the 0th axis
  92. axis = self.axis or 0
  93. key = com.apply_if_callable(key, self.obj)
  94. return self._getitem_axis(key, axis=axis)
  95. def _get_label(self, label, axis=None):
  96. if axis is None:
  97. axis = self.axis or 0
  98. if self.ndim == 1:
  99. # for perf reasons we want to try _xs first
  100. # as its basically direct indexing
  101. # but will fail when the index is not present
  102. # see GH5667
  103. return self.obj._xs(label, axis=axis)
  104. elif isinstance(label, tuple) and isinstance(label[axis], slice):
  105. raise IndexingError('no slices here, handle elsewhere')
  106. return self.obj._xs(label, axis=axis)
  107. def _get_loc(self, key, axis=None):
  108. if axis is None:
  109. axis = self.axis
  110. return self.obj._ixs(key, axis=axis)
  111. def _slice(self, obj, axis=None, kind=None):
  112. if axis is None:
  113. axis = self.axis
  114. return self.obj._slice(obj, axis=axis, kind=kind)
  115. def _get_setitem_indexer(self, key):
  116. if self.axis is not None:
  117. return self._convert_tuple(key, is_setter=True)
  118. axis = self.obj._get_axis(0)
  119. if isinstance(axis, MultiIndex) and self.name != 'iloc':
  120. try:
  121. return axis.get_loc(key)
  122. except Exception:
  123. pass
  124. if isinstance(key, tuple):
  125. try:
  126. return self._convert_tuple(key, is_setter=True)
  127. except IndexingError:
  128. pass
  129. if isinstance(key, range):
  130. return self._convert_range(key, is_setter=True)
  131. try:
  132. return self._convert_to_indexer(key, is_setter=True)
  133. except TypeError as e:
  134. # invalid indexer type vs 'other' indexing errors
  135. if 'cannot do' in str(e):
  136. raise
  137. raise IndexingError(key)
  138. def __setitem__(self, key, value):
  139. if isinstance(key, tuple):
  140. key = tuple(com.apply_if_callable(x, self.obj)
  141. for x in key)
  142. else:
  143. key = com.apply_if_callable(key, self.obj)
  144. indexer = self._get_setitem_indexer(key)
  145. self._setitem_with_indexer(indexer, value)
  146. def _validate_key(self, key, axis):
  147. """
  148. Ensure that key is valid for current indexer.
  149. Parameters
  150. ----------
  151. key : scalar, slice or list-like
  152. The key requested
  153. axis : int
  154. Dimension on which the indexing is being made
  155. Raises
  156. ------
  157. TypeError
  158. If the key (or some element of it) has wrong type
  159. IndexError
  160. If the key (or some element of it) is out of bounds
  161. KeyError
  162. If the key was not found
  163. """
  164. raise AbstractMethodError()
  165. def _has_valid_tuple(self, key):
  166. """ check the key for valid keys across my indexer """
  167. for i, k in enumerate(key):
  168. if i >= self.obj.ndim:
  169. raise IndexingError('Too many indexers')
  170. try:
  171. self._validate_key(k, i)
  172. except ValueError:
  173. raise ValueError("Location based indexing can only have "
  174. "[{types}] types"
  175. .format(types=self._valid_types))
  176. def _is_nested_tuple_indexer(self, tup):
  177. if any(isinstance(ax, MultiIndex) for ax in self.obj.axes):
  178. return any(is_nested_tuple(tup, ax) for ax in self.obj.axes)
  179. return False
  180. def _convert_tuple(self, key, is_setter=False):
  181. keyidx = []
  182. if self.axis is not None:
  183. axis = self.obj._get_axis_number(self.axis)
  184. for i in range(self.ndim):
  185. if i == axis:
  186. keyidx.append(self._convert_to_indexer(
  187. key, axis=axis, is_setter=is_setter))
  188. else:
  189. keyidx.append(slice(None))
  190. else:
  191. for i, k in enumerate(key):
  192. if i >= self.obj.ndim:
  193. raise IndexingError('Too many indexers')
  194. idx = self._convert_to_indexer(k, axis=i, is_setter=is_setter)
  195. keyidx.append(idx)
  196. return tuple(keyidx)
  197. def _convert_range(self, key, is_setter=False):
  198. """ convert a range argument """
  199. return list(key)
  200. def _convert_scalar_indexer(self, key, axis):
  201. # if we are accessing via lowered dim, use the last dim
  202. if axis is None:
  203. axis = 0
  204. ax = self.obj._get_axis(min(axis, self.ndim - 1))
  205. # a scalar
  206. return ax._convert_scalar_indexer(key, kind=self.name)
  207. def _convert_slice_indexer(self, key, axis):
  208. # if we are accessing via lowered dim, use the last dim
  209. ax = self.obj._get_axis(min(axis, self.ndim - 1))
  210. return ax._convert_slice_indexer(key, kind=self.name)
  211. def _has_valid_setitem_indexer(self, indexer):
  212. return True
  213. def _has_valid_positional_setitem_indexer(self, indexer):
  214. """ validate that an positional indexer cannot enlarge its target
  215. will raise if needed, does not modify the indexer externally
  216. """
  217. if isinstance(indexer, dict):
  218. raise IndexError("{0} cannot enlarge its target object"
  219. .format(self.name))
  220. else:
  221. if not isinstance(indexer, tuple):
  222. indexer = self._tuplify(indexer)
  223. for ax, i in zip(self.obj.axes, indexer):
  224. if isinstance(i, slice):
  225. # should check the stop slice?
  226. pass
  227. elif is_list_like_indexer(i):
  228. # should check the elements?
  229. pass
  230. elif is_integer(i):
  231. if i >= len(ax):
  232. raise IndexError("{name} cannot enlarge its target "
  233. "object".format(name=self.name))
  234. elif isinstance(i, dict):
  235. raise IndexError("{name} cannot enlarge its target object"
  236. .format(name=self.name))
  237. return True
  def _setitem_with_indexer(self, indexer, value):
      """
      Set ``value`` on ``self.obj`` at the locations given by ``indexer``.

      The method works in three stages:

      1. decide whether the value has to be set item by item along the
         info axis (the "split path") or can be set on the whole block
         manager at once;
      2. handle ``dict`` entries in ``indexer``, which are unpacked with
         ``convert_missing_indexer`` and lead to the object being enlarged
         by the new label;
      3. align ``value`` to the selection and set it.

      Parameters
      ----------
      indexer : tuple or single-axis indexer
          A ``dict`` (alone, or as a tuple entry) requests enlargement.
      value : scalar, list-like, dict, Series, DataFrame or Panel

      Returns
      -------
      ``self.obj`` from the enlargement paths that finish early, otherwise
      ``None``. For objects with 3 or more dimensions the single-indexer
      enlargement path returns the result of ``self.obj.__setitem__``.

      Raises
      ------
      ValueError
          - a scalar is set on a new item while another axis is empty
          - a row is added to a frame without columns, or with a list-like
            of a different length than the columns
          - a multi-index selection and the value differ in length
          - keys and values differ in length on the split path
      """
      self._has_valid_setitem_indexer(indexer)

      # also has the side effect of consolidating in-place
      from pandas import Series
      info_axis = self.obj._info_axis_number

      # maybe partial set
      take_split_path = self.obj._is_mixed_type

      # if there is only one block/type, still have to take split path
      # unless the block is one-dimensional or it can hold the value
      if not take_split_path and self.obj._data.blocks:
          blk, = self.obj._data.blocks
          if 1 < blk.ndim:  # in case of dict, keys are indices
              val = list(value.values()) if isinstance(value,
                                                       dict) else value
              take_split_path = not blk._can_hold_element(val)

      if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes):

          for i, ax in zip(indexer, self.obj.axes):

              # if we have any multi-indexes that have non-trivial slices
              # (not null slices) then we must take the split path, xref
              # GH 10360
              if (isinstance(ax, MultiIndex) and
                      not (is_integer(i) or com.is_null_slice(i))):
                  take_split_path = True
                  break

      if isinstance(indexer, tuple):
          nindexer = []
          for i, idx in enumerate(indexer):
              if isinstance(idx, dict):

                  # reindex the axis to the new value
                  # and set inplace
                  key, _ = convert_missing_indexer(idx)

                  # if this is the items axes, then take the main missing
                  # path first
                  # this correctly sets the dtype and avoids cache issues
                  # essentially this separates out the block that is needed
                  # to possibly be modified
                  if self.ndim > 1 and i == self.obj._info_axis_number:

                      # add the new item, and set the value
                      # must have all defined axes if we have a scalar
                      # or a list-like on the non-info axes if we have a
                      # list-like
                      len_non_info_axes = [
                          len(_ax) for _i, _ax in enumerate(self.obj.axes)
                          if _i != i
                      ]
                      if any(not l for l in len_non_info_axes):
                          if not is_list_like_indexer(value):
                              raise ValueError("cannot set a frame with no "
                                               "defined index and a scalar")
                          self.obj[key] = value
                          return self.obj

                      # add a new item with the dtype setup
                      self.obj[key] = _infer_fill_value(value)

                      new_indexer = convert_from_missing_indexer_tuple(
                          indexer, self.obj.axes)
                      self._setitem_with_indexer(new_indexer, value)

                      return self.obj

                  # reindex the axis
                  # make sure to clear the cache because we are
                  # just replacing the block manager here
                  # so the object is the same
                  index = self.obj._get_axis(i)
                  labels = index.insert(len(index), key)
                  self.obj._data = self.obj.reindex(labels, axis=i)._data
                  self.obj._maybe_update_cacher(clear=True)
                  self.obj._is_copy = None

                  nindexer.append(labels.get_loc(key))

              else:
                  nindexer.append(idx)

          indexer = tuple(nindexer)
      else:

          indexer, missing = convert_missing_indexer(indexer)

          if missing:

              # reindex the axis to the new value
              # and set inplace
              if self.ndim == 1:
                  index = self.obj.index
                  new_index = index.insert(len(index), indexer)

                  # we have a coerced indexer, e.g. a float
                  # that matches in an Int64Index, so
                  # we will not create a duplicate index, rather
                  # index to that element
                  # e.g. 0.0 -> 0
                  # GH12246
                  if index.is_unique:
                      new_indexer = index.get_indexer([new_index[-1]])
                      if (new_indexer != -1).any():
                          return self._setitem_with_indexer(new_indexer,
                                                            value)

                  # this preserves dtype of the value
                  new_values = Series([value])._values
                  if len(self.obj._values):
                      try:
                          new_values = np.concatenate([self.obj._values,
                                                       new_values])
                      except TypeError:
                          as_obj = self.obj.astype(object)
                          new_values = np.concatenate([as_obj,
                                                       new_values])
                  self.obj._data = self.obj._constructor(
                      new_values, index=new_index, name=self.obj.name)._data
                  self.obj._maybe_update_cacher(clear=True)
                  return self.obj

              elif self.ndim == 2:

                  # no columns and scalar
                  if not len(self.obj.columns):
                      raise ValueError("cannot set a frame with no defined "
                                       "columns")

                  # append a Series
                  if isinstance(value, Series):

                      value = value.reindex(index=self.obj.columns,
                                            copy=True)
                      value.name = indexer

                  # a list-list
                  else:

                      # must have conforming columns
                      if is_list_like_indexer(value):
                          if len(value) != len(self.obj.columns):
                              raise ValueError("cannot set a row with "
                                               "mismatched columns")

                      value = Series(value, index=self.obj.columns,
                                     name=indexer)

                  self.obj._data = self.obj.append(value)._data
                  self.obj._maybe_update_cacher(clear=True)
                  return self.obj

              # set using setitem (Panel and > dims)
              elif self.ndim >= 3:
                  return self.obj.__setitem__(indexer, value)

      # set
      item_labels = self.obj._get_axis(info_axis)

      # align and set the values
      if take_split_path:

          if not isinstance(indexer, tuple):
              indexer = self._tuplify(indexer)

          if isinstance(value, ABCSeries):
              value = self._align_series(indexer, value)

          info_idx = indexer[info_axis]
          if is_integer(info_idx):
              info_idx = [info_idx]
          labels = item_labels[info_idx]

          # if we have a partial multiindex, then need to adjust the plane
          # indexer here
          if (len(labels) == 1 and
                  isinstance(self.obj[labels[0]].axes[0], MultiIndex)):
              item = labels[0]
              obj = self.obj[item]
              index = obj.index
              idx = indexer[:info_axis][0]

              plane_indexer = tuple([idx]) + indexer[info_axis + 1:]
              lplane_indexer = length_of_indexer(plane_indexer[0], index)

              # require that we are setting the right number of values that
              # we are indexing
              if is_list_like_indexer(value) and np.iterable(
                      value) and lplane_indexer != len(value):

                  if len(obj[idx]) != len(value):
                      raise ValueError("cannot set using a multi-index "
                                       "selection indexer with a different "
                                       "length than the value")

                  # make sure we have an ndarray
                  value = getattr(value, 'values', value).ravel()

                  # we can directly set the series here
                  # as we select a slice indexer on the mi
                  idx = index._convert_slice_indexer(idx)
                  obj._consolidate_inplace()
                  obj = obj.copy()
                  obj._data = obj._data.setitem(indexer=tuple([idx]),
                                                value=value)
                  self.obj[item] = obj
                  return

          # non-mi
          else:
              plane_indexer = indexer[:info_axis] + indexer[info_axis + 1:]
              if info_axis > 0:
                  plane_axis = self.obj.axes[:info_axis][0]
                  lplane_indexer = length_of_indexer(plane_indexer[0],
                                                     plane_axis)
              else:
                  lplane_indexer = 0

          def setter(item, v):
              s = self.obj[item]
              pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer

              # perform the equivalent of a setitem on the info axis
              # as we have a null slice or a slice with full bounds
              # which means essentially reassign to the columns of a
              # multi-dim object
              # GH6149 (null slice), GH10408 (full bounds)
              if (isinstance(pi, tuple) and
                      all(com.is_null_slice(idx) or
                          com.is_full_slice(idx, len(self.obj))
                          for idx in pi)):
                  s = v
              else:
                  # set the item, possibly having a dtype change
                  s._consolidate_inplace()
                  s = s.copy()
                  s._data = s._data.setitem(indexer=pi, value=v)
                  s._maybe_update_cacher(clear=True)

              # reset the sliced object if unique
              self.obj[item] = s

          def can_do_equal_len():
              """ return True if we have an equal len settable """
              if (not len(labels) == 1 or not np.iterable(value) or
                      is_scalar(plane_indexer[0])):
                  return False

              item = labels[0]
              index = self.obj[item].index

              values_len = len(value)
              # equal len list/ndarray
              if len(index) == values_len:
                  return True
              elif lplane_indexer == values_len:
                  return True

              return False

          # we need an iterable, with a ndim of at least 1
          # eg. don't pass through np.array(0)
          if is_list_like_indexer(value) and getattr(value, 'ndim', 1) > 0:

              # we have an equal len Frame
              if isinstance(value, ABCDataFrame) and value.ndim > 1:
                  sub_indexer = list(indexer)
                  multiindex_indexer = isinstance(labels, MultiIndex)

                  for item in labels:
                      if item in value:
                          sub_indexer[info_axis] = item
                          v = self._align_series(
                              tuple(sub_indexer), value[item],
                              multiindex_indexer)
                      else:
                          v = np.nan

                      setter(item, v)

              # we have an equal len ndarray/convertible to our labels
              # hasattr first, to avoid coercing to ndarray without reason.
              # But we may be relying on the ndarray coercion to check ndim.
              # Why not just convert to an ndarray earlier on if needed?
              # (the ``== 2`` comparison now sits inside the parenthesis it
              # belongs to; the truth table is unchanged)
              elif ((hasattr(value, 'ndim') and value.ndim == 2) or
                    (not hasattr(value, 'ndim') and
                     np.array(value).ndim == 2)):

                  # note that this coerces the dtype if we are mixed
                  # GH 7551
                  value = np.array(value, dtype=object)
                  if len(labels) != value.shape[1]:
                      raise ValueError('Must have equal len keys and value '
                                       'when setting with an ndarray')

                  for i, item in enumerate(labels):

                      # setting with a list, recoerces
                      setter(item, value[:, i].tolist())

              # we have an equal len list/ndarray
              elif can_do_equal_len():
                  setter(labels[0], value)

              # per label values
              else:

                  if len(labels) != len(value):
                      raise ValueError('Must have equal len keys and value '
                                       'when setting with an iterable')

                  for item, v in zip(labels, value):
                      setter(item, v)
          else:

              # scalar
              for item in labels:
                  setter(item, value)

      else:
          if isinstance(indexer, tuple):
              indexer = maybe_convert_ix(*indexer)

              # if we are setting on the info axis ONLY
              # set using those methods to avoid block-splitting
              # logic here
              if (len(indexer) > info_axis and
                      is_integer(indexer[info_axis]) and
                      all(com.is_null_slice(idx)
                          for i, idx in enumerate(indexer)
                          if i != info_axis) and
                      item_labels.is_unique):
                  self.obj[item_labels[indexer[info_axis]]] = value
                  return

          if isinstance(value, (ABCSeries, dict)):
              # TODO(EA): ExtensionBlock.setitem this causes issues with
              # setting for extensionarrays that store dicts. Need to decide
              # if it's worth supporting that.
              value = self._align_series(indexer, Series(value))

          elif isinstance(value, ABCDataFrame):
              value = self._align_frame(indexer, value)

          if isinstance(value, ABCPanel):
              value = self._align_panel(indexer, value)

          # check for chained assignment
          self.obj._check_is_chained_assignment_possible()

          # actually do the set
          self.obj._consolidate_inplace()
          self.obj._data = self.obj._data.setitem(indexer=indexer,
                                                  value=value)
          self.obj._maybe_update_cacher(clear=True)
  527. def _align_series(self, indexer, ser, multiindex_indexer=False):
  528. """
  529. Parameters
  530. ----------
  531. indexer : tuple, slice, scalar
  532. The indexer used to get the locations that will be set to
  533. `ser`
  534. ser : pd.Series
  535. The values to assign to the locations specified by `indexer`
  536. multiindex_indexer : boolean, optional
  537. Defaults to False. Should be set to True if `indexer` was from
  538. a `pd.MultiIndex`, to avoid unnecessary broadcasting.
  539. Returns:
  540. --------
  541. `np.array` of `ser` broadcast to the appropriate shape for assignment
  542. to the locations selected by `indexer`
  543. """
  544. if isinstance(indexer, (slice, np.ndarray, list, Index)):
  545. indexer = tuple([indexer])
  546. if isinstance(indexer, tuple):
  547. # flatten np.ndarray indexers
  548. def ravel(i):
  549. return i.ravel() if isinstance(i, np.ndarray) else i
  550. indexer = tuple(map(ravel, indexer))
  551. aligners = [not com.is_null_slice(idx) for idx in indexer]
  552. sum_aligners = sum(aligners)
  553. single_aligner = sum_aligners == 1
  554. is_frame = self.obj.ndim == 2
  555. is_panel = self.obj.ndim >= 3
  556. obj = self.obj
  557. # are we a single alignable value on a non-primary
  558. # dim (e.g. panel: 1,2, or frame: 0) ?
  559. # hence need to align to a single axis dimension
  560. # rather that find all valid dims
  561. # frame
  562. if is_frame:
  563. single_aligner = single_aligner and aligners[0]
  564. # panel
  565. elif is_panel:
  566. single_aligner = (single_aligner and
  567. (aligners[1] or aligners[2]))
  568. # we have a frame, with multiple indexers on both axes; and a
  569. # series, so need to broadcast (see GH5206)
  570. if (sum_aligners == self.ndim and
  571. all(is_sequence(_) for _ in indexer)):
  572. ser = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values
  573. # single indexer
  574. if len(indexer) > 1 and not multiindex_indexer:
  575. len_indexer = len(indexer[1])
  576. ser = np.tile(ser, len_indexer).reshape(len_indexer, -1).T
  577. return ser
  578. for i, idx in enumerate(indexer):
  579. ax = obj.axes[i]
  580. # multiple aligners (or null slices)
  581. if is_sequence(idx) or isinstance(idx, slice):
  582. if single_aligner and com.is_null_slice(idx):
  583. continue
  584. new_ix = ax[idx]
  585. if not is_list_like_indexer(new_ix):
  586. new_ix = Index([new_ix])
  587. else:
  588. new_ix = Index(new_ix)
  589. if ser.index.equals(new_ix) or not len(new_ix):
  590. return ser._values.copy()
  591. return ser.reindex(new_ix)._values
  592. # 2 dims
  593. elif single_aligner and is_frame:
  594. # reindex along index
  595. ax = self.obj.axes[1]
  596. if ser.index.equals(ax) or not len(ax):
  597. return ser._values.copy()
  598. return ser.reindex(ax)._values
  599. # >2 dims
  600. elif single_aligner:
  601. broadcast = []
  602. for n, labels in enumerate(self.obj._get_plane_axes(i)):
  603. # reindex along the matching dimensions
  604. if len(labels & ser.index):
  605. ser = ser.reindex(labels)
  606. else:
  607. broadcast.append((n, len(labels)))
  608. # broadcast along other dims
  609. ser = ser._values.copy()
  610. for (axis, l) in broadcast:
  611. shape = [-1] * (len(broadcast) + 1)
  612. shape[axis] = l
  613. ser = np.tile(ser, l).reshape(shape)
  614. if self.obj.ndim == 3:
  615. ser = ser.T
  616. return ser
  617. elif is_scalar(indexer):
  618. ax = self.obj._get_axis(1)
  619. if ser.index.equals(ax):
  620. return ser._values.copy()
  621. return ser.reindex(ax)._values
  622. raise ValueError('Incompatible indexer with Series')
def _align_frame(self, indexer, df):
    """
    Align the DataFrame `df` to the locations selected by `indexer`.

    Parameters
    ----------
    indexer : tuple, slice, list-like or scalar
        The indexer used to get the locations that will be set to `df`
    df : DataFrame
        The values to assign to the locations specified by `indexer`

    Returns
    -------
    The ``_values`` of `df`: taken from a copy when its axes already equal
    the selected labels, otherwise taken after reindexing `df` to them.

    Raises
    ------
    TypeError
        If the selected index and ``df.index`` are both a MultiIndex but
        have a different number of levels.
    ValueError
        If no branch below can handle the combination of `indexer` and
        the dimensionality of ``self.obj``.
    """
    is_frame = self.obj.ndim == 2
    is_panel = self.obj.ndim >= 3

    if isinstance(indexer, tuple):

        # the first sequence/slice entry selects the target index, the
        # second one the target columns; a third one stops the scan
        idx, cols = None, None
        # positions of the entries that are neither a sequence nor a slice
        sindexers = []
        for i, ix in enumerate(indexer):
            ax = self.obj.axes[i]
            if is_sequence(ix) or isinstance(ix, slice):
                if isinstance(ix, np.ndarray):
                    ix = ix.ravel()
                if idx is None:
                    idx = ax[ix]
                elif cols is None:
                    cols = ax[ix]
                else:
                    break
            else:
                sindexers.append(i)

        # panel
        if is_panel:

            # need to conform to the convention
            # as we are not selecting on the items axis
            # and we have a single indexer
            # GH 7763
            if len(sindexers) == 1 and sindexers[0] != 0:
                df = df.T

            # whatever was not selected above defaults to df's own axes
            if idx is None:
                idx = df.index
            if cols is None:
                cols = df.columns

        # when either is still None (non-panel object with fewer than two
        # sequence/slice entries) control falls through to the final raise
        if idx is not None and cols is not None:

            if df.index.equals(idx) and df.columns.equals(cols):
                val = df.copy()._values
            else:
                val = df.reindex(idx, columns=cols)._values
            return val

    elif ((isinstance(indexer, slice) or is_list_like_indexer(indexer)) and
          is_frame):
        # a single row indexer on a 2-dim object: align on the index only
        ax = self.obj.index[indexer]
        if df.index.equals(ax):
            val = df.copy()._values
        else:

            # we have a multi-index and are trying to align
            # with a particular, level GH3738
            if (isinstance(ax, MultiIndex) and
                    isinstance(df.index, MultiIndex) and
                    ax.nlevels != df.index.nlevels):
                raise TypeError("cannot align on a multi-index with out "
                                "specifying the join levels")

            val = df.reindex(index=ax)._values
        return val

    elif is_scalar(indexer) and is_panel:
        idx = self.obj.axes[1]
        cols = self.obj.axes[2]

        # by definition we are indexing on the 0th axis
        # a passed in dataframe which is actually a transpose
        # of what is needed
        if idx.equals(df.index) and cols.equals(df.columns):
            return df.copy()._values

        return df.reindex(idx, columns=cols)._values

    raise ValueError('Incompatible indexer with DataFrame')
  685. def _align_panel(self, indexer, df):
  686. raise NotImplementedError("cannot set using an indexer with a Panel "
  687. "yet!")
  688. def _getitem_tuple(self, tup):
  689. try:
  690. return self._getitem_lowerdim(tup)
  691. except IndexingError:
  692. pass
  693. # no multi-index, so validate all of the indexers
  694. self._has_valid_tuple(tup)
  695. # ugly hack for GH #836
  696. if self._multi_take_opportunity(tup):
  697. return self._multi_take(tup)
  698. # no shortcut needed
  699. retval = self.obj
  700. for i, key in enumerate(tup):
  701. if i >= self.obj.ndim:
  702. raise IndexingError('Too many indexers')
  703. if com.is_null_slice(key):
  704. continue
  705. retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
  706. return retval
  707. def _multi_take_opportunity(self, tup):
  708. """
  709. Check whether there is the possibility to use ``_multi_take``.
  710. Currently the limit is that all axes being indexed must be indexed with
  711. list-likes.
  712. Parameters
  713. ----------
  714. tup : tuple
  715. Tuple of indexers, one per axis
  716. Returns
  717. -------
  718. boolean: Whether the current indexing can be passed through _multi_take
  719. """
  720. if not all(is_list_like_indexer(x) for x in tup):
  721. return False
  722. # just too complicated
  723. if any(com.is_bool_indexer(x) for x in tup):
  724. return False
  725. return True
  726. def _multi_take(self, tup):
  727. """
  728. Create the indexers for the passed tuple of keys, and execute the take
  729. operation. This allows the take operation to be executed all at once -
  730. rather than once for each dimension - improving efficiency.
  731. Parameters
  732. ----------
  733. tup : tuple
  734. Tuple of indexers, one per axis
  735. Returns
  736. -------
  737. values: same type as the object being indexed
  738. """
  739. # GH 836
  740. o = self.obj
  741. d = {axis: self._get_listlike_indexer(key, axis)
  742. for (key, axis) in zip(tup, o._AXIS_ORDERS)}
  743. return o._reindex_with_indexers(d, copy=True, allow_dups=True)
  744. def _convert_for_reindex(self, key, axis=None):
  745. return key
  746. def _handle_lowerdim_multi_index_axis0(self, tup):
  747. # we have an axis0 multi-index, handle or raise
  748. try:
  749. # fast path for series or for tup devoid of slices
  750. return self._get_label(tup, axis=self.axis)
  751. except TypeError:
  752. # slices are unhashable
  753. pass
  754. except Exception as e1:
  755. if isinstance(tup[0], (slice, Index)):
  756. raise IndexingError("Handle elsewhere")
  757. # raise the error if we are not sorted
  758. ax0 = self.obj._get_axis(0)
  759. if not ax0.is_lexsorted_for_tuple(tup):
  760. raise e1
  761. return None
def _getitem_lowerdim(self, tup):
    """
    Try to resolve the tuple key `tup` by first taking a section of the
    object along one axis and then indexing that section with what is
    left of the key.

    Parameters
    ----------
    tup : tuple
        Tuple of indexers

    Returns
    -------
    The section obtained from the first label-like (or tuple) entry of
    `tup`, indexed again with the remaining key when anything remains.

    Raises
    ------
    IndexingError
        If `tup` has more entries than ``self.obj.ndim``, or if none of
        its entries is label-like or a tuple.
    """

    # we can directly get the axis result since the axis is specified
    if self.axis is not None:
        axis = self.obj._get_axis_number(self.axis)
        return self._getitem_axis(tup, axis=axis)

    # we may have a nested tuples indexer here
    if self._is_nested_tuple_indexer(tup):
        return self._getitem_nested_tuple(tup)

    # we maybe be using a tuple to represent multiple dimensions here
    ax0 = self.obj._get_axis(0)
    # ...but iloc should handle the tuple as simple integer-location
    # instead of checking it as multiindex representation (GH 13797)
    if isinstance(ax0, MultiIndex) and self.name != 'iloc':
        result = self._handle_lowerdim_multi_index_axis0(tup)
        # None means the MultiIndex lookup did not settle the key
        if result is not None:
            return result

    if len(tup) > self.obj.ndim:
        raise IndexingError("Too many indexers. handle elsewhere")

    # to avoid wasted computation
    # df.ix[d1:d2, 0] -> columns first (True)
    # df.ix[0, ['C', 'B', A']] -> rows first (False)
    for i, key in enumerate(tup):
        if is_label_like(key) or isinstance(key, tuple):
            # only the first such entry is used: every path below returns
            section = self._getitem_axis(key, axis=i)

            # we have yielded a scalar ?
            if not is_list_like_indexer(section):
                return section

            elif section.ndim == self.ndim:
                # we're in the middle of slicing through a MultiIndex
                # revise the key wrt to `section` by inserting an _NS
                new_key = tup[:i] + (_NS,) + tup[i + 1:]

            else:
                # the section lost a dimension: drop the consumed entry
                new_key = tup[:i] + tup[i + 1:]

                # unfortunately need an odious kludge here because of
                # DataFrame transposing convention
                if (isinstance(section, ABCDataFrame) and i > 0 and
                        len(new_key) == 2):
                    a, b = new_key
                    new_key = b, a

                # a single remaining entry is used bare, not as a 1-tuple
                if len(new_key) == 1:
                    new_key, = new_key

            # Slices should return views, but calling iloc/loc with a null
            # slice returns a new object.
            if com.is_null_slice(new_key):
                return section
            # This is an elided recursive call to iloc/loc/etc'
            return getattr(section, self.name)[new_key]

    raise IndexingError('not applicable')
def _getitem_nested_tuple(self, tup):
    """
    Resolve a tuple key that itself contains nested tuples, i.e. at least
    one axis is addressed through MultiIndex selectors.

    Parameters
    ----------
    tup : tuple
        Tuple of indexers

    Returns
    -------
    The object left after applying every non-null entry of `tup` in turn;
    the loop stops early once a scalar (or anything without ``ndim``) is
    produced.
    """
    # we have a nested tuple so have at least 1 multi-index level
    # we should be able to match up the dimensionaility here

    # we have too many indexers for our dim, but have at least 1
    # multi-index dimension, try to see if we have something like
    # a tuple passed to a series with a multi-index
    if len(tup) > self.ndim:
        result = self._handle_lowerdim_multi_index_axis0(tup)
        if result is not None:
            return result

        # this is a series with a multi-index specified a tuple of
        # selectors
        return self._getitem_axis(tup, axis=self.axis)

    # handle the multi-axis by taking sections and reducing
    # this is iterative
    obj = self.obj
    # axis of the *current* obj that the next key applies to; it lags
    # behind the position in tup whenever a lookup drops a dimension
    axis = 0
    for i, key in enumerate(tup):

        if com.is_null_slice(key):
            # nothing to select on this axis, just move on to the next
            axis += 1
            continue

        current_ndim = obj.ndim
        obj = getattr(obj, self.name)._getitem_axis(key, axis=axis)
        axis += 1

        # if we have a scalar, we are done
        if is_scalar(obj) or not hasattr(obj, 'ndim'):
            break

        # has the dim of the obj changed?
        # GH 7199
        if obj.ndim < current_ndim:

            # GH 7516
            # if had a 3 dim and are going to a 2d
            # axes are reversed on a DataFrame
            if i >= 1 and current_ndim == 3 and obj.ndim == 2:
                obj = obj.T

            # undo the increment above: the consumed axis no longer exists
            axis -= 1

    return obj
  847. def _getitem_axis(self, key, axis=None):
  848. if axis is None:
  849. axis = self.axis or 0
  850. if is_iterator(key):
  851. key = list(key)
  852. self._validate_key(key, axis)
  853. labels = self.obj._get_axis(axis)
  854. if isinstance(key, slice):
  855. return self._get_slice_axis(key, axis=axis)
  856. elif (is_list_like_indexer(key) and
  857. not (isinstance(key, tuple) and
  858. isinstance(labels, MultiIndex))):
  859. if hasattr(key, 'ndim') and key.ndim > 1:
  860. raise ValueError('Cannot index with multidimensional key')
  861. return self._getitem_iterable(key, axis=axis)
  862. else:
  863. # maybe coerce a float scalar to integer
  864. key = labels._maybe_cast_indexer(key)
  865. if is_integer(key):
  866. if axis == 0 and isinstance(labels, MultiIndex):
  867. try:
  868. return self._get_label(key, axis=axis)
  869. except (KeyError, TypeError):
  870. if self.obj.index.levels[0].is_integer():
  871. raise
  872. # this is the fallback! (for a non-float, non-integer index)
  873. if not labels.is_floating() and not labels.is_integer():
  874. return self._get_loc(key, axis=axis)
  875. return self._get_label(key, axis=axis)
  876. def _get_listlike_indexer(self, key, axis, raise_missing=False):
  877. """
  878. Transform a list-like of keys into a new index and an indexer.
  879. Parameters
  880. ----------
  881. key : list-like
  882. Target labels
  883. axis: int
  884. Dimension on which the indexing is being made
  885. raise_missing: bool
  886. Whether to raise a KeyError if some labels are not found. Will be
  887. removed in the future, and then this method will always behave as
  888. if raise_missing=True.
  889. Raises
  890. ------
  891. KeyError
  892. If at least one key was requested but none was found, and
  893. raise_missing=True.
  894. Returns
  895. -------
  896. keyarr: Index
  897. New index (coinciding with 'key' if the axis is unique)
  898. values : array-like
  899. An indexer for the return object; -1 denotes keys not found
  900. """
  901. o = self.obj
  902. ax = o._get_axis(axis)
  903. # Have the index compute an indexer or return None
  904. # if it cannot handle:
  905. indexer, keyarr = ax._convert_listlike_indexer(key,
  906. kind=self.name)
  907. # We only act on all found values:
  908. if indexer is not None and (indexer != -1).all():
  909. self._validate_read_indexer(key, indexer, axis,
  910. raise_missing=raise_missing)
  911. return ax[indexer], indexer
  912. if ax.is_unique:
  913. # If we are trying to get actual keys from empty Series, we
  914. # patiently wait for a KeyError later on - otherwise, convert
  915. if len(ax) or not len(key):
  916. key = self._convert_for_reindex(key, axis)
  917. indexer = ax.get_indexer_for(key)
  918. keyarr = ax.reindex(keyarr)[0]
  919. else:
  920. keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
  921. self._validate_read_indexer(keyarr, indexer,
  922. o._get_axis_number(axis),
  923. raise_missing=raise_missing)
  924. return keyarr, indexer
  925. def _getitem_iterable(self, key, axis=None):
  926. """
  927. Index current object with an an iterable key (which can be a boolean
  928. indexer, or a collection of keys).
  929. Parameters
  930. ----------
  931. key : iterable
  932. Target labels, or boolean indexer
  933. axis: int, default None
  934. Dimension on which the indexing is being made
  935. Raises
  936. ------
  937. KeyError
  938. If no key was found. Will change in the future to raise if not all
  939. keys were found.
  940. IndexingError
  941. If the boolean indexer is unalignable with the object being
  942. indexed.
  943. Returns
  944. -------
  945. scalar, DataFrame, or Series: indexed value(s),
  946. """
  947. if axis is None:
  948. axis = self.axis or 0
  949. self._validate_key(key, axis)
  950. labels = self.obj._get_axis(axis)
  951. if com.is_bool_indexer(key):
  952. # A boolean indexer
  953. key = check_bool_indexer(labels, key)
  954. inds, = key.nonzero()
  955. return self.obj._take(inds, axis=axis)
  956. else:
  957. # A collection of keys
  958. keyarr, indexer = self._get_listlike_indexer(key, axis,
  959. raise_missing=False)
  960. return self.obj._reindex_with_indexers({axis: [keyarr, indexer]},
  961. copy=True, allow_dups=True)
  962. def _validate_read_indexer(self, key, indexer, axis, raise_missing=False):
  963. """
  964. Check that indexer can be used to return a result (e.g. at least one
  965. element was found, unless the list of keys was actually empty).
  966. Parameters
  967. ----------
  968. key : list-like
  969. Target labels (only used to show correct error message)
  970. indexer: array-like of booleans
  971. Indices corresponding to the key (with -1 indicating not found)
  972. axis: int
  973. Dimension on which the indexing is being made
  974. raise_missing: bool
  975. Whether to raise a KeyError if some labels are not found. Will be
  976. removed in the future, and then this method will always behave as
  977. if raise_missing=True.
  978. Raises
  979. ------
  980. KeyError
  981. If at least one key was requested but none was found, and
  982. raise_missing=True.
  983. """
  984. ax = self.obj._get_axis(axis)
  985. if len(key) == 0:
  986. return
  987. # Count missing values:
  988. missing = (indexer < 0).sum()
  989. if missing:
  990. if missing == len(indexer):
  991. raise KeyError(
  992. u"None of [{key}] are in the [{axis}]".format(
  993. key=key, axis=self.obj._get_axis_name(axis)))
  994. # We (temporarily) allow for some missing keys with .loc, except in
  995. # some cases (e.g. setting) in which "raise_missing" will be False
  996. if not(self.name == 'loc' and not raise_missing):
  997. not_found = list(set(key) - set(ax))
  998. raise KeyError("{} not in index".format(not_found))
  999. # we skip the warning on Categorical/Interval
  1000. # as this check is actually done (check for
  1001. # non-missing values), but a bit later in the
  1002. # code, so we want to avoid warning & then
  1003. # just raising
  1004. _missing_key_warning = textwrap.dedent("""
  1005. Passing list-likes to .loc or [] with any missing label will raise
  1006. KeyError in the future, you can use .reindex() as an alternative.
  1007. See the documentation here:
  1008. https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike""") # noqa
  1009. if not (ax.is_categorical() or ax.is_interval()):
  1010. warnings.warn(_missing_key_warning,
  1011. FutureWarning, stacklevel=6)
def _convert_to_indexer(self, obj, axis=None, is_setter=False,
                        raise_missing=False):
    """
    Convert indexing key into something we can use to do actual fancy
    indexing on an ndarray

    Examples
    ix[:5] -> slice(0, 5)
    ix[[1,2,3]] -> [1,2,3]
    ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

    Going by Zen of Python?
    'In the face of ambiguity, refuse the temptation to guess.'
    raise AmbiguousIndexError with integer labels?
    - No, prefer label-based indexing

    Parameters
    ----------
    obj : slice, scalar, tuple or list-like
        The indexing key to convert
    axis : int, default None
        Axis the key applies to; when None, ``self.axis or 0`` is used
    is_setter : bool, default False
        When True, a key that is not found may be returned wrapped as
        ``{'key': obj}`` instead of raising, and list-like keys are
        resolved with ``raise_missing=True``.
    raise_missing : bool, default False
        Forwarded to ``_get_listlike_indexer`` for list-like keys when
        `is_setter` is False.

    Returns
    -------
    Depending on the branch taken: the converted slice, the result of
    ``labels.get_loc`` / ``labels.get_locs``, the integer `obj` itself,
    the positions where a boolean indexer is set, the indexer computed by
    ``_get_listlike_indexer``, or the dict ``{'key': obj}``.
    """
    if axis is None:
        axis = self.axis or 0

    labels = self.obj._get_axis(axis)

    if isinstance(obj, slice):
        return self._convert_slice_indexer(obj, axis)

    # try to find out correct indexer, if not type correct raise
    # NOTE(review): as written the TypeError is swallowed whether or not
    # is_setter is set -- there is no re-raise in this handler
    try:
        obj = self._convert_scalar_indexer(obj, axis)
    except TypeError:

        # but we will allow setting
        if is_setter:
            pass

    # see if we are positional in nature
    is_int_index = labels.is_integer()
    is_int_positional = is_integer(obj) and not is_int_index

    # if we are a label return me
    try:
        return labels.get_loc(obj)
    except LookupError:
        # for a tuple on a MultiIndex: a full-depth key may be inserted
        # when setting, anything else is a genuine miss; every other
        # LookupError falls through to the checks below
        if isinstance(obj, tuple) and isinstance(labels, MultiIndex):
            if is_setter and len(obj) == labels.nlevels:
                return {'key': obj}
            raise
    except TypeError:
        pass
    except (ValueError):
        if not is_int_positional:
            raise

    # a positional
    if is_int_positional:

        # if we are setting and its not a valid location
        # its an insert which fails by definition
        if is_setter:

            # always valid
            if self.name == 'loc':
                return {'key': obj}

            # a positional
            if (obj >= self.obj.shape[axis] and
                    not isinstance(labels, MultiIndex)):
                raise ValueError("cannot set by positional indexing with "
                                 "enlargement")

        return obj

    if is_nested_tuple(obj, labels):
        return labels.get_locs(obj)

    elif is_list_like_indexer(obj):

        if com.is_bool_indexer(obj):
            obj = check_bool_indexer(labels, obj)
            inds, = obj.nonzero()
            return inds
        else:
            # When setting, missing keys are not allowed, even with .loc:
            kwargs = {'raise_missing': True if is_setter else
                      raise_missing}
            # [1] keeps the indexer and drops the new key array
            return self._get_listlike_indexer(obj, axis, **kwargs)[1]
    else:
        try:
            return labels.get_loc(obj)
        except LookupError:
            # allow a not found key only if we are a setter
            if not is_list_like_indexer(obj) and is_setter:
                return {'key': obj}
            raise
  1088. def _tuplify(self, loc):
  1089. tup = [slice(None, None) for _ in range(self.ndim)]
  1090. tup[0] = loc
  1091. return tuple(tup)
  1092. def _get_slice_axis(self, slice_obj, axis=None):
  1093. obj = self.obj
  1094. if axis is None:
  1095. axis = self.axis or 0
  1096. if not need_slice(slice_obj):
  1097. return obj.copy(deep=False)
  1098. indexer = self._convert_slice_indexer(slice_obj, axis)
  1099. if isinstance(indexer, slice):
  1100. return self._slice(indexer, axis=axis, kind='iloc')
  1101. else:
  1102. return self.obj._take(indexer, axis=axis)
  1103. class _IXIndexer(_NDFrameIndexer):
  1104. """A primarily label-location based indexer, with integer position
  1105. fallback.
  1106. Warning: Starting in 0.20.0, the .ix indexer is deprecated, in
  1107. favor of the more strict .iloc and .loc indexers.
  1108. ``.ix[]`` supports mixed integer and label based access. It is
  1109. primarily label based, but will fall back to integer positional
  1110. access unless the corresponding axis is of integer type.
  1111. ``.ix`` is the most general indexer and will support any of the
  1112. inputs in ``.loc`` and ``.iloc``. ``.ix`` also supports floating
  1113. point label schemes. ``.ix`` is exceptionally useful when dealing
  1114. with mixed positional and label based hierarchical indexes.
  1115. However, when an axis is integer based, ONLY label based access
  1116. and not positional access is supported. Thus, in such cases, it's
  1117. usually better to be explicit and use ``.iloc`` or ``.loc``.
  1118. See more at :ref:`Advanced Indexing <advanced>`.
  1119. """
  1120. _ix_deprecation_warning = textwrap.dedent("""
  1121. .ix is deprecated. Please use
  1122. .loc for label based indexing or
  1123. .iloc for positional indexing
  1124. See the documentation here:
  1125. http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated""") # noqa
  1126. def __init__(self, name, obj):
  1127. warnings.warn(self._ix_deprecation_warning,
  1128. DeprecationWarning, stacklevel=2)
  1129. super(_IXIndexer, self).__init__(name, obj)
  1130. @Appender(_NDFrameIndexer._validate_key.__doc__)
  1131. def _validate_key(self, key, axis):
  1132. if isinstance(key, slice):
  1133. return True
  1134. elif com.is_bool_indexer(key):
  1135. return True
  1136. elif is_list_like_indexer(key):
  1137. return True
  1138. else:
  1139. self._convert_scalar_indexer(key, axis)
  1140. return True
  1141. def _convert_for_reindex(self, key, axis=None):
  1142. """
  1143. Transform a list of keys into a new array ready to be used as axis of
  1144. the object we return (e.g. including NaNs).
  1145. Parameters
  1146. ----------
  1147. key : list-like
  1148. Target labels
  1149. axis: int
  1150. Where the indexing is being made
  1151. Returns
  1152. -------
  1153. list-like of labels
  1154. """
  1155. if axis is None:
  1156. axis = self.axis or 0
  1157. labels = self.obj._get_axis(axis)
  1158. if com.is_bool_indexer(key):
  1159. key = check_bool_indexer(labels, key)
  1160. return labels[key]
  1161. if isinstance(key, Index):
  1162. keyarr = labels._convert_index_indexer(key)
  1163. else:
  1164. # asarray can be unsafe, NumPy strings are weird
  1165. keyarr = com.asarray_tuplesafe(key)
  1166. if is_integer_dtype(keyarr):
  1167. # Cast the indexer to uint64 if possible so
  1168. # that the values returned from indexing are
  1169. # also uint64.
  1170. keyarr = labels._convert_arr_indexer(keyarr)
  1171. if not labels.is_integer():
  1172. keyarr = ensure_platform_int(keyarr)
  1173. return labels.take(keyarr)
  1174. return keyarr
  1175. class _LocationIndexer(_NDFrameIndexer):
  1176. _exception = Exception
  1177. def __getitem__(self, key):
  1178. if type(key) is tuple:
  1179. key = tuple(com.apply_if_callable(x, self.obj)
  1180. for x in key)
  1181. try:
  1182. if self._is_scalar_access(key):
  1183. return self._getitem_scalar(key)
  1184. except (KeyError, IndexError, AttributeError):
  1185. pass
  1186. return self._getitem_tuple(key)
  1187. else:
  1188. # we by definition only have the 0th axis
  1189. axis = self.axis or 0
  1190. maybe_callable = com.apply_if_callable(key, self.obj)
  1191. return self._getitem_axis(maybe_callable, axis=axis)
  1192. def _is_scalar_access(self, key):
  1193. raise NotImplementedError()
  1194. def _getitem_scalar(self, key):
  1195. raise NotImplementedError()
  1196. def _getitem_axis(self, key, axis=None):
  1197. raise NotImplementedError()
  1198. def _getbool_axis(self, key, axis=None):
  1199. if axis is None:
  1200. axis = self.axis or 0
  1201. labels = self.obj._get_axis(axis)
  1202. key = check_bool_indexer(labels, key)
  1203. inds, = key.nonzero()
  1204. try:
  1205. return self.obj._take(inds, axis=axis)
  1206. except Exception as detail:
  1207. raise self._exception(detail)
  1208. def _get_slice_axis(self, slice_obj, axis=None):
  1209. """ this is pretty simple as we just have to deal with labels """
  1210. if axis is None:
  1211. axis = self.axis or 0
  1212. obj = self.obj
  1213. if not need_slice(slice_obj):
  1214. return obj.copy(deep=False)
  1215. labels = obj._get_axis(axis)
  1216. indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop,
  1217. slice_obj.step, kind=self.name)
  1218. if isinstance(indexer, slice):
  1219. return self._slice(indexer, axis=axis, kind='iloc')
  1220. else:
  1221. return self.obj._take(indexer, axis=axis)
  1222. class _LocIndexer(_LocationIndexer):
  1223. """
  1224. Access a group of rows and columns by label(s) or a boolean array.
  1225. ``.loc[]`` is primarily label based, but may also be used with a
  1226. boolean array.
  1227. Allowed inputs are:
  1228. - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is
  1229. interpreted as a *label* of the index, and **never** as an
  1230. integer position along the index).
  1231. - A list or array of labels, e.g. ``['a', 'b', 'c']``.
  1232. - A slice object with labels, e.g. ``'a':'f'``.
  1233. .. warning:: Note that contrary to usual python slices, **both** the
  1234. start and the stop are included
  1235. - A boolean array of the same length as the axis being sliced,
  1236. e.g. ``[True, False, True]``.
  1237. - A ``callable`` function with one argument (the calling Series, DataFrame
  1238. or Panel) and that returns valid output for indexing (one of the above)
  1239. See more at :ref:`Selection by Label <indexing.label>`
  1240. Raises
  1241. ------
  1242. KeyError:
  1243. when any items are not found
  1244. See Also
  1245. --------
  1246. DataFrame.at : Access a single value for a row/column label pair.
  1247. DataFrame.iloc : Access group of rows and columns by integer position(s).
  1248. DataFrame.xs : Returns a cross-section (row(s) or column(s)) from the
  1249. Series/DataFrame.
  1250. Series.loc : Access group of values using labels.
  1251. Examples
  1252. --------
  1253. **Getting values**
  1254. >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
  1255. ... index=['cobra', 'viper', 'sidewinder'],
  1256. ... columns=['max_speed', 'shield'])
  1257. >>> df
  1258. max_speed shield
  1259. cobra 1 2
  1260. viper 4 5
  1261. sidewinder 7 8
  1262. Single label. Note this returns the row as a Series.
  1263. >>> df.loc['viper']
  1264. max_speed 4
  1265. shield 5
  1266. Name: viper, dtype: int64
  1267. List of labels. Note using ``[[]]`` returns a DataFrame.
  1268. >>> df.loc[['viper', 'sidewinder']]
  1269. max_speed shield
  1270. viper 4 5
  1271. sidewinder 7 8
  1272. Single label for row and column
  1273. >>> df.loc['cobra', 'shield']
  1274. 2
  1275. Slice with labels for row and single label for column. As mentioned
  1276. above, note that both the start and stop of the slice are included.
  1277. >>> df.loc['cobra':'viper', 'max_speed']
  1278. cobra 1
  1279. viper 4
  1280. Name: max_speed, dtype: int64
  1281. Boolean list with the same length as the row axis
  1282. >>> df.loc[[False, False, True]]
  1283. max_speed shield
  1284. sidewinder 7 8
  1285. Conditional that returns a boolean Series
  1286. >>> df.loc[df['shield'] > 6]
  1287. max_speed shield
  1288. sidewinder 7 8
  1289. Conditional that returns a boolean Series with column labels specified
  1290. >>> df.loc[df['shield'] > 6, ['max_speed']]
  1291. max_speed
  1292. sidewinder 7
  1293. Callable that returns a boolean Series
  1294. >>> df.loc[lambda df: df['shield'] == 8]
  1295. max_speed shield
  1296. sidewinder 7 8
  1297. **Setting values**
  1298. Set value for all items matching the list of labels
  1299. >>> df.loc[['viper', 'sidewinder'], ['shield']] = 50
  1300. >>> df
  1301. max_speed shield
  1302. cobra 1 2
  1303. viper 4 50
  1304. sidewinder 7 50
  1305. Set value for an entire row
  1306. >>> df.loc['cobra'] = 10
  1307. >>> df
  1308. max_speed shield
  1309. cobra 10 10
  1310. viper 4 50
  1311. sidewinder 7 50
  1312. Set value for an entire column
  1313. >>> df.loc[:, 'max_speed'] = 30
  1314. >>> df
  1315. max_speed shield
  1316. cobra 30 10
  1317. viper 30 50
  1318. sidewinder 30 50
  1319. Set value for rows matching callable condition
  1320. >>> df.loc[df['shield'] > 35] = 0
  1321. >>> df
  1322. max_speed shield
  1323. cobra 30 10
  1324. viper 0 0
  1325. sidewinder 0 0
  1326. **Getting values on a DataFrame with an index that has integer labels**
  1327. Another example using integers for the index
  1328. >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
  1329. ... index=[7, 8, 9], columns=['max_speed', 'shield'])
  1330. >>> df
  1331. max_speed shield
  1332. 7 1 2
  1333. 8 4 5
  1334. 9 7 8
  1335. Slice with integer labels for rows. As mentioned above, note that both
  1336. the start and stop of the slice are included.
  1337. >>> df.loc[7:9]
  1338. max_speed shield
  1339. 7 1 2
  1340. 8 4 5
  1341. 9 7 8
  1342. **Getting values with a MultiIndex**
  1343. A number of examples using a DataFrame with a MultiIndex
  1344. >>> tuples = [
  1345. ... ('cobra', 'mark i'), ('cobra', 'mark ii'),
  1346. ... ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'),
  1347. ... ('viper', 'mark ii'), ('viper', 'mark iii')
  1348. ... ]
  1349. >>> index = pd.MultiIndex.from_tuples(tuples)
  1350. >>> values = [[12, 2], [0, 4], [10, 20],
  1351. ... [1, 4], [7, 1], [16, 36]]
  1352. >>> df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index)
  1353. >>> df
  1354. max_speed shield
  1355. cobra mark i 12 2
  1356. mark ii 0 4
  1357. sidewinder mark i 10 20
  1358. mark ii 1 4
  1359. viper mark ii 7 1
  1360. mark iii 16 36
  1361. Single label. Note this returns a DataFrame with a single index.
  1362. >>> df.loc['cobra']
  1363. max_speed shield
  1364. mark i 12 2
  1365. mark ii 0 4
  1366. Single index tuple. Note this returns a Series.
  1367. >>> df.loc[('cobra', 'mark ii')]
  1368. max_speed 0
  1369. shield 4
  1370. Name: (cobra, mark ii), dtype: int64
  1371. Single label for row and column. Similar to passing in a tuple, this
  1372. returns a Series.
  1373. >>> df.loc['cobra', 'mark i']
  1374. max_speed 12
  1375. shield 2
  1376. Name: (cobra, mark i), dtype: int64
  1377. Single tuple. Note using ``[[]]`` returns a DataFrame.
  1378. >>> df.loc[[('cobra', 'mark ii')]]
  1379. max_speed shield
  1380. cobra mark ii 0 4
  1381. Single tuple for the index with a single label for the column
  1382. >>> df.loc[('cobra', 'mark i'), 'shield']
  1383. 2
  1384. Slice from index tuple to single label
  1385. >>> df.loc[('cobra', 'mark i'):'viper']
  1386. max_speed shield
  1387. cobra mark i 12 2
  1388. mark ii 0 4
  1389. sidewinder mark i 10 20
  1390. mark ii 1 4
  1391. viper mark ii 7 1
  1392. mark iii 16 36
  1393. Slice from index tuple to index tuple
  1394. >>> df.loc[('cobra', 'mark i'):('viper', 'mark ii')]
  1395. max_speed shield
  1396. cobra mark i 12 2
  1397. mark ii 0 4
  1398. sidewinder mark i 10 20
  1399. mark ii 1 4
  1400. viper mark ii 7 1
  1401. """
  1402. _valid_types = ("labels (MUST BE IN THE INDEX), slices of labels (BOTH "
  1403. "endpoints included! Can be slices of integers if the "
  1404. "index is integers), listlike of labels, boolean")
  1405. _exception = KeyError
  1406. @Appender(_NDFrameIndexer._validate_key.__doc__)
  1407. def _validate_key(self, key, axis):
  1408. # valid for a collection of labels (we check their presence later)
  1409. # slice of labels (where start-end in labels)
  1410. # slice of integers (only if in the labels)
  1411. # boolean
  1412. if isinstance(key, slice):
  1413. return
  1414. if com.is_bool_indexer(key):
  1415. return
  1416. if not is_list_like_indexer(key):
  1417. self._convert_scalar_indexer(key, axis)
  1418. def _is_scalar_access(self, key):
  1419. # this is a shortcut accessor to both .loc and .iloc
  1420. # that provide the equivalent access of .at and .iat
  1421. # a) avoid getting things via sections and (to minimize dtype changes)
  1422. # b) provide a performant path
  1423. if not hasattr(key, '__len__'):
  1424. return False
  1425. if len(key) != self.ndim:
  1426. return False
  1427. for i, k in enumerate(key):
  1428. if not is_scalar(k):
  1429. return False
  1430. ax = self.obj.axes[i]
  1431. if isinstance(ax, MultiIndex):
  1432. return False
  1433. if not ax.is_unique:
  1434. return False
  1435. return True
  1436. def _getitem_scalar(self, key):
  1437. # a fast-path to scalar access
  1438. # if not, raise
  1439. values = self.obj._get_value(*key)
  1440. return values
  1441. def _get_partial_string_timestamp_match_key(self, key, labels):
  1442. """Translate any partial string timestamp matches in key, returning the
  1443. new key (GH 10331)"""
  1444. if isinstance(labels, MultiIndex):
  1445. if (isinstance(key, compat.string_types) and
  1446. labels.levels[0].is_all_dates):
  1447. # Convert key '2016-01-01' to
  1448. # ('2016-01-01'[, slice(None, None, None)]+)
  1449. key = tuple([key] + [slice(None)] * (len(labels.levels) - 1))
  1450. if isinstance(key, tuple):
  1451. # Convert (..., '2016-01-01', ...) in tuple to
  1452. # (..., slice('2016-01-01', '2016-01-01', None), ...)
  1453. new_key = []
  1454. for i, component in enumerate(key):
  1455. if (isinstance(component, compat.string_types) and
  1456. labels.levels[i].is_all_dates):
  1457. new_key.append(slice(component, component, None))
  1458. else:
  1459. new_key.append(component)
  1460. key = tuple(new_key)
  1461. return key
  1462. def _getitem_axis(self, key, axis=None):
  1463. if axis is None:
  1464. axis = self.axis or 0
  1465. if is_iterator(key):
  1466. key = list(key)
  1467. labels = self.obj._get_axis(axis)
  1468. key = self._get_partial_string_timestamp_match_key(key, labels)
  1469. if isinstance(key, slice):
  1470. self._validate_key(key, axis)
  1471. return self._get_slice_axis(key, axis=axis)
  1472. elif com.is_bool_indexer(key):
  1473. return self._getbool_axis(key, axis=axis)
  1474. elif is_list_like_indexer(key):
  1475. # convert various list-like indexers
  1476. # to a list of keys
  1477. # we will use the *values* of the object
  1478. # and NOT the index if its a PandasObject
  1479. if isinstance(labels, MultiIndex):
  1480. if isinstance(key, (ABCSeries, np.ndarray)) and key.ndim <= 1:
  1481. # Series, or 0,1 ndim ndarray
  1482. # GH 14730
  1483. key = list(key)
  1484. elif isinstance(key, ABCDataFrame):
  1485. # GH 15438
  1486. raise NotImplementedError("Indexing a MultiIndex with a "
  1487. "DataFrame key is not "
  1488. "implemented")
  1489. elif hasattr(key, 'ndim') and key.ndim > 1:
  1490. raise NotImplementedError("Indexing a MultiIndex with a "
  1491. "multidimensional key is not "
  1492. "implemented")
  1493. if (not isinstance(key, tuple) and len(key) > 1 and
  1494. not isinstance(key[0], tuple)):
  1495. key = tuple([key])
  1496. # an iterable multi-selection
  1497. if not (isinstance(key, tuple) and isinstance(labels, MultiIndex)):
  1498. if hasattr(key, 'ndim') and key.ndim > 1:
  1499. raise ValueError('Cannot index with multidimensional key')
  1500. return self._getitem_iterable(key, axis=axis)
  1501. # nested tuple slicing
  1502. if is_nested_tuple(key, labels):
  1503. locs = labels.get_locs(key)
  1504. indexer = [slice(None)] * self.ndim
  1505. indexer[axis] = locs
  1506. return self.obj.iloc[tuple(indexer)]
  1507. # fall thru to straight lookup
  1508. self._validate_key(key, axis)
  1509. return self._get_label(key, axis=axis)
  1510. class _iLocIndexer(_LocationIndexer):
  1511. """
  1512. Purely integer-location based indexing for selection by position.
  1513. ``.iloc[]`` is primarily integer position based (from ``0`` to
  1514. ``length-1`` of the axis), but may also be used with a boolean
  1515. array.
  1516. Allowed inputs are:
  1517. - An integer, e.g. ``5``.
  1518. - A list or array of integers, e.g. ``[4, 3, 0]``.
  1519. - A slice object with ints, e.g. ``1:7``.
  1520. - A boolean array.
  1521. - A ``callable`` function with one argument (the calling Series, DataFrame
  1522. or Panel) and that returns valid output for indexing (one of the above).
  1523. This is useful in method chains, when you don't have a reference to the
  1524. calling object, but would like to base your selection on some value.
  1525. ``.iloc`` will raise ``IndexError`` if a requested indexer is
  1526. out-of-bounds, except *slice* indexers which allow out-of-bounds
  1527. indexing (this conforms with python/numpy *slice* semantics).
  1528. See more at ref:`Selection by Position <indexing.integer>`.
  1529. See Also
  1530. --------
  1531. DataFrame.iat : Fast integer location scalar accessor.
  1532. DataFrame.loc : Purely label-location based indexer for selection by label.
  1533. Series.iloc : Purely integer-location based indexing for
  1534. selection by position.
  1535. Examples
  1536. --------
  1537. >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4},
  1538. ... {'a': 100, 'b': 200, 'c': 300, 'd': 400},
  1539. ... {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }]
  1540. >>> df = pd.DataFrame(mydict)
  1541. >>> df
  1542. a b c d
  1543. 0 1 2 3 4
  1544. 1 100 200 300 400
  1545. 2 1000 2000 3000 4000
  1546. **Indexing just the rows**
  1547. With a scalar integer.
  1548. >>> type(df.iloc[0])
  1549. <class 'pandas.core.series.Series'>
  1550. >>> df.iloc[0]
  1551. a 1
  1552. b 2
  1553. c 3
  1554. d 4
  1555. Name: 0, dtype: int64
  1556. With a list of integers.
  1557. >>> df.iloc[[0]]
  1558. a b c d
  1559. 0 1 2 3 4
  1560. >>> type(df.iloc[[0]])
  1561. <class 'pandas.core.frame.DataFrame'>
  1562. >>> df.iloc[[0, 1]]
  1563. a b c d
  1564. 0 1 2 3 4
  1565. 1 100 200 300 400
  1566. With a `slice` object.
  1567. >>> df.iloc[:3]
  1568. a b c d
  1569. 0 1 2 3 4
  1570. 1 100 200 300 400
  1571. 2 1000 2000 3000 4000
  1572. With a boolean mask the same length as the index.
  1573. >>> df.iloc[[True, False, True]]
  1574. a b c d
  1575. 0 1 2 3 4
  1576. 2 1000 2000 3000 4000
  1577. With a callable, useful in method chains. The `x` passed
  1578. to the ``lambda`` is the DataFrame being sliced. This selects
  1579. the rows whose index label even.
  1580. >>> df.iloc[lambda x: x.index % 2 == 0]
  1581. a b c d
  1582. 0 1 2 3 4
  1583. 2 1000 2000 3000 4000
  1584. **Indexing both axes**
  1585. You can mix the indexer types for the index and columns. Use ``:`` to
  1586. select the entire axis.
  1587. With scalar integers.
  1588. >>> df.iloc[0, 1]
  1589. 2
  1590. With lists of integers.
  1591. >>> df.iloc[[0, 2], [1, 3]]
  1592. b d
  1593. 0 2 4
  1594. 2 2000 4000
  1595. With `slice` objects.
  1596. >>> df.iloc[1:3, 0:3]
  1597. a b c
  1598. 1 100 200 300
  1599. 2 1000 2000 3000
  1600. With a boolean array whose length matches the columns.
  1601. >>> df.iloc[:, [True, False, True, False]]
  1602. a c
  1603. 0 1 3
  1604. 1 100 300
  1605. 2 1000 3000
  1606. With a callable function that expects the Series or DataFrame.
  1607. >>> df.iloc[:, lambda df: [0, 2]]
  1608. a c
  1609. 0 1 3
  1610. 1 100 300
  1611. 2 1000 3000
  1612. """
  1613. _valid_types = ("integer, integer slice (START point is INCLUDED, END "
  1614. "point is EXCLUDED), listlike of integers, boolean array")
  1615. _exception = IndexError
  1616. def _validate_key(self, key, axis):
  1617. if com.is_bool_indexer(key):
  1618. if hasattr(key, 'index') and isinstance(key.index, Index):
  1619. if key.index.inferred_type == 'integer':
  1620. raise NotImplementedError("iLocation based boolean "
  1621. "indexing on an integer type "
  1622. "is not available")
  1623. raise ValueError("iLocation based boolean indexing cannot use "
  1624. "an indexable as a mask")
  1625. return
  1626. if isinstance(key, slice):
  1627. return
  1628. elif is_integer(key):
  1629. self._validate_integer(key, axis)
  1630. elif isinstance(key, tuple):
  1631. # a tuple should already have been caught by this point
  1632. # so don't treat a tuple as a valid indexer
  1633. raise IndexingError('Too many indexers')
  1634. elif is_list_like_indexer(key):
  1635. # check that the key does not exceed the maximum size of the index
  1636. arr = np.array(key)
  1637. len_axis = len(self.obj._get_axis(axis))
  1638. if len(arr) and (arr.max() >= len_axis or arr.min() < -len_axis):
  1639. raise IndexError("positional indexers are out-of-bounds")
  1640. else:
  1641. raise ValueError("Can only index by location with "
  1642. "a [{types}]".format(types=self._valid_types))
  1643. def _has_valid_setitem_indexer(self, indexer):
  1644. self._has_valid_positional_setitem_indexer(indexer)
  1645. def _is_scalar_access(self, key):
  1646. # this is a shortcut accessor to both .loc and .iloc
  1647. # that provide the equivalent access of .at and .iat
  1648. # a) avoid getting things via sections and (to minimize dtype changes)
  1649. # b) provide a performant path
  1650. if not hasattr(key, '__len__'):
  1651. return False
  1652. if len(key) != self.ndim:
  1653. return False
  1654. for i, k in enumerate(key):
  1655. if not is_integer(k):
  1656. return False
  1657. ax = self.obj.axes[i]
  1658. if not ax.is_unique:
  1659. return False
  1660. return True
  1661. def _getitem_scalar(self, key):
  1662. # a fast-path to scalar access
  1663. # if not, raise
  1664. values = self.obj._get_value(*key, takeable=True)
  1665. return values
  1666. def _validate_integer(self, key, axis):
  1667. """
  1668. Check that 'key' is a valid position in the desired axis.
  1669. Parameters
  1670. ----------
  1671. key : int
  1672. Requested position
  1673. axis : int
  1674. Desired axis
  1675. Returns
  1676. -------
  1677. None
  1678. Raises
  1679. ------
  1680. IndexError
  1681. If 'key' is not a valid position in axis 'axis'
  1682. """
  1683. len_axis = len(self.obj._get_axis(axis))
  1684. if key >= len_axis or key < -len_axis:
  1685. raise IndexError("single positional indexer is out-of-bounds")
  1686. def _getitem_tuple(self, tup):
  1687. self._has_valid_tuple(tup)
  1688. try:
  1689. return self._getitem_lowerdim(tup)
  1690. except IndexingError:
  1691. pass
  1692. retval = self.obj
  1693. axis = 0
  1694. for i, key in enumerate(tup):
  1695. if i >= self.obj.ndim:
  1696. raise IndexingError('Too many indexers')
  1697. if com.is_null_slice(key):
  1698. axis += 1
  1699. continue
  1700. retval = getattr(retval, self.name)._getitem_axis(key, axis=axis)
  1701. # if the dim was reduced, then pass a lower-dim the next time
  1702. if retval.ndim < self.ndim:
  1703. axis -= 1
  1704. # try to get for the next axis
  1705. axis += 1
  1706. return retval
  1707. def _get_slice_axis(self, slice_obj, axis=None):
  1708. if axis is None:
  1709. axis = self.axis or 0
  1710. obj = self.obj
  1711. if not need_slice(slice_obj):
  1712. return obj.copy(deep=False)
  1713. slice_obj = self._convert_slice_indexer(slice_obj, axis)
  1714. if isinstance(slice_obj, slice):
  1715. return self._slice(slice_obj, axis=axis, kind='iloc')
  1716. else:
  1717. return self.obj._take(slice_obj, axis=axis)
  1718. def _get_list_axis(self, key, axis=None):
  1719. """
  1720. Return Series values by list or array of integers
  1721. Parameters
  1722. ----------
  1723. key : list-like positional indexer
  1724. axis : int (can only be zero)
  1725. Returns
  1726. -------
  1727. Series object
  1728. """
  1729. if axis is None:
  1730. axis = self.axis or 0
  1731. try:
  1732. return self.obj._take(key, axis=axis)
  1733. except IndexError:
  1734. # re-raise with different error message
  1735. raise IndexError("positional indexers are out-of-bounds")
  1736. def _getitem_axis(self, key, axis=None):
  1737. if axis is None:
  1738. axis = self.axis or 0
  1739. if isinstance(key, slice):
  1740. return self._get_slice_axis(key, axis=axis)
  1741. if isinstance(key, list):
  1742. key = np.asarray(key)
  1743. if com.is_bool_indexer(key):
  1744. self._validate_key(key, axis)
  1745. return self._getbool_axis(key, axis=axis)
  1746. # a list of integers
  1747. elif is_list_like_indexer(key):
  1748. return self._get_list_axis(key, axis=axis)
  1749. # a single integer
  1750. else:
  1751. if not is_integer(key):
  1752. raise TypeError("Cannot index by location index with a "
  1753. "non-integer key")
  1754. # validate the location
  1755. self._validate_integer(key, axis)
  1756. return self._get_loc(key, axis=axis)
  1757. def _convert_to_indexer(self, obj, axis=None, is_setter=False):
  1758. """ much simpler as we only have to deal with our valid types """
  1759. if axis is None:
  1760. axis = self.axis or 0
  1761. # make need to convert a float key
  1762. if isinstance(obj, slice):
  1763. return self._convert_slice_indexer(obj, axis)
  1764. elif is_float(obj):
  1765. return self._convert_scalar_indexer(obj, axis)
  1766. try:
  1767. self._validate_key(obj, axis)
  1768. return obj
  1769. except ValueError:
  1770. raise ValueError("Can only index by location with "
  1771. "a [{types}]".format(types=self._valid_types))
  1772. class _ScalarAccessIndexer(_NDFrameIndexer):
  1773. """ access scalars quickly """
  1774. def _convert_key(self, key, is_setter=False):
  1775. return list(key)
  1776. def __getitem__(self, key):
  1777. if not isinstance(key, tuple):
  1778. # we could have a convertible item here (e.g. Timestamp)
  1779. if not is_list_like_indexer(key):
  1780. key = tuple([key])
  1781. else:
  1782. raise ValueError('Invalid call for scalar access (getting)!')
  1783. key = self._convert_key(key)
  1784. return self.obj._get_value(*key, takeable=self._takeable)
  1785. def __setitem__(self, key, value):
  1786. if isinstance(key, tuple):
  1787. key = tuple(com.apply_if_callable(x, self.obj)
  1788. for x in key)
  1789. else:
  1790. # scalar callable may return tuple
  1791. key = com.apply_if_callable(key, self.obj)
  1792. if not isinstance(key, tuple):
  1793. key = self._tuplify(key)
  1794. if len(key) != self.obj.ndim:
  1795. raise ValueError('Not enough indexers for scalar access '
  1796. '(setting)!')
  1797. key = list(self._convert_key(key, is_setter=True))
  1798. key.append(value)
  1799. self.obj._set_value(*key, takeable=self._takeable)
  1800. class _AtIndexer(_ScalarAccessIndexer):
  1801. """
  1802. Access a single value for a row/column label pair.
  1803. Similar to ``loc``, in that both provide label-based lookups. Use
  1804. ``at`` if you only need to get or set a single value in a DataFrame
  1805. or Series.
  1806. Raises
  1807. ------
  1808. KeyError
  1809. When label does not exist in DataFrame
  1810. See Also
  1811. --------
  1812. DataFrame.iat : Access a single value for a row/column pair by integer
  1813. position.
  1814. DataFrame.loc : Access a group of rows and columns by label(s).
  1815. Series.at : Access a single value using a label.
  1816. Examples
  1817. --------
  1818. >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
  1819. ... index=[4, 5, 6], columns=['A', 'B', 'C'])
  1820. >>> df
  1821. A B C
  1822. 4 0 2 3
  1823. 5 0 4 1
  1824. 6 10 20 30
  1825. Get value at specified row/column pair
  1826. >>> df.at[4, 'B']
  1827. 2
  1828. Set value at specified row/column pair
  1829. >>> df.at[4, 'B'] = 10
  1830. >>> df.at[4, 'B']
  1831. 10
  1832. Get value within a Series
  1833. >>> df.loc[5].at['B']
  1834. 4
  1835. """
  1836. _takeable = False
  1837. def _convert_key(self, key, is_setter=False):
  1838. """ require they keys to be the same type as the index (so we don't
  1839. fallback)
  1840. """
  1841. # allow arbitrary setting
  1842. if is_setter:
  1843. return list(key)
  1844. for ax, i in zip(self.obj.axes, key):
  1845. if ax.is_integer():
  1846. if not is_integer(i):
  1847. raise ValueError("At based indexing on an integer index "
  1848. "can only have integer indexers")
  1849. else:
  1850. if is_integer(i) and not ax.holds_integer():
  1851. raise ValueError("At based indexing on an non-integer "
  1852. "index can only have non-integer "
  1853. "indexers")
  1854. return key
  1855. class _iAtIndexer(_ScalarAccessIndexer):
  1856. """
  1857. Access a single value for a row/column pair by integer position.
  1858. Similar to ``iloc``, in that both provide integer-based lookups. Use
  1859. ``iat`` if you only need to get or set a single value in a DataFrame
  1860. or Series.
  1861. Raises
  1862. ------
  1863. IndexError
  1864. When integer position is out of bounds
  1865. See Also
  1866. --------
  1867. DataFrame.at : Access a single value for a row/column label pair.
  1868. DataFrame.loc : Access a group of rows and columns by label(s).
  1869. DataFrame.iloc : Access a group of rows and columns by integer position(s).
  1870. Examples
  1871. --------
  1872. >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
  1873. ... columns=['A', 'B', 'C'])
  1874. >>> df
  1875. A B C
  1876. 0 0 2 3
  1877. 1 0 4 1
  1878. 2 10 20 30
  1879. Get value at specified row/column pair
  1880. >>> df.iat[1, 2]
  1881. 1
  1882. Set value at specified row/column pair
  1883. >>> df.iat[1, 2] = 10
  1884. >>> df.iat[1, 2]
  1885. 10
  1886. Get value within a series
  1887. >>> df.loc[0].iat[1]
  1888. 2
  1889. """
  1890. _takeable = True
  1891. def _has_valid_setitem_indexer(self, indexer):
  1892. self._has_valid_positional_setitem_indexer(indexer)
  1893. def _convert_key(self, key, is_setter=False):
  1894. """ require integer args (and convert to label arguments) """
  1895. for a, i in zip(self.obj.axes, key):
  1896. if not is_integer(i):
  1897. raise ValueError("iAt based indexing can only have integer "
  1898. "indexers")
  1899. return key
  1900. def length_of_indexer(indexer, target=None):
  1901. """
  1902. return the length of a single non-tuple indexer which could be a slice
  1903. """
  1904. if target is not None and isinstance(indexer, slice):
  1905. target_len = len(target)
  1906. start = indexer.start
  1907. stop = indexer.stop
  1908. step = indexer.step
  1909. if start is None:
  1910. start = 0
  1911. elif start < 0:
  1912. start += target_len
  1913. if stop is None or stop > target_len:
  1914. stop = target_len
  1915. elif stop < 0:
  1916. stop += target_len
  1917. if step is None:
  1918. step = 1
  1919. elif step < 0:
  1920. step = -step
  1921. return (stop - start + step - 1) // step
  1922. elif isinstance(indexer, (ABCSeries, Index, np.ndarray, list)):
  1923. return len(indexer)
  1924. elif not is_list_like_indexer(indexer):
  1925. return 1
  1926. raise AssertionError("cannot find the length of the indexer")
  1927. def convert_to_index_sliceable(obj, key):
  1928. """
  1929. if we are index sliceable, then return my slicer, otherwise return None
  1930. """
  1931. idx = obj.index
  1932. if isinstance(key, slice):
  1933. return idx._convert_slice_indexer(key, kind='getitem')
  1934. elif isinstance(key, compat.string_types):
  1935. # we are an actual column
  1936. if obj._data.items.contains(key):
  1937. return None
  1938. # We might have a datetimelike string that we can translate to a
  1939. # slice here via partial string indexing
  1940. if idx.is_all_dates:
  1941. try:
  1942. return idx._get_string_slice(key)
  1943. except (KeyError, ValueError, NotImplementedError):
  1944. return None
  1945. return None
  1946. def check_bool_indexer(ax, key):
  1947. # boolean indexing, need to check that the data are aligned, otherwise
  1948. # disallowed
  1949. # this function assumes that is_bool_indexer(key) == True
  1950. result = key
  1951. if isinstance(key, ABCSeries) and not key.index.equals(ax):
  1952. result = result.reindex(ax)
  1953. mask = isna(result._values)
  1954. if mask.any():
  1955. raise IndexingError('Unalignable boolean Series provided as '
  1956. 'indexer (index of the boolean Series and of '
  1957. 'the indexed object do not match')
  1958. result = result.astype(bool)._values
  1959. elif is_sparse(result):
  1960. result = result.to_dense()
  1961. result = np.asarray(result, dtype=bool)
  1962. else:
  1963. # is_bool_indexer has already checked for nulls in the case of an
  1964. # object array key, so no check needed here
  1965. result = np.asarray(result, dtype=bool)
  1966. return result
  1967. def check_setitem_lengths(indexer, value, values):
  1968. """
  1969. Validate that value and indexer are the same length.
  1970. An special-case is allowed for when the indexer is a boolean array
  1971. and the number of true values equals the length of ``value``. In
  1972. this case, no exception is raised.
  1973. Parameters
  1974. ----------
  1975. indexer : sequence
  1976. The key for the setitem
  1977. value : array-like
  1978. The value for the setitem
  1979. values : array-like
  1980. The values being set into
  1981. Returns
  1982. -------
  1983. None
  1984. Raises
  1985. ------
  1986. ValueError
  1987. When the indexer is an ndarray or list and the lengths don't
  1988. match.
  1989. """
  1990. # boolean with truth values == len of the value is ok too
  1991. if isinstance(indexer, (np.ndarray, list)):
  1992. if is_list_like(value) and len(indexer) != len(value):
  1993. if not (isinstance(indexer, np.ndarray) and
  1994. indexer.dtype == np.bool_ and
  1995. len(indexer[indexer]) == len(value)):
  1996. raise ValueError("cannot set using a list-like indexer "
  1997. "with a different length than the value")
  1998. # slice
  1999. elif isinstance(indexer, slice):
  2000. if is_list_like(value) and len(values):
  2001. if len(value) != length_of_indexer(indexer, values):
  2002. raise ValueError("cannot set using a slice indexer with a "
  2003. "different length than the value")
  2004. def convert_missing_indexer(indexer):
  2005. """
  2006. reverse convert a missing indexer, which is a dict
  2007. return the scalar indexer and a boolean indicating if we converted
  2008. """
  2009. if isinstance(indexer, dict):
  2010. # a missing key (but not a tuple indexer)
  2011. indexer = indexer['key']
  2012. if isinstance(indexer, bool):
  2013. raise KeyError("cannot use a single bool to index into setitem")
  2014. return indexer, True
  2015. return indexer, False
  2016. def convert_from_missing_indexer_tuple(indexer, axes):
  2017. """
  2018. create a filtered indexer that doesn't have any missing indexers
  2019. """
  2020. def get_indexer(_i, _idx):
  2021. return (axes[_i].get_loc(_idx['key']) if isinstance(_idx, dict) else
  2022. _idx)
  2023. return tuple(get_indexer(_i, _idx) for _i, _idx in enumerate(indexer))
  2024. def maybe_convert_indices(indices, n):
  2025. """
  2026. Attempt to convert indices into valid, positive indices.
  2027. If we have negative indices, translate to positive here.
  2028. If we have indices that are out-of-bounds, raise an IndexError.
  2029. Parameters
  2030. ----------
  2031. indices : array-like
  2032. The array of indices that we are to convert.
  2033. n : int
  2034. The number of elements in the array that we are indexing.
  2035. Returns
  2036. -------
  2037. valid_indices : array-like
  2038. An array-like of positive indices that correspond to the ones
  2039. that were passed in initially to this function.
  2040. Raises
  2041. ------
  2042. IndexError : one of the converted indices either exceeded the number
  2043. of elements (specified by `n`) OR was still negative.
  2044. """
  2045. if isinstance(indices, list):
  2046. indices = np.array(indices)
  2047. if len(indices) == 0:
  2048. # If list is empty, np.array will return float and cause indexing
  2049. # errors.
  2050. return np.empty(0, dtype=np.intp)
  2051. mask = indices < 0
  2052. if mask.any():
  2053. indices = indices.copy()
  2054. indices[mask] += n
  2055. mask = (indices >= n) | (indices < 0)
  2056. if mask.any():
  2057. raise IndexError("indices are out-of-bounds")
  2058. return indices
  2059. def validate_indices(indices, n):
  2060. """
  2061. Perform bounds-checking for an indexer.
  2062. -1 is allowed for indicating missing values.
  2063. Parameters
  2064. ----------
  2065. indices : ndarray
  2066. n : int
  2067. length of the array being indexed
  2068. Raises
  2069. ------
  2070. ValueError
  2071. Examples
  2072. --------
  2073. >>> validate_indices([1, 2], 3)
  2074. # OK
  2075. >>> validate_indices([1, -2], 3)
  2076. ValueError
  2077. >>> validate_indices([1, 2, 3], 3)
  2078. IndexError
  2079. >>> validate_indices([-1, -1], 0)
  2080. # OK
  2081. >>> validate_indices([0, 1], 0)
  2082. IndexError
  2083. """
  2084. if len(indices):
  2085. min_idx = indices.min()
  2086. if min_idx < -1:
  2087. msg = ("'indices' contains values less than allowed ({} < {})"
  2088. .format(min_idx, -1))
  2089. raise ValueError(msg)
  2090. max_idx = indices.max()
  2091. if max_idx >= n:
  2092. raise IndexError("indices are out-of-bounds")
  2093. def maybe_convert_ix(*args):
  2094. """
  2095. We likely want to take the cross-product
  2096. """
  2097. ixify = True
  2098. for arg in args:
  2099. if not isinstance(arg, (np.ndarray, list, ABCSeries, Index)):
  2100. ixify = False
  2101. if ixify:
  2102. return np.ix_(*args)
  2103. else:
  2104. return args
  2105. def is_nested_tuple(tup, labels):
  2106. # check for a compatible nested tuple and multiindexes among the axes
  2107. if not isinstance(tup, tuple):
  2108. return False
  2109. for i, k in enumerate(tup):
  2110. if is_list_like(k) or isinstance(k, slice):
  2111. return isinstance(labels, MultiIndex)
  2112. return False
  2113. def is_list_like_indexer(key):
  2114. # allow a list_like, but exclude NamedTuples which can be indexers
  2115. return is_list_like(key) and not (isinstance(key, tuple) and
  2116. type(key) is not tuple)
  2117. def is_label_like(key):
  2118. # select a label or row
  2119. return not isinstance(key, slice) and not is_list_like_indexer(key)
  2120. def need_slice(obj):
  2121. return (obj.start is not None or obj.stop is not None or
  2122. (obj.step is not None and obj.step != 1))
  2123. def maybe_droplevels(index, key):
  2124. # drop levels
  2125. original_index = index
  2126. if isinstance(key, tuple):
  2127. for _ in key:
  2128. try:
  2129. index = index.droplevel(0)
  2130. except ValueError:
  2131. # we have dropped too much, so back out
  2132. return original_index
  2133. else:
  2134. try:
  2135. index = index.droplevel(0)
  2136. except ValueError:
  2137. pass
  2138. return index
  2139. def _non_reducing_slice(slice_):
  2140. """
  2141. Ensurse that a slice doesn't reduce to a Series or Scalar.
  2142. Any user-paseed `subset` should have this called on it
  2143. to make sure we're always working with DataFrames.
  2144. """
  2145. # default to column slice, like DataFrame
  2146. # ['A', 'B'] -> IndexSlices[:, ['A', 'B']]
  2147. kinds = tuple(list(compat.string_types) + [ABCSeries, np.ndarray, Index,
  2148. list])
  2149. if isinstance(slice_, kinds):
  2150. slice_ = IndexSlice[:, slice_]
  2151. def pred(part):
  2152. # true when slice does *not* reduce, False when part is a tuple,
  2153. # i.e. MultiIndex slice
  2154. return ((isinstance(part, slice) or is_list_like(part))
  2155. and not isinstance(part, tuple))
  2156. if not is_list_like(slice_):
  2157. if not isinstance(slice_, slice):
  2158. # a 1-d slice, like df.loc[1]
  2159. slice_ = [[slice_]]
  2160. else:
  2161. # slice(a, b, c)
  2162. slice_ = [slice_] # to tuplize later
  2163. else:
  2164. slice_ = [part if pred(part) else [part] for part in slice_]
  2165. return tuple(slice_)
  2166. def _maybe_numeric_slice(df, slice_, include_bool=False):
  2167. """
  2168. want nice defaults for background_gradient that don't break
  2169. with non-numeric data. But if slice_ is passed go with that.
  2170. """
  2171. if slice_ is None:
  2172. dtypes = [np.number]
  2173. if include_bool:
  2174. dtypes.append(bool)
  2175. slice_ = IndexSlice[:, df.select_dtypes(include=dtypes).columns]
  2176. return slice_