blocks.py 112 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299
  1. # -*- coding: utf-8 -*-
  2. from datetime import date, datetime, timedelta
  3. import functools
  4. import inspect
  5. import re
  6. import warnings
  7. import numpy as np
  8. from pandas._libs import internals as libinternals, lib, tslib, tslibs
  9. from pandas._libs.tslibs import Timedelta, conversion, is_null_datetimelike
  10. import pandas.compat as compat
  11. from pandas.compat import range, zip
  12. from pandas.util._validators import validate_bool_kwarg
  13. from pandas.core.dtypes.cast import (
  14. astype_nansafe, find_common_type, infer_dtype_from,
  15. infer_dtype_from_scalar, maybe_convert_objects, maybe_downcast_to_dtype,
  16. maybe_infer_dtype_type, maybe_promote, maybe_upcast, soft_convert_objects)
  17. from pandas.core.dtypes.common import (
  18. _NS_DTYPE, _TD_DTYPE, ensure_platform_int, is_bool_dtype, is_categorical,
  19. is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype,
  20. is_dtype_equal, is_extension_array_dtype, is_extension_type,
  21. is_float_dtype, is_integer, is_integer_dtype, is_interval_dtype,
  22. is_list_like, is_numeric_v_string_like, is_object_dtype, is_period_dtype,
  23. is_re, is_re_compilable, is_sparse, is_timedelta64_dtype, pandas_dtype)
  24. import pandas.core.dtypes.concat as _concat
  25. from pandas.core.dtypes.dtypes import (
  26. CategoricalDtype, ExtensionDtype, PandasExtensionDtype)
  27. from pandas.core.dtypes.generic import (
  28. ABCDataFrame, ABCDatetimeIndex, ABCExtensionArray, ABCIndexClass,
  29. ABCSeries)
  30. from pandas.core.dtypes.missing import (
  31. _isna_compat, array_equivalent, isna, notna)
  32. import pandas.core.algorithms as algos
  33. from pandas.core.arrays import (
  34. Categorical, DatetimeArray, ExtensionArray, TimedeltaArray)
  35. from pandas.core.base import PandasObject
  36. import pandas.core.common as com
  37. from pandas.core.indexes.datetimes import DatetimeIndex
  38. from pandas.core.indexing import check_setitem_lengths
  39. from pandas.core.internals.arrays import extract_array
  40. import pandas.core.missing as missing
  41. from pandas.core.nanops import nanpercentile
  42. from pandas.io.formats.printing import pprint_thing
class Block(PandasObject):
    """
    Canonical n-dimensional unit of homogeneous dtype contained in a pandas
    data structure

    Index-ignorant; let the container take care of that
    """
    __slots__ = ['_mgr_locs', 'values', 'ndim']

    # dtype-classification flags; concrete subclasses flip the ones that apply
    is_numeric = False
    is_float = False
    is_integer = False
    is_complex = False
    is_datetime = False
    is_datetimetz = False
    is_timedelta = False
    is_bool = False
    is_object = False
    is_categorical = False
    is_sparse = False
    is_extension = False

    _box_to_block_values = True
    # whether this dtype can represent missing values
    _can_hold_na = False
    # whether this block may be consolidated with same-type neighbors
    _can_consolidate = True
    _verify_integrity = True
    # whether __init__/_check_ndim enforce values.ndim == ndim
    _validate_ndim = True
    _ftype = 'dense'
    # concatenation function used by concat_same_type
    _concatenator = staticmethod(np.concatenate)
    def __init__(self, values, placement, ndim=None):
        """
        Parameters
        ----------
        values : array-like
        placement : BlockPlacement or array-like convertible to one
        ndim : int or None
            If None, inferred from ``values`` (see _check_ndim).
        """
        self.ndim = self._check_ndim(values, ndim)
        self.mgr_locs = placement
        self.values = values

        # when validating, the number of manager locations must match the
        # number of value rows this block carries
        if (self._validate_ndim and self.ndim and
                len(self.mgr_locs) != len(self.values)):
            raise ValueError(
                'Wrong number of items passed {val}, placement implies '
                '{mgr}'.format(val=len(self.values), mgr=len(self.mgr_locs)))
  78. def _check_ndim(self, values, ndim):
  79. """ndim inference and validation.
  80. Infers ndim from 'values' if not provided to __init__.
  81. Validates that values.ndim and ndim are consistent if and only if
  82. the class variable '_validate_ndim' is True.
  83. Parameters
  84. ----------
  85. values : array-like
  86. ndim : int or None
  87. Returns
  88. -------
  89. ndim : int
  90. Raises
  91. ------
  92. ValueError : the number of dimensions do not match
  93. """
  94. if ndim is None:
  95. ndim = values.ndim
  96. if self._validate_ndim and values.ndim != ndim:
  97. msg = ("Wrong number of dimensions. values.ndim != ndim "
  98. "[{} != {}]")
  99. raise ValueError(msg.format(values.ndim, ndim))
  100. return ndim
  101. @property
  102. def _holder(self):
  103. """The array-like that can hold the underlying values.
  104. None for 'Block', overridden by subclasses that don't
  105. use an ndarray.
  106. """
  107. return None
  108. @property
  109. def _consolidate_key(self):
  110. return (self._can_consolidate, self.dtype.name)
  111. @property
  112. def _is_single_block(self):
  113. return self.ndim == 1
  114. @property
  115. def is_view(self):
  116. """ return a boolean if I am possibly a view """
  117. return self.values.base is not None
    @property
    def is_datelike(self):
        """ return True if I am a datetime-like (datetime64 or timedelta64) """
        return self.is_datetime or self.is_timedelta
  122. def is_categorical_astype(self, dtype):
  123. """
  124. validate that we have a astypeable to categorical,
  125. returns a boolean if we are a categorical
  126. """
  127. if dtype is Categorical or dtype is CategoricalDtype:
  128. # this is a pd.Categorical, but is not
  129. # a valid type for astypeing
  130. raise TypeError("invalid type {0} for astype".format(dtype))
  131. elif is_categorical_dtype(dtype):
  132. return True
  133. return False
  134. def external_values(self, dtype=None):
  135. """ return an outside world format, currently just the ndarray """
  136. return self.values
  137. def internal_values(self, dtype=None):
  138. """ return an internal format, currently just the ndarray
  139. this should be the pure internal API format
  140. """
  141. return self.values
  142. def formatting_values(self):
  143. """Return the internal values used by the DataFrame/SeriesFormatter"""
  144. return self.internal_values()
  145. def get_values(self, dtype=None):
  146. """
  147. return an internal format, currently just the ndarray
  148. this is often overridden to handle to_dense like operations
  149. """
  150. if is_object_dtype(dtype):
  151. return self.values.astype(object)
  152. return self.values
  153. def to_dense(self):
  154. return self.values.view()
  155. @property
  156. def _na_value(self):
  157. return np.nan
  158. @property
  159. def fill_value(self):
  160. return np.nan
    @property
    def mgr_locs(self):
        # BlockPlacement describing where our rows sit in the manager
        return self._mgr_locs

    @mgr_locs.setter
    def mgr_locs(self, new_mgr_locs):
        # coerce anything placement-like into a BlockPlacement
        if not isinstance(new_mgr_locs, libinternals.BlockPlacement):
            new_mgr_locs = libinternals.BlockPlacement(new_mgr_locs)

        self._mgr_locs = new_mgr_locs
  169. @property
  170. def array_dtype(self):
  171. """ the dtype to return if I want to construct this block as an
  172. array
  173. """
  174. return self.dtype
  175. def make_block(self, values, placement=None, ndim=None):
  176. """
  177. Create a new block, with type inference propagate any values that are
  178. not specified
  179. """
  180. if placement is None:
  181. placement = self.mgr_locs
  182. if ndim is None:
  183. ndim = self.ndim
  184. return make_block(values, placement=placement, ndim=ndim)
    def make_block_same_class(self, values, placement=None, ndim=None,
                              dtype=None):
        """ Wrap given values in a block of same type as self. """
        if dtype is not None:
            # issue 19431 fastparquet is passing this
            warnings.warn("dtype argument is deprecated, will be removed "
                          "in a future release.", DeprecationWarning)
        if placement is None:
            placement = self.mgr_locs
        return make_block(values, placement=placement, ndim=ndim,
                          klass=self.__class__, dtype=dtype)
    def __unicode__(self):
        # don't want to print out all of the items here
        name = pprint_thing(self.__class__.__name__)
        if self._is_single_block:
            # 1-d: report just length and dtype
            result = '{name}: {len} dtype: {dtype}'.format(
                name=name, len=len(self), dtype=self.dtype)

        else:
            # n-d: include manager locations and block shape
            shape = ' x '.join(pprint_thing(s) for s in self.shape)
            result = '{name}: {index}, {shape}, dtype: {dtype}'.format(
                name=name, index=pprint_thing(self.mgr_locs.indexer),
                shape=shape, dtype=self.dtype)

        return result
  208. def __len__(self):
  209. return len(self.values)
  210. def __getstate__(self):
  211. return self.mgr_locs.indexer, self.values
  212. def __setstate__(self, state):
  213. self.mgr_locs = libinternals.BlockPlacement(state[0])
  214. self.values = state[1]
  215. self.ndim = self.values.ndim
  216. def _slice(self, slicer):
  217. """ return a slice of my values """
  218. return self.values[slicer]
    def reshape_nd(self, labels, shape, ref_items):
        """
        Return a new block that is transformed to an nd block.

        Parameters
        ----------
        labels : list of new axis labels
        shape : new shape
        ref_items : new ref_items
        """
        return _block2d_to_blocknd(values=self.get_values().T,
                                   placement=self.mgr_locs, shape=shape,
                                   labels=labels, ref_items=ref_items)
  231. def getitem_block(self, slicer, new_mgr_locs=None):
  232. """
  233. Perform __getitem__-like, return result as block.
  234. As of now, only supports slices that preserve dimensionality.
  235. """
  236. if new_mgr_locs is None:
  237. if isinstance(slicer, tuple):
  238. axis0_slicer = slicer[0]
  239. else:
  240. axis0_slicer = slicer
  241. new_mgr_locs = self.mgr_locs[axis0_slicer]
  242. new_values = self._slice(slicer)
  243. if self._validate_ndim and new_values.ndim != self.ndim:
  244. raise ValueError("Only same dim slicing is allowed")
  245. return self.make_block_same_class(new_values, new_mgr_locs)
  246. @property
  247. def shape(self):
  248. return self.values.shape
  249. @property
  250. def dtype(self):
  251. return self.values.dtype
  252. @property
  253. def ftype(self):
  254. if getattr(self.values, '_pandas_ftype', False):
  255. dtype = self.dtype.subtype
  256. else:
  257. dtype = self.dtype
  258. return "{dtype}:{ftype}".format(dtype=dtype, ftype=self._ftype)
    def merge(self, other):
        # combine self and other into a single block via _merge_blocks
        return _merge_blocks([self, other])
    def concat_same_type(self, to_concat, placement=None):
        """
        Concatenate list of single blocks of the same type.
        """
        values = self._concatenator([blk.values for blk in to_concat],
                                    axis=self.ndim - 1)
        # NOTE(review): a falsy placement (not only None) also falls back
        # to the full-range slice here
        return self.make_block_same_class(
            values, placement=placement or slice(0, len(values), 1))
  269. def iget(self, i):
  270. return self.values[i]
    def set(self, locs, values):
        """
        Modify Block in-place with new item value

        Returns
        -------
        None
        """
        self.values[locs] = values
    def delete(self, loc):
        """
        Delete given loc(-s) from block in-place.
        """
        # drop the rows from the values and the matching manager locations
        self.values = np.delete(self.values, loc, 0)
        self.mgr_locs = self.mgr_locs.delete(loc)
    def apply(self, func, **kwargs):
        """ apply the function to my values; return a block if we are not
        one
        """
        # silence numpy floating-point warnings while applying
        with np.errstate(all='ignore'):
            result = func(self.values, **kwargs)
        if not isinstance(result, Block):
            # re-wrap raw results, restoring our dimensionality
            result = self.make_block(values=_block_shape(result,
                                                         ndim=self.ndim))

        return result
    def fillna(self, value, limit=None, inplace=False, downcast=None):
        """ fillna on the block with the value. If we fail, then convert to
        ObjectBlock and try again
        """
        inplace = validate_bool_kwarg(inplace, 'inplace')

        if not self._can_hold_na:
            # nothing in this block can be NA, so there is nothing to fill
            if inplace:
                return self
            else:
                return self.copy()

        mask = isna(self.values)
        if limit is not None:
            if not is_integer(limit):
                raise ValueError('Limit must be an integer')
            if limit < 1:
                raise ValueError('Limit must be greater than 0')
            if self.ndim > 2:
                raise NotImplementedError("number of dimensions for 'fillna' "
                                          "is currently limited to 2")
            # only fill the first `limit` NAs along the last axis
            mask[mask.cumsum(self.ndim - 1) > limit] = False

        # fillna, but if we cannot coerce, then try again as an ObjectBlock
        try:
            # raises TypeError/ValueError when `value` cannot be held here
            values, _ = self._try_coerce_args(self.values, value)
            blocks = self.putmask(mask, value, inplace=inplace)
            blocks = [b.make_block(values=self._try_coerce_result(b.values))
                      for b in blocks]
            return self._maybe_downcast(blocks, downcast)
        except (TypeError, ValueError):

            # we can't process the value, but nothing to do
            if not mask.any():
                return self if inplace else self.copy()

            # operate column-by-column
            def f(m, v, i):
                # coerce the whole block to a dtype that can hold `value`
                block = self.coerce_to_target_dtype(value)

                # slice out our block
                if i is not None:
                    block = block.getitem_block(slice(i, i + 1))
                return block.fillna(value,
                                    limit=limit,
                                    inplace=inplace,
                                    downcast=None)

            return self.split_and_operate(mask, f, inplace)
    def split_and_operate(self, mask, f, inplace):
        """
        split the block per-column, and apply the callable f
        per-column, return a new block for each. Handle
        masking which will not change a block unless needed.

        Parameters
        ----------
        mask : 2-d boolean mask
        f : callable accepting (1d-mask, 1d values, indexer)
        inplace : boolean

        Returns
        -------
        list of blocks
        """
        if mask is None:
            # no mask provided: treat every position as selected
            mask = np.ones(self.shape, dtype=bool)
        new_values = self.values

        def make_a_block(nv, ref_loc):
            # wrap the result of f back into a Block
            if isinstance(nv, Block):
                block = nv
            elif isinstance(nv, list):
                block = nv[0]
            else:
                # Put back the dimension that was taken from it and make
                # a block out of the result.
                try:
                    nv = _block_shape(nv, ndim=self.ndim)
                except (AttributeError, NotImplementedError):
                    pass
                block = self.make_block(values=nv,
                                        placement=ref_loc)
            return block

        # ndim == 1
        if self.ndim == 1:
            if mask.any():
                nv = f(mask, new_values, None)
            else:
                # nothing selected: keep values (copy unless inplace)
                nv = new_values if inplace else new_values.copy()
            block = make_a_block(nv, self.mgr_locs)
            return [block]

        # ndim > 1: operate column-by-column
        new_blocks = []
        for i, ref_loc in enumerate(self.mgr_locs):
            m = mask[i]
            v = new_values[i]

            # need a new block
            if m.any():
                nv = f(m, v, i)
            else:
                nv = v if inplace else v.copy()

            block = make_a_block(nv, [ref_loc])
            new_blocks.append(block)

        return new_blocks
  390. def _maybe_downcast(self, blocks, downcast=None):
  391. # no need to downcast our float
  392. # unless indicated
  393. if downcast is None and self.is_float:
  394. return blocks
  395. elif downcast is None and (self.is_timedelta or self.is_datetime):
  396. return blocks
  397. if not isinstance(blocks, list):
  398. blocks = [blocks]
  399. return _extend_blocks([b.downcast(downcast) for b in blocks])
    def downcast(self, dtypes=None):
        """ try to downcast each item to the dict of dtypes if present """

        # turn it off completely
        if dtypes is False:
            return self

        values = self.values

        # single block handling
        if self._is_single_block:

            # try to cast all non-floats here
            if dtypes is None:
                dtypes = 'infer'

            nv = maybe_downcast_to_dtype(values, dtypes)
            return self.make_block(nv)

        # ndim > 1
        if dtypes is None:
            return self

        if not (dtypes == 'infer' or isinstance(dtypes, dict)):
            raise ValueError("downcast must have a dictionary or 'infer' as "
                             "its argument")

        # operate column-by-column
        # this is expensive as it splits the blocks items-by-item
        def f(m, v, i):

            if dtypes == 'infer':
                dtype = 'infer'
            else:
                # per-column dtype mapping is not implemented
                raise AssertionError("dtypes as dict is not supported yet")

            if dtype is not None:
                v = maybe_downcast_to_dtype(v, dtype)
            return v

        return self.split_and_operate(None, f, False)
    def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
        # public entry point; the work happens in _astype
        return self._astype(dtype, copy=copy, errors=errors, values=values,
                            **kwargs)
    def _astype(self, dtype, copy=False, errors='raise', values=None,
                **kwargs):
        """Coerce to the new type

        Parameters
        ----------
        dtype : str, dtype convertible
        copy : boolean, default False
            copy if indicated
        errors : str, {'raise', 'ignore'}, default 'raise'
            - ``raise`` : allow exceptions to be raised
            - ``ignore`` : suppress exceptions. On error return original object

        Returns
        -------
        Block
        """
        errors_legal_values = ('raise', 'ignore')

        if errors not in errors_legal_values:
            invalid_arg = ("Expected value of kwarg 'errors' to be one of {}. "
                           "Supplied value is '{}'".format(
                               list(errors_legal_values), errors))
            raise ValueError(invalid_arg)

        if (inspect.isclass(dtype) and
                issubclass(dtype, (PandasExtensionDtype, ExtensionDtype))):
            # an extension-dtype *class* must be instantiated by the caller
            msg = ("Expected an instance of {}, but got the class instead. "
                   "Try instantiating 'dtype'.".format(dtype.__name__))
            raise TypeError(msg)

        # may need to convert to categorical
        if self.is_categorical_astype(dtype):

            # deprecated 17636
            if ('categories' in kwargs or 'ordered' in kwargs):
                if isinstance(dtype, CategoricalDtype):
                    raise TypeError(
                        "Cannot specify a CategoricalDtype and also "
                        "`categories` or `ordered`. Use "
                        "`dtype=CategoricalDtype(categories, ordered)`"
                        " instead.")
                warnings.warn("specifying 'categories' or 'ordered' in "
                              ".astype() is deprecated; pass a "
                              "CategoricalDtype instead",
                              FutureWarning, stacklevel=7)

            categories = kwargs.get('categories', None)
            ordered = kwargs.get('ordered', None)
            if com._any_not_none(categories, ordered):
                dtype = CategoricalDtype(categories, ordered)

            if is_categorical_dtype(self.values):
                # GH 10696/18593: update an existing categorical efficiently
                return self.make_block(self.values.astype(dtype, copy=copy))

            return self.make_block(Categorical(self.values, dtype=dtype))

        # convert dtypes if needed
        dtype = pandas_dtype(dtype)

        # astype processing
        if is_dtype_equal(self.dtype, dtype):
            # no-op cast: honor only the copy flag
            if copy:
                return self.copy()
            return self

        # choose the target block class for non-ndarray results
        klass = None
        if is_sparse(self.values):
            # special case sparse, Series[Sparse].astype(object) is sparse
            klass = ExtensionBlock
        elif is_object_dtype(dtype):
            klass = ObjectBlock
        elif is_extension_array_dtype(dtype):
            klass = ExtensionBlock

        try:
            # force the copy here
            if values is None:

                if self.is_extension:
                    values = self.values.astype(dtype)
                else:
                    if issubclass(dtype.type,
                                  (compat.text_type, compat.string_types)):

                        # use native type formatting for datetime/tz/timedelta
                        if self.is_datelike:
                            values = self.to_native_types()

                        # astype formatting
                        else:
                            values = self.get_values()

                    else:
                        values = self.get_values(dtype=dtype)

                    # _astype_nansafe works fine with 1-d only
                    values = astype_nansafe(values.ravel(), dtype, copy=True)

                    # TODO(extension)
                    # should we make this attribute?
                    try:
                        values = values.reshape(self.shape)
                    except AttributeError:
                        pass

            newb = make_block(values, placement=self.mgr_locs,
                              klass=klass, ndim=self.ndim)
        except Exception:  # noqa: E722
            if errors == 'raise':
                raise
            # errors == 'ignore': fall back to the original (maybe copied)
            newb = self.copy() if copy else self

        if newb.is_numeric and self.is_numeric:
            # numeric-to-numeric casts must not change the block's shape
            if newb.shape != self.shape:
                raise TypeError(
                    "cannot set astype for copy = [{copy}] for dtype "
                    "({dtype} [{shape}]) to different shape "
                    "({newb_dtype} [{newb_shape}])".format(
                        copy=copy, dtype=self.dtype.name,
                        shape=self.shape, newb_dtype=newb.dtype.name,
                        newb_shape=newb.shape))
        return newb
  536. def convert(self, copy=True, **kwargs):
  537. """ attempt to coerce any object types to better types return a copy
  538. of the block (if copy = True) by definition we are not an ObjectBlock
  539. here!
  540. """
  541. return self.copy() if copy else self
  542. def _can_hold_element(self, element):
  543. """ require the same dtype as ourselves """
  544. dtype = self.values.dtype.type
  545. tipo = maybe_infer_dtype_type(element)
  546. if tipo is not None:
  547. return issubclass(tipo.type, dtype)
  548. return isinstance(element, dtype)
def _try_cast_result(self, result, dtype=None):
    """ try to cast the result to our original type, we may have
    roundtripped thru object in the mean-time

    Parameters
    ----------
    result : ndarray
        The computed result to cast back.
    dtype : dtype-like or 'infer', optional
        Target dtype; defaults to this block's dtype.

    Returns
    -------
    ndarray
    """
    if dtype is None:
        dtype = self.dtype

    # int/bool/datetime results fall straight through to the
    # downcast attempt at the bottom
    if self.is_integer or self.is_bool or self.is_datetime:
        pass
    elif self.is_float and result.dtype == self.dtype:

        # protect against a bool/object showing up here
        if isinstance(dtype, compat.string_types) and dtype == 'infer':
            return result
        if not isinstance(dtype, type):
            # dtype instance -> underlying numpy scalar type
            dtype = dtype.type
        if issubclass(dtype, (np.bool_, np.object_)):
            if issubclass(dtype, np.bool_):
                if isna(result).all():
                    # an all-NA float result can be safely viewed as bool
                    return result.astype(np.bool_)
                else:
                    # mixed values: map 1 -> True and 0 -> False through
                    # object dtype so NA entries are preserved
                    result = result.astype(np.object_)
                    result[result == 1] = True
                    result[result == 0] = False
                    return result
            else:
                return result.astype(np.object_)

        # float result already matching our dtype: nothing to cast
        return result

    # may need to change the dtype here
    return maybe_downcast_to_dtype(result, dtype)
  577. def _try_coerce_args(self, values, other):
  578. """ provide coercion to our input arguments """
  579. if np.any(notna(other)) and not self._can_hold_element(other):
  580. # coercion issues
  581. # let higher levels handle
  582. raise TypeError("cannot convert {} to an {}".format(
  583. type(other).__name__,
  584. type(self).__name__.lower().replace('Block', '')))
  585. return values, other
  586. def _try_coerce_result(self, result):
  587. """ reverse of try_coerce_args """
  588. return result
  589. def _try_coerce_and_cast_result(self, result, dtype=None):
  590. result = self._try_coerce_result(result)
  591. result = self._try_cast_result(result, dtype=dtype)
  592. return result
  593. def to_native_types(self, slicer=None, na_rep='nan', quoting=None,
  594. **kwargs):
  595. """ convert to our native types format, slicing if desired """
  596. values = self.get_values()
  597. if slicer is not None:
  598. values = values[:, slicer]
  599. mask = isna(values)
  600. if not self.is_object and not quoting:
  601. values = values.astype(str)
  602. else:
  603. values = np.array(values, dtype='object')
  604. values[mask] = na_rep
  605. return values
  606. # block actions ####
  607. def copy(self, deep=True):
  608. """ copy constructor """
  609. values = self.values
  610. if deep:
  611. values = values.copy()
  612. return self.make_block_same_class(values)
def replace(self, to_replace, value, inplace=False, filter=None,
            regex=False, convert=True):
    """replace the to_replace value with value, possible to create new
    blocks here this is just a call to putmask. regex is not used here.
    It is used in ObjectBlocks. It is here for API compatibility.

    Parameters
    ----------
    to_replace : object
        Value (or list-like of values) to be masked out and replaced.
    value : object
        Replacement value, applied through putmask.
    inplace : bool, default False
    filter : list-like of int, optional
        Restrict replacement to these manager locations.
    regex : bool, default False
        Unused here; accepted for API compatibility with ObjectBlock.
    convert : bool, default True
        If True, try to coerce the resulting blocks to better dtypes.
    """
    inplace = validate_bool_kwarg(inplace, 'inplace')
    original_to_replace = to_replace

    # try to replace, if we raise an error, convert to ObjectBlock and
    # retry
    try:
        # may raise TypeError/ValueError when our dtype cannot hold
        # `to_replace`; handled below by recasting to object
        values, to_replace = self._try_coerce_args(self.values,
                                                   to_replace)
        mask = missing.mask_missing(values, to_replace)
        if filter is not None:
            # drop mask rows whose manager location is not in `filter`
            filtered_out = ~self.mgr_locs.isin(filter)
            mask[filtered_out.nonzero()[0]] = False

        blocks = self.putmask(mask, value, inplace=inplace)
        if convert:
            blocks = [b.convert(by_item=True, numeric=False,
                                copy=not inplace) for b in blocks]
        return blocks
    except (TypeError, ValueError):
        # GH 22083, TypeError or ValueError occurred within error handling
        # causes infinite loop. Cast and retry only if not objectblock.
        if is_object_dtype(self):
            raise

        # try again with a compatible block
        block = self.astype(object)
        return block.replace(to_replace=original_to_replace,
                             value=value,
                             inplace=inplace,
                             filter=filter,
                             regex=regex,
                             convert=convert)
  648. def _replace_single(self, *args, **kwargs):
  649. """ no-op on a non-ObjectBlock """
  650. return self if kwargs['inplace'] else self.copy()
def setitem(self, indexer, value):
    """Set the value inplace, returning a a maybe different typed block.

    Parameters
    ----------
    indexer : tuple, list-like, array-like, slice
        The subset of self.values to set
    value : object
        The value being set

    Returns
    -------
    Block

    Notes
    -----
    `indexer` is a direct slice/positional indexer. `value` must
    be a compatible shape.
    """
    # coerce None values, if appropriate
    if value is None:
        if self.is_numeric:
            value = np.nan

    # coerce if block dtype can store value
    values = self.values
    try:
        values, value = self._try_coerce_args(values, value)
        # can keep its own dtype
        if hasattr(value, 'dtype') and is_dtype_equal(values.dtype,
                                                      value.dtype):
            dtype = self.dtype
        else:
            dtype = 'infer'

    except (TypeError, ValueError):
        # current dtype cannot store value, coerce to common dtype
        find_dtype = False

        if hasattr(value, 'dtype'):
            dtype = value.dtype
            find_dtype = True

        elif lib.is_scalar(value):
            if isna(value):
                # NaN promotion is handled in latter path
                dtype = False
            else:
                dtype, _ = infer_dtype_from_scalar(value,
                                                   pandas_dtype=True)
                find_dtype = True
        else:
            dtype = 'infer'

        if find_dtype:
            dtype = find_common_type([values.dtype, dtype])
            if not is_dtype_equal(self.dtype, dtype):
                # delegate to a block that can actually hold `value`
                b = self.astype(dtype)
                return b.setitem(indexer, value)

    # value must be storeable at this moment
    arr_value = np.array(value)

    # cast the values to a type that can hold nan (if necessary)
    if not self._can_hold_element(value):
        dtype, _ = maybe_promote(arr_value.dtype)
        values = values.astype(dtype)

    # 2-D blocks are stored transposed relative to the frame, so work
    # on the transposed view and transpose back at the end
    transf = (lambda x: x.T) if self.ndim == 2 else (lambda x: x)
    values = transf(values)

    # length checking
    check_setitem_lengths(indexer, value, values)

    def _is_scalar_indexer(indexer):
        # return True if we are all scalar indexers

        if arr_value.ndim == 1:
            if not isinstance(indexer, tuple):
                indexer = tuple([indexer])
            return any(isinstance(idx, np.ndarray) and len(idx) == 0
                       for idx in indexer)
        return False

    def _is_empty_indexer(indexer):
        # return a boolean if we have an empty indexer

        if is_list_like(indexer) and not len(indexer):
            return True
        if arr_value.ndim == 1:
            if not isinstance(indexer, tuple):
                indexer = tuple([indexer])
            return any(isinstance(idx, np.ndarray) and len(idx) == 0
                       for idx in indexer)
        return False

    # empty indexers
    # 8669 (empty)
    if _is_empty_indexer(indexer):
        pass

    # setting a single element for each dim and with a rhs that could
    # be say a list
    # GH 6043
    elif _is_scalar_indexer(indexer):
        values[indexer] = value

    # if we are an exact match (ex-broadcasting),
    # then use the resultant dtype
    elif (len(arr_value.shape) and
          arr_value.shape[0] == values.shape[0] and
          np.prod(arr_value.shape) == np.prod(values.shape)):
        values[indexer] = value
        # adopt the incoming dtype when shapes match exactly; a failed
        # astype (e.g. strings) just keeps the current dtype
        try:
            values = values.astype(arr_value.dtype)
        except ValueError:
            pass

    # set
    else:
        values[indexer] = value

    # coerce and try to infer the dtypes of the result
    values = self._try_coerce_and_cast_result(values, dtype)
    block = self.make_block(transf(values))
    return block
def putmask(self, mask, new, align=True, inplace=False, axis=0,
            transpose=False):
    """ putmask the data to the block; it is possible that we may create a
    new dtype of block

    return the resulting block(s)

    Parameters
    ----------
    mask  : the condition to respect
    new : a ndarray/object
    align : boolean, perform alignment on other/cond, default is True
    inplace : perform inplace modification, default is False
    axis : int
    transpose : boolean
        Set to True if self is stored with axes reversed

    Returns
    -------
    a list of new blocks, the result of the putmask
    """
    new_values = self.values if inplace else self.values.copy()

    # unbox Series/Index-like inputs to their underlying arrays
    new = getattr(new, 'values', new)
    mask = getattr(mask, 'values', mask)

    # if we are passed a scalar None, convert it here
    if not is_list_like(new) and isna(new) and not self.is_object:
        new = self.fill_value

    if self._can_hold_element(new):
        _, new = self._try_coerce_args(new_values, new)

        if transpose:
            new_values = new_values.T

        # If the default repeat behavior in np.putmask would go in the
        # wrong direction, then explicitly repeat and reshape new instead
        if getattr(new, 'ndim', 0) >= 1:
            if self.ndim - 1 == new.ndim and axis == 1:
                new = np.repeat(
                    new, new_values.shape[-1]).reshape(self.shape)
            new = new.astype(new_values.dtype)

        # we require exact matches between the len of the
        # values we are setting (or is compat). np.putmask
        # doesn't check this and will simply truncate / pad
        # the output, but we want sane error messages
        #
        # TODO: this prob needs some better checking
        # for 2D cases
        if ((is_list_like(new) and
             np.any(mask[mask]) and
             getattr(new, 'ndim', 1) == 1)):

            if not (mask.shape[-1] == len(new) or
                    mask[mask].shape[-1] == len(new) or
                    len(new) == 1):
                raise ValueError("cannot assign mismatch "
                                 "length to masked array")

        np.putmask(new_values, mask, new)

    # maybe upcast me
    elif mask.any():
        if transpose:
            mask = mask.T
            if isinstance(new, np.ndarray):
                new = new.T
            # axis is expressed in terms of the transposed layout now
            axis = new_values.ndim - axis - 1

        # Pseudo-broadcast
        if getattr(new, 'ndim', 0) >= 1:
            if self.ndim - 1 == new.ndim:
                new_shape = list(new.shape)
                new_shape.insert(axis, 1)
                new = new.reshape(tuple(new_shape))

        # operate column-by-column
        def f(m, v, i):
            # m: column mask, v: column values, i: column index
            # (None for the ndim==1 case)

            if i is None:
                # ndim==1 case.
                n = new
            else:

                if isinstance(new, np.ndarray):
                    n = np.squeeze(new[i % new.shape[0]])
                else:
                    n = np.array(new)

                # type of the new block
                dtype, _ = maybe_promote(n.dtype)

                # we need to explicitly astype here to make a copy
                n = n.astype(dtype)

            nv = _putmask_smart(v, m, n)
            return nv

        new_blocks = self.split_and_operate(mask, f, inplace)
        return new_blocks

    if inplace:
        return [self]

    if transpose:
        new_values = new_values.T

    return [self.make_block(new_values)]
def coerce_to_target_dtype(self, other):
    """
    coerce the current block to a dtype compat for other
    we will return a block, possibly object, and not raise

    we can also safely try to coerce to the same dtype
    and will receive the same block
    """

    # if we cannot then coerce to object
    dtype, _ = infer_dtype_from(other, pandas_dtype=True)

    if is_dtype_equal(self.dtype, dtype):
        return self

    if self.is_bool or is_object_dtype(dtype) or is_bool_dtype(dtype):
        # we don't upcast to bool
        return self.astype(object)

    elif ((self.is_float or self.is_complex) and
          (is_integer_dtype(dtype) or is_float_dtype(dtype))):
        # don't coerce float/complex to int
        return self

    elif (self.is_datetime or
          is_datetime64_dtype(dtype) or
          is_datetime64tz_dtype(dtype)):

        # not a datetime
        if not ((is_datetime64_dtype(dtype) or
                 is_datetime64tz_dtype(dtype)) and self.is_datetime):
            return self.astype(object)

        # don't upcast timezone with different timezone or no timezone
        mytz = getattr(self.dtype, 'tz', None)
        othertz = getattr(dtype, 'tz', None)

        if str(mytz) != str(othertz):
            return self.astype(object)

        # both datetime with the same tz: should have matched the
        # is_dtype_equal fast path above, so something went wrong
        raise AssertionError("possible recursion in "
                             "coerce_to_target_dtype: {} {}".format(
                                 self, other))

    elif (self.is_timedelta or is_timedelta64_dtype(dtype)):

        # not a timedelta
        if not (is_timedelta64_dtype(dtype) and self.is_timedelta):
            return self.astype(object)

        # both timedelta: same reasoning as the datetime branch above
        raise AssertionError("possible recursion in "
                             "coerce_to_target_dtype: {} {}".format(
                                 self, other))

    try:
        return self.astype(dtype)
    except (ValueError, TypeError, OverflowError):
        pass

    return self.astype(object)
def interpolate(self, method='pad', axis=0, index=None, values=None,
                inplace=False, limit=None, limit_direction='forward',
                limit_area=None, fill_value=None, coerce=False,
                downcast=None, **kwargs):
    """Interpolate missing values.

    Dispatches to ``_interpolate_with_fill`` for fill methods
    (pad/backfill) and to ``_interpolate`` for scipy-style methods;
    raises ValueError for an unrecognized ``method``.
    """

    inplace = validate_bool_kwarg(inplace, 'inplace')

    def check_int_bool(self, inplace):
        # Only FloatBlocks will contain NaNs.
        # timedelta subclasses IntBlock
        # (returns None implicitly when interpolation should proceed)
        if (self.is_bool or self.is_integer) and not self.is_timedelta:
            if inplace:
                return self
            else:
                return self.copy()

    # a fill na type method
    try:
        m = missing.clean_fill_method(method)
    except ValueError:
        m = None

    if m is not None:
        r = check_int_bool(self, inplace)
        if r is not None:
            return r
        return self._interpolate_with_fill(method=m, axis=axis,
                                           inplace=inplace, limit=limit,
                                           fill_value=fill_value,
                                           coerce=coerce,
                                           downcast=downcast)
    # try an interp method
    try:
        m = missing.clean_interp_method(method, **kwargs)
    except ValueError:
        m = None

    if m is not None:
        r = check_int_bool(self, inplace)
        if r is not None:
            return r
        return self._interpolate(method=m, index=index, values=values,
                                 axis=axis, limit=limit,
                                 limit_direction=limit_direction,
                                 limit_area=limit_area,
                                 fill_value=fill_value, inplace=inplace,
                                 downcast=downcast, **kwargs)

    raise ValueError("invalid method '{0}' to interpolate.".format(method))
  931. def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,
  932. limit=None, fill_value=None, coerce=False,
  933. downcast=None):
  934. """ fillna but using the interpolate machinery """
  935. inplace = validate_bool_kwarg(inplace, 'inplace')
  936. # if we are coercing, then don't force the conversion
  937. # if the block can't hold the type
  938. if coerce:
  939. if not self._can_hold_na:
  940. if inplace:
  941. return [self]
  942. else:
  943. return [self.copy()]
  944. values = self.values if inplace else self.values.copy()
  945. values, fill_value = self._try_coerce_args(values, fill_value)
  946. values = missing.interpolate_2d(values, method=method, axis=axis,
  947. limit=limit, fill_value=fill_value,
  948. dtype=self.dtype)
  949. values = self._try_coerce_result(values)
  950. blocks = [self.make_block_same_class(values, ndim=self.ndim)]
  951. return self._maybe_downcast(blocks, downcast)
def _interpolate(self, method=None, index=None, values=None,
                 fill_value=None, axis=0, limit=None,
                 limit_direction='forward', limit_area=None,
                 inplace=False, downcast=None, **kwargs):
    """ interpolate using scipy wrappers """

    inplace = validate_bool_kwarg(inplace, 'inplace')
    data = self.values if inplace else self.values.copy()

    # only deal with floats
    if not self.is_float:
        if not self.is_integer:
            # non-numeric blocks are returned unchanged
            return self
        data = data.astype(np.float64)

    if fill_value is None:
        fill_value = self.fill_value

    if method in ('krogh', 'piecewise_polynomial', 'pchip'):
        # these scipy methods require a sorted index
        if not index.is_monotonic:
            raise ValueError("{0} interpolation requires that the "
                             "index be monotonic.".format(method))
    # process 1-d slices in the axis direction

    def func(x):

        # process a 1-d slice, returning it
        # should the axis argument be handled below in apply_along_axis?
        # i.e. not an arg to missing.interpolate_1d
        return missing.interpolate_1d(index, x, method=method, limit=limit,
                                      limit_direction=limit_direction,
                                      limit_area=limit_area,
                                      fill_value=fill_value,
                                      bounds_error=False, **kwargs)

    # interp each column independently
    interp_values = np.apply_along_axis(func, axis, data)

    blocks = [self.make_block_same_class(interp_values)]
    return self._maybe_downcast(blocks, downcast)
  984. def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None):
  985. """
  986. Take values according to indexer and return them as a block.bb
  987. """
  988. # algos.take_nd dispatches for DatetimeTZBlock, CategoricalBlock
  989. # so need to preserve types
  990. # sparse is treated like an ndarray, but needs .get_values() shaping
  991. values = self.values
  992. if self.is_sparse:
  993. values = self.get_values()
  994. if fill_tuple is None:
  995. fill_value = self.fill_value
  996. new_values = algos.take_nd(values, indexer, axis=axis,
  997. allow_fill=False, fill_value=fill_value)
  998. else:
  999. fill_value = fill_tuple[0]
  1000. new_values = algos.take_nd(values, indexer, axis=axis,
  1001. allow_fill=True, fill_value=fill_value)
  1002. if new_mgr_locs is None:
  1003. if axis == 0:
  1004. slc = libinternals.indexer_as_slice(indexer)
  1005. if slc is not None:
  1006. new_mgr_locs = self.mgr_locs[slc]
  1007. else:
  1008. new_mgr_locs = self.mgr_locs[indexer]
  1009. else:
  1010. new_mgr_locs = self.mgr_locs
  1011. if not is_dtype_equal(new_values.dtype, self.dtype):
  1012. return self.make_block(new_values, new_mgr_locs)
  1013. else:
  1014. return self.make_block_same_class(new_values, new_mgr_locs)
  1015. def diff(self, n, axis=1):
  1016. """ return block for the diff of the values """
  1017. new_values = algos.diff(self.values, n, axis=axis)
  1018. return [self.make_block(values=new_values)]
  1019. def shift(self, periods, axis=0, fill_value=None):
  1020. """ shift the block by periods, possibly upcast """
  1021. # convert integer to float if necessary. need to do a lot more than
  1022. # that, handle boolean etc also
  1023. new_values, fill_value = maybe_upcast(self.values, fill_value)
  1024. # make sure array sent to np.roll is c_contiguous
  1025. f_ordered = new_values.flags.f_contiguous
  1026. if f_ordered:
  1027. new_values = new_values.T
  1028. axis = new_values.ndim - axis - 1
  1029. if np.prod(new_values.shape):
  1030. new_values = np.roll(new_values, ensure_platform_int(periods),
  1031. axis=axis)
  1032. axis_indexer = [slice(None)] * self.ndim
  1033. if periods > 0:
  1034. axis_indexer[axis] = slice(None, periods)
  1035. else:
  1036. axis_indexer[axis] = slice(periods, None)
  1037. new_values[tuple(axis_indexer)] = fill_value
  1038. # restore original order
  1039. if f_ordered:
  1040. new_values = new_values.T
  1041. return [self.make_block(new_values)]
def where(self, other, cond, align=True, errors='raise',
          try_cast=False, axis=0, transpose=False):
    """
    evaluate the block; return result block(s) from the result

    Parameters
    ----------
    other : a ndarray/object
    cond  : the condition to respect
    align : boolean, perform alignment on other/cond
    errors : str, {'raise', 'ignore'}, default 'raise'
        - ``raise`` : allow exceptions to be raised
        - ``ignore`` : suppress exceptions. On error return original object
    axis : int
    transpose : boolean
        Set to True if self is stored with axes reversed

    Returns
    -------
    a new block(s), the result of the func
    """
    import pandas.core.computation.expressions as expressions

    assert errors in ['raise', 'ignore']

    values = self.values
    # keep the unmodified `other` for the coercion-retry path below
    orig_other = other
    if transpose:
        values = values.T

    other = getattr(other, '_values', getattr(other, 'values', other))
    cond = getattr(cond, 'values', cond)

    # If the default broadcasting would go in the wrong direction, then
    # explicitly reshape other instead
    if getattr(other, 'ndim', 0) >= 1:
        if values.ndim - 1 == other.ndim and axis == 1:
            other = other.reshape(tuple(other.shape + (1, )))
        elif transpose and values.ndim == self.ndim - 1:
            cond = cond.T

    if not hasattr(cond, 'shape'):
        raise ValueError("where must have a condition that is ndarray "
                         "like")

    # our where function
    def func(cond, values, other):
        if cond.ravel().all():
            # nothing masked: short-circuit with the original values
            return values

        values, other = self._try_coerce_args(values, other)
        try:
            return self._try_coerce_result(expressions.where(
                cond, values, other))
        except Exception as detail:
            if errors == 'raise':
                raise TypeError(
                    'Could not operate [{other!r}] with block values '
                    '[{detail!s}]'.format(other=other, detail=detail))
            else:
                # return the values
                result = np.empty(values.shape, dtype='float64')
                result.fill(np.nan)
                return result

    # see if we can operate on the entire block, or need item-by-item
    # or if we are a single block (ndim == 1)
    try:
        result = func(cond, values, other)
    except TypeError:

        # we cannot coerce, return a compat dtype
        # we are explicitly ignoring errors
        block = self.coerce_to_target_dtype(other)
        blocks = block.where(orig_other, cond, align=align,
                             errors=errors,
                             try_cast=try_cast, axis=axis,
                             transpose=transpose)
        return self._maybe_downcast(blocks, 'infer')

    if self._can_hold_na or self.ndim == 1:

        if transpose:
            result = result.T

        # try to cast if requested
        if try_cast:
            result = self._try_cast_result(result)

        return self.make_block(result)

    # might need to separate out blocks
    # group rows by whether their condition is fully True, so fully
    # untouched rows can stay in a block of the original dtype
    axis = cond.ndim - 1
    cond = cond.swapaxes(axis, 0)
    mask = np.array([cond[i].all() for i in range(cond.shape[0])],
                    dtype=bool)

    result_blocks = []
    for m in [mask, ~mask]:
        if m.any():
            r = self._try_cast_result(result.take(m.nonzero()[0],
                                                  axis=axis))
            result_blocks.append(
                self.make_block(r.T, placement=self.mgr_locs[m]))

    return result_blocks
  1130. def equals(self, other):
  1131. if self.dtype != other.dtype or self.shape != other.shape:
  1132. return False
  1133. return array_equivalent(self.values, other.values)
  1134. def _unstack(self, unstacker_func, new_columns, n_rows, fill_value):
  1135. """Return a list of unstacked blocks of self
  1136. Parameters
  1137. ----------
  1138. unstacker_func : callable
  1139. Partially applied unstacker.
  1140. new_columns : Index
  1141. All columns of the unstacked BlockManager.
  1142. n_rows : int
  1143. Only used in ExtensionBlock.unstack
  1144. fill_value : int
  1145. Only used in ExtensionBlock.unstack
  1146. Returns
  1147. -------
  1148. blocks : list of Block
  1149. New blocks of unstacked values.
  1150. mask : array_like of bool
  1151. The mask of columns of `blocks` we should keep.
  1152. """
  1153. unstacker = unstacker_func(self.values.T)
  1154. new_items = unstacker.get_new_columns()
  1155. new_placement = new_columns.get_indexer(new_items)
  1156. new_values, mask = unstacker.get_new_values()
  1157. mask = mask.any(0)
  1158. new_values = new_values.T[mask]
  1159. new_placement = new_placement[mask]
  1160. blocks = [make_block(new_values, placement=new_placement)]
  1161. return blocks, mask
def quantile(self, qs, interpolation='linear', axis=0):
    """
    compute the quantiles of the

    Parameters
    ----------
    qs: a scalar or list of the quantiles to be computed
    interpolation: type of interpolation, default 'linear'
    axis: axis to compute, default 0

    Returns
    -------
    Block
    """
    if self.is_datetimetz:
        # TODO: cleanup this special case.
        # We need to operate on i8 values for datetimetz
        # but `Block.get_values()` returns an ndarray of objects
        # right now. We need an API for "values to do numeric-like ops on"
        values = self.values.asi8

        # TODO: NonConsolidatableMixin shape
        # Usual shape inconsistencies for ExtensionBlocks
        if self.ndim > 1:
            values = values[None, :]
    else:
        values = self.get_values()
        values, _ = self._try_coerce_args(values, values)

    is_empty = values.shape[axis] == 0
    orig_scalar = not is_list_like(qs)
    if orig_scalar:
        # make list-like, unpack later
        qs = [qs]

    if is_empty:
        if self.ndim == 1:
            result = self._na_value
        else:
            # create the array of na_values
            # 2d len(values) * len(qs)
            result = np.repeat(np.array([self.fill_value] * len(qs)),
                               len(values)).reshape(len(values),
                                                    len(qs))
    else:
        # asarray needed for Sparse, see GH#24600
        # TODO: Why self.values and not values?
        mask = np.asarray(isna(self.values))
        result = nanpercentile(values, np.array(qs) * 100,
                               axis=axis, na_value=self.fill_value,
                               mask=mask, ndim=self.ndim,
                               interpolation=interpolation)

        result = np.array(result, copy=False)
        if self.ndim > 1:
            result = result.T

    if orig_scalar and not lib.is_scalar(result):
        # result could be scalar in case with is_empty and self.ndim == 1
        assert result.shape[-1] == 1, result.shape
        result = result[..., 0]
        result = lib.item_from_zerodim(result)

    ndim = getattr(result, 'ndim', None) or 0
    result = self._try_coerce_result(result)
    return make_block(result,
                      placement=np.arange(len(result)),
                      ndim=ndim)
  1222. def _replace_coerce(self, to_replace, value, inplace=True, regex=False,
  1223. convert=False, mask=None):
  1224. """
  1225. Replace value corresponding to the given boolean array with another
  1226. value.
  1227. Parameters
  1228. ----------
  1229. to_replace : object or pattern
  1230. Scalar to replace or regular expression to match.
  1231. value : object
  1232. Replacement object.
  1233. inplace : bool, default False
  1234. Perform inplace modification.
  1235. regex : bool, default False
  1236. If true, perform regular expression substitution.
  1237. convert : bool, default True
  1238. If true, try to coerce any object types to better types.
  1239. mask : array-like of bool, optional
  1240. True indicate corresponding element is ignored.
  1241. Returns
  1242. -------
  1243. A new block if there is anything to replace or the original block.
  1244. """
  1245. if mask.any():
  1246. if not regex:
  1247. self = self.coerce_to_target_dtype(value)
  1248. return self.putmask(mask, value, inplace=inplace)
  1249. else:
  1250. return self._replace_single(to_replace, value, inplace=inplace,
  1251. regex=regex,
  1252. convert=convert,
  1253. mask=mask)
  1254. return self
class NonConsolidatableMixIn(object):
    """ hold methods for the nonconsolidatable blocks """
    # these blocks are never merged with others during consolidation
    _can_consolidate = False
    _verify_integrity = False
    # values are not required to match the manager's ndim/shape
    _validate_ndim = False

    def __init__(self, values, placement, ndim=None):
        """Initialize a non-consolidatable block.

        'ndim' may be inferred from 'placement'.

        This will call continue to call __init__ for the other base
        classes mixed in with this Mixin.
        """
        # Placement must be converted to BlockPlacement so that we can check
        # its length
        if not isinstance(placement, libinternals.BlockPlacement):
            placement = libinternals.BlockPlacement(placement)

        # Maybe infer ndim from placement
        if ndim is None:
            if len(placement) != 1:
                ndim = 1
            else:
                ndim = 2
        super(NonConsolidatableMixIn, self).__init__(values, placement,
                                                     ndim=ndim)

    @property
    def shape(self):
        # values are 1-D; a 2-D block reports (n_cols, n_rows)
        if self.ndim == 1:
            return (len(self.values)),
        return (len(self.mgr_locs), len(self.values))

    def iget(self, col):
        # a non-consolidatable block only holds a single item, so any
        # column index other than 0 (or a null slice) is an error
        if self.ndim == 2 and isinstance(col, tuple):
            col, loc = col
            if not com.is_null_slice(col) and col != 0:
                raise IndexError("{0} only contains one item".format(self))
            return self.values[loc]
        else:
            if col != 0:
                raise IndexError("{0} only contains one item".format(self))
            return self.values

    def should_store(self, value):
        # only store values of our own holder type
        return isinstance(value, self._holder)

    def set(self, locs, values, check=False):
        assert locs.tolist() == [0]
        self.values = values

    def putmask(self, mask, new, align=True, inplace=False, axis=0,
                transpose=False):
        """
        putmask the data to the block; we must be a single block and not
        generate other blocks

        return the resulting block

        Parameters
        ----------
        mask  : the condition to respect
        new : a ndarray/object
        align : boolean, perform alignment on other/cond, default is True
        inplace : perform inplace modification, default is False

        Returns
        -------
        a new block, the result of the putmask
        """
        inplace = validate_bool_kwarg(inplace, 'inplace')

        # use block's copy logic.
        # .values may be an Index which does shallow copy by default
        new_values = self.values if inplace else self.copy().values
        new_values, new = self._try_coerce_args(new_values, new)

        if isinstance(new, np.ndarray) and len(new) == len(mask):
            # align the replacement values to the masked positions
            new = new[mask]

        mask = _safe_reshape(mask, new_values.shape)

        new_values[mask] = new
        new_values = self._try_coerce_result(new_values)
        return [self.make_block(values=new_values)]

    def _try_cast_result(self, result, dtype=None):
        # non-consolidatable blocks never recast the result
        return result

    def _get_unstack_items(self, unstacker, new_columns):
        """
        Get the placement, values, and mask for a Block unstack.

        This is shared between ObjectBlock and ExtensionBlock. They
        differ in that ObjectBlock passes the values, while ExtensionBlock
        passes the dummy ndarray of positions to be used by a take
        later.

        Parameters
        ----------
        unstacker : pandas.core.reshape.reshape._Unstacker
        new_columns : Index
            All columns of the unstacked BlockManager.

        Returns
        -------
        new_placement : ndarray[int]
            The placement of the new columns in `new_columns`.
        new_values : Union[ndarray, ExtensionArray]
            The first return value from _Unstacker.get_new_values.
        mask : ndarray[bool]
            The second return value from _Unstacker.get_new_values.
        """
        # shared with ExtensionBlock
        new_items = unstacker.get_new_columns()
        new_placement = new_columns.get_indexer(new_items)
        new_values, mask = unstacker.get_new_values()

        mask = mask.any(0)
        return new_placement, new_values, mask
  1354. class ExtensionBlock(NonConsolidatableMixIn, Block):
  1355. """Block for holding extension types.
  1356. Notes
  1357. -----
  1358. This holds all 3rd-party extension array types. It's also the immediate
  1359. parent class for our internal extension types' blocks, CategoricalBlock.
  1360. ExtensionArrays are limited to 1-D.
  1361. """
  1362. is_extension = True
  1363. def __init__(self, values, placement, ndim=None):
  1364. values = self._maybe_coerce_values(values)
  1365. super(ExtensionBlock, self).__init__(values, placement, ndim)
  1366. def _maybe_coerce_values(self, values):
  1367. """Unbox to an extension array.
  1368. This will unbox an ExtensionArray stored in an Index or Series.
  1369. ExtensionArrays pass through. No dtype coercion is done.
  1370. Parameters
  1371. ----------
  1372. values : Index, Series, ExtensionArray
  1373. Returns
  1374. -------
  1375. ExtensionArray
  1376. """
  1377. if isinstance(values, (ABCIndexClass, ABCSeries)):
  1378. values = values._values
  1379. return values
  1380. @property
  1381. def _holder(self):
  1382. # For extension blocks, the holder is values-dependent.
  1383. return type(self.values)
  1384. @property
  1385. def fill_value(self):
  1386. # Used in reindex_indexer
  1387. return self.values.dtype.na_value
  1388. @property
  1389. def _can_hold_na(self):
  1390. # The default ExtensionArray._can_hold_na is True
  1391. return self._holder._can_hold_na
  1392. @property
  1393. def is_view(self):
  1394. """Extension arrays are never treated as views."""
  1395. return False
  1396. @property
  1397. def is_numeric(self):
  1398. return self.values.dtype._is_numeric
  1399. def setitem(self, indexer, value):
  1400. """Set the value inplace, returning a same-typed block.
  1401. This differs from Block.setitem by not allowing setitem to change
  1402. the dtype of the Block.
  1403. Parameters
  1404. ----------
  1405. indexer : tuple, list-like, array-like, slice
  1406. The subset of self.values to set
  1407. value : object
  1408. The value being set
  1409. Returns
  1410. -------
  1411. Block
  1412. Notes
  1413. -----
  1414. `indexer` is a direct slice/positional indexer. `value` must
  1415. be a compatible shape.
  1416. """
  1417. if isinstance(indexer, tuple):
  1418. # we are always 1-D
  1419. indexer = indexer[0]
  1420. check_setitem_lengths(indexer, value, self.values)
  1421. self.values[indexer] = value
  1422. return self
  1423. def get_values(self, dtype=None):
  1424. # ExtensionArrays must be iterable, so this works.
  1425. values = np.asarray(self.values)
  1426. if values.ndim == self.ndim - 1:
  1427. values = values.reshape((1,) + values.shape)
  1428. return values
  1429. def to_dense(self):
  1430. return np.asarray(self.values)
  1431. def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
  1432. """
  1433. Take values according to indexer and return them as a block.
  1434. """
  1435. if fill_tuple is None:
  1436. fill_value = None
  1437. else:
  1438. fill_value = fill_tuple[0]
  1439. # axis doesn't matter; we are really a single-dim object
  1440. # but are passed the axis depending on the calling routing
  1441. # if its REALLY axis 0, then this will be a reindex and not a take
  1442. new_values = self.values.take(indexer, fill_value=fill_value,
  1443. allow_fill=True)
  1444. if self.ndim == 1 and new_mgr_locs is None:
  1445. new_mgr_locs = [0]
  1446. else:
  1447. if new_mgr_locs is None:
  1448. new_mgr_locs = self.mgr_locs
  1449. return self.make_block_same_class(new_values, new_mgr_locs)
  1450. def _can_hold_element(self, element):
  1451. # XXX: We may need to think about pushing this onto the array.
  1452. # We're doing the same as CategoricalBlock here.
  1453. return True
  1454. def _slice(self, slicer):
  1455. """ return a slice of my values """
  1456. # slice the category
  1457. # return same dims as we currently have
  1458. if isinstance(slicer, tuple) and len(slicer) == 2:
  1459. if not com.is_null_slice(slicer[0]):
  1460. raise AssertionError("invalid slicing for a 1-ndim "
  1461. "categorical")
  1462. slicer = slicer[1]
  1463. return self.values[slicer]
  1464. def formatting_values(self):
  1465. # Deprecating the ability to override _formatting_values.
  1466. # Do the warning here, it's only user in pandas, since we
  1467. # have to check if the subclass overrode it.
  1468. fv = getattr(type(self.values), '_formatting_values', None)
  1469. if fv and fv != ExtensionArray._formatting_values:
  1470. msg = (
  1471. "'ExtensionArray._formatting_values' is deprecated. "
  1472. "Specify 'ExtensionArray._formatter' instead."
  1473. )
  1474. warnings.warn(msg, DeprecationWarning, stacklevel=10)
  1475. return self.values._formatting_values()
  1476. return self.values
  1477. def concat_same_type(self, to_concat, placement=None):
  1478. """
  1479. Concatenate list of single blocks of the same type.
  1480. """
  1481. values = self._holder._concat_same_type(
  1482. [blk.values for blk in to_concat])
  1483. placement = placement or slice(0, len(values), 1)
  1484. return self.make_block_same_class(values, ndim=self.ndim,
  1485. placement=placement)
  1486. def fillna(self, value, limit=None, inplace=False, downcast=None):
  1487. values = self.values if inplace else self.values.copy()
  1488. values = values.fillna(value=value, limit=limit)
  1489. return [self.make_block_same_class(values=values,
  1490. placement=self.mgr_locs,
  1491. ndim=self.ndim)]
  1492. def interpolate(self, method='pad', axis=0, inplace=False, limit=None,
  1493. fill_value=None, **kwargs):
  1494. values = self.values if inplace else self.values.copy()
  1495. return self.make_block_same_class(
  1496. values=values.fillna(value=fill_value, method=method,
  1497. limit=limit),
  1498. placement=self.mgr_locs)
  1499. def shift(self, periods, axis=0, fill_value=None):
  1500. """
  1501. Shift the block by `periods`.
  1502. Dispatches to underlying ExtensionArray and re-boxes in an
  1503. ExtensionBlock.
  1504. """
  1505. # type: (int, Optional[BlockPlacement]) -> List[ExtensionBlock]
  1506. return [
  1507. self.make_block_same_class(
  1508. self.values.shift(periods=periods, fill_value=fill_value),
  1509. placement=self.mgr_locs, ndim=self.ndim)
  1510. ]
  1511. def where(self, other, cond, align=True, errors='raise',
  1512. try_cast=False, axis=0, transpose=False):
  1513. if isinstance(other, ABCDataFrame):
  1514. # ExtensionArrays are 1-D, so if we get here then
  1515. # `other` should be a DataFrame with a single column.
  1516. assert other.shape[1] == 1
  1517. other = other.iloc[:, 0]
  1518. other = extract_array(other, extract_numpy=True)
  1519. if isinstance(cond, ABCDataFrame):
  1520. assert cond.shape[1] == 1
  1521. cond = cond.iloc[:, 0]
  1522. cond = extract_array(cond, extract_numpy=True)
  1523. if lib.is_scalar(other) and isna(other):
  1524. # The default `other` for Series / Frame is np.nan
  1525. # we want to replace that with the correct NA value
  1526. # for the type
  1527. other = self.dtype.na_value
  1528. if is_sparse(self.values):
  1529. # TODO(SparseArray.__setitem__): remove this if condition
  1530. # We need to re-infer the type of the data after doing the
  1531. # where, for cases where the subtypes don't match
  1532. dtype = None
  1533. else:
  1534. dtype = self.dtype
  1535. try:
  1536. result = self.values.copy()
  1537. icond = ~cond
  1538. if lib.is_scalar(other):
  1539. result[icond] = other
  1540. else:
  1541. result[icond] = other[icond]
  1542. except (NotImplementedError, TypeError):
  1543. # NotImplementedError for class not implementing `__setitem__`
  1544. # TypeError for SparseArray, which implements just to raise
  1545. # a TypeError
  1546. result = self._holder._from_sequence(
  1547. np.where(cond, self.values, other),
  1548. dtype=dtype,
  1549. )
  1550. return self.make_block_same_class(result, placement=self.mgr_locs)
  1551. @property
  1552. def _ftype(self):
  1553. return getattr(self.values, '_pandas_ftype', Block._ftype)
  1554. def _unstack(self, unstacker_func, new_columns, n_rows, fill_value):
  1555. # ExtensionArray-safe unstack.
  1556. # We override ObjectBlock._unstack, which unstacks directly on the
  1557. # values of the array. For EA-backed blocks, this would require
  1558. # converting to a 2-D ndarray of objects.
  1559. # Instead, we unstack an ndarray of integer positions, followed by
  1560. # a `take` on the actual values.
  1561. dummy_arr = np.arange(n_rows)
  1562. dummy_unstacker = functools.partial(unstacker_func, fill_value=-1)
  1563. unstacker = dummy_unstacker(dummy_arr)
  1564. new_placement, new_values, mask = self._get_unstack_items(
  1565. unstacker, new_columns
  1566. )
  1567. blocks = [
  1568. self.make_block_same_class(
  1569. self.values.take(indices, allow_fill=True,
  1570. fill_value=fill_value),
  1571. [place])
  1572. for indices, place in zip(new_values.T, new_placement)
  1573. ]
  1574. return blocks, mask
class ObjectValuesExtensionBlock(ExtensionBlock):
    """
    Block providing backwards-compatibility for `.values`.

    Used by PeriodArray and IntervalArray to ensure that
    Series[T].values is an ndarray of objects.
    """

    def external_values(self, dtype=None):
        # `dtype` is accepted for interface compatibility and ignored;
        # legacy `.values` always sees an object-dtype array here.
        return self.values.astype(object)
class NumericBlock(Block):
    """Shared base class for blocks backed by plain numeric ndarrays."""
    __slots__ = ()
    # class-level flags consumed by the BlockManager dispatch machinery
    is_numeric = True
    _can_hold_na = True
  1587. class FloatOrComplexBlock(NumericBlock):
  1588. __slots__ = ()
  1589. def equals(self, other):
  1590. if self.dtype != other.dtype or self.shape != other.shape:
  1591. return False
  1592. left, right = self.values, other.values
  1593. return ((left == right) | (np.isnan(left) & np.isnan(right))).all()
class FloatBlock(FloatOrComplexBlock):
    """Block holding a float-dtype ndarray."""
    __slots__ = ()
    is_float = True

    def _can_hold_element(self, element):
        # True if `element` can be stored without changing the block dtype
        tipo = maybe_infer_dtype_type(element)
        if tipo is not None:
            # array-likes: any float/int dtype fits, but datetime-likes
            # (which subclass np.integer in dtype terms) do not
            return (issubclass(tipo.type, (np.floating, np.integer)) and
                    not issubclass(tipo.type, (np.datetime64, np.timedelta64)))
        # scalars: exclude bools and datetime-like scalars, which would
        # otherwise pass the int/float isinstance checks
        return (
            isinstance(
                element, (float, int, np.floating, np.int_, compat.long))
            and not isinstance(element, (bool, np.bool_, datetime, timedelta,
                                         np.datetime64, np.timedelta64)))

    def to_native_types(self, slicer=None, na_rep='', float_format=None,
                        decimal='.', quoting=None, **kwargs):
        """ convert to our native types format, slicing if desired """
        values = self.values
        if slicer is not None:
            values = values[:, slicer]

        # see gh-13418: no special formatting is desired at the
        # output (important for appropriate 'quoting' behaviour),
        # so do not pass it through the FloatArrayFormatter
        if float_format is None and decimal == '.':
            mask = isna(values)

            if not quoting:
                values = values.astype(str)
            else:
                values = np.array(values, dtype='object')

            values[mask] = na_rep
            return values

        from pandas.io.formats.format import FloatArrayFormatter
        formatter = FloatArrayFormatter(values, na_rep=na_rep,
                                        float_format=float_format,
                                        decimal=decimal, quoting=quoting,
                                        fixed_width=False)
        return formatter.get_result_as_array()

    def should_store(self, value):
        # when inserting a column should not coerce integers to floats
        # unnecessarily
        return (issubclass(value.dtype.type, np.floating) and
                value.dtype == self.dtype)
  1635. class ComplexBlock(FloatOrComplexBlock):
  1636. __slots__ = ()
  1637. is_complex = True
  1638. def _can_hold_element(self, element):
  1639. tipo = maybe_infer_dtype_type(element)
  1640. if tipo is not None:
  1641. return issubclass(tipo.type,
  1642. (np.floating, np.integer, np.complexfloating))
  1643. return (
  1644. isinstance(
  1645. element,
  1646. (float, int, complex, np.float_, np.int_, compat.long))
  1647. and not isinstance(element, (bool, np.bool_)))
  1648. def should_store(self, value):
  1649. return issubclass(value.dtype.type, np.complexfloating)
class IntBlock(NumericBlock):
    """Block holding an integer-dtype ndarray (cannot hold NaN)."""
    __slots__ = ()
    is_integer = True
    _can_hold_na = False

    def _can_hold_element(self, element):
        tipo = maybe_infer_dtype_type(element)
        if tipo is not None:
            # array-likes: must be integer, not datetime-like, and must
            # fit in this block's itemsize to avoid silent truncation
            return (issubclass(tipo.type, np.integer) and
                    not issubclass(tipo.type, (np.datetime64,
                                               np.timedelta64)) and
                    self.dtype.itemsize >= tipo.itemsize)
        return is_integer(element)

    def should_store(self, value):
        # only store back if dtype matches exactly (no int32/int64 mixing)
        return is_integer_dtype(value) and value.dtype == self.dtype
class DatetimeLikeBlockMixin(object):
    """Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock."""

    @property
    def _holder(self):
        # array class used to box the underlying data
        return DatetimeArray

    @property
    def _na_value(self):
        # boxed scalar representing a missing value
        return tslibs.NaT

    @property
    def fill_value(self):
        # raw i8 sentinel used when filling the underlying ndarray
        return tslibs.iNaT

    def get_values(self, dtype=None):
        """
        return object dtype as boxed values, such as Timestamps/Timedelta
        """
        if is_object_dtype(dtype):
            # box elementwise via _box_func; map_infer works on 1-D input,
            # so flatten first and restore the shape afterwards
            values = self.values
            if self.ndim > 1:
                values = values.ravel()
            values = lib.map_infer(values, self._box_func)
            if self.ndim > 1:
                values = values.reshape(self.values.shape)
            return values
        return self.values
class DatetimeBlock(DatetimeLikeBlockMixin, Block):
    """Block holding tz-naive datetime64[ns] data."""
    __slots__ = ()
    is_datetime = True
    _can_hold_na = True

    def __init__(self, values, placement, ndim=None):
        values = self._maybe_coerce_values(values)
        super(DatetimeBlock, self).__init__(values,
                                            placement=placement, ndim=ndim)

    def _maybe_coerce_values(self, values):
        """Input validation for values passed to __init__. Ensure that
        we have datetime64ns, coercing if necessary.

        Parameters
        ----------
        values : array-like
            Must be convertible to datetime64

        Returns
        -------
        values : ndarray[datetime64ns]

        Overridden by DatetimeTZBlock.
        """
        if values.dtype != _NS_DTYPE:
            values = conversion.ensure_datetime64ns(values)

        if isinstance(values, DatetimeArray):
            # unbox to the plain M8[ns] ndarray
            values = values._data

        assert isinstance(values, np.ndarray), type(values)
        return values

    def _astype(self, dtype, **kwargs):
        """
        these automatically copy, so copy=True has no effect
        raise on an except if raise == True
        """
        dtype = pandas_dtype(dtype)

        # if we are passed a datetime64[ns, tz]
        if is_datetime64tz_dtype(dtype):
            values = self.values
            if getattr(values, 'tz', None) is None:
                # naive values are interpreted as UTC wall times before
                # converting to the target tz
                values = DatetimeIndex(values).tz_localize('UTC')
            values = values.tz_convert(dtype.tz)
            return self.make_block(values)

        # delegate
        return super(DatetimeBlock, self)._astype(dtype=dtype, **kwargs)

    def _can_hold_element(self, element):
        tipo = maybe_infer_dtype_type(element)
        if tipo is not None:
            # array-likes: M8[ns] or raw i8 nanoseconds
            return tipo == _NS_DTYPE or tipo == np.int64
        return (is_integer(element) or isinstance(element, datetime) or
                isna(element))

    def _try_coerce_args(self, values, other):
        """
        Coerce values and other to dtype 'i8'. NaN and NaT convert to
        the smallest i8, and will correctly round-trip to NaT if converted
        back in _try_coerce_result. values is always ndarray-like, other
        may not be

        Parameters
        ----------
        values : ndarray-like
        other : ndarray-like or scalar

        Returns
        -------
        base-type values, base-type other
        """
        values = values.view('i8')

        if isinstance(other, bool):
            # bools are int-like but must not silently become datetimes
            raise TypeError
        elif is_null_datetimelike(other):
            other = tslibs.iNaT
        elif isinstance(other, (datetime, np.datetime64, date)):
            # box to Timestamp so tz info is accessible uniformly
            other = self._box_func(other)
            if getattr(other, 'tz') is not None:
                raise TypeError("cannot coerce a Timestamp with a tz on a "
                                "naive Block")
            other = other.asm8.view('i8')
        elif hasattr(other, 'dtype') and is_datetime64_dtype(other):
            other = other.astype('i8', copy=False).view('i8')
        else:
            # coercion issues
            # let higher levels handle
            raise TypeError(other)

        return values, other

    def _try_coerce_result(self, result):
        """ reverse of try_coerce_args """
        if isinstance(result, np.ndarray):
            # i8/float arrays round-trip back to M8[ns]
            if result.dtype.kind in ['i', 'f']:
                result = result.astype('M8[ns]')

        elif isinstance(result, (np.integer, np.float, np.datetime64)):
            result = self._box_func(result)
        return result

    @property
    def _box_func(self):
        return tslibs.Timestamp

    def to_native_types(self, slicer=None, na_rep=None, date_format=None,
                        quoting=None, **kwargs):
        """ convert to our native types format, slicing if desired """
        values = self.values
        i8values = self.values.view('i8')

        if slicer is not None:
            i8values = i8values[..., slicer]

        from pandas.io.formats.format import _get_format_datetime64_from_values
        fmt = _get_format_datetime64_from_values(values, date_format)

        # format the flat i8 data, then restore the (possibly 2-D) shape
        result = tslib.format_array_from_datetime(
            i8values.ravel(), tz=getattr(self.values, 'tz', None),
            format=fmt, na_rep=na_rep).reshape(i8values.shape)
        return np.atleast_2d(result)

    def should_store(self, value):
        # only naive ndarray-backed datetime64 may be stored here
        return (issubclass(value.dtype.type, np.datetime64) and
                not is_datetime64tz_dtype(value) and
                not is_extension_array_dtype(value))

    def set(self, locs, values):
        """
        Modify Block in-place with new item value

        Returns
        -------
        None
        """
        values = conversion.ensure_datetime64ns(values, copy=False)

        self.values[locs] = values

    def external_values(self):
        # legacy `.values` view: plain M8[ns] ndarray
        return np.asarray(self.values.astype('datetime64[ns]', copy=False))
  1806. class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
  1807. """ implement a datetime64 block with a tz attribute """
  1808. __slots__ = ()
  1809. is_datetimetz = True
  1810. is_extension = True
  1811. @property
  1812. def _holder(self):
  1813. return DatetimeArray
  1814. def _maybe_coerce_values(self, values):
  1815. """Input validation for values passed to __init__. Ensure that
  1816. we have datetime64TZ, coercing if necessary.
  1817. Parametetrs
  1818. -----------
  1819. values : array-like
  1820. Must be convertible to datetime64
  1821. Returns
  1822. -------
  1823. values : DatetimeArray
  1824. """
  1825. if not isinstance(values, self._holder):
  1826. values = self._holder(values)
  1827. if values.tz is None:
  1828. raise ValueError("cannot create a DatetimeTZBlock without a tz")
  1829. return values
  1830. @property
  1831. def is_view(self):
  1832. """ return a boolean if I am possibly a view """
  1833. # check the ndarray values of the DatetimeIndex values
  1834. return self.values._data.base is not None
  1835. def copy(self, deep=True):
  1836. """ copy constructor """
  1837. values = self.values
  1838. if deep:
  1839. values = values.copy(deep=True)
  1840. return self.make_block_same_class(values)
  1841. def get_values(self, dtype=None):
  1842. """
  1843. Returns an ndarray of values.
  1844. Parameters
  1845. ----------
  1846. dtype : np.dtype
  1847. Only `object`-like dtypes are respected here (not sure
  1848. why).
  1849. Returns
  1850. -------
  1851. values : ndarray
  1852. When ``dtype=object``, then and object-dtype ndarray of
  1853. boxed values is returned. Otherwise, an M8[ns] ndarray
  1854. is returned.
  1855. DatetimeArray is always 1-d. ``get_values`` will reshape
  1856. the return value to be the same dimensionality as the
  1857. block.
  1858. """
  1859. values = self.values
  1860. if is_object_dtype(dtype):
  1861. values = values._box_values(values._data)
  1862. values = np.asarray(values)
  1863. if self.ndim == 2:
  1864. # Ensure that our shape is correct for DataFrame.
  1865. # ExtensionArrays are always 1-D, even in a DataFrame when
  1866. # the analogous NumPy-backed column would be a 2-D ndarray.
  1867. values = values.reshape(1, -1)
  1868. return values
  1869. def to_dense(self):
  1870. # we request M8[ns] dtype here, even though it discards tzinfo,
  1871. # as lots of code (e.g. anything using values_from_object)
  1872. # expects that behavior.
  1873. return np.asarray(self.values, dtype=_NS_DTYPE)
  1874. def _slice(self, slicer):
  1875. """ return a slice of my values """
  1876. if isinstance(slicer, tuple):
  1877. col, loc = slicer
  1878. if not com.is_null_slice(col) and col != 0:
  1879. raise IndexError("{0} only contains one item".format(self))
  1880. return self.values[loc]
  1881. return self.values[slicer]
  1882. def _try_coerce_args(self, values, other):
  1883. """
  1884. localize and return i8 for the values
  1885. Parameters
  1886. ----------
  1887. values : ndarray-like
  1888. other : ndarray-like or scalar
  1889. Returns
  1890. -------
  1891. base-type values, base-type other
  1892. """
  1893. # asi8 is a view, needs copy
  1894. values = _block_shape(values.view("i8"), ndim=self.ndim)
  1895. if isinstance(other, ABCSeries):
  1896. other = self._holder(other)
  1897. if isinstance(other, bool):
  1898. raise TypeError
  1899. elif is_datetime64_dtype(other):
  1900. # add the tz back
  1901. other = self._holder(other, dtype=self.dtype)
  1902. elif is_null_datetimelike(other):
  1903. other = tslibs.iNaT
  1904. elif isinstance(other, self._holder):
  1905. if other.tz != self.values.tz:
  1906. raise ValueError("incompatible or non tz-aware value")
  1907. other = _block_shape(other.asi8, ndim=self.ndim)
  1908. elif isinstance(other, (np.datetime64, datetime, date)):
  1909. other = tslibs.Timestamp(other)
  1910. tz = getattr(other, 'tz', None)
  1911. # test we can have an equal time zone
  1912. if tz is None or str(tz) != str(self.values.tz):
  1913. raise ValueError("incompatible or non tz-aware value")
  1914. other = other.value
  1915. else:
  1916. raise TypeError(other)
  1917. return values, other
  1918. def _try_coerce_result(self, result):
  1919. """ reverse of try_coerce_args """
  1920. if isinstance(result, np.ndarray):
  1921. if result.dtype.kind in ['i', 'f']:
  1922. result = result.astype('M8[ns]')
  1923. elif isinstance(result, (np.integer, np.float, np.datetime64)):
  1924. result = self._box_func(result)
  1925. if isinstance(result, np.ndarray):
  1926. # allow passing of > 1dim if its trivial
  1927. if result.ndim > 1:
  1928. result = result.reshape(np.prod(result.shape))
  1929. # GH#24096 new values invalidates a frequency
  1930. result = self._holder._simple_new(result, freq=None,
  1931. dtype=self.values.dtype)
  1932. return result
  1933. @property
  1934. def _box_func(self):
  1935. return lambda x: tslibs.Timestamp(x, tz=self.dtype.tz)
  1936. def diff(self, n, axis=0):
  1937. """1st discrete difference
  1938. Parameters
  1939. ----------
  1940. n : int, number of periods to diff
  1941. axis : int, axis to diff upon. default 0
  1942. Return
  1943. ------
  1944. A list with a new TimeDeltaBlock.
  1945. Note
  1946. ----
  1947. The arguments here are mimicking shift so they are called correctly
  1948. by apply.
  1949. """
  1950. if axis == 0:
  1951. # Cannot currently calculate diff across multiple blocks since this
  1952. # function is invoked via apply
  1953. raise NotImplementedError
  1954. new_values = (self.values - self.shift(n, axis=axis)[0].values).asi8
  1955. # Reshape the new_values like how algos.diff does for timedelta data
  1956. new_values = new_values.reshape(1, len(new_values))
  1957. new_values = new_values.astype('timedelta64[ns]')
  1958. return [TimeDeltaBlock(new_values, placement=self.mgr_locs.indexer)]
  1959. def concat_same_type(self, to_concat, placement=None):
  1960. # need to handle concat([tz1, tz2]) here, since DatetimeArray
  1961. # only handles cases where all the tzs are the same.
  1962. # Instead of placing the condition here, it could also go into the
  1963. # is_uniform_join_units check, but I'm not sure what is better.
  1964. if len({x.dtype for x in to_concat}) > 1:
  1965. values = _concat._concat_datetime([x.values for x in to_concat])
  1966. placement = placement or slice(0, len(values), 1)
  1967. if self.ndim > 1:
  1968. values = np.atleast_2d(values)
  1969. return ObjectBlock(values, ndim=self.ndim, placement=placement)
  1970. return super(DatetimeTZBlock, self).concat_same_type(to_concat,
  1971. placement)
  1972. def fillna(self, value, limit=None, inplace=False, downcast=None):
  1973. # We support filling a DatetimeTZ with a `value` whose timezone
  1974. # is different by coercing to object.
  1975. try:
  1976. return super(DatetimeTZBlock, self).fillna(
  1977. value, limit, inplace, downcast
  1978. )
  1979. except (ValueError, TypeError):
  1980. # different timezones, or a non-tz
  1981. return self.astype(object).fillna(
  1982. value, limit=limit, inplace=inplace, downcast=downcast
  1983. )
  1984. def setitem(self, indexer, value):
  1985. # https://github.com/pandas-dev/pandas/issues/24020
  1986. # Need a dedicated setitem until #24020 (type promotion in setitem
  1987. # for extension arrays) is designed and implemented.
  1988. try:
  1989. return super(DatetimeTZBlock, self).setitem(indexer, value)
  1990. except (ValueError, TypeError):
  1991. newb = make_block(self.values.astype(object),
  1992. placement=self.mgr_locs,
  1993. klass=ObjectBlock,)
  1994. return newb.setitem(indexer, value)
  1995. def equals(self, other):
  1996. # override for significant performance improvement
  1997. if self.dtype != other.dtype or self.shape != other.shape:
  1998. return False
  1999. return (self.values.view('i8') == other.values.view('i8')).all()
class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock):
    """Block holding timedelta64[ns] data."""
    __slots__ = ()
    is_timedelta = True
    _can_hold_na = True
    is_numeric = False

    def __init__(self, values, placement, ndim=None):
        if values.dtype != _TD_DTYPE:
            # coerce anything timedelta-like to m8[ns]
            values = conversion.ensure_timedelta64ns(values)
        if isinstance(values, TimedeltaArray):
            # unbox to the plain m8[ns] ndarray
            values = values._data
        assert isinstance(values, np.ndarray), type(values)
        super(TimeDeltaBlock, self).__init__(values,
                                             placement=placement, ndim=ndim)

    @property
    def _holder(self):
        return TimedeltaArray

    @property
    def _box_func(self):
        # box raw i8 nanoseconds as Timedelta scalars
        return lambda x: Timedelta(x, unit='ns')

    def _can_hold_element(self, element):
        tipo = maybe_infer_dtype_type(element)
        if tipo is not None:
            return issubclass(tipo.type, (np.timedelta64, np.int64))
        return is_integer(element) or isinstance(
            element, (timedelta, np.timedelta64, np.int64))

    def fillna(self, value, **kwargs):

        # allow filling with integers to be
        # interpreted as nanoseconds
        if is_integer(value) and not isinstance(value, np.timedelta64):
            # Deprecation GH#24694, GH#19233
            warnings.warn("Passing integers to fillna is deprecated, will "
                          "raise a TypeError in a future version. To retain "
                          "the old behavior, pass pd.Timedelta(seconds=n) "
                          "instead.",
                          FutureWarning, stacklevel=6)
            value = Timedelta(value, unit='s')
        return super(TimeDeltaBlock, self).fillna(value, **kwargs)

    def _try_coerce_args(self, values, other):
        """
        Coerce values and other to int64, with null values converted to
        iNaT. values is always ndarray-like, other may not be

        Parameters
        ----------
        values : ndarray-like
        other : ndarray-like or scalar

        Returns
        -------
        base-type values, base-type other
        """
        values = values.view('i8')

        if isinstance(other, bool):
            # bools are int-like but must not silently become timedeltas
            raise TypeError
        elif is_null_datetimelike(other):
            other = tslibs.iNaT
        elif isinstance(other, (timedelta, np.timedelta64)):
            other = Timedelta(other).value
        elif hasattr(other, 'dtype') and is_timedelta64_dtype(other):
            other = other.astype('i8', copy=False).view('i8')
        else:
            # coercion issues
            # let higher levels handle
            raise TypeError(other)

        return values, other

    def _try_coerce_result(self, result):
        """ reverse of try_coerce_args / try_operate """
        if isinstance(result, np.ndarray):
            # capture NA positions before the view/cast, then restore
            # them as iNaT in the m8[ns] result
            mask = isna(result)
            if result.dtype.kind in ['i', 'f']:
                result = result.astype('m8[ns]')
            result[mask] = tslibs.iNaT

        elif isinstance(result, (np.integer, np.float)):
            result = self._box_func(result)

        return result

    def should_store(self, value):
        return (issubclass(value.dtype.type, np.timedelta64) and
                not is_extension_array_dtype(value))

    def to_native_types(self, slicer=None, na_rep=None, quoting=None,
                        **kwargs):
        """ convert to our native types format, slicing if desired """
        values = self.values
        if slicer is not None:
            values = values[:, slicer]
        mask = isna(values)

        rvalues = np.empty(values.shape, dtype=object)
        if na_rep is None:
            na_rep = 'NaT'
        rvalues[mask] = na_rep
        imask = (~mask).ravel()

        # FIXME:
        # should use the formats.format.Timedelta64Formatter here
        # to figure what format to pass to the Timedelta
        # e.g. to not show the decimals say
        rvalues.flat[imask] = np.array([Timedelta(val)._repr_base(format='all')
                                        for val in values.ravel()[imask]],
                                       dtype=object)
        return rvalues

    def external_values(self, dtype=None):
        # legacy `.values` view: plain m8[ns] ndarray
        return np.asarray(self.values.astype("timedelta64[ns]", copy=False))
class BoolBlock(NumericBlock):
    """Block holding a bool-dtype ndarray (cannot hold NaN)."""
    __slots__ = ()
    is_bool = True
    _can_hold_na = False

    def _can_hold_element(self, element):
        tipo = maybe_infer_dtype_type(element)
        if tipo is not None:
            return issubclass(tipo.type, np.bool_)
        return isinstance(element, (bool, np.bool_))

    def should_store(self, value):
        return (issubclass(value.dtype.type, np.bool_) and not
                is_extension_array_dtype(value))

    def replace(self, to_replace, value, inplace=False, filter=None,
                regex=False, convert=True):
        inplace = validate_bool_kwarg(inplace, 'inplace')
        to_replace_values = np.atleast_1d(to_replace)
        if not np.can_cast(to_replace_values, bool):
            # `to_replace` cannot match anything in a bool column; no-op
            return self
        return super(BoolBlock, self).replace(to_replace, value,
                                              inplace=inplace, filter=filter,
                                              regex=regex, convert=convert)
  2119. class ObjectBlock(Block):
  2120. __slots__ = ()
  2121. is_object = True
  2122. _can_hold_na = True
    def __init__(self, values, placement=None, ndim=2):
        # fixed-width string arrays are re-stored as object dtype so
        # arbitrary-length values can be assigned later
        if issubclass(values.dtype.type, compat.string_types):
            values = np.array(values, dtype=object)

        super(ObjectBlock, self).__init__(values, ndim=ndim,
                                          placement=placement)
  2128. @property
  2129. def is_bool(self):
  2130. """ we can be a bool if we have only bool values but are of type
  2131. object
  2132. """
  2133. return lib.is_bool_array(self.values.ravel())
  2134. # TODO: Refactor when convert_objects is removed since there will be 1 path
  2135. def convert(self, *args, **kwargs):
  2136. """ attempt to coerce any object types to better types return a copy of
  2137. the block (if copy = True) by definition we ARE an ObjectBlock!!!!!
  2138. can return multiple blocks!
  2139. """
  2140. if args:
  2141. raise NotImplementedError
  2142. by_item = kwargs.get('by_item', True)
  2143. new_inputs = ['coerce', 'datetime', 'numeric', 'timedelta']
  2144. new_style = False
  2145. for kw in new_inputs:
  2146. new_style |= kw in kwargs
  2147. if new_style:
  2148. fn = soft_convert_objects
  2149. fn_inputs = new_inputs
  2150. else:
  2151. fn = maybe_convert_objects
  2152. fn_inputs = ['convert_dates', 'convert_numeric',
  2153. 'convert_timedeltas']
  2154. fn_inputs += ['copy']
  2155. fn_kwargs = {key: kwargs[key] for key in fn_inputs if key in kwargs}
  2156. # operate column-by-column
  2157. def f(m, v, i):
  2158. shape = v.shape
  2159. values = fn(v.ravel(), **fn_kwargs)
  2160. try:
  2161. values = values.reshape(shape)
  2162. values = _block_shape(values, ndim=self.ndim)
  2163. except (AttributeError, NotImplementedError):
  2164. pass
  2165. return values
  2166. if by_item and not self._is_single_block:
  2167. blocks = self.split_and_operate(None, f, False)
  2168. else:
  2169. values = f(None, self.values.ravel(), None)
  2170. blocks = [make_block(values, ndim=self.ndim,
  2171. placement=self.mgr_locs)]
  2172. return blocks
  2173. def set(self, locs, values):
  2174. """
  2175. Modify Block in-place with new item value
  2176. Returns
  2177. -------
  2178. None
  2179. """
  2180. try:
  2181. self.values[locs] = values
  2182. except (ValueError):
  2183. # broadcasting error
  2184. # see GH6171
  2185. new_shape = list(values.shape)
  2186. new_shape[0] = len(self.items)
  2187. self.values = np.empty(tuple(new_shape), dtype=self.dtype)
  2188. self.values.fill(np.nan)
  2189. self.values[locs] = values
  2190. def _maybe_downcast(self, blocks, downcast=None):
  2191. if downcast is not None:
  2192. return blocks
  2193. # split and convert the blocks
  2194. return _extend_blocks([b.convert(datetime=True, numeric=False)
  2195. for b in blocks])
  2196. def _can_hold_element(self, element):
  2197. return True
  2198. def _try_coerce_args(self, values, other):
  2199. """ provide coercion to our input arguments """
  2200. if isinstance(other, ABCDatetimeIndex):
  2201. # May get a DatetimeIndex here. Unbox it.
  2202. other = other.array
  2203. if isinstance(other, DatetimeArray):
  2204. # hit in pandas/tests/indexing/test_coercion.py
  2205. # ::TestWhereCoercion::test_where_series_datetime64[datetime64tz]
  2206. # when falling back to ObjectBlock.where
  2207. other = other.astype(object)
  2208. return values, other
  2209. def should_store(self, value):
  2210. return not (issubclass(value.dtype.type,
  2211. (np.integer, np.floating, np.complexfloating,
  2212. np.datetime64, np.bool_)) or
  2213. # TODO(ExtensionArray): remove is_extension_type
  2214. # when all extension arrays have been ported.
  2215. is_extension_type(value) or
  2216. is_extension_array_dtype(value))
  2217. def replace(self, to_replace, value, inplace=False, filter=None,
  2218. regex=False, convert=True):
  2219. to_rep_is_list = is_list_like(to_replace)
  2220. value_is_list = is_list_like(value)
  2221. both_lists = to_rep_is_list and value_is_list
  2222. either_list = to_rep_is_list or value_is_list
  2223. result_blocks = []
  2224. blocks = [self]
  2225. if not either_list and is_re(to_replace):
  2226. return self._replace_single(to_replace, value, inplace=inplace,
  2227. filter=filter, regex=True,
  2228. convert=convert)
  2229. elif not (either_list or regex):
  2230. return super(ObjectBlock, self).replace(to_replace, value,
  2231. inplace=inplace,
  2232. filter=filter, regex=regex,
  2233. convert=convert)
  2234. elif both_lists:
  2235. for to_rep, v in zip(to_replace, value):
  2236. result_blocks = []
  2237. for b in blocks:
  2238. result = b._replace_single(to_rep, v, inplace=inplace,
  2239. filter=filter, regex=regex,
  2240. convert=convert)
  2241. result_blocks = _extend_blocks(result, result_blocks)
  2242. blocks = result_blocks
  2243. return result_blocks
  2244. elif to_rep_is_list and regex:
  2245. for to_rep in to_replace:
  2246. result_blocks = []
  2247. for b in blocks:
  2248. result = b._replace_single(to_rep, value, inplace=inplace,
  2249. filter=filter, regex=regex,
  2250. convert=convert)
  2251. result_blocks = _extend_blocks(result, result_blocks)
  2252. blocks = result_blocks
  2253. return result_blocks
  2254. return self._replace_single(to_replace, value, inplace=inplace,
  2255. filter=filter, convert=convert,
  2256. regex=regex)
  2257. def _replace_single(self, to_replace, value, inplace=False, filter=None,
  2258. regex=False, convert=True, mask=None):
  2259. """
  2260. Replace elements by the given value.
  2261. Parameters
  2262. ----------
  2263. to_replace : object or pattern
  2264. Scalar to replace or regular expression to match.
  2265. value : object
  2266. Replacement object.
  2267. inplace : bool, default False
  2268. Perform inplace modification.
  2269. filter : list, optional
  2270. regex : bool, default False
  2271. If true, perform regular expression substitution.
  2272. convert : bool, default True
  2273. If true, try to coerce any object types to better types.
  2274. mask : array-like of bool, optional
  2275. True indicate corresponding element is ignored.
  2276. Returns
  2277. -------
  2278. a new block, the result after replacing
  2279. """
  2280. inplace = validate_bool_kwarg(inplace, 'inplace')
  2281. # to_replace is regex compilable
  2282. to_rep_re = regex and is_re_compilable(to_replace)
  2283. # regex is regex compilable
  2284. regex_re = is_re_compilable(regex)
  2285. # only one will survive
  2286. if to_rep_re and regex_re:
  2287. raise AssertionError('only one of to_replace and regex can be '
  2288. 'regex compilable')
  2289. # if regex was passed as something that can be a regex (rather than a
  2290. # boolean)
  2291. if regex_re:
  2292. to_replace = regex
  2293. regex = regex_re or to_rep_re
  2294. # try to get the pattern attribute (compiled re) or it's a string
  2295. try:
  2296. pattern = to_replace.pattern
  2297. except AttributeError:
  2298. pattern = to_replace
  2299. # if the pattern is not empty and to_replace is either a string or a
  2300. # regex
  2301. if regex and pattern:
  2302. rx = re.compile(to_replace)
  2303. else:
  2304. # if the thing to replace is not a string or compiled regex call
  2305. # the superclass method -> to_replace is some kind of object
  2306. return super(ObjectBlock, self).replace(to_replace, value,
  2307. inplace=inplace,
  2308. filter=filter, regex=regex)
  2309. new_values = self.values if inplace else self.values.copy()
  2310. # deal with replacing values with objects (strings) that match but
  2311. # whose replacement is not a string (numeric, nan, object)
  2312. if isna(value) or not isinstance(value, compat.string_types):
  2313. def re_replacer(s):
  2314. try:
  2315. return value if rx.search(s) is not None else s
  2316. except TypeError:
  2317. return s
  2318. else:
  2319. # value is guaranteed to be a string here, s can be either a string
  2320. # or null if it's null it gets returned
  2321. def re_replacer(s):
  2322. try:
  2323. return rx.sub(value, s)
  2324. except TypeError:
  2325. return s
  2326. f = np.vectorize(re_replacer, otypes=[self.dtype])
  2327. if filter is None:
  2328. filt = slice(None)
  2329. else:
  2330. filt = self.mgr_locs.isin(filter).nonzero()[0]
  2331. if mask is None:
  2332. new_values[filt] = f(new_values[filt])
  2333. else:
  2334. new_values[filt][mask] = f(new_values[filt][mask])
  2335. # convert
  2336. block = self.make_block(new_values)
  2337. if convert:
  2338. block = block.convert(by_item=True, numeric=False)
  2339. return block
  2340. def _replace_coerce(self, to_replace, value, inplace=True, regex=False,
  2341. convert=False, mask=None):
  2342. """
  2343. Replace value corresponding to the given boolean array with another
  2344. value.
  2345. Parameters
  2346. ----------
  2347. to_replace : object or pattern
  2348. Scalar to replace or regular expression to match.
  2349. value : object
  2350. Replacement object.
  2351. inplace : bool, default False
  2352. Perform inplace modification.
  2353. regex : bool, default False
  2354. If true, perform regular expression substitution.
  2355. convert : bool, default True
  2356. If true, try to coerce any object types to better types.
  2357. mask : array-like of bool, optional
  2358. True indicate corresponding element is ignored.
  2359. Returns
  2360. -------
  2361. A new block if there is anything to replace or the original block.
  2362. """
  2363. if mask.any():
  2364. block = super(ObjectBlock, self)._replace_coerce(
  2365. to_replace=to_replace, value=value, inplace=inplace,
  2366. regex=regex, convert=convert, mask=mask)
  2367. if convert:
  2368. block = [b.convert(by_item=True, numeric=False, copy=True)
  2369. for b in block]
  2370. return block
  2371. return self
class CategoricalBlock(ExtensionBlock):
    """Block backed by a Categorical (1-dimensional extension array)."""

    __slots__ = ()
    is_categorical = True
    _verify_integrity = True
    _can_hold_na = True
    # concatenation helper shared by concat_same_type
    _concatenator = staticmethod(_concat._concat_categorical)

    def __init__(self, values, placement, ndim=None):
        from pandas.core.arrays.categorical import _maybe_to_categorical
        # coerce to categorical if we can
        super(CategoricalBlock, self).__init__(_maybe_to_categorical(values),
                                               placement=placement,
                                               ndim=ndim)

    @property
    def _holder(self):
        return Categorical

    @property
    def array_dtype(self):
        """ the dtype to return if I want to construct this block as an
        array
        """
        return np.object_

    def _try_coerce_result(self, result):
        """ reverse of try_coerce_args """
        # GH12564: CategoricalBlock is 1-dim only
        # while returned results could be any dim
        if ((not is_categorical_dtype(result)) and
                isinstance(result, np.ndarray)):
            result = _block_shape(result, ndim=self.ndim)
        return result

    def to_dense(self):
        # Categorical.get_values returns a DatetimeIndex for datetime
        # categories, so we can't simply use `np.asarray(self.values)` like
        # other types.
        return self.values.get_values()

    def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs):
        """ convert to our native types format, slicing if desired """
        values = self.values
        if slicer is not None:
            # Categorical is always one dimension
            values = values[slicer]
        mask = isna(values)
        values = np.array(values, dtype='object')
        values[mask] = na_rep
        # we are expected to return a 2-d ndarray
        return values.reshape(1, len(values))

    def concat_same_type(self, to_concat, placement=None):
        """
        Concatenate list of single blocks of the same type.
        Note that this CategoricalBlock._concat_same_type *may* not
        return a CategoricalBlock. When the categories in `to_concat`
        differ, this will return an object ndarray.
        If / when we decide we don't like that behavior:
        1. Change Categorical._concat_same_type to use union_categoricals
        2. Delete this method.
        """
        values = self._concatenator([blk.values for blk in to_concat],
                                    axis=self.ndim - 1)
        # not using self.make_block_same_class as values can be object dtype
        return make_block(
            values, placement=placement or slice(0, len(values), 1),
            ndim=self.ndim)

    def where(self, other, cond, align=True, errors='raise',
              try_cast=False, axis=0, transpose=False):
        # TODO(CategoricalBlock.where):
        # This can all be deleted in favor of ExtensionBlock.where once
        # we enforce the deprecation.
        object_msg = (
            "Implicitly converting categorical to object-dtype ndarray. "
            "One or more of the values in 'other' are not present in this "
            "categorical's categories. A future version of pandas will raise "
            "a ValueError when 'other' contains different categories.\n\n"
            "To preserve the current behavior, add the new categories to "
            "the categorical before calling 'where', or convert the "
            "categorical to a different dtype."
        )
        try:
            # Attempt to do preserve categorical dtype.
            result = super(CategoricalBlock, self).where(
                other, cond, align, errors, try_cast, axis, transpose
            )
        except (TypeError, ValueError):
            # deprecated fallback: cast to object dtype and warn
            warnings.warn(object_msg, FutureWarning, stacklevel=6)
            result = self.astype(object).where(other, cond, align=align,
                                               errors=errors,
                                               try_cast=try_cast,
                                               axis=axis, transpose=transpose)
        return result
  2459. # -----------------------------------------------------------------
  2460. # Constructor Helpers
  2461. def get_block_type(values, dtype=None):
  2462. """
  2463. Find the appropriate Block subclass to use for the given values and dtype.
  2464. Parameters
  2465. ----------
  2466. values : ndarray-like
  2467. dtype : numpy or pandas dtype
  2468. Returns
  2469. -------
  2470. cls : class, subclass of Block
  2471. """
  2472. dtype = dtype or values.dtype
  2473. vtype = dtype.type
  2474. if is_sparse(dtype):
  2475. # Need this first(ish) so that Sparse[datetime] is sparse
  2476. cls = ExtensionBlock
  2477. elif is_categorical(values):
  2478. cls = CategoricalBlock
  2479. elif issubclass(vtype, np.datetime64):
  2480. assert not is_datetime64tz_dtype(values)
  2481. cls = DatetimeBlock
  2482. elif is_datetime64tz_dtype(values):
  2483. cls = DatetimeTZBlock
  2484. elif is_interval_dtype(dtype) or is_period_dtype(dtype):
  2485. cls = ObjectValuesExtensionBlock
  2486. elif is_extension_array_dtype(values):
  2487. cls = ExtensionBlock
  2488. elif issubclass(vtype, np.floating):
  2489. cls = FloatBlock
  2490. elif issubclass(vtype, np.timedelta64):
  2491. assert issubclass(vtype, np.integer)
  2492. cls = TimeDeltaBlock
  2493. elif issubclass(vtype, np.complexfloating):
  2494. cls = ComplexBlock
  2495. elif issubclass(vtype, np.integer):
  2496. cls = IntBlock
  2497. elif dtype == np.bool_:
  2498. cls = BoolBlock
  2499. else:
  2500. cls = ObjectBlock
  2501. return cls
  2502. def make_block(values, placement, klass=None, ndim=None, dtype=None,
  2503. fastpath=None):
  2504. if fastpath is not None:
  2505. # GH#19265 pyarrow is passing this
  2506. warnings.warn("fastpath argument is deprecated, will be removed "
  2507. "in a future release.", DeprecationWarning)
  2508. if klass is None:
  2509. dtype = dtype or values.dtype
  2510. klass = get_block_type(values, dtype)
  2511. elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values):
  2512. # TODO: This is no longer hit internally; does it need to be retained
  2513. # for e.g. pyarrow?
  2514. values = DatetimeArray._simple_new(values, dtype=dtype)
  2515. return klass(values, ndim=ndim, placement=placement)
  2516. # -----------------------------------------------------------------
  2517. def _extend_blocks(result, blocks=None):
  2518. """ return a new extended blocks, givin the result """
  2519. from pandas.core.internals import BlockManager
  2520. if blocks is None:
  2521. blocks = []
  2522. if isinstance(result, list):
  2523. for r in result:
  2524. if isinstance(r, list):
  2525. blocks.extend(r)
  2526. else:
  2527. blocks.append(r)
  2528. elif isinstance(result, BlockManager):
  2529. blocks.extend(result.blocks)
  2530. else:
  2531. blocks.append(result)
  2532. return blocks
  2533. def _block_shape(values, ndim=1, shape=None):
  2534. """ guarantee the shape of the values to be at least 1 d """
  2535. if values.ndim < ndim:
  2536. if shape is None:
  2537. shape = values.shape
  2538. if not is_extension_array_dtype(values):
  2539. # TODO: https://github.com/pandas-dev/pandas/issues/23023
  2540. # block.shape is incorrect for "2D" ExtensionArrays
  2541. # We can't, and don't need to, reshape.
  2542. values = values.reshape(tuple((1, ) + shape))
  2543. return values
  2544. def _merge_blocks(blocks, dtype=None, _can_consolidate=True):
  2545. if len(blocks) == 1:
  2546. return blocks[0]
  2547. if _can_consolidate:
  2548. if dtype is None:
  2549. if len({b.dtype for b in blocks}) != 1:
  2550. raise AssertionError("_merge_blocks are invalid!")
  2551. dtype = blocks[0].dtype
  2552. # FIXME: optimization potential in case all mgrs contain slices and
  2553. # combination of those slices is a slice, too.
  2554. new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks])
  2555. new_values = np.vstack([b.values for b in blocks])
  2556. argsort = np.argsort(new_mgr_locs)
  2557. new_values = new_values[argsort]
  2558. new_mgr_locs = new_mgr_locs[argsort]
  2559. return make_block(new_values, placement=new_mgr_locs)
  2560. # no merge
  2561. return blocks
def _block2d_to_blocknd(values, placement, shape, labels, ref_items):
    """ pivot to the labels shape """
    # NOTE(review): ``ref_items`` is unused here — presumably retained for
    # interface compatibility with callers; confirm before removing.
    panel_shape = (len(placement),) + shape
    # TODO: lexsort depth needs to be 2!!
    # Create observation selection vector using major and minor
    # labels, for converting to panel format.
    selector = _factor_indexer(shape[1:], labels)
    mask = np.zeros(np.prod(shape), dtype=bool)
    mask.put(selector, True)
    if mask.all():
        # every position is observed: keep the original dtype
        pvalues = np.empty(panel_shape, dtype=values.dtype)
    else:
        # missing positions need a fill value, which may require promoting
        # the dtype (e.g. int -> float to hold NaN)
        dtype, fill_value = maybe_promote(values.dtype)
        pvalues = np.empty(panel_shape, dtype=dtype)
        pvalues.fill(fill_value)
    # scatter each input column into its panel slice at the observed slots
    for i in range(len(placement)):
        pvalues[i].flat[mask] = values[:, i]
    return make_block(pvalues, placement=placement)
  2580. def _safe_reshape(arr, new_shape):
  2581. """
  2582. If possible, reshape `arr` to have shape `new_shape`,
  2583. with a couple of exceptions (see gh-13012):
  2584. 1) If `arr` is a ExtensionArray or Index, `arr` will be
  2585. returned as is.
  2586. 2) If `arr` is a Series, the `_values` attribute will
  2587. be reshaped and returned.
  2588. Parameters
  2589. ----------
  2590. arr : array-like, object to be reshaped
  2591. new_shape : int or tuple of ints, the new shape
  2592. """
  2593. if isinstance(arr, ABCSeries):
  2594. arr = arr._values
  2595. if not isinstance(arr, ABCExtensionArray):
  2596. arr = arr.reshape(new_shape)
  2597. return arr
  2598. def _factor_indexer(shape, labels):
  2599. """
  2600. given a tuple of shape and a list of Categorical labels, return the
  2601. expanded label indexer
  2602. """
  2603. mult = np.array(shape)[::-1].cumprod()[::-1]
  2604. return ensure_platform_int(
  2605. np.sum(np.array(labels).T * np.append(mult, [1]), axis=1).T)
def _putmask_smart(v, m, n):
    """
    Return a new ndarray, try to preserve dtype if possible.

    Parameters
    ----------
    v : `values`, updated in-place (array like)
    m : `mask`, applies to both sides (array like)
    n : `new values` either scalar or an array like aligned with `values`

    Returns
    -------
    values : ndarray with updated values
        this *may* be a copy of the original

    See Also
    --------
    ndarray.putmask
    """
    # we cannot use np.asarray() here as we cannot have conversions
    # that numpy does when numeric are mixed with strings

    # n should be the length of the mask or a scalar here:
    # broadcast scalars (including 0-d ndarrays) to the mask's length
    if not is_list_like(n):
        n = np.repeat(n, len(m))
    elif isinstance(n, np.ndarray) and n.ndim == 0:  # numpy scalar
        n = np.repeat(np.array(n, ndmin=1), len(m))
    # see if we are only masking values that if putted
    # will work in the current dtype
    try:
        nn = n[m]
        # make sure that we have a nullable type
        # if we have nulls
        if not _isna_compat(v, nn[0]):
            raise ValueError
        # we ignore ComplexWarning here
        with warnings.catch_warnings(record=True):
            warnings.simplefilter("ignore", np.ComplexWarning)
            nn_at = nn.astype(v.dtype)
        # avoid invalid dtype comparisons
        # between numbers & strings
        # only compare integers/floats
        # don't compare integers to datetimelikes
        if (not is_numeric_v_string_like(nn, nn_at) and
                (is_float_dtype(nn.dtype) or
                 is_integer_dtype(nn.dtype) and
                 is_float_dtype(nn_at.dtype) or
                 is_integer_dtype(nn_at.dtype))):
            # if the round-trip cast is lossless, assign into a copy of v
            # and keep the original dtype
            comp = (nn == nn_at)
            if is_list_like(comp) and comp.all():
                nv = v.copy()
                nv[m] = nn_at
                return nv
    except (ValueError, IndexError, TypeError, OverflowError):
        # fall through to the dtype-promoting path below
        pass
    n = np.asarray(n)

    def _putmask_preserve(nv, n):
        # assign aligned values where the mask is set; fall back to
        # broadcasting n when fancy assignment fails
        try:
            nv[m] = n[m]
        except (IndexError, ValueError):
            nv[m] = n
        return nv
    # preserves dtype if possible
    if v.dtype.kind == n.dtype.kind:
        return _putmask_preserve(v, n)
    # change the dtype if needed
    dtype, _ = maybe_promote(n.dtype)
    if is_extension_type(v.dtype) and is_object_dtype(dtype):
        v = v.get_values(dtype)
    else:
        v = v.astype(dtype)
    return _putmask_preserve(v, n)