managers.py
# -*- coding: utf-8 -*-
from collections import defaultdict
from functools import partial
import itertools
import operator
import re

import numpy as np

from pandas._libs import internals as libinternals, lib
from pandas.compat import map, range, zip
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.cast import (
    find_common_type, infer_dtype_from_scalar, maybe_convert_objects,
    maybe_promote)
from pandas.core.dtypes.common import (
    _NS_DTYPE, is_datetimelike_v_numeric, is_extension_array_dtype,
    is_extension_type, is_list_like, is_numeric_v_string_like, is_scalar)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.generic import ABCExtensionArray, ABCSeries
from pandas.core.dtypes.missing import isna

import pandas.core.algorithms as algos
from pandas.core.arrays.sparse import _maybe_to_sparse
from pandas.core.base import PandasObject
from pandas.core.index import Index, MultiIndex, ensure_index
from pandas.core.indexing import maybe_convert_indices

from pandas.io.formats.printing import pprint_thing

from .blocks import (
    Block, CategoricalBlock, DatetimeTZBlock, ExtensionBlock,
    ObjectValuesExtensionBlock, _extend_blocks, _merge_blocks, _safe_reshape,
    get_block_type, make_block)
from .concat import (  # all for concatenate_block_managers
    combine_concat_plans, concatenate_join_units, get_mgr_concatenation_plan,
    is_uniform_join_units)

# TODO: flexible with index=None and/or items=None


class BlockManager(PandasObject):
    """
    Core internal data structure to implement DataFrame, Series, Panel, etc.

    Manage a bunch of labeled 2D mixed-type ndarrays. Essentially it's a
    lightweight blocked set of labeled data to be manipulated by the DataFrame
    public API class

    Attributes
    ----------
    shape
    ndim
    axes
    values
    items

    Methods
    -------
    set_axis(axis, new_labels)
    copy(deep=True)

    get_dtype_counts
    get_ftype_counts
    get_dtypes
    get_ftypes

    apply(func, axes, block_filter_fn)

    get_bool_data
    get_numeric_data

    get_slice(slice_like, axis)
    get(label)
    iget(loc)

    take(indexer, axis)
    reindex_axis(new_labels, axis)
    reindex_indexer(new_labels, indexer, axis)

    delete(label)
    insert(loc, label, value)
    set(label, value)

    Parameters
    ----------

    Notes
    -----
    This is *not* a public API class
    """
    __slots__ = ['axes', 'blocks', '_ndim', '_shape', '_known_consolidated',
                 '_is_consolidated', '_blknos', '_blklocs']

    def __init__(self, blocks, axes, do_integrity_check=True):
        self.axes = [ensure_index(ax) for ax in axes]
        self.blocks = tuple(blocks)

        for block in blocks:
            if block.is_sparse:
                if len(block.mgr_locs) != 1:
                    raise AssertionError("Sparse block refers to multiple "
                                         "items")
            else:
                if self.ndim != block.ndim:
                    raise AssertionError(
                        'Number of Block dimensions ({block}) must equal '
                        'number of axes ({self})'.format(block=block.ndim,
                                                         self=self.ndim))

        if do_integrity_check:
            self._verify_integrity()

        self._consolidate_check()

        self._rebuild_blknos_and_blklocs()

    def make_empty(self, axes=None):
        """ return an empty BlockManager with the items axis of len 0 """
        if axes is None:
            axes = [ensure_index([])] + [ensure_index(a)
                                         for a in self.axes[1:]]

        # preserve dtype if possible
        if self.ndim == 1:
            blocks = np.array([], dtype=self.array_dtype)
        else:
            blocks = []
        return self.__class__(blocks, axes)

    def __nonzero__(self):
        return True

    # Python3 compat
    __bool__ = __nonzero__

    @property
    def shape(self):
        return tuple(len(ax) for ax in self.axes)

    @property
    def ndim(self):
        return len(self.axes)

    def set_axis(self, axis, new_labels):
        new_labels = ensure_index(new_labels)
        old_len = len(self.axes[axis])
        new_len = len(new_labels)

        if new_len != old_len:
            raise ValueError(
                'Length mismatch: Expected axis has {old} elements, new '
                'values have {new} elements'.format(old=old_len, new=new_len))

        self.axes[axis] = new_labels

    def rename_axis(self, mapper, axis, copy=True, level=None):
        """
        Rename one of axes.

        Parameters
        ----------
        mapper : unary callable
        axis : int
        copy : boolean, default True
        level : int, default None
        """
        obj = self.copy(deep=copy)
        obj.set_axis(axis, _transform_index(self.axes[axis], mapper, level))
        return obj

    @property
    def _is_single_block(self):
        if self.ndim == 1:
            return True

        if len(self.blocks) != 1:
            return False

        blk = self.blocks[0]
        return (blk.mgr_locs.is_slice_like and
                blk.mgr_locs.as_slice == slice(0, len(self), 1))

    def _rebuild_blknos_and_blklocs(self):
        """
        Update mgr._blknos / mgr._blklocs.
        """
        new_blknos = np.empty(self.shape[0], dtype=np.int64)
        new_blklocs = np.empty(self.shape[0], dtype=np.int64)
        new_blknos.fill(-1)
        new_blklocs.fill(-1)

        for blkno, blk in enumerate(self.blocks):
            rl = blk.mgr_locs
            new_blknos[rl.indexer] = blkno
            new_blklocs[rl.indexer] = np.arange(len(rl))

        if (new_blknos == -1).any():
            raise AssertionError("Gaps in blk ref_locs")

        self._blknos = new_blknos
        self._blklocs = new_blklocs
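
    # Illustration (not in the original source, uses private attributes): for
    # a frame whose int column sits between two float columns, two blocks
    # exist and the two maps look roughly like this:
    #
    #   >>> import pandas as pd
    #   >>> df = pd.DataFrame({'a': [1.0], 'b': [1], 'c': [2.0]})
    #   >>> df._data._blknos    # column i lives in block _blknos[i]
    #   array([0, 1, 0])
    #   >>> df._data._blklocs   # ...at position _blklocs[i] within that block
    #   array([0, 0, 1])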

    @property
    def items(self):
        return self.axes[0]

    def _get_counts(self, f):
        """ return a dict of the counts of the function in BlockManager """
        self._consolidate_inplace()
        counts = dict()
        for b in self.blocks:
            v = f(b)
            counts[v] = counts.get(v, 0) + b.shape[0]
        return counts

    def get_dtype_counts(self):
        return self._get_counts(lambda b: b.dtype.name)

    def get_ftype_counts(self):
        return self._get_counts(lambda b: b.ftype)

    def get_dtypes(self):
        dtypes = np.array([blk.dtype for blk in self.blocks])
        return algos.take_1d(dtypes, self._blknos, allow_fill=False)

    def get_ftypes(self):
        ftypes = np.array([blk.ftype for blk in self.blocks])
        return algos.take_1d(ftypes, self._blknos, allow_fill=False)

    def __getstate__(self):
        block_values = [b.values for b in self.blocks]
        block_items = [self.items[b.mgr_locs.indexer] for b in self.blocks]
        axes_array = [ax for ax in self.axes]

        extra_state = {
            '0.14.1': {
                'axes': axes_array,
                'blocks': [dict(values=b.values, mgr_locs=b.mgr_locs.indexer)
                           for b in self.blocks]
            }
        }

        # First three elements of the state are to maintain forward
        # compatibility with 0.13.1.
        return axes_array, block_values, block_items, extra_state

    def __setstate__(self, state):
        def unpickle_block(values, mgr_locs):
            return make_block(values, placement=mgr_locs)

        if (isinstance(state, tuple) and len(state) >= 4 and
                '0.14.1' in state[3]):
            state = state[3]['0.14.1']
            self.axes = [ensure_index(ax) for ax in state['axes']]
            self.blocks = tuple(unpickle_block(b['values'], b['mgr_locs'])
                                for b in state['blocks'])
        else:
            # discard anything after 3rd, support beta pickling format for a
            # little while longer
            ax_arrays, bvalues, bitems = state[:3]

            self.axes = [ensure_index(ax) for ax in ax_arrays]

            if len(bitems) == 1 and self.axes[0].equals(bitems[0]):
                # This is a workaround for pre-0.14.1 pickles that didn't
                # support unpickling multi-block frames/panels with non-unique
                # columns/items, because given a manager with items ["a", "b",
                # "a"] there's no way of knowing which block's "a" is where.
                #
                # Single-block case can be supported under the assumption that
                # block items corresponded to manager items 1-to-1.
                all_mgr_locs = [slice(0, len(bitems[0]))]
            else:
                all_mgr_locs = [self.axes[0].get_indexer(blk_items)
                                for blk_items in bitems]

            self.blocks = tuple(
                unpickle_block(values, mgr_locs)
                for values, mgr_locs in zip(bvalues, all_mgr_locs))

        self._post_setstate()

    def _post_setstate(self):
        self._is_consolidated = False
        self._known_consolidated = False
        self._rebuild_blknos_and_blklocs()

    def __len__(self):
        return len(self.items)

    def __unicode__(self):
        output = pprint_thing(self.__class__.__name__)
        for i, ax in enumerate(self.axes):
            if i == 0:
                output += u'\nItems: {ax}'.format(ax=ax)
            else:
                output += u'\nAxis {i}: {ax}'.format(i=i, ax=ax)

        for block in self.blocks:
            output += u'\n{block}'.format(block=pprint_thing(block))
        return output

    def _verify_integrity(self):
        mgr_shape = self.shape
        tot_items = sum(len(x.mgr_locs) for x in self.blocks)
        for block in self.blocks:
            if block._verify_integrity and block.shape[1:] != mgr_shape[1:]:
                construction_error(tot_items, block.shape[1:], self.axes)
        if len(self.items) != tot_items:
            raise AssertionError('Number of manager items must equal union of '
                                 'block items\n# manager items: {0}, # '
                                 'tot_items: {1}'.format(
                                     len(self.items), tot_items))

    def apply(self, f, axes=None, filter=None, do_integrity_check=False,
              consolidate=True, **kwargs):
        """
        iterate over the blocks, collect and create a new block manager

        Parameters
        ----------
        f : the callable or function name to operate on at the block level
        axes : optional (if not supplied, use self.axes)
        filter : list, if supplied, only call the block if the filter is in
            the block
        do_integrity_check : boolean, default False. Do the block manager
            integrity check
        consolidate: boolean, default True. Join together blocks having same
            dtype

        Returns
        -------
        Block Manager (new object)
        """

        result_blocks = []

        # filter kwarg is used in replace-* family of methods
        if filter is not None:
            filter_locs = set(self.items.get_indexer_for(filter))
            if len(filter_locs) == len(self.items):
                # All items are included, as if there were no filtering
                filter = None
            else:
                kwargs['filter'] = filter_locs

        if consolidate:
            self._consolidate_inplace()

        if f == 'where':
            align_copy = True
            if kwargs.get('align', True):
                align_keys = ['other', 'cond']
            else:
                align_keys = ['cond']
        elif f == 'putmask':
            align_copy = False
            if kwargs.get('align', True):
                align_keys = ['new', 'mask']
            else:
                align_keys = ['mask']
        elif f == 'fillna':
            # fillna internally does putmask, maybe it's better to do this
            # at mgr, not block level?
            align_copy = False
            align_keys = ['value']
        else:
            align_keys = []

        # TODO(EA): may interfere with ExtensionBlock.setitem for blocks
        # with a .values attribute.
        aligned_args = {k: kwargs[k]
                        for k in align_keys
                        if hasattr(kwargs[k], 'values') and
                        not isinstance(kwargs[k], ABCExtensionArray)}

        for b in self.blocks:
            if filter is not None:
                if not b.mgr_locs.isin(filter_locs).any():
                    result_blocks.append(b)
                    continue

            if aligned_args:
                b_items = self.items[b.mgr_locs.indexer]

                for k, obj in aligned_args.items():
                    axis = getattr(obj, '_info_axis_number', 0)
                    kwargs[k] = obj.reindex(b_items, axis=axis,
                                            copy=align_copy)

            applied = getattr(b, f)(**kwargs)
            result_blocks = _extend_blocks(applied, result_blocks)

        if len(result_blocks) == 0:
            return self.make_empty(axes or self.axes)

        bm = self.__class__(result_blocks, axes or self.axes,
                            do_integrity_check=do_integrity_check)
        bm._consolidate_inplace()
        return bm
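
    # Note (illustrative, not from the original source): the public manager
    # methods further below are thin wrappers that dispatch a block-level
    # method name through ``apply``; e.g. ``mgr.astype(float)`` is roughly
    #
    #   >>> new_mgr = mgr.apply('astype', dtype=float)
    #
    # with each block's own ``astype`` producing the result blocks that are
    # collected into the new manager.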

    def quantile(self, axis=0, consolidate=True, transposed=False,
                 interpolation='linear', qs=None, numeric_only=None):
        """
        Iterate over blocks applying quantile reduction.
        This routine is intended for reduction type operations and
        will do inference on the generated blocks.

        Parameters
        ----------
        axis: reduction axis, default 0
        consolidate: boolean, default True. Join together blocks having same
            dtype
        transposed: boolean, default False
            we are holding transposed data
        interpolation : type of interpolation, default 'linear'
        qs : a scalar or list of the quantiles to be computed
        numeric_only : ignored

        Returns
        -------
        Block Manager (new object)
        """
        # Series dispatches to DataFrame for quantile, which allows us to
        # simplify some of the code here and in the blocks
        assert self.ndim >= 2

        if consolidate:
            self._consolidate_inplace()

        def get_axe(block, qs, axes):
            from pandas import Float64Index
            if is_list_like(qs):
                ax = Float64Index(qs)
            elif block.ndim == 1:
                ax = Float64Index([qs])
            else:
                ax = axes[0]
            return ax

        axes, blocks = [], []
        for b in self.blocks:
            block = b.quantile(axis=axis, qs=qs, interpolation=interpolation)

            axe = get_axe(b, qs, axes=self.axes)

            axes.append(axe)
            blocks.append(block)

        # note that some DatetimeTZ, Categorical are always ndim==1
        ndim = {b.ndim for b in blocks}
        assert 0 not in ndim, ndim

        if 2 in ndim:

            new_axes = list(self.axes)

            # multiple blocks that are reduced
            if len(blocks) > 1:
                new_axes[1] = axes[0]

                # reset the placement to the original
                for b, sb in zip(blocks, self.blocks):
                    b.mgr_locs = sb.mgr_locs
            else:
                new_axes[axis] = Index(np.concatenate(
                    [ax.values for ax in axes]))

            if transposed:
                new_axes = new_axes[::-1]
                blocks = [b.make_block(b.values.T,
                                       placement=np.arange(b.shape[1])
                                       ) for b in blocks]

            return self.__class__(blocks, new_axes)

        # single block, i.e. ndim == {1}
        values = _concat._concat_compat([b.values for b in blocks])

        # compute the orderings of our original data
        if len(self.blocks) > 1:

            indexer = np.empty(len(self.axes[0]), dtype=np.intp)
            i = 0
            for b in self.blocks:
                for j in b.mgr_locs:
                    indexer[j] = i
                    i = i + 1

            values = values.take(indexer)

        return SingleBlockManager(
            [make_block(values,
                        ndim=1,
                        placement=np.arange(len(values)))],
            axes[0])

    def isna(self, func, **kwargs):
        return self.apply('apply', func=func, **kwargs)

    def where(self, **kwargs):
        return self.apply('where', **kwargs)

    def setitem(self, **kwargs):
        return self.apply('setitem', **kwargs)

    def putmask(self, **kwargs):
        return self.apply('putmask', **kwargs)

    def diff(self, **kwargs):
        return self.apply('diff', **kwargs)

    def interpolate(self, **kwargs):
        return self.apply('interpolate', **kwargs)

    def shift(self, **kwargs):
        return self.apply('shift', **kwargs)

    def fillna(self, **kwargs):
        return self.apply('fillna', **kwargs)

    def downcast(self, **kwargs):
        return self.apply('downcast', **kwargs)

    def astype(self, dtype, **kwargs):
        return self.apply('astype', dtype=dtype, **kwargs)

    def convert(self, **kwargs):
        return self.apply('convert', **kwargs)

    def replace(self, **kwargs):
        return self.apply('replace', **kwargs)

    def replace_list(self, src_list, dest_list, inplace=False, regex=False):
        """ do a list replace """
        inplace = validate_bool_kwarg(inplace, 'inplace')

        # figure out our mask a-priori to avoid repeated replacements
        values = self.as_array()

        def comp(s, regex=False):
            """
            Generate a bool array by performing an equality check, or by
            performing element-wise regular expression matching
            """
            if isna(s):
                return isna(values)
            if hasattr(s, 'asm8'):
                return _compare_or_regex_search(maybe_convert_objects(values),
                                                getattr(s, 'asm8'), regex)
            return _compare_or_regex_search(values, s, regex)

        masks = [comp(s, regex) for i, s in enumerate(src_list)]

        result_blocks = []
        src_len = len(src_list) - 1
        for blk in self.blocks:

            # it's possible to get multiple result blocks here;
            # replace ALWAYS will return a list
            rb = [blk if inplace else blk.copy()]
            for i, (s, d) in enumerate(zip(src_list, dest_list)):
                new_rb = []
                for b in rb:
                    m = masks[i][b.mgr_locs.indexer]
                    convert = i == src_len
                    result = b._replace_coerce(mask=m, to_replace=s, value=d,
                                               inplace=inplace,
                                               convert=convert, regex=regex)
                    if m.any():
                        new_rb = _extend_blocks(result, new_rb)
                    else:
                        new_rb.append(b)
                rb = new_rb
            result_blocks.extend(rb)

        bm = self.__class__(result_blocks, self.axes)
        bm._consolidate_inplace()
        return bm

    def reshape_nd(self, axes, **kwargs):
        """ a 2d-nd reshape operation on a BlockManager """
        return self.apply('reshape_nd', axes=axes, **kwargs)

    def is_consolidated(self):
        """
        Return True if the blocks are consolidated, i.e. no two blocks
        share the same ftype.
        """
        if not self._known_consolidated:
            self._consolidate_check()
        return self._is_consolidated

    def _consolidate_check(self):
        ftypes = [blk.ftype for blk in self.blocks]
        self._is_consolidated = len(ftypes) == len(set(ftypes))
        self._known_consolidated = True
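
    # Sketch of what "consolidated" means (illustrative, uses the private
    # ``_data`` attribute): adding a column creates a new block even when a
    # block of the same dtype already exists, so two float blocks can coexist
    # until they are merged:
    #
    #   >>> df = pd.DataFrame({'a': [1.0]})
    #   >>> df['b'] = 2.0
    #   >>> len(df._data.blocks)
    #   2
    #   >>> df._data._consolidate_inplace()
    #   >>> len(df._data.blocks)
    #   1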

    @property
    def is_mixed_type(self):
        # Warning, consolidation needs to get checked upstairs
        self._consolidate_inplace()
        return len(self.blocks) > 1

    @property
    def is_numeric_mixed_type(self):
        # Warning, consolidation needs to get checked upstairs
        self._consolidate_inplace()
        return all(block.is_numeric for block in self.blocks)

    @property
    def is_datelike_mixed_type(self):
        # Warning, consolidation needs to get checked upstairs
        self._consolidate_inplace()
        return any(block.is_datelike for block in self.blocks)

    @property
    def any_extension_types(self):
        """Whether any of the blocks in this manager are extension blocks"""
        return any(block.is_extension for block in self.blocks)

    @property
    def is_view(self):
        """ return a boolean if we are a single block and are a view """
        if len(self.blocks) == 1:
            return self.blocks[0].is_view

        # It is technically possible to figure out which blocks are views
        # e.g. [ b.values.base is not None for b in self.blocks ]
        # but then we have the case of possibly some blocks being a view
        # and some blocks not. setting in theory is possible on the non-view
        # blocks w/o causing a SettingWithCopy raise/warn. But this is a bit
        # complicated
        return False

    def get_bool_data(self, copy=False):
        """
        Parameters
        ----------
        copy : boolean, default False
            Whether to copy the blocks
        """
        self._consolidate_inplace()
        return self.combine([b for b in self.blocks if b.is_bool], copy)

    def get_numeric_data(self, copy=False):
        """
        Parameters
        ----------
        copy : boolean, default False
            Whether to copy the blocks
        """
        self._consolidate_inplace()
        return self.combine([b for b in self.blocks if b.is_numeric], copy)

    def combine(self, blocks, copy=True):
        """ return a new manager with the blocks """
        if len(blocks) == 0:
            return self.make_empty()

        # FIXME: optimization potential
        indexer = np.sort(np.concatenate([b.mgr_locs.as_array
                                          for b in blocks]))
        inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0])

        new_blocks = []
        for b in blocks:
            b = b.copy(deep=copy)
            b.mgr_locs = algos.take_1d(inv_indexer, b.mgr_locs.as_array,
                                       axis=0, allow_fill=False)
            new_blocks.append(b)

        axes = list(self.axes)
        axes[0] = self.items.take(indexer)

        return self.__class__(new_blocks, axes, do_integrity_check=False)

    def get_slice(self, slobj, axis=0):
        if axis >= self.ndim:
            raise IndexError("Requested axis not found in manager")

        if axis == 0:
            new_blocks = self._slice_take_blocks_ax0(slobj)
        else:
            slicer = [slice(None)] * (axis + 1)
            slicer[axis] = slobj
            slicer = tuple(slicer)
            new_blocks = [blk.getitem_block(slicer) for blk in self.blocks]

        new_axes = list(self.axes)
        new_axes[axis] = new_axes[axis][slobj]

        bm = self.__class__(new_blocks, new_axes, do_integrity_check=False)
        bm._consolidate_inplace()
        return bm

    def __contains__(self, item):
        return item in self.items

    @property
    def nblocks(self):
        return len(self.blocks)

    def copy(self, deep=True):
        """
        Make deep or shallow copy of BlockManager

        Parameters
        ----------
        deep : boolean or string, default True
            If False, return shallow copy (do not copy data)
            If 'all', copy data and a deep copy of the index

        Returns
        -------
        copy : BlockManager
        """
        # this preserves the notion of view copying of axes
        if deep:
            if deep == 'all':
                copy = lambda ax: ax.copy(deep=True)
            else:
                copy = lambda ax: ax.view()
            new_axes = [copy(ax) for ax in self.axes]
        else:
            new_axes = list(self.axes)
        return self.apply('copy', axes=new_axes, deep=deep,
                          do_integrity_check=False)
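
    # Note on the semantics above (descriptive, not from the original source):
    # ``deep=True`` copies block values but only takes views of the axes,
    # ``deep='all'`` deep-copies the axes as well, and ``deep=False`` shares
    # both values and axes with the original manager:
    #
    #   >>> shallow = mgr.copy(deep=False)   # data changes propagate back
    #   >>> deepest = mgr.copy(deep='all')   # fully independent copy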

    def as_array(self, transpose=False, items=None):
        """Convert the blockmanager data into a numpy array.

        Parameters
        ----------
        transpose : boolean, default False
            If True, transpose the return array
        items : list of strings or None
            Names of block items that will be included in the returned
            array. ``None`` means that all block items will be used

        Returns
        -------
        arr : ndarray
        """
        if len(self.blocks) == 0:
            arr = np.empty(self.shape, dtype=float)
            return arr.transpose() if transpose else arr

        if items is not None:
            mgr = self.reindex_axis(items, axis=0)
        else:
            mgr = self

        if self._is_single_block and mgr.blocks[0].is_datetimetz:
            # TODO(Block.get_values): Make DatetimeTZBlock.get_values
            # always be object dtype. Some callers seem to want the
            # DatetimeArray (previously DTI)
            arr = mgr.blocks[0].get_values(dtype=object)
        elif self._is_single_block or not self.is_mixed_type:
            arr = np.asarray(mgr.blocks[0].get_values())
        else:
            arr = mgr._interleave()

        return arr.transpose() if transpose else arr

    def _interleave(self):
        """
        Return ndarray from blocks with specified item order
        Items must be contained in the blocks
        """
        from pandas.core.dtypes.common import is_sparse
        dtype = _interleaved_dtype(self.blocks)

        # TODO: https://github.com/pandas-dev/pandas/issues/22791
        # Give EAs some input on what happens here. Sparse needs this.
        if is_sparse(dtype):
            dtype = dtype.subtype
        elif is_extension_array_dtype(dtype):
            dtype = 'object'

        result = np.empty(self.shape, dtype=dtype)

        itemmask = np.zeros(self.shape[0])

        for blk in self.blocks:
            rl = blk.mgr_locs
            result[rl.indexer] = blk.get_values(dtype)
            itemmask[rl.indexer] = 1

        if not itemmask.all():
            raise AssertionError('Some items were not contained in blocks')

        return result
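
    # Illustration (not in the original source): ``_interleaved_dtype`` finds
    # a common dtype across blocks, so a mixed int/float frame interleaves to
    # float64 while anything mixed with strings falls back to object:
    #
    #   >>> pd.DataFrame({'a': [1], 'b': [1.5]}).values.dtype
    #   dtype('float64')
    #   >>> pd.DataFrame({'a': [1], 'b': ['x']}).values.dtype
    #   dtype('O')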

    def to_dict(self, copy=True):
        """
        Return a dict of str(dtype) -> BlockManager

        Parameters
        ----------
        copy : boolean, default True

        Returns
        -------
        values : a dict of dtype -> BlockManager

        Notes
        -----
        This consolidates based on str(dtype)
        """
        self._consolidate_inplace()

        bd = {}
        for b in self.blocks:
            bd.setdefault(str(b.dtype), []).append(b)

        return {dtype: self.combine(blocks, copy=copy)
                for dtype, blocks in bd.items()}

    def xs(self, key, axis=1, copy=True, takeable=False):
        if axis < 1:
            raise AssertionError(
                'Can only take xs across axis >= 1, got {ax}'.format(ax=axis))

        # take by position
        if takeable:
            loc = key
        else:
            loc = self.axes[axis].get_loc(key)

        slicer = [slice(None, None) for _ in range(self.ndim)]
        slicer[axis] = loc
        slicer = tuple(slicer)

        new_axes = list(self.axes)

        # could be an array indexer!
        if isinstance(loc, (slice, np.ndarray)):
            new_axes[axis] = new_axes[axis][loc]
        else:
            new_axes.pop(axis)

        new_blocks = []
        if len(self.blocks) > 1:
            # we must copy here as we are mixed type
            for blk in self.blocks:
                newb = make_block(values=blk.values[slicer],
                                  klass=blk.__class__,
                                  placement=blk.mgr_locs)
                new_blocks.append(newb)
        elif len(self.blocks) == 1:
            block = self.blocks[0]
            vals = block.values[slicer]
            if copy:
                vals = vals.copy()
            new_blocks = [make_block(values=vals,
                                     placement=block.mgr_locs,
                                     klass=block.__class__)]

        return self.__class__(new_blocks, new_axes)

    def fast_xs(self, loc):
        """
        Get a cross-section for a given location in the items;
        handles duplicates. The result *could* be a view in the
        case of a single block.
        """
        if len(self.blocks) == 1:
            return self.blocks[0].iget((slice(None), loc))

        items = self.items

        # non-unique (GH4726)
        if not items.is_unique:
            result = self._interleave()
            if self.ndim == 2:
                result = result.T

            return result[loc]

        # unique
        dtype = _interleaved_dtype(self.blocks)

        n = len(items)
        if is_extension_array_dtype(dtype):
            # we'll eventually construct an ExtensionArray.
            result = np.empty(n, dtype=object)
        else:
            result = np.empty(n, dtype=dtype)

        for blk in self.blocks:
            # Such assignment may incorrectly coerce NaT to None
            # result[blk.mgr_locs] = blk._slice((slice(None), loc))
            for i, rl in enumerate(blk.mgr_locs):
                result[rl] = blk._try_coerce_result(blk.iget((i, loc)))

        if is_extension_array_dtype(dtype):
            result = dtype.construct_array_type()._from_sequence(
                result, dtype=dtype
            )

        return result
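
    # Example (illustrative, uses the private ``_data`` attribute): a row
    # cross-section spans blocks, so its dtype is the interleaved common
    # dtype of all blocks, not that of any single column:
    #
    #   >>> df = pd.DataFrame({'a': [1], 'b': [1.5]})
    #   >>> df._data.fast_xs(0)
    #   array([1. , 1.5])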

    def consolidate(self):
        """
        Join together blocks having same dtype

        Returns
        -------
        y : BlockManager
        """
        if self.is_consolidated():
            return self

        bm = self.__class__(self.blocks, self.axes)
        bm._is_consolidated = False
        bm._consolidate_inplace()
        return bm

    def _consolidate_inplace(self):
        if not self.is_consolidated():
            self.blocks = tuple(_consolidate(self.blocks))
            self._is_consolidated = True
            self._known_consolidated = True
            self._rebuild_blknos_and_blklocs()

    def get(self, item, fastpath=True):
        """
        Return values for selected item (ndarray or BlockManager).
        """
        if self.items.is_unique:

            if not isna(item):
                loc = self.items.get_loc(item)
            else:
                indexer = np.arange(len(self.items))[isna(self.items)]

                # allow a single nan location indexer
                if not is_scalar(indexer):
                    if len(indexer) == 1:
                        loc = indexer.item()
                    else:
                        raise ValueError("cannot label index with a null key")

            return self.iget(loc, fastpath=fastpath)
        else:

            if isna(item):
                raise TypeError("cannot label index with a null key")

            indexer = self.items.get_indexer_for([item])
            return self.reindex_indexer(new_axis=self.items[indexer],
                                        indexer=indexer, axis=0,
                                        allow_dups=True)

    def iget(self, i, fastpath=True):
        """
        Return the data as a SingleBlockManager if fastpath=True and possible;
        otherwise return as an ndarray
        """
        block = self.blocks[self._blknos[i]]
        values = block.iget(self._blklocs[i])
        if not fastpath or not block._box_to_block_values or values.ndim != 1:
            return values

        # fastpath shortcut for selecting a single-dim from a 2-dim BM
        return SingleBlockManager(
            [block.make_block_same_class(values,
                                         placement=slice(0, len(values)),
                                         ndim=1)],
            self.axes[1])

    def delete(self, item):
        """
        Delete selected item (items if non-unique) in-place.
        """
        indexer = self.items.get_loc(item)

        is_deleted = np.zeros(self.shape[0], dtype=np.bool_)
        is_deleted[indexer] = True
        ref_loc_offset = -is_deleted.cumsum()

        is_blk_deleted = [False] * len(self.blocks)

        if isinstance(indexer, int):
            affected_start = indexer
        else:
            affected_start = is_deleted.nonzero()[0][0]

        for blkno, _ in _fast_count_smallints(self._blknos[affected_start:]):
            blk = self.blocks[blkno]
            bml = blk.mgr_locs
            blk_del = is_deleted[bml.indexer].nonzero()[0]

            if len(blk_del) == len(bml):
                is_blk_deleted[blkno] = True
                continue
            elif len(blk_del) != 0:
                blk.delete(blk_del)
                bml = blk.mgr_locs

            blk.mgr_locs = bml.add(ref_loc_offset[bml.indexer])

        # FIXME: use Index.delete as soon as it uses fastpath=True
        self.axes[0] = self.items[~is_deleted]
        self.blocks = tuple(b for blkno, b in enumerate(self.blocks)
                            if not is_blk_deleted[blkno])
        self._shape = None
        self._rebuild_blknos_and_blklocs()

    def set(self, item, value):
        """
        Set new item in-place. Does not consolidate. Adds new Block if not
        contained in the current set of items
        """
        # FIXME: refactor, clearly separate broadcasting & zip-like assignment
        #        can prob also fix the various if tests for sparse/categorical

        # TODO(EA): Remove an is_extension_ when all extension types satisfy
        # the interface
        value_is_extension_type = (is_extension_type(value) or
                                   is_extension_array_dtype(value))

        # categorical/sparse/datetimetz
        if value_is_extension_type:

            def value_getitem(placement):
                return value
        else:
            if value.ndim == self.ndim - 1:
                value = _safe_reshape(value, (1,) + value.shape)

                def value_getitem(placement):
                    return value
            else:

                def value_getitem(placement):
                    return value[placement.indexer]

            if value.shape[1:] != self.shape[1:]:
                raise AssertionError('Shape of new values must be compatible '
                                     'with manager shape')

        try:
            loc = self.items.get_loc(item)
        except KeyError:
            # This item wasn't present, just insert at end
            self.insert(len(self.items), item, value)
            return

        if isinstance(loc, int):
            loc = [loc]

        blknos = self._blknos[loc]
        blklocs = self._blklocs[loc].copy()

        unfit_mgr_locs = []
        unfit_val_locs = []
        removed_blknos = []
        for blkno, val_locs in libinternals.get_blkno_placements(blknos,
                                                                 self.nblocks,
                                                                 group=True):
            blk = self.blocks[blkno]
            blk_locs = blklocs[val_locs.indexer]
            if blk.should_store(value):
                blk.set(blk_locs, value_getitem(val_locs))
            else:
                unfit_mgr_locs.append(blk.mgr_locs.as_array[blk_locs])
                unfit_val_locs.append(val_locs)

                # If all block items are unfit, schedule the block for removal.
                if len(val_locs) == len(blk.mgr_locs):
                    removed_blknos.append(blkno)
                else:
                    self._blklocs[blk.mgr_locs.indexer] = -1
                    blk.delete(blk_locs)
                    self._blklocs[blk.mgr_locs.indexer] = np.arange(len(blk))

        if len(removed_blknos):
            # Remove blocks & update blknos accordingly
            is_deleted = np.zeros(self.nblocks, dtype=np.bool_)
            is_deleted[removed_blknos] = True

            new_blknos = np.empty(self.nblocks, dtype=np.int64)
            new_blknos.fill(-1)
            new_blknos[~is_deleted] = np.arange(self.nblocks -
                                                len(removed_blknos))
            self._blknos = algos.take_1d(new_blknos, self._blknos, axis=0,
                                         allow_fill=False)
            self.blocks = tuple(blk for i, blk in enumerate(self.blocks)
                                if i not in set(removed_blknos))

        if unfit_val_locs:
            unfit_mgr_locs = np.concatenate(unfit_mgr_locs)
            unfit_count = len(unfit_mgr_locs)

            new_blocks = []
            if value_is_extension_type:
                # This code (ab-)uses the fact that sparse blocks contain only
                # one item.
                new_blocks.extend(
                    make_block(values=value.copy(), ndim=self.ndim,
                               placement=slice(mgr_loc, mgr_loc + 1))
                    for mgr_loc in unfit_mgr_locs)

                self._blknos[unfit_mgr_locs] = (np.arange(unfit_count) +
                                                len(self.blocks))
                self._blklocs[unfit_mgr_locs] = 0
            else:
                # unfit_val_locs contains BlockPlacement objects
                unfit_val_items = unfit_val_locs[0].append(unfit_val_locs[1:])

                new_blocks.append(
                    make_block(values=value_getitem(unfit_val_items),
                               ndim=self.ndim, placement=unfit_mgr_locs))

                self._blknos[unfit_mgr_locs] = len(self.blocks)
                self._blklocs[unfit_mgr_locs] = np.arange(unfit_count)

            self.blocks += tuple(new_blocks)

            # Newly created block's dtype may already be present.
            self._known_consolidated = False

    def insert(self, loc, item, value, allow_duplicates=False):
        """
        Insert item at selected position.

        Parameters
        ----------
        loc : int
        item : hashable
        value : array_like
        allow_duplicates: bool
            If False, trying to insert non-unique item will raise
        """
        if not allow_duplicates and item in self.items:
            # Should this be a different kind of error??
            raise ValueError('cannot insert {}, already exists'.format(item))

        if not isinstance(loc, int):
            raise TypeError("loc must be int")

        # insert to the axis; this could possibly raise a TypeError
        new_axis = self.items.insert(loc, item)

        block = make_block(values=value, ndim=self.ndim,
                           placement=slice(loc, loc + 1))

        for blkno, count in _fast_count_smallints(self._blknos[loc:]):
            blk = self.blocks[blkno]
            if count == len(blk.mgr_locs):
                blk.mgr_locs = blk.mgr_locs.add(1)
            else:
                new_mgr_locs = blk.mgr_locs.as_array.copy()
                new_mgr_locs[new_mgr_locs >= loc] += 1
                blk.mgr_locs = new_mgr_locs

        if loc == self._blklocs.shape[0]:
            # np.append is a lot faster, let's use it if we can.
            self._blklocs = np.append(self._blklocs, 0)
            self._blknos = np.append(self._blknos, len(self.blocks))
        else:
            self._blklocs = np.insert(self._blklocs, loc, 0)
            self._blknos = np.insert(self._blknos, loc, len(self.blocks))

        self.axes[0] = new_axis
        self.blocks += (block,)
        self._shape = None

        self._known_consolidated = False

        if len(self.blocks) > 100:
            self._consolidate_inplace()
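
    # Note (descriptive, not from the original source): the threshold above
    # means that growing a frame one column at a time, e.g.
    #
    #   >>> for i in range(200):
    #   ...     df[i] = 0.0
    #
    # creates one new block per insert but re-consolidates whenever more than
    # 100 blocks accumulate, bounding fragmentation at some copying cost.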

    def reindex_axis(self, new_index, axis, method=None, limit=None,
                     fill_value=None, copy=True):
        """
        Conform block manager to new index.
        """
        new_index = ensure_index(new_index)
        new_index, indexer = self.axes[axis].reindex(new_index, method=method,
                                                     limit=limit)

        return self.reindex_indexer(new_index, indexer, axis=axis,
                                    fill_value=fill_value, copy=copy)

    def reindex_indexer(self, new_axis, indexer, axis, fill_value=None,
                        allow_dups=False, copy=True):
        """
        Parameters
        ----------
        new_axis : Index
        indexer : ndarray of int64 or None
            pandas-indexer with -1's only.
        axis : int
        fill_value : object
        allow_dups : bool
        """
        if indexer is None:
            if new_axis is self.axes[axis] and not copy:
                return self

            result = self.copy(deep=copy)
            result.axes = list(self.axes)
            result.axes[axis] = new_axis
            return result

        self._consolidate_inplace()

        # some axes don't allow reindexing with dups
        if not allow_dups:
            self.axes[axis]._can_reindex(indexer)

        if axis >= self.ndim:
            raise IndexError("Requested axis not found in manager")

        if axis == 0:
            new_blocks = self._slice_take_blocks_ax0(indexer,
                                                     fill_tuple=(fill_value,))
        else:
            new_blocks = [blk.take_nd(indexer, axis=axis, fill_tuple=(
                fill_value if fill_value is not None else blk.fill_value,))
                for blk in self.blocks]

        new_axes = list(self.axes)
        new_axes[axis] = new_axis
        return self.__class__(new_blocks, new_axes)

    def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None):
        """
        Slice/take blocks along axis=0.

        Overloaded for SingleBlock

        Returns
        -------
        new_blocks : list of Block
        """
        allow_fill = fill_tuple is not None

        sl_type, slobj, sllen = _preprocess_slice_or_indexer(
            slice_or_indexer, self.shape[0], allow_fill=allow_fill)

        if self._is_single_block:
            blk = self.blocks[0]

            if sl_type in ('slice', 'mask'):
                return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))]
            elif not allow_fill or self.ndim == 1:
                if allow_fill and fill_tuple[0] is None:
                    _, fill_value = maybe_promote(blk.dtype)
                    fill_tuple = (fill_value, )

                return [blk.take_nd(slobj, axis=0,
                                    new_mgr_locs=slice(0, sllen),
                                    fill_tuple=fill_tuple)]

        if sl_type in ('slice', 'mask'):
            blknos = self._blknos[slobj]
            blklocs = self._blklocs[slobj]
        else:
            blknos = algos.take_1d(self._blknos, slobj, fill_value=-1,
                                   allow_fill=allow_fill)
            blklocs = algos.take_1d(self._blklocs, slobj, fill_value=-1,
                                    allow_fill=allow_fill)

        # When filling blknos, make sure blknos is updated before appending to
        # blocks list, that way new blkno is exactly len(blocks).
        #
        # FIXME: mgr_groupby_blknos must return mgr_locs in ascending order,
        # pytables serialization will break otherwise.
        blocks = []
        for blkno, mgr_locs in libinternals.get_blkno_placements(blknos,
                                                                 self.nblocks,
                                                                 group=True):
            if blkno == -1:
                # If we've got here, fill_tuple was not None.
                fill_value = fill_tuple[0]

                blocks.append(self._make_na_block(placement=mgr_locs,
                                                  fill_value=fill_value))
            else:
                blk = self.blocks[blkno]

                # Otherwise, slicing along items axis is necessary.
                if not blk._can_consolidate:
                    # A non-consolidatable block, it's easy, because there's
                    # only one item and each mgr loc is a copy of that single
                    # item.
                    for mgr_loc in mgr_locs:
                        newblk = blk.copy(deep=True)
                        newblk.mgr_locs = slice(mgr_loc, mgr_loc + 1)
                        blocks.append(newblk)
                else:
                    blocks.append(blk.take_nd(blklocs[mgr_locs.indexer],
                                              axis=0, new_mgr_locs=mgr_locs,
                                              fill_tuple=None))

        return blocks

    def _make_na_block(self, placement, fill_value=None):
        # TODO: infer dtypes other than float64 from fill_value
        if fill_value is None:
            fill_value = np.nan
        block_shape = list(self.shape)
        block_shape[0] = len(placement)

        dtype, fill_value = infer_dtype_from_scalar(fill_value)
        block_values = np.empty(block_shape, dtype=dtype)
        block_values.fill(fill_value)
        return make_block(block_values, placement=placement)

    def take(self, indexer, axis=1, verify=True, convert=True):
        """
        Take items along any axis.
        """
        self._consolidate_inplace()
        indexer = (np.arange(indexer.start, indexer.stop, indexer.step,
                             dtype='int64')
                   if isinstance(indexer, slice)
                   else np.asanyarray(indexer, dtype='int64'))

        n = self.shape[axis]
        if convert:
            indexer = maybe_convert_indices(indexer, n)

        if verify:
            if ((indexer == -1) | (indexer >= n)).any():
                raise Exception('Indices must be non-negative and less than '
                                'the axis length')

        new_labels = self.axes[axis].take(indexer)
        return self.reindex_indexer(new_axis=new_labels, indexer=indexer,
                                    axis=axis, allow_dups=True)

    def merge(self, other, lsuffix='', rsuffix=''):
        # We assume at this point that the axes of self and other match.
        # This is only called from Panel.join, which reindexes prior
        # to calling to ensure this assumption holds.
        l, r = items_overlap_with_suffix(left=self.items, lsuffix=lsuffix,
                                         right=other.items, rsuffix=rsuffix)
        new_items = _concat_indexes([l, r])

        new_blocks = [blk.copy(deep=False) for blk in self.blocks]

        offset = self.shape[0]
        for blk in other.blocks:
            blk = blk.copy(deep=False)
            blk.mgr_locs = blk.mgr_locs.add(offset)
            new_blocks.append(blk)

        new_axes = list(self.axes)
        new_axes[0] = new_items

        return self.__class__(_consolidate(new_blocks), new_axes)

    def equals(self, other):
        self_axes, other_axes = self.axes, other.axes
        if len(self_axes) != len(other_axes):
            return False
        if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)):
            return False
        self._consolidate_inplace()
        other._consolidate_inplace()
        if len(self.blocks) != len(other.blocks):
            return False

        # canonicalize block order, using a tuple combining the type
        # name and then mgr_locs because there might be unconsolidated
        # blocks (say, Categorical) which can only be distinguished by
        # the iteration order
        def canonicalize(block):
            return (block.dtype.name, block.mgr_locs.as_array.tolist())

        self_blocks = sorted(self.blocks, key=canonicalize)
        other_blocks = sorted(other.blocks, key=canonicalize)
        return all(block.equals(oblock)
                   for block, oblock in zip(self_blocks, other_blocks))
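
    # Note (descriptive, not from the original source): because axes are
    # compared first and blocks are sorted by ``canonicalize``, two managers
    # holding the same labeled data compare equal even when their blocks were
    # created in a different order, e.g. unconsolidatable Categorical blocks
    # produced by column insertions in opposite order.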

    def unstack(self, unstacker_func, fill_value):
        """Return a blockmanager with all blocks unstacked.

        Parameters
        ----------
        unstacker_func : callable
            A (partially-applied) ``pd.core.reshape._Unstacker`` class.
        fill_value : Any
            fill_value for newly introduced missing values.

        Returns
        -------
        unstacked : BlockManager
        """
        n_rows = self.shape[-1]
        dummy = unstacker_func(np.empty((0, 0)), value_columns=self.items)
        new_columns = dummy.get_new_columns()
        new_index = dummy.get_new_index()
        new_blocks = []
        columns_mask = []

        for blk in self.blocks:
            blocks, mask = blk._unstack(
                partial(unstacker_func,
                        value_columns=self.items[blk.mgr_locs.indexer]),
                new_columns,
                n_rows,
                fill_value
            )

            new_blocks.extend(blocks)
            columns_mask.extend(mask)

        new_columns = new_columns[columns_mask]

        bm = BlockManager(new_blocks, [new_columns, new_index])
        return bm


class SingleBlockManager(BlockManager):
    """ manage a single block with a single axis """

    ndim = 1
    _is_consolidated = True
    _known_consolidated = True
    __slots__ = ()

    def __init__(self, block, axis, do_integrity_check=False, fastpath=False):
        if isinstance(axis, list):
            if len(axis) != 1:
                raise ValueError("cannot create SingleBlockManager with more "
                                 "than 1 axis")
            axis = axis[0]

        # passed from constructor, single block, single axis
        if fastpath:
            self.axes = [axis]
            if isinstance(block, list):

                # empty block
                if len(block) == 0:
                    block = [np.array([])]
                elif len(block) != 1:
                    raise ValueError('Cannot create SingleBlockManager with '
                                     'more than 1 block')
                block = block[0]
        else:
            self.axes = [ensure_index(axis)]

            # create the block here
            if isinstance(block, list):

                # provide consolidation to the interleaved_dtype
                if len(block) > 1:
                    dtype = _interleaved_dtype(block)
                    block = [b.astype(dtype) for b in block]
                    block = _consolidate(block)

                if len(block) != 1:
                    raise ValueError('Cannot create SingleBlockManager with '
                                     'more than 1 block')
                block = block[0]

        if not isinstance(block, Block):
            block = make_block(block, placement=slice(0, len(axis)), ndim=1)

        self.blocks = [block]

    def _post_setstate(self):
        pass

    @property
    def _block(self):
        return self.blocks[0]

    @property
    def _values(self):
        return self._block.values

    @property
    def _blknos(self):
        """ compat with BlockManager """
        return None

    @property
    def _blklocs(self):
        """ compat with BlockManager """
        return None

    def get_slice(self, slobj, axis=0):
        if axis >= self.ndim:
            raise IndexError("Requested axis not found in manager")

        return self.__class__(self._block._slice(slobj),
                              self.index[slobj], fastpath=True)

    @property
    def index(self):
        return self.axes[0]

    def convert(self, **kwargs):
        """ convert the whole block as one """
        kwargs['by_item'] = False
        return self.apply('convert', **kwargs)

    @property
    def dtype(self):
        return self._block.dtype

    @property
    def array_dtype(self):
        return self._block.array_dtype

    @property
    def ftype(self):
        return self._block.ftype

    def get_dtype_counts(self):
        return {self.dtype.name: 1}

    def get_ftype_counts(self):
        return {self.ftype: 1}

    def get_dtypes(self):
        return np.array([self._block.dtype])

    def get_ftypes(self):
        return np.array([self._block.ftype])

    def external_values(self):
        return self._block.external_values()

    def internal_values(self):
        return self._block.internal_values()

    def formatting_values(self):
        """Return the internal values used by the DataFrame/SeriesFormatter"""
        return self._block.formatting_values()

    def get_values(self):
        """ return a dense type view """
        return np.array(self._block.to_dense(), copy=False)

    @property
    def asobject(self):
        """
        Return an object dtype array. Datetime/timedelta-like values are
        boxed to Timestamp/Timedelta instances.
        """
        return self._block.get_values(dtype=object)

    @property
    def _can_hold_na(self):
        return self._block._can_hold_na

    def is_consolidated(self):
        return True

    def _consolidate_check(self):
        pass

    def _consolidate_inplace(self):
        pass

    def delete(self, item):
        """
        Delete single item from SingleBlockManager.

        Ensures that self.blocks doesn't become empty.
        """
        loc = self.items.get_loc(item)
        self._block.delete(loc)
        self.axes[0] = self.axes[0].delete(loc)

    def fast_xs(self, loc):
        """
        fast path for getting a cross-section
        return a view of the data
        """
        return self._block.values[loc]

    def concat(self, to_concat, new_axis):
        """
        Concatenate a list of SingleBlockManagers into a single
        SingleBlockManager.

        Used for pd.concat of Series objects with axis=0.

        Parameters
        ----------
        to_concat : list of SingleBlockManagers
        new_axis : Index of the result

        Returns
        -------
        SingleBlockManager
        """
        non_empties = [x for x in to_concat if len(x) > 0]

        # check if all series are of the same block type:
        if len(non_empties) > 0:
            blocks = [obj.blocks[0] for obj in non_empties]

            if len({b.dtype for b in blocks}) == 1:
                new_block = blocks[0].concat_same_type(blocks)
            else:
                values = [x.values for x in blocks]
                values = _concat._concat_compat(values)
                new_block = make_block(
                    values, placement=slice(0, len(values), 1))
        else:
            values = [x._block.values for x in to_concat]
            values = _concat._concat_compat(values)
            new_block = make_block(
                values, placement=slice(0, len(values), 1))

        mgr = SingleBlockManager(new_block, new_axis)
        return mgr


# --------------------------------------------------------------------
# Constructor Helpers

def create_block_manager_from_blocks(blocks, axes):
    try:
        if len(blocks) == 1 and not isinstance(blocks[0], Block):
            # if blocks[0] is of length 0, return empty blocks
            if not len(blocks[0]):
                blocks = []
            else:
                # It's OK if a single block is passed as values; its
                # placement is basically "all items". But if there are
                # many, don't bother converting, it's an error anyway.
                blocks = [make_block(values=blocks[0],
                                     placement=slice(0, len(axes[0])))]

        mgr = BlockManager(blocks, axes)
        mgr._consolidate_inplace()
        return mgr

    except ValueError as e:
        blocks = [getattr(b, 'values', b) for b in blocks]
        tot_items = sum(b.shape[0] for b in blocks)
        construction_error(tot_items, blocks[0].shape[1:], axes, e)


def create_block_manager_from_arrays(arrays, names, axes):
    try:
        blocks = form_blocks(arrays, names, axes)
        mgr = BlockManager(blocks, axes)
        mgr._consolidate_inplace()
        return mgr
    except ValueError as e:
        construction_error(len(arrays), arrays[0].shape, axes, e)
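

# A minimal sketch of feeding the array-based constructor (hypothetical
# inputs; ``form_blocks`` below builds one block per dtype family):
#
#   >>> import numpy as np
#   >>> arrays = [np.array([1, 2]), np.array([3., 4.])]
#   >>> axes = [ensure_index(['a', 'b']), ensure_index([0, 1])]
#   >>> create_block_manager_from_arrays(arrays, ['a', 'b'], axes).nblocks
#   2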


def construction_error(tot_items, block_shape, axes, e=None):
    """ raise a helpful message about our construction """
    passed = tuple(map(int, [tot_items] + list(block_shape)))
    # Blocks are stored transposed relative to the user-facing frame, so
    # reverse 2-D shapes to make the error message read (rows, columns).
    if len(passed) <= 2:
        passed = passed[::-1]

    implied = tuple(len(ax) for ax in axes)
    # Apply the same correction to the shape implied by the axes.
    if len(implied) <= 2:
        implied = implied[::-1]

    if passed == implied and e is not None:
        raise e
    if block_shape[0] == 0:
        raise ValueError("Empty data passed with indices specified.")
    raise ValueError("Shape of passed values is {0}, indices imply {1}".format(
        passed, implied))
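

# For example, ``pd.DataFrame(np.ones((3, 2)), columns=['a', 'b', 'c'])``
# ends up here and raises roughly:
#
#   ValueError: Shape of passed values is (3, 2), indices imply (3, 3)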


# -----------------------------------------------------------------------

def form_blocks(arrays, names, axes):
    # put "leftover" items in float bucket, where else?
    # generalize?
    items_dict = defaultdict(list)
    extra_locs = []

    names_idx = ensure_index(names)
    if names_idx.equals(axes[0]):
        names_indexer = np.arange(len(names_idx))
    else:
        assert names_idx.intersection(axes[0]).is_unique
        names_indexer = names_idx.get_indexer_for(axes[0])

    for i, name_idx in enumerate(names_indexer):
        if name_idx == -1:
            extra_locs.append(i)
            continue

        k = names[name_idx]
        v = arrays[name_idx]

        block_type = get_block_type(v)
        items_dict[block_type.__name__].append((i, k, v))

    blocks = []
    if len(items_dict['FloatBlock']):
        float_blocks = _multi_blockify(items_dict['FloatBlock'])
        blocks.extend(float_blocks)

    if len(items_dict['ComplexBlock']):
        complex_blocks = _multi_blockify(items_dict['ComplexBlock'])
        blocks.extend(complex_blocks)

    if len(items_dict['TimeDeltaBlock']):
        timedelta_blocks = _multi_blockify(items_dict['TimeDeltaBlock'])
        blocks.extend(timedelta_blocks)

    if len(items_dict['IntBlock']):
        int_blocks = _multi_blockify(items_dict['IntBlock'])
        blocks.extend(int_blocks)

    if len(items_dict['DatetimeBlock']):
        datetime_blocks = _simple_blockify(items_dict['DatetimeBlock'],
                                           _NS_DTYPE)
        blocks.extend(datetime_blocks)

    if len(items_dict['DatetimeTZBlock']):
        dttz_blocks = [make_block(array,
                                  klass=DatetimeTZBlock,
                                  placement=[i])
                       for i, _, array in items_dict['DatetimeTZBlock']]
        blocks.extend(dttz_blocks)

    if len(items_dict['BoolBlock']):
        bool_blocks = _simple_blockify(items_dict['BoolBlock'], np.bool_)
        blocks.extend(bool_blocks)

    if len(items_dict['ObjectBlock']):
        object_blocks = _simple_blockify(items_dict['ObjectBlock'],
                                         np.object_)
        blocks.extend(object_blocks)

    if len(items_dict['SparseBlock']):
        sparse_blocks = _sparse_blockify(items_dict['SparseBlock'])
        blocks.extend(sparse_blocks)

    if len(items_dict['CategoricalBlock']):
        cat_blocks = [make_block(array, klass=CategoricalBlock,
                                 placement=[i])
                      for i, _, array in items_dict['CategoricalBlock']]
        blocks.extend(cat_blocks)

    if len(items_dict['ExtensionBlock']):
        external_blocks = [
            make_block(array, klass=ExtensionBlock, placement=[i])
            for i, _, array in items_dict['ExtensionBlock']
        ]
        blocks.extend(external_blocks)

    if len(items_dict['ObjectValuesExtensionBlock']):
        external_blocks = [
            make_block(array, klass=ObjectValuesExtensionBlock, placement=[i])
            for i, _, array in items_dict['ObjectValuesExtensionBlock']
        ]
        blocks.extend(external_blocks)

    if len(extra_locs):
        shape = (len(extra_locs),) + tuple(len(x) for x in axes[1:])

        # empty items -> dtype object
        block_values = np.empty(shape, dtype=object)
        block_values.fill(np.nan)

        na_block = make_block(block_values, placement=extra_locs)
        blocks.append(na_block)

    return blocks
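

# A minimal sketch (hypothetical inputs): two integer columns and one float
# column collapse into one block per dtype family.
#
#   >>> import numpy as np
#   >>> arrays = [np.array([1, 2]), np.array([3, 4]), np.array([.5, .6])]
#   >>> names = ['a', 'b', 'c']
#   >>> axes = [ensure_index(names), ensure_index([0, 1])]
#   >>> sorted(type(b).__name__ for b in form_blocks(arrays, names, axes))
#   ['FloatBlock', 'IntBlock']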


def _simple_blockify(tuples, dtype):
    """ return a single array of a block that has a single dtype; if dtype is
    not None, coerce to this dtype
    """
    values, placement = _stack_arrays(tuples, dtype)

    # CHECK DTYPE?
    if dtype is not None and values.dtype != dtype:  # pragma: no cover
        values = values.astype(dtype)

    block = make_block(values, placement=placement)
    return [block]


def _multi_blockify(tuples, dtype=None):
    """ return an array of blocks that potentially have different dtypes """
    # group by dtype
    grouper = itertools.groupby(tuples, lambda x: x[2].dtype)

    new_blocks = []
    for dtype, tup_block in grouper:
        values, placement = _stack_arrays(list(tup_block), dtype)

        block = make_block(values, placement=placement)
        new_blocks.append(block)

    return new_blocks
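

# Each (placement, name, array) tuple is grouped by dtype, and each group
# becomes its own block. A sketch with hypothetical tuples (64-bit platform):
#
#   >>> tuples = [(0, 'a', np.array([1, 2])), (1, 'b', np.array([3., 4.]))]
#   >>> [b.dtype for b in _multi_blockify(tuples)]
#   [dtype('int64'), dtype('float64')]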


def _sparse_blockify(tuples, dtype=None):
    """ return an array of blocks that potentially have different dtypes (and
    are sparse)
    """
    new_blocks = []
    for i, names, array in tuples:
        array = _maybe_to_sparse(array)
        block = make_block(array, placement=[i])
        new_blocks.append(block)

    return new_blocks


def _stack_arrays(tuples, dtype):
    # stack the per-column 1-D arrays into a single 2-D block payload

    def _asarray_compat(x):
        if isinstance(x, ABCSeries):
            return x._values
        else:
            return np.asarray(x)

    def _shape_compat(x):
        if isinstance(x, ABCSeries):
            return len(x),
        else:
            return x.shape

    placement, names, arrays = zip(*tuples)

    first = arrays[0]
    shape = (len(arrays),) + _shape_compat(first)

    stacked = np.empty(shape, dtype=dtype)
    for i, arr in enumerate(arrays):
        stacked[i] = _asarray_compat(arr)

    return stacked, placement
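

# ``_stack_arrays`` turns N aligned 1-D arrays into one (N, len) 2-D array.
# A sketch with hypothetical tuples:
#
#   >>> tuples = [(0, 'a', np.array([1, 2])), (1, 'b', np.array([3, 4]))]
#   >>> stacked, placement = _stack_arrays(tuples, np.int64)
#   >>> stacked
#   array([[1, 2],
#          [3, 4]])
#   >>> placement
#   (0, 1)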


def _interleaved_dtype(blocks):
    # type: (List[Block]) -> Optional[Union[np.dtype, ExtensionDtype]]
    """Find the common dtype for `blocks`.

    Parameters
    ----------
    blocks : List[Block]

    Returns
    -------
    dtype : Optional[Union[np.dtype, ExtensionDtype]]
        None is returned when `blocks` is empty.
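
    Examples
    --------
    A minimal sketch with two hypothetical numeric blocks:

    >>> import numpy as np
    >>> blocks = [make_block(np.zeros((1, 2), dtype=np.int64),
    ...                      placement=[0]),
    ...           make_block(np.zeros((1, 2), dtype=np.float64),
    ...                      placement=[1])]
    >>> _interleaved_dtype(blocks)
    dtype('float64')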
  1486. """
  1487. if not len(blocks):
  1488. return None
  1489. return find_common_type([b.dtype for b in blocks])


def _consolidate(blocks):
    """
    Merge blocks having same dtype, exclude non-consolidating blocks
    """
    # sort by _can_consolidate, dtype
    gkey = lambda x: x._consolidate_key
    grouper = itertools.groupby(sorted(blocks, key=gkey), gkey)

    new_blocks = []
    for (_can_consolidate, dtype), group_blocks in grouper:
        merged_blocks = _merge_blocks(list(group_blocks), dtype=dtype,
                                      _can_consolidate=_can_consolidate)
        new_blocks = _extend_blocks(merged_blocks, new_blocks)
    return new_blocks
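

# Consolidation in a sketch (hypothetical blocks): two single-row int64
# blocks of the same dtype merge into one 2x2 block.
#
#   >>> b1 = make_block(np.array([[1, 2]]), placement=[0])
#   >>> b2 = make_block(np.array([[3, 4]]), placement=[1])
#   >>> [b.shape for b in _consolidate([b1, b2])]
#   [(2, 2)]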


def _compare_or_regex_search(a, b, regex=False):
    """
    Compare two array_like inputs of the same shape or two scalar values

    Calls operator.eq or re.search, depending on regex argument. If regex is
    True, perform an element-wise regex matching.

    Parameters
    ----------
    a : array_like or scalar
    b : array_like or scalar
    regex : bool, default False

    Returns
    -------
    mask : array_like of bool
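
    Examples
    --------
    A sketch with hypothetical inputs:

    >>> import numpy as np
    >>> _compare_or_regex_search(np.array(['ab', 'cd']), 'ab')
    array([ True, False])
    >>> _compare_or_regex_search(np.array(['ab', 'cd']), '^a', regex=True)
    array([ True, False])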
  1516. """
  1517. if not regex:
  1518. op = lambda x: operator.eq(x, b)
  1519. else:
  1520. op = np.vectorize(lambda x: bool(re.search(b, x)) if isinstance(x, str)
  1521. else False)
  1522. is_a_array = isinstance(a, np.ndarray)
  1523. is_b_array = isinstance(b, np.ndarray)
  1524. # numpy deprecation warning to have i8 vs integer comparisons
  1525. if is_datetimelike_v_numeric(a, b):
  1526. result = False
  1527. # numpy deprecation warning if comparing numeric vs string-like
  1528. elif is_numeric_v_string_like(a, b):
  1529. result = False
  1530. else:
  1531. result = op(a)
  1532. if is_scalar(result) and (is_a_array or is_b_array):
  1533. type_names = [type(a).__name__, type(b).__name__]
  1534. if is_a_array:
  1535. type_names[0] = 'ndarray(dtype={dtype})'.format(dtype=a.dtype)
  1536. if is_b_array:
  1537. type_names[1] = 'ndarray(dtype={dtype})'.format(dtype=b.dtype)
  1538. raise TypeError(
  1539. "Cannot compare types {a!r} and {b!r}".format(a=type_names[0],
  1540. b=type_names[1]))
  1541. return result


def _concat_indexes(indexes):
    return indexes[0].append(indexes[1:])


def items_overlap_with_suffix(left, lsuffix, right, rsuffix):
    """
    If two indices overlap, add suffixes to the overlapping entries.

    If the corresponding suffix is empty, the entry is simply converted to
    string.
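
    Examples
    --------
    A sketch with hypothetical indexes:

    >>> left, right = Index(['a', 'b']), Index(['b', 'c'])
    >>> l, r = items_overlap_with_suffix(left, '_x', right, '_y')
    >>> list(l), list(r)
    (['a', 'b_x'], ['b_y', 'c'])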
  1548. """
  1549. to_rename = left.intersection(right)
  1550. if len(to_rename) == 0:
  1551. return left, right
  1552. else:
  1553. if not lsuffix and not rsuffix:
  1554. raise ValueError('columns overlap but no suffix specified: '
  1555. '{rename}'.format(rename=to_rename))
  1556. def lrenamer(x):
  1557. if x in to_rename:
  1558. return '{x}{lsuffix}'.format(x=x, lsuffix=lsuffix)
  1559. return x
  1560. def rrenamer(x):
  1561. if x in to_rename:
  1562. return '{x}{rsuffix}'.format(x=x, rsuffix=rsuffix)
  1563. return x
  1564. return (_transform_index(left, lrenamer),
  1565. _transform_index(right, rrenamer))


def _transform_index(index, func, level=None):
    """
    Apply a function to all values found in the index.

    This includes transforming MultiIndex entries separately.
    Only apply the function to one level of a MultiIndex if level is
    specified.
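
    Examples
    --------
    A sketch with a hypothetical index:

    >>> _transform_index(Index(['a', 'b']), str.upper)
    Index(['A', 'B'], dtype='object')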
  1571. """
  1572. if isinstance(index, MultiIndex):
  1573. if level is not None:
  1574. items = [tuple(func(y) if i == level else y
  1575. for i, y in enumerate(x)) for x in index]
  1576. else:
  1577. items = [tuple(func(y) for y in x) for x in index]
  1578. return MultiIndex.from_tuples(items, names=index.names)
  1579. else:
  1580. items = [func(x) for x in index]
  1581. return Index(items, name=index.name, tupleize_cols=False)


def _fast_count_smallints(arr):
    """Count occurrences of small non-negative ints; a faster replacement
    for np.unique with counts, returning an array of (value, count) rows."""
    counts = np.bincount(arr.astype(np.int_))
    nz = counts.nonzero()[0]
    return np.c_[nz, counts[nz]]
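

# For example (hypothetical input): the values 1 and 3 with their counts.
#
#   >>> _fast_count_smallints(np.array([1, 1, 3]))
#   array([[1, 2],
#          [3, 1]])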


def _preprocess_slice_or_indexer(slice_or_indexer, length, allow_fill):
    if isinstance(slice_or_indexer, slice):
        return ('slice', slice_or_indexer,
                libinternals.slice_len(slice_or_indexer, length))
    elif (isinstance(slice_or_indexer, np.ndarray) and
            slice_or_indexer.dtype == np.bool_):
        return 'mask', slice_or_indexer, slice_or_indexer.sum()
    else:
        indexer = np.asanyarray(slice_or_indexer, dtype=np.int64)
        if not allow_fill:
            indexer = maybe_convert_indices(indexer, length)
        return 'fancy', indexer, len(indexer)
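

# The indexer kinds in a sketch (hypothetical inputs): slices keep their
# computed length, boolean masks report their true-count.
#
#   >>> _preprocess_slice_or_indexer(slice(0, 2), 5, allow_fill=False)
#   ('slice', slice(0, 2, None), 2)
#   >>> kind, _, length = _preprocess_slice_or_indexer(
#   ...     np.array([True, False, True]), 3, allow_fill=False)
#   >>> kind, length
#   ('mask', 2)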


def concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy):
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer, ...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool
    """
    concat_plans = [get_mgr_concatenation_plan(mgr, indexers)
                    for mgr, indexers in mgrs_indexers]
    concat_plan = combine_concat_plans(concat_plans, concat_axis)
    blocks = []

    for placement, join_units in concat_plan:

        if len(join_units) == 1 and not join_units[0].indexers:
            b = join_units[0].block
            values = b.values
            if copy:
                values = values.copy()
            else:
                values = values.view()
            b = b.make_block_same_class(values, placement=placement)
        elif is_uniform_join_units(join_units):
            b = join_units[0].block.concat_same_type(
                [ju.block for ju in join_units], placement=placement)
        else:
            b = make_block(
                concatenate_join_units(join_units, concat_axis, copy=copy),
                placement=placement)
        blocks.append(b)

    return BlockManager(blocks, axes)
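

# Exercised indirectly through the public API; a sketch (hypothetical
# frames): two single-block managers concatenate into one block.
#
#   >>> import pandas as pd
#   >>> df = pd.DataFrame({'a': [1]})
#   >>> pd.concat([df, df])._data.nblocks
#   1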