test_internals.py 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296
  1. # -*- coding: utf-8 -*-
  2. # pylint: disable=W0102
  3. from datetime import date, datetime
  4. from distutils.version import LooseVersion
  5. import itertools
  6. import operator
  7. import re
  8. import sys
  9. import numpy as np
  10. import pytest
  11. from pandas._libs.internals import BlockPlacement
  12. from pandas.compat import OrderedDict, lrange, u, zip
  13. import pandas as pd
  14. from pandas import (
  15. Categorical, DataFrame, DatetimeIndex, Index, MultiIndex, Series,
  16. SparseArray)
  17. import pandas.core.algorithms as algos
  18. from pandas.core.arrays import DatetimeArray, TimedeltaArray
  19. from pandas.core.internals import BlockManager, SingleBlockManager, make_block
  20. import pandas.util.testing as tm
  21. from pandas.util.testing import (
  22. assert_almost_equal, assert_frame_equal, assert_series_equal, randn)
  # In 3.6.1 a C-API slicing function changed, see src/compat_helper.h.
  # Compare the structured version tuple instead of parsing the free-form
  # ``sys.version`` string with the deprecated ``distutils`` ``LooseVersion``.
  PY361 = sys.version_info >= (3, 6, 1)
  @pytest.fixture
  def mgr():
      """
      Fixture: a BlockManager built by ``create_mgr`` with items ``a``-``l``.

      The description mixes float, object, bool, int, complex, two separate
      naive-datetime blocks and two tz-aware datetime items.
      """
      description = (
          'a: f8; b: object; c: f8; d: object; e: f8;'
          'f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;'
          'k: M8[ns, US/Eastern]; l: M8[ns, CET];'
      )
      return create_mgr(description)
  def assert_block_equal(left, right):
      """
      Assert that two blocks hold the same values, dtype and placement.

      Both ``mgr_locs`` attributes must be ``BlockPlacement`` instances and
      their ``as_array`` representations must match.
      """
      tm.assert_numpy_array_equal(left.values, right.values)
      assert left.dtype == right.dtype

      # the placement of each side must be a proper BlockPlacement
      for blk in (left, right):
          assert isinstance(blk.mgr_locs, BlockPlacement)

      left_locs = left.mgr_locs.as_array
      right_locs = right.mgr_locs.as_array
      tm.assert_numpy_array_equal(left_locs, right_locs)
  def get_numeric_mat(shape):
      """
      Return a fresh array of `shape` in which every element equals its index
      along the first axis (row ``i`` is filled with ``i``).
      """
      row_ids = np.arange(shape[0])
      # give the row index one trailing length-1 axis per remaining dimension
      # so it broadcasts across them, then materialise an independent copy
      trailing = (1, ) * (len(shape) - 1)
      column = row_ids.reshape((row_ids.size, ) + trailing)
      return np.broadcast_to(column, shape).copy()
  # Default number of rows: used as the item length when ``create_block`` /
  # ``create_mgr`` receive no ``item_shape`` and as the row count when
  # ``create_single_mgr`` receives no ``num_rows``.
  N = 10
  def create_block(typestr, placement, item_shape=None, num_offset=0):
      """
      Build a test Block of the kind named by `typestr`.

      Supported typestr:

          * float, f8, f4, f2
          * int, i8, i4, i2, i1
          * uint, u8, u4, u2, u1
          * complex, c16, c8
          * bool, b
          * object, string, O
          * datetime, dt, M8[ns], M8[ns, tz]
          * timedelta, td, m8[ns]
          * sparse (SparseArray with fill_value=0.0)
          * sparse_na (SparseArray with fill_value=np.nan)
          * category, category2

      Parameters
      ----------
      typestr : str
          One of the identifiers listed above.
      placement : object
          Passed to ``BlockPlacement``; its length is the number of items.
      item_shape : tuple, optional
          Shape of a single item, ``(N, )`` when omitted.
      num_offset : int, default 0
          Added to the generated values of real, complex and object blocks.
          Sparse blocks shift their stored values by ``num_offset - 1``;
          the remaining kinds ignore it.

      Returns
      -------
      The result of ``make_block`` for the generated values.

      Raises
      ------
      ValueError
          If `typestr` is not supported.
      """
      real_kinds = ('float', 'f8', 'f4', 'f2', 'int', 'i8', 'i4', 'i2', 'i1',
                    'uint', 'u8', 'u4', 'u2', 'u1')

      placement = BlockPlacement(placement)
      num_items = len(placement)
      if item_shape is None:
          item_shape = (N, )
      shape = (num_items, ) + item_shape

      # row ``i`` of ``base`` is filled with ``i``
      base = get_numeric_mat(shape)

      if typestr in real_kinds:
          values = base.astype(typestr) + num_offset
      elif typestr in ('complex', 'c16', 'c8'):
          values = 1.j * (base.astype(typestr) + num_offset)
      elif typestr in ('object', 'string', 'O'):
          labels = ['A%d' % i for i in base.ravel() + num_offset]
          values = np.reshape(labels, shape)
      elif typestr in ('b', 'bool', ):
          values = np.ones(shape, dtype=np.bool_)
      elif typestr in ('datetime', 'dt', 'M8[ns]'):
          values = (base * 1e9).astype('M8[ns]')
      elif typestr.startswith('M8[ns'):
          # datetime with tz: the zone name is parsed out of the typestr
          match = re.search(r'M8\[ns,\s*(\w+\/?\w*)\]', typestr)
          assert match is not None, (
              "incompatible typestr -> {0}".format(typestr))
          zone = match.group(1)
          assert num_items == 1, "must have only 1 num items for a tz-aware"
          values = DatetimeIndex(np.arange(N) * 1e9, tz=zone)
      elif typestr in ('timedelta', 'td', 'm8[ns]'):
          values = (base * 1).astype('m8[ns]')
      elif typestr in ('category', ):
          values = Categorical([1, 1, 2, 2, 3, 3, 3, 3, 4, 4])
      elif typestr in ('category2', ):
          values = Categorical(
              ['a', 'a', 'a', 'a', 'b', 'b', 'c', 'c', 'c', 'd'])
      elif typestr in ('sparse', 'sparse_na'):
          # FIXME: doesn't support num_rows != 10
          assert shape[-1] == 10
          assert all(s == 1 for s in shape[:-1])
          fill_value = np.nan if typestr.endswith('_na') else 0.0
          values = SparseArray([fill_value, fill_value, 1, 2, 3, fill_value,
                                4, 5, fill_value, 6], fill_value=fill_value)
          # shift the stored (non-fill) values in place through a view
          stored = values.sp_values.view()
          stored += (num_offset - 1)
      else:
          raise ValueError('Unsupported typestr: "%s"' % typestr)

      return make_block(values, placement=placement, ndim=len(shape))
  def create_single_mgr(typestr, num_rows=None):
      """
      Build a SingleBlockManager holding one `typestr` block.

      The block spans ``num_rows`` locations (``N`` when omitted) and the
      manager's axis is ``np.arange`` of the same length.
      """
      length = N if num_rows is None else num_rows
      block = create_block(typestr, placement=slice(0, length), item_shape=())
      return SingleBlockManager(block, np.arange(length))
  def create_mgr(descr, item_shape=None):
      """
      Construct BlockManager from string description.

      The description syntax resembles an np.matrix initializer, e.g.::

          a,b,c: f8; d,e,f: i8

      Rules:

      * the supported datatypes are those listed in `create_block`
      * components are separated by semicolons
      * each component reads `NAME,NAME,NAME: DTYPE_ID`
      * whitespace around colons & semicolons is stripped
      * components sharing a DTYPE_ID end up in a single block
      * append '-SUFFIX' to force separate blocks of the same dtype::

          'a:f8-1; b:f8-2; c:f8-foobar'

      `item_shape` is the shape of one item and defaults to ``(N, )``.
      """
      if item_shape is None:
          item_shape = (N, )

      item_names = []
      locs_by_block = OrderedDict()
      next_loc = 0
      for component in descr.split(';'):
          component = component.strip()
          if not component:
              # empty piece, e.g. after a trailing semicolon
              continue
          name_part, _, block_part = component.partition(':')
          block_key = block_part.strip()
          names = name_part.strip().split(',')
          item_names.extend(names)
          locs = list(np.arange(len(names)) + next_loc)
          locs_by_block.setdefault(block_key, []).extend(locs)
          next_loc += len(names)

      items = Index(item_names)

      blocks = []
      num_offset = 0
      for block_key, locs in locs_by_block.items():
          # anything after the first '-' only serves to keep blocks apart
          dtype_id = block_key.split('-')[0]
          block = create_block(dtype_id, locs, item_shape=item_shape,
                               num_offset=num_offset)
          blocks.append(block)
          num_offset += len(locs)

      # order the blocks by the first location each one occupies
      blocks.sort(key=lambda blk: blk.mgr_locs[0])
      axes = [items] + [np.arange(n) for n in item_shape]
      return BlockManager(blocks, axes)
  class TestBlock(object):
      """Tests for single blocks produced by ``create_block``."""

      def setup_method(self, method):
          # one block per basic dtype, at disjoint locations 0-7
          self.fblock = create_block('float', [0, 2, 4])
          self.cblock = create_block('complex', [7])
          self.oblock = create_block('object', [1, 3])
          self.bool_block = create_block('bool', [5])
          self.int_block = create_block('int', [6])

      def test_constructor(self):
          int32block = create_block('i4', [0])
          assert int32block.dtype == np.int32

      def test_pickle(self):
          # every block built in setup_method must survive a pickle round trip
          blocks = [self.fblock, self.cblock, self.oblock, self.bool_block,
                    self.int_block]
          for blk in blocks:
              assert_block_equal(tm.round_trip_pickle(blk), blk)

      def test_mgr_locs(self):
          assert isinstance(self.fblock.mgr_locs, BlockPlacement)
          tm.assert_numpy_array_equal(self.fblock.mgr_locs.as_array,
                                      np.array([0, 2, 4], dtype=np.int64))

      def test_attrs(self):
          assert self.fblock.shape == self.fblock.values.shape
          assert self.fblock.dtype == self.fblock.values.dtype
          assert len(self.fblock) == len(self.fblock.values)

      def test_merge(self):
          avals = randn(2, 10)
          bvals = randn(2, 10)

          ref_cols = Index(['e', 'a', 'b', 'd', 'f'])

          ablock = make_block(avals, ref_cols.get_indexer(['e', 'b']))
          bblock = make_block(bvals, ref_cols.get_indexer(['a', 'd']))
          merged = ablock.merge(bblock)
          tm.assert_numpy_array_equal(merged.mgr_locs.as_array,
                                      np.array([0, 1, 2, 3], dtype=np.int64))
          # rows of the two inputs are expected to alternate in the result
          tm.assert_numpy_array_equal(merged.values[[0, 2]], np.array(avals))
          tm.assert_numpy_array_equal(merged.values[[1, 3]], np.array(bvals))

          # TODO: merge with mixed type?

      def test_copy(self):
          cop = self.fblock.copy()
          assert cop is not self.fblock
          assert_block_equal(self.fblock, cop)

      def test_reindex_index(self):
          # placeholder: nothing is tested yet
          pass

      def test_reindex_cast(self):
          # placeholder: nothing is tested yet
          pass

      def test_insert(self):
          # placeholder: nothing is tested yet
          pass

      def test_delete(self):
          # fblock sits at locations [0, 2, 4]; row i of its values is
          # filled with i (see get_numeric_mat)
          newb = self.fblock.copy()
          newb.delete(0)
          assert isinstance(newb.mgr_locs, BlockPlacement)
          tm.assert_numpy_array_equal(newb.mgr_locs.as_array,
                                      np.array([2, 4], dtype=np.int64))
          assert (newb.values[0] == 1).all()

          newb = self.fblock.copy()
          newb.delete(1)
          assert isinstance(newb.mgr_locs, BlockPlacement)
          tm.assert_numpy_array_equal(newb.mgr_locs.as_array,
                                      np.array([0, 4], dtype=np.int64))
          assert (newb.values[1] == 2).all()

          newb = self.fblock.copy()
          newb.delete(2)
          tm.assert_numpy_array_equal(newb.mgr_locs.as_array,
                                      np.array([0, 2], dtype=np.int64))
          assert (newb.values[1] == 1).all()

          # deleting a position the block does not have must fail
          newb = self.fblock.copy()
          with pytest.raises(Exception):
              newb.delete(3)

      def test_make_block_same_class(self):
          # issue 19431
          block = create_block('M8[ns, US/Eastern]', [3])
          with tm.assert_produces_warning(DeprecationWarning,
                                          check_stacklevel=False):
              block.make_block_same_class(block.values,
                                          dtype=block.values.dtype)
  class TestDatetimeBlock(object):
      """Tests specific to the block created for the 'datetime' typestr."""

      def test_try_coerce_arg(self):
          block = create_block('datetime', [0])

          # None must come back as something Timestamp turns into NaT
          coerced_none = block._try_coerce_args(block.values, None)[1]
          assert pd.Timestamp(coerced_none) is pd.NaT

          # several kinds of date objects for the same day all coerce to
          # an np.int64 that round-trips to that day
          expected = pd.Timestamp('2010-10-10')
          candidates = (np.datetime64('2010-10-10'), datetime(2010, 10, 10),
                        date(2010, 10, 10))
          for candidate in candidates:
              coerced = block._try_coerce_args(block.values, candidate)[1]
              assert type(coerced) == np.int64
              assert pd.Timestamp(coerced) == expected
  250. class TestBlockManager(object):
  251. def test_constructor_corner(self):
  252. pass
  253. def test_attrs(self):
  254. mgr = create_mgr('a,b,c: f8-1; d,e,f: f8-2')
  255. assert mgr.nblocks == 2
  256. assert len(mgr) == 6
  257. def test_is_mixed_dtype(self):
  258. assert not create_mgr('a,b:f8').is_mixed_type
  259. assert not create_mgr('a:f8-1; b:f8-2').is_mixed_type
  260. assert create_mgr('a,b:f8; c,d: f4').is_mixed_type
  261. assert create_mgr('a,b:f8; c,d: object').is_mixed_type
  262. def test_duplicate_ref_loc_failure(self):
  263. tmp_mgr = create_mgr('a:bool; a: f8')
  264. axes, blocks = tmp_mgr.axes, tmp_mgr.blocks
  265. blocks[0].mgr_locs = np.array([0])
  266. blocks[1].mgr_locs = np.array([0])
  267. # test trying to create block manager with overlapping ref locs
  268. with pytest.raises(AssertionError):
  269. BlockManager(blocks, axes)
  270. blocks[0].mgr_locs = np.array([0])
  271. blocks[1].mgr_locs = np.array([1])
  272. mgr = BlockManager(blocks, axes)
  273. mgr.iget(1)
  274. def test_contains(self, mgr):
  275. assert 'a' in mgr
  276. assert 'baz' not in mgr
  277. def test_pickle(self, mgr):
  278. mgr2 = tm.round_trip_pickle(mgr)
  279. assert_frame_equal(DataFrame(mgr), DataFrame(mgr2))
  280. # share ref_items
  281. # assert mgr2.blocks[0].ref_items is mgr2.blocks[1].ref_items
  282. # GH2431
  283. assert hasattr(mgr2, "_is_consolidated")
  284. assert hasattr(mgr2, "_known_consolidated")
  285. # reset to False on load
  286. assert not mgr2._is_consolidated
  287. assert not mgr2._known_consolidated
  288. def test_non_unique_pickle(self):
  289. mgr = create_mgr('a,a,a:f8')
  290. mgr2 = tm.round_trip_pickle(mgr)
  291. assert_frame_equal(DataFrame(mgr), DataFrame(mgr2))
  292. mgr = create_mgr('a: f8; a: i8')
  293. mgr2 = tm.round_trip_pickle(mgr)
  294. assert_frame_equal(DataFrame(mgr), DataFrame(mgr2))
  295. def test_categorical_block_pickle(self):
  296. mgr = create_mgr('a: category')
  297. mgr2 = tm.round_trip_pickle(mgr)
  298. assert_frame_equal(DataFrame(mgr), DataFrame(mgr2))
  299. smgr = create_single_mgr('category')
  300. smgr2 = tm.round_trip_pickle(smgr)
  301. assert_series_equal(Series(smgr), Series(smgr2))
  302. def test_get(self):
  303. cols = Index(list('abc'))
  304. values = np.random.rand(3, 3)
  305. block = make_block(values=values.copy(), placement=np.arange(3))
  306. mgr = BlockManager(blocks=[block], axes=[cols, np.arange(3)])
  307. assert_almost_equal(mgr.get('a', fastpath=False), values[0])
  308. assert_almost_equal(mgr.get('b', fastpath=False), values[1])
  309. assert_almost_equal(mgr.get('c', fastpath=False), values[2])
  310. assert_almost_equal(mgr.get('a').internal_values(), values[0])
  311. assert_almost_equal(mgr.get('b').internal_values(), values[1])
  312. assert_almost_equal(mgr.get('c').internal_values(), values[2])
  313. def test_set(self):
  314. mgr = create_mgr('a,b,c: int', item_shape=(3, ))
  315. mgr.set('d', np.array(['foo'] * 3))
  316. mgr.set('b', np.array(['bar'] * 3))
  317. tm.assert_numpy_array_equal(mgr.get('a').internal_values(),
  318. np.array([0] * 3))
  319. tm.assert_numpy_array_equal(mgr.get('b').internal_values(),
  320. np.array(['bar'] * 3, dtype=np.object_))
  321. tm.assert_numpy_array_equal(mgr.get('c').internal_values(),
  322. np.array([2] * 3))
  323. tm.assert_numpy_array_equal(mgr.get('d').internal_values(),
  324. np.array(['foo'] * 3, dtype=np.object_))
  325. def test_set_change_dtype(self, mgr):
  326. mgr.set('baz', np.zeros(N, dtype=bool))
  327. mgr.set('baz', np.repeat('foo', N))
  328. assert mgr.get('baz').dtype == np.object_
  329. mgr2 = mgr.consolidate()
  330. mgr2.set('baz', np.repeat('foo', N))
  331. assert mgr2.get('baz').dtype == np.object_
  332. mgr2.set('quux', randn(N).astype(int))
  333. assert mgr2.get('quux').dtype == np.int_
  334. mgr2.set('quux', randn(N))
  335. assert mgr2.get('quux').dtype == np.float_
  336. def test_set_change_dtype_slice(self): # GH8850
  337. cols = MultiIndex.from_tuples([('1st', 'a'), ('2nd', 'b'), ('3rd', 'c')
  338. ])
  339. df = DataFrame([[1.0, 2, 3], [4.0, 5, 6]], columns=cols)
  340. df['2nd'] = df['2nd'] * 2.0
  341. blocks = df._to_dict_of_blocks()
  342. assert sorted(blocks.keys()) == ['float64', 'int64']
  343. assert_frame_equal(blocks['float64'], DataFrame(
  344. [[1.0, 4.0], [4.0, 10.0]], columns=cols[:2]))
  345. assert_frame_equal(blocks['int64'], DataFrame(
  346. [[3], [6]], columns=cols[2:]))
  347. def test_copy(self, mgr):
  348. cp = mgr.copy(deep=False)
  349. for blk, cp_blk in zip(mgr.blocks, cp.blocks):
  350. # view assertion
  351. assert cp_blk.equals(blk)
  352. if isinstance(blk.values, np.ndarray):
  353. assert cp_blk.values.base is blk.values.base
  354. else:
  355. # DatetimeTZBlock has DatetimeIndex values
  356. assert cp_blk.values._data.base is blk.values._data.base
  357. cp = mgr.copy(deep=True)
  358. for blk, cp_blk in zip(mgr.blocks, cp.blocks):
  359. # copy assertion we either have a None for a base or in case of
  360. # some blocks it is an array (e.g. datetimetz), but was copied
  361. assert cp_blk.equals(blk)
  362. if not isinstance(cp_blk.values, np.ndarray):
  363. assert cp_blk.values._data.base is not blk.values._data.base
  364. else:
  365. assert cp_blk.values.base is None and blk.values.base is None
  366. def test_sparse(self):
  367. mgr = create_mgr('a: sparse-1; b: sparse-2')
  368. # what to test here?
  369. assert mgr.as_array().dtype == np.float64
  370. def test_sparse_mixed(self):
  371. mgr = create_mgr('a: sparse-1; b: sparse-2; c: f8')
  372. assert len(mgr.blocks) == 3
  373. assert isinstance(mgr, BlockManager)
  374. # what to test here?
  375. def test_as_array_float(self):
  376. mgr = create_mgr('c: f4; d: f2; e: f8')
  377. assert mgr.as_array().dtype == np.float64
  378. mgr = create_mgr('c: f4; d: f2')
  379. assert mgr.as_array().dtype == np.float32
  380. def test_as_array_int_bool(self):
  381. mgr = create_mgr('a: bool-1; b: bool-2')
  382. assert mgr.as_array().dtype == np.bool_
  383. mgr = create_mgr('a: i8-1; b: i8-2; c: i4; d: i2; e: u1')
  384. assert mgr.as_array().dtype == np.int64
  385. mgr = create_mgr('c: i4; d: i2; e: u1')
  386. assert mgr.as_array().dtype == np.int32
  387. def test_as_array_datetime(self):
  388. mgr = create_mgr('h: datetime-1; g: datetime-2')
  389. assert mgr.as_array().dtype == 'M8[ns]'
  390. def test_as_array_datetime_tz(self):
  391. mgr = create_mgr('h: M8[ns, US/Eastern]; g: M8[ns, CET]')
  392. assert mgr.get('h').dtype == 'datetime64[ns, US/Eastern]'
  393. assert mgr.get('g').dtype == 'datetime64[ns, CET]'
  394. assert mgr.as_array().dtype == 'object'
  395. def test_astype(self):
  396. # coerce all
  397. mgr = create_mgr('c: f4; d: f2; e: f8')
  398. for t in ['float16', 'float32', 'float64', 'int32', 'int64']:
  399. t = np.dtype(t)
  400. tmgr = mgr.astype(t)
  401. assert tmgr.get('c').dtype.type == t
  402. assert tmgr.get('d').dtype.type == t
  403. assert tmgr.get('e').dtype.type == t
  404. # mixed
  405. mgr = create_mgr('a,b: object; c: bool; d: datetime;'
  406. 'e: f4; f: f2; g: f8')
  407. for t in ['float16', 'float32', 'float64', 'int32', 'int64']:
  408. t = np.dtype(t)
  409. tmgr = mgr.astype(t, errors='ignore')
  410. assert tmgr.get('c').dtype.type == t
  411. assert tmgr.get('e').dtype.type == t
  412. assert tmgr.get('f').dtype.type == t
  413. assert tmgr.get('g').dtype.type == t
  414. assert tmgr.get('a').dtype.type == np.object_
  415. assert tmgr.get('b').dtype.type == np.object_
  416. if t != np.int64:
  417. assert tmgr.get('d').dtype.type == np.datetime64
  418. else:
  419. assert tmgr.get('d').dtype.type == t
  420. def test_convert(self):
  421. def _compare(old_mgr, new_mgr):
  422. """ compare the blocks, numeric compare ==, object don't """
  423. old_blocks = set(old_mgr.blocks)
  424. new_blocks = set(new_mgr.blocks)
  425. assert len(old_blocks) == len(new_blocks)
  426. # compare non-numeric
  427. for b in old_blocks:
  428. found = False
  429. for nb in new_blocks:
  430. if (b.values == nb.values).all():
  431. found = True
  432. break
  433. assert found
  434. for b in new_blocks:
  435. found = False
  436. for ob in old_blocks:
  437. if (b.values == ob.values).all():
  438. found = True
  439. break
  440. assert found
  441. # noops
  442. mgr = create_mgr('f: i8; g: f8')
  443. new_mgr = mgr.convert()
  444. _compare(mgr, new_mgr)
  445. mgr = create_mgr('a, b: object; f: i8; g: f8')
  446. new_mgr = mgr.convert()
  447. _compare(mgr, new_mgr)
  448. # convert
  449. mgr = create_mgr('a,b,foo: object; f: i8; g: f8')
  450. mgr.set('a', np.array(['1'] * N, dtype=np.object_))
  451. mgr.set('b', np.array(['2.'] * N, dtype=np.object_))
  452. mgr.set('foo', np.array(['foo.'] * N, dtype=np.object_))
  453. new_mgr = mgr.convert(numeric=True)
  454. assert new_mgr.get('a').dtype == np.int64
  455. assert new_mgr.get('b').dtype == np.float64
  456. assert new_mgr.get('foo').dtype == np.object_
  457. assert new_mgr.get('f').dtype == np.int64
  458. assert new_mgr.get('g').dtype == np.float64
  459. mgr = create_mgr('a,b,foo: object; f: i4; bool: bool; dt: datetime;'
  460. 'i: i8; g: f8; h: f2')
  461. mgr.set('a', np.array(['1'] * N, dtype=np.object_))
  462. mgr.set('b', np.array(['2.'] * N, dtype=np.object_))
  463. mgr.set('foo', np.array(['foo.'] * N, dtype=np.object_))
  464. new_mgr = mgr.convert(numeric=True)
  465. assert new_mgr.get('a').dtype == np.int64
  466. assert new_mgr.get('b').dtype == np.float64
  467. assert new_mgr.get('foo').dtype == np.object_
  468. assert new_mgr.get('f').dtype == np.int32
  469. assert new_mgr.get('bool').dtype == np.bool_
  470. assert new_mgr.get('dt').dtype.type, np.datetime64
  471. assert new_mgr.get('i').dtype == np.int64
  472. assert new_mgr.get('g').dtype == np.float64
  473. assert new_mgr.get('h').dtype == np.float16
  474. def test_interleave(self):
  475. # self
  476. for dtype in ['f8', 'i8', 'object', 'bool', 'complex', 'M8[ns]',
  477. 'm8[ns]']:
  478. mgr = create_mgr('a: {0}'.format(dtype))
  479. assert mgr.as_array().dtype == dtype
  480. mgr = create_mgr('a: {0}; b: {0}'.format(dtype))
  481. assert mgr.as_array().dtype == dtype
  482. # will be converted according the actual dtype of the underlying
  483. mgr = create_mgr('a: category')
  484. assert mgr.as_array().dtype == 'i8'
  485. mgr = create_mgr('a: category; b: category')
  486. assert mgr.as_array().dtype == 'i8'
  487. mgr = create_mgr('a: category; b: category2')
  488. assert mgr.as_array().dtype == 'object'
  489. mgr = create_mgr('a: category2')
  490. assert mgr.as_array().dtype == 'object'
  491. mgr = create_mgr('a: category2; b: category2')
  492. assert mgr.as_array().dtype == 'object'
  493. # combinations
  494. mgr = create_mgr('a: f8')
  495. assert mgr.as_array().dtype == 'f8'
  496. mgr = create_mgr('a: f8; b: i8')
  497. assert mgr.as_array().dtype == 'f8'
  498. mgr = create_mgr('a: f4; b: i8')
  499. assert mgr.as_array().dtype == 'f8'
  500. mgr = create_mgr('a: f4; b: i8; d: object')
  501. assert mgr.as_array().dtype == 'object'
  502. mgr = create_mgr('a: bool; b: i8')
  503. assert mgr.as_array().dtype == 'object'
  504. mgr = create_mgr('a: complex')
  505. assert mgr.as_array().dtype == 'complex'
  506. mgr = create_mgr('a: f8; b: category')
  507. assert mgr.as_array().dtype == 'object'
  508. mgr = create_mgr('a: M8[ns]; b: category')
  509. assert mgr.as_array().dtype == 'object'
  510. mgr = create_mgr('a: M8[ns]; b: bool')
  511. assert mgr.as_array().dtype == 'object'
  512. mgr = create_mgr('a: M8[ns]; b: i8')
  513. assert mgr.as_array().dtype == 'object'
  514. mgr = create_mgr('a: m8[ns]; b: bool')
  515. assert mgr.as_array().dtype == 'object'
  516. mgr = create_mgr('a: m8[ns]; b: i8')
  517. assert mgr.as_array().dtype == 'object'
  518. mgr = create_mgr('a: M8[ns]; b: m8[ns]')
  519. assert mgr.as_array().dtype == 'object'
  520. def test_interleave_non_unique_cols(self):
  521. df = DataFrame([
  522. [pd.Timestamp('20130101'), 3.5],
  523. [pd.Timestamp('20130102'), 4.5]],
  524. columns=['x', 'x'],
  525. index=[1, 2])
  526. df_unique = df.copy()
  527. df_unique.columns = ['x', 'y']
  528. assert df_unique.values.shape == df.values.shape
  529. tm.assert_numpy_array_equal(df_unique.values[0], df.values[0])
  530. tm.assert_numpy_array_equal(df_unique.values[1], df.values[1])
  531. def test_consolidate(self):
  532. pass
  533. def test_consolidate_ordering_issues(self, mgr):
  534. mgr.set('f', randn(N))
  535. mgr.set('d', randn(N))
  536. mgr.set('b', randn(N))
  537. mgr.set('g', randn(N))
  538. mgr.set('h', randn(N))
  539. # we have datetime/tz blocks in mgr
  540. cons = mgr.consolidate()
  541. assert cons.nblocks == 4
  542. cons = mgr.consolidate().get_numeric_data()
  543. assert cons.nblocks == 1
  544. assert isinstance(cons.blocks[0].mgr_locs, BlockPlacement)
  545. tm.assert_numpy_array_equal(cons.blocks[0].mgr_locs.as_array,
  546. np.arange(len(cons.items), dtype=np.int64))
  547. def test_reindex_index(self):
  548. pass
  549. def test_reindex_items(self):
  550. # mgr is not consolidated, f8 & f8-2 blocks
  551. mgr = create_mgr('a: f8; b: i8; c: f8; d: i8; e: f8;'
  552. 'f: bool; g: f8-2')
  553. reindexed = mgr.reindex_axis(['g', 'c', 'a', 'd'], axis=0)
  554. assert reindexed.nblocks == 2
  555. tm.assert_index_equal(reindexed.items, pd.Index(['g', 'c', 'a', 'd']))
  556. assert_almost_equal(
  557. mgr.get('g', fastpath=False), reindexed.get('g', fastpath=False))
  558. assert_almost_equal(
  559. mgr.get('c', fastpath=False), reindexed.get('c', fastpath=False))
  560. assert_almost_equal(
  561. mgr.get('a', fastpath=False), reindexed.get('a', fastpath=False))
  562. assert_almost_equal(
  563. mgr.get('d', fastpath=False), reindexed.get('d', fastpath=False))
  564. assert_almost_equal(
  565. mgr.get('g').internal_values(),
  566. reindexed.get('g').internal_values())
  567. assert_almost_equal(
  568. mgr.get('c').internal_values(),
  569. reindexed.get('c').internal_values())
  570. assert_almost_equal(
  571. mgr.get('a').internal_values(),
  572. reindexed.get('a').internal_values())
  573. assert_almost_equal(
  574. mgr.get('d').internal_values(),
  575. reindexed.get('d').internal_values())
  576. def test_multiindex_xs(self):
  577. mgr = create_mgr('a,b,c: f8; d,e,f: i8')
  578. index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
  579. 'three']],
  580. codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
  581. [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
  582. names=['first', 'second'])
  583. mgr.set_axis(1, index)
  584. result = mgr.xs('bar', axis=1)
  585. assert result.shape == (6, 2)
  586. assert result.axes[1][0] == ('bar', 'one')
  587. assert result.axes[1][1] == ('bar', 'two')
  588. def test_get_numeric_data(self):
  589. mgr = create_mgr('int: int; float: float; complex: complex;'
  590. 'str: object; bool: bool; obj: object; dt: datetime',
  591. item_shape=(3, ))
  592. mgr.set('obj', np.array([1, 2, 3], dtype=np.object_))
  593. numeric = mgr.get_numeric_data()
  594. tm.assert_index_equal(numeric.items,
  595. pd.Index(['int', 'float', 'complex', 'bool']))
  596. assert_almost_equal(
  597. mgr.get('float', fastpath=False), numeric.get('float',
  598. fastpath=False))
  599. assert_almost_equal(
  600. mgr.get('float').internal_values(),
  601. numeric.get('float').internal_values())
  602. # Check sharing
  603. numeric.set('float', np.array([100., 200., 300.]))
  604. assert_almost_equal(
  605. mgr.get('float', fastpath=False), np.array([100., 200., 300.]))
  606. assert_almost_equal(
  607. mgr.get('float').internal_values(), np.array([100., 200., 300.]))
  608. numeric2 = mgr.get_numeric_data(copy=True)
  609. tm.assert_index_equal(numeric.items,
  610. pd.Index(['int', 'float', 'complex', 'bool']))
  611. numeric2.set('float', np.array([1000., 2000., 3000.]))
  612. assert_almost_equal(
  613. mgr.get('float', fastpath=False), np.array([100., 200., 300.]))
  614. assert_almost_equal(
  615. mgr.get('float').internal_values(), np.array([100., 200., 300.]))
  616. def test_get_bool_data(self):
  617. mgr = create_mgr('int: int; float: float; complex: complex;'
  618. 'str: object; bool: bool; obj: object; dt: datetime',
  619. item_shape=(3, ))
  620. mgr.set('obj', np.array([True, False, True], dtype=np.object_))
  621. bools = mgr.get_bool_data()
  622. tm.assert_index_equal(bools.items, pd.Index(['bool']))
  623. assert_almost_equal(mgr.get('bool', fastpath=False),
  624. bools.get('bool', fastpath=False))
  625. assert_almost_equal(
  626. mgr.get('bool').internal_values(),
  627. bools.get('bool').internal_values())
  628. bools.set('bool', np.array([True, False, True]))
  629. tm.assert_numpy_array_equal(mgr.get('bool', fastpath=False),
  630. np.array([True, False, True]))
  631. tm.assert_numpy_array_equal(mgr.get('bool').internal_values(),
  632. np.array([True, False, True]))
  633. # Check sharing
  634. bools2 = mgr.get_bool_data(copy=True)
  635. bools2.set('bool', np.array([False, True, False]))
  636. tm.assert_numpy_array_equal(mgr.get('bool', fastpath=False),
  637. np.array([True, False, True]))
  638. tm.assert_numpy_array_equal(mgr.get('bool').internal_values(),
  639. np.array([True, False, True]))
  640. def test_unicode_repr_doesnt_raise(self):
  641. repr(create_mgr(u('b,\u05d0: object')))
  642. def test_missing_unicode_key(self):
  643. df = DataFrame({"a": [1]})
  644. try:
  645. df.loc[:, u("\u05d0")] # should not raise UnicodeEncodeError
  646. except KeyError:
  647. pass # this is the expected exception
  def test_equals(self):
      """A manager equals one built from its blocks in reversed order."""
      specs = (
          'a,b,c: i8-1; d,e,f: i8-2',  # unique items
          'a,a,a: i8-1; b,b,b: i8-2',  # repeated items
      )
      for spec in specs:
          original = create_mgr(spec)
          flipped = BlockManager(original.blocks[::-1], original.axes)
          assert original.equals(flipped)
  def test_equals_block_order_different_dtypes(self):
      """equals() must hold in both directions for every block ordering."""
      # GH 9330
      specs = [
          "a:i8;b:f8",  # basic case
          "a:i8;b:f8;c:c8;d:b",  # many types
          "a:i8;e:dt;f:td;g:string",  # more types
          "a:i8;b:category;c:category2;d:category2",  # categories
          "c:sparse;d:sparse_na;b:f8",  # sparse
      ]

      for spec in specs:
          reference = create_mgr(spec)
          for ordering in itertools.permutations(reference.blocks):
              shuffled = BlockManager(ordering, reference.axes)
              assert reference.equals(shuffled)
              assert shuffled.equals(reference)
  def test_single_mgr_ctor(self):
      """A 5-row 'f8' single manager must materialize as 0.0 through 4.0."""
      single = create_single_mgr('f8', num_rows=5)
      values = single.as_array().tolist()
      assert values == [0., 1., 2., 3., 4.]
  def test_validate_bool_args(self):
      """replace_list must reject non-boolean values for ``inplace``."""
      mgr = create_mgr('a,b,c: i8-1; d,e,f: i8-2')

      for bad_flag in (1, "True", [1, 2, 3], 5.0):
          with pytest.raises(ValueError):
              mgr.replace_list([1], [2], inplace=bad_flag)
  class TestIndexing(object):
      # Nosetests-style data-driven tests.
      #
      # Each test applies an indexing routine to every block manager in
      # MANAGERS, along every axis, and compares the outcome to the result
      # of the same operation on the manager's np.ndarray form.
      #
      # Routines exercised:
      #   get_slice(slice_like, axis)
      #   take(indexer, axis)
      #   reindex_axis(new_labels, axis)
      #   reindex_indexer(new_labels, indexer, axis)
      #
      # NOTE: sparse (SparseBlock with fill_value != np.nan) fail a lot of
      #       tests and are disabled.

      MANAGERS = [
          create_single_mgr('f8', N),
          create_single_mgr('i8', N),

          # 2-dim
          create_mgr('a,b,c,d,e,f: f8', item_shape=(N,)),
          create_mgr('a,b,c,d,e,f: i8', item_shape=(N,)),
          create_mgr('a,b: f8; c,d: i8; e,f: string', item_shape=(N,)),
          create_mgr('a,b: f8; c,d: i8; e,f: f8', item_shape=(N,)),

          # 3-dim
          create_mgr('a,b,c,d,e,f: f8', item_shape=(N, N)),
          create_mgr('a,b,c,d,e,f: i8', item_shape=(N, N)),
          create_mgr('a,b: f8; c,d: i8; e,f: string', item_shape=(N, N)),
          create_mgr('a,b: f8; c,d: i8; e,f: f8', item_shape=(N, N)),
      ]

      def test_get_slice(self):
          """get_slice must agree with ndarray and Index indexing."""

          def check_slice(manager, axis, indexer):
              dense = manager.as_array()

              # An ndarray indexer might not span the full length of the
              # axis; when both are non-empty, pad it with False entries.
              if isinstance(indexer, np.ndarray):
                  labels = manager.axes[axis]
                  if (len(labels) and len(indexer) and
                          len(indexer) != len(labels)):
                      padding = np.zeros(len(labels) - len(indexer),
                                         dtype=bool)
                      indexer = np.concatenate([indexer, padding])

              result = manager.get_slice(indexer, axis=axis)
              dense_key = (slice(None), ) * axis + (indexer, )
              tm.assert_numpy_array_equal(dense[dense_key],
                                          result.as_array(),
                                          check_dtype=False)
              tm.assert_index_equal(manager.axes[axis][indexer],
                                    result.axes[axis])

          for manager in self.MANAGERS:
              for axis in range(manager.ndim):
                  size = manager.shape[axis]
                  at_least_three = size >= 3

                  indexers = [
                      # slice
                      slice(None),
                      slice(3),
                      slice(100),
                      slice(1, 4),
                      slice(3, 0, -2),
                      # boolean mask
                      np.array([], dtype=np.bool_),
                      np.ones(size, dtype=np.bool_),
                      np.zeros(size, dtype=np.bool_),
                  ]
                  if at_least_three:
                      indexers.append(np.arange(size) % 3 == 0)
                      indexers.append(
                          np.array([True, True, False], dtype=np.bool_))

                  # fancy indexer
                  indexers.append([])
                  indexers.append(lrange(size))
                  if at_least_three:
                      indexers.append([0, 1, 2])
                      indexers.append([-1, -2, -3])

                  for indexer in indexers:
                      check_slice(manager, axis, indexer)

      def test_take(self):
          """take must agree with np.take and Index.take on every axis."""

          def check_take(manager, axis, indexer):
              dense = manager.as_array()
              result = manager.take(indexer, axis)
              tm.assert_numpy_array_equal(np.take(dense, indexer, axis),
                                          result.as_array(),
                                          check_dtype=False)
              tm.assert_index_equal(manager.axes[axis].take(indexer),
                                    result.axes[axis])

          for manager in self.MANAGERS:
              for axis in range(manager.ndim):
                  size = manager.shape[axis]

                  # take/fancy indexer
                  indexers = [[], [0, 0, 0], lrange(size)]
                  if size >= 3:
                      indexers.append([0, 1, 2])
                      indexers.append([-1, -2, -3])

                  for indexer in indexers:
                      check_take(manager, axis, indexer)

      def test_reindex_axis(self):
          """reindex_axis must agree with algos.take_nd on the ndarray."""

          def check_reindex_axis(manager, axis, new_labels, fill_value):
              dense = manager.as_array()
              indexer = manager.axes[axis].get_indexer_for(new_labels)

              result = manager.reindex_axis(new_labels, axis,
                                            fill_value=fill_value)
              expected = algos.take_nd(dense, indexer, axis,
                                       fill_value=fill_value)
              tm.assert_numpy_array_equal(expected, result.as_array(),
                                          check_dtype=False)
              tm.assert_index_equal(result.axes[axis], new_labels)

          for manager in self.MANAGERS:
              for axis in range(manager.ndim):
                  for fill_value in (None, np.nan, 100.):
                      labels = manager.axes[axis]
                      targets = [
                          pd.Index([]),
                          labels,
                          labels[[0, 0, 0]],
                          pd.Index(['foo', 'bar', 'baz']),
                          pd.Index(['foo', labels[0], 'baz']),
                      ]
                      if manager.shape[axis] >= 3:
                          targets.append(labels[:-3])
                          targets.append(labels[-3::-1])
                          targets.append(labels[[0, 1, 2, 0, 1, 2]])

                      for new_labels in targets:
                          check_reindex_axis(manager, axis, new_labels,
                                             fill_value)

      def test_reindex_indexer(self):
          """reindex_indexer must agree with algos.take_nd on the ndarray."""

          def check_reindex_indexer(manager, axis, new_labels, indexer,
                                    fill_value):
              dense = manager.as_array()
              expected = algos.take_nd(dense, indexer, axis,
                                       fill_value=fill_value)
              result = manager.reindex_indexer(new_labels, indexer, axis,
                                               fill_value=fill_value)
              tm.assert_numpy_array_equal(expected, result.as_array(),
                                          check_dtype=False)
              tm.assert_index_equal(result.axes[axis], new_labels)

          for manager in self.MANAGERS:
              for axis in range(manager.ndim):
                  for fill_value in (None, np.nan, 100.):
                      labels = manager.axes[axis]
                      size = manager.shape[axis]

                      # (new_labels, indexer) pairs
                      cases = [
                          (pd.Index([]), []),
                          (labels, np.arange(size)),
                          (pd.Index(['foo'] * size), np.arange(size)),
                          (labels[::-1], np.arange(size)),
                          (labels, np.arange(size)[::-1]),
                          (pd.Index(['foo', 'bar', 'baz']), [0, 0, 0]),
                          (pd.Index(['foo', 'bar', 'baz']), [-1, 0, -1]),
                          (pd.Index(['foo', labels[0], 'baz']),
                           [-1, -1, -1]),
                      ]
                      if size >= 3:
                          cases.append(
                              (pd.Index(['foo', 'bar', 'baz']), [0, 1, 2]))

                      for new_labels, indexer in cases:
                          check_reindex_indexer(manager, axis, new_labels,
                                                indexer, fill_value)
  class TestBlockPlacement(object):
      # Table-driven checks of BlockPlacement built from slices and from
      # integer lists. Cases guarded by ``if not PY361`` only run when that
      # module-level flag is false.

      def test_slice_len(self):
          """len() of a slice-backed placement counts the selected positions."""
          expectations = [
              (slice(0, 4), 4),
              (slice(0, 4, 2), 2),
              (slice(0, 3, 2), 2),
              (slice(0, 1, 2), 1),
              (slice(1, 0, -1), 1),
          ]
          for slc, length in expectations:
              assert len(BlockPlacement(slc)) == length

      def test_zero_step_raises(self):
          """A slice with step 0 must be rejected with ValueError."""
          for slc in (slice(1, 1, 0), slice(1, 2, 0)):
              with pytest.raises(ValueError):
                  BlockPlacement(slc)

      def test_unbounded_slice_raises(self):
          """Slices with open or negative bounds must raise ValueError."""
          unbounded = [
              slice(None, None),
              slice(10, None),
              slice(None, None, -1),
              slice(None, 10, -1),

              # These are "unbounded" because negative index will change
              # depending on container shape.
              slice(-1, None),
              slice(None, -1),
              slice(-1, -1),
              slice(-1, None, -1),
              slice(None, -1, -1),
              slice(-1, -1, -1),
          ]
          for slc in unbounded:
              with pytest.raises(ValueError, match="unbounded slice"):
                  BlockPlacement(slc)

      def test_not_slice_like_slices(self):
          """These slices must not be reported as slice-like."""
          cases = [
              slice(0, 0),
              slice(100, 0),
              slice(100, 100, -1),
              slice(0, 100, -1),
              slice(0, 0),
              slice(100, 100),
          ]
          for slc in cases:
              assert not BlockPlacement(slc).is_slice_like

      def test_array_to_slice_conversion(self):
          """Evenly spaced integer lists must convert to the matching slice."""
          cases = [
              ([0], slice(0, 1, 1)),
              ([100], slice(100, 101, 1)),

              ([0, 1, 2], slice(0, 3, 1)),
              ([0, 5, 10], slice(0, 15, 5)),
              ([0, 100], slice(0, 200, 100)),

              ([2, 1], slice(2, 0, -1)),
          ]
          if not PY361:
              cases.append(([2, 1, 0], slice(2, None, -1)))
              cases.append(([100, 0], slice(100, None, -100)))

          for arr, expected in cases:
              assert BlockPlacement(arr).as_slice == expected

      def test_not_slice_like_arrays(self):
          """These integer lists must not be reported as slice-like."""
          cases = [
              [],
              [-1],
              [-1, -2, -3],
              [-10],
              [-1],
              [-1, 0, 1, 2],
              [-2, 0, 2, 4],
              [1, 0, -1],
              [1, 1, 1],
          ]
          for arr in cases:
              assert not BlockPlacement(arr).is_slice_like

      def test_slice_iter(self):
          """Iterating a slice-backed placement yields its positions."""
          cases = [
              (slice(0, 3), [0, 1, 2]),
              (slice(0, 0), []),
              (slice(3, 0), []),
          ]
          if not PY361:
              cases.append((slice(3, 0, -1), [3, 2, 1]))
              cases.append((slice(3, None, -1), [3, 2, 1, 0]))

          for slc, expected in cases:
              assert list(BlockPlacement(slc)) == expected

      def test_slice_to_array_conversion(self):
          """as_array must yield the slice's positions as an int64 array."""
          cases = [
              (slice(0, 3), [0, 1, 2]),
              (slice(0, 0), []),
              (slice(3, 0), []),

              (slice(3, 0, -1), [3, 2, 1]),
          ]
          if not PY361:
              cases.append((slice(3, None, -1), [3, 2, 1, 0]))
              cases.append((slice(31, None, -10), [31, 21, 11, 1]))

          for slc, positions in cases:
              tm.assert_numpy_array_equal(
                  BlockPlacement(slc).as_array,
                  np.asarray(positions, dtype=np.int64))

      def test_blockplacement_add(self):
          """add() accepts both a scalar and an array increment."""
          placement = BlockPlacement(slice(0, 5))

          shifted = placement.add(1)
          assert shifted.as_slice == slice(1, 6, 1)

          spread = placement.add(np.arange(5))
          assert spread.as_slice == slice(0, 10, 2)

          flattened = placement.add(np.arange(5, 0, -1))
          assert list(flattened) == [5, 5, 5, 5, 5]

      def test_blockplacement_add_int(self):
          """Integer add() shifts positions; going negative raises."""
          # (placement source, increment, expected positions)
          cases = [
              (slice(0, 0), 0, []),
              (slice(1, 4), 0, [1, 2, 3]),
              (slice(3, 0, -1), 0, [3, 2, 1]),
              ([1, 2, 4], 0, [1, 2, 4]),

              (slice(0, 0), 10, []),
              (slice(1, 4), 10, [11, 12, 13]),
              (slice(3, 0, -1), 10, [13, 12, 11]),
              ([1, 2, 4], 10, [11, 12, 14]),

              (slice(0, 0), -1, []),
              (slice(1, 4), -1, [0, 1, 2]),
              ([1, 2, 4], -1, [0, 1, 3]),
          ]
          for source, increment, expected in cases:
              assert list(BlockPlacement(source).add(increment)) == expected

          for source in (slice(1, 4), [1, 2, 4]):
              with pytest.raises(ValueError):
                  BlockPlacement(source).add(-10)

          if not PY361:
              guarded_cases = [
                  (slice(3, 0, -1), -1, [2, 1, 0]),
                  (slice(2, None, -1), 0, [2, 1, 0]),
                  (slice(2, None, -1), 10, [12, 11, 10]),
              ]
              for source, increment, expected in guarded_cases:
                  shifted = BlockPlacement(source).add(increment)
                  assert list(shifted) == expected

              with pytest.raises(ValueError):
                  BlockPlacement(slice(2, None, -1)).add(-1)
  class DummyElement(object):
      """Minimal array-like stand-in pairing a value with a NumPy dtype.

      Parameters
      ----------
      value : object
          The wrapped value; ``view`` additionally requires it to provide
          a ``view`` method of its own.
      dtype : object
          Anything accepted by ``np.dtype``; stored normalized.
      """

      def __init__(self, value, dtype):
          self.value = value
          self.dtype = np.dtype(dtype)

      def __array__(self):
          # Materialize the wrapped value using the currently stored dtype.
          return np.array(self.value, dtype=self.dtype)

      def __str__(self):
          return "DummyElement({}, {})".format(self.value, self.dtype)

      def __repr__(self):
          return str(self)

      def astype(self, dtype, copy=False):
          """Change the stored dtype in place and return ``self``.

          ``copy`` is accepted for signature compatibility and is ignored.
          """
          # Normalize exactly like __init__ so that ``self.dtype`` is always
          # an np.dtype and __str__ stays consistent after a cast.
          self.dtype = np.dtype(dtype)
          return self

      def view(self, dtype):
          """Return a new element wrapping ``self.value.view(dtype)``."""
          return type(self)(self.value.view(dtype), dtype)

      def any(self, axis=None):
          """Return the truthiness of the wrapped value; ``axis`` is unused."""
          return bool(self.value)
  class TestCanHoldElement(object):
      # Applies binary operators between a single-column DataFrame and a
      # scalar of the matching dtype, for each (value, dtype) x op pairing.

      @pytest.mark.parametrize('value, dtype', [
          (1, 'i8'),
          (1.0, 'f8'),
          (2**63, 'f8'),
          (1j, 'complex128'),
          (2**63, 'complex128'),
          (True, 'bool'),
          (np.timedelta64(20, 'ns'), '<m8[ns]'),
          (np.datetime64(20, 'ns'), '<M8[ns]'),
      ])
      @pytest.mark.parametrize('op', [
          operator.add,
          operator.sub,
          operator.mul,
          operator.truediv,
          operator.mod,
          operator.pow,
      ], ids=operator.attrgetter('__name__'))
      def test_binop_other(self, op, value, dtype):
          """Check the op raises TypeError or yields matching result dtypes."""
          # Pairings that are skipped outright.
          skipped = {(operator.add, 'bool'),
                     (operator.sub, 'bool'),
                     (operator.mul, 'bool'),
                     (operator.truediv, 'bool'),
                     (operator.mod, 'i8'),
                     (operator.mod, 'complex128'),
                     (operator.pow, 'bool')}
          if (op, dtype) in skipped:
              pytest.skip("Invalid combination {},{}".format(op, dtype))

          elem = DummyElement(value, dtype)
          frame = pd.DataFrame({"A": [elem.value, elem.value]},
                               dtype=elem.dtype)

          # Pairings for which the operation is expected to raise TypeError.
          raising = {(operator.pow, '<M8[ns]'),
                     (operator.mod, '<M8[ns]'),
                     (operator.truediv, '<M8[ns]'),
                     (operator.mul, '<M8[ns]'),
                     (operator.add, '<M8[ns]'),
                     (operator.pow, '<m8[ns]'),
                     (operator.mul, '<m8[ns]')}
          if (op, dtype) in raising:
              with pytest.raises(TypeError):
                  op(frame, elem.value)
              return

          # FIXME: Since dispatching to Series, this test no longer
          # asserts anything meaningful
          result = op(frame, elem.value).dtypes
          expected = op(frame, value).dtypes
          assert_series_equal(result, expected)
  @pytest.mark.parametrize('typestr, holder', [
      ('category', Categorical),
      ('M8[ns]', DatetimeArray),
      ('M8[ns, US/Central]', DatetimeArray),
      ('m8[ns]', TimedeltaArray),
      ('sparse', SparseArray),
  ])
  def test_holder(typestr, holder):
      """The block created for ``typestr`` must use ``holder`` as _holder."""
      block = create_block(typestr, [1])
      actual_holder = block._holder
      assert actual_holder is holder
  def test_deprecated_fastpath():
      """Passing fastpath=True to make_block must emit DeprecationWarning."""
      # GH#19265
      data = np.random.rand(3, 3)
      expect_warning = tm.assert_produces_warning(DeprecationWarning,
                                                  check_stacklevel=False)
      with expect_warning:
          make_block(data, placement=np.arange(3), fastpath=True)
  def test_validate_ndim():
      """make_block must reject 1-d values when ndim=2 is requested."""
      one_dim_values = np.array([1.0, 2.0])
      expected_message = (
          r"Wrong number of dimensions. values.ndim != ndim \[1 != 2\]")

      with pytest.raises(ValueError, match=expected_message):
          make_block(one_dim_values, slice(2), ndim=2)
  def test_block_shape():
      """Reindexed plain and categorical Series must share block locations."""
      target = pd.Index([0, 1, 2, 3, 4])
      plain = pd.Series([1, 2, 3]).reindex(target)
      categorical = pd.Series(pd.Categorical([1, 2, 3])).reindex(target)

      # Compare the placement indexer of the first block of each Series.
      plain_locs = plain._data.blocks[0].mgr_locs.indexer
      categorical_locs = categorical._data.blocks[0].mgr_locs.indexer
      assert plain_locs == categorical_locs