test_integer.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713
  1. # -*- coding: utf-8 -*-
  2. import numpy as np
  3. import pytest
  4. from pandas.core.dtypes.generic import ABCIndexClass
  5. import pandas as pd
  6. from pandas.api.types import is_float, is_float_dtype, is_integer, is_scalar
  7. from pandas.core.arrays import IntegerArray, integer_array
  8. from pandas.core.arrays.integer import (
  9. Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, UInt8Dtype, UInt16Dtype,
  10. UInt32Dtype, UInt64Dtype)
  11. from pandas.tests.extension.base import BaseOpsUtil
  12. import pandas.util.testing as tm
  13. def make_data():
  14. return (list(range(8)) +
  15. [np.nan] +
  16. list(range(10, 98)) +
  17. [np.nan] +
  18. [99, 100])
  19. @pytest.fixture(params=[Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype,
  20. UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype])
  21. def dtype(request):
  22. return request.param()
  23. @pytest.fixture
  24. def data(dtype):
  25. return integer_array(make_data(), dtype=dtype)
  26. @pytest.fixture
  27. def data_missing(dtype):
  28. return integer_array([np.nan, 1], dtype=dtype)
  29. @pytest.fixture(params=['data', 'data_missing'])
  30. def all_data(request, data, data_missing):
  31. """Parametrized fixture giving 'data' and 'data_missing'"""
  32. if request.param == 'data':
  33. return data
  34. elif request.param == 'data_missing':
  35. return data_missing
  36. def test_dtypes(dtype):
  37. # smoke tests on auto dtype construction
  38. if dtype.is_signed_integer:
  39. assert np.dtype(dtype.type).kind == 'i'
  40. else:
  41. assert np.dtype(dtype.type).kind == 'u'
  42. assert dtype.name is not None
  43. @pytest.mark.parametrize('dtype, expected', [
  44. (Int8Dtype(), 'Int8Dtype()'),
  45. (Int16Dtype(), 'Int16Dtype()'),
  46. (Int32Dtype(), 'Int32Dtype()'),
  47. (Int64Dtype(), 'Int64Dtype()'),
  48. (UInt8Dtype(), 'UInt8Dtype()'),
  49. (UInt16Dtype(), 'UInt16Dtype()'),
  50. (UInt32Dtype(), 'UInt32Dtype()'),
  51. (UInt64Dtype(), 'UInt64Dtype()'),
  52. ])
  53. def test_repr_dtype(dtype, expected):
  54. assert repr(dtype) == expected
  55. def test_repr_array():
  56. result = repr(integer_array([1, None, 3]))
  57. expected = (
  58. '<IntegerArray>\n'
  59. '[1, NaN, 3]\n'
  60. 'Length: 3, dtype: Int64'
  61. )
  62. assert result == expected
  63. def test_repr_array_long():
  64. data = integer_array([1, 2, None] * 1000)
  65. expected = (
  66. "<IntegerArray>\n"
  67. "[ 1, 2, NaN, 1, 2, NaN, 1, 2, NaN, 1,\n"
  68. " ...\n"
  69. " NaN, 1, 2, NaN, 1, 2, NaN, 1, 2, NaN]\n"
  70. "Length: 3000, dtype: Int64"
  71. )
  72. result = repr(data)
  73. assert result == expected
  74. class TestConstructors(object):
  75. def test_from_dtype_from_float(self, data):
  76. # construct from our dtype & string dtype
  77. dtype = data.dtype
  78. # from float
  79. expected = pd.Series(data)
  80. result = pd.Series(np.array(data).astype('float'), dtype=str(dtype))
  81. tm.assert_series_equal(result, expected)
  82. # from int / list
  83. expected = pd.Series(data)
  84. result = pd.Series(np.array(data).tolist(), dtype=str(dtype))
  85. tm.assert_series_equal(result, expected)
  86. # from int / array
  87. expected = pd.Series(data).dropna().reset_index(drop=True)
  88. dropped = np.array(data.dropna()).astype(np.dtype((dtype.type)))
  89. result = pd.Series(dropped, dtype=str(dtype))
  90. tm.assert_series_equal(result, expected)
  91. class TestArithmeticOps(BaseOpsUtil):
  92. def _check_divmod_op(self, s, op, other, exc=None):
  93. super(TestArithmeticOps, self)._check_divmod_op(s, op, other, None)
  94. def _check_op(self, s, op_name, other, exc=None):
  95. op = self.get_op_from_name(op_name)
  96. result = op(s, other)
  97. # compute expected
  98. mask = s.isna()
  99. # if s is a DataFrame, squeeze to a Series
  100. # for comparison
  101. if isinstance(s, pd.DataFrame):
  102. result = result.squeeze()
  103. s = s.squeeze()
  104. mask = mask.squeeze()
  105. # other array is an Integer
  106. if isinstance(other, IntegerArray):
  107. omask = getattr(other, 'mask', None)
  108. mask = getattr(other, 'data', other)
  109. if omask is not None:
  110. mask |= omask
  111. # 1 ** na is na, so need to unmask those
  112. if op_name == '__pow__':
  113. mask = np.where(s == 1, False, mask)
  114. elif op_name == '__rpow__':
  115. mask = np.where(other == 1, False, mask)
  116. # float result type or float op
  117. if ((is_float_dtype(other) or is_float(other) or
  118. op_name in ['__rtruediv__', '__truediv__',
  119. '__rdiv__', '__div__'])):
  120. rs = s.astype('float')
  121. expected = op(rs, other)
  122. self._check_op_float(result, expected, mask, s, op_name, other)
  123. # integer result type
  124. else:
  125. rs = pd.Series(s.values._data)
  126. expected = op(rs, other)
  127. self._check_op_integer(result, expected, mask, s, op_name, other)
  128. def _check_op_float(self, result, expected, mask, s, op_name, other):
  129. # check comparisions that are resulting in float dtypes
  130. expected[mask] = np.nan
  131. tm.assert_series_equal(result, expected)
  132. def _check_op_integer(self, result, expected, mask, s, op_name, other):
  133. # check comparisions that are resulting in integer dtypes
  134. # to compare properly, we convert the expected
  135. # to float, mask to nans and convert infs
  136. # if we have uints then we process as uints
  137. # then conert to float
  138. # and we ultimately want to create a IntArray
  139. # for comparisons
  140. fill_value = 0
  141. # mod/rmod turn floating 0 into NaN while
  142. # integer works as expected (no nan)
  143. if op_name in ['__mod__', '__rmod__']:
  144. if is_scalar(other):
  145. if other == 0:
  146. expected[s.values == 0] = 0
  147. else:
  148. expected = expected.fillna(0)
  149. else:
  150. expected[(s.values == 0) &
  151. ((expected == 0) | expected.isna())] = 0
  152. try:
  153. expected[(expected == np.inf) | (expected == -np.inf)] = fill_value
  154. original = expected
  155. expected = expected.astype(s.dtype)
  156. except ValueError:
  157. expected = expected.astype(float)
  158. expected[(expected == np.inf) | (expected == -np.inf)] = fill_value
  159. original = expected
  160. expected = expected.astype(s.dtype)
  161. expected[mask] = np.nan
  162. # assert that the expected astype is ok
  163. # (skip for unsigned as they have wrap around)
  164. if not s.dtype.is_unsigned_integer:
  165. original = pd.Series(original)
  166. # we need to fill with 0's to emulate what an astype('int') does
  167. # (truncation) for certain ops
  168. if op_name in ['__rtruediv__', '__rdiv__']:
  169. mask |= original.isna()
  170. original = original.fillna(0).astype('int')
  171. original = original.astype('float')
  172. original[mask] = np.nan
  173. tm.assert_series_equal(original, expected.astype('float'))
  174. # assert our expected result
  175. tm.assert_series_equal(result, expected)
  176. def test_arith_integer_array(self, data, all_arithmetic_operators):
  177. # we operate with a rhs of an integer array
  178. op = all_arithmetic_operators
  179. s = pd.Series(data)
  180. rhs = pd.Series([1] * len(data), dtype=data.dtype)
  181. rhs.iloc[-1] = np.nan
  182. self._check_op(s, op, rhs)
  183. def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
  184. # scalar
  185. op = all_arithmetic_operators
  186. s = pd.Series(data)
  187. self._check_op(s, op, 1, exc=TypeError)
  188. def test_arith_frame_with_scalar(self, data, all_arithmetic_operators):
  189. # frame & scalar
  190. op = all_arithmetic_operators
  191. df = pd.DataFrame({'A': data})
  192. self._check_op(df, op, 1, exc=TypeError)
  193. def test_arith_series_with_array(self, data, all_arithmetic_operators):
  194. # ndarray & other series
  195. op = all_arithmetic_operators
  196. s = pd.Series(data)
  197. other = np.ones(len(s), dtype=s.dtype.type)
  198. self._check_op(s, op, other, exc=TypeError)
  199. def test_arith_coerce_scalar(self, data, all_arithmetic_operators):
  200. op = all_arithmetic_operators
  201. s = pd.Series(data)
  202. other = 0.01
  203. self._check_op(s, op, other)
  204. @pytest.mark.parametrize("other", [1., 1.0, np.array(1.), np.array([1.])])
  205. def test_arithmetic_conversion(self, all_arithmetic_operators, other):
  206. # if we have a float operand we should have a float result
  207. # if that is equal to an integer
  208. op = self.get_op_from_name(all_arithmetic_operators)
  209. s = pd.Series([1, 2, 3], dtype='Int64')
  210. result = op(s, other)
  211. assert result.dtype is np.dtype('float')
  212. @pytest.mark.parametrize("other", [0, 0.5])
  213. def test_arith_zero_dim_ndarray(self, other):
  214. arr = integer_array([1, None, 2])
  215. result = arr + np.array(other)
  216. expected = arr + other
  217. tm.assert_equal(result, expected)
  218. def test_error(self, data, all_arithmetic_operators):
  219. # invalid ops
  220. op = all_arithmetic_operators
  221. s = pd.Series(data)
  222. ops = getattr(s, op)
  223. opa = getattr(data, op)
  224. # invalid scalars
  225. with pytest.raises(TypeError):
  226. ops('foo')
  227. with pytest.raises(TypeError):
  228. ops(pd.Timestamp('20180101'))
  229. # invalid array-likes
  230. with pytest.raises(TypeError):
  231. ops(pd.Series('foo', index=s.index))
  232. if op != '__rpow__':
  233. # TODO(extension)
  234. # rpow with a datetimelike coerces the integer array incorrectly
  235. with pytest.raises(TypeError):
  236. ops(pd.Series(pd.date_range('20180101', periods=len(s))))
  237. # 2d
  238. with pytest.raises(NotImplementedError):
  239. opa(pd.DataFrame({'A': s}))
  240. with pytest.raises(NotImplementedError):
  241. opa(np.arange(len(s)).reshape(-1, len(s)))
  242. def test_pow(self):
  243. # https://github.com/pandas-dev/pandas/issues/22022
  244. a = integer_array([1, np.nan, np.nan, 1])
  245. b = integer_array([1, np.nan, 1, np.nan])
  246. result = a ** b
  247. expected = pd.core.arrays.integer_array([1, np.nan, np.nan, 1])
  248. tm.assert_extension_array_equal(result, expected)
  249. def test_rpow_one_to_na(self):
  250. # https://github.com/pandas-dev/pandas/issues/22022
  251. arr = integer_array([np.nan, np.nan])
  252. result = np.array([1.0, 2.0]) ** arr
  253. expected = np.array([1.0, np.nan])
  254. tm.assert_numpy_array_equal(result, expected)
  255. class TestComparisonOps(BaseOpsUtil):
  256. def _compare_other(self, data, op_name, other):
  257. op = self.get_op_from_name(op_name)
  258. # array
  259. result = pd.Series(op(data, other))
  260. expected = pd.Series(op(data._data, other))
  261. # fill the nan locations
  262. expected[data._mask] = True if op_name == '__ne__' else False
  263. tm.assert_series_equal(result, expected)
  264. # series
  265. s = pd.Series(data)
  266. result = op(s, other)
  267. expected = pd.Series(data._data)
  268. expected = op(expected, other)
  269. # fill the nan locations
  270. expected[data._mask] = True if op_name == '__ne__' else False
  271. tm.assert_series_equal(result, expected)
  272. def test_compare_scalar(self, data, all_compare_operators):
  273. op_name = all_compare_operators
  274. self._compare_other(data, op_name, 0)
  275. def test_compare_array(self, data, all_compare_operators):
  276. op_name = all_compare_operators
  277. other = pd.Series([0] * len(data))
  278. self._compare_other(data, op_name, other)
  279. class TestCasting(object):
  280. pass
  281. @pytest.mark.parametrize('dropna', [True, False])
  282. def test_construct_index(self, all_data, dropna):
  283. # ensure that we do not coerce to Float64Index, rather
  284. # keep as Index
  285. all_data = all_data[:10]
  286. if dropna:
  287. other = np.array(all_data[~all_data.isna()])
  288. else:
  289. other = all_data
  290. result = pd.Index(integer_array(other, dtype=all_data.dtype))
  291. expected = pd.Index(other, dtype=object)
  292. tm.assert_index_equal(result, expected)
  293. @pytest.mark.parametrize('dropna', [True, False])
  294. def test_astype_index(self, all_data, dropna):
  295. # as an int/uint index to Index
  296. all_data = all_data[:10]
  297. if dropna:
  298. other = all_data[~all_data.isna()]
  299. else:
  300. other = all_data
  301. dtype = all_data.dtype
  302. idx = pd.Index(np.array(other))
  303. assert isinstance(idx, ABCIndexClass)
  304. result = idx.astype(dtype)
  305. expected = idx.astype(object).astype(dtype)
  306. tm.assert_index_equal(result, expected)
  307. def test_astype(self, all_data):
  308. all_data = all_data[:10]
  309. ints = all_data[~all_data.isna()]
  310. mixed = all_data
  311. dtype = Int8Dtype()
  312. # coerce to same type - ints
  313. s = pd.Series(ints)
  314. result = s.astype(all_data.dtype)
  315. expected = pd.Series(ints)
  316. tm.assert_series_equal(result, expected)
  317. # coerce to same other - ints
  318. s = pd.Series(ints)
  319. result = s.astype(dtype)
  320. expected = pd.Series(ints, dtype=dtype)
  321. tm.assert_series_equal(result, expected)
  322. # coerce to same numpy_dtype - ints
  323. s = pd.Series(ints)
  324. result = s.astype(all_data.dtype.numpy_dtype)
  325. expected = pd.Series(ints._data.astype(
  326. all_data.dtype.numpy_dtype))
  327. tm.assert_series_equal(result, expected)
  328. # coerce to same type - mixed
  329. s = pd.Series(mixed)
  330. result = s.astype(all_data.dtype)
  331. expected = pd.Series(mixed)
  332. tm.assert_series_equal(result, expected)
  333. # coerce to same other - mixed
  334. s = pd.Series(mixed)
  335. result = s.astype(dtype)
  336. expected = pd.Series(mixed, dtype=dtype)
  337. tm.assert_series_equal(result, expected)
  338. # coerce to same numpy_dtype - mixed
  339. s = pd.Series(mixed)
  340. with pytest.raises(ValueError):
  341. s.astype(all_data.dtype.numpy_dtype)
  342. # coerce to object
  343. s = pd.Series(mixed)
  344. result = s.astype('object')
  345. expected = pd.Series(np.asarray(mixed))
  346. tm.assert_series_equal(result, expected)
  347. @pytest.mark.parametrize('dtype', [Int8Dtype(), 'Int8',
  348. UInt32Dtype(), 'UInt32'])
  349. def test_astype_specific_casting(self, dtype):
  350. s = pd.Series([1, 2, 3], dtype='Int64')
  351. result = s.astype(dtype)
  352. expected = pd.Series([1, 2, 3], dtype=dtype)
  353. tm.assert_series_equal(result, expected)
  354. s = pd.Series([1, 2, 3, None], dtype='Int64')
  355. result = s.astype(dtype)
  356. expected = pd.Series([1, 2, 3, None], dtype=dtype)
  357. tm.assert_series_equal(result, expected)
  358. def test_construct_cast_invalid(self, dtype):
  359. msg = "cannot safely"
  360. arr = [1.2, 2.3, 3.7]
  361. with pytest.raises(TypeError, match=msg):
  362. integer_array(arr, dtype=dtype)
  363. with pytest.raises(TypeError, match=msg):
  364. pd.Series(arr).astype(dtype)
  365. arr = [1.2, 2.3, 3.7, np.nan]
  366. with pytest.raises(TypeError, match=msg):
  367. integer_array(arr, dtype=dtype)
  368. with pytest.raises(TypeError, match=msg):
  369. pd.Series(arr).astype(dtype)
  370. def test_frame_repr(data_missing):
  371. df = pd.DataFrame({'A': data_missing})
  372. result = repr(df)
  373. expected = ' A\n0 NaN\n1 1'
  374. assert result == expected
  375. def test_conversions(data_missing):
  376. # astype to object series
  377. df = pd.DataFrame({'A': data_missing})
  378. result = df['A'].astype('object')
  379. expected = pd.Series(np.array([np.nan, 1], dtype=object), name='A')
  380. tm.assert_series_equal(result, expected)
  381. # convert to object ndarray
  382. # we assert that we are exactly equal
  383. # including type conversions of scalars
  384. result = df['A'].astype('object').values
  385. expected = np.array([np.nan, 1], dtype=object)
  386. tm.assert_numpy_array_equal(result, expected)
  387. for r, e in zip(result, expected):
  388. if pd.isnull(r):
  389. assert pd.isnull(e)
  390. elif is_integer(r):
  391. # PY2 can be int or long
  392. assert r == e
  393. assert is_integer(e)
  394. else:
  395. assert r == e
  396. assert type(r) == type(e)
  397. def test_integer_array_constructor():
  398. values = np.array([1, 2, 3, 4], dtype='int64')
  399. mask = np.array([False, False, False, True], dtype='bool')
  400. result = IntegerArray(values, mask)
  401. expected = integer_array([1, 2, 3, np.nan], dtype='int64')
  402. tm.assert_extension_array_equal(result, expected)
  403. with pytest.raises(TypeError):
  404. IntegerArray(values.tolist(), mask)
  405. with pytest.raises(TypeError):
  406. IntegerArray(values, mask.tolist())
  407. with pytest.raises(TypeError):
  408. IntegerArray(values.astype(float), mask)
  409. with pytest.raises(TypeError):
  410. IntegerArray(values)
  411. @pytest.mark.parametrize('a, b', [
  412. ([1, None], [1, np.nan]),
  413. ([None], [np.nan]),
  414. ([None, np.nan], [np.nan, np.nan]),
  415. ([np.nan, np.nan], [np.nan, np.nan]),
  416. ])
  417. def test_integer_array_constructor_none_is_nan(a, b):
  418. result = integer_array(a)
  419. expected = integer_array(b)
  420. tm.assert_extension_array_equal(result, expected)
  421. def test_integer_array_constructor_copy():
  422. values = np.array([1, 2, 3, 4], dtype='int64')
  423. mask = np.array([False, False, False, True], dtype='bool')
  424. result = IntegerArray(values, mask)
  425. assert result._data is values
  426. assert result._mask is mask
  427. result = IntegerArray(values, mask, copy=True)
  428. assert result._data is not values
  429. assert result._mask is not mask
  430. @pytest.mark.parametrize(
  431. 'values',
  432. [
  433. ['foo', 'bar'],
  434. ['1', '2'],
  435. 'foo',
  436. 1,
  437. 1.0,
  438. pd.date_range('20130101', periods=2),
  439. np.array(['foo']),
  440. [[1, 2], [3, 4]],
  441. [np.nan, {'a': 1}]])
  442. def test_to_integer_array_error(values):
  443. # error in converting existing arrays to IntegerArrays
  444. with pytest.raises(TypeError):
  445. integer_array(values)
  446. def test_to_integer_array_inferred_dtype():
  447. # if values has dtype -> respect it
  448. result = integer_array(np.array([1, 2], dtype='int8'))
  449. assert result.dtype == Int8Dtype()
  450. result = integer_array(np.array([1, 2], dtype='int32'))
  451. assert result.dtype == Int32Dtype()
  452. # if values have no dtype -> always int64
  453. result = integer_array([1, 2])
  454. assert result.dtype == Int64Dtype()
  455. def test_to_integer_array_dtype_keyword():
  456. result = integer_array([1, 2], dtype='int8')
  457. assert result.dtype == Int8Dtype()
  458. # if values has dtype -> override it
  459. result = integer_array(np.array([1, 2], dtype='int8'), dtype='int32')
  460. assert result.dtype == Int32Dtype()
  461. def test_to_integer_array_float():
  462. result = integer_array([1., 2.])
  463. expected = integer_array([1, 2])
  464. tm.assert_extension_array_equal(result, expected)
  465. with pytest.raises(TypeError, match="cannot safely cast non-equivalent"):
  466. integer_array([1.5, 2.])
  467. # for float dtypes, the itemsize is not preserved
  468. result = integer_array(np.array([1., 2.], dtype='float32'))
  469. assert result.dtype == Int64Dtype()
  470. @pytest.mark.parametrize(
  471. 'values, to_dtype, result_dtype',
  472. [
  473. (np.array([1], dtype='int64'), None, Int64Dtype),
  474. (np.array([1, np.nan]), None, Int64Dtype),
  475. (np.array([1, np.nan]), 'int8', Int8Dtype)])
  476. def test_to_integer_array(values, to_dtype, result_dtype):
  477. # convert existing arrays to IntegerArrays
  478. result = integer_array(values, dtype=to_dtype)
  479. assert result.dtype == result_dtype()
  480. expected = integer_array(values, dtype=result_dtype())
  481. tm.assert_extension_array_equal(result, expected)
  482. def test_cross_type_arithmetic():
  483. df = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'),
  484. 'B': pd.Series([1, np.nan, 3], dtype='UInt8'),
  485. 'C': [1, 2, 3]})
  486. result = df.A + df.C
  487. expected = pd.Series([2, 4, np.nan], dtype='Int64')
  488. tm.assert_series_equal(result, expected)
  489. result = (df.A + df.C) * 3 == 12
  490. expected = pd.Series([False, True, False])
  491. tm.assert_series_equal(result, expected)
  492. result = df.A + df.B
  493. expected = pd.Series([2, np.nan, np.nan], dtype='Int64')
  494. tm.assert_series_equal(result, expected)
  495. @pytest.mark.parametrize('op', ['sum', 'min', 'max', 'prod'])
  496. def test_preserve_dtypes(op):
  497. # TODO(#22346): preserve Int64 dtype
  498. # for ops that enable (mean would actually work here
  499. # but generally it is a float return value)
  500. df = pd.DataFrame({
  501. "A": ['a', 'b', 'b'],
  502. "B": [1, None, 3],
  503. "C": integer_array([1, None, 3], dtype='Int64'),
  504. })
  505. # op
  506. result = getattr(df.C, op)()
  507. assert isinstance(result, int)
  508. # groupby
  509. result = getattr(df.groupby("A"), op)()
  510. expected = pd.DataFrame({
  511. "B": np.array([1.0, 3.0]),
  512. "C": integer_array([1, 3], dtype="Int64")
  513. }, index=pd.Index(['a', 'b'], name='A'))
  514. tm.assert_frame_equal(result, expected)
  515. @pytest.mark.parametrize('op', ['mean'])
  516. def test_reduce_to_float(op):
  517. # some reduce ops always return float, even if the result
  518. # is a rounded number
  519. df = pd.DataFrame({
  520. "A": ['a', 'b', 'b'],
  521. "B": [1, None, 3],
  522. "C": integer_array([1, None, 3], dtype='Int64'),
  523. })
  524. # op
  525. result = getattr(df.C, op)()
  526. assert isinstance(result, float)
  527. # groupby
  528. result = getattr(df.groupby("A"), op)()
  529. expected = pd.DataFrame({
  530. "B": np.array([1.0, 3.0]),
  531. "C": integer_array([1, 3], dtype="Int64")
  532. }, index=pd.Index(['a', 'b'], name='A'))
  533. tm.assert_frame_equal(result, expected)
  534. def test_astype_nansafe():
  535. # see gh-22343
  536. arr = integer_array([np.nan, 1, 2], dtype="Int8")
  537. msg = "cannot convert float NaN to integer"
  538. with pytest.raises(ValueError, match=msg):
  539. arr.astype('uint32')
  540. # TODO(jreback) - these need testing / are broken
  541. # shift
  542. # set_index (destroys type)