test_array.py 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203
  1. import operator
  2. import re
  3. import warnings
  4. import numpy as np
  5. import pytest
  6. from pandas._libs.sparse import IntIndex
  7. from pandas.compat import range
  8. import pandas.util._test_decorators as td
  9. import pandas as pd
  10. from pandas import isna
  11. from pandas.core.sparse.api import SparseArray, SparseDtype, SparseSeries
  12. import pandas.util.testing as tm
  13. from pandas.util.testing import assert_almost_equal
  @pytest.fixture(params=["integer", "block"])
  def kind(request):
      """Return the current fixture parameter ("integer" or "block")."""
      sparse_kind = request.param
      return sparse_kind
  17. class TestSparseArray(object):
  def setup_method(self, method):
      """Create the dense reference data and the two shared sparse arrays."""
      nan = np.nan
      dense = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6])
      self.arr_data = dense
      # Default fill value for ``arr``; explicit zero fill for ``zarr``.
      self.arr = SparseArray(dense)
      self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)
  def test_constructor_dtype(self):
      """Check the dtype and fill_value the constructor settles on."""
      # Float data without an explicit fill value: NaN fill.
      float_default = SparseArray([np.nan, 1, 2, np.nan])
      assert float_default.dtype == SparseDtype(np.float64, np.nan)
      assert float_default.dtype.subtype == np.float64
      assert np.isnan(float_default.fill_value)

      float_zero_fill = SparseArray([np.nan, 1, 2, np.nan], fill_value=0)
      assert float_zero_fill.dtype == SparseDtype(np.float64, 0)
      assert float_zero_fill.fill_value == 0

      # Integer data forced to float64 also gets a NaN fill.
      forced_float = SparseArray([0, 1, 2, 4], dtype=np.float64)
      assert forced_float.dtype == SparseDtype(np.float64, np.nan)
      assert np.isnan(forced_float.fill_value)

      # Every spelling below must produce int64 with a fill value of 0.
      int_variants = [
          {'dtype': np.int64},
          {'fill_value': 0, 'dtype': np.int64},
          {'dtype': None},
          {'fill_value': 0, 'dtype': None},
      ]
      for kwargs in int_variants:
          int_arr = SparseArray([0, 1, 2, 4], **kwargs)
          assert int_arr.dtype == SparseDtype(np.int64, 0)
          assert int_arr.fill_value == 0
  def test_constructor_dtype_str(self):
      """The dtype string 'int' gives the same array as the builtin int."""
      from_builtin = SparseArray([1, 2, 3], dtype=int)
      from_string = SparseArray([1, 2, 3], dtype='int')
      tm.assert_sp_array_equal(from_string, from_builtin)
  def test_constructor_sparse_dtype(self):
      """A SparseDtype argument supplies both the subtype and fill value."""
      sparse_dtype = SparseDtype('int64', -1)
      built = SparseArray([1, 0, 0, 1], dtype=sparse_dtype)
      reference = SparseArray([1, 0, 0, 1], fill_value=-1, dtype=np.int64)

      tm.assert_sp_array_equal(built, reference)
      assert built.sp_values.dtype == np.dtype('int64')
  def test_constructor_sparse_dtype_str(self):
      """The string 'Sparse[int32]' is accepted as a dtype."""
      built = SparseArray([1, 0, 0, 1], dtype='Sparse[int32]')
      reference = SparseArray([1, 0, 0, 1], dtype=np.int32)

      tm.assert_sp_array_equal(built, reference)
      assert built.sp_values.dtype == np.dtype('int32')
  def test_constructor_object_dtype(self):
      """Object-dtype arrays keep their fill value and element types."""
      # GH 11856
      # ``object``/``bool`` are used instead of the ``np.object``/``np.bool``
      # aliases, which NumPy deprecated and later removed.
      arr = SparseArray(['A', 'A', np.nan, 'B'], dtype=object)
      assert arr.dtype == SparseDtype(object)
      assert np.isnan(arr.fill_value)

      arr = SparseArray(['A', 'A', np.nan, 'B'], dtype=object,
                        fill_value='A')
      assert arr.dtype == SparseDtype(object, 'A')
      assert arr.fill_value == 'A'

      # GH 17574
      data = [False, 0, 100.0, 0.0]
      arr = SparseArray(data, dtype=object, fill_value=False)
      assert arr.dtype == SparseDtype(object, False)
      assert arr.fill_value is False
      arr_expected = np.array(data, dtype=object)
      # The exact type is compared on purpose: False, 0 and 0.0 are all
      # ``==`` to each other, so equality alone would not tell them apart.
      it = (type(x) is type(y) and x == y
            for x, y in zip(arr, arr_expected))
      assert np.fromiter(it, dtype=bool).all()
  @pytest.mark.parametrize("dtype", [SparseDtype(int, 0), int])
  def test_constructor_na_dtype(self, dtype):
      """Data containing NaN is rejected for the given integer dtypes."""
      data_with_nan = [0, 1, np.nan]
      with pytest.raises(ValueError, match="Cannot convert"):
          SparseArray(data_with_nan, dtype=dtype)
  def test_constructor_spindex_dtype(self):
      """Arrays built from sp_values plus an IntIndex are int64, fill 0."""
      def check(built, wanted):
          # Shared assertions for every construction variant below.
          tm.assert_sp_array_equal(built, wanted)
          assert built.dtype == SparseDtype(np.int64)
          assert built.fill_value == 0

      # XXX: Behavior change: specifying SparseIndex no longer changes the
      # fill_value
      check(SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2])),
            SparseArray([0, 1, 2, 0], kind='integer'))

      check(SparseArray(data=[1, 2, 3],
                        sparse_index=IntIndex(4, [1, 2, 3]),
                        dtype=np.int64, fill_value=0),
            SparseArray([0, 1, 2, 3], dtype=np.int64, fill_value=0))

      check(SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]),
                        fill_value=0, dtype=np.int64),
            SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64))

      check(SparseArray(data=[1, 2, 3],
                        sparse_index=IntIndex(4, [1, 2, 3]),
                        dtype=None, fill_value=0),
            SparseArray([0, 1, 2, 3], dtype=None))
  @pytest.mark.parametrize("sparse_index", [
      None, IntIndex(1, [0]),
  ])
  def test_constructor_spindex_dtype_scalar(self, sparse_index):
      """A scalar ``data`` argument builds a length-1 int64 array."""
      # scalar input: first with the parametrized index, then with an
      # explicit one-element IntIndex
      for index in (sparse_index, IntIndex(1, [0])):
          built = SparseArray(data=1, sparse_index=index, dtype=None)
          wanted = SparseArray([1], dtype=None)
          tm.assert_sp_array_equal(built, wanted)
          assert built.dtype == SparseDtype(np.int64)
          assert built.fill_value == 0
  def test_constructor_spindex_dtype_scalar_broadcasts(self):
      """sp_values [1, 2] at positions 1 and 2 match dense [0, 1, 2, 0]."""
      index = IntIndex(4, [1, 2])
      built = SparseArray(data=[1, 2], sparse_index=index,
                          fill_value=0, dtype=None)
      wanted = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None)

      tm.assert_sp_array_equal(built, wanted)
      assert built.dtype == SparseDtype(np.int64)
      assert built.fill_value == 0
  @pytest.mark.parametrize('data, fill_value', [
      (np.array([1, 2]), 0),
      (np.array([1.0, 2.0]), np.nan),
      ([True, False], False),
      ([pd.Timestamp('2017-01-01')], pd.NaT),
  ])
  def test_constructor_inferred_fill_value(self, data, fill_value):
      """The fill value inferred from ``data`` matches the expected one."""
      inferred = SparseArray(data).fill_value

      # NA-like values never compare equal, so they are checked via isna.
      if not pd.isna(fill_value):
          assert inferred == fill_value
      else:
          assert pd.isna(inferred)
  @pytest.mark.parametrize('scalar,dtype', [
      (False, SparseDtype(bool, False)),
      (0.0, SparseDtype('float64', 0)),
      (1, SparseDtype('int64', 1)),
      ('z', SparseDtype('object', 'z'))])
  def test_scalar_with_index_infer_dtype(self, scalar, dtype):
      """A scalar plus an index matches the equivalent list-built array."""
      # GH 19163
      from_scalar = SparseArray(scalar, index=[1, 2, 3], fill_value=scalar)
      from_list = SparseArray([scalar] * 3, fill_value=scalar)

      tm.assert_sp_array_equal(from_scalar, from_list)
      for built in (from_scalar, from_list):
          assert built.dtype == dtype
  @pytest.mark.parametrize("fill", [1, np.nan, 0])
  def test_sparse_series_round_trip(self, kind, fill):
      """SparseArray -> SparseSeries -> SparseArray gives an equal array."""
      # see gh-13999
      float_arr = SparseArray([np.nan, 1, np.nan, 2, 3],
                              kind=kind, fill_value=fill)
      round_tripped = SparseArray(SparseSeries(float_arr))
      tm.assert_sp_array_equal(float_arr, round_tripped)

      int_arr = SparseArray([0, 0, 0, 1, 1, 2], dtype=np.int64,
                            kind=kind, fill_value=fill)
      # Round trip once with an explicit dtype and once without.
      with_dtype = SparseArray(SparseSeries(int_arr), dtype=np.int64)
      tm.assert_sp_array_equal(int_arr, with_dtype)
      without_dtype = SparseArray(SparseSeries(int_arr))
      tm.assert_sp_array_equal(int_arr, without_dtype)
  @pytest.mark.parametrize("fill", [True, False, np.nan])
  def test_sparse_series_round_trip2(self, kind, fill):
      """A boolean SparseArray survives a SparseSeries round trip."""
      # see gh-13999
      # ``bool`` replaces the ``np.bool`` alias, which NumPy deprecated and
      # later removed.
      arr = SparseArray([True, False, True, True], dtype=bool,
                        kind=kind, fill_value=fill)
      # The original repeated this exact round trip and assertion twice;
      # a single check covers the same behaviour.
      res = SparseArray(SparseSeries(arr))
      tm.assert_sp_array_equal(arr, res)
  def test_get_item(self):
      """Scalar indexing returns stored values and fill values."""
      assert np.isnan(self.arr[1])
      for position, value in [(2, 1), (7, 5)]:
          assert self.arr[position] == value

      for position, value in [(0, 0), (2, 1), (7, 5)]:
          assert self.zarr[position] == value

      # Positions past either end must raise.
      errmsg = re.compile("bounds")
      for out_of_range in (11, -11):
          with pytest.raises(IndexError, match=errmsg):
              self.arr[out_of_range]

      last = len(self.arr) - 1
      assert self.arr[-1] == self.arr[last]
  def test_take_scalar_raises(self):
      """take() rejects a scalar ``indices`` argument with ValueError."""
      expected_msg = "'indices' must be an array, not a scalar '2'."
      with pytest.raises(ValueError, match=expected_msg):
          self.arr.take(2)
  def test_take(self):
      """take() agrees with np.take on the dense data."""
      for indices in ([2, 3], [0, 1, 2]):
          wanted = SparseArray(np.take(self.arr_data, indices))
          tm.assert_sp_array_equal(self.arr.take(indices), wanted)
  def test_take_fill_value(self):
      """take() on a zero-filled array agrees with np.take."""
      dense = np.array([1, np.nan, 0, 3, 0])
      sparse = SparseArray(dense, fill_value=0)

      for indices in ([0], [1, 3, 4]):
          wanted = SparseArray(np.take(dense, indices), fill_value=0)
          tm.assert_sp_array_equal(sparse.take(indices), wanted)
  def test_take_negative(self):
      """Negative indices in take() agree with np.take."""
      for indices in ([-1], [-4, -3, -2]):
          wanted = SparseArray(np.take(self.arr_data, indices))
          tm.assert_sp_array_equal(self.arr.take(indices), wanted)
  @pytest.mark.parametrize('fill_value', [0, None, np.nan])
  def test_shift_fill_value(self, fill_value):
      """shift(1) puts the requested fill value in the vacated slot."""
      # GH #24128
      sparse = SparseArray(np.array([1, 0, 0, 3, 0]), fill_value=8.0)
      shifted = sparse.shift(1, fill_value=fill_value)

      # An NA-like request is expected to show up as the dtype's na_value.
      leading = shifted.dtype.na_value if isna(fill_value) else fill_value
      wanted = SparseArray(np.array([leading, 1, 0, 0, 3]), fill_value=8.0)
      tm.assert_sp_array_equal(shifted, wanted)
  def test_bad_take(self):
      """take() raises IndexError for position 11."""
      out_of_range = [11]
      with pytest.raises(IndexError, match="bounds"):
          self.arr.take(out_of_range)
  def test_take_filling(self):
      """take(): -1 means the last element unless allow_fill=True."""
      # similar tests as GH 12631
      sparse = SparseArray([np.nan, np.nan, 1, np.nan, 4])

      taken = sparse.take(np.array([1, 0, -1]))
      tm.assert_sp_array_equal(taken, SparseArray([np.nan, np.nan, 4]))

      # XXX: test change: fill_value=True -> allow_fill=True
      taken = sparse.take(np.array([1, 0, -1]), allow_fill=True)
      tm.assert_sp_array_equal(taken,
                               SparseArray([np.nan, np.nan, np.nan]))

      # allow_fill=False
      taken = sparse.take(np.array([1, 0, -1]),
                          allow_fill=False, fill_value=True)
      tm.assert_sp_array_equal(taken, SparseArray([np.nan, np.nan, 4]))

      # With allow_fill=True, negative values other than -1 are rejected.
      msg = "Invalid value in 'indices'"
      for bad in ([1, 0, -2], [1, 0, -5]):
          with pytest.raises(ValueError, match=msg):
              sparse.take(np.array(bad), allow_fill=True)

      for bad in ([1, -6], [1, 5]):
          with pytest.raises(IndexError):
              sparse.take(np.array(bad))

      with pytest.raises(IndexError):
          sparse.take(np.array([1, 5]), allow_fill=True)
  def test_take_filling_fill_value(self):
      """take() filling behaviour on an array whose fill_value is 0."""
      # same tests as GH 12631
      sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0)

      taken = sparse.take(np.array([1, 0, -1]))
      wanted = SparseArray([0, np.nan, 4], fill_value=0)
      tm.assert_sp_array_equal(taken, wanted)

      # fill_value
      taken = sparse.take(np.array([1, 0, -1]), allow_fill=True)
      # XXX: behavior change.
      # the old way of filling self.fill_value doesn't follow EA rules.
      # It's supposed to be self.dtype.na_value (nan in this case)
      wanted = SparseArray([0, np.nan, np.nan], fill_value=0)
      tm.assert_sp_array_equal(taken, wanted)

      # allow_fill=False
      taken = sparse.take(np.array([1, 0, -1]),
                          allow_fill=False, fill_value=True)
      wanted = SparseArray([0, np.nan, 4], fill_value=0)
      tm.assert_sp_array_equal(taken, wanted)

      msg = ("Invalid value in 'indices'.")
      for bad in ([1, 0, -2], [1, 0, -5]):
          with pytest.raises(ValueError, match=msg):
              sparse.take(np.array(bad), allow_fill=True)

      for bad in ([1, -6], [1, 5]):
          with pytest.raises(IndexError):
              sparse.take(np.array(bad))

      with pytest.raises(IndexError):
          sparse.take(np.array([1, 5]), fill_value=True)
  def test_take_filling_all_nan(self):
      """take() on an all-NaN array returns NaNs and still checks bounds."""
      sparse = SparseArray([np.nan] * 5)

      # XXX: did the default kind from take change?
      for take_kwargs in ({}, {'fill_value': True}):
          taken = sparse.take(np.array([1, 0, -1]), **take_kwargs)
          wanted = SparseArray([np.nan, np.nan, np.nan], kind='block')
          tm.assert_sp_array_equal(taken, wanted)

      for bad in ([1, -6], [1, 5]):
          with pytest.raises(IndexError):
              sparse.take(np.array(bad))

      with pytest.raises(IndexError):
          sparse.take(np.array([1, 5]), fill_value=True)
  def test_set_item(self):
      """Item and slice assignment both raise TypeError."""
      expected_msg = "assignment via setitem"

      with pytest.raises(TypeError, match=expected_msg):
          self.arr[5] = 3

      with pytest.raises(TypeError, match=expected_msg):
          self.arr[1:5] = 2
  def test_constructor_from_too_large_array(self):
      """A 2-D input array is rejected with TypeError."""
      two_dimensional = np.arange(10).reshape((2, 5))
      with pytest.raises(TypeError, match="expected dimension <= 1 data"):
          SparseArray(two_dimensional)
  def test_constructor_from_sparse(self):
      """Wrapping a SparseArray keeps its fill value and sp_values."""
      rebuilt = SparseArray(self.zarr)
      assert rebuilt.fill_value == 0
      assert_almost_equal(rebuilt.sp_values, self.zarr.sp_values)
  def test_constructor_copy(self):
      """copy=True isolates sp_values; the default shares them."""
      head = slice(None, 3)

      # Writing through the copy must leave the source untouched.
      copied = SparseArray(self.arr, copy=True)
      copied.sp_values[head] = 0
      assert not (self.arr.sp_values[head] == 0).any()

      # Without copy=True the write is visible in the source.
      shared = SparseArray(self.arr)
      shared.sp_values[head] = 0
      assert (self.arr.sp_values[head] == 0).all()
  def test_constructor_bool(self):
      """Boolean data stores only the True entries and densifies to bool."""
      # GH 10648
      data = np.array([False, False, True, True, False, False])
      arr = SparseArray(data, fill_value=False, dtype=bool)

      assert arr.dtype == SparseDtype(bool)
      stored_values = np.array([True, True])
      tm.assert_numpy_array_equal(arr.sp_values, stored_values)
      # Behavior change: np.asarray densifies.
      # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
      stored_positions = np.array([2, 3], np.int32)
      tm.assert_numpy_array_equal(arr.sp_index.indices, stored_positions)

      dense_views = [arr.to_dense(), arr.values]
      for dense in dense_views:
          assert dense.dtype == bool
          tm.assert_numpy_array_equal(dense, data)
  def test_constructor_bool_fill_value(self):
      """Boolean arrays have a falsy fill value unless True is requested."""
      # ``bool`` replaces the ``np.bool`` alias, which NumPy deprecated and
      # later removed.
      arr = SparseArray([True, False, True], dtype=None)
      assert arr.dtype == SparseDtype(bool)
      assert not arr.fill_value

      arr = SparseArray([True, False, True], dtype=bool)
      assert arr.dtype == SparseDtype(bool)
      assert not arr.fill_value

      arr = SparseArray([True, False, True], dtype=bool, fill_value=True)
      assert arr.dtype == SparseDtype(bool, True)
      assert arr.fill_value
  def test_constructor_float32(self):
      """float32 input keeps its dtype in sp_values and when densified."""
      # GH 10648
      data = np.array([1., np.nan, 3], dtype=np.float32)
      arr = SparseArray(data, dtype=np.float32)

      assert arr.dtype == SparseDtype(np.float32)
      stored_values = np.array([1, 3], dtype=np.float32)
      tm.assert_numpy_array_equal(arr.sp_values, stored_values)
      # Behavior change: np.asarray densifies.
      # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
      stored_positions = np.array([0, 2], dtype=np.int32)
      tm.assert_numpy_array_equal(arr.sp_index.indices, stored_positions)

      dense_views = [arr.to_dense(), arr.values]
      for dense in dense_views:
          assert dense.dtype == np.float32
          tm.assert_numpy_array_equal(dense, data)
  def test_astype(self):
      """astype() to sparse float and int dtypes; NaN blocks int casts."""
      # float -> float
      arr = SparseArray([None, None, 0, 2])
      as_float32 = arr.astype("Sparse[float32]")
      wanted = SparseArray([None, None, 0, 2], dtype=np.dtype('float32'))
      tm.assert_sp_array_equal(as_float32, wanted)

      # Positions 2 and 3 are the stored entries in both targets below.
      float_zero = SparseDtype("float64", fill_value=0)
      as_float_zero = arr.astype(float_zero)
      stored = np.array([0., 2.], dtype=float_zero.subtype)
      wanted = SparseArray._simple_new(stored, IntIndex(4, [2, 3]),
                                       float_zero)
      tm.assert_sp_array_equal(as_float_zero, wanted)

      int_zero = SparseDtype("int64", 0)
      as_int_zero = arr.astype(int_zero)
      stored = np.array([0, 2], dtype=np.int64)
      wanted = SparseArray._simple_new(stored, IntIndex(4, [2, 3]),
                                       int_zero)
      tm.assert_sp_array_equal(as_int_zero, wanted)

      with_nan = SparseArray([0, np.nan, 0, 1], fill_value=0)
      with pytest.raises(ValueError, match='NA'):
          with_nan.astype('Sparse[i8]')
  def test_astype_bool(self):
      """astype(bool) converts values, with or without a new fill value."""
      source = pd.SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0))

      converted = source.astype(bool)
      wanted = SparseArray([True, 0, 0, True],
                           dtype=SparseDtype(bool, 0))
      tm.assert_sp_array_equal(converted, wanted)

      # update fill value
      bool_false = SparseDtype(bool, False)
      converted = source.astype(bool_false)
      wanted = SparseArray([True, False, False, True],
                           dtype=SparseDtype(bool, False))
      tm.assert_sp_array_equal(converted, wanted)
  def test_astype_all(self, any_real_dtype):
      """astype() to each dtype from the ``any_real_dtype`` fixture."""
      dense = np.array([1, 2, 3])
      target = np.dtype(any_real_dtype)

      converted = SparseArray(dense, fill_value=1).astype(target)

      assert converted.dtype == SparseDtype(target, 1)
      assert converted.sp_values.dtype == target
      tm.assert_numpy_array_equal(np.asarray(converted.values),
                                  dense.astype(target))
  @pytest.mark.parametrize('array, dtype, expected', [
      (SparseArray([0, 1]), 'float',
       SparseArray([0., 1.], dtype=SparseDtype(float, 0.0))),
      (SparseArray([0, 1]), bool, SparseArray([False, True])),
      (SparseArray([0, 1], fill_value=1), bool,
       SparseArray([False, True], dtype=SparseDtype(bool, True))),
      pytest.param(
          SparseArray([0, 1]), 'datetime64[ns]',
          SparseArray(np.array([0, 1], dtype='datetime64[ns]'),
                      dtype=SparseDtype('datetime64[ns]',
                                        pd.Timestamp('1970'))),
          marks=[pytest.mark.xfail(reason="NumPy-7619")],
      ),
      (SparseArray([0, 1, 10]), str,
       SparseArray(['0', '1', '10'], dtype=SparseDtype(str, '0'))),
      (SparseArray(['10', '20']), float, SparseArray([10.0, 20.0])),
      (SparseArray([0, 1, 0]), object,
       SparseArray([0, 1, 0], dtype=SparseDtype(object, 0))),
  ])
  def test_astype_more(self, array, dtype, expected):
      """astype(dtype) on each parametrized array gives ``expected``."""
      converted = array.astype(dtype)
      tm.assert_sp_array_equal(converted, expected)
  def test_astype_nan_raises(self):
      """Casting an array that contains NaN to int raises ValueError."""
      with_nan = SparseArray([1.0, np.nan])
      with pytest.raises(ValueError, match='Cannot convert non-finite'):
          with_nan.astype(int)
  def test_set_fill_value(self):
      """fill_value can be reassigned on float, int and bool arrays."""
      arr = SparseArray([1., np.nan, 2.], fill_value=np.nan)
      arr.fill_value = 2
      assert arr.fill_value == 2

      arr = SparseArray([1, 0, 2], fill_value=0, dtype=np.int64)
      arr.fill_value = 2
      assert arr.fill_value == 2

      # XXX: this seems fine? You can construct an integer
      # sparsearray with NaN fill value, why not update one?
      # coerces to int
      # msg = "unable to set fill_value 3\\.1 to int64 dtype"
      # with pytest.raises(ValueError, match=msg):
      arr.fill_value = 3.1
      assert arr.fill_value == 3.1

      # msg = "unable to set fill_value nan to int64 dtype"
      # with pytest.raises(ValueError, match=msg):
      arr.fill_value = np.nan
      assert np.isnan(arr.fill_value)

      # ``bool`` replaces the ``np.bool`` alias, which NumPy deprecated and
      # later removed.
      arr = SparseArray([True, False, True], fill_value=False, dtype=bool)
      arr.fill_value = True
      assert arr.fill_value

      # coerces to bool
      # msg = "unable to set fill_value 0 to bool dtype"
      # with pytest.raises(ValueError, match=msg):
      arr.fill_value = 0
      assert arr.fill_value == 0

      # msg = "unable to set fill_value nan to bool dtype"
      # with pytest.raises(ValueError, match=msg):
      arr.fill_value = np.nan
      assert np.isnan(arr.fill_value)
  @pytest.mark.parametrize("val", [[1, 2, 3], np.array([1, 2]), (1, 2, 3)])
  def test_set_fill_invalid_non_scalar(self, val):
      """Assigning a list, ndarray or tuple as fill_value raises."""
      # ``bool`` replaces the ``np.bool`` alias, which NumPy deprecated and
      # later removed.
      arr = SparseArray([True, False, True], fill_value=False, dtype=bool)
      msg = "fill_value must be a scalar"

      with pytest.raises(ValueError, match=msg):
          arr.fill_value = val
  def test_copy_shallow(self):
      """copy(deep=False) reuses the same sp_values and sp_index objects."""
      shallow = self.arr.copy(deep=False)
      for attr in ("sp_values", "sp_index"):
          assert getattr(shallow, attr) is getattr(self.arr, attr)
  def test_values_asarray(self):
      """Both ``values`` and ``to_dense()`` reproduce the dense data."""
      dense_from_attr = self.arr.values
      assert_almost_equal(dense_from_attr, self.arr_data)

      dense_from_method = self.arr.to_dense()
      assert_almost_equal(dense_from_method, self.arr_data)
  # ``object`` replaces the ``np.object`` alias, which NumPy deprecated and
  # later removed.
  @pytest.mark.parametrize('data,shape,dtype', [
      ([0, 0, 0, 0, 0], (5,), None),
      ([], (0,), None),
      ([0], (1,), None),
      (['A', 'A', np.nan, 'B'], (4,), object)
  ])
  def test_shape(self, data, shape, dtype):
      """``shape`` is a 1-tuple holding the number of input elements."""
      # GH 21126
      out = SparseArray(data, dtype=dtype)
      assert out.shape == shape
  @pytest.mark.parametrize("vals", [
      [np.nan, np.nan, np.nan, np.nan, np.nan],
      [1, np.nan, np.nan, 3, np.nan],
      [1, np.nan, 0, 3, 0],
  ])
  @pytest.mark.parametrize("method", ["to_dense", "get_values"])
  @pytest.mark.parametrize("fill_value", [None, 0])
  def test_dense_repr(self, vals, fill_value, method):
      """The named densifying method returns the original dense values."""
      dense = np.array(vals)
      sparse = SparseArray(dense, fill_value=fill_value)

      densified = getattr(sparse, method)()
      tm.assert_numpy_array_equal(densified, dense)
  def test_getitem(self):
      """Scalar indexing matches the dense values at i and -i."""
      for i in range(len(self.arr)):
          for position in (i, -i):
              assert_almost_equal(self.arr[position],
                                  self.arr.values[position])
  def test_getitem_arraylike_mask(self):
      """A list of booleans acts as a mask when indexing."""
      mask = [True, False, True]
      selected = SparseArray([0, 1, 2])[mask]
      tm.assert_sp_array_equal(selected, SparseArray([0, 2]))
  def test_getslice(self):
      """Slicing matches slicing the dense values, including past the ends."""
      head = self.arr[:-3]
      tm.assert_sp_array_equal(head, SparseArray(self.arr.values[:-3]))

      tail = self.arr[-4:]
      tm.assert_sp_array_equal(tail, SparseArray(self.arr.values[-4:]))

      # two corner cases from Series
      everything = self.arr[-12:]
      tm.assert_sp_array_equal(everything, SparseArray(self.arr))

      nothing = self.arr[:-12]
      tm.assert_sp_array_equal(nothing, SparseArray(self.arr.values[:0]))
  def test_getslice_tuple(self):
      """A 1-tuple slice works; a 2-tuple raises, as it does for ndarray."""
      dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0])

      sparse = SparseArray(dense)
      sliced = sparse[4:, ]
      tm.assert_sp_array_equal(sliced, SparseArray(dense[4:, ]))

      sparse = SparseArray(dense, fill_value=0)
      sliced = sparse[4:, ]
      wanted = SparseArray(dense[4:, ], fill_value=0)
      tm.assert_sp_array_equal(sliced, wanted)

      with pytest.raises(IndexError):
          sparse[4:, :]

      # check numpy compat
      with pytest.raises(IndexError):
          dense[4:, :]
  def test_boolean_slice_empty(self):
      """An all-False mask gives a result with the original dtype."""
      source = pd.SparseArray([0, 1, 2])
      all_false = [False, False, False]
      assert source[all_false].dtype == source.dtype
  @pytest.mark.parametrize("op", ["add", "sub", "mul",
                                  "truediv", "floordiv", "pow"])
  def test_binary_operators(self, op):
      """Binary ops on sparse arrays agree with the same op on dense data."""
      op = getattr(operator, op)

      dense_a = np.random.randn(20)
      dense_b = np.random.randn(20)

      # First pair: NaN holes with the default fill value.
      dense_a[::2] = np.nan
      dense_b[::3] = np.nan
      nan_a = SparseArray(dense_a)
      nan_b = SparseArray(dense_b)

      # Second pair: the same positions hold 3, which is also the fill.
      dense_a[::2] = 3
      dense_b[::3] = 3
      three_a = SparseArray(dense_a, fill_value=3)
      three_b = SparseArray(dense_b, fill_value=3)

      def _check_op(op, first, second):
          sparse_res = op(first, second)
          dense_res = SparseArray(op(first.values, second.values),
                                  fill_value=first.fill_value)
          assert isinstance(sparse_res, SparseArray)
          assert_almost_equal(sparse_res.values, dense_res.values)

          # sparse <op> dense
          right_dense = op(first, second.values)
          assert isinstance(right_dense, SparseArray)
          tm.assert_sp_array_equal(sparse_res, right_dense)

          # dense <op> sparse
          left_dense = op(first.values, second)
          assert isinstance(left_dense, SparseArray)
          tm.assert_sp_array_equal(sparse_res, left_dense)

          # sparse <op> scalar
          with_scalar = op(first, 4)
          assert isinstance(with_scalar, SparseArray)

          # Ignore this if the actual op raises (e.g. pow).
          try:
              dense_scalar = op(first.values, 4)
              fill_scalar = op(first.fill_value, 4)
          except ValueError:
              pass
          else:
              assert_almost_equal(with_scalar.fill_value, fill_scalar)
              assert_almost_equal(with_scalar.values, dense_scalar)

      pairs = [(nan_a, nan_b), (three_a, three_b)]
      with np.errstate(all="ignore"):
          for first_arr, second_arr in pairs:
              _check_op(op, first_arr, second_arr)
  def test_pickle(self):
      """Both fixture arrays equal themselves after a pickle round trip."""
      for original in (self.arr, self.zarr):
          restored = tm.round_trip_pickle(original)
          tm.assert_sp_array_equal(restored, original)
  def test_generator_warnings(self):
      """Iterating a SparseArray records no warnings."""
      sp_arr = SparseArray([1, 2, 3])
      with warnings.catch_warnings(record=True) as caught:
          # Make both deprecation categories always visible.
          for category in (DeprecationWarning, PendingDeprecationWarning):
              warnings.filterwarnings(action='always', category=category)
          for _ in sp_arr:
              pass
          assert len(caught) == 0
  def test_fillna(self):
      """fillna(-1) across float/int data and several fill values."""
      nan = np.nan
      # (input data, constructor kwargs, dense result, resulting fill_value)
      float_cases = [
          ([1, nan, nan, 3, nan], {}, [1, -1, -1, 3, -1], -1),
          ([1, nan, nan, 3, nan], {'fill_value': 0}, [1, -1, -1, 3, -1], 0),
          ([1, nan, 0, 3, 0], {}, [1, -1, 0, 3, 0], -1),
          ([1, nan, 0, 3, 0], {'fill_value': 0}, [1, -1, 0, 3, 0], 0),
          ([nan, nan, nan, nan], {}, [-1, -1, -1, -1], -1),
          ([nan, nan, nan, nan], {'fill_value': 0}, [-1, -1, -1, -1], 0),
      ]
      for data, kwargs, filled, new_fill in float_cases:
          res = SparseArray(data, **kwargs).fillna(-1)
          exp = SparseArray(filled, fill_value=new_fill, dtype=np.float64)
          tm.assert_sp_array_equal(res, exp)

      # float dtype's fill_value is np.nan, replaced by -1
      floats = SparseArray([0., 0., 0., 0.])
      res = floats.fillna(-1)
      exp = SparseArray([0., 0., 0., 0.], fill_value=-1)
      tm.assert_sp_array_equal(res, exp)

      # int dtype shouldn't have missing. No changes.
      ints = SparseArray([0, 0, 0, 0])
      assert ints.dtype == SparseDtype(np.int64)
      assert ints.fill_value == 0
      res = ints.fillna(-1)
      tm.assert_sp_array_equal(res, ints)

      ints_zero_fill = SparseArray([0, 0, 0, 0], fill_value=0)
      assert ints_zero_fill.dtype == SparseDtype(np.int64)
      assert ints_zero_fill.fill_value == 0
      res = ints_zero_fill.fillna(-1)
      exp = SparseArray([0, 0, 0, 0], fill_value=0)
      tm.assert_sp_array_equal(res, exp)

      # fill_value can be nan if there is no missing hole.
      # only fill_value will be changed
      ints_nan_fill = SparseArray([0, 0, 0, 0], fill_value=np.nan)
      assert ints_nan_fill.dtype == SparseDtype(np.int64, fill_value=np.nan)
      assert np.isnan(ints_nan_fill.fill_value)
      res = ints_nan_fill.fillna(-1)
      exp = SparseArray([0, 0, 0, 0], fill_value=-1)
      tm.assert_sp_array_equal(res, exp)
  def test_fillna_overlap(self):
      """fillna() with a value that is already stored keeps that value."""
      raw = [1, np.nan, np.nan, 3, np.nan]

      # Filling with a value that already exists must not turn that value
      # into the fill_value: the pre-existing 3 stays in sp_values, which
      # is why only the dense representation is compared here.
      filled = SparseArray(raw).fillna(3)
      dense_expected = np.array([1, 3, 3, 3, 3], dtype=np.float64)
      tm.assert_numpy_array_equal(filled.to_dense(), dense_expected)

      # With an explicit fill_value of 0 the sparse result is compared
      # directly.
      filled = SparseArray(raw, fill_value=0).fillna(3)
      sparse_expected = SparseArray([1, 3, 3, 3, 3], fill_value=0,
                                    dtype=np.float64)
      tm.assert_sp_array_equal(filled, sparse_expected)
  def test_nonzero(self):
      """nonzero() returns positions 2, 5 and 9 for both sample arrays."""
      # Tests regression #21172.
      with_nans = pd.SparseArray([
          float('nan'), float('nan'),
          1, 0, 0,
          2, 0, 0, 0,
          3, 0, 0,
      ])
      positions = np.array([2, 5, 9], dtype=np.int32)
      (found,) = with_nans.nonzero()
      tm.assert_numpy_array_equal(positions, found)

      # Same layout, with zeros in place of the leading NaNs.
      zeros_only = pd.SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0])
      (found,) = zeros_only.nonzero()
      tm.assert_numpy_array_equal(positions, found)
  class TestSparseArrayAnalytics(object):
      """Reductions, cumsum, ufuncs and size properties of SparseArray."""

      @pytest.mark.parametrize('data,pos,neg', [
          ([True, True, True], True, False),
          ([1, 2, 1], 1, 0),
          ([1.0, 2.0, 1.0], 1.0, 0.0)
      ])
      def test_all(self, data, pos, neg):
          """all() is truthy for truthy data, falsy once ``neg`` is present."""
          # GH 17570
          # Work on a copy: the list belongs to the parametrize table and
          # an in-place edit would leak into any re-run of this test.
          data = list(data)

          out = SparseArray(data).all()
          assert out

          out = SparseArray(data, fill_value=pos).all()
          assert out

          data[1] = neg
          out = SparseArray(data).all()
          assert not out

          out = SparseArray(data, fill_value=pos).all()
          assert not out

      @pytest.mark.parametrize('data,pos,neg', [
          ([True, True, True], True, False),
          ([1, 2, 1], 1, 0),
          ([1.0, 2.0, 1.0], 1.0, 0.0)
      ])
      @td.skip_if_np_lt_115  # prior didn't dispatch
      def test_numpy_all(self, data, pos, neg):
          """np.all() gives the same answers and rejects the ``out`` argument."""
          # GH 17570
          # Copy so the shared parametrize list is never mutated.
          data = list(data)

          out = np.all(SparseArray(data))
          assert out

          out = np.all(SparseArray(data, fill_value=pos))
          assert out

          data[1] = neg
          out = np.all(SparseArray(data))
          assert not out

          out = np.all(SparseArray(data, fill_value=pos))
          assert not out

          # raises with a different message on py2.
          msg = "the \'out\' parameter is not supported"
          with pytest.raises(ValueError, match=msg):
              np.all(SparseArray(data), out=np.array([]))

      @pytest.mark.parametrize('data,pos,neg', [
          ([False, True, False], True, False),
          ([0, 2, 0], 2, 0),
          ([0.0, 2.0, 0.0], 2.0, 0.0)
      ])
      def test_any(self, data, pos, neg):
          """any() is truthy with one truthy entry, falsy once it is ``neg``."""
          # GH 17570
          # Copy so the shared parametrize list is never mutated.
          data = list(data)

          out = SparseArray(data).any()
          assert out

          out = SparseArray(data, fill_value=pos).any()
          assert out

          data[1] = neg
          out = SparseArray(data).any()
          assert not out

          out = SparseArray(data, fill_value=pos).any()
          assert not out

      @pytest.mark.parametrize('data,pos,neg', [
          ([False, True, False], True, False),
          ([0, 2, 0], 2, 0),
          ([0.0, 2.0, 0.0], 2.0, 0.0)
      ])
      @td.skip_if_np_lt_115  # prior didn't dispatch
      def test_numpy_any(self, data, pos, neg):
          """np.any() gives the same answers and rejects the ``out`` argument."""
          # GH 17570
          # Copy so the shared parametrize list is never mutated.
          data = list(data)

          out = np.any(SparseArray(data))
          assert out

          out = np.any(SparseArray(data, fill_value=pos))
          assert out

          data[1] = neg
          out = np.any(SparseArray(data))
          assert not out

          out = np.any(SparseArray(data, fill_value=pos))
          assert not out

          msg = "the \'out\' parameter is not supported"
          with pytest.raises(ValueError, match=msg):
              np.any(SparseArray(data), out=out)

      def test_sum(self):
          """sum() adds the values and leaves a NaN entry out of the total."""
          data = np.arange(10).astype(float)
          out = SparseArray(data).sum()
          assert out == 45.0

          data[5] = np.nan
          out = SparseArray(data, fill_value=2).sum()
          assert out == 40.0

          out = SparseArray(data, fill_value=np.nan).sum()
          assert out == 40.0

      def test_numpy_sum(self):
          """np.sum() matches sum() and rejects ``dtype`` and ``out``."""
          data = np.arange(10).astype(float)
          out = np.sum(SparseArray(data))
          assert out == 45.0

          data[5] = np.nan
          out = np.sum(SparseArray(data, fill_value=2))
          assert out == 40.0

          out = np.sum(SparseArray(data, fill_value=np.nan))
          assert out == 40.0

          msg = "the 'dtype' parameter is not supported"
          with pytest.raises(ValueError, match=msg):
              np.sum(SparseArray(data), dtype=np.int64)

          msg = "the 'out' parameter is not supported"
          with pytest.raises(ValueError, match=msg):
              np.sum(SparseArray(data), out=out)

      @pytest.mark.parametrize("data,expected", [
          (np.array([1, 2, 3, 4, 5], dtype=float),  # non-null data
           SparseArray(np.array([1.0, 3.0, 6.0, 10.0, 15.0]))),
          (np.array([1, 2, np.nan, 4, 5], dtype=float),  # null data
           SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0])))
      ])
      @pytest.mark.parametrize("numpy", [True, False])
      def test_cumsum(self, data, expected, numpy):
          """cumsum gives ``expected`` via the method and via np.cumsum."""
          cumsum = np.cumsum if numpy else lambda s: s.cumsum()

          out = cumsum(SparseArray(data))
          tm.assert_sp_array_equal(out, expected)

          out = cumsum(SparseArray(data, fill_value=np.nan))
          tm.assert_sp_array_equal(out, expected)

          out = cumsum(SparseArray(data, fill_value=2))
          tm.assert_sp_array_equal(out, expected)

          if numpy:  # numpy compatibility checks.
              msg = "the 'dtype' parameter is not supported"
              with pytest.raises(ValueError, match=msg):
                  np.cumsum(SparseArray(data), dtype=np.int64)

              msg = "the 'out' parameter is not supported"
              with pytest.raises(ValueError, match=msg):
                  np.cumsum(SparseArray(data), out=out)
          else:
              # SparseArray currently 1-D, so only axis = 0 is valid.
              axis = 1
              msg = "axis\\(={axis}\\) out of bounds".format(axis=axis)
              with pytest.raises(ValueError, match=msg):
                  SparseArray(data).cumsum(axis=axis)

      def test_mean(self):
          """mean() averages over the non-NaN entries only."""
          data = np.arange(10).astype(float)
          out = SparseArray(data).mean()
          assert out == 4.5

          data[5] = np.nan
          out = SparseArray(data).mean()
          assert out == 40.0 / 9

      def test_numpy_mean(self):
          """np.mean() matches mean() and rejects ``dtype`` and ``out``."""
          data = np.arange(10).astype(float)
          out = np.mean(SparseArray(data))
          assert out == 4.5

          data[5] = np.nan
          out = np.mean(SparseArray(data))
          assert out == 40.0 / 9

          msg = "the 'dtype' parameter is not supported"
          with pytest.raises(ValueError, match=msg):
              np.mean(SparseArray(data), dtype=np.int64)

          msg = "the 'out' parameter is not supported"
          with pytest.raises(ValueError, match=msg):
              np.mean(SparseArray(data), out=out)

      def test_ufunc(self):
          """Unary ufuncs (abs, sin) are applied to sp_values and fill_value."""
          # GH 13853 make sure ufunc is applied to fill_value
          sparse = SparseArray([1, np.nan, 2, np.nan, -2])
          result = SparseArray([1, np.nan, 2, np.nan, 2])
          tm.assert_sp_array_equal(abs(sparse), result)
          tm.assert_sp_array_equal(np.abs(sparse), result)

          sparse = SparseArray([1, -1, 2, -2], fill_value=1)
          result = SparseArray([1, 2, 2], sparse_index=sparse.sp_index,
                               fill_value=1)
          tm.assert_sp_array_equal(abs(sparse), result)
          tm.assert_sp_array_equal(np.abs(sparse), result)

          sparse = SparseArray([1, -1, 2, -2], fill_value=-1)
          result = SparseArray([1, 2, 2], sparse_index=sparse.sp_index,
                               fill_value=1)
          tm.assert_sp_array_equal(abs(sparse), result)
          tm.assert_sp_array_equal(np.abs(sparse), result)

          sparse = SparseArray([1, np.nan, 2, np.nan, -2])
          result = SparseArray(np.sin([1, np.nan, 2, np.nan, -2]))
          tm.assert_sp_array_equal(np.sin(sparse), result)

          sparse = SparseArray([1, -1, 2, -2], fill_value=1)
          result = SparseArray(np.sin([1, -1, 2, -2]), fill_value=np.sin(1))
          tm.assert_sp_array_equal(np.sin(sparse), result)

          sparse = SparseArray([1, -1, 0, -2], fill_value=0)
          result = SparseArray(np.sin([1, -1, 0, -2]), fill_value=np.sin(0))
          tm.assert_sp_array_equal(np.sin(sparse), result)

      def test_ufunc_args(self):
          """Binary ufuncs (np.add) also transform the fill_value."""
          # GH 13853 make sure ufunc is applied to fill_value, including
          # its arg
          sparse = SparseArray([1, np.nan, 2, np.nan, -2])
          result = SparseArray([2, np.nan, 3, np.nan, -1])
          tm.assert_sp_array_equal(np.add(sparse, 1), result)

          sparse = SparseArray([1, -1, 2, -2], fill_value=1)
          result = SparseArray([2, 0, 3, -1], fill_value=2)
          tm.assert_sp_array_equal(np.add(sparse, 1), result)

          sparse = SparseArray([1, -1, 0, -2], fill_value=0)
          result = SparseArray([2, 0, 1, -1], fill_value=1)
          tm.assert_sp_array_equal(np.add(sparse, 1), result)

      def test_nbytes_integer(self):
          """nbytes for kind='integer' matches the expected 24 bytes."""
          arr = SparseArray([1, 0, 0, 0, 2], kind='integer')
          result = arr.nbytes
          # (2 * 8) + 2 * 4
          assert result == 24

      def test_nbytes_block(self):
          """nbytes for kind='block' matches the expected 24 bytes."""
          arr = SparseArray([1, 2, 0, 0, 0], kind='block')
          result = arr.nbytes
          # (2 * 8) + 4 + 4
          # sp_values, blocs, blengths
          assert result == 24

      def test_asarray_datetime64(self):
          """np.asarray() on a datetime64 SparseArray does not raise."""
          s = pd.SparseArray(
              pd.to_datetime(['2012', None, None, '2013'])
          )
          # Smoke test only: there is no assertion on the converted values.
          np.asarray(s)

      def test_density(self):
          """density is 0.5 when one of two values is stored."""
          arr = SparseArray([0, 1])
          assert arr.density == 0.5

      def test_npoints(self):
          """npoints is 1 when one of two values is stored."""
          arr = SparseArray([0, 1])
          assert arr.npoints == 1
  class TestAccessor(object):
      """Tests for the ``Series.sparse`` accessor."""

      @pytest.mark.parametrize('attr', [
          'npoints', 'density', 'fill_value', 'sp_values',
      ])
      def test_get_attributes(self, attr):
          """Accessor attributes equal those of the wrapped SparseArray."""
          arr = SparseArray([0, 1])
          ser = pd.Series(arr)

          result = getattr(ser.sparse, attr)
          expected = getattr(arr, attr)
          assert result == expected

      def test_from_coo(self):
          """from_coo() builds a MultiIndexed sparse Series from a COO matrix."""
          sparse = pytest.importorskip("scipy.sparse")

          row = [0, 3, 1, 0]
          col = [0, 3, 1, 2]
          data = [4, 5, 7, 9]

          sp_array = sparse.coo_matrix((data, (row, col)))
          result = pd.Series.sparse.from_coo(sp_array)

          index = pd.MultiIndex.from_arrays([[0, 0, 1, 3], [0, 2, 1, 3]])
          expected = pd.Series([4, 9, 7, 5], index=index,
                               dtype='Sparse[int]')
          tm.assert_series_equal(result, expected)

      def test_to_coo(self):
          """to_coo() returns a scipy COO matrix as its first element."""
          sparse = pytest.importorskip("scipy.sparse")
          ser = pd.Series([1, 2, 3],
                          index=pd.MultiIndex.from_product([[0], [1, 2, 3]],
                                                           names=['a', 'b']),
                          dtype='Sparse[int]')
          A, _, _ = ser.sparse.to_coo()
          # Use the public class; ``scipy.sparse.coo`` is a private module
          # namespace that newer SciPy releases deprecate.
          assert isinstance(A, sparse.coo_matrix)

      def test_non_sparse_raises(self):
          """Using ``.sparse`` on a non-sparse Series raises AttributeError."""
          ser = pd.Series([1, 2, 3])
          # Escape the dot so the pattern matches a literal ".sparse".
          with pytest.raises(AttributeError, match=r'\.sparse'):
              ser.sparse.density
  def test_setting_fill_value_fillna_still_works():
      """isna() reports the NaN slot after fill_value is reassigned."""
      # This is why letting users update fill_value / dtype is bad
      # astype has the same problem.
      sparse = SparseArray([1., np.nan, 1.0], fill_value=0.0)
      sparse.fill_value = np.nan

      # A direct sparse comparison is not possible because the sp_index
      # will differ, so compare the mask as a plain ndarray instead.
      na_mask = np.asarray(sparse.isna())
      wanted = np.array([False, True, False])
      tm.assert_numpy_array_equal(na_mask, wanted)
  def test_setting_fill_value_updates():
      """Assigning fill_value yields the expected dtype, index and values."""
      actual = SparseArray([0.0, np.nan], fill_value=0)
      actual.fill_value = np.nan

      # use private constructor to get the index right
      # otherwise both nans would be un-stored.
      stored_values = np.array([np.nan])
      stored_index = IntIndex(2, [1])
      wanted_dtype = SparseDtype(float, np.nan)
      wanted = SparseArray._simple_new(
          sparse_array=stored_values,
          sparse_index=stored_index,
          dtype=wanted_dtype,
      )
      tm.assert_sp_array_equal(actual, wanted)
  @pytest.mark.parametrize("arr, loc", [
      ([None, 1, 2], 0),
      ([0, None, 2], 1),
      ([0, 1, None], 2),
      ([0, 1, 1, None, None], 3),
      ([1, 1, 1, 2], -1),
      ([], -1),
  ])
  def test_first_fill_value_loc(arr, loc):
      """_first_fill_value_loc() returns ``loc`` for each sample input."""
      sparse = SparseArray(arr)
      assert sparse._first_fill_value_loc() == loc
  @pytest.mark.parametrize('arr', [
      [1, 2, np.nan, np.nan],
      [1, np.nan, 2, np.nan],
      [1, 2, np.nan],
  ])
  @pytest.mark.parametrize("fill_value", [
      np.nan, 0, 1
  ])
  def test_unique_na_fill(arr, fill_value):
      """SparseArray.unique() agrees with Series.unique() on the same data."""
      sparse_uniques = pd.SparseArray(arr, fill_value=fill_value).unique()
      dense_uniques = pd.Series(arr).unique()

      # The result must stay sparse; values are compared as an ndarray.
      assert isinstance(sparse_uniques, SparseArray)
      tm.assert_numpy_array_equal(np.asarray(sparse_uniques), dense_uniques)
  def test_unique_all_sparse():
      """unique() of ``[0, 0]`` is a SparseArray holding a single 0."""
      # https://github.com/pandas-dev/pandas/issues/23168
      uniques = SparseArray([0, 0]).unique()
      tm.assert_sp_array_equal(uniques, SparseArray([0]))
  def test_map():
      """map() gives the same result for a dict, a Series and a callable."""
      arr = SparseArray([0, 1, 2])
      expected = SparseArray([10, 11, 12], fill_value=10)

      # dict
      result = arr.map({0: 10, 1: 11, 2: 12})
      tm.assert_sp_array_equal(result, expected)

      # series
      result = arr.map(pd.Series({0: 10, 1: 11, 2: 12}))
      tm.assert_sp_array_equal(result, expected)

      # function: pass a real callable so that this mapper type is
      # exercised as well, not the Series mapper a second time.
      result = arr.map(lambda x: x + 10)
      tm.assert_sp_array_equal(result, expected)
  def test_map_missing():
      """A value with no key in the mapping comes back as missing."""
      mapping = {0: 10, 1: 11}
      mapped = SparseArray([0, 1, 2]).map(mapping)

      wanted = SparseArray([10, 11, None], fill_value=10)
      tm.assert_sp_array_equal(mapped, wanted)