test_series.py 57 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523
  1. # pylint: disable-msg=E1101,W0612
  2. from datetime import datetime
  3. import operator
  4. import numpy as np
  5. from numpy import nan
  6. import pytest
  7. from pandas._libs.sparse import BlockIndex, IntIndex
  8. from pandas.compat import PY36, range
  9. from pandas.errors import PerformanceWarning
  10. import pandas.util._test_decorators as td
  11. import pandas as pd
  12. from pandas import (
  13. DataFrame, Series, SparseDtype, SparseSeries, bdate_range, compat, isna)
  14. from pandas.core.reshape.util import cartesian_product
  15. import pandas.core.sparse.frame as spf
  16. from pandas.tests.series.test_api import SharedWithSparse
  17. import pandas.util.testing as tm
  18. from pandas.tseries.offsets import BDay
  19. def _test_data1():
  20. # nan-based
  21. arr = np.arange(20, dtype=float)
  22. index = np.arange(20)
  23. arr[:2] = nan
  24. arr[5:10] = nan
  25. arr[-3:] = nan
  26. return arr, index
  27. def _test_data2():
  28. # nan-based
  29. arr = np.arange(15, dtype=float)
  30. index = np.arange(15)
  31. arr[7:12] = nan
  32. arr[-1:] = nan
  33. return arr, index
  34. def _test_data1_zero():
  35. # zero-based
  36. arr, index = _test_data1()
  37. arr[np.isnan(arr)] = 0
  38. return arr, index
  39. def _test_data2_zero():
  40. # zero-based
  41. arr, index = _test_data2()
  42. arr[np.isnan(arr)] = 0
  43. return arr, index
  44. class TestSparseSeries(SharedWithSparse):
  45. series_klass = SparseSeries
  46. # SharedWithSparse tests use generic, series_klass-agnostic assertion
  47. _assert_series_equal = staticmethod(tm.assert_sp_series_equal)
  48. def setup_method(self, method):
  49. arr, index = _test_data1()
  50. date_index = bdate_range('1/1/2011', periods=len(index))
  51. self.bseries = SparseSeries(arr, index=index, kind='block',
  52. name='bseries')
  53. self.ts = self.bseries
  54. self.btseries = SparseSeries(arr, index=date_index, kind='block')
  55. self.iseries = SparseSeries(arr, index=index, kind='integer',
  56. name='iseries')
  57. arr, index = _test_data2()
  58. self.bseries2 = SparseSeries(arr, index=index, kind='block')
  59. self.iseries2 = SparseSeries(arr, index=index, kind='integer')
  60. arr, index = _test_data1_zero()
  61. self.zbseries = SparseSeries(arr, index=index, kind='block',
  62. fill_value=0, name='zbseries')
  63. self.ziseries = SparseSeries(arr, index=index, kind='integer',
  64. fill_value=0)
  65. arr, index = _test_data2_zero()
  66. self.zbseries2 = SparseSeries(arr, index=index, kind='block',
  67. fill_value=0)
  68. self.ziseries2 = SparseSeries(arr, index=index, kind='integer',
  69. fill_value=0)
  70. def test_constructor_dict_input(self):
  71. # gh-16905
  72. constructor_dict = {1: 1.}
  73. index = [0, 1, 2]
  74. # Series with index passed in
  75. series = pd.Series(constructor_dict)
  76. expected = SparseSeries(series, index=index)
  77. result = SparseSeries(constructor_dict, index=index)
  78. tm.assert_sp_series_equal(result, expected)
  79. # Series with index and dictionary with no index
  80. expected = SparseSeries(series)
  81. result = SparseSeries(constructor_dict)
  82. tm.assert_sp_series_equal(result, expected)
  83. def test_constructor_dict_order(self):
  84. # GH19018
  85. # initialization ordering: by insertion order if python>= 3.6, else
  86. # order by value
  87. d = {'b': 1, 'a': 0, 'c': 2}
  88. result = SparseSeries(d)
  89. if PY36:
  90. expected = SparseSeries([1, 0, 2], index=list('bac'))
  91. else:
  92. expected = SparseSeries([0, 1, 2], index=list('abc'))
  93. tm.assert_sp_series_equal(result, expected)
  94. def test_constructor_dtype(self):
  95. arr = SparseSeries([np.nan, 1, 2, np.nan])
  96. assert arr.dtype == SparseDtype(np.float64)
  97. assert np.isnan(arr.fill_value)
  98. arr = SparseSeries([np.nan, 1, 2, np.nan], fill_value=0)
  99. assert arr.dtype == SparseDtype(np.float64, 0)
  100. assert arr.fill_value == 0
  101. arr = SparseSeries([0, 1, 2, 4], dtype=np.int64, fill_value=np.nan)
  102. assert arr.dtype == SparseDtype(np.int64, np.nan)
  103. assert np.isnan(arr.fill_value)
  104. arr = SparseSeries([0, 1, 2, 4], dtype=np.int64)
  105. assert arr.dtype == SparseDtype(np.int64, 0)
  106. assert arr.fill_value == 0
  107. arr = SparseSeries([0, 1, 2, 4], fill_value=0, dtype=np.int64)
  108. assert arr.dtype == SparseDtype(np.int64, 0)
  109. assert arr.fill_value == 0
  110. def test_iteration_and_str(self):
  111. [x for x in self.bseries]
  112. str(self.bseries)
  113. def test_construct_DataFrame_with_sp_series(self):
  114. # it works!
  115. df = DataFrame({'col': self.bseries})
  116. # printing & access
  117. df.iloc[:1]
  118. df['col']
  119. df.dtypes
  120. str(df)
  121. # blocking
  122. expected = Series({'col': 'float64:sparse'})
  123. result = df.ftypes
  124. tm.assert_series_equal(expected, result)
  125. def test_constructor_preserve_attr(self):
  126. arr = pd.SparseArray([1, 0, 3, 0], dtype=np.int64, fill_value=0)
  127. assert arr.dtype == SparseDtype(np.int64)
  128. assert arr.fill_value == 0
  129. s = pd.SparseSeries(arr, name='x')
  130. assert s.dtype == SparseDtype(np.int64)
  131. assert s.fill_value == 0
  132. def test_series_density(self):
  133. # GH2803
  134. ts = Series(np.random.randn(10))
  135. ts[2:-2] = nan
  136. sts = ts.to_sparse()
  137. density = sts.density # don't die
  138. assert density == 4 / 10.0
  139. def test_sparse_to_dense(self):
  140. arr, index = _test_data1()
  141. series = self.bseries.to_dense()
  142. tm.assert_series_equal(series, Series(arr, name='bseries'))
  143. series = self.iseries.to_dense()
  144. tm.assert_series_equal(series, Series(arr, name='iseries'))
  145. arr, index = _test_data1_zero()
  146. series = self.zbseries.to_dense()
  147. tm.assert_series_equal(series, Series(arr, name='zbseries'))
  148. series = self.ziseries.to_dense()
  149. tm.assert_series_equal(series, Series(arr))
  150. def test_to_dense_fill_value(self):
  151. s = pd.Series([1, np.nan, np.nan, 3, np.nan])
  152. res = SparseSeries(s).to_dense()
  153. tm.assert_series_equal(res, s)
  154. res = SparseSeries(s, fill_value=0).to_dense()
  155. tm.assert_series_equal(res, s)
  156. s = pd.Series([1, np.nan, 0, 3, 0])
  157. res = SparseSeries(s, fill_value=0).to_dense()
  158. tm.assert_series_equal(res, s)
  159. res = SparseSeries(s, fill_value=0).to_dense()
  160. tm.assert_series_equal(res, s)
  161. s = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan])
  162. res = SparseSeries(s).to_dense()
  163. tm.assert_series_equal(res, s)
  164. s = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan])
  165. res = SparseSeries(s, fill_value=0).to_dense()
  166. tm.assert_series_equal(res, s)
  167. def test_dense_to_sparse(self):
  168. series = self.bseries.to_dense()
  169. bseries = series.to_sparse(kind='block')
  170. iseries = series.to_sparse(kind='integer')
  171. tm.assert_sp_series_equal(bseries, self.bseries)
  172. tm.assert_sp_series_equal(iseries, self.iseries, check_names=False)
  173. assert iseries.name == self.bseries.name
  174. assert len(series) == len(bseries)
  175. assert len(series) == len(iseries)
  176. assert series.shape == bseries.shape
  177. assert series.shape == iseries.shape
  178. # non-NaN fill value
  179. series = self.zbseries.to_dense()
  180. zbseries = series.to_sparse(kind='block', fill_value=0)
  181. ziseries = series.to_sparse(kind='integer', fill_value=0)
  182. tm.assert_sp_series_equal(zbseries, self.zbseries)
  183. tm.assert_sp_series_equal(ziseries, self.ziseries, check_names=False)
  184. assert ziseries.name == self.zbseries.name
  185. assert len(series) == len(zbseries)
  186. assert len(series) == len(ziseries)
  187. assert series.shape == zbseries.shape
  188. assert series.shape == ziseries.shape
  189. def test_to_dense_preserve_name(self):
  190. assert (self.bseries.name is not None)
  191. result = self.bseries.to_dense()
  192. assert result.name == self.bseries.name
    def test_constructor(self):
        """Exercise the SparseSeries constructor with several input kinds.

        Covers the fixtures made in ``setup_method``, construction from an
        existing SparseSeries (with and without a new name), from a dense
        Series, and from a raw value array plus ``sparse_index`` with and
        without ``copy=True``.
        """
        # test setup guys
        assert np.isnan(self.bseries.fill_value)
        assert isinstance(self.bseries.sp_index, BlockIndex)
        assert np.isnan(self.iseries.fill_value)
        assert isinstance(self.iseries.sp_index, IntIndex)

        assert self.zbseries.fill_value == 0
        tm.assert_numpy_array_equal(self.zbseries.values.values,
                                    self.bseries.to_dense().fillna(0).values)

        # pass SparseSeries
        def _check_const(sparse, name):
            # use passed series name
            result = SparseSeries(sparse)
            tm.assert_sp_series_equal(result, sparse)
            assert sparse.name == name
            assert result.name == name

            # use passed name
            result = SparseSeries(sparse, name='x')
            tm.assert_sp_series_equal(result, sparse, check_names=False)
            assert result.name == 'x'

        _check_const(self.bseries, 'bseries')
        _check_const(self.iseries, 'iseries')
        _check_const(self.zbseries, 'zbseries')

        # Sparse time series works
        date_index = bdate_range('1/1/2000', periods=len(self.bseries))
        s5 = SparseSeries(self.bseries, index=date_index)
        assert isinstance(s5, SparseSeries)

        # pass Series
        bseries2 = SparseSeries(self.bseries.to_dense())
        tm.assert_numpy_array_equal(self.bseries.sp_values, bseries2.sp_values)

        # pass dict?

        # don't copy the data by default
        values = np.ones(self.bseries.npoints)
        sp = SparseSeries(values, sparse_index=self.bseries.sp_index)
        # the write through sp_values must be visible in `values`
        sp.sp_values[:5] = 97
        assert values[0] == 97

        assert len(sp) == 20
        assert sp.shape == (20, )

        # but can make it copy!
        sp = SparseSeries(values, sparse_index=self.bseries.sp_index,
                          copy=True)
        # `values` must keep the 97 written above, not pick up the 100
        sp.sp_values[:5] = 100
        assert values[0] == 97

        assert len(sp) == 20
        assert sp.shape == (20, )
  238. def test_constructor_scalar(self):
  239. data = 5
  240. sp = SparseSeries(data, np.arange(100))
  241. sp = sp.reindex(np.arange(200))
  242. assert (sp.loc[:99] == data).all()
  243. assert isna(sp.loc[100:]).all()
  244. data = np.nan
  245. sp = SparseSeries(data, np.arange(100))
  246. assert len(sp) == 100
  247. assert sp.shape == (100, )
  248. def test_constructor_ndarray(self):
  249. pass
  250. def test_constructor_nonnan(self):
  251. arr = [0, 0, 0, nan, nan]
  252. sp_series = SparseSeries(arr, fill_value=0)
  253. tm.assert_numpy_array_equal(sp_series.values.values, np.array(arr))
  254. assert len(sp_series) == 5
  255. assert sp_series.shape == (5, )
  256. def test_constructor_empty(self):
  257. # see gh-9272
  258. sp = SparseSeries()
  259. assert len(sp.index) == 0
  260. assert sp.shape == (0, )
  261. def test_copy_astype(self):
  262. cop = self.bseries.astype(np.float64)
  263. assert cop is not self.bseries
  264. assert cop.sp_index is self.bseries.sp_index
  265. assert cop.dtype == SparseDtype(np.float64)
  266. cop2 = self.iseries.copy()
  267. tm.assert_sp_series_equal(cop, self.bseries)
  268. tm.assert_sp_series_equal(cop2, self.iseries)
  269. # test that data is copied
  270. cop[:5] = 97
  271. assert cop.sp_values[0] == 97
  272. assert self.bseries.sp_values[0] != 97
  273. # correct fill value
  274. zbcop = self.zbseries.copy()
  275. zicop = self.ziseries.copy()
  276. tm.assert_sp_series_equal(zbcop, self.zbseries)
  277. tm.assert_sp_series_equal(zicop, self.ziseries)
  278. # no deep copy
  279. view = self.bseries.copy(deep=False)
  280. view.sp_values[:5] = 5
  281. assert (self.bseries.sp_values[:5] == 5).all()
  282. def test_shape(self):
  283. # see gh-10452
  284. assert self.bseries.shape == (20, )
  285. assert self.btseries.shape == (20, )
  286. assert self.iseries.shape == (20, )
  287. assert self.bseries2.shape == (15, )
  288. assert self.iseries2.shape == (15, )
  289. assert self.zbseries2.shape == (15, )
  290. assert self.ziseries2.shape == (15, )
  291. def test_astype(self):
  292. result = self.bseries.astype(SparseDtype(np.int64, 0))
  293. expected = (self.bseries.to_dense()
  294. .fillna(0)
  295. .astype(np.int64)
  296. .to_sparse(fill_value=0))
  297. tm.assert_sp_series_equal(result, expected)
  298. def test_astype_all(self):
  299. orig = pd.Series(np.array([1, 2, 3]))
  300. s = SparseSeries(orig)
  301. types = [np.float64, np.float32, np.int64,
  302. np.int32, np.int16, np.int8]
  303. for typ in types:
  304. dtype = SparseDtype(typ)
  305. res = s.astype(dtype)
  306. assert res.dtype == dtype
  307. tm.assert_series_equal(res.to_dense(), orig.astype(typ))
  308. def test_kind(self):
  309. assert self.bseries.kind == 'block'
  310. assert self.iseries.kind == 'integer'
  311. def test_to_frame(self):
  312. # GH 9850
  313. s = pd.SparseSeries([1, 2, 0, nan, 4, nan, 0], name='x')
  314. exp = pd.SparseDataFrame({'x': [1, 2, 0, nan, 4, nan, 0]})
  315. tm.assert_sp_frame_equal(s.to_frame(), exp)
  316. exp = pd.SparseDataFrame({'y': [1, 2, 0, nan, 4, nan, 0]})
  317. tm.assert_sp_frame_equal(s.to_frame(name='y'), exp)
  318. s = pd.SparseSeries([1, 2, 0, nan, 4, nan, 0], name='x', fill_value=0)
  319. exp = pd.SparseDataFrame({'x': [1, 2, 0, nan, 4, nan, 0]},
  320. default_fill_value=0)
  321. tm.assert_sp_frame_equal(s.to_frame(), exp)
  322. exp = pd.DataFrame({'y': [1, 2, 0, nan, 4, nan, 0]})
  323. tm.assert_frame_equal(s.to_frame(name='y').to_dense(), exp)
  324. def test_pickle(self):
  325. def _test_roundtrip(series):
  326. unpickled = tm.round_trip_pickle(series)
  327. tm.assert_sp_series_equal(series, unpickled)
  328. tm.assert_series_equal(series.to_dense(), unpickled.to_dense())
  329. self._check_all(_test_roundtrip)
  330. def _check_all(self, check_func):
  331. check_func(self.bseries)
  332. check_func(self.iseries)
  333. check_func(self.zbseries)
  334. check_func(self.ziseries)
  335. def test_getitem(self):
  336. def _check_getitem(sp, dense):
  337. for idx, val in compat.iteritems(dense):
  338. tm.assert_almost_equal(val, sp[idx])
  339. for i in range(len(dense)):
  340. tm.assert_almost_equal(sp[i], dense[i])
  341. # j = np.float64(i)
  342. # assert_almost_equal(sp[j], dense[j])
  343. # API change 1/6/2012
  344. # negative getitem works
  345. # for i in xrange(len(dense)):
  346. # assert_almost_equal(sp[-i], dense[-i])
  347. _check_getitem(self.bseries, self.bseries.to_dense())
  348. _check_getitem(self.btseries, self.btseries.to_dense())
  349. _check_getitem(self.zbseries, self.zbseries.to_dense())
  350. _check_getitem(self.iseries, self.iseries.to_dense())
  351. _check_getitem(self.ziseries, self.ziseries.to_dense())
  352. # exception handling
  353. pytest.raises(Exception, self.bseries.__getitem__,
  354. len(self.bseries) + 1)
  355. # index not contained
  356. pytest.raises(Exception, self.btseries.__getitem__,
  357. self.btseries.index[-1] + BDay())
  358. def test_get_get_value(self):
  359. tm.assert_almost_equal(self.bseries.get(10), self.bseries[10])
  360. assert self.bseries.get(len(self.bseries) + 1) is None
  361. dt = self.btseries.index[10]
  362. result = self.btseries.get(dt)
  363. expected = self.btseries.to_dense()[dt]
  364. tm.assert_almost_equal(result, expected)
  365. with tm.assert_produces_warning(FutureWarning,
  366. check_stacklevel=False):
  367. tm.assert_almost_equal(
  368. self.bseries.get_value(10), self.bseries[10])
  369. def test_set_value(self):
  370. idx = self.btseries.index[7]
  371. with tm.assert_produces_warning(FutureWarning,
  372. check_stacklevel=False):
  373. self.btseries.set_value(idx, 0)
  374. assert self.btseries[idx] == 0
  375. with tm.assert_produces_warning(FutureWarning,
  376. check_stacklevel=False):
  377. self.iseries.set_value('foobar', 0)
  378. assert self.iseries.index[-1] == 'foobar'
  379. assert self.iseries['foobar'] == 0
  380. def test_getitem_slice(self):
  381. idx = self.bseries.index
  382. res = self.bseries[::2]
  383. assert isinstance(res, SparseSeries)
  384. expected = self.bseries.reindex(idx[::2])
  385. tm.assert_sp_series_equal(res, expected)
  386. res = self.bseries[:5]
  387. assert isinstance(res, SparseSeries)
  388. tm.assert_sp_series_equal(res, self.bseries.reindex(idx[:5]))
  389. res = self.bseries[5:]
  390. tm.assert_sp_series_equal(res, self.bseries.reindex(idx[5:]))
  391. # negative indices
  392. res = self.bseries[:-3]
  393. tm.assert_sp_series_equal(res, self.bseries.reindex(idx[:-3]))
  394. def test_take(self):
  395. def _compare_with_dense(sp):
  396. dense = sp.to_dense()
  397. def _compare(idx):
  398. dense_result = dense.take(idx).values
  399. sparse_result = sp.take(idx)
  400. assert isinstance(sparse_result, SparseSeries)
  401. tm.assert_almost_equal(dense_result,
  402. sparse_result.values.values)
  403. _compare([1., 2., 3., 4., 5., 0.])
  404. _compare([7, 2, 9, 0, 4])
  405. _compare([3, 6, 3, 4, 7])
  406. self._check_all(_compare_with_dense)
  407. pytest.raises(Exception, self.bseries.take,
  408. [0, len(self.bseries) + 1])
  409. # Corner case
  410. # XXX: changed test. Why wsa this considered a corner case?
  411. sp = SparseSeries(np.ones(10) * nan)
  412. exp = pd.Series(np.repeat(nan, 5))
  413. tm.assert_series_equal(sp.take([0, 1, 2, 3, 4]), exp.to_sparse())
  414. with tm.assert_produces_warning(FutureWarning):
  415. sp.take([1, 5], convert=True)
  416. with tm.assert_produces_warning(FutureWarning):
  417. sp.take([1, 5], convert=False)
  418. def test_numpy_take(self):
  419. sp = SparseSeries([1.0, 2.0, 3.0])
  420. indices = [1, 2]
  421. tm.assert_series_equal(np.take(sp, indices, axis=0).to_dense(),
  422. np.take(sp.to_dense(), indices, axis=0))
  423. msg = "the 'out' parameter is not supported"
  424. with pytest.raises(ValueError, match=msg):
  425. np.take(sp, indices, out=np.empty(sp.shape))
  426. msg = "the 'mode' parameter is not supported"
  427. with pytest.raises(ValueError, match=msg):
  428. np.take(sp, indices, out=None, mode='clip')
  429. def test_setitem(self):
  430. self.bseries[5] = 7.
  431. assert self.bseries[5] == 7.
  432. def test_setslice(self):
  433. self.bseries[5:10] = 7.
  434. tm.assert_series_equal(self.bseries[5:10].to_dense(),
  435. Series(7., index=range(5, 10),
  436. name=self.bseries.name))
  437. def test_operators(self):
  438. def _check_op(a, b, op):
  439. sp_result = op(a, b)
  440. adense = a.to_dense() if isinstance(a, SparseSeries) else a
  441. bdense = b.to_dense() if isinstance(b, SparseSeries) else b
  442. dense_result = op(adense, bdense)
  443. tm.assert_almost_equal(sp_result.to_dense(), dense_result)
  444. def check(a, b):
  445. _check_op(a, b, operator.add)
  446. _check_op(a, b, operator.sub)
  447. _check_op(a, b, operator.truediv)
  448. _check_op(a, b, operator.floordiv)
  449. _check_op(a, b, operator.mul)
  450. _check_op(a, b, lambda x, y: operator.add(y, x))
  451. _check_op(a, b, lambda x, y: operator.sub(y, x))
  452. _check_op(a, b, lambda x, y: operator.truediv(y, x))
  453. _check_op(a, b, lambda x, y: operator.floordiv(y, x))
  454. _check_op(a, b, lambda x, y: operator.mul(y, x))
  455. # NaN ** 0 = 1 in C?
  456. # _check_op(a, b, operator.pow)
  457. # _check_op(a, b, lambda x, y: operator.pow(y, x))
  458. check(self.bseries, self.bseries)
  459. check(self.iseries, self.iseries)
  460. check(self.bseries, self.iseries)
  461. check(self.bseries, self.bseries2)
  462. check(self.bseries, self.iseries2)
  463. check(self.iseries, self.iseries2)
  464. # scalar value
  465. check(self.bseries, 5)
  466. # zero-based
  467. check(self.zbseries, self.zbseries * 2)
  468. check(self.zbseries, self.zbseries2)
  469. check(self.ziseries, self.ziseries2)
  470. # with dense
  471. result = self.bseries + self.bseries.to_dense()
  472. tm.assert_sp_series_equal(result, self.bseries + self.bseries)
  473. def test_binary_operators(self):
  474. # skipping for now #####
  475. import pytest
  476. pytest.skip("skipping sparse binary operators test")
  477. def _check_inplace_op(iop, op):
  478. tmp = self.bseries.copy()
  479. expected = op(tmp, self.bseries)
  480. iop(tmp, self.bseries)
  481. tm.assert_sp_series_equal(tmp, expected)
  482. inplace_ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'pow']
  483. for op in inplace_ops:
  484. _check_inplace_op(getattr(operator, "i%s" % op),
  485. getattr(operator, op))
  486. @pytest.mark.parametrize("values, op, fill_value", [
  487. ([True, False, False, True], operator.invert, True),
  488. ([True, False, False, True], operator.invert, False),
  489. ([0, 1, 2, 3], operator.pos, 0),
  490. ([0, 1, 2, 3], operator.neg, 0),
  491. ([0, np.nan, 2, 3], operator.pos, np.nan),
  492. ([0, np.nan, 2, 3], operator.neg, np.nan),
  493. ])
  494. def test_unary_operators(self, values, op, fill_value):
  495. # https://github.com/pandas-dev/pandas/issues/22835
  496. values = np.asarray(values)
  497. if op is operator.invert:
  498. new_fill_value = not fill_value
  499. else:
  500. new_fill_value = op(fill_value)
  501. s = SparseSeries(values,
  502. fill_value=fill_value,
  503. index=['a', 'b', 'c', 'd'],
  504. name='name')
  505. result = op(s)
  506. expected = SparseSeries(op(values),
  507. fill_value=new_fill_value,
  508. index=['a', 'b', 'c', 'd'],
  509. name='name')
  510. tm.assert_sp_series_equal(result, expected)
  511. def test_abs(self):
  512. s = SparseSeries([1, 2, -3], name='x')
  513. expected = SparseSeries([1, 2, 3], name='x')
  514. result = s.abs()
  515. tm.assert_sp_series_equal(result, expected)
  516. assert result.name == 'x'
  517. result = abs(s)
  518. tm.assert_sp_series_equal(result, expected)
  519. assert result.name == 'x'
  520. result = np.abs(s)
  521. tm.assert_sp_series_equal(result, expected)
  522. assert result.name == 'x'
  523. s = SparseSeries([1, -2, 2, -3], fill_value=-2, name='x')
  524. expected = SparseSeries([1, 2, 3], sparse_index=s.sp_index,
  525. fill_value=2, name='x')
  526. result = s.abs()
  527. tm.assert_sp_series_equal(result, expected)
  528. assert result.name == 'x'
  529. result = abs(s)
  530. tm.assert_sp_series_equal(result, expected)
  531. assert result.name == 'x'
  532. result = np.abs(s)
  533. tm.assert_sp_series_equal(result, expected)
  534. assert result.name == 'x'
    def test_reindex(self):
        """Check ``reindex`` against the dense result, plus copy semantics."""

        def _compare_with_series(sps, new_index):
            # reindex the sparse series and its dense equivalent, then
            # compare them both in sparse and in dense form
            spsre = sps.reindex(new_index)

            series = sps.to_dense()
            seriesre = series.reindex(new_index)
            seriesre = seriesre.to_sparse(fill_value=sps.fill_value)

            tm.assert_sp_series_equal(spsre, seriesre)
            tm.assert_series_equal(spsre.to_dense(), seriesre.to_dense())

        _compare_with_series(self.bseries, self.bseries.index[::2])
        _compare_with_series(self.bseries, list(self.bseries.index[::2]))
        _compare_with_series(self.bseries, self.bseries.index[:10])
        _compare_with_series(self.bseries, self.bseries.index[5:])

        _compare_with_series(self.zbseries, self.zbseries.index[::2])
        _compare_with_series(self.zbseries, self.zbseries.index[:10])
        _compare_with_series(self.zbseries, self.zbseries.index[5:])

        # special cases
        same_index = self.bseries.reindex(self.bseries.index)
        tm.assert_sp_series_equal(self.bseries, same_index)
        assert same_index is not self.bseries

        # corner cases
        sp = SparseSeries([], index=[])
        # TODO: sp_zero is not used anywhere...remove?
        sp_zero = SparseSeries([], index=[], fill_value=0)  # noqa
        _compare_with_series(sp, np.arange(10))

        # with copy=True: writes to the result must not reach the source
        reindexed = self.bseries.reindex(self.bseries.index, copy=True)
        reindexed.sp_values[:] = 1.
        assert (self.bseries.sp_values != 1.).all()

        # with copy=False: writes to the result show up in the source
        reindexed = self.bseries.reindex(self.bseries.index, copy=False)
        reindexed.sp_values[:] = 1.
        tm.assert_numpy_array_equal(self.bseries.sp_values, np.repeat(1., 10))
    def test_sparse_reindex(self):
        """Check ``sparse_reindex`` for integer and block sparse indexes."""
        length = 10

        def _check(values, index1, index2, fill_value):
            # build on index1, move to index2, then compare sp_values with
            # a dense Series reindexed over the same integer positions
            first_series = SparseSeries(values, sparse_index=index1,
                                        fill_value=fill_value)
            reindexed = first_series.sparse_reindex(index2)
            assert reindexed.sp_index is index2

            int_indices1 = index1.to_int_index().indices
            int_indices2 = index2.to_int_index().indices

            expected = Series(values, index=int_indices1)
            expected = expected.reindex(int_indices2).fillna(fill_value)
            tm.assert_almost_equal(expected.values, reindexed.sp_values)

            # make sure level argument asserts
            # NOTE(review): no 'level' argument is exercised below; the
            # comment above looks stale -- confirm before relying on it
            # TODO: expected is not used anywhere...remove?
            expected = expected.reindex(int_indices2).fillna(fill_value)  # noqa

        def _check_with_fill_value(values, first, second, fill_value=nan):
            # run the same check with IntIndex and with BlockIndex objects
            i_index1 = IntIndex(length, first)
            i_index2 = IntIndex(length, second)

            b_index1 = i_index1.to_block_index()
            b_index2 = i_index2.to_block_index()

            _check(values, i_index1, i_index2, fill_value)
            _check(values, b_index1, b_index2, fill_value)

        def _check_all(values, first, second):
            # both a NaN and a zero fill value are covered
            _check_with_fill_value(values, first, second, fill_value=nan)
            _check_with_fill_value(values, first, second, fill_value=0)

        index1 = [2, 4, 5, 6, 8, 9]
        values1 = np.arange(6.)

        _check_all(values1, index1, [2, 4, 5])
        _check_all(values1, index1, [2, 3, 4, 5, 6, 7, 8, 9])
        _check_all(values1, index1, [0, 1])
        _check_all(values1, index1, [0, 1, 7, 8, 9])
        _check_all(values1, index1, [])

        # anything other than a sparse index object must be rejected
        first_series = SparseSeries(values1,
                                    sparse_index=IntIndex(length, index1),
                                    fill_value=nan)
        with pytest.raises(TypeError,
                           match='new index must be a SparseIndex'):
            first_series.sparse_reindex(0)
  604. def test_repr(self):
  605. # TODO: These aren't used
  606. bsrepr = repr(self.bseries) # noqa
  607. isrepr = repr(self.iseries) # noqa
  608. def test_iter(self):
  609. pass
  610. def test_truncate(self):
  611. pass
  612. def test_fillna(self):
  613. pass
  614. def test_groupby(self):
  615. pass
  616. def test_reductions(self):
  617. def _compare_with_dense(obj, op):
  618. sparse_result = getattr(obj, op)()
  619. series = obj.to_dense()
  620. dense_result = getattr(series, op)()
  621. assert sparse_result == dense_result
  622. to_compare = ['count', 'sum', 'mean', 'std', 'var', 'skew']
  623. def _compare_all(obj):
  624. for op in to_compare:
  625. _compare_with_dense(obj, op)
  626. _compare_all(self.bseries)
  627. self.bseries.sp_values[5:10] = np.NaN
  628. _compare_all(self.bseries)
  629. _compare_all(self.zbseries)
  630. self.zbseries.sp_values[5:10] = np.NaN
  631. _compare_all(self.zbseries)
  632. series = self.zbseries.copy()
  633. series.fill_value = 2
  634. _compare_all(series)
  635. nonna = Series(np.random.randn(20)).to_sparse()
  636. _compare_all(nonna)
  637. nonna2 = Series(np.random.randn(20)).to_sparse(fill_value=0)
  638. _compare_all(nonna2)
  def test_dropna(self):
      """``dropna`` on a zero-filled series and on ``self.bseries``."""
      zero_filled = SparseSeries([0, 0, 0, nan, nan, 5, 6], fill_value=0)
      dropped = zero_filled.dropna()

      # Dense reference: drop the NaNs, then filter out the zeros too.
      dense_ref = zero_filled.to_dense().dropna()
      dense_ref = dense_ref[dense_ref != 0]
      ref_values = pd.SparseArray(dense_ref.values, fill_value=0,
                                  kind='block')

      tm.assert_sp_array_equal(dropped.values, ref_values)
      tm.assert_index_equal(dropped.index, dense_ref.index)
      assert len(dropped.sp_values) == 2

      block_dropped = self.bseries.dropna()
      block_ref = self.bseries.to_dense().dropna()
      assert not isinstance(block_dropped, SparseSeries)
      tm.assert_series_equal(block_dropped, block_ref)
  def test_homogenize(self):
      """``spf.homogenize`` output shares one sparse index per input set."""

      def _check_matches(indices, expected):
          # One series per sparse index, keyed by its position in the list.
          data = {}
          for position, sp_index in enumerate(indices):
              data[position] = SparseSeries(
                  sp_index.to_int_index().indices,
                  sparse_index=sp_index,
                  fill_value=np.nan)

          # homogenized is only valid with NaN fill values
          homogenized = spf.homogenize(data)

          for _, series in compat.iteritems(homogenized):
              assert series.sp_index.equals(expected)

      three_way = [
          BlockIndex(10, [2], [7]),
          BlockIndex(10, [1, 6], [3, 4]),
          BlockIndex(10, [0], [10]),
      ]
      _check_matches(three_way, BlockIndex(10, [2, 6], [2, 3]))

      identical = [BlockIndex(10, [2], [7]), BlockIndex(10, [2], [7])]
      shared_index = identical[0]
      _check_matches(identical, shared_index)

      # must have NaN fill value
      zero_filled = {
          'a': SparseSeries(np.arange(7), sparse_index=shared_index,
                            fill_value=0),
      }
      with pytest.raises(TypeError, match="NaN fill value"):
          spf.homogenize(zero_filled)
  def test_fill_value_corner(self):
      """Fill value of a quotient whose divisor has fill value 0."""
      zero_fill = self.zbseries.copy()
      zero_fill.fill_value = 0
      quotient = self.bseries / zero_fill
      assert np.isnan(quotient.fill_value)

      one_fill = self.zbseries.copy()
      one_fill.fill_value = 1
      quotient = one_fill / zero_fill
      # 1 / 0 is inf
      assert np.isinf(quotient.fill_value)
  def test_fill_value_when_combine_const(self):
      """``add(2, fill_value=0)`` must equal ``fillna(0).add(2)``."""
      # GH12723
      sparse = SparseSeries([0, 1, np.nan, 3, 4, 5], index=np.arange(6))

      expected = sparse.fillna(0).add(2)
      actual = sparse.add(2, fill_value=0)
      tm.assert_series_equal(actual, expected)
  def test_shift(self):
      """Compare ``shift`` on a sparse series with the dense equivalent."""
      series = SparseSeries([nan, 1., 2., 3., nan, nan], index=np.arange(6))

      # assert shifted is not series
      tm.assert_sp_series_equal(series.shift(0), series)

      _dense_series_compare(series, lambda s: s.shift(1))
      _dense_series_compare(series, lambda s: s.shift(-2))

      # Same values on a business-day index, shifted by frequency.
      series = SparseSeries([nan, 1., 2., 3., nan, nan],
                            index=bdate_range('1/1/2000', periods=6))
      _dense_series_compare(series, lambda s: s.shift(2, freq='B'))
      _dense_series_compare(series, lambda s: s.shift(2, freq=BDay()))
  def test_shift_nan(self):
      """Shifting the sparse series matches shifting dense, then sparsifying."""
      # GH 12908
      orig = pd.Series([np.nan, 2, np.nan, 4, 0, np.nan, 0])

      # Default fill value.
      sparse = orig.to_sparse()
      for periods in (0, 1, 2, 3):
          tm.assert_sp_series_equal(sparse.shift(periods),
                                    orig.shift(periods).to_sparse(),
                                    check_kind=False)
      for periods in (-1, -2, -3, -4):
          tm.assert_sp_series_equal(sparse.shift(periods),
                                    orig.shift(periods).to_sparse())

      # fill_value=0.
      sparse = orig.to_sparse(fill_value=0)
      tm.assert_sp_series_equal(
          sparse.shift(0),
          orig.shift(0).to_sparse(fill_value=sparse.fill_value)
      )
      for periods in (1, 2, 3, -1, -2, -3, -4):
          tm.assert_sp_series_equal(
              sparse.shift(periods),
              orig.shift(periods).to_sparse(fill_value=0),
              check_kind=False)
  def test_shift_dtype(self):
      """``shift`` on an int64-backed sparse series versus dense."""
      # GH 12908
      orig = pd.Series([1, 2, 3, 4], dtype=np.int64)

      sparse = orig.to_sparse()
      tm.assert_sp_series_equal(sparse.shift(0), orig.shift(0).to_sparse())

      sparse = orig.to_sparse(fill_value=np.nan)
      tm.assert_sp_series_equal(sparse.shift(0),
                                orig.shift(0).to_sparse(fill_value=np.nan))

      # shift(1) or more span changes dtype to float64
      # XXX: SparseSeries doesn't need to shift dtype here.
      # Do we want to astype in shift, for backwards compat?
      # If not, document it.
      for periods in (1, 2, 3):
          tm.assert_sp_series_equal(
              sparse.shift(periods).astype('f8'),
              orig.shift(periods).to_sparse(kind='integer'))

      for periods in (-1, -2, -3, -4):
          tm.assert_sp_series_equal(sparse.shift(periods).astype('f8'),
                                    orig.shift(periods).to_sparse(),
                                    check_kind=False)
  @pytest.mark.parametrize("fill_value", [0, 1, np.nan])
  @pytest.mark.parametrize("periods", [0, 1, 2, 3, -1, -2, -3, -4])
  def test_shift_dtype_fill_value(self, fill_value, periods):
      """Shift an int64 series for each fill value / period combination."""
      # GH 12908
      dense = pd.Series([1, 0, 0, 4], dtype=np.dtype('int64'))

      shifted_sparse = dense.to_sparse(fill_value=fill_value).shift(periods)
      shifted_dense = dense.shift(periods).to_sparse(fill_value=fill_value)

      tm.assert_sp_series_equal(shifted_sparse, shifted_dense,
                                check_kind=False,
                                consolidate_block_indices=True)
  def test_combine_first(self):
      """``combine_first`` with a sparse or a dense argument gives one result."""
      full = self.bseries

      from_sparse = full[::2].combine_first(full)
      from_dense = full[::2].combine_first(full.to_dense())

      # Dense reference, converted back with the fixture's fill value.
      reference = full[::2].to_dense().combine_first(full.to_dense())
      reference = reference.to_sparse(fill_value=full.fill_value)

      tm.assert_sp_series_equal(from_sparse, from_dense)
      tm.assert_sp_series_equal(from_sparse, reference)
  @pytest.mark.parametrize('deep', [True, False])
  @pytest.mark.parametrize('fill_value', [0, 1, np.nan, None])
  def test_memory_usage_deep(self, deep, fill_value):
      """A mostly-fill-value sparse series reports less memory than dense."""
      # One real value followed by twenty copies of the fill value.
      values = [1.0] + [fill_value] * 20

      sparse_usage = SparseSeries(
          values, fill_value=fill_value).memory_usage(deep=deep)
      dense_usage = Series(values).memory_usage(deep=deep)

      assert sparse_usage < dense_usage
  class TestSparseHandlingMultiIndexes(object):
      """Sparse/dense frame conversions with MultiIndex rows and columns."""

      def setup_method(self, method):
          """Create a dense frame with named MultiIndex axes, filled with 3.14."""
          row_index = pd.MultiIndex.from_product(
              [["x", "y"], ["10", "20"]], names=['row-foo', 'row-bar'])
          col_index = pd.MultiIndex.from_product(
              [['a', 'b', 'c'], ["1", "2"]], names=['col-foo', 'col-bar'])

          frame = pd.DataFrame(index=row_index, columns=col_index)
          frame = frame.sort_index().sort_index(axis=1)
          self.dense_multiindex_frame = frame.fillna(value=3.14)

      def test_to_sparse_preserve_multiindex_names_columns(self):
          """Columns survive ``to_sparse()`` followed by ``copy()``."""
          sparse_copy = self.dense_multiindex_frame.to_sparse().copy()
          tm.assert_index_equal(sparse_copy.columns,
                                self.dense_multiindex_frame.columns)

      def test_round_trip_preserve_multiindex_names(self):
          """``to_sparse().to_dense()`` reproduces the original frame."""
          round_tripped = self.dense_multiindex_frame.to_sparse().to_dense()
          tm.assert_frame_equal(self.dense_multiindex_frame,
                                round_tripped,
                                check_column_type=True,
                                check_names=True)
  @td.skip_if_no_scipy
  @pytest.mark.filterwarnings(
      "ignore:the matrix subclass:PendingDeprecationWarning"
  )
  class TestSparseSeriesScipyInteraction(object):
      """SparseSeries/scipy COO conversion tests (issue 8048), followed by
      concat, value_counts and isna/notna checks."""

      def setup_method(self, method):
          """Build the sparse inputs and the expected COO results.

          The tests pick entries by position, so the order of every list
          assigned here matters.
          """
          import scipy.sparse

          # SparseSeries inputs used in tests, the tests rely on the order
          dense = pd.Series([3.0, nan, 1.0, 2.0, nan, nan])
          dense.index = pd.MultiIndex.from_tuples(
              [(1, 2, 'a', 0),
               (1, 2, 'a', 1),
               (1, 1, 'b', 0),
               (1, 1, 'b', 1),
               (2, 1, 'b', 0),
               (2, 1, 'b', 1)],
              names=['A', 'B', 'C', 'D'])
          text_named = dense.to_sparse()

          int_named = text_named.copy()
          int_named.index.names = [3, 0, 1, 2]

          grid = pd.Series(
              [nan] * 12,
              index=cartesian_product((range(3), range(4)))).to_sparse()
          for key, val in zip([(0, 0), (1, 2), (1, 3)], [3.0, 1.0, 2.0]):
              grid[key] = val

          self.sparse_series = [text_named, int_named, grid]

          # results used in tests
          coo = scipy.sparse.coo_matrix
          self.coo_matrices = [
              coo(([3.0, 1.0, 2.0], ([0, 1, 1], [0, 2, 3])), shape=(3, 4)),
              coo(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(3, 4)),
              coo(([3.0, 1.0, 2.0], ([0, 1, 1], [0, 0, 1])), shape=(3, 2)),
          ]
          self.ils = [
              [(1, 2), (1, 1), (2, 1)],
              [(1, 1), (1, 2), (2, 1)],
              [(1, 2, 'a'), (1, 1, 'b'), (2, 1, 'b')],
          ]
          self.jls = [
              [('a', 0), ('a', 1), ('b', 0), ('b', 1)],
              [0, 1],
          ]

      def test_to_coo_text_names_integer_row_levels_nosort(self):
          """Levels given by position on the text-named input."""
          expected = (self.coo_matrices[0], self.ils[0], self.jls[0])
          self._run_test(self.sparse_series[0],
                         dict(row_levels=[0, 1], column_levels=[2, 3]),
                         expected)

      def test_to_coo_text_names_integer_row_levels_sort(self):
          """Levels given by position, with ``sort_labels=True``."""
          expected = (self.coo_matrices[1], self.ils[1], self.jls[0])
          self._run_test(self.sparse_series[0],
                         dict(row_levels=[0, 1],
                              column_levels=[2, 3],
                              sort_labels=True),
                         expected)

      def test_to_coo_text_names_text_row_levels_nosort_col_level_single(self):
          """Three named row levels and a single named column level."""
          expected = (self.coo_matrices[2], self.ils[2], self.jls[1])
          self._run_test(self.sparse_series[0],
                         dict(row_levels=['A', 'B', 'C'],
                              column_levels=['D'],
                              sort_labels=False),
                         expected)

      def test_to_coo_integer_names_integer_row_levels_nosort(self):
          """Integer level names on the second input."""
          expected = (self.coo_matrices[0], self.ils[0], self.jls[0])
          self._run_test(self.sparse_series[1],
                         dict(row_levels=[3, 0], column_levels=[1, 2]),
                         expected)

      def test_to_coo_text_names_text_row_levels_nosort(self):
          """Levels given by name on the text-named input."""
          expected = (self.coo_matrices[0], self.ils[0], self.jls[0])
          self._run_test(self.sparse_series[0],
                         dict(row_levels=['A', 'B'],
                              column_levels=['C', 'D']),
                         expected)

      def test_to_coo_bad_partition_nonnull_intersection(self):
          """Level 'C' listed for both rows and columns raises ValueError."""
          ss = self.sparse_series[0]
          with pytest.raises(ValueError):
              ss.to_coo(['A', 'B', 'C'], ['C', 'D'])

      def test_to_coo_bad_partition_small_union(self):
          """Leaving level 'B' out of both lists raises ValueError."""
          ss = self.sparse_series[0]
          with pytest.raises(ValueError):
              ss.to_coo(['A'], ['C', 'D'])

      def test_to_coo_nlevels_less_than_two(self):
          """A flat ``np.arange`` index makes ``to_coo`` raise ValueError."""
          ss = self.sparse_series[0]
          ss.index = np.arange(len(ss.index))
          with pytest.raises(ValueError):
              ss.to_coo()

      def test_to_coo_bad_ilevel(self):
          """Requesting level 'E' raises KeyError."""
          ss = self.sparse_series[0]
          with pytest.raises(KeyError):
              ss.to_coo(['A', 'B'], ['C', 'D', 'E'])

      def test_to_coo_duplicate_index_entries(self):
          """The first input concatenated with itself raises ValueError."""
          ss = pd.concat([self.sparse_series[0]] * 2).to_sparse()
          with pytest.raises(ValueError):
              ss.to_coo(['A', 'B'], ['C', 'D'])

      def test_from_coo_dense_index(self):
          """``from_coo(..., dense_index=True)`` equals the third input."""
          built = SparseSeries.from_coo(self.coo_matrices[0],
                                        dense_index=True)
          tm.assert_sp_series_equal(built, self.sparse_series[2])

      def test_from_coo_nodense_index(self):
          """``dense_index=False`` equals the third input after ``dropna``."""
          built = SparseSeries.from_coo(self.coo_matrices[0],
                                        dense_index=False)
          reference = self.sparse_series[2].dropna().to_sparse()
          tm.assert_sp_series_equal(built, reference)

      def test_from_coo_long_repr(self):
          """``repr`` of a series built from a 350x18 random matrix runs."""
          # GH 13114
          # test it doesn't raise error. Formatting is tested in test_format
          import scipy.sparse

          repr(SparseSeries.from_coo(scipy.sparse.rand(350, 18)))

      def _run_test(self, ss, kwargs, check):
          """Check ``ss.to_coo(**kwargs)`` and its row/column-swapped form."""
          self._check_results_to_coo(ss.to_coo(**kwargs), check)

          # for every test, also test symmetry property (transpose), switch
          # row_levels and column_levels
          swapped = dict(kwargs,
                         row_levels=kwargs['column_levels'],
                         column_levels=kwargs['row_levels'])
          matrix, rows, cols = ss.to_coo(**swapped)
          self._check_results_to_coo((matrix.T, cols, rows), check)

      def _check_results_to_coo(self, results, check):
          """Compare a ``(matrix, row labels, column labels)`` triple."""
          matrix, row_labels, col_labels = results
          want_matrix, want_rows, want_cols = check

          # convert to dense and compare
          tm.assert_numpy_array_equal(matrix.todense(),
                                      want_matrix.todense())
          # A direct comparison of the sparse difference was left disabled
          # upstream ("max is failing in python 2.6").
          assert row_labels == want_rows
          assert col_labels == want_cols

      def test_concat(self):
          """Concatenate two sparse series sharing kind and fill value."""
          val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
          val2 = np.array([3, np.nan, 4, 0, 0])

          for kind in ['integer', 'block']:
              left = pd.SparseSeries(val1, name='x', kind=kind)
              right = pd.SparseSeries(val2, name='y', kind=kind)
              got = pd.concat([left, right])
              dense_cat = pd.concat([pd.Series(val1), pd.Series(val2)])
              want = pd.SparseSeries(dense_cat, kind=kind)
              tm.assert_sp_series_equal(got, want)

              left = pd.SparseSeries(val1, fill_value=0, name='x', kind=kind)
              right = pd.SparseSeries(val2, fill_value=0, name='y',
                                      kind=kind)
              got = pd.concat([left, right])
              dense_cat = pd.concat([pd.Series(val1), pd.Series(val2)])
              want = pd.SparseSeries(dense_cat, fill_value=0, kind=kind)
              tm.assert_sp_series_equal(got, want,
                                        consolidate_block_indices=True)

      def test_concat_axis1(self):
          """Column-wise concat compared with a SparseDataFrame."""
          val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
          val2 = np.array([3, np.nan, 4, 0, 0])

          got = pd.concat([pd.SparseSeries(val1, name='x'),
                           pd.SparseSeries(val2, name='y')], axis=1)
          dense_frame = pd.concat([pd.Series(val1, name='x'),
                                   pd.Series(val2, name='y')], axis=1)
          tm.assert_sp_frame_equal(got, pd.SparseDataFrame(dense_frame))

      def test_concat_different_fill(self):
          """Concat with differing fill values warns PerformanceWarning."""
          val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
          val2 = np.array([3, np.nan, 4, 0, 0])

          for kind in ['integer', 'block']:
              nan_filled = pd.SparseSeries(val1, name='x', kind=kind)
              zero_filled = pd.SparseSeries(val2, name='y', kind=kind,
                                            fill_value=0)

              with tm.assert_produces_warning(PerformanceWarning):
                  got = pd.concat([nan_filled, zero_filled])
              dense_cat = pd.concat([pd.Series(val1), pd.Series(val2)])
              want = pd.SparseSeries(dense_cat, kind=kind)
              tm.assert_sp_series_equal(got, want)

              with tm.assert_produces_warning(PerformanceWarning):
                  got = pd.concat([zero_filled, nan_filled])
              dense_cat = pd.concat([pd.Series(val2), pd.Series(val1)])
              want = pd.SparseSeries(dense_cat, kind=kind, fill_value=0)
              tm.assert_sp_series_equal(got, want)

      def test_concat_axis1_different_fill(self):
          """Column-wise concat with differing fill values."""
          val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
          val2 = np.array([3, np.nan, 4, 0, 0])

          got = pd.concat([pd.SparseSeries(val1, name='x'),
                           pd.SparseSeries(val2, name='y', fill_value=0)],
                          axis=1)
          want = pd.concat([pd.Series(val1, name='x'),
                            pd.Series(val2, name='y')], axis=1)
          assert isinstance(got, pd.SparseDataFrame)
          tm.assert_frame_equal(got.to_dense(), want)

      def test_concat_different_kind(self):
          """Concat of an 'integer' and a 'block' series, in both orders."""
          val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
          val2 = np.array([3, np.nan, 4, 0, 0])

          int_kind = pd.SparseSeries(val1, name='x', kind='integer')
          block_kind = pd.SparseSeries(val2, name='y', kind='block',
                                       fill_value=0)

          with tm.assert_produces_warning(PerformanceWarning):
              got = pd.concat([int_kind, block_kind])
          dense_cat = pd.concat([pd.Series(val1), pd.Series(val2)])
          want = pd.SparseSeries(dense_cat, kind='integer')
          tm.assert_sp_series_equal(got, want)

          with tm.assert_produces_warning(PerformanceWarning):
              got = pd.concat([block_kind, int_kind])
          dense_cat = pd.concat([pd.Series(val2), pd.Series(val1)])
          want = pd.SparseSeries(dense_cat, kind='block', fill_value=0)
          tm.assert_sp_series_equal(got, want)

      def test_concat_sparse_dense(self):
          """Concat of sparse and dense series in several orders."""
          # use first input's fill_value
          val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
          val2 = np.array([3, np.nan, 4, 0, 0])

          for kind in ['integer', 'block']:
              sparse = pd.SparseSeries(val1, name='x', kind=kind)
              dense = pd.Series(val2, name='y')

              got = pd.concat([sparse, dense])
              dense_cat = pd.concat([pd.Series(val1), dense])
              want = pd.SparseSeries(dense_cat, kind=kind)
              tm.assert_sp_series_equal(got, want)

              got = pd.concat([dense, sparse, dense])
              dense_cat = pd.concat([dense, pd.Series(val1), dense])
              want = dense_cat.astype("Sparse")
              tm.assert_series_equal(got, want)

              sparse = pd.SparseSeries(val1, name='x', kind=kind,
                                       fill_value=0)
              dense = pd.Series(val2, name='y')

              got = pd.concat([sparse, dense])
              dense_cat = pd.concat([pd.Series(val1), dense])
              want = dense_cat.astype(SparseDtype(dense_cat.dtype, 0))
              tm.assert_series_equal(got, want)

              got = pd.concat([dense, sparse, dense])
              dense_cat = pd.concat([dense, pd.Series(val1), dense])
              want = dense_cat.astype(SparseDtype(dense_cat.dtype, 0))
              tm.assert_series_equal(got, want)

      def test_value_counts(self):
          """``value_counts`` matches dense for NaN and zero fill values."""
          vals = [1, 2, nan, 0, nan, 1, 2, nan, nan, 1, 2, 0, 1, 1]
          dense = pd.Series(vals, name='xx')

          for ctor_kwargs in ({}, {'fill_value': 0}):
              sparse = pd.SparseSeries(vals, name='xx', **ctor_kwargs)
              for count_kwargs in ({}, {'dropna': False}):
                  tm.assert_series_equal(
                      sparse.value_counts(**count_kwargs),
                      dense.value_counts(**count_kwargs))

      def test_value_counts_dup(self):
          """``value_counts`` after dividing or multiplying by zero."""
          vals = [1, 2, nan, 0, nan, 1, 2, nan, nan, 1, 2, 0, 1, 1]

          # numeric op may cause sp_values to include the same value as
          # fill_value
          dense = pd.Series(vals, name='xx') / 0.
          sparse = pd.SparseSeries(vals, name='xx') / 0.
          for count_kwargs in ({}, {'dropna': False}):
              tm.assert_series_equal(sparse.value_counts(**count_kwargs),
                                     dense.value_counts(**count_kwargs))

          vals = [1, 2, 0, 0, 0, 1, 2, 0, 0, 1, 2, 0, 1, 1]

          dense = pd.Series(vals, name='xx') * 0.
          sparse = pd.SparseSeries(vals, name='xx') * 0.
          for count_kwargs in ({}, {'dropna': False}):
              tm.assert_series_equal(sparse.value_counts(**count_kwargs),
                                     dense.value_counts(**count_kwargs))

      def test_value_counts_int(self):
          """``value_counts`` on integer data without any NaN."""
          vals = [1, 2, 0, 1, 2, 1, 2, 0, 1, 1]
          dense = pd.Series(vals, name='xx')

          # fill_value is np.nan, but should not be included in the result
          for ctor_kwargs in ({}, {'fill_value': 0}):
              sparse = pd.SparseSeries(vals, name='xx', **ctor_kwargs)
              for count_kwargs in ({}, {'dropna': False}):
                  tm.assert_series_equal(
                      sparse.value_counts(**count_kwargs),
                      dense.value_counts(**count_kwargs))

      def test_isna(self):
          """``isna`` for a default-fill and a zero-fill series."""
          # GH 8276
          sparse = pd.SparseSeries([np.nan, np.nan, 1, 2, np.nan],
                                   name='xxx')
          want = pd.SparseSeries([True, True, False, False, True],
                                 name='xxx', fill_value=True)
          tm.assert_sp_series_equal(sparse.isna(), want)

          # if fill_value is not nan, True can be included in sp_values
          sparse = pd.SparseSeries([np.nan, 0., 1., 2., 0.], name='xxx',
                                   fill_value=0.)
          got = sparse.isna()
          assert isinstance(got, pd.SparseSeries)
          want = pd.Series([True, False, False, False, False], name='xxx')
          tm.assert_series_equal(got.to_dense(), want)

      def test_notna(self):
          """``notna`` for a default-fill and a zero-fill series."""
          # GH 8276
          sparse = pd.SparseSeries([np.nan, np.nan, 1, 2, np.nan],
                                   name='xxx')
          want = pd.SparseSeries([False, False, True, True, False],
                                 name='xxx', fill_value=False)
          tm.assert_sp_series_equal(sparse.notna(), want)

          # if fill_value is not nan, True can be included in sp_values
          sparse = pd.SparseSeries([np.nan, 0., 1., 2., 0.], name='xxx',
                                   fill_value=0.)
          got = sparse.notna()
          assert isinstance(got, pd.SparseSeries)
          want = pd.Series([False, True, True, True, True], name='xxx')
          tm.assert_series_equal(got.to_dense(), want)
  def _dense_series_compare(s, f):
      """Apply ``f`` to ``s`` and to ``s.to_dense()`` and compare the results.

      The result of ``f(s)`` must be a ``SparseSeries``; its dense form must
      equal the result of ``f`` on the dense series.
      """
      sparse_out = f(s)
      assert isinstance(sparse_out, SparseSeries)

      dense_out = f(s.to_dense())
      tm.assert_series_equal(sparse_out.to_dense(), dense_out)
  class TestSparseSeriesAnalytics(object):
      """cumsum, NumPy function dispatch and deprecation checks."""

      def setup_method(self, method):
          """Create the two block-kind fixtures ``bseries`` and ``zbseries``."""
          values, labels = _test_data1()
          self.bseries = SparseSeries(values, index=labels, kind='block',
                                      name='bseries')

          values, labels = _test_data1_zero()
          self.zbseries = SparseSeries(values, index=labels, kind='block',
                                       fill_value=0, name='zbseries')

      def test_cumsum(self):
          """``cumsum`` matches the dense result; ``axis=1`` is rejected."""
          got = self.bseries.cumsum()
          want = SparseSeries(self.bseries.to_dense().cumsum())
          tm.assert_sp_series_equal(got, want)

          got = self.zbseries.cumsum()
          want = self.zbseries.to_dense().cumsum().to_sparse()
          tm.assert_series_equal(got, want)

          axis = 1  # Series is 1-D, so only axis = 0 is valid.
          msg = "No axis named {axis}".format(axis=axis)
          with pytest.raises(ValueError, match=msg):
              self.bseries.cumsum(axis=axis)

      def test_numpy_cumsum(self):
          """``np.cumsum`` matches dense; ``dtype`` and ``out`` are rejected."""
          got = np.cumsum(self.bseries)
          want = SparseSeries(self.bseries.to_dense().cumsum())
          tm.assert_sp_series_equal(got, want)

          got = np.cumsum(self.zbseries)
          want = self.zbseries.to_dense().cumsum().to_sparse()
          tm.assert_series_equal(got, want)

          with pytest.raises(ValueError,
                             match="the 'dtype' parameter is not supported"):
              np.cumsum(self.bseries, dtype=np.int64)

          with pytest.raises(ValueError,
                             match="the 'out' parameter is not supported"):
              np.cumsum(self.zbseries, out=got)

      def test_numpy_func_call(self):
          """Each listed NumPy function can be called on both fixtures."""
          # no exception should be raised even though
          # numpy passes in 'axis=None' or `axis=-1'
          funcs = ['sum', 'cumsum', 'var', 'mean',
                   'prod', 'cumprod', 'std', 'argsort',
                   'min', 'max']
          for func in funcs:
              np_func = getattr(np, func)
              for series in ('bseries', 'zbseries'):
                  np_func(getattr(self, series))

      def test_deprecated_numpy_func_call(self):
          """argmin/argmax emit FutureWarning via NumPy and as methods."""
          # NOTE: These should be added to the 'test_numpy_func_call' test
          # above once the behavior of argmin/argmax is corrected.
          for func in ['argmin', 'argmax']:
              for series in ('bseries', 'zbseries'):
                  with tm.assert_produces_warning(FutureWarning,
                                                  check_stacklevel=False):
                      getattr(np, func)(getattr(self, series))

                  with tm.assert_produces_warning(FutureWarning,
                                                  check_stacklevel=False):
                      getattr(getattr(self, series), func)()

      def test_deprecated_reindex_axis(self):
          """``reindex_axis`` warns, and the message mentions 'reindex'."""
          # https://github.com/pandas-dev/pandas/issues/17833
          with tm.assert_produces_warning(FutureWarning) as caught:
              self.bseries.reindex_axis([0, 1, 2])
          assert 'reindex' in str(caught[0].message)
  @pytest.mark.parametrize(
      'datetime_type', (np.datetime64,
                        pd.Timestamp,
                        lambda x: datetime.strptime(x, '%Y-%m-%d')))
  def test_constructor_dict_datetime64_index(datetime_type):
      """Dict keys of each datetime flavour give a Timestamp-indexed series."""
      # GH 9456
      dates = ['1984-02-19', '1988-11-06', '1989-12-03', '1990-03-15']
      values = [42544017.198965244, 1234565, 40512335.181958228, -1]

      keyed = {datetime_type(day): val for day, val in zip(dates, values)}
      from_dict = SparseSeries(keyed)
      from_index = SparseSeries(values, map(pd.Timestamp, dates))

      tm.assert_sp_series_equal(from_dict, from_index)
  def test_to_sparse():
      """``Series(SparseArray).to_sparse()`` keeps the length and the values."""
      # https://github.com/pandas-dev/pandas/issues/22389
      sparse_values = pd.SparseArray([1, 2, None, 3])
      converted = pd.Series(sparse_values).to_sparse()

      assert len(converted) == 4
      tm.assert_sp_array_equal(converted.values, sparse_values,
                               check_kind=False)
  def test_constructor_mismatched_raises():
      """Two values with a three-label index raise ValueError."""
      with pytest.raises(
              ValueError,
              match="Length of passed values is 2, index implies 3"):
          SparseSeries([1, 2], index=[1, 2, 3])
  def test_block_deprecated():
      """Accessing ``.block`` on a SparseSeries emits FutureWarning."""
      series = SparseSeries([1])
      with tm.assert_produces_warning(FutureWarning):
          series.block