12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369 |
- # pylint: disable-msg=E1101,W0612
- import operator
- import numpy as np
- from numpy import nan
- import pytest
- from pandas._libs.sparse import BlockIndex, IntIndex
- from pandas.compat import lrange
- from pandas.errors import PerformanceWarning
- import pandas as pd
- from pandas import DataFrame, Panel, Series, bdate_range, compat
- from pandas.core.indexes.datetimes import DatetimeIndex
- from pandas.core.sparse import frame as spf
- from pandas.core.sparse.api import (
- SparseArray, SparseDataFrame, SparseDtype, SparseSeries)
- from pandas.tests.frame.test_api import SharedWithSparse
- from pandas.util import testing as tm
- from pandas.tseries.offsets import BDay
- class TestSparseDataFrame(SharedWithSparse):
- klass = SparseDataFrame
- # SharedWithSparse tests use generic, klass-agnostic assertion
- _assert_frame_equal = staticmethod(tm.assert_sp_frame_equal)
- _assert_series_equal = staticmethod(tm.assert_sp_series_equal)
def test_iterrows(self, float_frame, float_string_frame):
    """Each row yielded by ``iterrows`` equals the matching ``.loc`` row."""
    # Same as the parent-class test, but the sparse kind is not compared.
    for frame in (float_frame, float_string_frame):
        for label, row in frame.iterrows():
            tm.assert_sp_series_equal(row, frame.loc[label],
                                      check_kind=False)
def test_itertuples(self, float_frame):
    """Tuples from ``itertuples`` rebuild into the positional rows."""
    for position, row_tuple in enumerate(float_frame.itertuples()):
        # First element is the index label, the rest are the row values.
        rebuilt = self.klass._constructor_sliced(row_tuple[1:])
        rebuilt.name = row_tuple[0]
        wanted = float_frame.iloc[position, :].reset_index(drop=True)
        tm.assert_sp_series_equal(rebuilt, wanted, check_kind=False)
def test_fill_value_when_combine_const(self):
    """``add`` with ``fill_value`` matches ``fillna`` followed by ``add``."""
    # GH12723
    values = np.array([0, 1, np.nan, 3, 4, 5], dtype='float')
    sparse = SparseDataFrame({'foo': values}, index=range(6))

    wanted = sparse.fillna(0).add(2)
    got = sparse.add(2, fill_value=0)
    tm.assert_sp_frame_equal(got, wanted)
def test_values(self, empty_frame, float_frame):
    """``.values`` keeps the frame shape even when an axis is empty."""
    assert empty_frame.values.shape == (0, 0)

    # index only, no columns
    assert SparseDataFrame(index=np.arange(10)).values.shape == (10, 0)

    # columns only, no index
    assert SparseDataFrame(columns=np.arange(10)).values.shape == (0, 10)
def test_copy(self, float_frame):
    """``copy`` returns an equal SparseDataFrame with an identical index."""
    duplicate = float_frame.copy()
    assert isinstance(duplicate, SparseDataFrame)
    tm.assert_sp_frame_equal(duplicate, float_frame)

    # as of v0.15.0 the index is identical, though not the same object
    assert duplicate.index.identical(float_frame.index)
def test_constructor(self, float_frame, float_frame_int_kind,
                     float_frame_fill0):
    """Exercise the main SparseDataFrame construction paths."""
    for _, column in compat.iteritems(float_frame):
        assert isinstance(column, SparseSeries)

    assert isinstance(float_frame_int_kind['A'].sp_index, IntIndex)

    # the zero-filled fixture keeps its fill value
    col_a = float_frame_fill0['A']
    assert col_a.fill_value == 0
    # XXX: changed asarray
    sparse_expected = pd.SparseArray(
        [0, 0, 0, 0, 1., 2., 3., 4., 5., 6.], fill_value=0, kind='block')
    tm.assert_sp_array_equal(sparse_expected, float_frame_fill0['A'].values)
    dense_expected = np.array([0., 0., 0., 0., 1., 2., 3., 4., 5., 6.])
    tm.assert_numpy_array_equal(dense_expected,
                                float_frame_fill0['A'].to_dense().values)

    # construct with labels but no data
    blank = SparseDataFrame(columns=np.arange(10), index=np.arange(10))
    for _, column in compat.iteritems(blank):
        assert isinstance(column, SparseSeries)

    # construct from a nested dict
    nested = {name: column.to_dict()
              for name, column in compat.iteritems(float_frame)}
    tm.assert_sp_frame_equal(SparseDataFrame(nested), float_frame)

    # TODO: test data is copied from inputs

    # construct from a frame with a different (shorter) index
    head_index = float_frame.index[:5]
    rebuilt = SparseDataFrame(
        float_frame,
        index=head_index,
        columns=float_frame.columns,
        default_fill_value=float_frame.default_fill_value,
        default_kind=float_frame.default_kind,
        copy=True)
    tm.assert_sp_frame_equal(rebuilt, float_frame.reindex(head_index),
                             exact_indices=False)

    # the level parameter is rejected by reindex
    with pytest.raises(TypeError):
        float_frame.reindex(head_index, level=0)

    repr(float_frame)
def test_constructor_dict_order(self):
    """Column order from a dict follows the interpreter's dict ordering."""
    # GH19018
    # insertion order on python >= 3.6, otherwise sorted order
    source = {'b': [2, 3], 'a': [0, 1]}
    column_order = list('ba') if compat.PY36 else list('ab')

    built = SparseDataFrame(data=source)
    wanted = SparseDataFrame(data=source, columns=column_order)
    tm.assert_sp_frame_equal(built, wanted)
def test_constructor_ndarray(self, float_frame):
    """Construction from ndarrays, including label-length validation."""
    raw = float_frame.values

    # no index or columns: only checks that construction succeeds
    SparseDataFrame(raw)

    # 1d input becomes a single column
    single = SparseDataFrame(float_frame['A'].values,
                             index=float_frame.index, columns=['A'])
    tm.assert_sp_frame_equal(single, float_frame.reindex(columns=['A']))

    # reindex raises on the level argument
    with pytest.raises(TypeError):
        float_frame.reindex(columns=['A'], level=1)

    # wrong-length index / columns
    with pytest.raises(ValueError, match="^Index length"):
        SparseDataFrame(float_frame.values, index=float_frame.index[:-1])

    with pytest.raises(ValueError, match="^Column length"):
        SparseDataFrame(float_frame.values,
                        columns=float_frame.columns[:-1])
- # GH 9272
def test_constructor_empty(self):
    """A frame built with no arguments has no rows and no columns."""
    empty = SparseDataFrame()
    for axis in (empty.index, empty.columns):
        assert len(axis) == 0
def test_constructor_dataframe(self, float_frame):
    """Sparsifying the dense form of a frame gives back the same frame."""
    rebuilt = SparseDataFrame(float_frame.to_dense())
    tm.assert_sp_frame_equal(rebuilt, float_frame)
def test_constructor_convert_index_once(self):
    """All columns share one index object built from the input array."""
    labels = np.array([1.5, 2.5, 3.5])
    frame = SparseDataFrame(columns=lrange(4), index=labels)
    first, second = frame[0], frame[1]
    assert first.index is second.index
def test_constructor_from_series(self):
    """A SparseSeries can be passed directly to SparseDataFrame (GH 2873).

    The second half only builds sparse inputs; the list-of-series
    constructor calls it was meant to exercise are still disabled.
    """
    # GH 2873
    x = Series(np.random.randn(10000), name='a')
    x = x.to_sparse(fill_value=0)
    assert isinstance(x, SparseSeries)
    df = SparseDataFrame(x)
    assert isinstance(df, SparseDataFrame)

    x = Series(np.random.randn(10000), name='a')
    y = Series(np.random.randn(10000), name='b')
    x2 = x.astype(float)
    # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
    x2.loc[:9998] = np.nan

    # TODO: x_sparse is unused...fix
    x_sparse = x2.to_sparse(fill_value=np.nan)  # noqa

    # Currently fails too with weird ufunc error
    # df1 = SparseDataFrame([x_sparse, y])

    y.loc[:9998] = 0

    # TODO: y_sparse is unused...fix
    y_sparse = y.to_sparse(fill_value=0)  # noqa
    # without sparse value raises error
    # df2 = SparseDataFrame([x2_sparse, y])
def test_constructor_from_dense_series(self):
    """Dense Series input matches ``to_frame().to_sparse()`` (GH 19393)."""
    # GH 19393
    # first a named series, then an unnamed one
    for series_kwargs in ({'name': 'a'}, {}):
        dense = Series(np.random.randn(10000), **series_kwargs)
        built = SparseDataFrame(dense)
        wanted = dense.to_frame().to_sparse()
        tm.assert_sp_frame_equal(built, wanted)
def test_constructor_from_unknown_type(self):
    """An unsupported data object raises TypeError naming its type."""
    # GH 19393
    class Unknown(object):
        pass

    expected_message = ('SparseDataFrame called with unknown type '
                        '"Unknown" for data argument')
    with pytest.raises(TypeError, match=expected_message):
        SparseDataFrame(Unknown())
def test_constructor_preserve_attr(self):
    """dtype and fill_value survive construction from sparse inputs."""
    # GH 13866
    def assert_preserved(obj):
        assert obj.dtype == SparseDtype(np.int64)
        assert obj.fill_value == 0

    arr = pd.SparseArray([1, 0, 3, 0], dtype=np.int64, fill_value=0)
    assert_preserved(arr)

    # frame built from a dict holding the array
    assert_preserved(pd.SparseDataFrame({'x': arr})['x'])

    series = pd.SparseSeries(arr, name='x')
    assert_preserved(series)

    # frame built from the series itself, then from a dict holding it
    for data in (series, {'x': series}):
        assert_preserved(pd.SparseDataFrame(data)['x'])
def test_constructor_nan_dataframe(self):
    """An all-NaN MultiIndexed frame sparsifies like direct construction."""
    # GH 10079
    trains = np.arange(100)
    thresholds = [10, 20, 30, 40, 50, 60]
    index = pd.MultiIndex.from_tuples(
        [(train, threshold)
         for train in trains
         for threshold in thresholds],
        names=['trains', 'thresholds'])

    all_nan = np.empty((len(index), len(trains)))
    all_nan.fill(np.nan)

    frame_kwargs = dict(index=index, columns=trains, dtype=float)
    got = pd.DataFrame(all_nan, **frame_kwargs).to_sparse()
    wanted = pd.SparseDataFrame(all_nan, **frame_kwargs)
    tm.assert_sp_frame_equal(got, wanted)
def test_type_coercion_at_construction(self):
    """A frame-level ``dtype`` is applied to every column (GH 15682)."""
    # GH 15682
    columns = (('a', [1, 0, 0]), ('b', [0, 1, 0]), ('c', [0, 0, 1]))

    got = pd.SparseDataFrame(dict(columns), dtype='uint8',
                             default_fill_value=0)
    wanted = pd.SparseDataFrame(
        {name: pd.SparseSeries(values, dtype='uint8')
         for name, values in columns},
        default_fill_value=0)
    tm.assert_sp_frame_equal(got, wanted)
def test_dtypes(self):
    """``get_dtype_counts`` reports the sparse float dtype per column."""
    dense = DataFrame(np.random.randn(10000, 4))
    dense.loc[:9998] = np.nan

    counts = dense.to_sparse().get_dtype_counts()
    tm.assert_series_equal(counts, Series({'Sparse[float64, nan]': 4}))
def test_shape(self, float_frame, float_frame_int_kind,
               float_frame_fill0, float_frame_fill2):
    """Every fixture frame reports a (10, 4) shape."""
    # see gh-10452
    fixtures = (float_frame, float_frame_int_kind,
                float_frame_fill0, float_frame_fill2)
    for frame in fixtures:
        assert frame.shape == (10, 4)
def test_str(self):
    """``str`` of a large, mostly-NaN sparse frame does not raise."""
    dense = DataFrame(np.random.randn(10000, 4))
    dense.loc[:9998] = np.nan
    str(dense.to_sparse())
def test_array_interface(self, float_frame):
    """A NumPy ufunc on the sparse frame matches the dense result."""
    sparse_sqrt = np.sqrt(float_frame)
    dense_sqrt = np.sqrt(float_frame.to_dense())
    tm.assert_frame_equal(sparse_sqrt.to_dense(), dense_sqrt)
def test_pickle(self, float_frame, float_frame_int_kind, float_frame_dense,
                float_frame_fill0, float_frame_fill0_dense,
                float_frame_fill2, float_frame_fill2_dense):
    """Sparse frames survive a pickle round trip unchanged."""
    # (sparse frame, dense frame it should densify to)
    pairs = [
        (SparseDataFrame(), DataFrame()),
        (float_frame, float_frame_dense),
        (float_frame_int_kind, float_frame_dense),
        (float_frame_fill0, float_frame_fill0_dense),
        (float_frame_fill2, float_frame_fill2_dense),
    ]
    for sparse, dense in pairs:
        restored = tm.round_trip_pickle(sparse)
        tm.assert_sp_frame_equal(sparse, restored)
        tm.assert_frame_equal(restored.to_dense(), dense,
                              check_dtype=False)
def test_dense_to_sparse(self):
    """``DataFrame.to_sparse`` honours ``kind`` and ``fill_value``."""
    nan_dense = DataFrame({'A': [nan, nan, nan, 1, 2],
                           'B': [1, 2, nan, nan, nan]})

    # defaults: NaN fill value and block index
    block_sparse = nan_dense.to_sparse()
    assert isinstance(block_sparse, SparseDataFrame)
    assert np.isnan(block_sparse.default_fill_value)
    assert isinstance(block_sparse['A'].sp_index, BlockIndex)
    tm.assert_frame_equal(block_sparse.to_dense(), nan_dense)

    # explicit integer kind
    int_sparse = nan_dense.to_sparse(kind='integer')
    assert isinstance(int_sparse['A'].sp_index, IntIndex)

    # explicit zero fill value
    zero_dense = DataFrame({'A': [0, 0, 0, 1, 2],
                            'B': [1, 2, 0, 0, 0]}, dtype=float)
    zero_sparse = zero_dense.to_sparse(fill_value=0)
    assert zero_sparse.default_fill_value == 0
    tm.assert_frame_equal(zero_sparse.to_dense(), zero_dense)
def test_density(self):
    """``density`` is checked on a sparse series and a sparse frame."""
    series = SparseSeries([nan, nan, nan, 0, 1, 2, 3, 4, 5, 6])
    assert series.density == 0.7

    frame = SparseDataFrame({
        'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
        'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
        'C': np.arange(10),
        'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan],
    })
    assert frame.density == 0.75
- def test_sparse_to_dense(self):
- pass
def test_sparse_series_ops(self, float_frame):
    """Run the shared frame-operation checks on the default fixture."""
    frame = float_frame
    self._check_frame_ops(frame)
def test_sparse_series_ops_i(self, float_frame_int_kind):
    """Run the shared frame-operation checks on the integer-kind fixture."""
    frame = float_frame_int_kind
    self._check_frame_ops(frame)
def test_sparse_series_ops_z(self, float_frame_fill0):
    """Run the shared frame-operation checks on the fill0 fixture."""
    frame = float_frame_fill0
    self._check_frame_ops(frame)
def test_sparse_series_ops_fill(self, float_frame_fill2):
    """Run the shared frame-operation checks on the fill2 fixture."""
    frame = float_frame_fill2
    self._check_frame_ops(frame)
def _check_frame_ops(self, frame):
    """Compare arithmetic on a sparse ``frame`` against its dense form.

    Covers frame-with-frame operations, frame-with-column operations
    along ``axis='index'``, and frame-with-row (cross-section)
    operations in both operand orders, for add/sub/mul/truediv/floordiv.
    """
    def _compare_to_dense(a, b, da, db, op):
        # Apply op to the sparse operands and to their dense twins, then
        # compare after sparsifying the dense result with the same fill.
        sparse_result = op(a, b)
        dense_result = op(da, db)

        fill = sparse_result.default_fill_value
        dense_result = dense_result.to_sparse(fill_value=fill)
        tm.assert_sp_frame_equal(sparse_result, dense_result,
                                 exact_indices=False)

        if isinstance(a, DataFrame) and isinstance(db, DataFrame):
            # sparse-with-dense must stay sparse and agree as well
            mixed_result = op(a, db)
            assert isinstance(mixed_result, SparseDataFrame)
            tm.assert_sp_frame_equal(mixed_result, sparse_result,
                                     exact_indices=False)

    def _index_axis_method(name):
        # Bind ``name`` explicitly instead of closing over a loop
        # variable from a lambda defined inside the loop.
        def apply(a, b):
            return getattr(a, name)(b, axis='index')
        return apply

    opnames = ['add', 'sub', 'mul', 'truediv', 'floordiv']
    ops = [getattr(operator, name) for name in opnames]

    fidx = frame.index

    # time series operations
    series = [frame['A'], frame['B'], frame['C'], frame['D'],
              frame['A'].reindex(fidx[:7]), frame['A'].reindex(fidx[::2]),
              SparseSeries([], index=[])]

    for name in opnames:
        _compare_to_dense(frame, frame[::2], frame.to_dense(),
                          frame[::2].to_dense(), getattr(operator, name))

        # 2304, no auto-broadcasting
        by_index = _index_axis_method(name)
        for s in series:
            _compare_to_dense(frame, s, frame.to_dense(), s.to_dense(),
                              by_index)
            # rops are not implemented, so the reversed operand order
            # is not exercised here

    # cross-sectional operations
    series = [frame.xs(fidx[0]), frame.xs(fidx[3]), frame.xs(fidx[5]),
              frame.xs(fidx[7]), frame.xs(fidx[5])[:2]]

    for op in ops:
        for s in series:
            _compare_to_dense(frame, s, frame.to_dense(), s, op)
            _compare_to_dense(s, frame, s, frame.to_dense(), op)

    # it works! (only checks that adding a column subset does not raise)
    result = frame + frame.loc[:, ['A', 'B']]  # noqa
def test_op_corners(self, float_frame, empty_frame):
    """Adding an empty frame gives all-NaN with the non-empty labels."""
    assert (empty_frame + empty_frame).empty

    all_nan = float_frame * np.nan

    right_empty = float_frame + empty_frame
    assert isinstance(right_empty.index, DatetimeIndex)
    tm.assert_frame_equal(right_empty, all_nan)

    left_empty = empty_frame + float_frame
    tm.assert_frame_equal(left_empty, all_nan)
- def test_scalar_ops(self):
- pass
def test_getitem(self):
    """Selecting a list of columns matches ``reindex(columns=...)``."""
    # 1585 select multiple columns
    frame = SparseDataFrame(index=[0, 1, 2], columns=['a', 'b', 'c'])
    wanted_columns = ['a', 'b']

    picked = frame[wanted_columns]
    tm.assert_sp_frame_equal(picked, frame.reindex(columns=wanted_columns))

    # a list containing a missing column must raise
    with pytest.raises(Exception):
        frame[['a', 'd']]
def test_iloc(self, float_frame):
    """Positional column selection returns the matching SparseSeries."""
    # GH 2227
    first_column = float_frame.iloc[:, 0]
    assert isinstance(first_column, SparseSeries)
    tm.assert_sp_series_equal(first_column, float_frame['A'])

    # preserve sparse index type. #2251
    int_frame = SparseDataFrame({'A': [0, 1]}, default_kind='integer')
    by_label = int_frame['A'].sp_index
    by_position = int_frame.iloc[:, 0].sp_index
    tm.assert_class_equal(by_label, by_position)
def test_set_value(self, float_frame):
    """``set_value``/``get_value`` warn and return enlarged copies."""
    def future_warning():
        # every set_value / get_value call here is expected to warn
        return tm.assert_produces_warning(FutureWarning,
                                          check_stacklevel=False)

    # ok, as the index gets converted to object
    working = float_frame.copy()
    with future_warning():
        enlarged = working.set_value('foobar', 'B', 1.5)
    assert enlarged.index.dtype == 'object'

    # same again after casting the fixture's own index to object
    float_frame.index = float_frame.index.astype(object)

    with future_warning():
        enlarged = float_frame.set_value('foobar', 'B', 1.5)
    assert enlarged is not float_frame
    assert enlarged.index[-1] == 'foobar'
    with future_warning():
        assert enlarged.get_value('foobar', 'B') == 1.5

    # setting a value in a new column adds that column
    with future_warning():
        widened = enlarged.set_value('foobar', 'qux', 1.5)
    assert widened is not enlarged
    expected_columns = pd.Index(list(float_frame.columns) + ['qux'])
    tm.assert_index_equal(widened.columns, expected_columns)
    with future_warning():
        assert widened.get_value('foobar', 'qux') == 1.5
def test_fancy_index_misc(self, float_frame):
    """Trailing ``iloc`` slices match reindexing to the same labels."""
    # axis = 0
    last_rows = float_frame.index[-2:]
    tm.assert_sp_frame_equal(float_frame.iloc[-2:, :],
                             float_frame.reindex(index=last_rows))

    # axis = 1
    last_columns = float_frame.columns[-2:]
    tm.assert_sp_frame_equal(float_frame.iloc[:, -2:],
                             float_frame.reindex(columns=last_columns))
def test_getitem_overload(self, float_frame):
    """``[]`` accepts row slices and boolean masks."""
    # slicing
    head = float_frame[:20]
    tm.assert_sp_frame_equal(head,
                             float_frame.reindex(float_frame.index[:20]))

    # boolean indexing
    pivot = float_frame.index[5]
    mask = float_frame.index > pivot
    tm.assert_index_equal(float_frame.index[mask], float_frame[mask].index)

    # a mask one element short must raise
    with pytest.raises(Exception):
        float_frame[mask[:-1]]
def test_setitem(self, float_frame, float_frame_int_kind,
                 float_frame_dense,
                 float_frame_fill0, float_frame_fill0_dense,
                 float_frame_fill2, float_frame_fill2_dense):
    """Column assignment accepts sparse, dense, ndarray and scalar values."""
    def _check_frame(frame):
        nrows = len(frame)

        # insert SparseSeries
        frame['E'] = frame['A']
        assert isinstance(frame['E'], SparseSeries)
        tm.assert_sp_series_equal(frame['E'], frame['A'],
                                  check_names=False)

        # insert SparseSeries differently-indexed
        every_other = frame['A'][::2]
        frame['E'] = every_other
        wanted = every_other.to_dense().reindex(frame.index)
        stored = frame['E'].to_dense()
        tm.assert_series_equal(stored, wanted, check_names=False)
        assert stored.name == 'E'

        # insert Series
        frame['F'] = frame['A'].to_dense()
        assert isinstance(frame['F'], SparseSeries)
        tm.assert_sp_series_equal(frame['F'], frame['A'],
                                  check_names=False)

        # insert Series differently-indexed
        every_other = frame['A'].to_dense()[::2]
        frame['G'] = every_other
        wanted = every_other.reindex(frame.index)
        wanted.name = 'G'
        tm.assert_series_equal(frame['G'].to_dense(), wanted)

        # insert ndarray
        frame['H'] = np.random.randn(nrows)
        assert isinstance(frame['H'], SparseSeries)

        # second half set to the fill value, so only half is stored
        half_filled = np.random.randn(nrows)
        half_filled[nrows // 2:] = frame.default_fill_value
        frame['I'] = half_filled
        assert len(frame['I'].sp_values) == nrows // 2

        # insert ndarray wrong size
        with pytest.raises(Exception):
            frame['foo'] = np.random.randn(nrows - 1)

        # scalar value
        frame['J'] = 5
        assert len(frame['J'].sp_values) == nrows
        assert (frame['J'].sp_values == 5).all()

        # a scalar equal to the fill value stores nothing
        frame['K'] = frame.default_fill_value
        assert len(frame['K'].sp_values) == 0

    sparse_fixtures = (float_frame, float_frame_int_kind,
                       float_frame_fill0, float_frame_fill2)
    for sparse in sparse_fixtures:
        _check_frame(sparse)
@pytest.mark.parametrize('values', [
    [True, False],
    [0, 1],
    [1, None],
    ['a', 'b'],
    [pd.Timestamp('2017'), pd.NaT],
    [pd.Timedelta('10s'), pd.NaT],
])
def test_setitem_more(self, values):
    """Assigning a SparseArray to a dense column stores it as sparse."""
    frame = pd.DataFrame({"A": values})
    frame['A'] = pd.SparseArray(values)

    wanted = pd.DataFrame({'A': pd.SparseArray(values)})
    tm.assert_frame_equal(frame, wanted)
def test_setitem_corner(self, float_frame):
    """A column copied under a new label equals its source, names aside."""
    source = float_frame['B']
    float_frame['a'] = source
    tm.assert_sp_series_equal(float_frame['a'], float_frame['B'],
                              check_names=False)
def test_setitem_array(self, float_frame):
    """Assigning a full and a truncated sparse column both align."""
    column_b = float_frame['B']

    float_frame['E'] = column_b
    tm.assert_sp_series_equal(float_frame['E'], float_frame['B'],
                              check_names=False)

    # a column one row short still matches on the rows it covers
    float_frame['F'] = column_b[:-1]
    covered = float_frame.index[:-1]
    tm.assert_sp_series_equal(float_frame['E'].reindex(covered),
                              float_frame['F'].reindex(covered),
                              check_names=False)
def test_setitem_chained_no_consolidate(self):
    """Chained setitem leaves the frame's two blocks unconsolidated."""
    # https://github.com/pandas-dev/pandas/pull/19268
    # issuecomment-361696418
    # chained setitem used to cause consolidation
    frame = pd.SparseDataFrame([[np.nan, 1], [2, np.nan]])
    with pd.option_context('mode.chained_assignment', None):
        frame[0][1] = 2
    block_count = len(frame._data.blocks)
    assert block_count == 2
def test_delitem(self, float_frame):
    """``del frame[col]`` drops the column and leaves the others intact."""
    kept_a = float_frame['A']
    kept_c = float_frame['C']

    del float_frame['B']
    assert 'B' not in float_frame
    tm.assert_sp_series_equal(float_frame['A'], kept_a)
    tm.assert_sp_series_equal(float_frame['C'], kept_c)

    for label in ('D', 'A'):
        del float_frame[label]
        assert label not in float_frame
def test_set_columns(self, float_frame):
    """Same-length columns are accepted; a shorter set raises."""
    current = float_frame.columns
    float_frame.columns = current

    with pytest.raises(Exception):
        float_frame.columns = float_frame.columns[:-1]
def test_set_index(self, float_frame):
    """A same-length index is accepted; a shorter one raises."""
    current = float_frame.index
    float_frame.index = current

    with pytest.raises(Exception):
        float_frame.index = float_frame.index[:-1]
def test_ctor_reindex(self):
    """Data shorter than the requested index raises ValueError."""
    four_labels = pd.Index([0, 1, 2, 3])
    two_values = {"A": [1, 2]}
    # NOTE(review): the empty pattern matches any message, so only the
    # exception type is really being checked here.
    with pytest.raises(ValueError, match=''):
        pd.SparseDataFrame(two_values, index=four_labels)
def test_append(self, float_frame):
    """``append`` with matching and with mismatched column sets."""
    # split by rows and glue back together
    top = float_frame[:5]
    bottom = float_frame[5:]
    tm.assert_sp_frame_equal(top.append(bottom), float_frame,
                             exact_indices=False)

    # the top half is missing a column: appending warns
    top = float_frame.iloc[:5, :3]
    bottom = float_frame.iloc[5:]
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        # Stacklevel is set for pd.concat, not append
        combined = top.append(bottom)
    tm.assert_sp_frame_equal(combined.iloc[:, :3],
                             float_frame.iloc[:, :3],
                             exact_indices=False)

    # an explicit ``sort`` silences the warning
    top = top[['B', 'C', 'A']].head(2)
    bottom = bottom.head(2)

    wanted = pd.SparseDataFrame({
        "B": [0., 1, None, 3],
        "C": [0., 1, 5, 6],
        "A": [None, None, 2, 3],
        "D": [None, None, 5, None],
    }, index=top.index | bottom.index, columns=['B', 'C', 'A', 'D'])

    with tm.assert_produces_warning(None):
        combined = top.append(bottom, sort=False)
    tm.assert_frame_equal(combined, wanted)

    with tm.assert_produces_warning(None):
        combined = top.append(bottom, sort=True)
    tm.assert_sp_frame_equal(combined, wanted[['A', 'B', 'C', 'D']],
                             consolidate_block_indices=True,
                             check_kind=False)
def test_astype(self):
    """``astype`` keeps the fill value unless a SparseDtype changes it."""
    sparse = pd.SparseDataFrame({
        'A': SparseArray([1, 2, 3, 4], dtype=np.int64),
        'B': SparseArray([4, 5, 6, 7], dtype=np.int64),
    })
    for label in ('A', 'B'):
        assert sparse[label].dtype == SparseDtype(np.int64)

    float_values = (('A', [1., 2., 3., 4.]), ('B', [4., 5., 6., 7.]))

    # (target dtype, fill value expected afterwards)
    cases = [
        (np.float64, 0),                                # retain fill_value
        (SparseDtype(np.float64, np.nan), np.nan),      # update fill_value
    ]
    for target, fill in cases:
        converted = sparse.astype(target)
        wanted = pd.SparseDataFrame(
            {label: SparseArray(values, fill_value=fill, kind='integer')
             for label, values in float_values},
            default_fill_value=np.nan)
        tm.assert_sp_frame_equal(converted, wanted)
        for label in ('A', 'B'):
            assert converted[label].dtype == SparseDtype(np.float64, fill)
def test_astype_bool(self):
    """Casting zero-filled int columns to sparse bool maps 0 to False.

    Uses the builtin ``bool`` throughout: the ``np.bool`` alias was
    only ever another name for it and later NumPy releases removed it.
    """
    sparse = pd.SparseDataFrame({'A': SparseArray([0, 2, 0, 4],
                                                  fill_value=0,
                                                  dtype=np.int64),
                                 'B': SparseArray([0, 5, 0, 7],
                                                  fill_value=0,
                                                  dtype=np.int64)},
                                default_fill_value=0)
    assert sparse['A'].dtype == SparseDtype(np.int64)
    assert sparse['B'].dtype == SparseDtype(np.int64)

    res = sparse.astype(SparseDtype(bool, False))
    exp = pd.SparseDataFrame({'A': SparseArray([False, True, False, True],
                                               dtype=bool,
                                               fill_value=False,
                                               kind='integer'),
                              'B': SparseArray([False, True, False, True],
                                               dtype=bool,
                                               fill_value=False,
                                               kind='integer')},
                             default_fill_value=False)
    tm.assert_sp_frame_equal(res, exp)
    assert res['A'].dtype == SparseDtype(bool)
    assert res['B'].dtype == SparseDtype(bool)
def test_astype_object(self):
    """``astype(object)`` on sparse columns yields sparse object columns."""
    # This may change in GH-23125
    labels = ("A", "B")
    object_dtype = SparseDtype(object, 0)

    frame = pd.DataFrame({label: SparseArray([0, 1]) for label in labels})
    wanted = pd.DataFrame({label: SparseArray([0, 1], dtype=object_dtype)
                           for label in labels})
    tm.assert_frame_equal(frame.astype(object), wanted)
def test_fillna(self, float_frame_fill0, float_frame_fill0_dense):
    """``fillna`` agrees with the dense result, in place or not.

    Three variants are checked against the dense frame reindexed the
    same way: out-of-place on the frame, in-place on a copy of the
    frame, and in-place on a single column.
    """
    df = float_frame_fill0.reindex(lrange(5))
    dense = float_frame_fill0_dense.reindex(lrange(5))

    # out of place
    result = df.fillna(0)
    expected = dense.fillna(0)
    tm.assert_sp_frame_equal(result, expected.to_sparse(fill_value=0),
                             exact_indices=False)
    tm.assert_frame_equal(result.to_dense(), expected)

    # in place, on a copy of the frame
    result = df.copy()
    result.fillna(0, inplace=True)
    expected = dense.fillna(0)

    tm.assert_sp_frame_equal(result, expected.to_sparse(fill_value=0),
                             exact_indices=False)
    tm.assert_frame_equal(result.to_dense(), expected)

    # in place, on a single column (the ``df.copy()`` that used to
    # precede this was overwritten at once and has been dropped)
    result = df['A']
    result.fillna(0, inplace=True)

    expected = dense['A'].fillna(0)
    # Only the dense forms are compared: the in-place fill changes the
    # internal SparseArray repr, so a sparse comparison is not made.
    tm.assert_series_equal(result.to_dense(), expected)
def test_fillna_fill_value(self):
    """``fillna`` matches dense for default and zero default fill."""
    dense = pd.DataFrame({'A': [1, 0, 0], 'B': [np.nan, np.nan, 4]})

    # first the default fill value, then an explicit zero
    for ctor_kwargs in ({}, {'default_fill_value': 0}):
        sparse = pd.SparseDataFrame(dense, **ctor_kwargs)
        tm.assert_frame_equal(sparse.fillna(-1).to_dense(),
                              dense.fillna(-1), check_dtype=False)
def test_sparse_frame_pad_backfill_limit(self):
    """``reindex`` with a fill method honours ``limit``."""
    index = np.arange(10)
    sdf = DataFrame(np.random.randn(10, 4), index=index).to_sparse()

    # (rows kept, fill method, rows the limit leaves as NaN)
    cases = [
        (slice(None, 2), 'pad', slice(-3, None)),
        (slice(-2, None), 'backfill', slice(None, 3)),
    ]
    for kept, method, unfilled in cases:
        got = sdf[kept].reindex(index, method=method, limit=5)

        # build the expectation from an unlimited fill, then blank the
        # rows that lie beyond the limit
        with tm.assert_produces_warning(PerformanceWarning):
            wanted = sdf[kept].reindex(index).fillna(method=method)
        wanted = wanted.to_dense()
        wanted.values[unfilled] = np.nan
        wanted = wanted.to_sparse()
        tm.assert_frame_equal(got, wanted)
def test_sparse_frame_fillna_limit(self):
    """``fillna`` with a fill method honours ``limit``."""
    index = np.arange(10)
    sdf = DataFrame(np.random.randn(10, 4), index=index).to_sparse()

    # (rows kept, fill method, rows the limit leaves as NaN)
    cases = [
        (slice(None, 2), 'pad', slice(-3, None)),
        (slice(-2, None), 'backfill', slice(None, 3)),
    ]
    for kept, method, unfilled in cases:
        got = sdf[kept].reindex(index)
        with tm.assert_produces_warning(PerformanceWarning):
            got = got.fillna(method=method, limit=5)

        # build the expectation from an unlimited fill, then blank the
        # rows that lie beyond the limit
        with tm.assert_produces_warning(PerformanceWarning):
            wanted = sdf[kept].reindex(index).fillna(method=method)
        wanted = wanted.to_dense()
        wanted.values[unfilled] = np.nan
        wanted = wanted.to_sparse()
        tm.assert_frame_equal(got, wanted)
def test_rename(self, float_frame):
    """``rename`` relabels the index and the columns through a callable."""
    # index labels converted to strings
    renamed = float_frame.rename(index=str)
    string_labels = float_frame.index.strftime("%Y-%m-%d %H:%M:%S")
    wanted = SparseDataFrame(float_frame.values, index=string_labels,
                             columns=list('ABCD'))
    tm.assert_sp_frame_equal(renamed, wanted)

    # column labels get a "1" suffix
    def add_suffix(label):
        return '%s%d' % (label, 1)

    renamed = float_frame.rename(columns=add_suffix)
    suffixed = {
        'A1': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
        'B1': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
        'C1': np.arange(10, dtype=np.float64),
        'D1': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan],
    }
    wanted = SparseDataFrame(suffixed, index=float_frame.index)
    tm.assert_sp_frame_equal(renamed, wanted)
def test_corr(self, float_frame):
    """``corr`` matches the sparsified dense correlation matrix."""
    sparse_corr = float_frame.corr()
    # XXX: this stays sparse
    dense_corr = float_frame.to_dense().corr()
    tm.assert_frame_equal(sparse_corr, dense_corr.to_sparse())
def test_describe(self, float_frame):
    """Summary calls do not raise once an all-NaN column is added."""
    float_frame['foo'] = np.nan

    # none of the results are inspected; each call only has to succeed
    float_frame.get_dtype_counts()
    str(float_frame)
    summary = float_frame.describe()  # noqa
def test_join(self, float_frame):
    """``join`` merges disjoint columns and rejects bad inputs."""
    left = float_frame.loc[:, ['A', 'B']]

    # disjoint column sets rebuild the original frame
    disjoint = float_frame.loc[:, ['C', 'D']]
    tm.assert_sp_frame_equal(left.join(disjoint), float_frame,
                             exact_indices=False)

    # a shared column ('B') must raise
    overlapping = float_frame.loc[:, ['B', 'D']]
    with pytest.raises(Exception):
        left.join(overlapping)

    # an unnamed Series cannot be joined
    with pytest.raises(ValueError, match='Other Series must have a name'):
        unnamed = Series(np.random.randn(len(float_frame)),
                         index=float_frame.index)
        float_frame.join(unnamed)
def test_reindex(self, float_frame, float_frame_int_kind,
                 float_frame_fill0, float_frame_fill2):
    """``reindex`` on rows and columns agrees with the dense result.

    Also checks fill-value propagation, zero-length targets and
    sources, and how ``copy=False`` differs from the default.
    """
    def _check_frame(frame):
        index = frame.index
        sidx = index[::2]

        sparse_result = frame.reindex(sidx)
        dense_result = frame.to_dense().reindex(sidx)
        tm.assert_frame_equal(sparse_result.to_dense(), dense_result)

        # a plain list of labels behaves like the Index
        tm.assert_frame_equal(frame.reindex(list(sidx)).to_dense(),
                              dense_result)

        # reindexing back to the full index
        sparse_result2 = sparse_result.reindex(index)
        dense_result2 = dense_result.reindex(index)
        tm.assert_frame_equal(sparse_result2.to_dense(), dense_result2)

        # propagate CORRECT fill value
        tm.assert_almost_equal(sparse_result.default_fill_value,
                               frame.default_fill_value)
        tm.assert_almost_equal(sparse_result['A'].fill_value,
                               frame['A'].fill_value)

        # length zero
        length_zero = frame.reindex([])
        assert len(length_zero) == 0
        assert len(length_zero.columns) == len(frame.columns)
        assert len(length_zero['A']) == 0

        # frame being reindexed has length zero
        length_n = length_zero.reindex(index)
        assert len(length_n) == len(frame)
        assert len(length_n.columns) == len(frame.columns)
        assert len(length_n['A']) == len(frame)

        # reindex columns
        reindexed = frame.reindex(columns=['A', 'B', 'Z'])
        assert len(reindexed.columns) == 3
        tm.assert_almost_equal(reindexed['Z'].fill_value,
                               frame.default_fill_value)
        assert np.isnan(reindexed['Z'].sp_values).all()

    for frame in (float_frame, float_frame_int_kind,
                  float_frame_fill0, float_frame_fill2):
        _check_frame(frame)

    # with copy=False a column added to the result shows up in the source
    reindexed = float_frame.reindex(float_frame.index, copy=False)
    reindexed['F'] = reindexed['A']
    assert 'F' in float_frame

    # with the default copy behaviour it does not
    reindexed = float_frame.reindex(float_frame.index)
    reindexed['G'] = reindexed['A']
    assert 'G' not in float_frame
def test_reindex_fill_value(self, float_frame_fill0,
                            float_frame_fill0_dense):
    """Sparse ``reindex(fill_value=0)`` must match the dense result."""
    new_index = bdate_range('20110110', periods=20)

    result = float_frame_fill0.reindex(new_index, fill_value=0)

    dense_expected = float_frame_fill0_dense.reindex(new_index,
                                                     fill_value=0)
    expected = dense_expected.to_sparse(
        float_frame_fill0.default_fill_value)
    tm.assert_sp_frame_equal(result, expected)
def test_reindex_method(self):
    """Check ``reindex`` with and without ``method`` on rows and columns."""

    def _make(rows, index, columns):
        return SparseDataFrame(data=rows, index=index, columns=columns,
                               dtype=float)

    row1 = [11., 12., 14.]
    row2 = [21., 22., 24.]
    row4 = [41., 42., 44.]
    gap = [nan, nan, nan]

    sparse = _make([row1, row2, row4], [1, 2, 4], [1, 2, 4])

    # Over indices: (extra reindex kwargs, expected rows for labels 0..5)
    index_cases = [
        # default method
        ({}, [gap, row1, row2, gap, row4, gap]),
        # method='bfill'
        ({'method': 'bfill'}, [row1, row1, row2, row4, row4, gap]),
        # method='ffill'
        ({'method': 'ffill'}, [gap, row1, row2, row2, row4, row4]),
    ]
    for kwargs, rows in index_cases:
        result = sparse.reindex(index=range(6), **kwargs)
        expected = _make(rows, range(6), [1, 2, 4])
        tm.assert_sp_frame_equal(result, expected)

    # Over columns
    # default method
    result = sparse.reindex(columns=range(6))
    expected = _make([[nan, 11., 12., nan, 14., nan],
                      [nan, 21., 22., nan, 24., nan],
                      [nan, 41., 42., nan, 44., nan]],
                     [1, 2, 4],
                     range(6))
    tm.assert_sp_frame_equal(result, expected)

    # method='bfill' and method='ffill' are expected to raise for columns
    for fill_method in ('bfill', 'ffill'):
        with pytest.raises(NotImplementedError):
            sparse.reindex(columns=range(6), method=fill_method)
def test_take(self, float_frame):
    """``take`` along axis 1 must equal reindexing by those columns."""
    expected = float_frame.reindex(columns=['B', 'A', 'C'])
    taken = float_frame.take([1, 0, 2], axis=1)
    tm.assert_sp_frame_equal(taken, expected)
def test_to_dense(self, float_frame, float_frame_int_kind,
                  float_frame_dense,
                  float_frame_fill0, float_frame_fill0_dense,
                  float_frame_fill2, float_frame_fill2_dense):
    """``to_dense`` must agree with the matching dense fixture."""

    def _check(frame, orig):
        densified = frame.to_dense()
        # Sparse[float] != float
        tm.assert_frame_equal(frame, densified, check_dtype=False)
        tm.assert_frame_equal(densified, orig, check_dtype=False)

    pairs = [
        (float_frame, float_frame_dense),
        (float_frame_int_kind, float_frame_dense),
        (float_frame_fill0, float_frame_fill0_dense),
        (float_frame_fill2, float_frame_fill2_dense),
    ]
    for sparse_fixture, dense_fixture in pairs:
        _check(sparse_fixture, dense_fixture)
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_stack_sparse_frame(self, float_frame, float_frame_int_kind,
                            float_frame_fill0, float_frame_fill2):
    """Compare ``stack_sparse_frame`` with the Panel ``to_frame`` route."""

    def _check(frame):
        frame.to_dense()  # noqa
        panel = Panel.from_dict({'foo': frame})
        via_panel = panel.to_frame()
        via_sparse = spf.stack_sparse_frame(frame)
        tm.assert_numpy_array_equal(via_panel.values,
                                    via_sparse.values)

    for supported in (float_frame, float_frame_int_kind):
        _check(supported)

    # for now: the two fill-value fixtures are expected to fail the check
    for unsupported in (float_frame_fill0, float_frame_fill2):
        with pytest.raises(Exception):
            _check(unsupported)
def test_transpose(self, float_frame, float_frame_int_kind,
                   float_frame_dense,
                   float_frame_fill0, float_frame_fill0_dense,
                   float_frame_fill2, float_frame_fill2_dense):
    """Transposing twice must round-trip and match the dense fixture."""

    def _check(frame, orig):
        once = frame.T
        twice = once.T
        tm.assert_sp_frame_equal(frame, twice)

        tm.assert_frame_equal(frame.T.to_dense(), orig.T)
        tm.assert_frame_equal(frame.T.T.to_dense(), orig.T.T)
        tm.assert_sp_frame_equal(frame, frame.T.T, exact_indices=False)

    pairs = [
        (float_frame, float_frame_dense),
        (float_frame_int_kind, float_frame_dense),
        (float_frame_fill0, float_frame_fill0_dense),
        (float_frame_fill2, float_frame_fill2_dense),
    ]
    for sparse_fixture, dense_fixture in pairs:
        _check(sparse_fixture, dense_fixture)
def test_shift(self, float_frame, float_frame_int_kind, float_frame_dense,
               float_frame_fill0, float_frame_fill0_dense,
               float_frame_fill2, float_frame_fill2_dense):
    """Compare sparse ``shift`` with dense, by periods and by frequency."""

    def _check(frame, orig):
        # Plain period shifts: compare after densifying the sparse result.
        for periods in (0, 1, -2):
            shifted = frame.shift(periods)
            exp = orig.shift(periods)
            tm.assert_frame_equal(shifted.to_dense(), exp)

        # Frequency shifts (string alias, then offset object): compare
        # against the dense result converted back to sparse.
        for freq in ('B', BDay()):
            shifted = frame.shift(2, freq=freq)
            exp = orig.shift(2, freq=freq)
            exp = exp.to_sparse(frame.default_fill_value,
                                kind=frame.default_kind)
            tm.assert_frame_equal(shifted, exp)

    pairs = [
        (float_frame, float_frame_dense),
        (float_frame_int_kind, float_frame_dense),
        (float_frame_fill0, float_frame_fill0_dense),
        (float_frame_fill2, float_frame_fill2_dense),
    ]
    for sparse_fixture, dense_fixture in pairs:
        _check(sparse_fixture, dense_fixture)
def test_count(self, float_frame):
    """Sparse ``count`` must match dense ``count`` for each axis form."""
    per_column = float_frame.to_dense().count()

    # No axis, axis=None and axis=0 are all compared with the same
    # dense per-column counts.
    for kwargs in ({}, {'axis': None}, {'axis': 0}):
        result = float_frame.count(**kwargs)
        tm.assert_series_equal(result.to_dense(), per_column)

    result = float_frame.count(axis=1)
    per_row = float_frame.to_dense().count(axis=1)

    # win32 don't check dtype
    tm.assert_series_equal(result, per_row, check_dtype=False)
def test_numpy_transpose(self):
    """``np.transpose`` must round-trip and reject the ``axes`` argument."""
    sdf = SparseDataFrame([1, 2, 3], index=[1, 2, 3], columns=['a'])

    round_trip = np.transpose(np.transpose(sdf))
    tm.assert_sp_frame_equal(round_trip, sdf)

    with pytest.raises(ValueError,
                       match="the 'axes' parameter is not supported"):
        np.transpose(sdf, axes=1)
def test_combine_first(self, float_frame):
    """Sparse ``combine_first`` must match the dense computation."""
    frame = float_frame
    every_other = frame[::2]

    result = every_other.combine_first(frame)

    dense_expected = frame[::2].to_dense().combine_first(frame.to_dense())
    expected = dense_expected.to_sparse(
        fill_value=frame.default_fill_value)
    tm.assert_sp_frame_equal(result, expected)
@pytest.mark.xfail(reason="No longer supported.")
def test_combine_first_with_dense(self, float_frame):
    """Sparse ``combine_first`` with a dense argument (expected failure).

    The frame comes from the ``float_frame`` fixture. Reading
    ``self.frame`` would raise ``AttributeError`` unless something outside
    this test sets it, which would satisfy the xfail marker without ever
    reaching ``combine_first``.
    """
    # We could support this if we allow
    # pd.core.dtypes.cast.find_common_type to special case SparseDtype
    # but I don't think that's worth it.
    df = float_frame

    result = df[::2].combine_first(df.to_dense())
    expected = df[::2].to_dense().combine_first(df.to_dense())
    expected = expected.to_sparse(fill_value=df.default_fill_value)

    tm.assert_sp_frame_equal(result, expected)
def test_combine_add(self, float_frame):
    """Sparse ``add(fill_value=0)`` must match the dense result."""
    df = float_frame.to_dense()
    df2 = df.copy()

    # Use a single positional ``.iloc`` assignment instead of chained
    # indexing (``df2['C'][:3] = ...``), which may write to a temporary
    # copy and leave the frame unchanged.
    df2.iloc[:3, df2.columns.get_loc('C')] = np.nan
    df.iloc[:3, df.columns.get_loc('A')] = 5.7

    result = df.to_sparse().add(df2.to_sparse(), fill_value=0)
    expected = df.add(df2, fill_value=0).to_sparse()
    tm.assert_sp_frame_equal(result, expected)
def test_isin(self):
    """Filtering by ``isin([1.])`` must match filtering by ``== 1.``."""
    dense = DataFrame({'flag': [1., 0., 1.]})
    sparse_df = dense.to_sparse(fill_value=0.)

    by_equality = sparse_df[sparse_df.flag == 1.]
    by_isin = sparse_df[sparse_df.flag.isin([1.])]
    tm.assert_frame_equal(by_equality, by_isin)
def test_sparse_pow_issue(self):
    """``1 ** df`` keeps ``sp_values`` length consistent after ``take``."""
    # 2220
    # This first frame is only constructed; it is replaced right away.
    df = SparseDataFrame({'A': [1.1, 3.3], 'B': [2.5, -3.9]})

    # note : no error without nan
    df = SparseDataFrame({'A': [nan, 0, 1]})

    # note that 2 ** df works fine, also df ** 1
    powered = 1 ** df

    taken_col = powered.take([0], 1)['A']
    direct_col = powered['A']

    assert len(direct_col.sp_values) == len(taken_col.sp_values)
def test_as_blocks(self):
    """``.blocks`` warns and returns the frame under one sparse key."""
    df = SparseDataFrame({'A': [1.1, 3.3], 'B': [nan, -3.9]},
                         dtype='float64')

    # deprecated 0.21.0
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        blocks = df.blocks

    block_key = 'Sparse[float64, nan]'
    assert list(blocks.keys()) == [block_key]
    tm.assert_frame_equal(blocks[block_key], df)
@pytest.mark.xfail(reason='nan column names in _init_dict problematic '
                          '(GH#16894)')
def test_nan_columnname(self):
    """A NaN column label should survive ``to_sparse`` (expected failure)."""
    # GH 8822
    dense = DataFrame(Series(1.0, index=[0]), columns=[nan])
    sparse = dense.to_sparse()
    first_label = sparse.columns[0]
    assert np.isnan(first_label)
def test_isna(self):
    """Check ``isna`` for a default frame and for one with fill value 0."""
    # GH 8276
    nan_fill = pd.SparseDataFrame({'A': [np.nan, np.nan, 1, 2, np.nan],
                                   'B': [0, np.nan, np.nan, 2, np.nan]})

    result = nan_fill.isna()
    expected = pd.SparseDataFrame(
        {'A': [True, True, False, False, True],
         'B': [False, True, True, False, True]},
        default_fill_value=True)
    # Overwrite the frame-level default fill value before comparing.
    expected._default_fill_value = np.nan
    tm.assert_sp_frame_equal(result, expected)

    # if fill_value is not nan, True can be included in sp_values
    zero_fill = pd.SparseDataFrame({'A': [0, 0, 1, 2, np.nan],
                                    'B': [0, np.nan, 0, 2, np.nan]},
                                   default_fill_value=0.)
    result = zero_fill.isna()
    assert isinstance(result, pd.SparseDataFrame)
    expected = pd.DataFrame({'A': [False, False, False, False, True],
                             'B': [False, True, False, False, True]})
    tm.assert_frame_equal(result.to_dense(), expected)
def test_notna(self):
    """Check ``notna`` for a default frame and for one with fill value 0."""
    # GH 8276
    nan_fill = pd.SparseDataFrame({'A': [np.nan, np.nan, 1, 2, np.nan],
                                   'B': [0, np.nan, np.nan, 2, np.nan]})

    result = nan_fill.notna()
    expected = pd.SparseDataFrame(
        {'A': [False, False, True, True, False],
         'B': [True, False, False, True, False]},
        default_fill_value=False)
    # Overwrite the frame-level default fill value before comparing.
    expected._default_fill_value = np.nan
    tm.assert_sp_frame_equal(result, expected)

    # if fill_value is not nan, True can be included in sp_values
    zero_fill = pd.SparseDataFrame({'A': [0, 0, 1, 2, np.nan],
                                    'B': [0, np.nan, 0, 2, np.nan]},
                                   default_fill_value=0.)
    result = zero_fill.notna()
    assert isinstance(result, pd.SparseDataFrame)
    expected = pd.DataFrame({'A': [True, True, True, True, False],
                             'B': [True, False, True, True, False]})
    tm.assert_frame_equal(result.to_dense(), expected)
class TestSparseDataFrameArithmetic(object):
    """Scalar arithmetic and comparison on a sparse frame vs. dense."""

    def test_numeric_op_scalar(self):
        """Adding a scalar must match the dense sum converted to sparse."""
        dense = pd.DataFrame({'A': [nan, nan, 0, 1, ],
                              'B': [0, 1, 2, nan],
                              'C': [1., 2., 3., 4.],
                              'D': [nan, nan, nan, nan]})
        sparse = dense.to_sparse()

        tm.assert_sp_frame_equal(sparse + 1, (dense + 1).to_sparse())

    def test_comparison_op_scalar(self):
        """Scalar comparisons stay sparse and match the dense result."""
        # GH 13001
        dense = pd.DataFrame({'A': [nan, nan, 0, 1, ],
                              'B': [0, 1, 2, nan],
                              'C': [1., 2., 3., 4.],
                              'D': [nan, nan, nan, nan]})
        sparse = dense.to_sparse()

        # comparison changes internal repr, compare with dense
        comparisons = (lambda obj: obj > 1, lambda obj: obj != 0)
        for compare in comparisons:
            outcome = compare(sparse)
            assert isinstance(outcome, pd.SparseDataFrame)
            tm.assert_frame_equal(outcome.to_dense(), compare(dense))
class TestSparseDataFrameAnalytics(object):
    """Reductions, numpy entry points, quantile, assign and dropna."""

    def test_cumsum(self, float_frame):
        """``cumsum`` must match the dense result for each axis form."""
        expected = SparseDataFrame(float_frame.to_dense().cumsum())

        # No axis, axis=None and axis=0 share the same expected frame.
        for kwargs in ({}, {'axis': None}, {'axis': 0}):
            result = float_frame.cumsum(**kwargs)
            tm.assert_sp_frame_equal(result, expected)

    def test_numpy_cumsum(self, float_frame):
        """``np.cumsum`` works but rejects ``dtype`` and ``out``."""
        result = np.cumsum(float_frame)
        expected = SparseDataFrame(float_frame.to_dense().cumsum())
        tm.assert_sp_frame_equal(result, expected)

        with pytest.raises(ValueError,
                           match="the 'dtype' parameter is not supported"):
            np.cumsum(float_frame, dtype=np.int64)

        with pytest.raises(ValueError,
                           match="the 'out' parameter is not supported"):
            np.cumsum(float_frame, out=result)

    def test_numpy_func_call(self, float_frame):
        """Smoke test: numpy reductions on a sparse frame must not raise."""
        # no exception should be raised even though
        # numpy passes in 'axis=None' or `axis=-1'
        names = ('sum', 'cumsum', 'var',
                 'mean', 'prod', 'cumprod',
                 'std', 'min', 'max')
        for name in names:
            numpy_func = getattr(np, name)
            numpy_func(float_frame)

    @pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH 17386)')
    def test_quantile(self):
        """Scalar ``quantile`` vs. dense and sparse expectations (xfail)."""
        # GH 17386
        data = [[1, 1], [2, 10], [3, 100], [nan, nan]]
        q = 0.1

        result = SparseDataFrame(data).quantile(q)

        dense_expected = DataFrame(data).quantile(q)
        sparse_expected = SparseSeries(dense_expected)

        tm.assert_series_equal(result, dense_expected)
        tm.assert_sp_series_equal(result, sparse_expected)

    @pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH 17386)')
    def test_quantile_multi(self):
        """List ``quantile`` vs. dense and sparse expectations (xfail)."""
        # GH 17386
        data = [[1, 1], [2, 10], [3, 100], [nan, nan]]
        q = [0.1, 0.5]

        result = SparseDataFrame(data).quantile(q)

        dense_expected = DataFrame(data).quantile(q)
        sparse_expected = SparseDataFrame(dense_expected)

        tm.assert_frame_equal(result, dense_expected)
        tm.assert_sp_frame_equal(result, sparse_expected)

    def test_assign_with_sparse_frame(self):
        """``assign`` on a sparse frame must keep every column sparse."""
        # GH 19163
        dense = pd.DataFrame({"a": [1, 2, 3]})

        result = dense.to_sparse(fill_value=False).assign(newcol=False)
        expected = dense.assign(newcol=False).to_sparse(fill_value=False)
        tm.assert_sp_frame_equal(result, expected)

        for label in result.columns:
            assert type(result[label]) is SparseSeries

    @pytest.mark.parametrize("inplace", [True, False])
    @pytest.mark.parametrize("how", ["all", "any"])
    def test_dropna(self, inplace, how):
        """``dropna(axis=1)`` must drop the all-NaN column."""
        # Tests regression #21172.
        expected = pd.SparseDataFrame({"F2": [0, 1]})

        input_df = pd.SparseDataFrame(
            {"F1": [float('nan'), float('nan')], "F2": [0, 1]}
        )
        returned = input_df.dropna(axis=1, inplace=inplace, how=how)

        # With ``inplace=True`` the mutated input is what gets compared.
        actual = input_df if inplace else returned
        tm.assert_sp_frame_equal(expected, actual)
|