123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256 |
- import datetime
- import decimal
- import numpy as np
- import pytest
- import pytz
- from pandas.core.dtypes.dtypes import registry
- import pandas as pd
- from pandas.api.extensions import register_extension_dtype
- from pandas.core.arrays import PandasArray, integer_array, period_array
- from pandas.tests.extension.decimal import (
- DecimalArray, DecimalDtype, to_decimal)
- import pandas.util.testing as tm
@pytest.mark.parametrize(
    "data, dtype, expected",
    [
        # Basic NumPy defaults.
        pytest.param([1, 2], None, PandasArray(np.array([1, 2]))),
        pytest.param(
            [1, 2], object, PandasArray(np.array([1, 2], dtype=object))
        ),
        pytest.param(
            [1, 2],
            np.dtype("float32"),
            PandasArray(np.array([1.0, 2.0], dtype=np.dtype("float32"))),
        ),
        pytest.param(
            np.array([1, 2]), None, PandasArray(np.array([1, 2]))
        ),
        # String alias passes through to NumPy
        pytest.param(
            [1, 2], "float32", PandasArray(np.array([1, 2], dtype="float32"))
        ),
        # Period alias
        pytest.param(
            [pd.Period("2000", "D"), pd.Period("2001", "D")],
            "Period[D]",
            period_array(["2000", "2001"], freq="D"),
        ),
        # Period dtype
        pytest.param(
            [pd.Period("2000", "D")],
            pd.PeriodDtype("D"),
            period_array(["2000"], freq="D"),
        ),
        # Datetime (naive)
        pytest.param(
            [1, 2],
            np.dtype("datetime64[ns]"),
            pd.arrays.DatetimeArray._from_sequence(
                np.array([1, 2], dtype="datetime64[ns]")
            ),
        ),
        pytest.param(
            np.array([1, 2], dtype="datetime64[ns]"),
            None,
            pd.arrays.DatetimeArray._from_sequence(
                np.array([1, 2], dtype="datetime64[ns]")
            ),
        ),
        pytest.param(
            pd.DatetimeIndex(["2000", "2001"]),
            np.dtype("datetime64[ns]"),
            pd.arrays.DatetimeArray._from_sequence(["2000", "2001"]),
        ),
        pytest.param(
            pd.DatetimeIndex(["2000", "2001"]),
            None,
            pd.arrays.DatetimeArray._from_sequence(["2000", "2001"]),
        ),
        pytest.param(
            ["2000", "2001"],
            np.dtype("datetime64[ns]"),
            pd.arrays.DatetimeArray._from_sequence(["2000", "2001"]),
        ),
        # Datetime (tz-aware)
        pytest.param(
            ["2000", "2001"],
            pd.DatetimeTZDtype(tz="CET"),
            pd.arrays.DatetimeArray._from_sequence(
                ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET")
            ),
        ),
        # Timedelta
        pytest.param(
            ["1H", "2H"],
            np.dtype("timedelta64[ns]"),
            pd.arrays.TimedeltaArray._from_sequence(["1H", "2H"]),
        ),
        pytest.param(
            pd.TimedeltaIndex(["1H", "2H"]),
            np.dtype("timedelta64[ns]"),
            pd.arrays.TimedeltaArray._from_sequence(["1H", "2H"]),
        ),
        pytest.param(
            pd.TimedeltaIndex(["1H", "2H"]),
            None,
            pd.arrays.TimedeltaArray._from_sequence(["1H", "2H"]),
        ),
        # Category
        pytest.param(["a", "b"], "category", pd.Categorical(["a", "b"])),
        pytest.param(
            ["a", "b"],
            pd.CategoricalDtype(None, ordered=True),
            pd.Categorical(["a", "b"], ordered=True),
        ),
        # Interval
        pytest.param(
            [pd.Interval(1, 2), pd.Interval(3, 4)],
            "interval",
            pd.arrays.IntervalArray.from_tuples([(1, 2), (3, 4)]),
        ),
        # Sparse
        pytest.param(
            [0, 1], "Sparse[int64]", pd.SparseArray([0, 1], dtype="int64")
        ),
        # IntegerNA
        pytest.param(
            [1, None], "Int16", integer_array([1, None], dtype="Int16")
        ),
        # Series
        pytest.param(
            pd.Series([1, 2]),
            None,
            PandasArray(np.array([1, 2], dtype=np.int64)),
        ),
        # Index
        pytest.param(
            pd.Index([1, 2]),
            None,
            PandasArray(np.array([1, 2], dtype=np.int64)),
        ),
        # Series[EA] returns the EA
        pytest.param(
            pd.Series(pd.Categorical(["a", "b"], categories=["a", "b", "c"])),
            None,
            pd.Categorical(["a", "b"], categories=["a", "b", "c"]),
        ),
        # "3rd party" EAs work
        pytest.param(
            [decimal.Decimal(0), decimal.Decimal(1)],
            "decimal",
            to_decimal([0, 1]),
        ),
        # pass an ExtensionArray, but a different dtype
        pytest.param(
            period_array(["2000", "2001"], freq="D"),
            "category",
            pd.Categorical([pd.Period("2000", "D"), pd.Period("2001", "D")]),
        ),
    ],
)
def test_array(data, dtype, expected):
    """Check ``pd.array(data, dtype=dtype)`` against the expected array."""
    actual = pd.array(data, dtype=dtype)
    tm.assert_equal(actual, expected)
def test_array_copy():
    """Check when ``pd.array`` shares memory with a NumPy input array.

    Covers the default call, ``copy=True`` and ``copy=False``.
    """
    source = np.array([1, 2])

    # default is to copy
    by_default = pd.array(source)
    assert np.shares_memory(source, by_default._ndarray) is False

    # copy=True
    explicit_copy = pd.array(source, copy=True)
    assert np.shares_memory(source, explicit_copy._ndarray) is False

    # copy=False
    no_copy = pd.array(source, copy=False)
    assert np.shares_memory(source, no_copy._ndarray) is True
# pytz timezone object used by the tz-aware inference cases below.
cet = pytz.timezone("CET")


@pytest.mark.parametrize(
    "data, expected",
    [
        # period
        pytest.param(
            [pd.Period("2000", "D"), pd.Period("2001", "D")],
            period_array(["2000", "2001"], freq="D"),
        ),
        # interval
        pytest.param(
            [pd.Interval(0, 1), pd.Interval(1, 2)],
            pd.arrays.IntervalArray.from_breaks([0, 1, 2]),
        ),
        # datetime
        pytest.param(
            [pd.Timestamp("2000"), pd.Timestamp("2001")],
            pd.arrays.DatetimeArray._from_sequence(["2000", "2001"]),
        ),
        pytest.param(
            [datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)],
            pd.arrays.DatetimeArray._from_sequence(["2000", "2001"]),
        ),
        pytest.param(
            np.array([1, 2], dtype="M8[ns]"),
            pd.arrays.DatetimeArray(np.array([1, 2], dtype="M8[ns]")),
        ),
        pytest.param(
            np.array([1, 2], dtype="M8[us]"),
            pd.arrays.DatetimeArray(np.array([1000, 2000], dtype="M8[ns]")),
        ),
        # datetimetz
        pytest.param(
            [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2001", tz="CET")],
            pd.arrays.DatetimeArray._from_sequence(
                ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET")
            ),
        ),
        pytest.param(
            [
                datetime.datetime(2000, 1, 1, tzinfo=cet),
                datetime.datetime(2001, 1, 1, tzinfo=cet),
            ],
            pd.arrays.DatetimeArray._from_sequence(["2000", "2001"], tz=cet),
        ),
        # timedelta
        pytest.param(
            [pd.Timedelta("1H"), pd.Timedelta("2H")],
            pd.arrays.TimedeltaArray._from_sequence(["1H", "2H"]),
        ),
        pytest.param(
            np.array([1, 2], dtype="m8[ns]"),
            pd.arrays.TimedeltaArray(np.array([1, 2], dtype="m8[ns]")),
        ),
        pytest.param(
            np.array([1, 2], dtype="m8[us]"),
            pd.arrays.TimedeltaArray(np.array([1000, 2000], dtype="m8[ns]")),
        ),
    ],
)
def test_array_inference(data, expected):
    """Check the array ``pd.array`` produces when no dtype is given."""
    inferred = pd.array(data)
    tm.assert_equal(inferred, expected)
@pytest.mark.parametrize(
    "data",
    [
        # mix of frequencies
        pytest.param([pd.Period("2000", "D"), pd.Period("2001", "A")]),
        # mix of closed
        pytest.param(
            [
                pd.Interval(0, 1, closed="left"),
                pd.Interval(1, 2, closed="right"),
            ]
        ),
        # Mix of timezones
        pytest.param(
            [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000", tz="UTC")]
        ),
        # Mix of tz-aware and tz-naive
        pytest.param([pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000")]),
        pytest.param(
            np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")])
        ),
    ],
)
def test_array_inference_fails(data):
    """Mixed inputs are expected to come back as an object PandasArray."""
    fallback = PandasArray(np.array(data, dtype=object))
    inferred = pd.array(data)
    tm.assert_extension_array_equal(inferred, fallback)
@pytest.mark.parametrize(
    "data",
    [
        pytest.param(np.array([[1, 2], [3, 4]])),
        pytest.param([[1, 2], [3, 4]]),
    ],
)
def test_nd_raises(data):
    """Two-dimensional input to ``pd.array`` must raise ``ValueError``."""
    expected_message = 'PandasArray must be 1-dimensional'
    with pytest.raises(ValueError, match=expected_message):
        pd.array(data)
def test_scalar_raises():
    """Passing a bare scalar to ``pd.array`` must raise ``ValueError``."""
    expected_message = "Cannot pass scalar '1'"
    with pytest.raises(ValueError, match=expected_message):
        pd.array(1)
# ---------------------------------------------------------------------------
# A couple of dummy classes to ensure that Series and Indexes are unboxed
# before getting to the EA classes.
@register_extension_dtype
class DecimalDtype2(DecimalDtype):
    """Registered ``DecimalDtype`` subclass named ``'decimal2'``."""

    name = 'decimal2'

    @classmethod
    def construct_array_type(cls):
        """Return ``DecimalArray2``, the array class for this dtype."""
        array_type = DecimalArray2
        return array_type
class DecimalArray2(DecimalArray):
    """``DecimalArray`` variant that refuses ``Series``/``Index`` input."""

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        """Build the array from ``scalars`` via the parent implementation.

        Raises
        ------
        TypeError
            If ``scalars`` is a ``pd.Series`` or ``pd.Index``.
        """
        boxed_types = (pd.Series, pd.Index)
        if not isinstance(scalars, boxed_types):
            parent = super(DecimalArray2, cls)
            return parent._from_sequence(scalars, dtype=dtype, copy=copy)
        raise TypeError
@pytest.mark.parametrize("box", [pd.Series, pd.Index])
def test_array_unboxes(box):
    """``pd.array`` on a Series/Index must match building from ``.values``."""
    boxed = box([decimal.Decimal('1'), decimal.Decimal('2')])

    # Sanity check: DecimalArray2 itself raises when handed the boxed object.
    with pytest.raises(TypeError):
        DecimalArray2._from_sequence(boxed)

    expected = DecimalArray2._from_sequence(boxed.values)
    result = pd.array(boxed, dtype='decimal2')
    tm.assert_equal(result, expected)
@pytest.fixture
def registry_without_decimal():
    """Temporarily remove ``DecimalDtype`` from ``registry.dtypes``.

    The dtype is popped from the list before the test body runs and put
    back afterwards at the index it originally occupied, so the ordering
    of the shared registry is left as it was found.
    """
    position = registry.dtypes.index(DecimalDtype)
    registry.dtypes.pop(position)
    yield
    # Re-insert at the saved index instead of appending: appending would
    # move DecimalDtype to the end of the list and leave the module-wide
    # registry in a different order for any code that runs afterwards.
    registry.dtypes.insert(position, DecimalDtype)
def test_array_not_registered(registry_without_decimal):
    """``pd.array`` accepts ``DecimalDtype`` while it is unregistered."""
    # check we aren't on it
    assert registry.find('decimal') is None

    values = [decimal.Decimal('1'), decimal.Decimal('2')]
    expected = DecimalArray._from_sequence(values)
    result = pd.array(values, dtype=DecimalDtype)
    tm.assert_equal(result, expected)
|