12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120 |
- """An interface for extending pandas with custom arrays.
- .. warning::
- This is an experimental API and subject to breaking changes
- without warning.
- """
- import operator
- import numpy as np
- from pandas.compat import PY3, set_function_name
- from pandas.compat.numpy import function as nv
- from pandas.errors import AbstractMethodError
- from pandas.util._decorators import Appender, Substitution
- from pandas.core.dtypes.common import is_list_like
- from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
- from pandas.core.dtypes.missing import isna
- from pandas.core import ops
# Template for "not implemented" errors; it is formatted with the object's
# type and the name of the unsupported method (see ExtensionArray.__setitem__).
- _not_implemented_message = "{} does not implement {}."
# Docstring templates keyed by method name; entries are attached to methods
# with the Appender/Substitution decorators (see ExtensionArray.repeat).
- _extension_array_shared_docs = dict()
- class ExtensionArray(object):
- """
- Abstract base class for custom 1-D array types.
- pandas will recognize instances of this class as proper arrays
- with a custom type and will not attempt to coerce them to objects. They
- may be stored directly inside a :class:`DataFrame` or :class:`Series`.
- .. versionadded:: 0.23.0
- Notes
- -----
- The interface includes the following abstract methods that must be
- implemented by subclasses:
- * _from_sequence
- * _from_factorized
- * __getitem__
- * __len__
- * dtype
- * nbytes
- * isna
- * take
- * copy
- * _concat_same_type
- A default repr displaying the type, (truncated) data, length,
- and dtype is provided. It can be customized or replaced
- by overriding:
- * __repr__ : A default repr for the ExtensionArray.
- * _formatter : Print scalars inside a Series or DataFrame.
- Some methods require casting the ExtensionArray to an ndarray of Python
- objects with ``self.astype(object)``, which may be expensive. When
- performance is a concern, we highly recommend overriding the following
- methods:
- * fillna
- * dropna
- * unique
- * factorize / _values_for_factorize
- * argsort / _values_for_argsort
- * searchsorted
- The remaining methods implemented on this class should be performant,
- as they only compose abstract methods. Still, a more efficient
- implementation may be available, and these methods can be overridden.
- One can implement methods to handle array reductions.
- * _reduce
- One can implement methods to handle parsing from strings that will be used
- in methods such as ``pandas.io.parsers.read_csv``.
- * _from_sequence_of_strings
- This class does not inherit from 'abc.ABCMeta' for performance reasons.
- Methods and properties required by the interface raise
- ``pandas.errors.AbstractMethodError`` and no ``register`` method is
- provided for registering virtual subclasses.
- ExtensionArrays are limited to 1 dimension.
- They may be backed by none, one, or many NumPy arrays. For example,
- ``pandas.Categorical`` is an extension array backed by two arrays,
- one for codes and one for categories. An array of IPv6 address may
- be backed by a NumPy structured array with two fields, one for the
- lower 64 bits and one for the upper 64 bits. Or they may be backed
- by some other storage type, like Python lists. Pandas makes no
- assumptions on how the data are stored, just that it can be converted
- to a NumPy array.
- The ExtensionArray interface does not impose any rules on how this data
- is stored. However, currently, the backing data cannot be stored in
- attributes called ``.values`` or ``._values`` to ensure full compatibility
- with pandas internals. But other names as ``.data``, ``._data``,
- ``._items``, ... can be freely used.
- """
- # '_typ' exists for pandas.core.dtypes.generic.ABCExtensionArray
- # (presumably its type check keys on this value -- confirm there). Don't override this.
- _typ = 'extension'
- # ------------------------------------------------------------------------
- # Constructors
- # ------------------------------------------------------------------------
@classmethod
def _from_sequence(cls, scalars, dtype=None, copy=False):
    """
    Build a new ExtensionArray out of a sequence of scalars.

    This is part of the abstract interface; the base implementation
    only raises.

    Parameters
    ----------
    scalars : Sequence
        Every element is expected to be an instance of the array's
        scalar type, ``cls.dtype.type``.
    dtype : dtype, optional
        Dtype to construct for. It should be a Dtype compatible with
        the ExtensionArray.
    copy : boolean, default False
        Whether the underlying data should be copied.

    Returns
    -------
    ExtensionArray

    Raises
    ------
    AbstractMethodError
        Always, until a subclass overrides this constructor.
    """
    raise AbstractMethodError(cls)
@classmethod
def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
    """
    Build a new ExtensionArray out of a sequence of strings.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    strings : Sequence
        The string values to parse into elements of the array.
    dtype : dtype, optional
        Dtype to construct for. It should be a Dtype compatible with
        the ExtensionArray.
    copy : boolean, default False
        Whether the underlying data should be copied.

    Returns
    -------
    ExtensionArray

    Raises
    ------
    AbstractMethodError
        Always, until a subclass overrides this constructor.
    """
    raise AbstractMethodError(cls)
@classmethod
def _from_factorized(cls, values, original):
    """
    Rebuild an ExtensionArray from the output of factorization.

    Parameters
    ----------
    values : ndarray
        The array of factorized values handed back by the
        factorization routine.
    original : ExtensionArray
        The ExtensionArray on which factorize was originally called.

    Raises
    ------
    AbstractMethodError
        Always, until a subclass overrides this constructor.

    See Also
    --------
    pandas.factorize
    ExtensionArray.factorize
    """
    raise AbstractMethodError(cls)
- # ------------------------------------------------------------------------
- # Must be a Sequence
- # ------------------------------------------------------------------------
def __getitem__(self, item):
    # type: (Any) -> Any
    """
    Select a subset of self.

    Parameters
    ----------
    item : int, slice, or ndarray
        * int: The position in 'self' to get.

        * slice: A slice object, where 'start', 'stop', and 'step' are
          integers or None

        * ndarray: A 1-d boolean NumPy ndarray the same length as 'self'

    Returns
    -------
    item : scalar or ExtensionArray

    Raises
    ------
    AbstractMethodError
        Always, until a subclass overrides this method.

    Notes
    -----
    For scalar ``item``, return a scalar value suitable for the array's
    type. This should be an instance of ``self.dtype.type``.

    For slice ``item``, return an instance of ``ExtensionArray``, even
    if the slice is length 0 or 1.

    For a boolean mask, return an instance of ``ExtensionArray``, filtered
    to the values where ``item`` is True.
    """
    raise AbstractMethodError(self)
def __setitem__(self, key, value):
    # type: (Union[int, np.ndarray], Any) -> None
    """
    Assign one or more values in place.

    Implementing this method is optional; the pandas extension array
    interface does not require it.

    Parameters
    ----------
    key : int, ndarray, or slice
        When invoked through, e.g., ``Series.__setitem__``, ``key`` is
        one of

        * scalar int
        * ndarray of integers.
        * boolean ndarray
        * slice object

    value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
        The value or values to store at ``key``.

    Returns
    -------
    None

    Raises
    ------
    NotImplementedError
        Always, in this base implementation.
    """
    # Guidance for ExtensionArray authors who decide to support item
    # assignment (it is optional). Implementations should honour:
    #
    # * Setting multiple values : several values can be set at once;
    #   'key' is then a sequence of integers and 'value' a sequence of
    #   the same length.
    #
    # * Broadcasting : with a sequence 'key' and a scalar 'value', every
    #   position named in 'key' receives 'value'.
    #
    # * Coercion : users generally expect basic coercion, e.g. a string
    #   such as '2018-01-01' becoming a datetime when assigned into a
    #   datetime64ns array. As a rule, whatever __init__ coerces,
    #   __setitem__ should coerce too.
    #
    # Also note that Series/DataFrame.where call __setitem__ internally
    # on a copy of the data.
    message = _not_implemented_message.format(type(self), '__setitem__')
    raise NotImplementedError(message)
def __len__(self):
    # type: () -> int
    """
    Number of elements in this array.

    Returns
    -------
    length : int

    Raises
    ------
    AbstractMethodError
        Always, until a subclass overrides this method.
    """
    raise AbstractMethodError(self)
def __iter__(self):
    """
    Yield the elements of the array one at a time, in positional order.
    """
    # Defining __iter__ is what lets pandas treat extension arrays as
    # list-like. This default goes through ``__getitem__`` once per
    # element, which subclasses may be able to do faster.
    position, length = 0, len(self)
    while position < length:
        yield self[position]
        position += 1
- # ------------------------------------------------------------------------
- # Required attributes
- # ------------------------------------------------------------------------
@property
def dtype(self):
    # type: () -> ExtensionDtype
    """
    The 'ExtensionDtype' instance describing this array.

    Raises
    ------
    AbstractMethodError
        Always, until a subclass overrides this property.
    """
    raise AbstractMethodError(self)
@property
def shape(self):
    # type: () -> Tuple[int, ...]
    """
    The array dimensions as a one-element tuple, ``(len(self),)``.
    """
    length = len(self)
    return (length,)
@property
def ndim(self):
    # type: () -> int
    """
    Number of dimensions; always 1, since Extension Arrays are only
    allowed to be 1-dimensional.
    """
    return 1
@property
def nbytes(self):
    # type: () -> int
    """
    How many bytes are needed to hold this object in memory.

    Raises
    ------
    AbstractMethodError
        Always, until a subclass overrides this property.
    """
    # Implementations for which an exact figure is costly may return an
    # approximate lower bound on the bytes required instead.
    raise AbstractMethodError(self)
- # ------------------------------------------------------------------------
- # Additional Methods
- # ------------------------------------------------------------------------
def astype(self, dtype, copy=True):
    """
    Convert the array to a NumPy ndarray of the requested dtype.

    Parameters
    ----------
    dtype : str or dtype
        Typecode or data-type the array is cast to.
    copy : bool, default True
        Whether to copy the data, even if not necessary. If False,
        a copy is made only if the old dtype does not match the
        new dtype.

    Returns
    -------
    array : ndarray
        NumPy ndarray whose dtype is 'dtype'.
    """
    converted = np.array(self, copy=copy, dtype=dtype)
    return converted
def isna(self):
    # type: () -> Union[ExtensionArray, np.ndarray]
    """
    Flag, element by element, which values are missing (1-D result).

    Returns
    -------
    na_values : Union[np.ndarray, ExtensionArray]
        Normally a NumPy ndarray. In exceptional cases such as
        ``SparseArray``, where producing an ndarray would be
        expensive, an ExtensionArray may be returned instead.

    Raises
    ------
    AbstractMethodError
        Always, until a subclass overrides this method.

    Notes
    -----
    When an ExtensionArray is returned, then

    * ``na_values._is_boolean`` should be True
    * `na_values` should implement :func:`ExtensionArray._reduce`
    * ``na_values.any`` and ``na_values.all`` should be implemented
    """
    raise AbstractMethodError(self)
def _values_for_argsort(self):
    # type: () -> ndarray
    """
    Produce the values that `ExtensionArray.argsort` sorts on.

    Returns
    -------
    ndarray
        The transformed values should preserve the relative ordering
        of the values within the array.

    See Also
    --------
    ExtensionArray.argsort
    """
    # Consumed by `ExtensionArray.argsort`; the default simply converts
    # the array itself with np.array.
    sortable = np.array(self)
    return sortable
def argsort(self, ascending=True, kind='quicksort', *args, **kwargs):
    """
    Compute the indices that would put this array in sorted order.

    Parameters
    ----------
    ascending : bool, default True
        Whether the indices should result in an ascending
        or descending sort.
    kind : {'quicksort', 'mergesort', 'heapsort'}, optional
        Sorting algorithm.
    *args, **kwargs:
        ``kwargs`` are forwarded to :func:`numpy.argsort` after
        validation; ``args`` are only validated.

    Returns
    -------
    index_array : ndarray
        Array of indices that sort ``self``.

    See Also
    --------
    numpy.argsort : Sorting implementation used internally.
    """
    # Note for implementors: sorting can be customized at two levels.
    # 1. _values_for_argsort : decide which values reach np.argsort
    # 2. argsort : take over the sort entirely.
    ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs)
    order = np.argsort(self._values_for_argsort(), kind=kind, **kwargs)
    # A descending sort is the ascending result read back to front.
    return order if ascending else order[::-1]
def fillna(self, value=None, method=None, limit=None):
    """
    Fill NA/NaN values using the specified method.

    Parameters
    ----------
    value : scalar, array-like
        If a scalar value is passed it is used to fill all missing values.
        Alternatively, an array-like 'value' can be given. It's expected
        that the array-like have the same length as 'self'.
    method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
        Method to use for filling holes in reindexed Series
        pad / ffill: propagate last valid observation forward to next valid
        backfill / bfill: use NEXT valid observation to fill gap
    limit : int, default None
        If method is specified, this is the maximum number of consecutive
        NaN values to forward/backward fill. In other words, if there is
        a gap with more than this number of consecutive NaNs, it will only
        be partially filled. If method is not specified, this is the
        maximum number of entries along the entire axis where NaNs will be
        filled.

    Returns
    -------
    filled : ExtensionArray with NA/NaN filled

    Raises
    ------
    ValueError
        If an array-like `value` does not have the same length as `self`.
    """
    from pandas.api.types import is_array_like
    from pandas.util._validators import validate_fillna_kwargs

    value, method = validate_fillna_kwargs(value, method)

    mask = self.isna()

    if is_array_like(value):
        if len(value) != len(self):
            raise ValueError("Length of 'value' does not match. Got ({}) "
                             "expected {}".format(len(value), len(self)))
        # Keep only the replacement values lined up with missing slots.
        value = value[mask]

    if not mask.any():
        # Nothing is missing; hand back a copy so the caller never
        # receives `self` itself.
        return self.copy()

    if method is not None:
        # Imported here because only the method-based path needs them.
        from pandas.core.missing import pad_1d, backfill_1d

        func = pad_1d if method == 'pad' else backfill_1d
        new_values = func(self.astype(object), limit=limit,
                          mask=mask)
        return self._from_sequence(new_values, dtype=self.dtype)

    # fill with value
    new_values = self.copy()
    new_values[mask] = value
    return new_values
def dropna(self):
    """
    Return a new ExtensionArray with the NA values removed.

    Returns
    -------
    valid : ExtensionArray
    """
    # Invert the missing-value mask to select the entries to keep.
    keep = ~self.isna()
    return self[keep]
def shift(self, periods=1, fill_value=None):
    # type: (int, object) -> ExtensionArray
    """
    Shift the values by the given number of positions.

    Positions vacated by the shift are filled with ``fill_value``,
    which falls back to ``self.dtype.na_value``.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    periods : int, default 1
        The number of periods to shift. Negative values are allowed
        for shifting backwards.

    fill_value : object, optional
        The scalar value to use for newly introduced missing values.
        The default is ``self.dtype.na_value``

        .. versionadded:: 0.24.0

    Returns
    -------
    shifted : ExtensionArray

    Notes
    -----
    If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is
    returned.

    If ``periods > len(self)``, then an array of size
    len(self) is returned, with all values filled with
    ``self.dtype.na_value``.
    """
    # Note: this implementation relies on `self.dtype.na_value` being
    # storable in an instance of your ExtensionArray with `self.dtype`.
    size = len(self)
    if size == 0 or periods == 0:
        return self.copy()

    if isna(fill_value):
        fill_value = self.dtype.na_value

    # Never pad with more elements than the array holds.
    pad_length = min(abs(periods), size)
    padding = self._from_sequence([fill_value] * pad_length,
                                  dtype=self.dtype)

    if periods > 0:
        pieces = [padding, self[:-periods]]
    else:
        pieces = [self[abs(periods):], padding]
    return self._concat_same_type(pieces)
def unique(self):
    """
    Build the ExtensionArray holding the distinct values of `self`.

    Returns
    -------
    uniques : ExtensionArray
    """
    from pandas import unique as pd_unique

    # The default path goes through an object-dtype conversion.
    as_objects = self.astype(object)
    distinct = pd_unique(as_objects)
    return self._from_sequence(distinct, dtype=self.dtype)
def searchsorted(self, value, side="left", sorter=None):
    """
    Locate the insertion points that keep a sorted array in order.

    .. versionadded:: 0.24.0

    Find the indices into a sorted array `self` such that, if the
    corresponding elements in `value` were inserted before the indices,
    the order of `self` would be preserved.

    Assuming that `self` is sorted:

    ======  ================================
    `side`  returned index `i` satisfies
    ======  ================================
    left    ``self[i-1] < value <= self[i]``
    right   ``self[i-1] <= value < self[i]``
    ======  ================================

    Parameters
    ----------
    value : array_like
        Values to insert into `self`.
    side : {'left', 'right'}, optional
        If 'left', the index of the first suitable location found is given.
        If 'right', return the last such index. If there is no suitable
        index, return either 0 or N (where N is the length of `self`).
    sorter : 1-D array_like, optional
        Optional array of integer indices that sort `self` into ascending
        order. They are typically the result of argsort.

    Returns
    -------
    indices : array of ints
        Array of insertion points with the same shape as `value`.

    See Also
    --------
    numpy.searchsorted : Similar method from NumPy.
    """
    # Note: the base tests shipped with pandas cover only the basics.
    # Not covered are
    # 1. Values outside the range of the `data_for_sorting` fixture
    # 2. Values between the values in the `data_for_sorting` fixture
    # 3. Missing values.
    return self.astype(object).searchsorted(value, side=side,
                                            sorter=sorter)
def _values_for_factorize(self):
    # type: () -> Tuple[ndarray, Any]
    """
    Provide the array and missing-value marker used for factorization.

    Returns
    -------
    values : ndarray
        An array suitable for factorization. This should maintain order
        and be a supported dtype (Float64, Int64, UInt64, String, Object).
        By default, the extension array is cast to object dtype.
    na_value : object
        The value in `values` to consider missing. This will be treated
        as NA in the factorization routines, so it will be coded as
        `na_sentinel` and not included in `uniques`. By default,
        ``np.nan`` is used.

    Notes
    -----
    The values returned by this method are also used in
    :func:`pandas.util.hash_pandas_object`.
    """
    values = self.astype(object)
    return values, np.nan
def factorize(self, na_sentinel=-1):
    # type: (int) -> Tuple[ndarray, ExtensionArray]
    """
    Encode the extension array as an enumerated type.

    Parameters
    ----------
    na_sentinel : int, default -1
        Value to use in the `labels` array to indicate missing values.

    Returns
    -------
    labels : ndarray
        An integer NumPy array that's an indexer into the original
        ExtensionArray.
    uniques : ExtensionArray
        An ExtensionArray containing the unique values of `self`.

        .. note::

           uniques will *not* contain an entry for the NA value of
           the ExtensionArray if there are any missing values present
           in `self`.

    See Also
    --------
    pandas.factorize : Top-level factorize method that dispatches here.

    Notes
    -----
    :meth:`pandas.factorize` offers a `sort` keyword as well.
    """
    # Implementor note: pandas.factorize can be customized in two ways.
    # 1. _values_for_factorize and _from_factorized.
    #    Choose the values handed to pandas' internal factorization
    #    routines, and how those values are turned back into the
    #    original ExtensionArray.
    # 2. ExtensionArray.factorize.
    #    Take complete control over factorization.
    from pandas.core.algorithms import _factorize_array

    values, missing = self._values_for_factorize()
    labels, raw_uniques = _factorize_array(values,
                                           na_sentinel=na_sentinel,
                                           na_value=missing)
    return labels, self._from_factorized(raw_uniques, self)
# Docstring template registered under the 'repeat' key. The %(klass)s
# placeholders are filled by the Substitution decorator and the text is
# attached by the Appender decorator on ``repeat`` just below.
- _extension_array_shared_docs['repeat'] = """
- Repeat elements of a %(klass)s.
- Returns a new %(klass)s where each element of the current %(klass)s
- is repeated consecutively a given number of times.
- Parameters
- ----------
- repeats : int or array of ints
- The number of repetitions for each element. This should be a
- non-negative integer. Repeating 0 times will return an empty
- %(klass)s.
- axis : None
- Must be ``None``. Has no effect but is accepted for compatibility
- with numpy.
- Returns
- -------
- repeated_array : %(klass)s
- Newly created %(klass)s with repeated elements.
- See Also
- --------
- Series.repeat : Equivalent function for Series.
- Index.repeat : Equivalent function for Index.
- numpy.repeat : Similar method for :class:`numpy.ndarray`.
- ExtensionArray.take : Take arbitrary positions.
- Examples
- --------
- >>> cat = pd.Categorical(['a', 'b', 'c'])
- >>> cat
- [a, b, c]
- Categories (3, object): [a, b, c]
- >>> cat.repeat(2)
- [a, a, b, b, c, c]
- Categories (3, object): [a, b, c]
- >>> cat.repeat([1, 2, 3])
- [a, b, b, c, c, c]
- Categories (3, object): [a, b, c]
- """
@Substitution(klass='ExtensionArray')
@Appender(_extension_array_shared_docs['repeat'])
def repeat(self, repeats, axis=None):
    # The docstring comes from the shared 'repeat' template via the
    # decorators above, so none is written here.
    # `axis` is accepted only for numpy compatibility and is validated.
    nv.validate_repeat((), {'axis': axis})
    # Repeat the positions, then gather the elements with a single take.
    positions = np.repeat(np.arange(len(self)), repeats)
    return self.take(positions)
- # ------------------------------------------------------------------------
- # Indexing methods
- # ------------------------------------------------------------------------
def take(self, indices, allow_fill=False, fill_value=None):
    # type: (Sequence[int], bool, Optional[Any]) -> ExtensionArray
    """
    Take elements from an array.

    Parameters
    ----------
    indices : sequence of integers
        Indices to be taken.
    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to
          :func:`numpy.take`.

        * True: negative values in `indices` indicate
          missing values. These values are set to `fill_value`. Any
          other negative values raise a ``ValueError``.

    fill_value : any, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type, ``self.dtype.na_value``, is used.

        For many ExtensionArrays, there will be two representations of
        `fill_value`: a user-facing "boxed" scalar, and a low-level
        physical NA value. `fill_value` should be the user-facing version,
        and the implementation should handle translating that to the
        physical version for processing the take if necessary.

    Returns
    -------
    ExtensionArray

    Raises
    ------
    IndexError
        When the indices are out of bounds for the array.
    ValueError
        When `indices` contains negative values other than ``-1``
        and `allow_fill` is True.
    AbstractMethodError
        Always, in this base implementation; subclasses must override.

    Notes
    -----
    ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
    ``iloc``, when `indices` is a sequence of values. Additionally,
    it's called by :meth:`Series.reindex`, or any other method
    that causes realignment, with a `fill_value`.

    See Also
    --------
    numpy.take
    pandas.api.extensions.take

    Examples
    --------
    Here's an example implementation, which relies on casting the
    extension array to object dtype. This uses the helper method
    :func:`pandas.api.extensions.take`.

    .. code-block:: python

       def take(self, indices, allow_fill=False, fill_value=None):
           from pandas.core.algorithms import take

           # If the ExtensionArray is backed by an ndarray, then
           # just pass that here instead of coercing to object.
           data = self.astype(object)

           if allow_fill and fill_value is None:
               fill_value = self.dtype.na_value

           # fill value should always be translated from the scalar
           # type for the array, to the physical storage type for
           # the data, before passing to take.

           result = take(data, indices, fill_value=fill_value,
                         allow_fill=allow_fill)
           return self._from_sequence(result, dtype=self.dtype)
    """
    # Implementer note: `fill_value` is a user-facing value, i.e. an
    # instance of self.dtype.type, and `fill_value=None` means "use
    # `self.dtype.na_value`". If your ExtensionArray stores data in a
    # different physical type, the implementation must translate the
    # user-facing value to the storage type before calling
    # pandas.api.extensions.take.
    raise AbstractMethodError(self)
def copy(self, deep=False):
    # type: (bool) -> ExtensionArray
    """
    Produce a copy of the array.

    Parameters
    ----------
    deep : bool, default False
        Also copy the underlying data backing this array.

    Returns
    -------
    ExtensionArray

    Raises
    ------
    AbstractMethodError
        Always, until a subclass overrides this method.
    """
    raise AbstractMethodError(self)
- # ------------------------------------------------------------------------
- # Printing
- # ------------------------------------------------------------------------
def __repr__(self):
    """
    Default repr: class name, (possibly truncated) data, length and dtype.
    """
    from pandas.io.formats.printing import format_object_summary

    # The short summary has no trailing newline while the truncated one
    # does, so trailing separators/newlines are stripped here and the
    # template supplies its own newline after the data.
    summary = format_object_summary(self, self._formatter(),
                                    indent_for_name=False)
    data = summary.rstrip(', \n')
    header = u'<{}>\n'.format(self.__class__.__name__)
    layout = (
        u'{class_name}'
        u'{data}\n'
        u'Length: {length}, dtype: {dtype}'
    )
    return layout.format(class_name=header, data=data,
                         length=len(self), dtype=self.dtype)
def _formatter(self, boxed=False):
    # type: (bool) -> Callable[[Any], Optional[str]]
    """
    Return the function used to format individual scalar values.

    The default '__repr__' uses it. The returned formatting function
    is given instances of your scalar type.

    Parameters
    ----------
    boxed: bool, default False
        An indicator for whether or not your array is being printed
        within a Series, DataFrame, or Index (True), or just by
        itself (False). This may be useful if you want scalar values
        to appear differently within a Series versus on its own (e.g.
        quoted or not).

    Returns
    -------
    Callable[[Any], str]
        A callable that gets instances of the scalar type and
        returns a string. By default, :func:`repr` is used
        when ``boxed=False`` and :func:`str` is used when
        ``boxed=True``.
    """
    return str if boxed else repr
def _formatting_values(self):
    # type: () -> np.ndarray
    # At the moment, this has to be an array since we use result.dtype
    """
    Values to print in, e.g., the Series repr, as an ndarray.

    .. deprecated:: 0.24.0

       Use :meth:`ExtensionArray._formatter` instead.
    """
    printable = np.array(self)
    return printable
- # ------------------------------------------------------------------------
- # Reshaping
- # ------------------------------------------------------------------------
@classmethod
def _concat_same_type(cls, to_concat):
    # type: (Sequence[ExtensionArray]) -> ExtensionArray
    """
    Concatenate several arrays of this type into one.

    Parameters
    ----------
    to_concat : sequence of this type

    Returns
    -------
    ExtensionArray

    Raises
    ------
    AbstractMethodError
        Always, until a subclass overrides this method.
    """
    raise AbstractMethodError(cls)
- # The _can_hold_na attribute is set to True so that pandas internals
- # will use the ExtensionDtype.na_value as the NA value in operations
- # such as take(), reindex(), shift(), etc. In addition, those results
- # will then be of the ExtensionArray subclass rather than an array
- # of objects.
- _can_hold_na = True
@property
def _ndarray_values(self):
    # type: () -> np.ndarray
    """
    Internal pandas helper: lossy conversion to a NumPy ndarray.

    Not part of the pandas interface.

    It is expected to be cheap to compute, and is mainly used for
    interacting with our indexers.
    """
    converted = np.array(self)
    return converted
def _reduce(self, name, skipna=True, **kwargs):
    """
    Perform the named reduction and return its scalar result.

    Parameters
    ----------
    name : str
        Name of the function, supported values are:
        { any, all, min, max, sum, mean, median, prod,
        std, var, sem, kurt, skew }.
    skipna : bool, default True
        If True, skip NaN values.
    **kwargs
        Additional keyword arguments passed to the reduction function.
        Currently, `ddof` is the only supported kwarg.

    Returns
    -------
    scalar

    Raises
    ------
    TypeError : subclass does not define reductions
    """
    # The base class supports no reductions; report which one was asked
    # for and the dtype it was asked of.
    message = "cannot perform {name} with type {dtype}".format(
        name=name, dtype=self.dtype)
    raise TypeError(message)
class ExtensionOpsMixin(object):
    """
    Base class that wires operator functions to their dunder names.

    The class using this mixin must supply ``_create_arithmetic_method``
    and ``_create_comparison_method``; each receives an operator function
    and returns the method to install.

    .. note::

       You may want to set ``__array_priority__`` if you want your
       implementation to be called when involved in binary operations
       with NumPy arrays.
    """

    @classmethod
    def _add_arithmetic_ops(cls):
        """Install the arithmetic dunder methods (and reflected forms)."""
        table = [
            ('__add__', operator.add),
            ('__radd__', ops.radd),
            ('__sub__', operator.sub),
            ('__rsub__', ops.rsub),
            ('__mul__', operator.mul),
            ('__rmul__', ops.rmul),
            ('__pow__', operator.pow),
            ('__rpow__', ops.rpow),
            ('__mod__', operator.mod),
            ('__rmod__', ops.rmod),
            ('__floordiv__', operator.floordiv),
            ('__rfloordiv__', ops.rfloordiv),
            ('__truediv__', operator.truediv),
            ('__rtruediv__', ops.rtruediv),
        ]
        if not PY3:
            # Classic division dunders are only installed when PY3 is
            # false.
            table.append(('__div__', operator.div))
            table.append(('__rdiv__', ops.rdiv))
        table.append(('__divmod__', divmod))
        table.append(('__rdivmod__', ops.rdivmod))

        for dunder, op in table:
            setattr(cls, dunder, cls._create_arithmetic_method(op))

    @classmethod
    def _add_comparison_ops(cls):
        """Install the six rich-comparison dunder methods."""
        for dunder, op in (
                ('__eq__', operator.eq),
                ('__ne__', operator.ne),
                ('__lt__', operator.lt),
                ('__gt__', operator.gt),
                ('__le__', operator.le),
                ('__ge__', operator.ge)):
            setattr(cls, dunder, cls._create_comparison_method(op))
class ExtensionScalarOpsMixin(ExtensionOpsMixin):
    """
    A mixin for defining ops on an ExtensionArray.

    It is assumed that the underlying scalar objects have the operators
    already defined.

    Notes
    -----
    If you have defined a subclass MyExtensionArray(ExtensionArray), then
    use MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin) to
    get the arithmetic operators.  After the definition of MyExtensionArray,
    insert the lines

    MyExtensionArray._add_arithmetic_ops()
    MyExtensionArray._add_comparison_ops()

    to link the operators to your class.

    .. note::

       You may want to set ``__array_priority__`` if you want your
       implementation to be called when involved in binary operations
       with NumPy arrays.
    """

    @classmethod
    def _create_method(cls, op, coerce_to_dtype=True):
        """
        A class method that returns a method that will correspond to an
        operator for an ExtensionArray subclass, by dispatching to the
        relevant operator defined on the individual elements of the
        ExtensionArray.

        Parameters
        ----------
        op : function
            An operator that takes arguments op(a, b)
        coerce_to_dtype : bool, default True
            boolean indicating whether to attempt to convert
            the result to the underlying ExtensionArray dtype.
            If it's not possible to create a new ExtensionArray with the
            values, an ndarray is returned instead.

        Returns
        -------
        Callable[[Any, Any], Union[ndarray, ExtensionArray]]
            A method that can be bound to a class. When used, the method
            receives the two arguments, one of which is the instance of
            this class, and should return an ExtensionArray or an ndarray.

            Returning an ndarray may be necessary when the result of the
            `op` cannot be stored in the ExtensionArray. The dtype of the
            ndarray uses NumPy's normal inference rules.

            For ``divmod`` / ``rdivmod`` the method returns a 2-tuple
            (quotients, remainders), each converted separately. An empty
            array yields a pair of empty results.

        Example
        -------
        Given an ExtensionArray subclass called MyExtensionArray, use

        >>> __add__ = cls._create_method(operator.add)

        in the class definition of MyExtensionArray to create the operator
        for addition, that will be based on the operator implementation
        of the underlying elements of the ExtensionArray
        """

        def _binop(self, other):
            def convert_values(param):
                # Array-likes are paired element by element; anything
                # else is treated as a scalar and repeated len(self) times.
                if isinstance(param, ExtensionArray) or is_list_like(param):
                    ovalues = param
                else:  # Assume its an object
                    ovalues = [param] * len(self)
                return ovalues

            if isinstance(other, (ABCSeries, ABCIndexClass)):
                # rely on pandas to unbox and dispatch to us
                return NotImplemented

            lvalues = self
            rvalues = convert_values(other)

            # If the operator is not defined for the underlying objects,
            # a TypeError should be raised
            res = [op(a, b) for (a, b) in zip(lvalues, rvalues)]

            def _maybe_convert(arr):
                if coerce_to_dtype:
                    # https://github.com/pandas-dev/pandas/issues/22850
                    # We catch all regular exceptions here, and fall back
                    # to an ndarray.
                    try:
                        converted = self._from_sequence(arr)
                    except Exception:
                        converted = np.asarray(arr)
                else:
                    converted = np.asarray(arr)
                return converted

            if op.__name__ in {'divmod', 'rdivmod'}:
                if res:
                    # Split the list of pairs into two parallel tuples.
                    a, b = zip(*res)
                else:
                    # zip(*[]) yields nothing, so unpacking it into two
                    # names would raise ValueError for an empty array.
                    a, b = (), ()
                return _maybe_convert(a), _maybe_convert(b)

            return _maybe_convert(res)

        op_name = ops._get_op_name(op, True)
        return set_function_name(_binop, op_name, cls)

    @classmethod
    def _create_arithmetic_method(cls, op):
        """Build an arithmetic method; results are coerced to the dtype."""
        return cls._create_method(op)

    @classmethod
    def _create_comparison_method(cls, op):
        """Build a comparison method; results are returned as ndarray."""
        return cls._create_method(op, coerce_to_dtype=False)
|