diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index cdba53662e6fa..276ccbdc76fdd 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -25,7 +25,6 @@ is_platform_windows, ) from pandas.errors import AbstractMethodError -from pandas.util._decorators import doc from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.cast import maybe_downcast_to_dtype @@ -169,8 +168,15 @@ def _cast_pointwise_result(self, values) -> ArrayLike: return result @classmethod - @doc(ExtensionArray._empty) def _empty(cls, shape: Shape, dtype: ExtensionDtype) -> Self: + """ + Create an ExtensionArray with the given shape and dtype. + + See also + -------- + ExtensionDtype.empty + ExtensionDtype.empty is the 'official' public version of this API. + """ dtype = cast(BaseMaskedDtype, dtype) values: np.ndarray = np.empty(shape, dtype=dtype.type) values.fill(dtype._internal_fill_value) @@ -252,8 +258,44 @@ def _pad_or_backfill( new_values = self return new_values - @doc(ExtensionArray.fillna) def fillna(self, value, limit: int | None = None, copy: bool = True) -> Self: + """ + Fill NA/NaN values using the specified method. + + Parameters + ---------- + value : scalar, array-like + If a scalar value is passed it is used to fill all missing values. + Alternatively, an array-like "value" can be given. It's expected + that the array-like have the same length as 'self'. + limit : int, default None + The maximum number of entries where NA values will be filled. + copy : bool, default True + Whether to make a copy of the data before filling. If False, then + the original should be modified and no new memory should be allocated. + For ExtensionArray subclasses that cannot do this, it is at the + author's discretion whether to ignore "copy=False" or to raise. + + Returns + ------- + ExtensionArray + With NA/NaN filled. + + See Also + -------- + api.extensions.ExtensionArray.dropna : Return ExtensionArray without + NA values. + api.extensions.ExtensionArray.isna : A 1-D array indicating if + each value is missing. + + Examples + -------- + >>> arr = pd.array([np.nan, np.nan, 2, 3, np.nan, np.nan]) + >>> arr.fillna(0) + + [0, 0, 2, 3, 0, 0] + Length: 6, dtype: Int64 + """ mask = self._mask if limit is not None and limit < len(self): modify = mask.cumsum() > limit @@ -548,8 +590,30 @@ def to_numpy( data = self._data.astype(dtype, copy=copy) return data - @doc(ExtensionArray.tolist) def tolist(self) -> list: + """ + Return a list of the values. + + These are each a scalar type, which is a Python scalar + (for str, int, float) or a pandas scalar + (for Timestamp/Timedelta/Interval/Period) + + Returns + ------- + list + Python list of values in array. + + See Also + -------- + Index.to_list: Return a list of the values in the Index. + Series.to_list: Return a list of the values in the Series. + + Examples + -------- + >>> arr = pd.array([1, 2, 3]) + >>> arr.tolist() + [1, 2, 3] + """ if self.ndim > 1: return [x.tolist() for x in self] dtype = None if self._hasna else self._data.dtype @@ -1075,10 +1139,37 @@ def _rank( return FloatingArray(result, mask=mask) - @doc(ExtensionArray.duplicated) def duplicated( self, keep: Literal["first", "last", False] = "first" ) -> npt.NDArray[np.bool_]: + """ + Return boolean ndarray denoting duplicate values. + + Parameters + ---------- + keep : {'first', 'last', False}, default 'first' + - ``first`` : Mark duplicates as ``True`` except for the first occurrence. + - ``last`` : Mark duplicates as ``True`` except for the last occurrence. + - False : Mark all duplicates as ``True``. + + Returns + ------- + ndarray[bool] + With true in indices where elements are duplicated and false otherwise. + + See Also + -------- + DataFrame.duplicated : Return boolean Series denoting + duplicate rows. + Series.duplicated : Indicate duplicate Series values. + api.extensions.ExtensionArray.unique : Compute the ExtensionArray + of unique values. + + Examples + -------- + >>> pd.array([1, 1, 2, 3, 3], dtype="Int64").duplicated() + array([False, True, False, False, True]) + """ values = self._data mask = self._mask return algos.duplicated(values, keep=keep, mask=mask) @@ -1094,13 +1185,56 @@ def unique(self) -> Self: uniques, mask = algos.unique_with_mask(self._data, self._mask) return self._simple_new(uniques, mask) - @doc(ExtensionArray.searchsorted) def searchsorted( self, value: NumpyValueArrayLike | ExtensionArray, side: Literal["left", "right"] = "left", sorter: NumpySorter | None = None, ) -> npt.NDArray[np.intp] | np.intp: + """ + Find indices where elements should be inserted to maintain order. + + Find the indices into a sorted array `self` (a) such that, if the + corresponding elements in `value` were inserted before the indices, + the order of `self` would be preserved. + + Assuming that `self` is sorted: + + ====== ================================ + `side` returned index `i` satisfies + ====== ================================ + left ``self[i-1] < value <= self[i]`` + right ``self[i-1] <= value < self[i]`` + ====== ================================ + + Parameters + ---------- + value : array-like, list or scalar + Value(s) to insert into `self`. + side : {'left', 'right'}, optional + If 'left', the index of the first suitable location found is given. + If 'right', return the last such index. If there is no suitable + index, return either 0 or N (where N is the length of `self`). + sorter : 1-D array-like, optional + Optional array of integer indices that sort array a into ascending + order. They are typically the result of argsort. + + Returns + ------- + array of ints or int + If value is array-like, array of insertion points. + If value is scalar, a single integer. + + See Also + -------- + numpy.searchsorted : Similar method from NumPy. + + Examples + -------- + >>> arr = pd.array([1, 2, 3, 5]) + >>> arr.searchsorted([4]) + array([3]) + """ if self._hasna: raise ValueError( "searchsorted requires array to be sorted, which is impossible " @@ -1111,11 +1245,56 @@ def searchsorted( # Base class searchsorted would cast to object, which is *much* slower. return self._data.searchsorted(value, side=side, sorter=sorter) - @doc(ExtensionArray.factorize) def factorize( self, use_na_sentinel: bool = True, ) -> tuple[np.ndarray, ExtensionArray]: + """ + Encode the extension array as an enumerated type. + + Parameters + ---------- + use_na_sentinel : bool, default True + If True, the sentinel -1 will be used for NaN values. If False, + NaN values will be encoded as non-negative integers and will not drop the + NaN from the uniques of the values. + + .. versionadded:: 1.5.0 + + Returns + ------- + codes : ndarray + An integer NumPy array that's an indexer into the original + ExtensionArray. + uniques : ExtensionArray + An ExtensionArray containing the unique values of `self`. + + .. note:: + + uniques will *not* contain an entry for the NA value of + the ExtensionArray if there are any missing values present + in `self`. + + See Also + -------- + factorize : Top-level factorize method that dispatches here. + + Notes + ----- + :meth:`pandas.factorize` offers a `sort` keyword as well. + + Examples + -------- + >>> idx1 = pd.PeriodIndex( + ... ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], + ... freq="M", + ... ) + >>> arr, idx = idx1.factorize() + >>> arr + array([0, 0, 1, 1, 2, 2]) + >>> idx + PeriodIndex(['2014-01', '2014-02', '2014-03'], dtype='period[M]') + """ arr = self._data mask = self._mask @@ -1148,8 +1327,38 @@ def factorize( return codes, uniques_ea - @doc(ExtensionArray._values_for_argsort) def _values_for_argsort(self) -> np.ndarray: + """ + Return values for sorting. + + Returns + ------- + ndarray + The transformed values should maintain the ordering between values + within the array. + + See Also + -------- + ExtensionArray.argsort : Return the indices that would sort this array. + + Notes + ----- + The caller is responsible for *not* modifying these values in-place, so + it is safe for implementers to give views on ``self``. + + Functions that use this (e.g. ``ExtensionArray.argsort``) should ignore + entries with missing values in the original array (according to + ``self.isna()``). This means that the corresponding entries in the returned + array don't need to be modified to sort correctly. + + Examples + -------- + In most cases, this is the underlying Numpy array of the ``ExtensionArray``: + + >>> arr = pd.array([1, 2, 3]) + >>> arr._values_for_argsort() + array([1, 2, 3]) + """ return self._data def value_counts(self, dropna: bool = True) -> Series: @@ -1198,8 +1407,42 @@ def _mode(self, dropna: bool = True) -> Self: result = type(self)(result, res_mask) return result[result.argsort()] - @doc(ExtensionArray.equals) def equals(self, other) -> bool: + """ + Return if another array is equivalent to this array. + + Equivalent means that both arrays have the same shape and dtype, and + all values compare equal. Missing values in the same location are + considered equal (in contrast with normal equality). + + Parameters + ---------- + other : ExtensionArray + Array to compare to this Array. + + Returns + ------- + boolean + Whether the arrays are equivalent. + + See Also + -------- + numpy.array_equal : Equivalent method for numpy array. + Series.equals : Equivalent method for Series. + DataFrame.equals : Equivalent method for DataFrame. + + Examples + -------- + >>> arr1 = pd.array([1, 2, np.nan]) + >>> arr2 = pd.array([1, 2, np.nan]) + >>> arr1.equals(arr2) + True + + >>> arr1 = pd.array([1, 3, np.nan]) + >>> arr2 = pd.array([1, 2, np.nan]) + >>> arr1.equals(arr2) + False + """ if type(self) != type(other): return False if other.dtype != self.dtype: