diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index f20ca44728664..9d553fd714c1c 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -284,29 +284,424 @@ def _isna_recarray_dtype(values: np.rec.recarray) -> npt.NDArray[np.bool_]: return result -@overload -def notna(obj: Scalar | Pattern | NAType | NaTType) -> bool: ... +@set_module("pandas") +def notna(obj: Scalar | Pattern | NAType | NaTType) -> bool: + """ + Detect non-missing values for an array-like object. + This function takes a scalar or array-like object and indicates + whether values are valid (not missing, which is ``NaN`` in numeric + arrays, ``None`` or ``NaN`` in object arrays, ``NaT`` in datetimelike). -@overload + Parameters + ---------- + obj : array-like or object value + Object to check for *not* null or *non*-missing values. + + Returns + ------- + bool or array-like of bool + For scalar input, returns a scalar boolean. + For array input, returns an array of boolean indicating whether each + corresponding element is valid. + + See Also + -------- + isna : Boolean inverse of pandas.notna. + Series.notna : Detect valid values in a Series. + DataFrame.notna : Detect valid values in a DataFrame. + Index.notna : Detect valid values in an Index. + + Examples + -------- + Scalar arguments (including strings) result in a scalar boolean. + + >>> pd.notna("dog") + True + + >>> pd.notna(pd.NA) + False + + >>> pd.notna(np.nan) + False + + ndarrays result in an ndarray of booleans. + + >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]]) + >>> array + array([[ 1., nan, 3.], + [ 4., 5., nan]]) + >>> pd.notna(array) + array([[ True, False, True], + [ True, True, False]]) + + For indexes, an ndarray of booleans is returned. + + >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, "2017-07-08"]) + >>> index + DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], + dtype='datetime64[s]', freq=None) + >>> pd.notna(index) + array([ True, True, False, True]) + + For Series and DataFrame, the same type is returned, containing booleans. + + >>> df = pd.DataFrame([["ant", "bee", "cat"], ["dog", None, "fly"]]) + >>> df + 0 1 2 + 0 ant bee cat + 1 dog None fly + >>> pd.notna(df) + 0 1 2 + 0 True True True + 1 True False True + + >>> pd.notna(df[1]) + 0 True + 1 False + Name: 1, dtype: bool + """ + res = isna(obj) + if isinstance(res, bool): + return not res + return ~res + + +@set_module("pandas") def notna( obj: ArrayLike | Index | list, -) -> npt.NDArray[np.bool_]: ... +) -> npt.NDArray[np.bool_]: + """ + Detect non-missing values for an array-like object. + This function takes a scalar or array-like object and indicates + whether values are valid (not missing, which is ``NaN`` in numeric + arrays, ``None`` or ``NaN`` in object arrays, ``NaT`` in datetimelike). -@overload -def notna(obj: NDFrameT) -> NDFrameT: ... + Parameters + ---------- + obj : array-like or object value + Object to check for *not* null or *non*-missing values. + + Returns + ------- + bool or array-like of bool + For scalar input, returns a scalar boolean. + For array input, returns an array of boolean indicating whether each + corresponding element is valid. + + See Also + -------- + isna : Boolean inverse of pandas.notna. + Series.notna : Detect valid values in a Series. + DataFrame.notna : Detect valid values in a DataFrame. + Index.notna : Detect valid values in an Index. + + Examples + -------- + Scalar arguments (including strings) result in a scalar boolean. + + >>> pd.notna("dog") + True + + >>> pd.notna(pd.NA) + False + + >>> pd.notna(np.nan) + False + + ndarrays result in an ndarray of booleans. + + >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]]) + >>> array + array([[ 1., nan, 3.], + [ 4., 5., nan]]) + >>> pd.notna(array) + array([[ True, False, True], + [ True, True, False]]) + + For indexes, an ndarray of booleans is returned. + + >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, "2017-07-08"]) + >>> index + DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], + dtype='datetime64[s]', freq=None) + >>> pd.notna(index) + array([ True, True, False, True]) + + For Series and DataFrame, the same type is returned, containing booleans. + + >>> df = pd.DataFrame([["ant", "bee", "cat"], ["dog", None, "fly"]]) + >>> df + 0 1 2 + 0 ant bee cat + 1 dog None fly + >>> pd.notna(df) + 0 1 2 + 0 True True True + 1 True False True + + >>> pd.notna(df[1]) + 0 True + 1 False + Name: 1, dtype: bool + """ + res = isna(obj) + if isinstance(res, bool): + return not res + return ~res + + +@set_module("pandas") +def notna(obj: NDFrameT) -> NDFrameT: + """ + Detect non-missing values for an array-like object. + + This function takes a scalar or array-like object and indicates + whether values are valid (not missing, which is ``NaN`` in numeric + arrays, ``None`` or ``NaN`` in object arrays, ``NaT`` in datetimelike). + + Parameters + ---------- + obj : array-like or object value + Object to check for *not* null or *non*-missing values. + + Returns + ------- + bool or array-like of bool + For scalar input, returns a scalar boolean. + For array input, returns an array of boolean indicating whether each + corresponding element is valid. + + See Also + -------- + isna : Boolean inverse of pandas.notna. + Series.notna : Detect valid values in a Series. + DataFrame.notna : Detect valid values in a DataFrame. + Index.notna : Detect valid values in an Index. + + Examples + -------- + Scalar arguments (including strings) result in a scalar boolean. + + >>> pd.notna("dog") + True + + >>> pd.notna(pd.NA) + False + + >>> pd.notna(np.nan) + False + + ndarrays result in an ndarray of booleans. + + >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]]) + >>> array + array([[ 1., nan, 3.], + [ 4., 5., nan]]) + >>> pd.notna(array) + array([[ True, False, True], + [ True, True, False]]) + + For indexes, an ndarray of booleans is returned. + + >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, "2017-07-08"]) + >>> index + DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], + dtype='datetime64[s]', freq=None) + >>> pd.notna(index) + array([ True, True, False, True]) + + For Series and DataFrame, the same type is returned, containing booleans. + + >>> df = pd.DataFrame([["ant", "bee", "cat"], ["dog", None, "fly"]]) + >>> df + 0 1 2 + 0 ant bee cat + 1 dog None fly + >>> pd.notna(df) + 0 1 2 + 0 True True True + 1 True False True + + >>> pd.notna(df[1]) + 0 True + 1 False + Name: 1, dtype: bool + """ + res = isna(obj) + if isinstance(res, bool): + return not res + return ~res # handle unions -@overload +@set_module("pandas") def notna( obj: NDFrameT | ArrayLike | Index | list, -) -> NDFrameT | npt.NDArray[np.bool_]: ... +) -> NDFrameT | npt.NDArray[np.bool_]: + """ + Detect non-missing values for an array-like object. + This function takes a scalar or array-like object and indicates + whether values are valid (not missing, which is ``NaN`` in numeric + arrays, ``None`` or ``NaN`` in object arrays, ``NaT`` in datetimelike). -@overload -def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: ... + Parameters + ---------- + obj : array-like or object value + Object to check for *not* null or *non*-missing values. + + Returns + ------- + bool or array-like of bool + For scalar input, returns a scalar boolean. + For array input, returns an array of boolean indicating whether each + corresponding element is valid. + + See Also + -------- + isna : Boolean inverse of pandas.notna. + Series.notna : Detect valid values in a Series. + DataFrame.notna : Detect valid values in a DataFrame. + Index.notna : Detect valid values in an Index. + + Examples + -------- + Scalar arguments (including strings) result in a scalar boolean. + + >>> pd.notna("dog") + True + + >>> pd.notna(pd.NA) + False + + >>> pd.notna(np.nan) + False + + ndarrays result in an ndarray of booleans. + + >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]]) + >>> array + array([[ 1., nan, 3.], + [ 4., 5., nan]]) + >>> pd.notna(array) + array([[ True, False, True], + [ True, True, False]]) + + For indexes, an ndarray of booleans is returned. + + >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, "2017-07-08"]) + >>> index + DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], + dtype='datetime64[s]', freq=None) + >>> pd.notna(index) + array([ True, True, False, True]) + + For Series and DataFrame, the same type is returned, containing booleans. + + >>> df = pd.DataFrame([["ant", "bee", "cat"], ["dog", None, "fly"]]) + >>> df + 0 1 2 + 0 ant bee cat + 1 dog None fly + >>> pd.notna(df) + 0 1 2 + 0 True True True + 1 True False True + + >>> pd.notna(df[1]) + 0 True + 1 False + Name: 1, dtype: bool + """ + res = isna(obj) + if isinstance(res, bool): + return not res + return ~res + + +@set_module("pandas") +def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: + """ + Detect non-missing values for an array-like object. + + This function takes a scalar or array-like object and indicates + whether values are valid (not missing, which is ``NaN`` in numeric + arrays, ``None`` or ``NaN`` in object arrays, ``NaT`` in datetimelike). + + Parameters + ---------- + obj : array-like or object value + Object to check for *not* null or *non*-missing values. + + Returns + ------- + bool or array-like of bool + For scalar input, returns a scalar boolean. + For array input, returns an array of boolean indicating whether each + corresponding element is valid. + + See Also + -------- + isna : Boolean inverse of pandas.notna. + Series.notna : Detect valid values in a Series. + DataFrame.notna : Detect valid values in a DataFrame. + Index.notna : Detect valid values in an Index. + + Examples + -------- + Scalar arguments (including strings) result in a scalar boolean. + + >>> pd.notna("dog") + True + + >>> pd.notna(pd.NA) + False + + >>> pd.notna(np.nan) + False + + ndarrays result in an ndarray of booleans. + + >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]]) + >>> array + array([[ 1., nan, 3.], + [ 4., 5., nan]]) + >>> pd.notna(array) + array([[ True, False, True], + [ True, True, False]]) + + For indexes, an ndarray of booleans is returned. + + >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, "2017-07-08"]) + >>> index + DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], + dtype='datetime64[s]', freq=None) + >>> pd.notna(index) + array([ True, True, False, True]) + + For Series and DataFrame, the same type is returned, containing booleans. + + >>> df = pd.DataFrame([["ant", "bee", "cat"], ["dog", None, "fly"]]) + >>> df + 0 1 2 + 0 ant bee cat + 1 dog None fly + >>> pd.notna(df) + 0 1 2 + 0 True True True + 1 True False True + + >>> pd.notna(df[1]) + 0 True + 1 False + Name: 1, dtype: bool + """ + res = isna(obj) + if isinstance(res, bool): + return not res + return ~res @set_module("pandas") diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index eb6773310da69..9cc6976a1a8b8 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -2483,7 +2483,7 @@ def set_sticky( for i, level in enumerate(levels_): styles.append( { - "selector": f"thead tr:nth-child({level+1}) th", + "selector": f"thead tr:nth-child({level + 1}) th", "props": props + ( f"top:{i * pixel_size}px; height:{pixel_size}px; " @@ -2494,7 +2494,7 @@ def set_sticky( if not all(name is None for name in self.index.names): styles.append( { - "selector": f"thead tr:nth-child({obj.nlevels+1}) th", + "selector": f"thead tr:nth-child({obj.nlevels + 1}) th", "props": props + ( f"top:{(len(levels_)) * pixel_size}px; " @@ -2514,7 +2514,7 @@ def set_sticky( styles.extend( [ { - "selector": f"thead tr th:nth-child({level+1})", + "selector": f"thead tr th:nth-child({level + 1})", "props": props_ + "z-index:3 !important;", }, { @@ -3877,7 +3877,8 @@ def _validate_apply_axis_arg( ------- ndarray """ - dtype = {"dtype": dtype} if dtype else {} + dtype_kw = {"dtype": dtype} if dtype is not None else {} + # raise if input is wrong for axis: # raise if input is wrong for axis: if isinstance(arg, Series) and isinstance(data, DataFrame): raise ValueError( @@ -3890,17 +3891,36 @@ def _validate_apply_axis_arg( f"operations is a Series with 'axis in [0,1]'" ) if isinstance(arg, (Series, DataFrame)): # align indx / cols to data - arg = arg.reindex_like(data).to_numpy(**dtype) - else: - arg = np.asarray(arg, **dtype) - assert isinstance(arg, np.ndarray) # mypy requirement + # Memory-efficient: reindex_like then to_numpy only once + arg = arg.reindex_like(data).to_numpy(**dtype_kw) if arg.shape != data.shape: # check valid input raise ValueError( f"supplied '{arg_name}' is not correct shape for data over " f"selected 'axis': got {arg.shape}, " f"expected {data.shape}" ) - return arg + return arg + elif isinstance(arg, np.ndarray): + # already ndarray; no need to convert with np.asarray + if arg.shape != data.shape: + raise ValueError( + f"supplied '{arg_name}' is not correct shape for data over " + f"selected 'axis': got {arg.shape}, " + f"expected {data.shape}" + ) + if dtype is not None and arg.dtype != np.dtype(dtype): + arg = arg.astype(dtype, copy=False) + return arg + else: + # fast path for python Sequence + arr = np.asarray(arg, **dtype_kw) + if arr.shape != data.shape: + raise ValueError( + f"supplied '{arg_name}' is not correct shape for data over " + f"selected 'axis': got {arr.shape}, " + f"expected {data.shape}" + ) + return arr def _background_gradient( @@ -3989,11 +4009,15 @@ def _highlight_between( """ Return an array of css props based on condition of data values within given range. """ - if np.iterable(left) and not isinstance(left, str): - left = _validate_apply_axis_arg(left, "left", None, data) + # Efficient conversion and validation of bounds + left_array = None + right_array = None + if left is not None and np.iterable(left) and not isinstance(left, str): + left_array = _validate_apply_axis_arg(left, "left", None, data) + if right is not None and np.iterable(right) and not isinstance(right, str): + right_array = _validate_apply_axis_arg(right, "right", None, data) - if np.iterable(right) and not isinstance(right, str): - right = _validate_apply_axis_arg(right, "right", None, data) + # get ops with correct boundary attribution # get ops with correct boundary attribution if inclusive == "both": @@ -4010,28 +4034,26 @@ def _highlight_between( f"got {inclusive}" ) - g_left = ( - # error: Argument 2 to "ge" has incompatible type "Union[str, float, - # Period, Timedelta, Interval[Any], datetime64, timedelta64, datetime, - # Sequence[Any], ndarray[Any, Any], NDFrame]"; expected "Union - # [SupportsDunderLE, SupportsDunderGE, SupportsDunderGT, SupportsDunderLT]" - ops[0](data, left) # type: ignore[arg-type] - if left is not None - else np.full(data.shape, True, dtype=bool) - ) + # Use direct masking; only convert pd objects to ndarrays if necessary + if left is not None: + g_left = ops[0](data, left_array if left_array is not None else left) + else: + g_left = np.full(data.shape, True, dtype=bool) + if isinstance(g_left, (DataFrame, Series)): g_left = g_left.where(pd.notna(g_left), False) - l_right = ( - # error: Argument 2 to "le" has incompatible type "Union[str, float, - # Period, Timedelta, Interval[Any], datetime64, timedelta64, datetime, - # Sequence[Any], ndarray[Any, Any], NDFrame]"; expected "Union - # [SupportsDunderLE, SupportsDunderGE, SupportsDunderGT, SupportsDunderLT]" - ops[1](data, right) # type: ignore[arg-type] - if right is not None - else np.full(data.shape, True, dtype=bool) - ) + g_left = g_left.to_numpy(dtype=bool, copy=False) + + if right is not None: + l_right = ops[1](data, right_array if right_array is not None else right) + else: + l_right = np.full(data.shape, True, dtype=bool) + if isinstance(l_right, (DataFrame, Series)): l_right = l_right.where(pd.notna(l_right), False) + l_right = l_right.to_numpy(dtype=bool, copy=False) + + # Only perform a single np.where after all masks are computed as ndarrays return np.where(g_left & l_right, props, "") @@ -4109,8 +4131,10 @@ def css_bar(start: float, end: float, color: str) -> str: if end > start: cell_css += "background: linear-gradient(90deg," if start > 0: - cell_css += f" transparent {start*100:.1f}%, {color} {start*100:.1f}%," - cell_css += f" {color} {end*100:.1f}%, transparent {end*100:.1f}%)" + cell_css += ( + f" transparent {start * 100:.1f}%, {color} {start * 100:.1f}%," + ) + cell_css += f" {color} {end * 100:.1f}%, transparent {end * 100:.1f}%)" return cell_css def css_calc(x, left: float, right: float, align: str, color: str | list | tuple):