From 31e174f2a11e97dc58caa141b855a93a534d93ba Mon Sep 17 00:00:00 2001
From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Thu, 30 Oct 2025 18:57:16 +0000
Subject: [PATCH 1/5] DEPR: Deprecate DataFrame Interchange Protocol

---
 doc/source/whatsnew/v3.0.0.rst            |   1 +
 pandas/core/frame.py                      |  17 +-
 pandas/core/interchange/from_dataframe.py |  24 ++-
 pandas/tests/interchange/test_impl.py     | 146 +++++++++++-------
 .../interchange/test_spec_conformance.py  |  31 ++--
 5 files changed, 153 insertions(+), 66 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 38755aef32b85..929d6dfacd4ed 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -738,6 +738,7 @@ Other Deprecations
 - Deprecated backward-compatibility behavior for :meth:`DataFrame.select_dtypes` matching "str" dtype when ``np.object_`` is specified (:issue:`61916`)
 - Deprecated option "future.no_silent_downcasting", as it is no longer used. In a future version accessing this option will raise (:issue:`59502`)
 - Deprecated slicing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` using a ``datetime.date`` object, explicitly cast to :class:`Timestamp` instead (:issue:`35830`)
+- Deprecated support for the Dataframe Interchange Protocol (:issue:`56732`)
 - Deprecated the 'inplace' keyword from :meth:`Resampler.interpolate`, as passing ``True`` raises ``AttributeError`` (:issue:`58690`)
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 68ea6795d47dd..bee99afd7993a 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -929,6 +929,14 @@ def __dataframe__(
           - converting to pandas: for pandas >= 2.0.3
           - converting from pandas: for pandas >= 3.0.0
 
+        .. deprecated:: 3.0.0
+
+            The Dataframe Interchange Protocol is deprecated.
+            For dataframe-agnostic code, you may want to look into:
+
+            - `Arrow PyCapsule Interface <https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html>`_
+            - `Narwhals <https://github.com/narwhals-dev/narwhals>`_
+
         Parameters
         ----------
         nan_as_null : bool, default False
@@ -970,7 +978,14 @@ def __dataframe__(
         These methods (``column_names``, ``select_columns_by_name``) should work
         for any dataframe library which implements the interchange protocol.
""" - + warnings.warn( + "The Dataframe Interchange Protocol is deprecated.\n" + "For dataframe-agnostic code, you may want to look into:\n" + "- Arrow PyCapsule Interface: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html\n" + "- Narwhals: https://github.com/narwhals-dev/narwhals\n", + Pandas4Warning, + stacklevel=find_stack_level(), + ) from pandas.core.interchange.dataframe import PandasDataFrameXchg return PandasDataFrameXchg(self, allow_copy=allow_copy) diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index bcbeb546f845c..04278c0e7856d 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -6,13 +6,16 @@ Any, overload, ) +import warnings import numpy as np from pandas._config import using_string_dtype from pandas.compat._optional import import_optional_dependency +from pandas.errors import Pandas4Warning from pandas.util._decorators import set_module +from pandas.util._exceptions import find_stack_level import pandas as pd from pandas.core.interchange.dataframe_protocol import ( @@ -47,6 +50,9 @@ def from_dataframe(df, allow_copy: bool = True) -> pd.DataFrame: From pandas 3.0 onwards, `from_dataframe` uses the PyCapsule Interface, only falling back to the interchange protocol if that fails. + From pandas 4.0 onwards, that fallback will no longer be available and only + the PyCapsule Interface will be used. + .. warning:: Due to severe implementation issues, we recommend only considering using the @@ -99,7 +105,14 @@ def from_dataframe(df, allow_copy: bool = True) -> pd.DataFrame: pa = import_optional_dependency("pyarrow", min_version="14.0.0") except ImportError: # fallback to _from_dataframe - pass + warnings.warn( + "Conversion using Arrow PyCapsule Interface failed due to " + "missing PyArrow>=14 dependency, falling back to (deprecated) " + "interchange protocol. 
We recommend that you install " + "PyArrow>=14.0.0.", + UserWarning, + stacklevel=find_stack_level(), + ) else: try: return pa.table(df).to_pandas(zero_copy_only=not allow_copy) @@ -109,6 +122,15 @@ def from_dataframe(df, allow_copy: bool = True) -> pd.DataFrame: if not hasattr(df, "__dataframe__"): raise ValueError("`df` does not support __dataframe__") + warnings.warn( + "The Dataframe Interchange Protocol is deprecated.\n" + "For dataframe-agnostic code, you may want to look into:\n" + "- Arrow PyCapsule Interface: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html\n" + "- Narwhals: https://github.com/narwhals-dev/narwhals\n", + Pandas4Warning, + stacklevel=find_stack_level(), + ) + return _from_dataframe( df.__dataframe__(allow_copy=allow_copy), allow_copy=allow_copy ) diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 5b7564e77d0ab..73147f14bbf92 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -31,7 +31,8 @@ def test_categorical_dtype(data): } df = pd.DataFrame({"A": (data_categorical[data[0]])}) - col = df.__dataframe__().get_column_by_name("A") + with tm.assert_produces_warning(match="Interchange"): + col = df.__dataframe__().get_column_by_name("A") assert col.dtype[0] == DtypeKind.CATEGORICAL assert col.null_count == 0 assert col.describe_null == (ColumnNullType.USE_SENTINEL, -1) @@ -44,7 +45,8 @@ def test_categorical_dtype(data): desc_cat["categories"]._col, pd.Series(["a", "d", "e", "s", "t"]) ) - tm.assert_frame_equal(df, from_dataframe(df.__dataframe__())) + with tm.assert_produces_warning(match="Interchange"): + tm.assert_frame_equal(df, from_dataframe(df.__dataframe__())) def test_categorical_pyarrow(): @@ -54,7 +56,8 @@ def test_categorical_pyarrow(): arr = ["Mon", "Tue", "Mon", "Wed", "Mon", "Thu", "Fri", "Sat", "Sun"] table = pa.table({"weekday": pa.array(arr).dictionary_encode()}) exchange_df = table.__dataframe__() - result = from_dataframe(exchange_df) + with tm.assert_produces_warning(match="Interchange"): + result = from_dataframe(exchange_df) weekday = pd.Categorical( arr, categories=["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] ) @@ -72,7 +75,8 @@ def test_empty_categorical_pyarrow(): arr = [None] table = pa.table({"arr": pa.array(arr, "float64").dictionary_encode()}) exchange_df = table.__dataframe__() - result = pd.api.interchange.from_dataframe(exchange_df) + with tm.assert_produces_warning(match="Interchange"): + result = pd.api.interchange.from_dataframe(exchange_df) expected = pd.DataFrame({"arr": pd.Categorical([np.nan])}) tm.assert_frame_equal(result, expected) @@ -84,7 +88,8 @@ def test_large_string_pyarrow(): arr = ["Mon", "Tue"] table = pa.table({"weekday": pa.array(arr, "large_string")}) exchange_df = table.__dataframe__() - result = from_dataframe(exchange_df) + with tm.assert_produces_warning(match="Interchange"): + result = from_dataframe(exchange_df) expected = pd.DataFrame({"weekday": ["Mon", "Tue"]}) tm.assert_frame_equal(result, expected) @@ -110,12 +115,14 @@ def test_bitmasks_pyarrow(offset, length, expected_values): arr = [3.3, None, 2.1] table = pa.table({"arr": arr}).slice(offset, length) exchange_df = table.__dataframe__() - result = from_dataframe(exchange_df) + with tm.assert_produces_warning(match="Interchange"): + result = from_dataframe(exchange_df) expected = pd.DataFrame({"arr": expected_values}) tm.assert_frame_equal(result, expected) # check round-trip - assert 
pa.Table.equals(pa.interchange.from_dataframe(result), table) + with tm.assert_produces_warning(match="Interchange", check_stacklevel=False): + assert pa.Table.equals(pa.interchange.from_dataframe(result), table) @pytest.mark.parametrize( @@ -140,7 +147,8 @@ def test_dataframe(data): } df = pd.DataFrame(data) - df2 = df.__dataframe__() + with tm.assert_produces_warning(match="Interchange"): + df2 = df.__dataframe__() assert df2.num_columns() == NCOLS assert df2.num_rows() == NROWS @@ -150,8 +158,9 @@ def test_dataframe(data): indices = (0, 2) names = tuple(list(data.keys())[idx] for idx in indices) - result = from_dataframe(df2.select_columns(indices)) - expected = from_dataframe(df2.select_columns_by_name(names)) + with tm.assert_produces_warning(match="Interchange"): + result = from_dataframe(df2.select_columns(indices)) + expected = from_dataframe(df2.select_columns_by_name(names)) tm.assert_frame_equal(result, expected) assert isinstance(result.attrs["_INTERCHANGE_PROTOCOL_BUFFERS"], list) @@ -175,7 +184,8 @@ def test_missing_from_masked(): ] df.loc[null_idx, col] = None - df2 = df.__dataframe__() + with tm.assert_produces_warning(match="Interchange"): + df2 = df.__dataframe__() assert df2.get_column_by_name("x").null_count == dict_null["x"] assert df2.get_column_by_name("y").null_count == dict_null["y"] @@ -196,7 +206,8 @@ def test_missing_from_masked(): ) def test_mixed_data(data): df = pd.DataFrame(data) - df2 = df.__dataframe__() + with tm.assert_produces_warning(match="Interchange"): + df2 = df.__dataframe__() for col_name in df.columns: assert df2.get_column_by_name(col_name).null_count == 0 @@ -211,7 +222,8 @@ def test_mixed_missing(): } ) - df2 = df.__dataframe__() + with tm.assert_produces_warning(match="Interchange"): + df2 = df.__dataframe__() for col_name in df.columns: assert df2.get_column_by_name(col_name).null_count == 2 @@ -229,7 +241,8 @@ def test_string(): } test_str_data = string_data["separator data"] + [""] df = pd.DataFrame({"A": test_str_data}) - col = df.__dataframe__().get_column_by_name("A") + with tm.assert_produces_warning(match="Interchange"): + col = df.__dataframe__().get_column_by_name("A") assert col.size() == 6 assert col.null_count == 1 @@ -237,7 +250,8 @@ def test_string(): assert col.describe_null == (ColumnNullType.USE_BYTEMASK, 0) df_sliced = df[1:] - col = df_sliced.__dataframe__().get_column_by_name("A") + with tm.assert_produces_warning(match="Interchange"): + col = df_sliced.__dataframe__().get_column_by_name("A") assert col.size() == 5 assert col.null_count == 1 assert col.dtype[0] == DtypeKind.STRING @@ -246,27 +260,31 @@ def test_string(): def test_nonstring_object(): df = pd.DataFrame({"A": ["a", 10, 1.0, ()]}) - col = df.__dataframe__().get_column_by_name("A") + with tm.assert_produces_warning(match="Interchange"): + col = df.__dataframe__().get_column_by_name("A") with pytest.raises(NotImplementedError, match="not supported yet"): col.dtype def test_datetime(): df = pd.DataFrame({"A": [pd.Timestamp("2022-01-01"), pd.NaT]}) - col = df.__dataframe__().get_column_by_name("A") + with tm.assert_produces_warning(match="Interchange"): + col = df.__dataframe__().get_column_by_name("A") assert col.size() == 2 assert col.null_count == 1 assert col.dtype[0] == DtypeKind.DATETIME assert col.describe_null == (ColumnNullType.USE_SENTINEL, iNaT) - tm.assert_frame_equal(df, from_dataframe(df.__dataframe__())) + with tm.assert_produces_warning(match="Interchange"): + tm.assert_frame_equal(df, from_dataframe(df.__dataframe__())) def 
test_categorical_to_numpy_dlpack(): # https://github.com/pandas-dev/pandas/issues/48393 df = pd.DataFrame({"A": pd.Categorical(["a", "b", "a"])}) - col = df.__dataframe__().get_column_by_name("A") + with tm.assert_produces_warning(match="Interchange"): + col = df.__dataframe__().get_column_by_name("A") result = np.from_dlpack(col.get_buffers()["data"][0]) expected = np.array([0, 1, 0], dtype="int8") tm.assert_numpy_array_equal(result, expected) @@ -301,11 +319,15 @@ def test_multi_chunk_column() -> None: ser = pd.Series([1, 2, None], dtype="Int64[pyarrow]") df = pd.concat([ser, ser], ignore_index=True).to_frame("a") df_orig = df.copy() - with pytest.raises( - RuntimeError, match="Found multi-chunk pyarrow array, but `allow_copy` is False" - ): - pd.api.interchange.from_dataframe(df.__dataframe__(allow_copy=False)) - result = pd.api.interchange.from_dataframe(df.__dataframe__(allow_copy=True)) + + with tm.assert_produces_warning(match="Interchange"): + with pytest.raises( + RuntimeError, + match="Found multi-chunk pyarrow array, but `allow_copy` is False", + ): + pd.api.interchange.from_dataframe(df.__dataframe__(allow_copy=False)) + with tm.assert_produces_warning(match="Interchange"): + result = pd.api.interchange.from_dataframe(df.__dataframe__(allow_copy=True)) # Interchange protocol defaults to creating numpy-backed columns, so currently this # is 'float64'. expected = pd.DataFrame({"a": [1.0, 2.0, None, 1.0, 2.0, None]}, dtype="float64") @@ -334,8 +356,9 @@ def test_timestamp_ns_pyarrow(): name="col0", ).to_frame() - dfi = df.__dataframe__() - result = pd.api.interchange.from_dataframe(dfi)["col0"].item() + with tm.assert_produces_warning(match="Interchange"): + dfi = df.__dataframe__() + result = pd.api.interchange.from_dataframe(dfi)["col0"].item() expected = pd.Timestamp(**timestamp_args) assert result == expected @@ -348,7 +371,8 @@ def test_datetimetzdtype(tz, unit): pd.date_range("2018-01-01", periods=5, freq="D").tz_localize(tz).as_unit(unit) ) df = pd.DataFrame({"ts_tz": tz_data}) - tm.assert_frame_equal(df, from_dataframe(df.__dataframe__())) + with tm.assert_produces_warning(match="Interchange"): + tm.assert_frame_equal(df, from_dataframe(df.__dataframe__())) def test_interchange_from_non_pandas_tz_aware(request): @@ -370,7 +394,8 @@ def test_interchange_from_non_pandas_tz_aware(request): arr = pc.assume_timezone(arr, "Asia/Kathmandu") table = pa.table({"arr": arr}) exchange_df = table.__dataframe__() - result = from_dataframe(exchange_df) + with tm.assert_produces_warning(match="Interchange"): + result = from_dataframe(exchange_df) expected = pd.DataFrame( ["2020-01-01 00:00:00+05:45", "NaT", "2020-01-02 00:00:00+05:45"], @@ -382,8 +407,9 @@ def test_interchange_from_non_pandas_tz_aware(request): def test_interchange_from_corrected_buffer_dtypes(monkeypatch) -> None: # https://github.com/pandas-dev/pandas/issues/54781 - df = pd.DataFrame({"a": ["foo", "bar"]}).__dataframe__() - interchange = df.__dataframe__() + with tm.assert_produces_warning(match="Interchange"): + df = pd.DataFrame({"a": ["foo", "bar"]}).__dataframe__() + interchange = df.__dataframe__() column = interchange.get_column_by_name("a") buffers = column.get_buffers() buffers_data = buffers["data"] @@ -398,14 +424,16 @@ def test_interchange_from_corrected_buffer_dtypes(monkeypatch) -> None: column.get_buffers = lambda: buffers interchange.get_column_by_name = lambda _: column monkeypatch.setattr(df, "__dataframe__", lambda allow_copy: interchange) - pd.api.interchange.from_dataframe(df) + with 
tm.assert_produces_warning(match="Interchange"): + pd.api.interchange.from_dataframe(df) def test_empty_string_column(): # https://github.com/pandas-dev/pandas/issues/56703 df = pd.DataFrame({"a": []}, dtype=str) - df2 = df.__dataframe__() - result = pd.api.interchange.from_dataframe(df2) + with tm.assert_produces_warning(match="Interchange"): + df2 = df.__dataframe__() + result = pd.api.interchange.from_dataframe(df2) tm.assert_frame_equal(df, result) @@ -413,7 +441,8 @@ def test_large_string(): # GH#56702 pytest.importorskip("pyarrow") df = pd.DataFrame({"a": ["x"]}, dtype="large_string[pyarrow]") - result = pd.api.interchange.from_dataframe(df.__dataframe__()) + with tm.assert_produces_warning(match="Interchange"): + result = pd.api.interchange.from_dataframe(df.__dataframe__()) expected = pd.DataFrame({"a": ["x"]}, dtype="str") tm.assert_frame_equal(result, expected) @@ -421,25 +450,28 @@ def test_large_string(): def test_non_str_names(): # https://github.com/pandas-dev/pandas/issues/56701 df = pd.Series([1, 2, 3], name=0).to_frame() - names = df.__dataframe__().column_names() + with tm.assert_produces_warning(match="Interchange"): + names = df.__dataframe__().column_names() assert names == ["0"] def test_non_str_names_w_duplicates(): # https://github.com/pandas-dev/pandas/issues/56701 df = pd.DataFrame({"0": [1, 2, 3], 0: [4, 5, 6]}) - dfi = df.__dataframe__() - with pytest.raises( - TypeError, - match=( - "Expected a Series, got a DataFrame. This likely happened because you " - "called __dataframe__ on a DataFrame which, after converting column " - r"names to string, resulted in duplicated names: Index\(\['0', '0'\], " - r"dtype='(str|object)'\). Please rename these columns before using the " - "interchange protocol." - ), - ): - pd.api.interchange.from_dataframe(dfi, allow_copy=False) + with tm.assert_produces_warning(match="Interchange"): + dfi = df.__dataframe__() + with tm.assert_produces_warning(match="Interchange"): + with pytest.raises( + TypeError, + match=( + "Expected a Series, got a DataFrame. This likely happened because you " + "called __dataframe__ on a DataFrame which, after converting column " + r"names to string, resulted in duplicated names: Index\(\['0', '0'\], " + r"dtype='(str|object)'\). Please rename these columns before using the " + "interchange protocol." 
+ ), + ): + pd.api.interchange.from_dataframe(dfi, allow_copy=False) @pytest.mark.parametrize( @@ -498,7 +530,8 @@ def test_pandas_nullable_with_missing_values( expected_dtype = pa.timestamp("us", "Asia/Kathmandu") df = pd.DataFrame({"a": data}, dtype=dtype) - result = pai.from_dataframe(df.__dataframe__())["a"] + with tm.assert_produces_warning(match="Interchange"): + result = pai.from_dataframe(df.__dataframe__())["a"] assert result.type == expected_dtype assert result[0].as_py() == data[0] assert result[1].as_py() == data[1] @@ -564,7 +597,8 @@ def test_pandas_nullable_without_missing_values( expected_dtype = pa.timestamp("us", "Asia/Kathmandu") df = pd.DataFrame({"a": data}, dtype=dtype) - result = pai.from_dataframe(df.__dataframe__())["a"] + with tm.assert_produces_warning(match="Interchange"): + result = pai.from_dataframe(df.__dataframe__())["a"] assert result.type == expected_dtype assert result[0].as_py() == data[0] assert result[1].as_py() == data[1] @@ -575,7 +609,8 @@ def test_string_validity_buffer() -> None: # https://github.com/pandas-dev/pandas/issues/57761 pytest.importorskip("pyarrow", "11.0.0") df = pd.DataFrame({"a": ["x"]}, dtype="large_string[pyarrow]") - result = df.__dataframe__().get_column_by_name("a").get_buffers()["validity"] + with tm.assert_produces_warning(match="Interchange"): + result = df.__dataframe__().get_column_by_name("a").get_buffers()["validity"] assert result is None @@ -583,7 +618,8 @@ def test_string_validity_buffer_no_missing() -> None: # https://github.com/pandas-dev/pandas/issues/57762 pytest.importorskip("pyarrow", "11.0.0") df = pd.DataFrame({"a": ["x", None]}, dtype="large_string[pyarrow]") - validity = df.__dataframe__().get_column_by_name("a").get_buffers()["validity"] + with tm.assert_produces_warning(match="Interchange"): + validity = df.__dataframe__().get_column_by_name("a").get_buffers()["validity"] assert validity is not None result = validity[1] expected = (DtypeKind.BOOL, 1, ArrowCTypes.BOOL, "=") @@ -593,8 +629,9 @@ def test_string_validity_buffer_no_missing() -> None: def test_empty_dataframe(): # https://github.com/pandas-dev/pandas/issues/56700 df = pd.DataFrame({"a": []}, dtype="int8") - dfi = df.__dataframe__() - result = pd.api.interchange.from_dataframe(dfi, allow_copy=False) + with tm.assert_produces_warning(match="Interchange"): + dfi = df.__dataframe__() + result = pd.api.interchange.from_dataframe(dfi, allow_copy=False) expected = pd.DataFrame({"a": []}, dtype="int8") tm.assert_frame_equal(result, expected) @@ -639,7 +676,8 @@ def test_buffer_dtype_categorical( ) -> None: # https://github.com/pandas-dev/pandas/issues/54781 df = pd.DataFrame({"data": data}) - dfi = df.__dataframe__() + with tm.assert_produces_warning(match="Interchange"): + dfi = df.__dataframe__() col = dfi.get_column_by_name("data") assert col.dtype == expected_dtype assert col.get_buffers()["data"][1] == expected_buffer_dtype diff --git a/pandas/tests/interchange/test_spec_conformance.py b/pandas/tests/interchange/test_spec_conformance.py index 55e42ed2023cd..04e19b290f886 100644 --- a/pandas/tests/interchange/test_spec_conformance.py +++ b/pandas/tests/interchange/test_spec_conformance.py @@ -9,6 +9,7 @@ import pytest import pandas as pd +import pandas._testing as tm @pytest.fixture @@ -32,7 +33,8 @@ def maker(dct, is_categorical=False): def test_only_one_dtype(test_data, df_from_dict): columns = list(test_data.keys()) df = df_from_dict(test_data) - dfX = df.__dataframe__() + with tm.assert_produces_warning(match="Interchange"): + dfX = 
df.__dataframe__() column_size = len(test_data[columns[0]]) for column in columns: @@ -54,7 +56,8 @@ def test_mixed_dtypes(df_from_dict): "f": ["a", "", "c"], # dtype kind STRING = 21 } ) - dfX = df.__dataframe__() + with tm.assert_produces_warning(match="Interchange"): + dfX = df.__dataframe__() # for meanings of dtype[0] see the spec; we cannot import the spec here as this # file is expected to be vendored *anywhere*; # values for dtype[0] are explained above @@ -74,7 +77,8 @@ def test_mixed_dtypes(df_from_dict): def test_na_float(df_from_dict): df = df_from_dict({"a": [1.0, math.nan, 2.0]}) - dfX = df.__dataframe__() + with tm.assert_produces_warning(match="Interchange"): + dfX = df.__dataframe__() colX = dfX.get_column_by_name("a") assert colX.null_count == 1 assert isinstance(colX.null_count, int) @@ -82,7 +86,8 @@ def test_na_float(df_from_dict): def test_noncategorical(df_from_dict): df = df_from_dict({"a": [1, 2, 3]}) - dfX = df.__dataframe__() + with tm.assert_produces_warning(match="Interchange"): + dfX = df.__dataframe__() colX = dfX.get_column_by_name("a") with pytest.raises(TypeError, match=".*categorical.*"): colX.describe_categorical @@ -94,7 +99,8 @@ def test_categorical(df_from_dict): is_categorical=True, ) - colX = df.__dataframe__().get_column_by_name("weekday") + with tm.assert_produces_warning(match="Interchange"): + colX = df.__dataframe__().get_column_by_name("weekday") categorical = colX.describe_categorical assert isinstance(categorical["is_ordered"], bool) assert isinstance(categorical["is_dictionary"], bool) @@ -104,7 +110,8 @@ def test_dataframe(df_from_dict): df = df_from_dict( {"x": [True, True, False], "y": [1, 2, 0], "z": [9.2, 10.5, 11.8]} ) - dfX = df.__dataframe__() + with tm.assert_produces_warning(match="Interchange"): + dfX = df.__dataframe__() assert dfX.num_columns() == 3 assert dfX.num_rows() == 3 @@ -118,7 +125,8 @@ def test_dataframe(df_from_dict): @pytest.mark.parametrize(["size", "n_chunks"], [(10, 3), (12, 3), (12, 5)]) def test_df_get_chunks(size, n_chunks, df_from_dict): df = df_from_dict({"x": list(range(size))}) - dfX = df.__dataframe__() + with tm.assert_produces_warning(match="Interchange"): + dfX = df.__dataframe__() chunks = list(dfX.get_chunks(n_chunks)) assert len(chunks) == n_chunks assert sum(chunk.num_rows() for chunk in chunks) == size @@ -127,7 +135,8 @@ def test_df_get_chunks(size, n_chunks, df_from_dict): @pytest.mark.parametrize(["size", "n_chunks"], [(10, 3), (12, 3), (12, 5)]) def test_column_get_chunks(size, n_chunks, df_from_dict): df = df_from_dict({"x": list(range(size))}) - dfX = df.__dataframe__() + with tm.assert_produces_warning(match="Interchange"): + dfX = df.__dataframe__() chunks = list(dfX.get_column(0).get_chunks(n_chunks)) assert len(chunks) == n_chunks assert sum(chunk.size() for chunk in chunks) == size @@ -135,7 +144,8 @@ def test_column_get_chunks(size, n_chunks, df_from_dict): def test_get_columns(df_from_dict): df = df_from_dict({"a": [0, 1], "b": [2.5, 3.5]}) - dfX = df.__dataframe__() + with tm.assert_produces_warning(match="Interchange"): + dfX = df.__dataframe__() for colX in dfX.get_columns(): assert colX.size() == 2 assert colX.num_chunks() == 1 @@ -148,7 +158,8 @@ def test_get_columns(df_from_dict): def test_buffer(df_from_dict): arr = [0, 1, -1] df = df_from_dict({"a": arr}) - dfX = df.__dataframe__() + with tm.assert_produces_warning(match="Interchange"): + dfX = df.__dataframe__() colX = dfX.get_column(0) bufX = colX.get_buffers() From 09f989328b3c682b0cd485a79a6cf9e1f03b1659 Mon Sep 17 
00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 31 Oct 2025 09:07:03 +0000 Subject: [PATCH 2/5] docstring validation, fixup tests --- pandas/core/frame.py | 16 ++++++++-------- pandas/tests/interchange/test_impl.py | 12 +++++++++--- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bee99afd7993a..9d5b61d0d0197 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -916,6 +916,14 @@ def __dataframe__( """ Return the dataframe interchange object implementing the interchange protocol. + .. deprecated:: 3.0.0 + + The Dataframe Interchange Protocol is deprecated. + For dataframe-agnostic code, you may want to look into: + + - `Arrow PyCapsule Interface `_ + - `Narwhals `_ + .. note:: For new development, we highly recommend using the Arrow C Data Interface @@ -929,14 +937,6 @@ def __dataframe__( - converting to pandas: for pandas >= 2.0.3 - converting from pandas: for pandas >= 3.0.0 - .. deprecated:: 3.0.0 - - The Dataframe Interchange Protocol is deprecated. - For dataframe-agnostic code, you may want to look into: - - - `Arrow PyCapsule Interface `_ - - `Narwhals `_ - Parameters ---------- nan_as_null : bool, default False diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 73147f14bbf92..bb59024441f5b 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -94,7 +94,9 @@ def test_large_string_pyarrow(): tm.assert_frame_equal(result, expected) # check round-trip - assert pa.Table.equals(pa.interchange.from_dataframe(result), table) + # Don't check stacklevel as PyArrow calls the deprecated `__dataframe__` method. + with tm.assert_produces_warning(match="Interchange", check_stacklevel=False): + assert pa.Table.equals(pa.interchange.from_dataframe(result), table) @pytest.mark.parametrize( @@ -121,6 +123,7 @@ def test_bitmasks_pyarrow(offset, length, expected_values): tm.assert_frame_equal(result, expected) # check round-trip + # Don't check stacklevel as PyArrow calls the deprecated `__dataframe__` method. with tm.assert_produces_warning(match="Interchange", check_stacklevel=False): assert pa.Table.equals(pa.interchange.from_dataframe(result), table) @@ -297,7 +300,9 @@ def test_empty_pyarrow(data): from pyarrow.interchange import from_dataframe as pa_from_dataframe expected = pd.DataFrame(data) - arrow_df = pa_from_dataframe(expected) + # Don't check stacklevel as PyArrow calls the deprecated `__dataframe__` method. + with tm.assert_produces_warning(match="Interchange", check_stacklevel=False): + arrow_df = pa_from_dataframe(expected) result = from_dataframe(arrow_df) tm.assert_frame_equal(result, expected, check_column_type=False) @@ -441,7 +446,8 @@ def test_large_string(): # GH#56702 pytest.importorskip("pyarrow") df = pd.DataFrame({"a": ["x"]}, dtype="large_string[pyarrow]") - with tm.assert_produces_warning(match="Interchange"): + # Don't check stacklevel as PyArrow calls the deprecated `__dataframe__` method. 
+ with tm.assert_produces_warning(match="Interchange", check_stacklevel=False): result = pd.api.interchange.from_dataframe(df.__dataframe__()) expected = pd.DataFrame({"a": ["x"]}, dtype="str") tm.assert_frame_equal(result, expected) From 82177f0709b788b6cd889c6f8ad986bbc528a322 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 31 Oct 2025 09:46:23 +0000 Subject: [PATCH 3/5] ignore deprecation warning in doctest --- pandas/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index 82501cae4634d..06bd52b5fd24b 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -141,6 +141,7 @@ def pytest_collection_modifyitems(items, config) -> None: ("is_datetime64tz_dtype", "is_datetime64tz_dtype is deprecated"), ("is_categorical_dtype", "is_categorical_dtype is deprecated"), ("is_sparse", "is_sparse is deprecated"), + ("DataFrame.__dataframe__", "Interchange Protocol is deprecated"), ("DataFrameGroupBy.fillna", "DataFrameGroupBy.fillna is deprecated"), ("DataFrameGroupBy.corrwith", "DataFrameGroupBy.corrwith is deprecated"), ("NDFrame.replace", "Series.replace without 'value'"), From fd47b4805802c37eb6121dff567408fb23e42ee2 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 31 Oct 2025 09:49:36 +0000 Subject: [PATCH 4/5] fixup minimum versions test --- pandas/tests/interchange/test_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index bb59024441f5b..3551cbc52b755 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -296,7 +296,7 @@ def test_categorical_to_numpy_dlpack(): @pytest.mark.parametrize("data", [{}, {"a": []}]) def test_empty_pyarrow(data): # GH 53155 - pytest.importorskip("pyarrow", "11.0.0") + pytest.importorskip("pyarrow", "14.0.0") from pyarrow.interchange import from_dataframe as pa_from_dataframe expected = pd.DataFrame(data) From 53bb0b9adf0795a25f09b6dc93efdf10bd095e5a Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 31 Oct 2025 11:33:54 +0000 Subject: [PATCH 5/5] from_dataframe doctest --- pandas/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index 06bd52b5fd24b..7fe4ec7a5ee4f 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -135,6 +135,7 @@ def pytest_collection_modifyitems(items, config) -> None: # Warnings from doctests that can be ignored; place reason in comment above. # Each entry specifies (path, message) - see the ignore_doctest_warning function ignored_doctest_warnings = [ + ("api.interchange.from_dataframe", ".*Interchange Protocol is deprecated"), ("is_int64_dtype", "is_int64_dtype is deprecated"), ("is_interval_dtype", "is_interval_dtype is deprecated"), ("is_period_dtype", "is_period_dtype is deprecated"),
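For users migrating off the deprecated protocol, a minimal sketch of the replacement path that the new warning message points to is given below. It assumes pyarrow >= 14.0.0 is installed; the pandas DataFrame is only a stand-in for any object exporting the Arrow PyCapsule Interface, and the variable names are illustrative, not part of the patches above.

import pandas as pd
import pyarrow as pa

# Stand-in for any dataframe object exporting the Arrow PyCapsule Interface
# (pandas itself exports it from 3.0.0 onwards).
df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})

# Deprecated: round trip through the interchange protocol.
# result = pd.api.interchange.from_dataframe(df.__dataframe__())

# Suggested replacement: let pyarrow consume the data and convert back,
# mirroring the PyCapsule path that from_dataframe() now tries first.
result = pa.table(df).to_pandas()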