diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index 83dd11dacb..a258c01195 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -383,9 +383,16 @@ def to_series( name = self.name if name is None else name if index is None: - return bigframes.series.Series(data=self, index=self, name=name) + return bigframes.series.Series( + data=self, index=self, name=name, session=self._session + ) else: - return bigframes.series.Series(data=self, index=Index(index), name=name) + return bigframes.series.Series( + data=self, + index=Index(index, session=self._session), + name=name, + session=self._session, + ) def get_level_values(self, level) -> Index: level_n = level if isinstance(level, int) else self.names.index(level) diff --git a/bigframes/core/indexes/multi.py b/bigframes/core/indexes/multi.py index a8b4b7dffe..a611442b88 100644 --- a/bigframes/core/indexes/multi.py +++ b/bigframes/core/indexes/multi.py @@ -14,7 +14,7 @@ from __future__ import annotations -from typing import cast, Hashable, Iterable, Sequence +from typing import cast, Hashable, Iterable, Optional, Sequence, TYPE_CHECKING import bigframes_vendored.pandas.core.indexes.multi as vendored_pandas_multindex import pandas @@ -23,6 +23,9 @@ from bigframes.core import expression as ex from bigframes.core.indexes.base import Index +if TYPE_CHECKING: + import bigframes.session + class MultiIndex(Index, vendored_pandas_multindex.MultiIndex): __doc__ = vendored_pandas_multindex.MultiIndex.__doc__ @@ -33,10 +36,12 @@ def from_tuples( tuples: Iterable[tuple[Hashable, ...]], sortorder: int | None = None, names: Sequence[Hashable] | Hashable | None = None, + *, + session: Optional[bigframes.session.Session] = None, ) -> MultiIndex: pd_index = pandas.MultiIndex.from_tuples(tuples, sortorder, names) # Index.__new__ should detect multiple levels and properly create a multiindex - return cast(MultiIndex, Index(pd_index)) + return cast(MultiIndex, Index(pd_index, session=session)) @classmethod def from_arrays( @@ -44,10 +49,12 @@ def from_arrays( arrays, sortorder: int | None = None, names=None, + *, + session: Optional[bigframes.session.Session] = None, ) -> MultiIndex: pd_index = pandas.MultiIndex.from_arrays(arrays, sortorder, names) # Index.__new__ should detect multiple levels and properly create a multiindex - return cast(MultiIndex, Index(pd_index)) + return cast(MultiIndex, Index(pd_index, session=session)) def __eq__(self, other) -> Index: # type: ignore import bigframes.operations as ops @@ -71,3 +78,38 @@ def __eq__(self, other) -> Index: # type: ignore index_labels=[None], ) ) + + +class MultiIndexAccessor: + """Proxy to MultiIndex constructors to allow a session to be passed in.""" + + def __init__(self, session: bigframes.session.Session): + self._session = session + + def __call__(self, *args, **kwargs) -> MultiIndex: + """Construct a MultiIndex using the associated Session. + + See :class:`bigframes.pandas.MultiIndex`. + """ + return MultiIndex(*args, session=self._session, **kwargs) + + def from_arrays(self, *args, **kwargs) -> MultiIndex: + """Construct a MultiIndex using the associated Session. + + See :func:`bigframes.pandas.MultiIndex.from_arrays`. + """ + return MultiIndex.from_arrays(*args, session=self._session, **kwargs) + + def from_frame(self, *args, **kwargs) -> MultiIndex: + """Construct a MultiIndex using the associated Session. + + See :func:`bigframes.pandas.MultiIndex.from_frame`. + """ + return cast(MultiIndex, MultiIndex.from_frame(*args, **kwargs)) + + def from_tuples(self, *args, **kwargs) -> MultiIndex: + """Construct a MultiIndex using the associated Session. + + See :func:`bigframes.pandas.MultiIndex.from_tuples`. + """ + return MultiIndex.from_tuples(*args, session=self._session, **kwargs) diff --git a/bigframes/core/log_adapter.py b/bigframes/core/log_adapter.py index 3ec1e86dc7..8179ffbeed 100644 --- a/bigframes/core/log_adapter.py +++ b/bigframes/core/log_adapter.py @@ -155,7 +155,9 @@ def method_logger(method=None, /, *, custom_base_name: Optional[str] = None): def outer_wrapper(method): @functools.wraps(method) def wrapper(*args, **kwargs): - api_method_name = getattr(method, LOG_OVERRIDE_NAME, method.__name__) + api_method_name = getattr( + method, LOG_OVERRIDE_NAME, method.__name__ + ).lower() if custom_base_name is None: qualname_parts = getattr(method, "__qualname__", method.__name__).split( "." diff --git a/bigframes/core/reshape/tile.py b/bigframes/core/reshape/tile.py index 74a941be54..a2efa8f927 100644 --- a/bigframes/core/reshape/tile.py +++ b/bigframes/core/reshape/tile.py @@ -15,6 +15,7 @@ from __future__ import annotations import typing +from typing import Optional, TYPE_CHECKING import bigframes_vendored.constants as constants import bigframes_vendored.pandas.core.reshape.tile as vendored_pandas_tile @@ -31,6 +32,9 @@ import bigframes.operations.aggregations as agg_ops import bigframes.series +if TYPE_CHECKING: + import bigframes.session + def cut( x, @@ -42,6 +46,7 @@ def cut( *, right: typing.Optional[bool] = True, labels: typing.Union[typing.Iterable[str], bool, None] = None, + session: Optional[bigframes.session.Session] = None, ) -> bigframes.series.Series: if ( labels is not None @@ -65,7 +70,7 @@ def cut( raise ValueError("Cannot cut empty array.") if not isinstance(x, bigframes.series.Series): - x = bigframes.series.Series(x) + x = bigframes.series.Series(x, session=session) if isinstance(bins, int): if bins <= 0: diff --git a/bigframes/core/tools/datetimes.py b/bigframes/core/tools/datetimes.py index 7edf2fa2e4..0e5594d498 100644 --- a/bigframes/core/tools/datetimes.py +++ b/bigframes/core/tools/datetimes.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + from collections.abc import Mapping from datetime import date, datetime -from typing import Optional, Union +from typing import Optional, TYPE_CHECKING, Union import bigframes_vendored.constants as constants import bigframes_vendored.pandas.core.tools.datetimes as vendored_pandas_datetimes @@ -25,6 +27,9 @@ import bigframes.operations as ops import bigframes.series +if TYPE_CHECKING: + import bigframes.session + def to_datetime( arg: Union[ @@ -37,6 +42,7 @@ def to_datetime( utc: bool = False, format: Optional[str] = None, unit: Optional[str] = None, + session: Optional[bigframes.session.Session] = None, ) -> Union[pd.Timestamp, datetime, bigframes.series.Series]: if isinstance(arg, (int, float, str, datetime, date)): return pd.to_datetime( @@ -52,7 +58,7 @@ def to_datetime( f"to datetime is not implemented. {constants.FEEDBACK_LINK}" ) - arg = bigframes.series.Series(arg) + arg = bigframes.series.Series(arg, session=session) if format and unit and arg.dtype in (bigframes.dtypes.INT_DTYPE, bigframes.dtypes.FLOAT_DTYPE): # type: ignore raise ValueError("cannot specify both format and unit") diff --git a/bigframes/formatting_helpers.py b/bigframes/formatting_helpers.py index f75394c47d..55731069a3 100644 --- a/bigframes/formatting_helpers.py +++ b/bigframes/formatting_helpers.py @@ -105,8 +105,14 @@ def progress_callback( """Displays a progress bar while the query is running""" global current_display, current_display_id, previous_display_html - import bigframes._config - import bigframes.core.events + try: + import bigframes._config + import bigframes.core.events + except ImportError: + # Since this gets called from __del__, skip if the import fails to avoid + # ImportError: sys.meta_path is None, Python is likely shutting down. + # This will allow cleanup to continue. + return progress_bar = bigframes._config.options.display.progress_bar diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 2455637b0a..6fcb71f0d8 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -16,8 +16,8 @@ from __future__ import annotations -from collections import namedtuple -from datetime import date, datetime +import collections +import datetime import inspect import sys import typing @@ -198,18 +198,18 @@ def to_datetime( @typing.overload def to_datetime( - arg: Union[int, float, str, datetime, date], + arg: Union[int, float, str, datetime.datetime, datetime.date], *, utc: bool = False, format: Optional[str] = None, unit: Optional[str] = None, -) -> Union[pandas.Timestamp, datetime]: +) -> Union[pandas.Timestamp, datetime.datetime]: ... def to_datetime( arg: Union[ - Union[int, float, str, datetime, date], + Union[int, float, str, datetime.datetime, datetime.date], vendored_pandas_datetimes.local_iterables, bigframes.series.Series, bigframes.dataframe.DataFrame, @@ -218,8 +218,9 @@ def to_datetime( utc: bool = False, format: Optional[str] = None, unit: Optional[str] = None, -) -> Union[pandas.Timestamp, datetime, bigframes.series.Series]: - return bigframes.core.tools.to_datetime( +) -> Union[pandas.Timestamp, datetime.datetime, bigframes.series.Series]: + return global_session.with_default_session( + bigframes.session.Session.to_datetime, arg, utc=utc, format=format, @@ -322,7 +323,7 @@ def clean_up_by_session_id( __version__ = bigframes.version.__version__ # Other public pandas attributes -NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"]) +NamedAgg = collections.namedtuple("NamedAgg", ["column", "aggfunc"]) options = config.options """Global :class:`~bigframes._config.Options` to configure BigQuery DataFrames.""" diff --git a/bigframes/pandas/core/tools/timedeltas.py b/bigframes/pandas/core/tools/timedeltas.py index 070a41d62d..eb01f9f846 100644 --- a/bigframes/pandas/core/tools/timedeltas.py +++ b/bigframes/pandas/core/tools/timedeltas.py @@ -35,7 +35,7 @@ def to_timedelta( return arg._apply_unary_op(ops.ToTimedeltaOp(canonical_unit)) if pdtypes.is_list_like(arg): - return to_timedelta(series.Series(arg), unit, session=session) + return to_timedelta(series.Series(arg, session=session), unit, session=session) return pd.to_timedelta(arg, unit) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 46fb56b88e..886072b884 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -68,6 +68,8 @@ import bigframes.core from bigframes.core import blocks, log_adapter, utils import bigframes.core.events +import bigframes.core.indexes +import bigframes.core.indexes.multi import bigframes.core.pyformat import bigframes.formatting_helpers import bigframes.functions._function_session as bff_session @@ -79,7 +81,6 @@ # Avoid circular imports. if typing.TYPE_CHECKING: - import bigframes.core.indexes import bigframes.dataframe as dataframe import bigframes.series import bigframes.streaming.dataframe as streaming_dataframe @@ -320,6 +321,15 @@ def bqconnectionmanager(self): ) return self._bq_connection_manager + @property + def options(self) -> bigframes._config.Options: + """Options for configuring BigQuery DataFrames. + + Included for compatibility between bpd and Session. + """ + # TODO(tswast): Consider making a separate session-level options object. + return bigframes._config.options + @property def session_id(self): return self._session_id @@ -1826,7 +1836,7 @@ def udf( Turning an arbitrary python function into a BigQuery managed python udf: >>> bq_name = datetime.datetime.now().strftime("bigframes_%Y%m%d%H%M%S%f") - >>> @bpd.udf(dataset="bigfranes_testing", name=bq_name) + >>> @bpd.udf(dataset="bigfranes_testing", name=bq_name) # doctest: +SKIP ... def minutes_to_hours(x: int) -> float: ... return x/60 @@ -1839,8 +1849,8 @@ def udf( 4 120 dtype: Int64 - >>> hours = minutes.apply(minutes_to_hours) - >>> hours + >>> hours = minutes.apply(minutes_to_hours) # doctest: +SKIP + >>> hours # doctest: +SKIP 0 0.0 1 0.5 2 1.0 @@ -1853,7 +1863,7 @@ def udf( packages (optionally with the package version) via `packages` param. >>> bq_name = datetime.datetime.now().strftime("bigframes_%Y%m%d%H%M%S%f") - >>> @bpd.udf( + >>> @bpd.udf( # doctest: +SKIP ... dataset="bigfranes_testing", ... name=bq_name, ... packages=["cryptography"] @@ -1870,14 +1880,14 @@ def udf( ... return f.encrypt(input.encode()).decode() >>> names = bpd.Series(["Alice", "Bob"]) - >>> hashes = names.apply(get_hash) + >>> hashes = names.apply(get_hash) # doctest: +SKIP You can clean-up the BigQuery functions created above using the BigQuery client from the BigQuery DataFrames session: >>> session = bpd.get_global_session() - >>> session.bqclient.delete_routine(minutes_to_hours.bigframes_bigquery_function) - >>> session.bqclient.delete_routine(get_hash.bigframes_bigquery_function) + >>> session.bqclient.delete_routine(minutes_to_hours.bigframes_bigquery_function) # doctest: +SKIP + >>> session.bqclient.delete_routine(get_hash.bigframes_bigquery_function) # doctest: +SKIP Args: input_types (type or sequence(type), Optional): @@ -2297,6 +2307,104 @@ def read_gbq_object_table( s = self._loader.read_gbq_table(object_table)["uri"].str.to_blob(connection) return s.rename(name).to_frame() + # ========================================================================= + # bigframes.pandas attributes + # + # These are included so that Session and bigframes.pandas can be used + # interchangeably. + # ========================================================================= + def cut(self, *args, **kwargs) -> bigframes.series.Series: + """Cuts a BigQuery DataFrames object. + + Included for compatibility between bpd and Session. + + See :func:`bigframes.pandas.cut` for full documentation. + """ + import bigframes.core.reshape.tile + + return bigframes.core.reshape.tile.cut( + *args, + session=self, + **kwargs, + ) + + def DataFrame(self, *args, **kwargs): + """Constructs a DataFrame. + + Included for compatibility between bpd and Session. + + See :class:`bigframes.pandas.DataFrame` for full documentation. + """ + import bigframes.dataframe + + return bigframes.dataframe.DataFrame(*args, session=self, **kwargs) + + @property + def MultiIndex(self) -> bigframes.core.indexes.multi.MultiIndexAccessor: + """Constructs a MultiIndex. + + Included for compatibility between bpd and Session. + + See :class:`bigframes.pandas.MulitIndex` for full documentation. + """ + import bigframes.core.indexes.multi + + return bigframes.core.indexes.multi.MultiIndexAccessor(self) + + def Index(self, *args, **kwargs): + """Constructs a Index. + + Included for compatibility between bpd and Session. + + See :class:`bigframes.pandas.Index` for full documentation. + """ + import bigframes.core.indexes + + return bigframes.core.indexes.Index(*args, session=self, **kwargs) + + def Series(self, *args, **kwargs): + """Constructs a Series. + + Included for compatibility between bpd and Session. + + See :class:`bigframes.pandas.Series` for full documentation. + """ + import bigframes.series + + return bigframes.series.Series(*args, session=self, **kwargs) + + def to_datetime( + self, *args, **kwargs + ) -> Union[pandas.Timestamp, datetime.datetime, bigframes.series.Series]: + """Converts a BigQuery DataFrames object to datetime dtype. + + Included for compatibility between bpd and Session. + + See :func:`bigframes.pandas.to_datetime` for full documentation. + """ + import bigframes.core.tools + + return bigframes.core.tools.to_datetime( + *args, + session=self, + **kwargs, + ) + + def to_timedelta(self, *args, **kwargs): + """Converts a BigQuery DataFrames object to timedelta/duration dtype. + + Included for compatibility between bpd and Session. + + See :func:`bigframes.pandas.to_timedelta` for full documentation. + """ + import bigframes.pandas.core.tools.timedeltas + + return bigframes.pandas.core.tools.timedeltas.to_timedelta( + *args, + session=self, + **kwargs, + ) + def connect(context: Optional[bigquery_options.BigQueryOptions] = None) -> Session: return Session(context) diff --git a/tests/system/small/test_session_as_bpd.py b/tests/system/small/test_session_as_bpd.py new file mode 100644 index 0000000000..e280c551cb --- /dev/null +++ b/tests/system/small/test_session_as_bpd.py @@ -0,0 +1,154 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Check that bpd and Session can be used interchangablely.""" + +from __future__ import annotations + +from typing import cast + +import numpy as np +import pandas.testing + +import bigframes.pandas as bpd +import bigframes.session + + +def test_cut(session: bigframes.session.Session): + sc = [30, 80, 40, 90, 60, 45, 95, 75, 55, 100, 65, 85] + x = [20, 40, 60, 80, 100] + + bpd_result = bpd.cut(sc, x) + session_result = session.cut(sc, x) + + global_session = bpd.get_global_session() + assert global_session is not session + assert bpd_result._session is global_session + assert session_result._session is session + + bpd_pd = bpd_result.to_pandas() + session_pd = session_result.to_pandas() + pandas.testing.assert_series_equal(bpd_pd, session_pd) + + +def test_dataframe(session: bigframes.session.Session): + data = {"col": ["local", None, "data"]} + + bpd_result = bpd.DataFrame(data) + session_result = session.DataFrame(data) + + global_session = bpd.get_global_session() + assert global_session is not session + assert bpd_result._session is global_session + assert session_result._session is session + + bpd_pd = bpd_result.to_pandas() + session_pd = session_result.to_pandas() + pandas.testing.assert_frame_equal(bpd_pd, session_pd) + + +def test_multiindex_from_arrays(session: bigframes.session.Session): + arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]] + + bpd_result = bpd.MultiIndex.from_arrays(arrays, names=("number", "color")) + session_result = session.MultiIndex.from_arrays(arrays, names=("number", "color")) + + global_session = bpd.get_global_session() + assert global_session is not session + assert bpd_result._session is global_session + assert session_result._session is session + + bpd_pd = bpd_result.to_pandas() + session_pd = session_result.to_pandas() + pandas.testing.assert_index_equal(bpd_pd, session_pd) + + +def test_multiindex_from_tuples(session: bigframes.session.Session): + tuples = [(1, "red"), (1, "blue"), (2, "red"), (2, "blue")] + + bpd_result = bpd.MultiIndex.from_tuples(tuples, names=("number", "color")) + session_result = session.MultiIndex.from_tuples(tuples, names=("number", "color")) + + global_session = bpd.get_global_session() + assert global_session is not session + assert bpd_result._session is global_session + assert session_result._session is session + + bpd_pd = bpd_result.to_pandas() + session_pd = session_result.to_pandas() + pandas.testing.assert_index_equal(bpd_pd, session_pd) + + +def test_index(session: bigframes.session.Session): + index = [1, 2, 3] + + bpd_result = bpd.Index(index) + session_result = session.Index(index) + + global_session = bpd.get_global_session() + assert global_session is not session + assert bpd_result._session is global_session + assert session_result._session is session + + bpd_pd = bpd_result.to_pandas() + session_pd = session_result.to_pandas() + pandas.testing.assert_index_equal(bpd_pd, session_pd) + + +def test_series(session: bigframes.session.Session): + series = [1, 2, 3] + + bpd_result = bpd.Series(series) + session_result = session.Series(series) + + global_session = bpd.get_global_session() + assert global_session is not session + assert bpd_result._session is global_session + assert session_result._session is session + + bpd_pd = bpd_result.to_pandas() + session_pd = session_result.to_pandas() + pandas.testing.assert_series_equal(bpd_pd, session_pd) + + +def test_to_datetime(session: bigframes.session.Session): + datetimes = ["2018-10-26 12:00:00", "2018-10-26 13:00:15"] + + bpd_result = bpd.to_datetime(datetimes) + session_result = cast(bpd.Series, session.to_datetime(datetimes)) + + global_session = bpd.get_global_session() + assert global_session is not session + assert bpd_result._session is global_session + assert session_result._session is session + + bpd_pd = bpd_result.to_pandas() + session_pd = session_result.to_pandas() + pandas.testing.assert_series_equal(bpd_pd, session_pd) + + +def test_to_timedelta(session: bigframes.session.Session): + offsets = np.arange(5) + + bpd_result = bpd.to_timedelta(offsets, unit="s") + session_result = session.to_timedelta(offsets, unit="s") + + global_session = bpd.get_global_session() + assert global_session is not session + assert bpd_result._session is global_session + assert session_result._session is session + + bpd_pd = bpd_result.to_pandas() + session_pd = session_result.to_pandas() + pandas.testing.assert_series_equal(bpd_pd, session_pd) diff --git a/tests/unit/test_pandas.py b/tests/unit/test_pandas.py index 73e0b7f2d6..5e75e6b20f 100644 --- a/tests/unit/test_pandas.py +++ b/tests/unit/test_pandas.py @@ -64,8 +64,12 @@ def test_method_matches_session(method_name: str): pandas_method = getattr(bigframes.pandas, method_name) pandas_doc = inspect.getdoc(pandas_method) assert pandas_doc is not None, "docstrings are required" - assert re.sub(leading_whitespace, "", pandas_doc) == re.sub( - leading_whitespace, "", session_doc + + pandas_doc_stripped = re.sub(leading_whitespace, "", pandas_doc) + session_doc_stripped = re.sub(leading_whitespace, "", session_doc) + assert ( + pandas_doc_stripped == session_doc_stripped + or ":`bigframes.pandas" in session_doc_stripped ) # Add `eval_str = True` so that deferred annotations are turned into their @@ -75,18 +79,20 @@ def test_method_matches_session(method_name: str): eval_str=True, globals={**vars(bigframes.session), **{"dataframe": bigframes.dataframe}}, ) - pandas_signature = inspect.signature(pandas_method, eval_str=True) - assert [ - # Kind includes position, which will be an offset. - parameter.replace(kind=inspect.Parameter.POSITIONAL_ONLY) - for parameter in pandas_signature.parameters.values() - ] == [ + session_args = [ # Kind includes position, which will be an offset. parameter.replace(kind=inspect.Parameter.POSITIONAL_ONLY) for parameter in session_signature.parameters.values() # Don't include the first parameter, which is `self: Session` - ][ - 1: + ][1:] + pandas_signature = inspect.signature(pandas_method, eval_str=True) + pandas_args = [ + # Kind includes position, which will be an offset. + parameter.replace(kind=inspect.Parameter.POSITIONAL_ONLY) + for parameter in pandas_signature.parameters.values() + ] + assert session_args == pandas_args or ["args", "kwargs"] == [ + parameter.name for parameter in session_args ] assert pandas_signature.return_annotation == session_signature.return_annotation