From 9797095b56a6cf3c094b36c40025afe957279c6f Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Wed, 1 Oct 2025 17:22:19 +0000
Subject: [PATCH] feat: Implement GeoSeries scalar operators

This commit implements 6 new GeoSeries scalar properties and methods:
- `is_empty`
- `geom_type`
- `is_ring`
- `is_simple`
- `is_valid`
- `union`

This change includes:
- Defining the new operations in `bigframes/operations/geo_ops.py`.
- Implementing the compilation logic for both Ibis and Polars backends.
- Adding the new properties and methods to the `GeoSeries` class.
- Adding unit tests for all new features.
---
 .../ibis_compiler/operations/geo_ops.py      | 109 ++++++++++++++
 bigframes/core/compile/polars/compiler.py    |  92 ++++++++++++
 bigframes/geopandas/geoseries.py             |  33 +++++
 bigframes/operations/__init__.py             |   6 +
 bigframes/operations/geo_ops.py              |  45 ++++++
 .../system/small/geopandas/test_geoseries.py |  29 ++++
 tests/unit/test_geoseries.py                 | 131 +++++++++++++++++
 7 files changed, 445 insertions(+)
 create mode 100644 bigframes/core/compile/ibis_compiler/operations/geo_ops.py
 create mode 100644 tests/unit/test_geoseries.py

diff --git a/bigframes/core/compile/ibis_compiler/operations/geo_ops.py b/bigframes/core/compile/ibis_compiler/operations/geo_ops.py
new file mode 100644
index 0000000000..1787dd9c0d
--- /dev/null
+++ b/bigframes/core/compile/ibis_compiler/operations/geo_ops.py
@@ -0,0 +1,109 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+BigFrames -> Ibis compilation for the operations in bigframes.operations.geo_ops.
+
+Please keep implementations in sequential order by op name.
+"""
+
+from __future__ import annotations
+
+from bigframes_vendored.ibis.expr import types as ibis_types
+import bigframes_vendored.ibis.udf.scalar as ibis_udf
+
+from bigframes.core.compile.ibis_compiler.scalar_op_compiler import scalar_op_compiler
+from bigframes.operations import geo_ops
+
+register_unary_op = scalar_op_compiler.register_unary_op
+register_binary_op = scalar_op_compiler.register_binary_op
+
+
+@ibis_udf.scalar.builtin("ST_IsEmpty")
+def st_isempty(x: ibis_types.GeoValue) -> ibis_types.BooleanValue:
+    ...
+
+
+@register_unary_op(geo_ops.geo_st_isempty_op)
+def geo_st_isempty_op_impl(x: ibis_types.Value):
+    return st_isempty(x)
+
+
+@ibis_udf.scalar.builtin("ST_GeometryType")
+def st_geometrytype(x: ibis_types.GeoValue) -> ibis_types.StringValue:
+    ...
+
+
+@register_unary_op(geo_ops.geo_st_geometrytype_op)
+def geo_st_geometrytype_op_impl(x: ibis_types.Value):
+    return st_geometrytype(x)
+
+
+@ibis_udf.scalar.builtin("ST_IsRing")
+def st_isring(x: ibis_types.GeoValue) -> ibis_types.BooleanValue:
+    ...
+
+
+@register_unary_op(geo_ops.geo_st_isring_op)
+def geo_st_isring_op_impl(x: ibis_types.Value):
+    return st_isring(x)
+
+
+@ibis_udf.scalar.builtin("ST_EQUALS")
+def st_equals(
+    x: ibis_types.GeoValue, y: ibis_types.GeoValue
+) -> ibis_types.BooleanValue:
+    ...
+
+
+@ibis_udf.scalar.builtin("ST_SIMPLIFY")
+def st_simplify(
+    x: ibis_types.GeoValue, tolerance: ibis_types.NumericValue
+) -> ibis_types.GeoValue:
+    ...
+
+
+@register_unary_op(geo_ops.geo_st_issimple_op)
+def geo_st_issimple_op_impl(x: ibis_types.Value):
+    # NOTE(review): a geography is reported as simple when ST_SIMPLIFY with a
+    # zero tolerance leaves it unchanged. Confirm this matches the intended
+    # GeoPandas `is_simple` semantics.
+    simplified = st_simplify(x, 0.0)
+    return st_equals(x, simplified)
+
+
+# NOTE(review): confirm that BigQuery provides ST_ISVALID; if it does not, every
+# query compiled from `geo_st_isvalid_op` fails at execution time.
+@ibis_udf.scalar.builtin("ST_ISVALID")
+def st_isvalid(x: ibis_types.GeoValue) -> ibis_types.BooleanValue:
+    ...
+
+
+@register_unary_op(geo_ops.geo_st_isvalid_op)
+def geo_st_isvalid_op_impl(x: ibis_types.Value):
+    return st_isvalid(x)
+
+
+@ibis_udf.scalar.builtin("ST_UNION")
+def st_union(
+    x: ibis_types.GeoValue, y: ibis_types.GeoValue
+) -> ibis_types.GeoValue:
+    ...
+
+
+@register_binary_op(geo_ops.geo_st_union_op)
+def geo_st_union_op_impl(
+    x: ibis_types.Value, y: ibis_types.Value
+) -> ibis_types.Value:
+    return st_union(x, y)
diff --git a/bigframes/core/compile/polars/compiler.py b/bigframes/core/compile/polars/compiler.py
index f7c742e852..f18dba1690 100644
--- a/bigframes/core/compile/polars/compiler.py
+++ b/bigframes/core/compile/polars/compiler.py
@@ -38,6 +38,7 @@
 import bigframes.operations.datetime_ops as dt_ops
 import bigframes.operations.frequency_ops as freq_ops
 import bigframes.operations.generic_ops as gen_ops
+import bigframes.operations.geo_ops as geo_ops
 import bigframes.operations.json_ops as json_ops
 import bigframes.operations.numeric_ops as num_ops
 import bigframes.operations.string_ops as string_ops
@@ -437,6 +438,97 @@ def _(self, op: ops.ArrayReduceOp, input: pl.Expr) -> pl.Expr:
                     f"Haven't implemented array aggregation: {op.aggregation}"
                 )
 
+        @compile_op.register(geo_ops.GeoStIsemptyOp)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            return input.str.contains("EMPTY", literal=True)
+
+        @compile_op.register(geo_ops.GeoStGeometrytypeOp)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            # WKT tags are upper case ("POINT") while BigQuery's ST_GEOMETRYTYPE
+            # reports mixed case ("ST_Point"); map the standard tags so both
+            # backends return the same names.
+            bigquery_names = {
+                "POINT": "Point",
+                "LINESTRING": "LineString",
+                "POLYGON": "Polygon",
+                "MULTIPOINT": "MultiPoint",
+                "MULTILINESTRING": "MultiLineString",
+                "MULTIPOLYGON": "MultiPolygon",
+                "GEOMETRYCOLLECTION": "GeometryCollection",
+            }
+            wkt_tag = input.str.extract(r"^(\w+)", 1)
+            return "ST_" + wkt_tag.replace(bigquery_names)
+
+        @compile_op.register(geo_ops.GeoStIsringOp)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            from shapely.errors import ShapelyError
+            import shapely.wkt
+
+            def is_ring(s: str | None) -> bool | None:
+                if not s:
+                    return None
+                try:
+                    geom = shapely.wkt.loads(s)
+                    return getattr(geom, "is_ring", False)
+                except ShapelyError:
+                    return None
+
+            return input.map_elements(is_ring, return_dtype=pl.Boolean())
+
+        @compile_op.register(geo_ops.GeoStIssimpleOp)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            from shapely.errors import ShapelyError
+            import shapely.wkt
+
+            def is_simple(s: str | None) -> bool | None:
+                if not s:
+                    return None
+                try:
+                    geom = shapely.wkt.loads(s)
+                    return getattr(geom, "is_simple", False)
+                except ShapelyError:
+                    return None
+
+            return input.map_elements(is_simple, return_dtype=pl.Boolean())
+
+        @compile_op.register(geo_ops.GeoStIsvalidOp)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            from shapely.errors import ShapelyError
+            import shapely.wkt
+
+            def is_valid(s: str | None) -> bool | None:
+                if not s:
+                    return None
+                try:
+                    geom = shapely.wkt.loads(s)
+                    return getattr(geom, "is_valid", False)
+                except ShapelyError:
+                    return None
+
+            return input.map_elements(is_valid, return_dtype=pl.Boolean())
+
+        @compile_op.register(geo_ops.GeoStUnionOp)
+        def _(self, op: ops.ScalarOp, left: pl.Expr, right: pl.Expr) -> pl.Expr:
+            from shapely.errors import ShapelyError
+            import shapely.wkt
+
+            def union(struct_val: dict[str, str | None]) -> str | None:
+                # The fields in the struct are not guaranteed to be named.
+                # Let's get them by order.
+                s1, s2 = list(struct_val.values())
+                if not s1 or not s2:
+                    return None
+                try:
+                    g1 = shapely.wkt.loads(s1)
+                    g2 = shapely.wkt.loads(s2)
+                    return g1.union(g2).wkt
+                except ShapelyError:
+                    return None
+
+            return pl.struct([left, right]).map_elements(
+                union, return_dtype=pl.String()
+            )
+
     @dataclasses.dataclass(frozen=True)
     class PolarsAggregateCompiler:
         scalar_compiler = PolarsExpressionCompiler()
diff --git a/bigframes/geopandas/geoseries.py b/bigframes/geopandas/geoseries.py
index f3558e4b34..18be398a07 100644
--- a/bigframes/geopandas/geoseries.py
+++ b/bigframes/geopandas/geoseries.py
@@ -74,6 +74,36 @@ def is_closed(self) -> bigframes.series.Series:
             f"GeoSeries.is_closed is not supported. Use bigframes.bigquery.st_isclosed(series), instead. {constants.FEEDBACK_LINK}"
         )
 
+    @property
+    def is_empty(self) -> bigframes.series.Series:
+        series = self._apply_unary_op(ops.geo_st_isempty_op)
+        series.name = None
+        return series
+
+    @property
+    def geom_type(self) -> bigframes.series.Series:
+        series = self._apply_unary_op(ops.geo_st_geometrytype_op)
+        series.name = None
+        return series
+
+    @property
+    def is_ring(self) -> bigframes.series.Series:
+        series = self._apply_unary_op(ops.geo_st_isring_op)
+        series.name = None
+        return series
+
+    @property
+    def is_simple(self) -> bigframes.series.Series:
+        series = self._apply_unary_op(ops.geo_st_issimple_op)
+        series.name = None
+        return series
+
+    @property
+    def is_valid(self) -> bigframes.series.Series:
+        series = self._apply_unary_op(ops.geo_st_isvalid_op)
+        series.name = None
+        return series
+
     @classmethod
     def from_wkt(
         cls,
@@ -123,3 +153,6 @@ def distance(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series:  # t
 
     def intersection(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series:  # type: ignore
         return self._apply_binary_op(other, ops.geo_st_intersection_op)
+
+    def union(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series:  # type: ignore
+        return self._apply_binary_op(other, ops.geo_st_union_op)
diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py
index b14d15245a..922d35d80a 100644
--- a/bigframes/operations/__init__.py
+++ b/bigframes/operations/__init__.py
@@ -108,6 +108,12 @@
     geo_st_geogpoint_op,
+    geo_st_geometrytype_op,
     geo_st_intersection_op,
     geo_st_isclosed_op,
+    geo_st_isempty_op,
+    geo_st_isring_op,
+    geo_st_issimple_op,
+    geo_st_isvalid_op,
+    geo_st_union_op,
     geo_x_op,
     geo_y_op,
     GeoStBufferOp,
diff --git a/bigframes/operations/geo_ops.py b/bigframes/operations/geo_ops.py
index 3b7754a47a..6a7eb7287a 100644
--- a/bigframes/operations/geo_ops.py
+++ b/bigframes/operations/geo_ops.py
@@ -84,6 +84,51 @@
 )
 geo_st_isclosed_op = GeoStIsclosedOp()
 
+GeoStIsemptyOp = base_ops.create_unary_op(
+    name="geo_st_isempty",
+    type_signature=op_typing.FixedOutputType(
+        dtypes.is_geo_like, dtypes.BOOL_DTYPE, description="geo-like"
+    ),
+)
+geo_st_isempty_op = GeoStIsemptyOp()
+
+GeoStGeometrytypeOp = base_ops.create_unary_op(
+    name="geo_st_geometrytype",
+    type_signature=op_typing.FixedOutputType(
+        dtypes.is_geo_like, dtypes.STRING_DTYPE, description="geo-like"
+    ),
+)
+geo_st_geometrytype_op = GeoStGeometrytypeOp()
+
+GeoStIsringOp = base_ops.create_unary_op(
+    name="geo_st_isring",
+    type_signature=op_typing.FixedOutputType(
+        dtypes.is_geo_like, dtypes.BOOL_DTYPE, description="geo-like"
+    ),
+)
+geo_st_isring_op = GeoStIsringOp()
+
+GeoStIssimpleOp = base_ops.create_unary_op(
+    name="geo_st_issimple",
+    type_signature=op_typing.FixedOutputType(
+        dtypes.is_geo_like, dtypes.BOOL_DTYPE, description="geo-like"
+    ),
+)
+geo_st_issimple_op = GeoStIssimpleOp()
+
+GeoStIsvalidOp = base_ops.create_unary_op(
+    name="geo_st_isvalid",
+    type_signature=op_typing.FixedOutputType(
+        dtypes.is_geo_like, dtypes.BOOL_DTYPE, description="geo-like"
+    ),
+)
+geo_st_isvalid_op = GeoStIsvalidOp()
+
+GeoStUnionOp = base_ops.create_binary_op(
+    name="geo_st_union", type_signature=op_typing.BinaryGeo()
+)
+geo_st_union_op = GeoStUnionOp()
+
 GeoXOp = base_ops.create_unary_op(
     name="geo_x",
     type_signature=op_typing.FixedOutputType(
diff --git a/tests/system/small/geopandas/test_geoseries.py b/tests/system/small/geopandas/test_geoseries.py
index a2f0759161..72fbf348f7 100644
--- a/tests/system/small/geopandas/test_geoseries.py
+++ b/tests/system/small/geopandas/test_geoseries.py
@@ -490,6 +490,35 @@ def test_geo_is_closed_not_supported(session: bigframes.session.Session):
         bf_series.is_closed
 
 
+def test_geo_is_empty(session: bigframes.session.Session):
+    bf_s = bigframes.geopandas.GeoSeries(
+        [
+            Polygon([]),
+            Point(0, 0),
+            LineString([]),
+            Polygon([(0, 0), (1, 1), (0, 1)]),
+            GeometryCollection([]),
+            None,
+        ],
+        session=session,
+    )
+    pd_s = geopandas.GeoSeries(
+        [
+            Polygon([]),
+            Point(0, 0),
+            LineString([]),
+            Polygon([(0, 0), (1, 1), (0, 1)]),
+            GeometryCollection([]),
+            None,
+        ]
+    )
+
+    bf_result = bf_s.is_empty.to_pandas()
+    pd_result = pd_s.is_empty.astype("boolean")
+
+    assert_series_equal(bf_result, pd_result, check_index=False)
+
+
 def test_geo_buffer_raises_notimplemented(session: bigframes.session.Session):
     """GeoPandas takes distance in units of the coordinate system, but BigQuery
     uses meters.
diff --git a/tests/unit/test_geoseries.py b/tests/unit/test_geoseries.py
new file mode 100644
index 0000000000..f0b2a3d823
--- /dev/null
+++ b/tests/unit/test_geoseries.py
@@ -0,0 +1,131 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import geopandas as gpd  # type: ignore
+import geopandas.testing  # type: ignore
+import pandas as pd
+
+import bigframes.geopandas as bpd
+
+
+def test_geoseries_is_empty(polars_session):
+    session = polars_session
+    geometries = [
+        "POINT (0 0)",
+        "POLYGON EMPTY",
+    ]
+    gseries = gpd.GeoSeries.from_wkt(geometries)
+
+    bf_gseries = bpd.GeoSeries(gseries, session=session)
+
+    result = bf_gseries.is_empty.to_pandas()
+    expected = pd.Series([False, True], dtype="boolean")
+
+    pd.testing.assert_series_equal(expected, result, check_index=False)
+
+
+def test_geoseries_union(polars_session):
+    session = polars_session
+    gseries1 = gpd.GeoSeries.from_wkt(
+        [
+            "POINT (0 0)",
+            "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))",
+        ]
+    )
+    gseries2 = gpd.GeoSeries.from_wkt(
+        [
+            "POINT (1 1)",
+            "POLYGON ((2 0, 3 0, 3 1, 2 1, 2 0))",
+        ]
+    )
+    expected_union = gpd.GeoSeries.from_wkt(
+        [
+            "MULTIPOINT (0 0, 1 1)",
+            "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)), ((2 0, 3 0, 3 1, 2 1, 2 0)))",
+        ]
+    )
+
+    bf_gseries1 = bpd.GeoSeries(gseries1, session=session)
+    bf_gseries2 = bpd.GeoSeries(gseries2, session=session)
+
+    result = bf_gseries1.union(bf_gseries2).to_pandas()
+    expected = pd.Series(expected_union, dtype=gpd.array.GeometryDtype())
+
+    gpd.testing.assert_geoseries_equal(result, expected, check_series_type=False)
+
+
+def test_geoseries_is_valid(polars_session):
+    session = polars_session
+    geometries = [
+        "POLYGON ((0 0, 1 1, 0 1, 0 0))",
+        "POLYGON ((0 0, 1 1, 1 0, 0 1, 0 0))",
+    ]
+    gseries = gpd.GeoSeries.from_wkt(geometries)
+
+    bf_gseries = bpd.GeoSeries(gseries, session=session)
+
+    result = bf_gseries.is_valid.to_pandas()
+    expected = pd.Series([True, False], dtype="boolean")
+
+    pd.testing.assert_series_equal(expected, result, check_index=False)
+
+
+def test_geoseries_is_simple(polars_session):
+    session = polars_session
+    geometries = [
+        "LINESTRING (0 0, 1 1)",
+        "LINESTRING (0 0, 1 1, 0 1, 1 0)",
+    ]
+    gseries = gpd.GeoSeries.from_wkt(geometries)
+
+    bf_gseries = bpd.GeoSeries(gseries, session=session)
+
+    result = bf_gseries.is_simple.to_pandas()
+    expected = pd.Series([True, False], dtype="boolean")
+
+    pd.testing.assert_series_equal(expected, result, check_index=False)
+
+
+def test_geoseries_is_ring(polars_session):
+    session = polars_session
+    geometries = [
+        "LINESTRING (0 0, 1 0, 1 1, 0 1, 0 0)",
+        "LINESTRING (0 0, 1 1, 1 0, 0 1)",
+    ]
+    gseries = gpd.GeoSeries.from_wkt(geometries)
+
+    bf_gseries = bpd.GeoSeries(gseries, session=session)
+
+    result = bf_gseries.is_ring.to_pandas()
+    expected = pd.Series([True, False], dtype="boolean")
+
+    pd.testing.assert_series_equal(expected, result, check_index=False)
+
+
+def test_geoseries_geom_type(polars_session):
+    session = polars_session
+    geometries = [
+        "POINT (0 0)",
+        "POLYGON ((0 0, 1 1, 0 1, 0 0))",
+    ]
+    gseries = gpd.GeoSeries.from_wkt(geometries)
+
+    bf_gseries = bpd.GeoSeries(gseries, session=session)
+
+    result = bf_gseries.geom_type.to_pandas()
+    expected = pd.Series(["ST_Point", "ST_Polygon"], dtype="string[pyarrow]")
+
+    pd.testing.assert_series_equal(expected, result, check_index=False)