diff --git a/src/harpy/shape/__init__.pyi b/src/harpy/shape/__init__.pyi
index 99ffd363..bace2cb3 100644
--- a/src/harpy/shape/__init__.pyi
+++ b/src/harpy/shape/__init__.pyi
@@ -1,5 +1,11 @@
 from ._cell_expansion import create_voronoi_boundaries
-from ._shape import add_shapes_layer, filter_shapes_layer, intersect_rectangles, vectorize
+from ._filters import (
+    filter_shapes_by_shapes,
+    filter_shapes_categorical,
+    filter_shapes_numerical,
+    morphological_features,
+)
+from ._shape import add_shapes_layer, filter_shapes_layer, intersect_rectangles, prep_region_annotations, vectorize
 
 __all__ = [
     "add_shapes_layer",
@@ -7,4 +13,9 @@ __all__ = [
     "filter_shapes_layer",
     "intersect_rectangles",
     "vectorize",
+    "prep_region_annotations",
+    "morphological_features",
+    "filter_shapes_numerical",
+    "filter_shapes_categorical",
+    "filter_shapes_by_shapes",
 ]
diff --git a/src/harpy/shape/_filters.py b/src/harpy/shape/_filters.py
new file mode 100755
index 00000000..027c77ac
--- /dev/null
+++ b/src/harpy/shape/_filters.py
@@ -0,0 +1,561 @@
+from collections.abc import Sequence
+from typing import Literal
+
+import geopandas as gpd
+import numpy as np
+import pandas as pd
+from shapely.geometry import LineString, MultiPolygon, Polygon
+from shapely.ops import unary_union
+from spatialdata import SpatialData
+from spatialdata.transformations import get_transformation
+
+from harpy.shape._shape import add_shapes_layer
+from harpy.utils.pylogger import get_pylogger
+
+log = get_pylogger(__name__)
+
+
+# Extract exterior and interior boundaries
+def _extract_exterior_lines(geom):
+    if isinstance(geom, Polygon):
+        return [geom.exterior]
+    elif isinstance(geom, MultiPolygon):
+        return [poly.exterior for poly in geom.geoms]
+    return []
+
+
+def _extract_interior_lines(geom):
+    if isinstance(geom, Polygon):
+        return list(geom.interiors)
+    elif isinstance(geom, MultiPolygon):
+        lines = []
+        for poly in geom.geoms:
+            lines.extend(poly.interiors)
+        return lines
+    return []
+
+
+def morphological_features(
+    sdata: SpatialData,
+    shapes_layer: str,
+    output_shapes_layer: str,
+    shape_names_column: str = "name",
+    grouped_features: bool = False,
+    pixel_size_um: float = 1.0,
+    overwrite: bool = False,
+) -> SpatialData:
+    """
+    Compute morphological features for polygons in a shapes layer. Only Polygon and MultiPolygon are supported.
+    It is recommended to run `hp.sh.prep_region_annotations` first when computing features for region annotations that
+    may contain multipolygons, point geometries, unnamed annotations, etc.
+
+        - `area`: Area of the polygon (px² or µm²).
+        - `perimeter`: Perimeter length (px or µm).
+        - `equivalent_diameter`: Diameter of a circle with the same area (px or µm).
+        - `convex_area`: Area of convex hull (px² or µm²).
+        - `convex_perimeter`: Perimeter of convex hull (px or µm).
+        - `circularity`: 4π * area / perimeter².
+            → 1 for a perfect circle; lower = irregular shape.
+        - `compactness`: perimeter² / area.
+        - `solidity`: area / convex_area.
+            → Low values mean concave or fragmented shapes.
+        - `convexity`: convex_perimeter / perimeter.
+            → 1 for perfectly convex shapes. Lower for rough or spiky boundaries.
+        - `centroid_x`: X-coordinate of the polygon centroid (px).
+        - `centroid_y`: Y-coordinate of the polygon centroid (px).
+        - `centroid_dif`: Distance between polygon and convex hull centroids normalized by the polygon area.
+            → Captures off-centered concavity or asymmetry.
+        - `major_axis_length`: Length of the longest side of the minimum rotated bounding box (px or µm).
+        - `minor_axis_length`: Length of the shortest side of the minimum rotated bounding box (px or µm).
+        - `major_minor_axis_ratio`: major_axis_length / minor_axis_length.
+        - `num_vertices`: Number of vertices along exterior boundaries.
+        - `boundary_complexity`: num_vertices / perimeter.
+            → Normalized measure of boundary irregularity.
+        - `num_holes`: Count of internal holes.
+        - `hole_area`: Area covered by internal holes (px² or µm²)
+
+    Parameters
+    ----------
+    sdata
+        SpatialData object containing the shapes layer.
+    shapes_layer
+        Name of the input shapes layer.
+    output_shapes_layer
+        Name of the output shapes layer to store results.
+    shape_names_column
+        Column name in shapes layer containing geometry names. Required when using grouped filters.
+    grouped_features
+        If True, also computes grouped morphological features by dissolving polygons sharing
+        the same name in `shape_names_column`.
+    pixel_size_um
+        Scale factor to convert geometric measurements from pixel to micron units.
+        - Applied to:
+            * "perimeter", "major_axis_length", and "minor_axis_length" → scaled by (pixel_size_um)
+            * "area" and "convex_area" → scaled by (pixel_size_um)²
+        - Dimensionless ratios (e.g., "circularity", "compactness", "solidity", "convexity", "centroid_dif", "major_minor_axis_ratio")
+            are unaffected by scaling but are computed using the scaled geometric quantities.
+        - XY-coordinates are kept in original units.
+        Defaults to 1.0 (no scaling, i.e. units remain in pixels).
+    overwrite
+        Whether to overwrite existing shapes layer.
+
+    Returns
+    -------
+    SpatialData
+        Object with updated shapes layer containing morphological feature columns.
+    """
+    # Create copy of shapes layer
+    gdf = sdata.shapes[shapes_layer].copy()
+
+    # Filter out geometries that are not Polygon or MultiPolygon
+    supported_mask = gdf.geometry.apply(lambda x: isinstance(x, (Polygon, MultiPolygon)))
+
+    unssuported = len(gdf) - supported_mask.sum()
+    if unssuported > 0:
+        raise ValueError(
+            f"Found {unssuported} non-polygon geometries in {shapes_layer}. Consider running `hp.sh.prep_region_annotations` first to clean up geometries."
+        )
+    if unssuported == len(gdf):
+        log.warning(
+            f"No supported geometries (Polygon, MultiPolygon) found. Skipping addition of shapes layer '{output_shapes_layer}' to sdata."
+        )
+        return sdata
+
+    gdf = gdf[supported_mask].copy()
+
+    # Multipolygon check
+    n_multipolygons = gdf.geometry.apply(lambda x: isinstance(x, MultiPolygon)).sum()
+    if n_multipolygons > 0:
+        log.warning(
+            f"Detected {n_multipolygons} MultiPolygon geometries in '{shapes_layer}'. "
+            "Consider running `hp.sh.prep_region_annotations` first to split MultiPolygons before feature extraction."
+        )
+
+    # Dissolve polygons by name
+    grouped_gdf = None
+    if grouped_features:
+        if shape_names_column not in gdf.columns:
+            raise ValueError(f"Grouped computation requires column '{shape_names_column}'.")
+        grouped_gdf = gdf.dissolve(by=shape_names_column, as_index=False, aggfunc="first").copy()
+        grouped_gdf["geometry"] = gdf.groupby(shape_names_column).geometry.apply(lambda x: x.unary_union).values
+        log.info(f"Merged {len(gdf)} polygons into {len(grouped_gdf)} groups by '{shape_names_column}'.")
+
+    # Compute features
+    def _compute_features(gdf: gpd.GeoDataFrame, pixel_size_um, suffix: str = ""):
+        # Base area & perimeter
+        gdf[f"area{suffix}"] = gdf.geometry.area * pixel_size_um**2
+        gdf[f"perimeter{suffix}"] = gdf.geometry.length * pixel_size_um
+
+        # Equivalent diameter
+        gdf[f"equivalent_diameter{suffix}"] = np.sqrt(4 * gdf[f"area{suffix}"] / np.pi)
+
+        # Convex geometry
+        convex_hulls = gdf.geometry.convex_hull
+        gdf[f"convex_area{suffix}"] = convex_hulls.area * pixel_size_um**2
+        gdf[f"convex_perimeter{suffix}"] = convex_hulls.length * pixel_size_um
+
+        # Derived metrics
+        gdf[f"circularity{suffix}"] = 4 * np.pi * gdf[f"area{suffix}"] / (gdf[f"perimeter{suffix}"] ** 2)
+        gdf[f"compactness{suffix}"] = (gdf[f"perimeter{suffix}"] ** 2) / gdf[f"area{suffix}"]
+        gdf[f"solidity{suffix}"] = gdf[f"area{suffix}"] / gdf[f"convex_area{suffix}"]
+        gdf[f"convexity{suffix}"] = gdf[f"convex_perimeter{suffix}"] / gdf[f"perimeter{suffix}"]
+
+        # Centroid-based metrics
+        gdf[f"centroid_x{suffix}"] = gdf.geometry.centroid.x
+        gdf[f"centroid_y{suffix}"] = gdf.geometry.centroid.y
+        hull_centroids = convex_hulls.centroid
+        gdf[f"centroid_dif{suffix}"] = np.sqrt(
+            (gdf[f"centroid_x{suffix}"] - hull_centroids.x) ** 2 + (gdf[f"centroid_y{suffix}"] - hull_centroids.y) ** 2
+        ) / np.sqrt(gdf[f"area{suffix}"])
+
+        # Major/minor axis (from rotated rectangle)
+        def _rotated_rect_axes(geom):
+            try:
+                rect = geom.minimum_rotated_rectangle
+                x, y = rect.exterior.coords.xy
+                edges = np.sqrt(np.diff(x) ** 2 + np.diff(y) ** 2)
+                edges = np.sort(edges[:-1])  # remove duplicate closing edge
+                minor, major = edges[0], edges[1]
+                ratio = major / minor if minor > 0 else np.nan
+                return major, minor, ratio
+            except Exception:
+                return np.nan, np.nan, np.nan
+
+        results = np.array([_rotated_rect_axes(g) for g in gdf.geometry])
+        major_axis_length, minor_axis_length, major_minor_axis_ratio = results.T
+        gdf[f"major_axis_length{suffix}"] = major_axis_length * pixel_size_um
+        gdf[f"minor_axis_length{suffix}"] = minor_axis_length * pixel_size_um
+        gdf[f"major_minor_axis_ratio{suffix}"] = major_minor_axis_ratio
+
+        # Vertex metrics
+        def _num_vertices(geom):
+            exteriors = _extract_exterior_lines(geom)
+            return sum(len(line.coords) for line in exteriors)
+
+        gdf[f"num_vertices{suffix}"] = gdf.geometry.apply(_num_vertices)
+        gdf[f"boundary_complexity{suffix}"] = gdf[f"num_vertices{suffix}"] / gdf[f"perimeter{suffix}"]
+
+        # Hole topology
+        def _num_holes(geom):
+            return len(_extract_interior_lines(geom))
+
+        def _hole_area(g):
+            interiors = _extract_interior_lines(g)
+            if not interiors:
+                return 0.0
+            try:
+                holes = [Polygon(ring) for ring in interiors]
+                total_hole_area = sum(h.area for h in holes)
+                return total_hole_area
+            except Exception:
+                return np.nan
+
+        gdf[f"num_holes{suffix}"] = gdf.geometry.apply(_num_holes)
+        gdf[f"hole_area{suffix}"] = gdf.geometry.apply(_hole_area) * pixel_size_um**2
+
+        return gdf
+
+    gdf = _compute_features(gdf, pixel_size_um, "")
+
+    if grouped_features and not grouped_gdf.empty:
+        grouped_gdf = _compute_features(grouped_gdf, pixel_size_um, "-grouped")
+
+        grouped_metrics = [col for col in grouped_gdf.columns if col.endswith("-grouped")]
+
+        grouped_lookup = grouped_gdf.set_index(shape_names_column)
+        for col in grouped_metrics:
+            gdf[col] = gdf[shape_names_column].map(grouped_lookup[col])
+
+    # sanity check. If this sanity check would fail in spatialdata at some point, then pass transformation to transformations parameter of add_shapes_layer.
+    assert get_transformation(gdf, get_all=True) == get_transformation(sdata[shapes_layer], get_all=True)
+
+    sdata = add_shapes_layer(
+        sdata,
+        input=gdf,
+        output_layer=output_shapes_layer,
+        overwrite=overwrite,
+    )
+
+    return sdata
+
+
+def filter_shapes_numerical(
+    sdata: SpatialData,
+    shapes_layer: str,
+    output_shapes_layer: str,
+    numerical_column: str,
+    min_value: float | int | None = None,
+    max_value: float | int | None = None,
+    overwrite: bool = False,
+) -> SpatialData:
+    """
+    All polygons with a size outside of the min and max size range are removed using the `numerical_column` in `shapes_layer`.
+    Polygons are kept if `min_value` ≤ `numerical_column` ≤ `max_value`.
+
+    Parameters
+    ----------
+    sdata
+        SpatialData object containing the shapes layer.
+    shapes_layer
+        Name of the input shapes layer.
+    output_shapes_layer
+        Name of the output shapes layer to store the filtered polygons.
+    numerical_column
+        Name of numerical column in the shapes layer used for filtering.
+    min_value
+        minimum value of `numerical_column` (inclusive). If None, lower bound is ignored.
+    max_value
+        maximum value of `numerical_column` (inclusive). If None, upper bound is ignored.
+    overwrite
+        Whether to overwrite existing output layer.
+
+    Returns
+    -------
+    SpatialData
+        Object with updated shapes layer containing only polygons meeting criteria.
+    """
+    gdf = sdata.shapes[shapes_layer].copy()
+
+    if numerical_column not in gdf.columns:
+        raise ValueError(f"Column '{numerical_column}' not found in '{shapes_layer}.obs'. ")
+
+    if not np.issubdtype(gdf[numerical_column].dtype, np.number):
+        raise ValueError(f"Column '{numerical_column}' must be numeric, but dtype is {gdf[numerical_column].dtype}.")
+
+    # Filter cells based on min and max values
+    start = len(gdf)
+    mask = pd.Series(True, index=gdf.index)
+
+    if min_value is not None:
+        below = (gdf[numerical_column] < min_value).sum()
+        log.info(f"Removed {below} cells below {min_value}.")
+        mask &= gdf[numerical_column] >= min_value
+    if max_value is not None:
+        above = (gdf[numerical_column] > max_value).sum()
+        log.info(f"Removed {above} cells above {max_value}.")
+        mask &= gdf[numerical_column] <= max_value
+
+    filtered_gdf = gdf[mask].copy()
+    kept = len(filtered_gdf)
+    removed = start - len(filtered_gdf)
+    log.info(
+        f"Removed {removed}/{start} polygons outside {min_value}–{max_value} for '{numerical_column}' (kept {kept})."
+    )
+
+    # sanity check. If this sanity check would fail in spatialdata at some point, then pass transformation to transformations parameter of add_shapes_layer.
+    assert get_transformation(filtered_gdf, get_all=True) == get_transformation(sdata[shapes_layer], get_all=True)
+
+    # Add to SpatialData
+    sdata = add_shapes_layer(
+        sdata,
+        input=filtered_gdf,
+        output_layer=output_shapes_layer,
+        overwrite=overwrite,
+    )
+
+    return sdata
+
+
+def filter_shapes_categorical(
+    sdata: SpatialData,
+    shapes_layer: str,
+    output_shapes_layer: str,
+    categorical_column: str | None = None,
+    include_values: str | Sequence[str] | None = None,
+    exclude_values: str | Sequence[str] | None = None,
+    overwrite: bool = False,
+) -> SpatialData:
+    """
+    Filter polygons in a shapes layer based on categorical values or index.
+
+    Parameters
+    ----------
+    sdata
+        SpatialData object containing the shapes layer.
+    shapes_layer
+        Name of the input shapes layer.
+    output_shapes_layer
+        Name of the output shapes layer to store filtered polygons.
+    categorical_column
+        Name of the categorical column in the shapes layer. If None, filtering is performed based on the index.
+    include_values
+        Value(s) to keep. Only polygons whose `categorical_column` matches one of these
+        values will be kept. Mutually exclusive with `exclude_values`.
+    exclude_values
+        Value(s) to remove. polygons whose `categorical_column` matches one of these
+        values will be removed. Mutually exclusive with `include_values`.
+    overwrite
+        If True, overwrites the `output_shapes_layer` if it already exists in `sdata`.
+
+    Returns
+    -------
+    SpatialData
+        Object with updated shapes layer containing filtered polygons.
+    """
+    if include_values is not None and exclude_values is not None:
+        raise ValueError("Specify only one of 'include_values' or 'exclude_values'.")
+
+    gdf = sdata.shapes[shapes_layer].copy()
+
+    if categorical_column is not None and categorical_column not in gdf.columns:
+        raise ValueError(f"Column '{categorical_column}' not found in '{shapes_layer}'.")
+
+    # Ensure include/exclude are lists
+    if isinstance(include_values, (str, int)):
+        include_values = [include_values]
+    if isinstance(exclude_values, (str, int)):
+        exclude_values = [exclude_values]
+
+    # Filter
+    start = len(gdf)
+    mask = pd.Series(True, index=gdf.index)
+
+    if categorical_column is not None:
+        # Filtering by column
+        if include_values is not None:
+            mask &= gdf[categorical_column].isin(include_values)
+        elif exclude_values is not None:
+            mask &= ~gdf[categorical_column].isin(exclude_values)
+    else:
+        # Filtering by index
+        if include_values is not None:
+            mask &= gdf.index.isin(include_values)
+        elif exclude_values is not None:
+            mask &= ~gdf.index.isin(exclude_values)
+
+    filtered_gdf = gdf[mask].copy()
+    kept = len(filtered_gdf)
+    removed = start - kept
+    if categorical_column is None:
+        categorical_column = "index"
+    log.info(f"Removed {removed}/{start} polygons based on '{categorical_column}' filtering (kept {kept}).")
+
+    # sanity check. If this sanity check would fail in spatialdata at some point, then pass transformation to transformations parameter of add_shapes_layer.
+    assert get_transformation(filtered_gdf, get_all=True) == get_transformation(sdata[shapes_layer], get_all=True)
+
+    # Add to SpatialData
+    sdata = add_shapes_layer(
+        sdata,
+        input=filtered_gdf,
+        output_layer=output_shapes_layer,
+        overwrite=overwrite,
+    )
+
+    return sdata
+
+
+def filter_shapes_by_shapes(
+    sdata: SpatialData,
+    target_shapes_layer: str,
+    mask_shapes_layer: str,
+    output_shapes_layer: str,
+    shape_names_column: str | None = None,
+    shape_names: str | list[str] | None = None,
+    mode: Literal[
+        "intersects",
+        "within",
+        "centroid_within",
+        "touches",
+        "disjoint",
+        "overlap_fraction",
+        "edge",
+        "outer_edge",
+        "inner_edge",
+    ] = "centroid_within",
+    keep: bool = True,
+    threshold: float | None = None,
+    overwrite: bool = False,
+):
+    """
+    Filter polygons in a target shapes layer based on geometric relationships to polygons of a mask shapes layer.
+    A typical use-case would be to filter cell segmentations based on their location within annotated regions.
+
+    Parameters
+    ----------
+    sdata
+        SpatialData object containing the target and mask shapes layers.
+    target_shapes_layer
+        Name of shapes layer whose polygons will be filtered.
+    mask_shapes_layer
+        Name of shapes layer used as mask for filtering.
+    output_shapes_layer
+        Name of the output shapes layer to store filtered polygons.
+    shape_names_column
+        Optional column in mask_shapes_layer to select specific polygons.
+    shape_names
+        Name or list of names of polygons in mask_shapes_layer to use for filtering. Ignored if shape_names_column is None.
+    mode
+        Geometric relationship to use for filtering.
+        Supported modes:
+            - `"intersects"`: Keep/remove polygons that intersect mask polygons (i.e. have partial or full overlap).
+            - `"within"`: Keep/remove polygons that are fully contained within mask polygons.
+            - `"centroid_within"`: Keep/remove polygons whose centroids are within mask polygons.
+            - `"touches"`: Keep/remove polygons that touch the boundary of mask polygons but do not overlap.
+            - `"disjoint"`: Keep/remove polygons that have no overlap with mask polygons at all.
+            - `"overlap_fraction"`: Keep/remove polygons whose overlap fraction with mask polygons exceeds a threshold.
+            - `"edge"`: Keep/remove polygons that partially overlap either the outer or inner edge of the mask polygons.
+            - `"outer_edge"`: Keep/remove polygons that overlap the outer edge (shell) of the mask polygons.
+            - `"inner_edge"`: Keep/remove polygons that overlap the inner edge (holes) of the mask polygons.
+    keep
+        Whether to keep (True) or remove (False) polygons matching the condition.
+    threshold
+        Threshold to use for overlap_fraction.
+    overwrite
+        If True, overwrites the output shapes layer if it exists.
+
+    Returns
+    -------
+    SpatialData object with filtered shapes layer.
+    """
+    # Copy target layer
+    target_gdf = sdata.shapes[target_shapes_layer].copy()
+
+    # Copy mask layer
+    mask_gdf = sdata.shapes[mask_shapes_layer].copy()
+
+    # Optionally select subset of mask polygons
+    if shape_names_column is not None and shape_names is not None:
+        if shape_names_column not in mask_gdf.columns:
+            raise ValueError(f"Column '{shape_names_column}' not found in mask layer '{mask_shapes_layer}'.")
+        if isinstance(shape_names, str):
+            shape_names = [shape_names]
+        mask_gdf = mask_gdf[mask_gdf[shape_names_column].isin(shape_names)].copy()
+        if mask_gdf.empty:
+            raise ValueError(f"No geometries found in '{mask_shapes_layer}' matching {shape_names}.")
+
+    # Build mask union
+    mask_union = mask_gdf.unary_union
+
+    # Compute condition
+    if mode == "intersects":
+        condition = target_gdf.geometry.intersects(mask_union)
+    elif mode == "within":
+        condition = target_gdf.geometry.within(mask_union)
+    elif mode == "centroid_within":
+        condition = target_gdf.geometry.centroid.within(mask_union)
+    elif mode == "touches":
+        condition = target_gdf.geometry.touches(mask_union)
+    elif mode == "disjoint":
+        condition = target_gdf.geometry.disjoint(mask_union)
+    elif mode == "overlap_fraction":
+        if threshold is None:
+            raise ValueError("Must specify 'threshold' for overlap_fraction mode.")
+        intersecting = target_gdf[target_gdf.geometry.intersects(mask_union)]
+        overlaps = intersecting.geometry.intersection(mask_union)
+        fraction = overlaps.area / intersecting.geometry.area
+        condition = pd.Series(False, index=target_gdf.index)
+        condition.loc[intersecting.index] = fraction > threshold
+    elif mode == "edge":
+        condition = target_gdf.geometry.intersects(mask_union) & ~target_gdf.geometry.within(mask_union)
+    elif mode in ["outer_edge", "inner_edge"]:
+        outer_boundaries = []
+        inner_boundaries = []
+
+        if isinstance(mask_union, gpd.GeoSeries):
+            for geom in mask_union:
+                outer_boundaries.extend(_extract_exterior_lines(geom))
+                inner_boundaries.extend(_extract_interior_lines(geom))
+        else:
+            outer_boundaries.extend(_extract_exterior_lines(mask_union))
+            inner_boundaries.extend(_extract_interior_lines(mask_union))
+
+        outer_boundary_union = (
+            unary_union([LineString(boundary) for boundary in outer_boundaries]) if outer_boundaries else None
+        )
+        inner_boundary_union = (
+            unary_union([LineString(boundary) for boundary in inner_boundaries]) if inner_boundaries else None
+        )
+
+        if mode == "outer_edge":
+            condition = target_gdf.geometry.intersects(outer_boundary_union)
+        elif mode == "inner_edge":
+            condition = (
+                target_gdf.geometry.intersects(inner_boundary_union) if inner_boundary_union is not None else False
+            )
+
+    else:
+        raise ValueError(f"Unsupported mode '{mode}'.")
+
+    # Apply keep/remove
+    filtered_gdf = target_gdf[condition if keep else ~condition].copy()
+    removed = len(target_gdf) - len(filtered_gdf)
+
+    if keep:
+        log.info(f"Kept {len(filtered_gdf)} / {len(target_gdf)} geometries (removed {removed}).")
+    else:
+        log.info(f"Removed {removed} / {len(target_gdf)} geometries (kept {len(filtered_gdf)}).")
+
+    # sanity check. If this sanity check would fail in spatialdata at some point, then pass transformation to transformations parameter of add_shapes_layer.
+    assert get_transformation(filtered_gdf, get_all=True) == get_transformation(
+        sdata[target_shapes_layer], get_all=True
+    )
+
+    # Add to SpatialData
+    sdata = add_shapes_layer(
+        sdata,
+        input=filtered_gdf,
+        output_layer=output_shapes_layer,
+        overwrite=overwrite,
+    )
+
+    return sdata
diff --git a/src/harpy/shape/_shape.py b/src/harpy/shape/_shape.py
index c5473db8..52eabd24 100644
--- a/src/harpy/shape/_shape.py
+++ b/src/harpy/shape/_shape.py
@@ -1,14 +1,18 @@
 from __future__ import annotations
 
+import numpy as np
 from dask.array import Array
 from geopandas import GeoDataFrame
-from shapely.geometry import GeometryCollection, MultiPolygon, Polygon
+from shapely.geometry import GeometryCollection, MultiPolygon, Point, Polygon
 from spatialdata import SpatialData
 from spatialdata.models._utils import MappingToCoordinateSystem_t
 from spatialdata.transformations import get_transformation
 
 from harpy.image._image import _get_spatial_element
 from harpy.shape._manager import ShapesLayerManager
+from harpy.utils.pylogger import get_pylogger
+
+log = get_pylogger(__name__)
 
 
 def vectorize(
@@ -179,3 +183,125 @@ def intersect_rectangles(rect1: list[int | float], rect2: list[int | float]) ->
         return [x_min, x_max, y_min, y_max]
     else:
         return None
+
+
+def prep_region_annotations(
+    sdata: SpatialData,
+    shapes_layer: str,
+    output_shapes_layer: str,
+    shape_names_column: str = "name",
+    unnamed: str = "unnamed",
+    unique_shape_names_column: str = "name-unique",
+    erosion: float = 0.5,
+    overwrite: bool = False,
+):
+    """
+    Prepares region annotations in a shapes layer for `hp.sh.morphological_features`, `hp.tb.assign_cells_to_shapes` and `hp.tb.compute_distance_to_shapes`.
+    Operations performed:
+        - Ensures a shape name column exists and fills missing names.
+        - Converts Points with a `radius` column into circular polygons. Points without a `radius` column will be preserved as Points.
+        - Slightly erodes polygons to avoid shared edges.
+        - Explodes multipolygons into separate single polygons.
+        - Generates unique names for shapes with duplicate base names.
+        - Shift indices by +1 if index 0 is present (index 0 is reserved for background in labels layers).
+
+    Parameters
+    ----------
+    sdata
+        The SpatialData object containing the input shapes layer.
+    shapes_layer
+        The shapes layer in `sdata.shapes` to use as input.
+    output_shapes_layer
+        The output shapes layer in `sdata.tables` to which the updated shapes layer will be written.
+    shape_names_column
+        Column name in shapes layer containing geometry names. If not present, new names will be generated.
+    unnamed
+        Name to be assigned to any unnamed geometries in `shape_names_column`. Defaults to 'unnamed'.
+    unique_shape_names_column
+        Column name in which unique names will be created for single polygons by appending a counter to the original name in `shape_names_column` for polygons with the same name. Note
+        that multipolygons will be split in individual polygons and each will get a unique name based on the original name of the multipolygon. Unique names will be stored in
+        `{shape_names_column}-unique` in the updated shapes layer.
+    erosion
+        Number of pixels to erode polygons by. This can avoid problems with overlapping edges of geometries when calculating distances. Default is 0.5 (i.e. erosion by 0.5 pixels).
+    overwrite
+        If True, overwrites the `output_shapes_layer` if it already exists in `sdata`.
+
+    Returns
+    -------
+    Modified `sdata` object with updated updated shapes layer.
+    """
+    # Create copy of shapes layer
+    gdf = sdata.shapes[shapes_layer].copy()
+    log.info(f"Found {len(gdf)} geometries in {shapes_layer}.")
+
+    # Ensure shape_names_column exist
+    if shape_names_column not in gdf.columns:
+        gdf[shape_names_column] = unnamed
+
+    gdf[shape_names_column] = gdf[shape_names_column].fillna("").astype(str)
+    unnamed_mask = gdf[shape_names_column] == ""
+    for _i, idx in enumerate(gdf[unnamed_mask].index):
+        gdf.at[idx, shape_names_column] = unnamed
+
+    # Convert Points with a radius column to circular polygons
+    def _point_to_circle(geom, radius=None):
+        if isinstance(geom, Point) and radius is not None:
+            return geom.buffer(radius, resolution=16)
+        return geom
+
+    if "radius" in gdf.columns:
+        mask_points_with_radius = gdf.geometry.geom_type.eq("Point") & gdf["radius"].notna()
+        n_converted = mask_points_with_radius.sum()
+
+        if n_converted > 0:
+            log.info(f"Converting {n_converted} Point geometries with 'radius' to circular polygons.")
+            gdf.loc[mask_points_with_radius, "geometry"] = gdf.loc[mask_points_with_radius].apply(
+                lambda row: _point_to_circle(row.geometry, getattr(row, "radius", None)), axis=1
+            )
+
+    # Slightly erode all polygons (this avoids any shared borders between polygons)
+    gdf["geometry"] = gdf.geometry.apply(
+        lambda geom: geom.buffer(-erosion, join_style=2, resolution=16)
+        if geom.geom_type in ["Polygon", "MultiPolygon"]
+        else geom
+    )
+    polygon_mask = ~gdf.geometry.is_empty
+    removed = len(gdf) - polygon_mask.sum()
+    gdf = gdf[polygon_mask]  # drop any polygons that collapsed to empty
+    if removed > 0:
+        log.info(f"Removed {removed} polygons that collapsed to empty after erosion.")
+
+    # Explode multipolygons into single polygons (this allows us to treat multipolygons as unique polygons)
+    n_multipolygons = gdf.geometry.apply(lambda g: g.geom_type == "MultiPolygon").sum()
+    gdf = gdf.explode(index_parts=False).reset_index(drop=True)
+    n_after = len(gdf)
+    log.info(
+        f"Split {n_multipolygons} multipolygons into individual polygons. Total number of geometries after splitting multipolgons is {n_after}."
+    )
+
+    # Create unique names
+    sizes = gdf.groupby(shape_names_column)[shape_names_column].transform("size")
+    counter = gdf.groupby(shape_names_column).cumcount() + 1
+    gdf[unique_shape_names_column] = np.where(
+        sizes.eq(1), gdf[shape_names_column], gdf[shape_names_column] + counter.astype(str)
+    )
+
+    # Avoid index 0
+    if 0 in gdf.index:
+        log.warning(
+            "Index 0 found in shapes layer — shifting all indices by +1 since index 0 is reserved for background in labels layers."
+        )
+        gdf.index = gdf.index + 1
+
+    # sanity check. If this sanity check would fail in spatialdata at some point, then pass transformation to transformations parameter of add_shapes_layer.
+    assert get_transformation(gdf, get_all=True) == get_transformation(sdata[shapes_layer], get_all=True)
+
+    # Add filtered shapes layer
+    sdata = add_shapes_layer(
+        sdata,
+        input=gdf,
+        output_layer=output_shapes_layer,
+        overwrite=overwrite,
+    )
+
+    return sdata
diff --git a/src/harpy/table/__init__.pyi b/src/harpy/table/__init__.pyi
index bd9b5837..a254ba3a 100644
--- a/src/harpy/table/__init__.pyi
+++ b/src/harpy/table/__init__.pyi
@@ -4,8 +4,9 @@ from ._annotation import cluster_cleanliness, score_genes, score_genes_iter
 from ._clustering import kmeans, leiden
 from ._enrichment import nhood_enrichment
 from ._preprocess import preprocess_proteomics, preprocess_transcriptomics
+from ._region_annotations import assign_cells_to_shapes, compute_distance_to_shapes
 from ._regionprops import add_regionprop_features
-from ._table import add_table_layer, correct_marker_genes, filter_on_size
+from ._table import add_table_layer, correct_marker_genes, filter_categorical, filter_numerical, filter_on_size
 from .cell_clustering._clustering import flowsom
 from .cell_clustering._preprocess import cell_clustering_preprocess
 from .cell_clustering._weighted_channel_expression import weighted_channel_expression
@@ -16,6 +17,8 @@ __all__ = [
     "add_table_layer",
     "correct_marker_genes",
     "filter_on_size",
+    "filter_numerical",
+    "filter_categorical",
     "flowsom",
     "weighted_channel_expression",
     "cell_clustering_preprocess",
@@ -33,4 +36,6 @@ __all__ = [
     "allocate",
     "bin_counts",
     "allocate_intensity",
+    "assign_cells_to_shapes",
+    "compute_distance_to_shapes",
 ]
diff --git a/src/harpy/table/_region_annotations.py b/src/harpy/table/_region_annotations.py
new file mode 100755
index 00000000..78944fff
--- /dev/null
+++ b/src/harpy/table/_region_annotations.py
@@ -0,0 +1,604 @@
+from typing import Literal
+
+import geopandas as gpd
+import pandas as pd
+from shapely.geometry import LineString, MultiLineString, MultiPolygon, Point, Polygon
+from spatialdata import SpatialData
+
+from harpy.table._table import add_table_layer
+from harpy.utils._keys import _REGION_KEY
+from harpy.utils.pylogger import get_pylogger
+
+log = get_pylogger(__name__)
+
+
+def assign_cells_to_shapes(
+    sdata: SpatialData,
+    shapes_layer: str,
+    table_layer: str,
+    output_table_layer: str,
+    shape_names_column: str = "name",
+    unique_shape_names_column: str = "name-unique",
+    output_column: str = None,
+    mode: Literal["original_names", "unique_names", "both"] = "original_names",
+    create_column_per_shape: bool = False,
+    overlap_tolerance: float = 0.1,
+    spatial_key: str = "spatial",
+    xy_columns: tuple = None,
+    overwrite: bool = False,
+):
+    """
+    Assign cells to polygons in a shapes layer and update the sdata table layer. It is recommended to run `hp.sh.prep_region_annotations` first.
+
+    Parameters
+    ----------
+    sdata
+        The SpatialData object containing the input table layer and shapes layer.
+    shapes_layer
+        The shapes layer in `sdata.shapes` to use as input.
+    table_layer
+        The table layer in `sdata.tables` to use as input.
+    output_table_layer
+        The output table layer in `sdata.tables` to which the updated table layer will be written.
+    shape_names_column
+        Column name in shapes layer containing geometry names.
+    unique_shape_names_column
+        Column name in shapes layer containing unique geometry names.
+    output_column
+        Name of the output column in `sdata.tables[table_layer].obs` to store the shape name of the geometries a cell was found in (if `create_column_per_shape` is False).
+        For the `unique_names` mode, the output will be stored in `{output_column}-unique` in `sdata.tables[table_layer].obs` (if `create_column_per_shape` is False).
+        If create_column_per_shape is True and output_column is not None, then column names will be in the format `{output_colum}-{shape_name}`.
+        If create_column_per_shape is True and output_column is None, then column names will be in the format `{shape_name}`.
+    mode
+        When set to `original_names`, original polygon names from `shape_names_column` will be used.
+        When set to `unique_names`, unique polygon names from `unique_shape_names_column` will be used. Use `both`, to run both modes at the same time.
+    create_column_per_shape
+        If True, create one column (named according to the shape names) per shape indicating whether a cell is located inside it.
+    overlap_tolerance
+        Tolerance for detecting overlapping polygons (area units of geometry CRS).
+    spatial_key
+        Key in `sdata.tables[table_layer].obsm` containing spatial coordinates. Ignored if `xy_columns` is provided.
+    xy_columns
+        Tuple of column names in `sdata.tables[table_layer].obs` containing the x and y coordinates the cells.
+        If None, defaults to using coordinates from `sdata.tables[table_layer].obsm[spatial_key]`.
+    overwrite
+        If True, overwrites the `output_table_layer` and/or `output_shapes_layer` if it already exists in `sdata`.
+
+    Notes
+    -----
+    - Only `Polygon` and `MultiPolygon` geometries are supported. Non-polygon geometries (e.g., `Point`, `LineString`) are skipped.
+
+    Returns
+    -------
+    Modified `sdata` object with updated table layer.
+    """
+    if not output_column and not create_column_per_shape:
+        raise ValueError("Specify `output_column` or set `create_column_per_shape=True`.")
+
+    # Create copy of shapes layer
+    gdf = sdata.shapes[shapes_layer].copy()
+
+    # Create copy of table layer
+    adata = sdata.tables[table_layer].copy()
+
+    # Filter out geometries that are not Polygon or MultiPolygon
+    supported_mask = gdf.geometry.apply(lambda x: isinstance(x, (Polygon, MultiPolygon)))
+    skipped = len(gdf) - supported_mask.sum()
+    if skipped > 0:
+        log.info(f"Skipped {skipped} non-polygon geometries in {shapes_layer}.")
+    if skipped == len(gdf):
+        log.warning(
+            f"No supported geometries (Polygon, MultiPolygon) found. Skipping addition of table layer '{output_table_layer}' to sdata."
+        )
+        return sdata
+
+    gdf = gdf[supported_mask].copy().reset_index(drop=True)
+
+    # Check for overlapping polygons
+    total_area = gdf.geometry.area.sum()
+    union_area = gdf.geometry.unary_union.area
+    if total_area - union_area > overlap_tolerance and not create_column_per_shape:
+        raise ValueError(
+            f"Overlapping polygons detected in {shapes_layer}. Correct polygons or use create_column_per_shape."
+        )
+    elif total_area - union_area > 0 and not create_column_per_shape:
+        log.warning(
+            f"Overlaps detected (Δ={total_area - union_area:.3f}), below tolerance threshold {overlap_tolerance}."
+        )
+
+    # Get cell coordinates
+    if xy_columns is not None:
+        x_col, y_col = xy_columns
+        coords = adata.obs[[x_col, y_col]].to_numpy()
+    else:
+        if spatial_key not in adata.obsm:
+            raise KeyError(f"No spatial coordinates found in `obsm['{spatial_key}']` and `xy_columns` not provided.")
+        coords = adata.obsm[spatial_key]
+        if coords.shape[1] != 2:
+            raise ValueError(f"`obsm['{spatial_key}']` must have shape (n_cells, 2).")
+
+    # Function to assign cell to a polygon
+    def _assign_region(x, y, gdf, column, sindex):
+        point = Point(x, y)
+
+        # Quick bounding box search to prefilter the cells
+        candidate_idx = list(sindex.intersection(point.bounds))
+        if not candidate_idx:
+            return None
+
+        # Slower point‑in‑polygon search
+        candidate_gdf = gdf.iloc[candidate_idx]
+        match = candidate_gdf[candidate_gdf.contains(point)]
+
+        if match.empty:
+            return None
+
+        return match[column].iloc[0]
+
+    # Assign cells to polygons
+    if create_column_per_shape:
+        # Collect all relevant name columns based on mode
+        name_columns = []
+        if mode in ("original_names", "both"):
+            name_columns.append(shape_names_column)
+        if mode in ("unique_names", "both"):
+            name_columns.append(unique_shape_names_column)
+
+        # Collect all unique names across both columns
+        name_to_column = {}
+        for name_column in name_columns:
+            for name in gdf[name_column].dropna().astype(str).unique():
+                if name not in name_to_column:
+                    name_to_column[name] = name_column
+
+        # Create column per shape name
+        for shape_name, name_column in name_to_column.items():
+            col_name = f"{output_column}-{shape_name}" if output_column else shape_name
+
+            gdf_shape = gdf[gdf[name_column] == shape_name]
+            if gdf_shape.empty:
+                continue
+
+            sidx_shape = gdf_shape.sindex  # Build spatial index
+            assigned = [_assign_region(x, y, gdf_shape, name_column, sidx_shape) for x, y in coords]
+            adata.obs[col_name] = assigned
+
+    else:
+        # One output column per mode
+        run_assign_dict = {}
+        if mode in ("original_names", "both"):
+            run_assign_dict[output_column] = shape_names_column
+        if mode in ("unique_names", "both"):
+            run_assign_dict[f"{output_column}-unique"] = unique_shape_names_column
+
+        for output_column_name, name_column in run_assign_dict.items():
+            # Assign one shape name per cell (to be avoided with overlapping regions)
+            sidx = gdf.sindex  # Build spatial index
+            assigned = [_assign_region(x, y, gdf, name_column, sidx) for x, y in coords]
+            adata.obs[output_column_name] = assigned
+
+    # Add table layer
+    sdata = add_table_layer(
+        sdata,
+        adata=adata,
+        output_layer=output_table_layer,
+        region=adata.obs[_REGION_KEY].cat.categories.to_list(),
+        overwrite=overwrite,
+    )
+
+    return sdata
+
+
+def compute_distance_to_shapes(
+    sdata: SpatialData,
+    shapes_layer: str,
+    table_layer: str,
+    output_table_layer: str,
+    shape_names_column: str = "name",
+    unique_shape_names_column: str = "name-unique",
+    output_name: str = None,
+    pixel_size_um: float = 1.0,
+    modes: list[
+        Literal[
+            "nearest_edge",
+            "nearest_edge_grouped",
+            "all_edges",
+            "nearest_outer_edge",
+            "nearest_outer_edge_grouped",
+            "all_outer_edges",
+            "nearest_inner_edge",
+            "nearest_inner_edge_grouped",
+            "nearest_inner_edge_grouped_unique",
+            "all_inner_edges",
+            "nearest_centroid",
+            "nearest_centroid_grouped",
+            "all_centroids",
+            "nearest_point",
+            "nearest_point_grouped",
+            "all_points",
+        ]
+    ] = None,
+    spatial_key: str = "spatial",
+    xy_columns: tuple = None,
+    overwrite: bool = False,
+):
+    """
+    Compute distances from cells (using their centroid coordinates) to polygons in a shapes layer and update the sdata table layer.
+    It is recommended to run `hp.sh.prep_region_annotations` first.
+
+    Parameters
+    ----------
+    sdata
+        The SpatialData object containing the input table layer and shapes layer.
+    shapes_layer
+        The shapes layer in `sdata.shapes` to use as input.
+    table_layer
+        The table layer in `sdata.tables` to use as input.
+    output_table_layer
+        The output table layer in `sdata.tables` to which the updated table layer will be written.
+    shape_names_column
+        Column name in shapes layer containing geometry names.
+    unique_shape_names_column
+        Column name in shapes layer containing unique geometry names.
+    output_name
+        Prefix for new distance columns in `.obs`. If None, no prefix is added.
+    pixel_size_um
+        Scale factor to convert distances to microns. Defaults to 1 (i.e. distances are in pixels).
+    modes
+        Which distance features to calculate. Options include:
+
+        Edge distances (For Polygon and MultiPolygon geometries)
+        - `"nearest_edge"`: Distance to the nearest edge (outer or inner) of any polygon.
+                Creates `{output_name}-distance_to_nearest_edge` and `{output_name}-name_of_nearest_edge` columns.
+        - `"nearest_edge_grouped"`: For each group of shapes with the same name, compute the distance to the edge that is nearest.
+                Creates `{output_name}-distance_to_nearest_edge_of_<name>` and `{output_name}-name_of_nearest_edge_of_<name>` columns.
+        - `"all_edges"`: For each individual polygon, compute the distance to its edge.
+                Creates `{output_name}-distance_to_edge_of_<shape_unique_name>` column.
+
+        Outer edge distances (For Polygon and MultiPolygon geometries)
+        - `"nearest_outer_edge"`: Distance to the nearest outer edge of any polygon.
+                Creates `{output_name}-distance_to_nearest_outer_edge` and `{output_name}-name_of_nearest_outer_edge` columns.
+        - `"nearest_outer_edge_grouped"`: For each group of shapes with the same name, compute the distance to the outer edge that is nearest.
+                Creates `{output_name}-distance_to_nearest_outer_edge_of_<name>` and `{output_name}-name_of_nearest_outer_edge_of_<name>` columns.
+        - `"all_outer_edges"`: For each individual polygon, compute the distance to its outer edge.
+                Creates `{output_name}-distance_to_outer_edge_of_<shape_unique_name>` column.
+
+        Inner edge (hole) distances (For Polygon and MultiPolygon geometries)
+        - `"nearest_inner_edge"`: Distance to the nearest interior edge (“hole”) of any polygon.
+                Creates `{output_name}-distance_to_nearest_inner_edge` and `{output_name}-name_of_nearest_inner_edge` columns.
+        - `"nearest_inner_edge_grouped"`: For each group of shapes with the same name, compute the distance to the nearest inner edge of that group.
+                Creates `{output_name}-distance_to_nearest_inner_edge_of_<name>` and `{output_name}-name_of_nearest_inner_edge_of_<name>` columns.
+        - `"nearest_inner_edge_grouped_unique"`: For each individual polygon, compute the distance to the nearest inner edge of that polygon.
+                Creates `{output_name}-distance_to_nearest_inner_edge_of_<shape_unique_name>` and `{output_name}-name_of_nearest_inner_edge_of_<shape_unique_name>` columns.
+        - `"all_inner_edges"`: For all holes, compute the distance to each inner edge.
+                Creates `{output_name}-distance_to_inner_edge_of_<shape_unique_name>-hole<i>` column.
+
+        Centroid distances (For Polygon and MultiPolygon geometries)
+        - `"nearest_centroid"`: Distance to the nearest centroid of any polygon.
+                Creates `{output_name}-distance_to_nearest_centroid` and `{output_name}-name_of_nearest_centroid` columns.
+        - `"nearest_centroid_grouped"`: For each group of shapes with the same name, compute the distance to the centroid that is nearest.
+                Creates `{output_name}-distance_to_nearest_centroid_of_<name>` and `{output_name}-name_of_nearest_centroid_of_<name>` columns.
+        - `"all_centroids"`: For each individual polygon, compute the distance to its centroid.
+                Creates `{output_name}-distance_to_centroid_of_<shape_unique_name>` column.
+
+        Point distances (For Point geometries)
+        - `"nearest_point"`: Distance to the nearest point.
+                Creates `{output_name}-distance_to_nearest_point` and `{output_name}-name_of_nearest_point` columns.
+        - `"nearest_point_grouped"`: For each group of points with the same name, compute the nearest distance.
+                Creates `{output_name}-distance_to_nearest_point_of_<name>` and `{output_name}-name_of_nearest_point_of_<name>` columns.
+        - `"all_points"`: For each individual Point, compute the distance to point coordinates.
+                Creates `{output_name}-distance_to_point_<shape_unique_name>` column.
+
+    spatial_key
+        Key in `sdata.tables[table_layer].obsm` containing spatial coordinates. Ignored if `xy_columns` is provided.
+    xy_columns
+        Tuple of column names in `sdata.tables[table_layer].obs` containing the x and y coordinates the cells.
+        If None, defaults to using coordinates from `sdata.tables[table_layer].obsm[spatial_key]`.
+    overwrite
+        If True, overwrites the `output_table_layer` if it already exists in `sdata`.
+
+    Notes
+    -----
+    - Only `Polygon`, `MultiPolygon` and `Point` geometries are supported. Other geometries (e.g., `LineString`, `MultiPoint`) are skipped.
+
+    Returns
+    -------
+    Modified `sdata` object with updated table layer.
+    """
+    if modes is None:
+        modes = ["all_edges"]
+    if output_name is not None:
+        output_name = f"{output_name}-"
+    elif output_name is None:
+        output_name = ""
+
+    # Create copy of shapes layer
+    gdf = sdata.shapes[shapes_layer].copy()
+
+    # Create copy of table layer
+    adata = sdata.tables[table_layer].copy()
+
+    # Filter out geometries that are not Polygon, MultiPolygon or Point
+    supported_mask = gdf.geometry.apply(lambda x: isinstance(x, (Polygon, MultiPolygon, Point)))
+    skipped = len(gdf) - supported_mask.sum()
+    if skipped > 0:
+        log.info(f"Skipped {skipped} geometries in {shapes_layer} that are not Polygon, MultiPolygon or Point.")
+
+    gdf = gdf[supported_mask].copy().reset_index(drop=True)
+
+    # Separate gdf by type
+    gdf_polygons = gdf[gdf.geometry.geom_type.isin(["Polygon", "MultiPolygon"])].copy()
+    gdf_points = gdf[gdf.geometry.geom_type == "Point"].copy()
+
+    has_polygons = not gdf_polygons.empty
+    has_points = not gdf_points.empty
+
+    if not has_polygons and not has_points:
+        log.warning(
+            f"No supported geometries (Polygon, MultiPolygon, Point) found. Skipping addition of table layer '{output_table_layer}' to sdata."
+        )
+        return sdata
+
+    # Get cell coordinates
+    if xy_columns is not None:
+        x_col, y_col = xy_columns
+        coords = adata.obs[[x_col, y_col]].to_numpy()
+    else:
+        if spatial_key not in adata.obsm:
+            raise KeyError(f"No spatial coordinates found in `obsm['{spatial_key}']` and `xy_columns` not provided.")
+        coords = adata.obsm[spatial_key]
+        if coords.shape[1] != 2:
+            raise ValueError(f"`obsm['{spatial_key}']` must have shape (n_cells, 2).")
+
+    # Build point GeoDataFrame
+    pts = gpd.GeoSeries([Point(x, y) for x, y in coords], crs=gdf.crs)
+
+    pts_gdf = gpd.GeoDataFrame({"geometry": pts}, crs=gdf.crs).set_index(adata.obs.index)
+
+    # Polygon and MultiPolygon
+    if has_polygons:
+        # Extract exterior and interior boundaries
+        def _extract_exterior_lines(geom):
+            if isinstance(geom, Polygon):
+                return [geom.exterior]
+            elif isinstance(geom, MultiPolygon):
+                return [poly.exterior for poly in geom.geoms]
+            return []
+
+        def _extract_interior_lines(geom):
+            if isinstance(geom, Polygon):
+                return list(geom.interiors)
+            elif isinstance(geom, MultiPolygon):
+                lines = []
+                for poly in geom.geoms:
+                    lines.extend(poly.interiors)
+                return lines
+            return []
+
+        names = gdf_polygons[unique_shape_names_column].tolist()
+        exteriors = [MultiLineString(_extract_exterior_lines(geom)) for geom in gdf_polygons.geometry]
+        interiors = [MultiLineString(_extract_interior_lines(geom)) for geom in gdf_polygons.geometry]
+
+        ext_gdf = gpd.GeoDataFrame({unique_shape_names_column: names}, geometry=exteriors, crs=gdf.crs)
+        int_gdf = gpd.GeoDataFrame({unique_shape_names_column: names}, geometry=interiors, crs=gdf.crs)
+
+        # Edges (outer and inner)
+        if "nearest_edge" in modes:
+            log.info("Calculating 'nearest_edge' distances'")
+            all_edges_gdf = gpd.GeoDataFrame(pd.concat([ext_gdf, int_gdf], ignore_index=True), crs=gdf.crs)
+            joined = gpd.sjoin_nearest(pts_gdf, all_edges_gdf, how="left", distance_col="dist")
+            adata.obs[f"{output_name}distance_to_nearest_edge"] = joined["dist"].to_numpy() * pixel_size_um
+            adata.obs[f"{output_name}name_of_nearest_edge"] = joined[unique_shape_names_column]
+
+        if "nearest_edge_grouped" in modes:
+            log.info("Calculating 'nearest_edge_grouped' distances'")
+            for name, group in gdf_polygons.groupby(shape_names_column):
+                edges = []
+                labels = []
+                for _idx, row in group.iterrows():
+                    edge_lines = _extract_exterior_lines(row.geometry) + _extract_interior_lines(row.geometry)
+                    edges.extend(edge_lines)
+                    labels.extend([row[unique_shape_names_column]] * len(edge_lines))
+
+                edge_gdf = gpd.GeoDataFrame({"geometry": edges, unique_shape_names_column: labels}, crs=gdf.crs)
+                joined = gpd.sjoin_nearest(pts_gdf, edge_gdf, how="left", distance_col="dist")
+
+                adata.obs[f"{output_name}distance_to_nearest_edge_of_{name}"] = (
+                    joined["dist"].to_numpy() * pixel_size_um
+                )
+                adata.obs[f"{output_name}name_of_nearest_edge_of_{name}"] = joined[unique_shape_names_column]
+
+        if "all_edges" in modes:
+            log.info("Calculating 'all_edges' distances'")
+            for _, feat in gdf_polygons.reset_index(drop=True).iterrows():
+                adata.obs[f"{output_name}distance_to_edge_of_{feat[unique_shape_names_column]}"] = (
+                    pts.distance(feat.geometry.boundary).to_numpy() * pixel_size_um
+                )
+
+        # Outer edges
+        if "nearest_outer_edge" in modes:
+            log.info("Calculating 'nearest_outer_edge' distances'")
+            joined = gpd.sjoin_nearest(pts_gdf, ext_gdf, how="left", distance_col="dist")
+            adata.obs[f"{output_name}distance_to_nearest_outer_edge"] = joined["dist"].to_numpy() * pixel_size_um
+            adata.obs[f"{output_name}name_of_nearest_outer_edge"] = joined[unique_shape_names_column]
+
+        if "nearest_outer_edge_grouped" in modes:
+            log.info("Calculating 'nearest_outer_edge_grouped' distances'")
+            for name, group in gdf_polygons.groupby(shape_names_column):
+                edges = []
+                labels = []
+                for _idx, row in group.iterrows():
+                    edge_lines = _extract_exterior_lines(row.geometry)
+                    edges.extend(edge_lines)
+                    labels.extend([row[unique_shape_names_column]] * len(edge_lines))
+
+                edge_gdf = gpd.GeoDataFrame({"geometry": edges, unique_shape_names_column: labels}, crs=gdf.crs)
+                joined = gpd.sjoin_nearest(pts_gdf, edge_gdf, how="left", distance_col="dist")
+
+                adata.obs[f"{output_name}distance_to_nearest_outer_edge_of_{name}"] = (
+                    joined["dist"].to_numpy() * pixel_size_um
+                )
+                adata.obs[f"{output_name}name_of_nearest_outer_edge_of_{name}"] = joined[unique_shape_names_column]
+
+        if "all_outer_edges" in modes:
+            log.info("Calculating 'all_outer_edges' distances'")
+            for _, feat in gdf_polygons.reset_index(drop=True).iterrows():
+                adata.obs[f"{output_name}distance_to_outer_edge_of_{feat[unique_shape_names_column]}"] = (
+                    pts.distance(feat.geometry.exterior).to_numpy() * pixel_size_um
+                )
+
+        # Inner edges
+        hole_geoms = []
+        hole_names = []
+
+        for _, row in gdf_polygons.reset_index(drop=True).iterrows():
+            base = row[unique_shape_names_column]
+            geom = row.geometry
+
+            holes = _extract_interior_lines(geom)
+
+            if len(holes) == 0:
+                continue
+
+            if len(holes) == 1:
+                hole = holes[0]
+                hole_geoms.append(LineString(hole.coords))
+                hole_names.append(base)
+
+            else:
+                for i, hole in enumerate(holes, start=1):
+                    hole_geoms.append(LineString(hole.coords))
+                    hole_names.append(f"{base}-hole{i}")
+
+        holes_gdf = gpd.GeoDataFrame({unique_shape_names_column: hole_names}, geometry=hole_geoms, crs=gdf.crs)
+
+        if "nearest_inner_edge" in modes and not holes_gdf.empty:
+            log.info("Calculating 'nearest_inner_edge' distances'")
+            joined = gpd.sjoin_nearest(pts_gdf, holes_gdf, how="left", distance_col="dist")
+            adata.obs[f"{output_name}distance_to_nearest_inner_edge"] = joined["dist"].to_numpy() * pixel_size_um
+            adata.obs[f"{output_name}name_of_nearest_inner_edge"] = joined[unique_shape_names_column]
+
+        if "nearest_inner_edge_grouped" in modes:
+            log.info("Calculating 'nearest_inner_edge_grouped' distances'")
+            for name, group in gdf_polygons.groupby(shape_names_column):
+                hole_lines = []
+                hole_labels = []
+
+                for _idx, row in group.iterrows():
+                    base = row[unique_shape_names_column]
+                    for i, hole in enumerate(_extract_interior_lines(row.geometry), start=1):
+                        hole_lines.append(LineString(hole.coords))
+                        hole_labels.append(f"{base}-hole{i}")
+
+                if not hole_lines:
+                    continue
+
+                hole_gdf = gpd.GeoDataFrame({"geometry": hole_lines, "hole_name": hole_labels}, crs=gdf.crs)
+
+                joined = gpd.sjoin_nearest(pts_gdf, hole_gdf, how="left", distance_col="dist")
+
+                adata.obs[f"{output_name}distance_to_nearest_inner_edge_of_{name}"] = (
+                    joined["dist"].to_numpy() * pixel_size_um
+                )
+                adata.obs[f"{output_name}name_of_nearest_inner_edge_of_{name}"] = joined["hole_name"]
+
+        if "nearest_inner_edge_grouped_unique" in modes:
+            log.info("Calculating 'nearest_inner_edge_grouped_unique' distances'")
+            for _idx, row in gdf_polygons.iterrows():
+                base = row[unique_shape_names_column]
+                holes = _extract_interior_lines(row.geometry)
+
+                if not holes:
+                    continue
+
+                hole_geoms = [LineString(hole.coords) for hole in holes]
+                hole_names = [f"{base}-hole{i + 1}" for i in range(len(holes))]
+
+                hole_gdf = gpd.GeoDataFrame({"geometry": hole_geoms, "hole_name": hole_names}, crs=gdf.crs)
+
+                joined = gpd.sjoin_nearest(pts_gdf, hole_gdf, how="left", distance_col="dist")
+
+                adata.obs[f"{output_name}distance_to_nearest_inner_edge_of_{base}"] = (
+                    joined["dist"].to_numpy() * pixel_size_um
+                )
+                adata.obs[f"{output_name}name_of_nearest_inner_edge_of_{base}"] = joined["hole_name"]
+
+        if "all_inner_edges" in modes and not holes_gdf.empty:
+            log.info("Calculating 'all_inner_edges' distances'")
+            for _, hole in holes_gdf.iterrows():
+                hole_name = hole[unique_shape_names_column]
+                col = f"{output_name}distance_to_inner_edge_of_{hole_name}"
+                adata.obs[col] = pts.distance(hole.geometry).to_numpy() * pixel_size_um
+
+        # Centroids
+        centroids_df = gdf_polygons.reset_index(drop=True)[[unique_shape_names_column]].copy()
+        centroids_df["geometry"] = gdf_polygons.geometry.centroid.reset_index(drop=True)
+
+        centroids_gdf = gpd.GeoDataFrame(centroids_df, geometry="geometry", crs=gdf.crs)
+
+        if "nearest_centroid" in modes:
+            log.info("Calculating 'nearest_centroid' distances'")
+            joined = gpd.sjoin_nearest(pts_gdf, centroids_gdf, how="left", distance_col="dist")
+            adata.obs.loc[joined.index, f"{output_name}distance_to_nearest_centroid"] = (
+                joined["dist"].to_numpy() * pixel_size_um
+            )
+            adata.obs.loc[joined.index, f"{output_name}name_of_nearest_centroid"] = joined[
+                unique_shape_names_column
+            ].to_numpy()
+
+        if "nearest_centroid_grouped" in modes:
+            log.info("Calculating 'nearest_centroid_grouped' distances'")
+            for name, group in gdf_polygons.groupby(shape_names_column):
+                centroids = group.geometry.centroid
+                labels = group[unique_shape_names_column].tolist()
+
+                centroid_gdf = gpd.GeoDataFrame({"geometry": centroids, unique_shape_names_column: labels}, crs=gdf.crs)
+
+                joined = gpd.sjoin_nearest(pts_gdf, centroid_gdf, how="left", distance_col="dist")
+
+                adata.obs[f"{output_name}distance_to_nearest_centroid_of_{name}"] = (
+                    joined["dist"].to_numpy() * pixel_size_um
+                )
+                adata.obs[f"{output_name}name_of_nearest_centroid_of_{name}"] = joined[unique_shape_names_column]
+
+        if "all_centroids" in modes:
+            log.info("Calculating 'all_centroids' distances'")
+            for _idx, feat in gdf_polygons.reset_index(drop=True).iterrows():
+                adata.obs[f"{output_name}distance_to_centroid_of_{feat[unique_shape_names_column]}"] = (
+                    pts.distance(feat.geometry.centroid).to_numpy() * pixel_size_um
+                )
+
+    # Point
+    if has_points:
+        if "nearest_point" in modes:
+            log.info("Calculating 'nearest_point' distances'")
+            joined = gpd.sjoin_nearest(pts_gdf, gdf_points, how="left", distance_col="dist")
+            adata.obs[f"{output_name}distance_to_nearest_point"] = joined["dist"].to_numpy() * pixel_size_um
+            adata.obs[f"{output_name}name_of_nearest_point"] = joined[f"{shape_names_column}-unique"]
+
+        if "nearest_point_grouped" in modes:
+            log.info("Calculating 'nearest_point_grouped' distances'")
+            for name, group in gdf_points.groupby(shape_names_column):
+                joined = gpd.sjoin_nearest(pts_gdf, group, how="left", distance_col="dist")
+                adata.obs[f"{output_name}distance_to_nearest_point_of_{name}"] = (
+                    joined["dist"].to_numpy() * pixel_size_um
+                )
+                adata.obs[f"{output_name}name_of_nearest_point_of_{name}"] = joined[f"{shape_names_column}-unique"]
+
+        if "all_points" in modes:
+            log.info("Calculating 'all_points' distances'")
+            for _, row in gdf_points.iterrows():
+                adata.obs[f"{output_name}distance_to_point_{row[unique_shape_names_column]}"] = (
+                    pts.distance(row.geometry).to_numpy() * pixel_size_um
+                )
+
+    # Add table layer
+    sdata = add_table_layer(
+        sdata,
+        adata=adata,
+        output_layer=output_table_layer,
+        region=adata.obs[_REGION_KEY].cat.categories.to_list(),
+        overwrite=overwrite,
+    )
+
+    return sdata
diff --git a/src/harpy/table/_table.py b/src/harpy/table/_table.py
index bc467b02..69f1dbe4 100644
--- a/src/harpy/table/_table.py
+++ b/src/harpy/table/_table.py
@@ -1,8 +1,10 @@
 from __future__ import annotations
 
-from collections.abc import Iterable
+from collections.abc import Iterable, Sequence
+from typing import Literal
 
 import numpy as np
+import pandas as pd
 from anndata import AnnData
 from spatialdata import SpatialData
 from spatialdata.models import TableModel
@@ -223,15 +225,17 @@ def filter_on_size(
     labels_layer: list[str],
     table_layer: str,
     output_layer: str,
-    min_size: int = 100,
-    max_size: int = 100000,
-    update_shapes_layers: bool = True,
     cellsize_key=_CELLSIZE_KEY,
+    min_size: float | int | None = None,
+    max_size: float | int | None = None,
+    update_shapes_layers: bool = True,
+    prefix_filtered_shapes_layer: str = "filtered_size",
     overwrite: bool = False,
 ) -> SpatialData:
     """Returns the updated SpatialData object.
 
     All cells with a size outside of the min and max size range are removed using the `cellsize_key` in `.obs`. Run e.g. `harpy.tb.preprocess_transcriptomics` or `harpy.tb.preprocess_proteomics` to obtain cell sizes.
+    Cells are kept if min_size ≤ cellsize_key ≤ max_size.
 
     Parameters
     ----------
@@ -246,16 +250,90 @@ def filter_on_size(
         The table layer in `sdata`.
     output_layer
         The output table layer in `sdata`.
+    cellsize_key
+        Column in `sdata.tables[table_layer].obs` containing cell sizes.
     min_size
-        minimum size in pixels.
+        minimum size in pixels. If None, this value is not used for filtering.
     max_size
-        maximum size in pixels.
+        maximum size in pixels. If None, this value is not used for filtering.
     update_shapes_layers
         Whether to filter the shapes layers associated with `labels_layer`.
         If set to `True`, cells that do not appear in resulting `output_layer` (with `_REGION_KEY` equal to `labels_layer`) will be removed from the shapes layers (via `_INSTANCE_KEY`) in the `sdata` object.
-        Filtered shapes will be added to `sdata` with prefix 'filtered_size'.
-    cellsize_key
-        Column in `sdata.tables[table_layer].obs` containing cell sizes.
+        Filtered shapes will be added to `sdata` with prefix `prefix_filtered_shapes_layer`.
+    prefix_filtered_shapes_layer
+        prefix to use for filtered shapes layer if update_shapes_layers is True. Defaults to 'filtered_size'.
+    overwrite
+        If True, overwrites the `output_layer` if it already exists in `sdata`.
+
+    Returns
+    -------
+    The updated SpatialData object.
+    """
+    sdata = filter_numerical(
+        sdata=sdata,
+        labels_layer=labels_layer,
+        table_layer=table_layer,
+        output_layer=output_layer,
+        numerical_column=cellsize_key,
+        min_value=min_size,
+        max_value=max_size,
+        target="obs",
+        update_shapes_layers=update_shapes_layers,
+        prefix_filtered_shapes_layer=prefix_filtered_shapes_layer,
+        overwrite=overwrite,
+    )
+
+    return sdata
+
+
+def filter_numerical(
+    sdata: SpatialData,
+    labels_layer: list[str],
+    table_layer: str,
+    output_layer: str,
+    numerical_column: str,
+    min_value: float | int | None = None,
+    max_value: float | int | None = None,
+    target: Literal["obs", "var"] = "obs",
+    update_shapes_layers: bool = True,
+    prefix_filtered_shapes_layer: str = "filtered",
+    overwrite: bool = False,
+) -> SpatialData:
+    """Returns the updated SpatialData object.
+
+    All observations (obs) or features (var) with values outside of the min and max size range are removed using the `numerical_column` in `.obs` or `.var`.
+    observations/features are kept if `min_value` ≤ `numerical_column` ≤ `max_value`.
+
+    Parameters
+    ----------
+    sdata
+        The SpatialData object.
+    labels_layer
+        The labels layer(s) of `sdata` used to select the observations via the _REGION_KEY  in `sdata.tables[table_layer].obs`.
+        Note that if `output_layer` is equal to `table_layer` and overwrite is True,
+        observations in `sdata.tables[table_layer]` linked to other `labels_layer` (via the _REGION_KEY), will be removed from `sdata.tables[table_layer]`
+        (also from the backing zarr store if it is backed).
+    table_layer
+        The table layer in `sdata`.
+    output_layer
+        The output table layer in `sdata`.
+    numerical_column
+        Name of numerical column to use for filtering.
+        If `target='obs'`, this refers to `.obs[numerical_column]`.
+        If `target='var'`, this refers to `.var[numerical_column]`.
+    target
+        Whether to filter observations ('obs') or features ('var').
+    min_value
+        minimum value of `numerical_column` (inclusive). If None, lower bound is ignored.
+    max_value
+        maximum value of `numerical_column` (inclusive). If None, upper bound is ignored.
+    update_shapes_layers
+        Whether to filter the shapes layers associated with `labels_layer`.
+        If set to `True`, observations that do not appear in resulting `output_layer` (with `_REGION_KEY` equal to `labels_layer`) will be removed from the shapes layers (via `_INSTANCE_KEY`) in the `sdata` object.
+        Filtered shapes will be added to `sdata` with prefix `prefix_filtered_shapes_layer`.
+        Ignored if `target='var'`.
+    prefix_filtered_shapes_layer
+        prefix to use for filtered shapes layer if update_shapes_layers is True. Defaults to 'filtered'.
     overwrite
         If True, overwrites the `output_layer` if it already exists in `sdata`.
 
@@ -263,14 +341,164 @@ def filter_on_size(
     -------
     The updated SpatialData object.
     """
+    if target not in {"obs", "var"}:
+        raise ValueError("target must be either 'obs' or 'var'")
+
     process_table_instance = ProcessTable(sdata, labels_layer=labels_layer, table_layer=table_layer)
-    adata = process_table_instance._get_adata()
-    start = adata.shape[0]
+    adata = process_table_instance._get_adata().copy()
+    start = adata.shape[0] if target == "obs" else adata.shape[1]
+
+    df = getattr(adata, target)
+    label = "observations" if target == "obs" else "features"
 
-    # Filter cells based on size and distance
-    # need to do the copy because we pop the spatialdata_attrs in add_table_layer, otherwise it would not be updated inplace
-    adata = adata[adata.obs[cellsize_key] < max_size, :].copy()
-    adata = adata[adata.obs[cellsize_key] > min_size, :].copy()
+    if numerical_column not in df.columns:
+        raise ValueError(f"Column '{numerical_column}' not found in '{table_layer}.{target}'. ")
+
+    if not np.issubdtype(df[numerical_column].dtype, np.number):
+        raise ValueError(f"Column '{numerical_column}' must be numeric, but dtype is {df[numerical_column].dtype}.")
+
+    # Filter observations based on min and max values
+    mask = pd.Series(True, index=df.index)
+
+    if min_value is not None:
+        below = (df[numerical_column] < min_value).sum()
+        log.info(f"Removed {below} {label} below {min_value}.")
+        mask &= df[numerical_column] >= min_value
+    if max_value is not None:
+        above = (df[numerical_column] > max_value).sum()
+        log.info(f"Removed {above} {label} above {max_value}.")
+        mask &= df[numerical_column] <= max_value
+
+    if target == "obs":
+        adata = adata[mask, :].copy()
+    else:
+        adata = adata[:, mask].copy()
+
+    sdata = add_table_layer(
+        sdata,
+        adata=adata,
+        output_layer=output_layer,
+        region=process_table_instance.labels_layer,
+        overwrite=overwrite,
+    )
+
+    if target == "obs" and update_shapes_layers:
+        for _labels_layer in process_table_instance.labels_layer:
+            sdata = filter_shapes_layer(
+                sdata,
+                table_layer=output_layer,
+                labels_layer=_labels_layer,
+                prefix_filtered_shapes_layer=prefix_filtered_shapes_layer,
+            )
+
+    removed = start - (adata.shape[0] if target == "obs" else adata.shape[1])
+    kept = start - removed
+    log.info(
+        f"Removed {removed}/{start} {label} based on '{numerical_column}' (min={min_value}, max={max_value}) and kept {kept}."
+    )
+
+    return sdata
+
+
+def filter_categorical(
+    sdata: SpatialData,
+    labels_layer: list[str],
+    table_layer: str,
+    output_layer: str,
+    categorical_column: str | None = None,
+    include_values: str | Sequence[str] | None = None,
+    exclude_values: str | Sequence[str] | None = None,
+    target: Literal["obs", "var"] = "obs",
+    update_shapes_layers: bool = True,
+    prefix_filtered_shapes_layer: str = "filtered",
+    overwrite: bool = False,
+) -> SpatialData:
+    """
+    Removes or keeps observations (obs) or features (var) based on specific values in a categorical column of
+    `.obs` or `.var`. or based on their index.
+
+    Parameters
+    ----------
+    sdata
+        The SpatialData object.
+    labels_layer
+        The labels layer(s) of `sdata` used to select the observations via the _REGION_KEY  in `sdata.tables[table_layer].obs`.
+        Note that if `output_layer` is equal to `table_layer` and overwrite is True,
+        observations in `sdata.tables[table_layer]` linked to other `labels_layer` (via the _REGION_KEY), will be removed from `sdata.tables[table_layer]`
+        (also from the backing zarr store if it is backed).
+    table_layer
+        The table layer in `sdata`.
+    output_layer
+        The output table layer in `sdata`.
+    categorical_column
+        Name of categorical column to use for filtering.
+        If `target='obs'`, this refers to `.obs[categorical_column]`.
+        If `target='var'`, this refers to `.var[categorical_column]`.
+        If None, filtering is performed based on the index.
+    include_values
+        Value(s) to keep. Only observations/features whose `categorical_column` matches one of these
+        values will be kept. Mutually exclusive with `exclude_values`.
+    exclude_values
+        Value(s) to remove. Only observations/features whose `categorical_column` matches one of these
+        values will be removed. Mutually exclusive with `include_values`.
+    target
+        Whether to filter observations ('obs') or features ('var').
+    update_shapes_layers
+        Whether to filter the shapes layers associated with `labels_layer`.
+        If set to `True`, observations that do not appear in resulting `output_layer` (with `_REGION_KEY` equal to `labels_layer`) will be removed from the shapes layers (via `_INSTANCE_KEY`) in the `sdata` object.
+        Filtered shapes will be added to `sdata` with prefix `prefix_filtered_shapes_layer`.
+        Ignored if `target='var'`.
+    prefix_filtered_shapes_layer
+        prefix to use for filtered shapes layer if update_shapes_layers is True. Defaults to 'filtered'.
+    overwrite
+        If True, overwrites the `output_layer` if it already exists in `sdata`.
+
+    Returns
+    -------
+    The updated SpatialData object.
+    """
+    if include_values is not None and exclude_values is not None:
+        raise ValueError("Specify only one of 'include_values' or 'exclude_values'.")
+
+    if target not in {"obs", "var"}:
+        raise ValueError("target must be either 'obs' or 'var'")
+
+    process_table_instance = ProcessTable(sdata, labels_layer=labels_layer, table_layer=table_layer)
+    adata = process_table_instance._get_adata().copy()
+    start = adata.shape[0] if target == "obs" else adata.shape[1]
+
+    df = getattr(adata, target)
+    label = "observations" if target == "obs" else "features"
+
+    if categorical_column is not None and categorical_column not in df.columns:
+        raise ValueError(f"Column '{categorical_column}' not found in '{table_layer}.{target}'.")
+
+    # Ensure include/exclude are lists
+    if isinstance(include_values, str):
+        include_values = [include_values]
+    if isinstance(exclude_values, str):
+        exclude_values = [exclude_values]
+
+    # Filter
+    mask = pd.Series(True, index=df.index)
+
+    if categorical_column is not None:
+        # Filtering by column
+        if include_values is not None:
+            mask &= df[categorical_column].isin(include_values)
+        elif exclude_values is not None:
+            mask &= ~df[categorical_column].isin(exclude_values)
+    else:
+        # Filtering by index
+        if include_values is not None:
+            mask &= df.index.isin(include_values)
+        elif exclude_values is not None:
+            mask &= ~df.index.isin(exclude_values)
+
+    if target == "obs":
+        adata = adata[mask, :].copy()
+    else:
+        adata = adata[:, mask].copy()
 
     sdata = add_table_layer(
         sdata,
@@ -280,17 +508,20 @@ def filter_on_size(
         overwrite=overwrite,
     )
 
-    if update_shapes_layers:
+    if target == "obs" and update_shapes_layers:
         for _labels_layer in process_table_instance.labels_layer:
             sdata = filter_shapes_layer(
                 sdata,
                 table_layer=output_layer,
                 labels_layer=_labels_layer,
-                prefix_filtered_shapes_layer="filtered_size",
+                prefix_filtered_shapes_layer=prefix_filtered_shapes_layer,
             )
 
-    filtered = start - adata.shape[0]
-    log.info(f"{filtered} cells were filtered out based on size.")
+    kept = adata.shape[0] if target == "obs" else adata.shape[1]
+    removed = start - kept
+    if categorical_column is None:
+        categorical_column = "index"
+    log.info(f"Removed {removed}/{start} {label} based on '{categorical_column}' filtering (kept {kept}).")
 
     return sdata