diff --git a/.gitignore b/.gitignore
index ccd784f..36a793e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -209,3 +209,10 @@ cython_debug/
marimo/_static/
marimo/_lsp/
__marimo__/
+
+# model files
+*.pt
+*.pth
+*.onnx
+*.engine
+*.ts
diff --git a/README.md b/README.md
index 49392b8..21352a8 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,8 @@ ROS2 perception stack for generalist robotics.
## Packages
- **track_anything** - EdgeTAM tracking + 3D segmentation with RGBD
+- **detect_anything** - YOLO-E detection node and utilities
+- **semantic_mapping** - semantic 3D object mapping with VLM integration (messages, mapper, and mapping node)
- **vector_perception_utils** - Image and point cloud utilities
## Installation
@@ -21,6 +23,7 @@ pip install -r requirements.txt
# Build
source /opt/ros/jazzy/setup.bash
colcon build
+```
diff --git a/detect_anything/CMakeLists.txt b/detect_anything/CMakeLists.txt
new file mode 100644
index 0000000..c8e9188
--- /dev/null
+++ b/detect_anything/CMakeLists.txt
@@ -0,0 +1,47 @@
+cmake_minimum_required(VERSION 3.10)
+project(detect_anything)
+
+find_package(ament_cmake REQUIRED)
+find_package(rclpy REQUIRED)
+find_package(sensor_msgs REQUIRED)
+find_package(std_msgs REQUIRED)
+find_package(vision_msgs REQUIRED)
+find_package(cv_bridge REQUIRED)
+find_package(rosidl_default_generators REQUIRED)
+find_package(Python3 REQUIRED COMPONENTS Interpreter)
+
+rosidl_generate_interfaces(${PROJECT_NAME}
+ "msg/DetectionResult.msg"
+ DEPENDENCIES std_msgs sensor_msgs
+)
+
+set(PYTHON_INSTALL_DIR "lib/python${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}/site-packages")
+
+install(
+ DIRECTORY detect_anything
+ DESTINATION ${PYTHON_INSTALL_DIR}
+)
+
+install(
+ PROGRAMS
+ scripts/detection_node
+ DESTINATION lib/${PROJECT_NAME}
+)
+
+install(
+ FILES resource/detect_anything
+ DESTINATION share/${PROJECT_NAME}/resource
+)
+
+install(
+ DIRECTORY config
+ DESTINATION share/${PROJECT_NAME}
+)
+
+set(ENV_HOOK "${CMAKE_CURRENT_SOURCE_DIR}/env-hooks/venv_pythonpath.sh.in")
+set(ENV_HOOK_OUT "${CMAKE_CURRENT_BINARY_DIR}/ament_cmake_environment_hooks/venv_pythonpath.sh")
+configure_file(${ENV_HOOK} ${ENV_HOOK_OUT} @ONLY)
+ament_environment_hooks(${ENV_HOOK_OUT})
+
+ament_export_dependencies(rosidl_default_runtime)
+ament_package()
diff --git a/detect_anything/README.md b/detect_anything/README.md
new file mode 100644
index 0000000..1e94a06
--- /dev/null
+++ b/detect_anything/README.md
@@ -0,0 +1,28 @@
+# detect_anything
+
+YOLO-E detection node that publishes `DetectionResult` with cropped masks and an annotated overlay topic.
+
+## What’s inside
+- `detect_anything/detection.py`: detection results container and Ultralytics parsing helpers.
+- `detect_anything/yoloe.py`: YOLO-E wrapper with prompt support and basic filtering.
+- `detect_anything/detection_node.py`: ROS2 node wiring the detector to `DetectionResult`.
+- `msg/DetectionResult.msg`: compressed image + cropped mask array.
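+
+The detector can also be used outside ROS. A minimal sketch (the weights path, image file, and prompt list are illustrative):
+
+```python
+import cv2
+
+from detect_anything.yoloe import Yoloe2DDetector, YoloePromptMode
+
+detector = Yoloe2DDetector(
+    model_path="/path/to/yoloe/models",   # hypothetical weights directory
+    model_name="yoloe-11s-seg.pt",
+    prompt_mode=YoloePromptMode.PROMPT,
+    conf=0.5,
+)
+detector.set_prompts(text=["chair", "table", "bottle"])
+
+image = cv2.imread("frame.jpg")           # BGR, as the node provides
+results = detector.process_image(image)   # returns DetectionResults
+cv2.imwrite("overlay.jpg", results.overlay())
+```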
+
+## Quick start
+```bash
+source ~/vector_venv/bin/activate
+source /opt/ros/jazzy/setup.bash
+colcon build --packages-select detect_anything
+source install/setup.bash
+
+ros2 run detect_anything detection_node \
+  --ros-args -p model_path:=/path/to/yoloe/models \
+  -p model_name:=yoloe-11s-seg-pf.pt \
+  -p prompt_mode:=lrpc \
+  -p conf:=0.6 \
+  -p max_area_ratio:=0.3 \
+  -p use_compressed:=false \
+  -p image_topic:=/camera/image
+```
+
+Topics:
+- Publishes `/detection_result` (`detect_anything/DetectionResult`) and `/annotated_image_detection` (`sensor_msgs/Image`).
+- Subscribes to `/camera/image/compressed` by default (`use_compressed` defaults to `true`); pass `use_compressed:=false` to use the raw `/camera/image` topic.
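+
+A downstream node can decode the message back into `DetectionResults` (full-size masks are reconstructed from the bbox-cropped ones). A minimal consumer sketch (node and callback names are illustrative):
+
+```python
+import rclpy
+from rclpy.node import Node
+
+from detect_anything.detection import DetectionResults
+from detect_anything.msg import DetectionResult
+
+
+class DetectionConsumer(Node):
+    def __init__(self) -> None:
+        super().__init__("detection_consumer")
+        self.create_subscription(DetectionResult, "/detection_result", self._on_detections, 10)
+
+    def _on_detections(self, msg: DetectionResult) -> None:
+        results = DetectionResults.from_msg(msg)  # decompresses image and masks
+        for det in results.detections:
+            self.get_logger().info(f"{det.label} ({det.confidence:.2f}) bbox={det.bbox}")
+
+
+def main() -> None:
+    rclpy.init()
+    rclpy.spin(DetectionConsumer())
+
+
+if __name__ == "__main__":
+    main()
+```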
diff --git a/detect_anything/config/objects.yaml b/detect_anything/config/objects.yaml
new file mode 100644
index 0000000..371f186
--- /dev/null
+++ b/detect_anything/config/objects.yaml
@@ -0,0 +1,46 @@
+# Simple list of object names for prompting YOLO-E
+- chair
+- desk
+- tv_monitor
+- sofa
+- unknown
+- printer
+- coffee machine
+- refrigerator
+- trash can
+- shoe
+- sink
+- table
+- oven
+- bed
+- painting
+- bulletin board
+- plant
+- vase
+- cabinet
+- shelf
+- book
+- cup
+- sculpture
+- keyboard
+- mouse
+- clock
+- phone
+- toilet
+- bathtub
+- microwave oven
+- pan
+- suitcase
+- light
+- curtain
+- whiteboard
+- shower knob
+- bottle
+- water dispenser
+- vending machine
+- laptop
+- bag
+- locker
+- picture
+- cardboard
+- extinguisher
diff --git a/detect_anything/detect_anything/__init__.py b/detect_anything/detect_anything/__init__.py
new file mode 100644
index 0000000..9c61aab
--- /dev/null
+++ b/detect_anything/detect_anything/__init__.py
@@ -0,0 +1 @@
+"""detect_anything package."""
diff --git a/detect_anything/detect_anything/detection.py b/detect_anything/detect_anything/detection.py
new file mode 100644
index 0000000..1d64d76
--- /dev/null
+++ b/detect_anything/detect_anything/detection.py
@@ -0,0 +1,256 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import List, Optional, Sequence
+
+from cv_bridge import CvBridge
+import cv2
+import numpy as np
+
+from detect_anything.msg import DetectionResult
+
+_BRIDGE = CvBridge()
+
+
+@dataclass
+class SingleDetectionResult:
+ """Simple container for a YOLO/Ultralytics detection with a segmentation mask."""
+
+ bbox: tuple[float, float, float, float]
+ track_id: int
+ class_id: int
+ label: str
+ confidence: float
+ mask: np.ndarray
+
+ def area(self) -> float:
+ x1, y1, x2, y2 = self.bbox
+ return max(0.0, (x2 - x1)) * max(0.0, (y2 - y1))
+
+
+@dataclass
+class DetectionResults:
+ """Container for detections plus the source image."""
+
+ image: np.ndarray
+ detections: List[SingleDetectionResult]
+
+ def overlay(self, alpha: float = 0.4, thickness: int = 2) -> np.ndarray:
+ """Draw bboxes and masks on the image."""
+ output = self.image.copy()
+ for det in self.detections:
+ rng = np.random.default_rng(det.track_id)
+ color = tuple(int(c) for c in rng.integers(50, 255, size=3))
+
+ if det.mask.size:
+ mask_bool = det.mask.astype(bool)
+ if mask_bool.shape[:2] == output.shape[:2]:
+ colored_mask = np.zeros_like(output)
+ colored_mask[mask_bool] = color
+ output[mask_bool] = cv2.addWeighted(
+ output[mask_bool], 1 - alpha, colored_mask[mask_bool], alpha, 0
+ )
+ contours, _ = cv2.findContours(det.mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+ cv2.drawContours(output, contours, -1, color, thickness)
+
+ x1, y1, x2, y2 = map(int, det.bbox)
+ cv2.rectangle(output, (x1, y1), (x2, y2), color, thickness)
+
+ label_text = f"{det.label} {det.confidence:.2f}"
+ label_text = f"{label_text} | id {det.track_id}"
+
+ (text_w, text_h), baseline = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+ cv2.rectangle(
+ output,
+ (x1, max(0, y1 - text_h - baseline - 2)),
+ (x1 + text_w, max(0, y1)),
+ color,
+ -1,
+ )
+ cv2.putText(
+ output,
+ label_text,
+ (x1, max(0, y1 - baseline - 2)),
+ cv2.FONT_HERSHEY_SIMPLEX,
+ 0.5,
+ (255, 255, 255),
+ 1,
+ cv2.LINE_AA,
+ )
+ return output
+
+ def to_msg(self, header) -> DetectionResult:
+ """
+ Convert detections into a DetectionResult ROS message.
+ """
+ msg = DetectionResult()
+ msg.header = header
+ msg.image = _BRIDGE.cv2_to_compressed_imgmsg(self.image, dst_format="jpeg")
+ msg.image.header = header
+
+ for det in self.detections:
+ msg.track_id.append(int(det.track_id))
+ msg.x1.append(float(det.bbox[0]))
+ msg.y1.append(float(det.bbox[1]))
+ msg.x2.append(float(det.bbox[2]))
+ msg.y2.append(float(det.bbox[3]))
+ msg.label.append(str(det.label))
+ msg.confidence.append(float(det.confidence))
+
+ mask = det.mask
+ # Crop mask to bbox and align origin to bbox top-left for transport.
+ x1, y1, x2, y2 = [int(v) for v in det.bbox]
+ h, w = self.image.shape[:2]
+ x1 = max(0, min(x1, w))
+ x2 = max(0, min(x2, w))
+ y1 = max(0, min(y1, h))
+ y2 = max(0, min(y2, h))
+ crop_h = max(1, y2 - y1)
+ crop_w = max(1, x2 - x1)
+ if mask.size == 0:
+ mask = np.zeros((crop_h, crop_w), dtype=np.uint8)
+ else:
+ mask = mask[y1:y2, x1:x2]
+ if mask.shape != (crop_h, crop_w):
+ mask = cv2.resize(
+ mask.astype(np.uint8),
+ (crop_w, crop_h),
+ interpolation=cv2.INTER_NEAREST,
+ )
+
+ mask_msg = _BRIDGE.cv2_to_compressed_imgmsg(mask, dst_format="png")
+ mask_msg.header = header
+ msg.masks.append(mask_msg)
+
+ return msg
+
+ @classmethod
+ def from_msg(cls, msg: DetectionResult) -> DetectionResults:
+ """
+ Construct DetectionResults from a DetectionResult ROS message.
+ Masks are converted back to full-size overlays at bbox locations.
+ """
+ image = _BRIDGE.compressed_imgmsg_to_cv2(msg.image)
+ detections: List[SingleDetectionResult] = []
+
+ for i in range(len(msg.track_id)):
+ bbox = (
+ float(msg.x1[i]),
+ float(msg.y1[i]),
+ float(msg.x2[i]),
+ float(msg.y2[i]),
+ )
+ mask_img = _BRIDGE.compressed_imgmsg_to_cv2(msg.masks[i], desired_encoding="mono8")
+
+ full_mask = np.zeros(image.shape[:2], dtype=np.uint8)
+ x1, y1, x2, y2 = [int(v) for v in bbox]
+ h, w = image.shape[:2]
+ x1 = max(0, min(x1, w))
+ x2 = max(0, min(x2, w))
+ y1 = max(0, min(y1, h))
+ y2 = max(0, min(y2, h))
+ crop_h = max(1, y2 - y1)
+ crop_w = max(1, x2 - x1)
+ if mask_img.shape[:2] != (crop_h, crop_w):
+ mask_img = cv2.resize(mask_img, (crop_w, crop_h), interpolation=cv2.INTER_NEAREST)
+ full_mask[y1:y2, x1:x2] = mask_img
+
+ detections.append(
+ SingleDetectionResult(
+ bbox=bbox,
+ track_id=int(msg.track_id[i]),
+ class_id=0, # class_id is not transmitted in DetectionResult
+ label=str(msg.label[i]),
+ confidence=float(msg.confidence[i]),
+ mask=full_mask,
+ )
+ )
+
+ return cls(image=image, detections=detections)
+
+
+def _from_ultralytics_result(
+ result,
+ idx: int,
+ image_shape: Optional[tuple[int, int]] = None,
+ names: Optional[Sequence[str]] = None,
+) -> SingleDetectionResult:
+ """Convert a single Ultralytics Results entry into a SingleDetectionResult."""
+ from ultralytics.engine.results import Results # local import
+
+ if not isinstance(result, Results):
+ raise TypeError("result must be an ultralytics Results object")
+ if result.boxes is None:
+ raise ValueError("Result has no boxes")
+
+ bbox_array = result.boxes.xyxy[idx].cpu().numpy()
+ bbox = (
+ float(bbox_array[0]),
+ float(bbox_array[1]),
+ float(bbox_array[2]),
+ float(bbox_array[3]),
+ )
+
+ confidence = float(result.boxes.conf[idx].cpu())
+ class_id = int(result.boxes.cls[idx].cpu())
+
+ resolved_names = names
+ if resolved_names is None and hasattr(result, "names"):
+ resolved_names = result.names # type: ignore[assignment]
+
+ label = f"class_{class_id}"
+ if resolved_names is not None:
+ if isinstance(resolved_names, dict):
+ label = resolved_names.get(class_id, label)
+ elif isinstance(resolved_names, (list, tuple)) and class_id < len(resolved_names):
+ label = resolved_names[class_id]
+
+ track_id = idx
+ if hasattr(result.boxes, "id") and result.boxes.id is not None:
+ track_id = int(result.boxes.id[idx].cpu())
+
+ target_h, target_w = (image_shape or (None, None))
+ if target_h is None or target_w is None:
+ if hasattr(result, "orig_shape") and result.orig_shape:
+ target_h, target_w = result.orig_shape[:2]
+
+ mask = np.zeros((target_h, target_w), dtype=np.uint8) if target_h and target_w else np.zeros((0, 0), dtype=np.uint8)
+ if result.masks is not None and idx < len(result.masks.data):
+ mask_tensor = result.masks.data[idx]
+ mask_np = mask_tensor.cpu().numpy()
+
+ if mask_np.ndim == 3:
+ mask_np = mask_np.squeeze()
+
+ if target_h and target_w and mask_np.shape != (target_h, target_w):
+ mask_np = cv2.resize(mask_np.astype(np.float32), (target_w, target_h), interpolation=cv2.INTER_LINEAR)
+
+ mask = (mask_np > 0.5).astype(np.uint8) * 255 # type: ignore[assignment]
+
+ return SingleDetectionResult(
+ bbox=bbox,
+ track_id=track_id,
+ class_id=class_id,
+ label=label,
+ confidence=confidence,
+ mask=mask,
+ )
+
+
+def from_ultralytics_results(
+ results,
+ image_shape: Optional[tuple[int, int]] = None,
+ names: Optional[Sequence[str]] = None,
+) -> List[SingleDetectionResult]:
+ """Convert a list of Ultralytics Results objects into detections with masks."""
+ detections: List[SingleDetectionResult] = []
+ for result in results:
+ if result.boxes is None:
+ continue
+ num_detections = len(result.boxes.xyxy)
+ for i in range(num_detections):
+ detections.append(_from_ultralytics_result(result, i, image_shape=image_shape, names=names))
+ return detections
+
+
+__all__ = ["DetectionResults", "SingleDetectionResult", "from_ultralytics_results"]
diff --git a/detect_anything/detect_anything/detection_node.py b/detect_anything/detect_anything/detection_node.py
new file mode 100644
index 0000000..aebd0e8
--- /dev/null
+++ b/detect_anything/detect_anything/detection_node.py
@@ -0,0 +1,169 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import List, Optional
+
+import yaml
+import rclpy
+from ament_index_python.packages import get_package_share_directory
+from cv_bridge import CvBridge
+from sensor_msgs.msg import CompressedImage, Image
+from rclpy.node import Node
+from rclpy.qos import DurabilityPolicy, QoSProfile, ReliabilityPolicy
+
+from detect_anything.detection import DetectionResults
+from detect_anything.yoloe import Yoloe2DDetector, YoloePromptMode
+from detect_anything.msg import DetectionResult
+
+
+class DetectionNode(Node):
+ """ROS2 node that runs YOLO-E detection and publishes DetectionResult."""
+
+ def __init__(self) -> None:
+ super().__init__("detect_anything_detection_node")
+ self.bridge = CvBridge()
+
+ # Parameters
+ defaults = self._default_params()
+ self.declare_parameter("image_topic", defaults["image_topic"])
+ self.declare_parameter("compressed_image_topic", defaults["compressed_image_topic"])
+ self.declare_parameter("use_compressed", defaults["use_compressed"])
+ self.declare_parameter("model_path", defaults["model_path"])
+ self.declare_parameter("model_name", defaults["model_name"])
+ self.declare_parameter("device", defaults["device"])
+ self.declare_parameter("prompt_mode", defaults["prompt_mode"])
+ self.declare_parameter("objects_file", defaults["objects_file"])
+ self.declare_parameter("conf", defaults["conf"])
+ self.declare_parameter("max_area_ratio", defaults["max_area_ratio"])
+
+ params = self._default_params()
+ image_topic = str(self.get_parameter("image_topic").value)
+ compressed_image_topic = str(self.get_parameter("compressed_image_topic").value)
+ self.use_compressed = bool(self.get_parameter("use_compressed").value)
+
+ model_path_val = self.get_parameter("model_path").value
+ model_path = str(model_path_val) if model_path_val not in (None, "") else params["model_path"]
+ if str(model_path).lower() == "none":
+ model_path = params["model_path"]
+
+ model_name = str(self.get_parameter("model_name").value or params["model_name"])
+ device = str(self.get_parameter("device").value or params["device"])
+ prompt_mode_str = str(self.get_parameter("prompt_mode").value or params["prompt_mode"]).lower()
+ objects_file = Path(str(self.get_parameter("objects_file").value or params["objects_file"]))
+ conf = float(self.get_parameter("conf").value)
+ max_area_ratio = float(self.get_parameter("max_area_ratio").value)
+
+ prompt_mode = YoloePromptMode(prompt_mode_str)
+
+ # Auto-pick the right default weight if user didn't override
+ if prompt_mode == YoloePromptMode.LRPC and ("seg.pt" in model_name and "seg-pf.pt" not in model_name):
+ model_name = "yoloe-11l-seg-pf.pt"
+ self.get_logger().info(f"Prompt-free mode: switching model_name to {model_name}")
+ if prompt_mode == YoloePromptMode.PROMPT and "seg-pf.pt" in model_name:
+ model_name = "yoloe-11l-seg.pt"
+ self.get_logger().info(f"Prompt mode: switching model_name to {model_name}")
+
+ self.detector = Yoloe2DDetector(
+ model_path=model_path,
+ model_name=model_name,
+ device=device or None,
+ prompt_mode=prompt_mode,
+ max_area_ratio=max_area_ratio,
+ conf=conf,
+ )
+
+ prompts = self._load_prompts(objects_file)
+ if prompts and prompt_mode == YoloePromptMode.PROMPT:
+ self.detector.set_prompts(text=prompts)
+ self.get_logger().info(f"Loaded {len(prompts)} prompts from {objects_file}")
+
+ qos = QoSProfile(
+ depth=10,
+ reliability=ReliabilityPolicy.RELIABLE,
+ durability=DurabilityPolicy.VOLATILE,
+ )
+ if self.use_compressed:
+ self.create_subscription(
+ CompressedImage,
+ compressed_image_topic,
+ self._compressed_image_callback,
+ qos,
+ )
+ else:
+ self.create_subscription(
+ Image,
+ image_topic,
+ self._image_callback,
+ qos,
+ )
+
+ self.detection_pub = self.create_publisher(DetectionResult, "/detection_result", qos)
+ self.overlay_pub = self.create_publisher(Image, "/annotated_image_detection", qos)
+
+ self.get_logger().info("detect_anything detection node started")
+
+    def _default_params(self) -> dict:
+        # config/ is installed to the package share directory, not alongside this module.
+        base_share = Path(get_package_share_directory("detect_anything"))
+ return {
+ "image_topic": "/camera/image",
+ "compressed_image_topic": "/camera/image/compressed",
+ "use_compressed": True,
+ "model_path": str(base_share / "models"),
+ "model_name": "yoloe-11l-seg.pt",
+ "device": "cuda",
+ "prompt_mode": YoloePromptMode.PROMPT.value,
+ "objects_file": str(base_share / "config" / "objects.yaml"),
+ "conf": 0.25,
+ "max_area_ratio": 0.3,
+ }
+
+ def _load_prompts(self, objects_file: Path) -> List[str]:
+ if not objects_file.exists():
+ return []
+ try:
+ with open(objects_file, "r", encoding="utf-8") as f:
+ data = yaml.safe_load(f) or []
+ if isinstance(data, list):
+ return [str(p) for p in data]
+ except Exception as exc:
+ self.get_logger().warn(f"Failed to read prompts from {objects_file}: {exc}")
+ return []
+
+ def _image_callback(self, msg: Image) -> None:
+ cv_image = self.bridge.imgmsg_to_cv2(msg, desired_encoding="bgr8")
+ self._process_image(cv_image, msg.header)
+
+ def _compressed_image_callback(self, msg: CompressedImage) -> None:
+ cv_image = self.bridge.compressed_imgmsg_to_cv2(msg, desired_encoding="bgr8")
+ self._process_image(cv_image, msg.header)
+
+ def _process_image(self, cv_image, header) -> None:
+ detections: DetectionResults = self.detector.process_image(cv_image)
+
+ detection_msg = detections.to_msg(header)
+ self.detection_pub.publish(detection_msg)
+
+ overlay = detections.overlay()
+ overlay_msg = self.bridge.cv2_to_imgmsg(overlay, encoding="bgr8")
+ overlay_msg.header = header
+ self.overlay_pub.publish(overlay_msg)
+
+ def destroy_node(self) -> bool:
+ if hasattr(self, "detector") and self.detector:
+ self.detector.stop()
+ return super().destroy_node()
+
+
+def main(args: Optional[list[str]] = None) -> None:
+ rclpy.init(args=args)
+ node = DetectionNode()
+ try:
+ rclpy.spin(node)
+ except KeyboardInterrupt:
+ pass
+ finally:
+ node.destroy_node()
+ rclpy.shutdown()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/detect_anything/detect_anything/yoloe.py b/detect_anything/detect_anything/yoloe.py
new file mode 100644
index 0000000..303afb9
--- /dev/null
+++ b/detect_anything/detect_anything/yoloe.py
@@ -0,0 +1,142 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from enum import Enum
+from pathlib import Path
+import threading
+from typing import Any, List, Optional, Sequence
+
+import numpy as np
+import torch
+from ultralytics import YOLOE # type: ignore[attr-defined, import-not-found]
+from ultralytics.utils import SETTINGS
+
+from detect_anything.detection import (
+ DetectionResults,
+ SingleDetectionResult,
+ from_ultralytics_results,
+)
+
+
+class YoloePromptMode(Enum):
+ """YOLO-E prompt modes."""
+
+ LRPC = "lrpc"
+ PROMPT = "prompt"
+
+
+class Yoloe2DDetector:
+ """Thin wrapper around YOLO-E for 2D detection + tracking with masks."""
+
+ def __init__(
+ self,
+ model_path: str | None = None,
+ model_name: str | None = None,
+ device: str | None = None,
+ prompt_mode: YoloePromptMode = YoloePromptMode.LRPC,
+ max_area_ratio: Optional[float] = 0.3,
+ conf: float = 0.6,
+ iou: float = 0.6,
+ ) -> None:
+ if model_name is None:
+ model_name = "yoloe-11s-seg-pf.pt" if prompt_mode == YoloePromptMode.LRPC else "yoloe-11s-seg.pt"
+
+ model_dir = Path(model_path) if model_path else Path("models/yoloe")
+ model_dir = model_dir.expanduser().resolve()
+ SETTINGS["weights_dir"] = str(model_dir)
+ self.model = YOLOE(model_dir / model_name)
+ self.prompt_mode = prompt_mode
+ self._visual_prompts: dict[str, np.ndarray] | None = None
+ self.max_area_ratio = max_area_ratio
+ self._lock = threading.Lock()
+ self.conf = conf
+ self.iou = iou
+ self._names_override: Sequence[str] | None = None
+
+ if prompt_mode == YoloePromptMode.PROMPT:
+ self.set_prompts(text=["nothing"])
+
+ if self.max_area_ratio is not None and not (0.0 < self.max_area_ratio <= 1.0):
+ raise ValueError("max_area_ratio must be in the range (0, 1].")
+
+ if device:
+ self.device = device
+ elif torch.cuda.is_available():
+ self.device = "cuda"
+ else:
+ self.device = "cpu"
+
+ def set_prompts(
+ self,
+ text: Optional[List[str]] = None,
+ bboxes: Optional[np.ndarray] = None,
+ ) -> None:
+ """Set prompts for detection. Provide either text or bboxes, not both."""
+ if self.prompt_mode == YoloePromptMode.LRPC:
+ # Prompt-free model cannot accept text/visual prompts.
+ return
+ if text is not None and bboxes is not None:
+ raise ValueError("Provide either text or bboxes, not both.")
+ if text is None and bboxes is None:
+ raise ValueError("Must provide either text or bboxes.")
+
+ with self._lock:
+ self.model.predictor = None
+ if text is not None:
+ self.model.set_classes(text, self.model.get_text_pe(text)) # type: ignore[no-untyped-call]
+ self._visual_prompts = None
+ self._names_override = list(text)
+ else:
+ cls = np.arange(len(bboxes), dtype=np.int16) # type: ignore[arg-type]
+ self._visual_prompts = {"bboxes": bboxes, "cls": cls} # type: ignore[dict-item]
+ self._names_override = None
+
+ def process_image(self, image: np.ndarray) -> DetectionResults:
+ """Process an image and return detection results."""
+ track_kwargs: dict[str, Any] = {
+ "source": image,
+ "device": self.device,
+ "conf": self.conf,
+ "iou": self.iou,
+ "persist": True,
+ "verbose": False,
+ }
+
+ with self._lock:
+ if self._visual_prompts is not None:
+ track_kwargs["visual_prompts"] = self._visual_prompts
+
+ results = self.model.track(**track_kwargs) # type: ignore[arg-type]
+
+ detections = from_ultralytics_results(results, image_shape=image.shape[:2], names=self._names_override)
+ filtered = self._apply_filters(image, detections)
+ return DetectionResults(image=image, detections=filtered)
+
+ def _apply_filters(
+ self,
+ image: np.ndarray,
+ detections: List[SingleDetectionResult],
+ ) -> List[SingleDetectionResult]:
+ if self.max_area_ratio is None:
+ return detections
+
+ image_area = float(image.shape[0] * image.shape[1])
+ if image_area <= 0:
+ return detections
+
+ return [det for det in detections if (det.area() / image_area) <= self.max_area_ratio]
+
+ def stop(self) -> None:
+ """Clean up resources used by the detector."""
+ if hasattr(self.model, "predictor") and self.model.predictor is not None:
+ predictor = self.model.predictor
+ if hasattr(predictor, "trackers") and predictor.trackers:
+ for tracker in predictor.trackers:
+ if hasattr(tracker, "tracker") and hasattr(tracker.tracker, "gmc"):
+ gmc = tracker.tracker.gmc
+ if hasattr(gmc, "executor") and gmc.executor is not None:
+ gmc.executor.shutdown(wait=True)
+ self.model.predictor = None
+
+
+__all__ = ["Yoloe2DDetector", "YoloePromptMode"]
diff --git a/detect_anything/env-hooks/venv_pythonpath.sh.in b/detect_anything/env-hooks/venv_pythonpath.sh.in
new file mode 100644
index 0000000..9212bac
--- /dev/null
+++ b/detect_anything/env-hooks/venv_pythonpath.sh.in
@@ -0,0 +1 @@
+ament_prepend_unique_value PYTHONPATH "@PYTHON_INSTALL_DIR@"
diff --git a/detect_anything/msg/DetectionResult.msg b/detect_anything/msg/DetectionResult.msg
new file mode 100644
index 0000000..766d5f1
--- /dev/null
+++ b/detect_anything/msg/DetectionResult.msg
@@ -0,0 +1,10 @@
+std_msgs/Header header
+int32[] track_id
+float32[] x1
+float32[] y1
+float32[] x2
+float32[] y2
+string[] label
+float32[] confidence
+sensor_msgs/CompressedImage image
+sensor_msgs/CompressedImage[] masks # cropped to bbox; aligned to bbox top-left; encoding: mono8
diff --git a/detect_anything/package.xml b/detect_anything/package.xml
new file mode 100644
index 0000000..a7699ae
--- /dev/null
+++ b/detect_anything/package.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0"?>
+<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
+<package format="3">
+  <name>detect_anything</name>
+  <version>0.0.1</version>
+  <description>YOLO-E detection node and utilities.</description>
+  <maintainer email="">Alex Lin</maintainer>
+  <license>MIT</license>
+
+  <buildtool_depend>ament_cmake</buildtool_depend>
+
+  <buildtool_depend>rosidl_default_generators</buildtool_depend>
+  <exec_depend>rosidl_default_runtime</exec_depend>
+  <member_of_group>rosidl_interface_packages</member_of_group>
+
+  <depend>rclpy</depend>
+  <depend>std_msgs</depend>
+  <depend>sensor_msgs</depend>
+  <depend>vision_msgs</depend>
+  <depend>cv_bridge</depend>
+  <depend>vector_perception_utils</depend>
+
+  <export>
+    <build_type>ament_cmake</build_type>
+  </export>
+</package>
diff --git a/detect_anything/resource/detect_anything b/detect_anything/resource/detect_anything
new file mode 100644
index 0000000..eac574f
--- /dev/null
+++ b/detect_anything/resource/detect_anything
@@ -0,0 +1 @@
+detect_anything
diff --git a/detect_anything/scripts/detection_node b/detect_anything/scripts/detection_node
new file mode 100755
index 0000000..822cacf
--- /dev/null
+++ b/detect_anything/scripts/detection_node
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+from detect_anything.detection_node import main
+
+if __name__ == "__main__":
+ main()
diff --git a/requirements.txt b/requirements.txt
index 7b6c110..67894e1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,13 +4,14 @@ opencv-python
scipy
pillow
tqdm
+pyyaml
+lark
# 3D processing
open3d
-# deep learning
-torch<=2.4.0
-torchvision<=1.19.0
+# vision
+ultralytics
# SAM2 and EdgeTAM dependencies
sam2
diff --git a/rviz/semantic_mapping.rviz b/rviz/semantic_mapping.rviz
new file mode 100644
index 0000000..d56223b
--- /dev/null
+++ b/rviz/semantic_mapping.rviz
@@ -0,0 +1,267 @@
+Panels:
+ - Class: rviz_common/Displays
+ Help Height: 78
+ Name: Displays
+ Property Tree Widget:
+ Expanded:
+ - /Global Options1
+ - /Status1
+ - /Odometry1
+ Splitter Ratio: 0.5
+ Tree Height: 413
+ - Class: rviz_common/Selection
+ Name: Selection
+ - Class: rviz_common/Tool Properties
+ Expanded:
+ - /2D Goal Pose1
+ - /Publish Point1
+ Name: Tool Properties
+ Splitter Ratio: 0.5886790156364441
+ - Class: rviz_common/Views
+ Expanded:
+ - /Current View1
+ Name: Views
+ Splitter Ratio: 0.5
+ - Class: rviz_common/Time
+ Experimental: false
+ Name: Time
+ SyncMode: 0
+ SyncSource: PointCloud2
+Visualization Manager:
+ Class: ""
+ Displays:
+ - Alpha: 0.5
+ Cell Size: 1
+ Class: rviz_default_plugins/Grid
+ Color: 160; 160; 164
+ Enabled: true
+ Line Style:
+ Line Width: 0.029999999329447746
+ Value: Lines
+ Name: Grid
+ Normal Cell Count: 0
+ Offset:
+ X: 0
+ Y: 0
+ Z: 0
+ Plane: XY
+ Plane Cell Count: 10
+ Reference Frame:
+ Value: true
+ - Class: rviz_default_plugins/Image
+ Enabled: true
+ Max Value: 1
+ Median window: 5
+ Min Value: 0
+ Name: Image
+ Normalize Range: true
+ Topic:
+ Depth: 5
+ Durability Policy: Volatile
+ History Policy: Keep Last
+ Reliability Policy: Reliable
+ Value: /annotated_image_detection
+ Value: true
+ - Angle Tolerance: 0.10000000149011612
+ Class: rviz_default_plugins/Odometry
+ Covariance:
+ Orientation:
+ Alpha: 0.5
+ Color: 255; 255; 127
+ Color Style: Unique
+ Frame: Local
+ Offset: 1
+ Scale: 1
+ Value: true
+ Position:
+ Alpha: 0.30000001192092896
+ Color: 204; 51; 204
+ Scale: 1
+ Value: true
+ Value: false
+ Enabled: true
+ Keep: 5
+ Name: Odometry
+ Position Tolerance: 0.10000000149011612
+ Shape:
+ Alpha: 1
+ Axes Length: 1
+ Axes Radius: 0.10000000149011612
+ Color: 255; 25; 0
+ Head Length: 0.30000001192092896
+ Head Radius: 0.10000000149011612
+ Shaft Length: 1
+ Shaft Radius: 0.05000000074505806
+ Value: Arrow
+ Topic:
+ Depth: 5
+ Durability Policy: Volatile
+ Filter size: 10
+ History Policy: Keep Last
+ Reliability Policy: Reliable
+ Value: /state_estimation
+ Value: true
+ - Alpha: 0.10000000149011612
+ Autocompute Intensity Bounds: true
+ Autocompute Value Bounds:
+ Max Value: 10
+ Min Value: -10
+ Value: true
+ Axis: Z
+ Channel Name: intensity
+ Class: rviz_default_plugins/PointCloud2
+ Color: 255; 255; 255
+ Color Transformer: FlatColor
+ Decay Time: 3
+ Enabled: true
+ Invert Rainbow: false
+ Max Color: 255; 255; 255
+ Max Intensity: 165
+ Min Color: 0; 0; 0
+ Min Intensity: 0
+ Name: PointCloud2
+ Position Transformer: XYZ
+ Selectable: true
+ Size (Pixels): 3
+ Size (m): 0.004999999888241291
+ Style: Flat Squares
+ Topic:
+ Depth: 5
+ Durability Policy: Volatile
+ History Policy: Keep Last
+ Reliability Policy: Reliable
+ Value: /registered_scan
+ Use Fixed Frame: true
+ Use rainbow: true
+ Value: true
+ - Class: rviz_default_plugins/MarkerArray
+ Enabled: true
+ Name: MarkerArray
+ Namespaces:
+ bbox: true
+ Topic:
+ Depth: 5
+ Durability Policy: Volatile
+ History Policy: Keep Last
+ Reliability Policy: Reliable
+ Value: /obj_boxes
+ Value: true
+ - Alpha: 1
+ Autocompute Intensity Bounds: true
+ Autocompute Value Bounds:
+ Max Value: 10
+ Min Value: -10
+ Value: true
+ Axis: Z
+ Channel Name: intensity
+ Class: rviz_default_plugins/PointCloud2
+ Color: 255; 255; 255
+ Color Transformer: RGB8
+ Decay Time: 0
+ Enabled: true
+ Invert Rainbow: false
+ Max Color: 255; 255; 255
+ Max Intensity: 4096
+ Min Color: 0; 0; 0
+ Min Intensity: 0
+ Name: PointCloud2
+ Position Transformer: XYZ
+ Selectable: true
+ Size (Pixels): 3
+ Size (m): 0.009999999776482582
+ Style: Flat Squares
+ Topic:
+ Depth: 5
+ Durability Policy: Volatile
+ History Policy: Keep Last
+ Reliability Policy: Reliable
+ Value: /obj_points
+ Use Fixed Frame: true
+ Use rainbow: true
+ Value: true
+ Enabled: true
+ Global Options:
+ Background Color: 48; 48; 48
+ Fixed Frame: map
+ Frame Rate: 30
+ Name: root
+ Tools:
+ - Class: rviz_default_plugins/Interact
+ Hide Inactive Objects: true
+ - Class: rviz_default_plugins/MoveCamera
+ - Class: rviz_default_plugins/Select
+ - Class: rviz_default_plugins/FocusCamera
+ - Class: rviz_default_plugins/Measure
+ Line color: 128; 128; 0
+ - Class: rviz_default_plugins/SetInitialPose
+ Covariance x: 0.25
+ Covariance y: 0.25
+ Covariance yaw: 0.06853891909122467
+ Topic:
+ Depth: 5
+ Durability Policy: Volatile
+ History Policy: Keep Last
+ Reliability Policy: Reliable
+ Value: /initialpose
+ - Class: rviz_default_plugins/SetGoal
+ Topic:
+ Depth: 5
+ Durability Policy: Volatile
+ History Policy: Keep Last
+ Reliability Policy: Reliable
+ Value: /goal_pose
+ - Class: rviz_default_plugins/PublishPoint
+ Single click: true
+ Topic:
+ Depth: 5
+ Durability Policy: Volatile
+ History Policy: Keep Last
+ Reliability Policy: Reliable
+ Value: /clicked_point
+ Transformation:
+ Current:
+ Class: rviz_default_plugins/TF
+ Value: true
+ Views:
+ Current:
+ Class: rviz_default_plugins/Orbit
+ Distance: 21.720943450927734
+ Enable Stereo Rendering:
+ Stereo Eye Separation: 0.05999999865889549
+ Stereo Focal Distance: 1
+ Swap Stereo Eyes: false
+ Value: false
+ Focal Point:
+ X: 3.1760172843933105
+ Y: 11.240063667297363
+ Z: 2.561012029647827
+ Focal Shape Fixed Size: false
+ Focal Shape Size: 0.05000000074505806
+ Invert Z Axis: false
+ Name: Current View
+ Near Clip Distance: 0.009999999776482582
+ Pitch: 1.1497966051101685
+ Target Frame:
+ Value: Orbit (rviz_default_plugins)
+ Yaw: 3.4885518550872803
+ Saved: ~
+Window Geometry:
+ Displays:
+ collapsed: false
+ Height: 996
+ Hide Left Dock: false
+ Hide Right Dock: false
+ Image:
+ collapsed: false
+ QMainWindow State: 000000ff00000000fd00000004000000000000027b00000342fc0200000009fb0000001200530065006c0065006300740069006f006e00000001e10000009b0000005d00fffffffb0000001e0054006f006f006c002000500072006f007000650072007400690065007302000001ed000001df00000185000000a3fb000000120056006900650077007300200054006f006f02000001df000002110000018500000122fb000000200054006f006f006c002000500072006f0070006500720074006900650073003203000002880000011d000002210000017afb000000100044006900730070006c006100790073010000003f0000022b000000cc00fffffffb0000002000730065006c0065006300740069006f006e00200062007500660066006500720200000138000000aa0000023a00000294fb00000014005700690064006500530074006500720065006f02000000e6000000d2000003ee0000030bfb0000000c004b0069006e0065006300740200000186000001060000030c00000261fb0000000a0049006d0061006700650100000270000001110000001700ffffff000000010000010f00000342fc0200000003fb0000001e0054006f006f006c002000500072006f00700065007200740069006500730100000041000000780000000000000000fb0000000a00560069006500770073010000003f00000342000000a900fffffffb0000001200530065006c0065006300740069006f006e010000025a000000b200000000000000000000000200000490000000a9fc0100000001fb0000000a00560069006500770073030000004e00000080000002e10000019700000003000006b90000003efc0100000002fb0000000800540069006d00650100000000000006b90000026f00fffffffb0000000800540069006d00650100000000000004500000000000000000000003230000034200000004000000040000000800000008fc0000000100000002000000010000000a0054006f006f006c00730100000000ffffffff0000000000000000
+ Selection:
+ collapsed: false
+ Time:
+ collapsed: false
+ Tool Properties:
+ collapsed: false
+ Views:
+ collapsed: false
+ Width: 1721
+ X: 3248
+ Y: 331
diff --git a/semantic_mapping/CMakeLists.txt b/semantic_mapping/CMakeLists.txt
new file mode 100644
index 0000000..7266468
--- /dev/null
+++ b/semantic_mapping/CMakeLists.txt
@@ -0,0 +1,52 @@
+cmake_minimum_required(VERSION 3.10)
+project(semantic_mapping)
+
+find_package(ament_cmake REQUIRED)
+find_package(rclpy REQUIRED)
+find_package(std_msgs REQUIRED)
+find_package(sensor_msgs REQUIRED)
+find_package(geometry_msgs REQUIRED)
+find_package(nav_msgs REQUIRED)
+find_package(visualization_msgs REQUIRED)
+find_package(cv_bridge REQUIRED)
+find_package(rosidl_default_generators REQUIRED)
+find_package(Python3 REQUIRED COMPONENTS Interpreter)
+
+rosidl_generate_interfaces(${PROJECT_NAME}
+ "msg/ObjectNode.msg"
+ "msg/ObjectType.msg"
+ "msg/TargetObjectInstruction.msg"
+ "msg/ViewpointRep.msg"
+ DEPENDENCIES std_msgs sensor_msgs geometry_msgs
+)
+
+set(PYTHON_INSTALL_DIR "lib/python${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}/site-packages")
+
+install(
+ DIRECTORY semantic_mapping
+ DESTINATION ${PYTHON_INSTALL_DIR}
+)
+
+install(
+ PROGRAMS
+ scripts/semantic_mapping_node
+ DESTINATION lib/${PROJECT_NAME}
+)
+
+install(
+ FILES resource/semantic_mapping
+ DESTINATION share/${PROJECT_NAME}/resource
+)
+
+install(
+ DIRECTORY config
+ DESTINATION share/${PROJECT_NAME}
+)
+
+install(
+ DIRECTORY launch
+ DESTINATION share/${PROJECT_NAME}
+)
+
+ament_export_dependencies(rosidl_default_runtime)
+ament_package()
diff --git a/semantic_mapping/README.md b/semantic_mapping/README.md
new file mode 100644
index 0000000..62b78e5
--- /dev/null
+++ b/semantic_mapping/README.md
@@ -0,0 +1,152 @@
+# Semantic Mapping
+
+ROS2 package for semantic 3D object mapping with VLM integration.
+
+## Overview
+
+This package builds a persistent semantic map of objects detected by `detect_anything`, fusing 2D detections with LiDAR point clouds to create 3D object representations. It publishes `ObjectNode` messages compatible with the tare_planner exploration system.
+The pipeline stores voxelized geometry, merges observations, and exports planner‑friendly ROS messages.
+
+## Implementation Status
+
+| Component | File | Status |
+|-----------|------|--------|
+| Messages | `msg/*.msg` | Complete |
+| Data Models | `map_object.py` | Complete |
+| Matching | `utils/matching.py` | Complete |
+| Object Mapper | `mapper.py` | Complete |
+| Cloud Projection | `vector_perception_utils/pointcloud_utils.py` | Complete |
+| Publishers | `utils/map_object_utils.py` | Complete |
+| Image Storage | `utils/storage.py` | Complete |
+| Mapping Node | `mapping_node.py` | Complete |
+| Launch Files | `launch/` | Complete |
+
+## Messages
+
+### ObjectNode.msg
+Main output message for planner integration:
+- `object_id[]`: Track IDs (may contain multiple merged IDs)
+- `label`: Dominant class label
+- `position`: Object centroid in world frame
+- `bbox3d[8]`: 3D bounding box corners
+- `cloud`: Object point cloud
+- `status`: Lifecycle status ("new", "updated", "unchanged")
+- `img_path`: Path to best saved image crop
+- `is_asked_vlm`: VLM query flag
+- `viewpoint_id`: Associated viewpoint
+
+### ObjectType.msg
+VLM query/answer for object relabeling.
+
+### TargetObjectInstruction.msg
+User instruction for target object search.
+
+### ViewpointRep.msg
+Viewpoint trigger for image saving.
+
+Note: `detect_anything/DetectionResult` masks are cropped to the bbox and aligned to
+the bbox top-left (as documented in `detect_anything/msg/DetectionResult.msg`).
+The mapping node reconstructs full-image masks before projection.
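+
+A minimal sketch of that reconstruction (equivalent to what `DetectionResults.from_msg` does in `detect_anything`; the helper name is illustrative):
+
+```python
+import numpy as np
+
+def rebuild_full_mask(crop: np.ndarray, bbox, image_shape) -> np.ndarray:
+    """Paste a bbox-cropped mono8 mask back at its bbox location in a full-size frame."""
+    x1, y1, x2, y2 = [int(v) for v in bbox]
+    full = np.zeros(image_shape[:2], dtype=np.uint8)
+    full[y1:y2, x1:x2] = crop  # crop is (y2 - y1, x2 - x1), as documented above
+    return full
+```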
+
+## Architecture
+
+```
+detect_anything/DetectionResult
+ │
+ ▼
+┌───────────────────────────────────────────────┐
+│ semantic_mapping_node │
+│ │
+│ ┌────────────────────┐ ┌──────────────┐ │
+│ │ pointcloud_utils │ │ ObjectMapper │ │
+│ │ (projection + mask)├──▶│ (pending/ │ │
+│ │ │ │ permanent) │ │
+│ └────────────────────┘ └──────────────┘ │
+│ │ │ │
+│ ▼ ▼ │
+│ ┌────────────────┐ ┌──────────────┐ │
+│ │ MapObject │ │ map_object_ │ │
+│ │ (voxels/votes) ├─▶│ utils │ │
+│ └────────────────┘ └──────────────┘ │
+│ │ │ │
+│ ▼ ▼ │
+│ utils/storage.py mapping_node.py │
+└───────────────────────────────────────────────┘
+ │
+ ▼
+/object_nodes, /obj_points, /obj_boxes, /obj_labels
+```
+
+Key components:
+- `map_object.py`: Object state, voxel voting, pose de‑duplication, cached geometry.
+- `utils/matching.py`: Track‑id and spatial matching with IoU/ratio heuristics.
+- `mapper.py`: Pending/permanent tiers, merge rules, and pruning.
+- `vector_perception_utils/pointcloud_utils.py`: Projection + mask segmentation utilities.
+- `utils/map_object_utils.py`: ROS message construction helpers.
+- `utils/storage.py`: Async image save queue.
+- `mapping_node.py`: ROS2 orchestration (subscriptions, sync, publish).
+
+## Dependencies
+
+- `detect_anything`: Detection results with masks
+- `vector_perception_utils`: Point cloud and image utilities
+- ROS2 messages: `sensor_msgs`, `geometry_msgs`, `nav_msgs`, `visualization_msgs`
+
+## Usage
+
+```bash
+# Build
+colcon build --packages-select semantic_mapping
+
+# Run
+ros2 launch semantic_mapping semantic_mapping.launch.py
+```
+
+## Configuration
+
+- `config/params.yaml`: Default node parameters for `semantic_mapping_node`.
+- `config/sensor_to_camera_tf.yaml`: Static sensor→camera transform for projection.
+- `config/objects.yaml`: Object taxonomy for detection labels.
+
+Key runtime params:
+- `regularize_voxel_size`: Connectivity voxel size used in shape regularization.
+- `ground_filter`, `ground_radius`, `ground_z_clearance`: Simple ground clamp around robot.
+- `visualization_max_objects`: Limits RViz markers/pointcloud to most recent objects.
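+
+The static sensor→camera transform can be loaded into a 4x4 extrinsic for projection. A sketch assuming an extrinsic xyz roll-pitch-yaw convention (field names follow `config/sensor_to_camera_tf.yaml`; the helper name is illustrative):
+
+```python
+import numpy as np
+import yaml
+from scipy.spatial.transform import Rotation
+
+def load_sensor_to_camera(path: str) -> np.ndarray:
+    """Build a 4x4 sensor->camera transform from the YAML config."""
+    with open(path, "r", encoding="utf-8") as f:
+        tf = yaml.safe_load(f)["sensor_to_camera"]
+    rpy, t = tf["rotation_rpy"], tf["translation"]
+    T = np.eye(4)
+    # "xyz" (extrinsic roll, pitch, yaw) is an assumption about the stored convention.
+    T[:3, :3] = Rotation.from_euler("xyz", [rpy["roll"], rpy["pitch"], rpy["yaw"]]).as_matrix()
+    T[:3, 3] = [t["x"], t["y"], t["z"]]
+    return T
+```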
+
+## Topics
+
+### Subscriptions
+| Topic | Type | Description |
+|-------|------|-------------|
+| `/detection_result` | `detect_anything/DetectionResult` | 2D detections with masks |
+| `/registered_scan` | `sensor_msgs/PointCloud2` | LiDAR point cloud |
+| `/state_estimation` | `nav_msgs/Odometry` | Robot odometry |
+| `/object_type_answer` | `ObjectType` | VLM relabeling response |
+| `/target_object_instruction` | `TargetObjectInstruction` | Target object search |
+| `/viewpoint_rep_header` | `ViewpointRep` | Viewpoint trigger |
+
+### Publications
+| Topic | Type | Description |
+|-------|------|-------------|
+| `/object_nodes` | `ObjectNode` | Semantic object map (for planner) |
+| `/obj_points` | `sensor_msgs/PointCloud2` | Colored object point cloud |
+| `/obj_boxes` | `visualization_msgs/MarkerArray` | 3D bounding box wireframes |
+| `/obj_labels` | `visualization_msgs/MarkerArray` | Object label text |
+| `/object_type_query` | `ObjectType` | VLM query for relabeling |
+
+## References
+
+- Full migration plan: [`docs/SEMANTIC_MAPPING_MIGRATION.md`](docs/SEMANTIC_MAPPING_MIGRATION.md)
+- Original implementation: `VLM_ROS/src/semantic_mapping/` (main branch)
+ - SAM2 for segmentation
+ - External Gemini API for VLM reasoning
+- Detection: `detect_anything` package (decoupled)
+- dimos patterns: `dimos/perception/detection/objectDB.py`
+
+## Handoff Notes
+
+- Regularization uses vote-weighted filtering + voxel connected components; adjust
+ `regularize_voxel_size` and the regularization percentile (in `MapObject`) if
+ thin structures are dropped.
+- Mapping node logs summary stats and a single mapping loop time per cycle.
+- Permanent objects persist; only pending objects time out via `pending_ttl_s`.
diff --git a/semantic_mapping/config/params.yaml b/semantic_mapping/config/params.yaml
new file mode 100644
index 0000000..0749d09
--- /dev/null
+++ b/semantic_mapping/config/params.yaml
@@ -0,0 +1,48 @@
+semantic_mapping_node:
+ ros__parameters:
+ detection_topic: /detection_result
+ cloud_topic: /registered_scan
+ odom_topic: /state_estimation
+ viewpoint_topic: /viewpoint_rep_header
+ object_type_answer_topic: /object_type_answer
+ object_type_query_topic: /object_type_query
+ target_object_instruction_topic: /target_object_instruction
+ object_nodes_topic: /object_nodes
+ obj_points_topic: /obj_points
+ obj_boxes_topic: /obj_boxes
+ obj_labels_topic: /obj_labels
+ map_frame: map
+
+ confidence_threshold: 0.35
+ voxel_size: 0.05
+ regularize_voxel_size: 0.2
+ distance_threshold: 0.3
+ iou_threshold: 0.3
+ min_detections_for_permanent: 4
+ pending_ttl_s: 5.0
+ num_angle_bins: 15
+
+ detection_linear_state_time_bias: 0.0
+ detection_angular_state_time_bias: 0.0
+ cloud_window_before: 0.5
+ cloud_window_after: 0.1
+
+ projection: equirect
+ axis_mode: xz
+ image_width: 1920
+ image_height: 640
+ depth_filter: true
+ depth_filter_margin: 0.15
+ depth_jump_threshold: 0.3
+ sensor_to_camera_tf: config/sensor_to_camera_tf.yaml
+ mask_erode_kernel: 5
+ mask_erode_iterations: 2
+ max_cloud_distance: 8.0
+ visualization_max_objects: 50
+ ground_filter: true
+ ground_radius: 2.5
+ ground_z_clearance: 0.0
+
+ target_object: refrigerator
+    save_png: true
+ output_dir: output/object_images
diff --git a/semantic_mapping/config/sensor_to_camera_tf.yaml b/semantic_mapping/config/sensor_to_camera_tf.yaml
new file mode 100644
index 0000000..e48c06f
--- /dev/null
+++ b/semantic_mapping/config/sensor_to_camera_tf.yaml
@@ -0,0 +1,11 @@
+sensor_to_camera:
+ parent_frame: lidar
+ child_frame: camera
+ translation:
+ x: -0.12
+ y: -0.075
+ z: 0.265
+ rotation_rpy:
+ roll: -1.5707963
+ pitch: 0.0
+ yaw: -1.5707963
diff --git a/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md b/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md
new file mode 100644
index 0000000..3f22372
--- /dev/null
+++ b/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md
@@ -0,0 +1,487 @@
+# Semantic Mapping Migration Plan
+
+## Overview
+
+This document outlines the comprehensive plan for porting the `semantic_mapping` package from `VLM_ROS` into `vector_perception_ros/semantic_mapping`, following the coding standards of `vector_perception_ros` while preserving functional compatibility with downstream consumers (particularly the tare_planner).
+
+> **Note**: Detection is decoupled into `detect_anything` package. VLM reasoning uses external Gemini API.
+
+## Design Principles
+
+1. **Logic cleanup, not restructure** - Preserve the core algorithms (voxel voting, observation angles, shape regularization, IoU/distance matching) while cleaning up dead code and improving organization.
+2. **Same I/O contract** - Maintain the same ROS topic names, message types, and semantics so existing planner integration continues to work.
+3. **vector_perception_ros coding standards** - Full type hints, Google-style docstrings, dataclass containers, proper ROS2 patterns.
+4. **ObjectDB-style patterns** - Adopt the two-tier (pending/permanent) architecture and dual matching (track_id + distance) from dimos where it improves clarity.
+
+---
+
+## Implementation Progress
+
+| Step | Component | Status |
+|------|-----------|--------|
+| 1 | Package Setup & Messages | **COMPLETED** |
+| 2 | Data Models (`map_object.py`) | **COMPLETED** |
+| 2.1 | Refactor to use shared utilities | **COMPLETED** |
+| 3 | Matching Utilities (`utils/matching.py`) | **COMPLETED** |
+| 4 | Object Mapper (`mapper.py`) | **COMPLETED** |
+| 5 | Cloud-Image Projection (`pointcloud_utils.py`) | **COMPLETED** |
+| 6 | Message Publishers (`utils/map_object_utils.py`) | **COMPLETED** |
+| 7 | Image Storage (`utils/storage.py`) | **COMPLETED** |
+| 8 | Mapping Node (`mapping_node.py`) | **COMPLETED** |
+| 9 | Launch Files & Config | **COMPLETED** |
+| 10 | Integration Testing | **COMPLETED** |
+
+### Step 2.1: Refactored map_object.py to use shared utilities
+
+Added to `vector_perception_utils/transform_utils.py`:
+- `normalize_angle()` - now supports both scalar and array input
+- `discretize_angles()` - discretize angles into bin indices
+
+Added to `vector_perception_utils/pointcloud_utils.py`:
+- `get_oriented_bbox()` - returns (center, corners) tuple from minimal OBB
+
+Refactored in `map_object.py`:
+- Removed duplicate `_normalize_angles_to_pi` and `_discretize_angles` functions
+- Simplified `_compute_geometry()` from 43 lines to 15 using `get_oriented_bbox()`
+- Use `create_pointcloud()` in `regularize_shape()` instead of manual o3d creation
+- Use `compute_centroid()` instead of `np.mean(..., axis=0)`
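+
+A sketch of what the angle helpers above compute (signatures and binning convention are assumptions; the actual implementations live in `vector_perception_utils/transform_utils.py`):
+
+```python
+import numpy as np
+
+def normalize_angle(angle):
+    """Wrap angles (scalar or array) into [-pi, pi)."""
+    return (np.asarray(angle) + np.pi) % (2.0 * np.pi) - np.pi
+
+def discretize_angles(angles, num_bins: int) -> np.ndarray:
+    """Map angles to integer bin indices in [0, num_bins)."""
+    frac = (normalize_angle(angles) + np.pi) / (2.0 * np.pi)   # [0, 1)
+    return np.minimum((frac * num_bins).astype(int), num_bins - 1)
+```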
+
+---
+
+## VLM_ROS Reference (Source Implementation)
+
+Key files in `VLM_ROS/src/semantic_mapping/`:
+- `semantic_map_new.py`: `ObjMapper` manages the persistent map
+- `single_object_new.py`: `SingleObject` holds per-object state
+- `semantic_mapping_node.py`: `MappingNode` orchestrates SAM2 masks, LiDAR projection, odom transforms
+
+### ObjMapper Core Flow
+- Input: dict with `bboxes`, `labels`, `confidences`, `ids`, `masks` (from SAM2), plus LiDAR cloud and odom
+- Memory fix: Uses `.copy()` on detection data extraction to prevent memory leaks
+- VLM query threshold: Objects need `best_image_score > 500` before querying VLM
+- `update_map()` steps:
+ 1. Transform incoming cloud into body/world frame using odom
+ 2. Filter detections by confidence threshold
+ 3. For each detection: reconstruct full-image masks (bbox-cropped in `detect_anything/msg/DetectionResult.msg`), project masked cloud, build per-object voxels, then either merge or create a `SingleObject`
+ 4. Merge heuristics: match by tracker `obj_id`, 3D IoU, distance thresholds, and optional primitive groups (e.g., chair/sofa)
+ 5. Maintain delete lists, save queues (best image/mask per object)
+ 6. Publish ROS messages: `ObjectNode`, bbox markers, label markers, colored point cloud
+
+### SingleObject Highlights
+- Keeps class_id→counts/confidence, track_id list, voxel grid (`VoxelFeatureManager`), robot poses, masks, best image saving
+- Computes 3D bbox (axis-aligned + oriented), centroids, clustering flags, and validity caches
+- Manages persistence (`life`, `inactive_frame`, `publish_status`) and VLM interaction state
+
+### dimos Patterns to Adopt
+- `ObjMapper` ⇄ `ObjectDB`: explicit pending/permanent tiers, track_id + spatial matching, promotion thresholds
+- `SingleObject` ⇄ `Object`: cleaner update patterns, separation of core geometry vs VLM metadata
+
+---
+
+## Phase 1: Package Structure & Messages (COMPLETED)
+
+### 1.1 Package Structure
+
+```
+semantic_mapping/
+├── CMakeLists.txt # Hybrid CMake for message generation + Python install
+├── package.xml # Dependencies: detect_anything, vector_perception_utils, ROS msgs
+├── msg/
+│ ├── ObjectNode.msg
+│ ├── ObjectType.msg
+│ ├── TargetObjectInstruction.msg
+│ └── ViewpointRep.msg
+├── config/
+│ ├── objects.yaml # Object taxonomy (to be copied from VLM_ROS)
+│ └── params.yaml # Default node parameters
+├── launch/
+│ └── semantic_mapping.launch.py
+├── semantic_mapping/
+│ ├── __init__.py
+│ ├── mapping_node.py # ROS2 node (orchestration)
+│ ├── mapper.py # ObjectMapper class (core logic)
+│ ├── map_object.py # Data classes: MapObject, Observation, VoxelManager
+│ ├── utils/
+│ │ ├── matching.py # Object matching utilities (IoU, distance, track_id)
+│ │ ├── map_object_utils.py # ROS message conversion helpers
+│ │ └── storage.py # Async image saving queue
+│ ├── (moved to vector_perception_utils) # Cloud-image projection helpers
+├── docs/
+│ └── SEMANTIC_MAPPING_MIGRATION.md
+└── resource/
+ └── semantic_mapping # ament resource marker
+```
+
+### 1.2 Message Definitions
+
+**ObjectNode.msg** (preserves exact schema for planner compatibility):
+```
+std_msgs/Header header
+int32[] object_id
+string label
+geometry_msgs/Point position
+geometry_msgs/Point[8] bbox3d # 8 corner points in world frame
+sensor_msgs/PointCloud2 cloud
+string status # "new", "updated", "unchanged"
+string img_path
+bool is_asked_vlm
+int32 viewpoint_id
+```
+
+**ObjectType.msg**: VLM query/answer for object relabeling
+**TargetObjectInstruction.msg**: User-set target/anchor objects
+**ViewpointRep.msg**: Viewpoint trigger message
+
+---
+
+## Phase 2: Data Models (COMPLETED)
+
+### 2.1 ObjectStatus Enum
+
+```python
+class ObjectStatus(Enum):
+ """Object lifecycle status (ObjectDB-style tiering)."""
+ PENDING = "pending" # < min_detections threshold
+ PERMANENT = "permanent" # >= threshold, confirmed object
+ DELETED = "deleted" # Marked for removal
+```
+
+### 2.2 Observation
+
+```python
+@dataclass
+class Observation:
+ """A single detection observation to update a MapObject."""
+ track_id: int
+ label: str
+ confidence: float
+ points: np.ndarray # (N, 3) world-frame points
+ robot_pose: dict # {R: (3,3), t: (3,)}
+ mask: np.ndarray | None = None # 2D segmentation mask
+ image: np.ndarray | None = None # RGB crop for best-image selection
+ timestamp: float = 0.0
+```
+
+### 2.3 VoxelFeatureManager
+
+```python
+@dataclass
+class VoxelFeatureManager:
+ """Manages voxelized point cloud with observation angle voting."""
+ voxels: np.ndarray # (N, 3) world coords
+ votes: np.ndarray # (N,) detection count per voxel
+ observation_angles: np.ndarray # (N, num_bins) angle histogram
+ regularized_mask: np.ndarray | None # (N,) valid voxel mask after clustering
+ voxel_size: float = 0.03
+ num_angle_bins: int = 20
+
+ def update(self, new_points: np.ndarray, robot_position: np.ndarray) -> None:
+ """Add new points with KDTree-based merge and angle voting."""
+
+ def get_valid_voxels(self, min_votes: int = 1) -> np.ndarray:
+ """Return voxels passing vote threshold and regularization mask."""
+```
+
+### 2.4 MapObject
+
+```python
+@dataclass
+class MapObject:
+ """Represents a tracked object in the semantic map."""
+ # Identity
+ object_id: str # UUID for this object
+ track_ids: list[int] # Associated tracker IDs (can merge)
+
+ # Classification (voted)
+ class_votes: dict[str, int] # label -> observation count
+ confidence_scores: dict[str, float] # label -> best confidence
+
+ # Geometry
+ voxel_manager: VoxelFeatureManager
+ robot_poses: list[dict] # [{R, t}, ...] observation poses
+
+ # Lifecycle (ObjectDB-style)
+ detection_count: int = 0
+ inactive_frames: int = 0
+ status: ObjectStatus = ObjectStatus.PENDING
+
+ # VLM interaction
+ is_asked_vlm: bool = False
+ updated_by_vlm: bool = False
+ best_image_path: str | None = None
+ best_image_score: float = 0.0
+
+ @property
+ def label(self) -> str: ...
+ @property
+ def centroid(self) -> np.ndarray: ...
+ @property
+ def bbox3d(self) -> np.ndarray: ...
+ def update(self, observation: Observation) -> None: ...
+ def merge(self, other: MapObject) -> None: ...
+ def regularize_shape(self, percentile: float = 0.85) -> None: ...
+```
+
+---
+
+## Phase 3: Matching Utilities (utils/matching.py)
+
+```python
+def compute_iou_3d(bbox1: np.ndarray, bbox2: np.ndarray) -> float:
+ """Compute 3D IoU between two axis-aligned bounding boxes."""
+
+def compute_center_distance(obj1: MapObject, obj2: MapObject) -> float:
+ """Compute Euclidean distance between object centroids."""
+
+def are_merge_compatible(label1: str, label2: str, merge_groups: list[set[str]]) -> bool:
+ """Check if two labels can be merged (same class or in same merge group)."""
+
+def find_best_match(
+ obs: Observation,
+ candidates: list[MapObject],
+ distance_threshold: float,
+ iou_threshold: float,
+) -> MapObject | None:
+ """Find best matching object by distance and IoU criteria."""
+```
+
+---
+
+## Phase 4: Object Mapper (mapper.py)
+
+```python
+class ObjectMapper:
+ """
+ Manages the semantic object map with two-tier state management.
+ Pending objects are promoted to permanent after detection_count >= threshold.
+ """
+
+ def __init__(
+ self,
+ voxel_size: float = 0.03,
+ confidence_threshold: float = 0.30,
+ distance_threshold: float = 0.5,
+ iou_threshold: float = 0.2,
+ min_detections_for_permanent: int = 3,
+ pending_ttl_s: float = 10.0,
+ max_inactive_frames: int = 20,
+ ) -> None:
+ self._pending_objects: dict[str, MapObject] = {}
+ self._permanent_objects: dict[str, MapObject] = {}
+ self._track_id_map: dict[int, str] = {}
+ self._lock = threading.RLock()
+ self._merge_groups: list[set[str]] = [
+ {"chair", "sofa", "couch", "armchair"},
+ {"table", "desk"},
+ ]
+
+ def update(self, observations: list[Observation], target_object: str | None = None) -> UpdateStats: ...
+ def _match_observation(self, obs: Observation) -> MapObject | None: ...
+ def _create_object(self, obs: Observation) -> MapObject: ...
+ def _check_promotion(self, obj: MapObject) -> None: ...
+ def _try_merge_objects(self) -> None: ...
+ def _prune_inactive(self, target_object: str | None) -> list[MapObject]: ...
+ def get_objects(self, include_pending: bool = False) -> list[MapObject]: ...
+```
+
+---
+
+## Phase 5: Mapping Node (mapping_node.py)
+
+### 5.1 Node Structure
+
+```python
+class SemanticMappingNode(Node):
+ """ROS2 node for semantic 3D object mapping."""
+
+ def __init__(self) -> None:
+ super().__init__("semantic_mapping_node")
+ self._declare_parameters()
+ self.mapper = ObjectMapper(...)
+ self.projection = pointcloud_utils
+ self.image_saver = ImageSaveQueue(...)
+
+ # Data buffers (with locks)
+ self._cloud_buffer: deque[tuple[float, np.ndarray]] = deque(maxlen=50)
+ self._odom_buffer: deque[tuple[float, Pose]] = deque(maxlen=100)
+ self._lock = threading.Lock()
+
+ self._create_subscriptions()
+ self._create_publishers()
+ self.create_timer(0.5, self._mapping_callback)
+```
+
+### 5.2 Subscriptions
+
+| Topic | Type | QoS |
+|-------|------|-----|
+| `/detection_result` | `detect_anything/DetectionResult` | Reliable |
+| `/registered_scan` | `PointCloud2` | Best Effort |
+| `/state_estimation` | `Odometry` | Best Effort |
+| `/object_type_answer` | `ObjectType` | Reliable |
+| `/target_object_instruction` | `TargetObjectInstruction` | Reliable |
+| `/viewpoint_rep_header` | `ViewpointRep` | Reliable |
+
+### 5.3 Publishers
+
+| Topic | Type | Description |
+|-------|------|-------------|
+| `/object_nodes` | `ObjectNode` | Main output for planner |
+| `/obj_points` | `PointCloud2` | Colored object cloud |
+| `/obj_boxes` | `MarkerArray` | Bbox wireframes |
+| `/obj_labels` | `MarkerArray` | Text labels |
+| `/annotated_image` | `Image` | Debug visualization |
+| `/object_type_query` | `ObjectType` | VLM query |
+
+---
+
+## Phase 6: Cloud-Image Projection (pointcloud_utils.py)
+
+```python
+def project_pointcloud_to_image(...): ...
+def segment_pointcloud_by_mask(...): ...
+def segment_pointclouds_from_projection(...): ...
+def transform_segmented_clouds_to_world(...): ...
+```
+
+---
+
+## Phase 7: Message Publishers (utils/map_object_utils.py)
+
+```python
+def map_object_to_object_node(obj: MapObject, header: Header, status: str, target_object: str | None = None) -> ObjectNode | None: ...
+def create_bbox_markers(objects: list[MapObject], header: Header) -> MarkerArray: ...
+def create_label_markers(objects: list[MapObject], header: Header) -> MarkerArray: ...
+def create_object_pointcloud(objects: list[MapObject], header: Header) -> PointCloud2 | None: ...
+def create_delete_markers(deleted_ids: list[int], header: Header, namespace: str) -> MarkerArray: ...
+```
+
+Publisher helpers:
+- `map_object_to_object_node` converts a `MapObject` into an `ObjectNode` with centroid, bbox corners, and point cloud.
+- Marker helpers emit bbox wireframes and text labels for RViz.
+- `create_object_pointcloud` builds a colored aggregate cloud from object voxels.
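+
+For illustration, a sketch of how a bbox wireframe can be built as a `LINE_LIST` marker from 8 corner points (corner ordering, namespace, and styling are assumptions; the real helper derives corners from `get_oriented_bbox()`):
+
+```python
+from geometry_msgs.msg import Point
+from visualization_msgs.msg import Marker
+
+# The 12 box edges as index pairs into an 8-corner array (assumed ordering).
+_BOX_EDGES = [(0, 1), (1, 2), (2, 3), (3, 0),
+              (4, 5), (5, 6), (6, 7), (7, 4),
+              (0, 4), (1, 5), (2, 6), (3, 7)]
+
+def bbox_wireframe_marker(corners, header, marker_id: int) -> Marker:
+    """Build a LINE_LIST marker connecting 8 bbox corners."""
+    marker = Marker()
+    marker.header = header
+    marker.ns = "bbox"
+    marker.id = marker_id
+    marker.type = Marker.LINE_LIST
+    marker.action = Marker.ADD
+    marker.scale.x = 0.02  # line width in meters
+    marker.color.r, marker.color.g, marker.color.b, marker.color.a = 0.1, 0.8, 0.2, 1.0
+    for i, j in _BOX_EDGES:
+        marker.points.append(Point(x=float(corners[i][0]), y=float(corners[i][1]), z=float(corners[i][2])))
+        marker.points.append(Point(x=float(corners[j][0]), y=float(corners[j][1]), z=float(corners[j][2])))
+    return marker
+```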
+
+---
+
+## Phase 8: Image Storage (utils/storage.py)
+
+```python
+class ImageSaveQueue:
+ """Background worker for saving object images to disk."""
+
+ def __init__(self, output_dir: Path, max_queue_size: int = 100) -> None: ...
+ def start(self) -> None: ...
+ def stop(self) -> None: ...
+ def queue_save(self, object_id: str, image: np.ndarray, mask: np.ndarray | None = None) -> str: ...
+```
+
+Image storage:
+- Async worker writes `.npy` (and optional `.png`) crops to disk.
+- Queue is bounded to avoid blocking the main mapping loop.
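+
+A minimal sketch of the bounded background-writer pattern (the real `ImageSaveQueue` also handles masks, optional PNG export, and clean shutdown; class and file naming here are illustrative):
+
+```python
+import queue
+import threading
+from pathlib import Path
+
+import numpy as np
+
+class BoundedImageSaver:
+    """Background writer that drops work instead of blocking when the queue is full."""
+
+    def __init__(self, output_dir: Path, max_queue_size: int = 100) -> None:
+        self._dir = Path(output_dir)
+        self._dir.mkdir(parents=True, exist_ok=True)
+        self._queue: queue.Queue = queue.Queue(maxsize=max_queue_size)
+        threading.Thread(target=self._run, daemon=True).start()
+
+    def queue_save(self, object_id: str, image: np.ndarray) -> str:
+        path = self._dir / f"{object_id}.npy"
+        try:
+            self._queue.put_nowait((path, image))
+        except queue.Full:
+            pass  # never block the mapping loop
+        return str(path)
+
+    def _run(self) -> None:
+        while True:
+            path, image = self._queue.get()
+            np.save(path, image)
+```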
+
+---
+
+## Phase 9: Launch Files
+
+```python
+# semantic_mapping.launch.py
+DeclareLaunchArgument('platform', default_value='mecanum_sim')
+DeclareLaunchArgument('detection_topic', default_value='/detection_result')
+DeclareLaunchArgument('cloud_topic', default_value='/registered_scan')
+DeclareLaunchArgument('odom_topic', default_value='/state_estimation')
+DeclareLaunchArgument('voxel_size', default_value='0.03')
+DeclareLaunchArgument('confidence_threshold', default_value='0.30')
+DeclareLaunchArgument('target_object', default_value='')
+```
+
+---
+
+## Implementation Checklist
+
+### Completed
+- [x] Create `CMakeLists.txt` for message generation
+- [x] Port message definitions from tare_planner
+- [x] Set up `package.xml` with dependencies
+- [x] Implement `map_object.py` with VoxelFeatureManager, MapObject, Observation
+- [x] Port lazy computation patterns (centroid, bbox3d, regularization)
+- [x] Refactor `map_object.py` to use shared utilities from `vector_perception_utils`
+- [x] Add `normalize_angle`, `discretize_angles` to `transform_utils.py`
+- [x] Add `get_oriented_bbox` to `pointcloud_utils.py`
+- [x] Simplify `_compute_geometry()` using Open3D oriented bbox
+- [x] Implement `utils/matching.py` with IoU, distance, merge-compatibility functions
+- [x] Implement `mapper.py` with ObjectDB-style tiering
+- [x] Implement cloud-image projection helpers in `vector_perception_utils/pointcloud_utils.py`
+- [x] Implement `utils/map_object_utils.py` message conversion functions
+- [x] Implement `utils/storage.py` with async queue
+- [x] Implement `mapping_node.py` following detect_anything patterns
+
+### Recently Completed
+- [x] Create launch files and params.yaml
+- [x] Integration testing with bag files
+
+---
+
+## Code Cleanup Items
+
+Items from VLM_ROS to preserve:
+- Memory fixes with `.copy()` on detection data extraction
+- VLM query threshold (`best_image_score > 500`)
+
+Items to remove:
+1. Unused publishers: `/cloud_image`
+2. Unused methods: `serialize_objs_to_bag()`, `is_merge_allowed()`, `handle_object_query()`
+3. Review constants: `BACKGROUND_OBJECTS`, `VERTICAL_OBJECTS`, `BIG_OBJECTS`, `SMALL_OBJECTS`
+4. Don't port: `byte_track`, `Grounded-SAM-2`, `mmconfig` directories
+
+---
+
+## Compatibility Checklist
+
+- [x] `/object_nodes` publishes ObjectNode with exact schema
+- [x] `/obj_points` publishes colored PointCloud2
+- [x] `/obj_boxes` publishes MarkerArray wireframes
+- [x] `/obj_labels` publishes MarkerArray text
+- [x] `/object_type_query` publishes ObjectType for VLM
+- [x] Subscribes to `/detection_result` from detect_anything
+- [x] Subscribes to `/registered_scan` PointCloud2
+- [x] Subscribes to `/state_estimation` Odometry
+- [x] Subscribes to `/target_object_instruction`
+- [x] Subscribes to `/viewpoint_rep_header`
+- [x] Subscribes to `/object_type_answer`
+- [x] Object positions, bbox corners, status flags match expected semantics
+- [x] Delete markers properly remove objects from RViz
+
+---
+
+## Recent Updates (Jan 2026)
+
+- Unified launch to `semantic_mapping.launch.py` with mode handling, detect_anything integration, and bagfile defaults.
+- Fixed cloud/image projection transform to match legacy `CloudImageFusion` math.
+- Added mask erosion, depth-jump pruning, distance gating, and light outlier filtering for cleaner object clouds.
+- Added reprojected observation-angle voting for better multi-view aggregation.
+- Fixed bbox ordering and switched `/obj_boxes` to axis-aligned markers (via `get_oriented_bbox(axis_aligned=True)`).
+- Replaced DBSCAN regularization with vote-weighted filtering + voxel connected components.
+- Added `regularize_voxel_size` to grow connectivity for thin structures.
+- Simplified ground filtering to radius-based Z clamp around robot.
+- Added `visualization_max_objects` to keep recent objects visible in RViz.
+- Logging: mapping stats (total/total_deleted/total_permanent) + mapping loop time.
+- Fixed image saving color order and ensured output dir is cleared on start.
+- Set MobileCLIP weights cache to `detect_anything/models` and removed corrupted caches.
+
+## Temporary Handoff Notes
+
+- Regularization is now connected-components; tune `regularize_voxel_size` and keep ratio
+ (`MapObject.regularize_shape` percentile) if legs/sparse parts are dropped.
+- Ground filter uses `ground_filter`, `ground_radius`, `ground_z_clearance`.
+- If bagfile sync issues persist, consider switching mapping inputs to `/sensor_scan` + `/state_estimation_at_scan`.
+- Verify VLM objects.yaml path in VLM_ROS (`vlm_reasoning_node` still expects old path).
+
+---
+
+## Planner Dependencies
+
+The exploration planner (`sensor_coverage_planner_ground.cpp` in tare_planner) is tightly coupled to `ObjectNode`:
+- Subscribes to `/object_nodes` and stores per-object reps
+- Uses object positions, bbox corners, clouds, status, `is_asked_vlm`, and `img_path` for navigation decisions
+
+**Implication**: Keep the `ObjectNode` schema stable. Any new fields from the `MapObject` model can be mapped into extensions, but existing fields and semantics must remain for planner compatibility.
diff --git a/semantic_mapping/launch/semantic_mapping.launch.py b/semantic_mapping/launch/semantic_mapping.launch.py
new file mode 100644
index 0000000..0b7d8ab
--- /dev/null
+++ b/semantic_mapping/launch/semantic_mapping.launch.py
@@ -0,0 +1,137 @@
+"""Launch semantic mapping and detect_anything with a mode selector."""
+
+import os
+
+from ament_index_python.packages import get_package_share_directory
+from launch import LaunchDescription
+from launch.actions import DeclareLaunchArgument, OpaqueFunction
+from launch.substitutions import LaunchConfiguration
+from launch_ros.actions import Node
+
+
+def _parse_bool(value: str, default: bool) -> bool:
+ if value == "":
+ return default
+ return value.lower() in ("true", "1", "yes")
+
+
+def _launch_nodes(context):
+ mode = LaunchConfiguration("mode").perform(context).lower()
+ use_sim_time_arg = LaunchConfiguration("use_sim_time").perform(context)
+ use_compressed_arg = LaunchConfiguration("use_compressed").perform(context)
+ odom_topic_arg = LaunchConfiguration("odom_topic").perform(context)
+
+ use_sim_time = _parse_bool(use_sim_time_arg, default=mode == "bagfile")
+ use_compressed = _parse_bool(use_compressed_arg, default=mode in ("bagfile", "sim"))
+
+ if odom_topic_arg:
+ odom_topic = odom_topic_arg
+ else:
+ odom_topic = "/aft_mapped_to_init_incremental" if mode == "bagfile" else "/state_estimation"
+
+ params_file = LaunchConfiguration("params_file").perform(context)
+ sensor_to_camera_tf = LaunchConfiguration("sensor_to_camera_tf").perform(context)
+ detection_topic = LaunchConfiguration("detection_topic").perform(context)
+ cloud_topic = LaunchConfiguration("cloud_topic").perform(context)
+ viewpoint_topic = LaunchConfiguration("viewpoint_topic").perform(context)
+ object_type_answer_topic = LaunchConfiguration("object_type_answer_topic").perform(context)
+ object_type_query_topic = LaunchConfiguration("object_type_query_topic").perform(context)
+ target_object_instruction_topic = LaunchConfiguration("target_object_instruction_topic").perform(
+ context
+ )
+ object_nodes_topic = LaunchConfiguration("object_nodes_topic").perform(context)
+ obj_points_topic = LaunchConfiguration("obj_points_topic").perform(context)
+ obj_boxes_topic = LaunchConfiguration("obj_boxes_topic").perform(context)
+ obj_labels_topic = LaunchConfiguration("obj_labels_topic").perform(context)
+ map_frame = LaunchConfiguration("map_frame").perform(context)
+ target_object = LaunchConfiguration("target_object").perform(context)
+ save_png = _parse_bool(LaunchConfiguration("save_png").perform(context), default=False)
+ output_dir = LaunchConfiguration("output_dir").perform(context)
+ image_topic = LaunchConfiguration("image_topic").perform(context)
+ compressed_image_topic = LaunchConfiguration("compressed_image_topic").perform(context)
+ conf = float(LaunchConfiguration("conf").perform(context))
+ max_area_ratio = float(LaunchConfiguration("max_area_ratio").perform(context))
+
+ detect_anything_node = Node(
+ package="detect_anything",
+ executable="detection_node",
+ name="detect_anything_detection_node",
+ output="screen",
+ parameters=[
+ {
+ "use_sim_time": use_sim_time,
+ "use_compressed": use_compressed,
+ "image_topic": image_topic,
+ "compressed_image_topic": compressed_image_topic,
+ "conf": conf,
+ "max_area_ratio": max_area_ratio,
+ }
+ ],
+ )
+
+ semantic_mapping_node = Node(
+ package="semantic_mapping",
+ executable="semantic_mapping_node",
+ name="semantic_mapping_node",
+ output="screen",
+ parameters=[
+ params_file,
+ {
+ "use_sim_time": use_sim_time,
+ "sensor_to_camera_tf": sensor_to_camera_tf,
+ "detection_topic": detection_topic,
+ "cloud_topic": cloud_topic,
+ "odom_topic": odom_topic,
+ "viewpoint_topic": viewpoint_topic,
+ "object_type_answer_topic": object_type_answer_topic,
+ "object_type_query_topic": object_type_query_topic,
+ "target_object_instruction_topic": target_object_instruction_topic,
+ "object_nodes_topic": object_nodes_topic,
+ "obj_points_topic": obj_points_topic,
+ "obj_boxes_topic": obj_boxes_topic,
+ "obj_labels_topic": obj_labels_topic,
+ "map_frame": map_frame,
+ "target_object": target_object,
+ "save_png": save_png,
+ "output_dir": output_dir,
+ },
+ ],
+ )
+
+ return [detect_anything_node, semantic_mapping_node]
+
+
+def generate_launch_description() -> LaunchDescription:
+ pkg_share = get_package_share_directory("semantic_mapping")
+ default_params = os.path.join(pkg_share, "config", "params.yaml")
+ default_sensor_tf = os.path.join(pkg_share, "config", "sensor_to_camera_tf.yaml")
+
+ return LaunchDescription(
+ [
+ DeclareLaunchArgument("mode", default_value="sim"),
+ DeclareLaunchArgument("use_sim_time", default_value=""),
+ DeclareLaunchArgument("use_compressed", default_value=""),
+ DeclareLaunchArgument("params_file", default_value=default_params),
+ DeclareLaunchArgument("sensor_to_camera_tf", default_value=default_sensor_tf),
+ DeclareLaunchArgument("detection_topic", default_value="/detection_result"),
+ DeclareLaunchArgument("cloud_topic", default_value="/registered_scan"),
+ DeclareLaunchArgument("odom_topic", default_value=""),
+ DeclareLaunchArgument("viewpoint_topic", default_value="/viewpoint_rep_header"),
+ DeclareLaunchArgument("object_type_answer_topic", default_value="/object_type_answer"),
+ DeclareLaunchArgument("object_type_query_topic", default_value="/object_type_query"),
+ DeclareLaunchArgument("target_object_instruction_topic", default_value="/target_object_instruction"),
+ DeclareLaunchArgument("object_nodes_topic", default_value="/object_nodes"),
+ DeclareLaunchArgument("obj_points_topic", default_value="/obj_points"),
+ DeclareLaunchArgument("obj_boxes_topic", default_value="/obj_boxes"),
+ DeclareLaunchArgument("obj_labels_topic", default_value="/obj_labels"),
+ DeclareLaunchArgument("map_frame", default_value="map"),
+ DeclareLaunchArgument("target_object", default_value="refrigerator"),
+ DeclareLaunchArgument("save_png", default_value="false"),
+ DeclareLaunchArgument("output_dir", default_value="output/object_images"),
+ DeclareLaunchArgument("image_topic", default_value="/camera/image"),
+ DeclareLaunchArgument("compressed_image_topic", default_value="/camera/image/compressed"),
+ DeclareLaunchArgument("conf", default_value="0.35"),
+ DeclareLaunchArgument("max_area_ratio", default_value="0.3"),
+ OpaqueFunction(function=_launch_nodes),
+ ]
+ )
diff --git a/semantic_mapping/msg/ObjectNode.msg b/semantic_mapping/msg/ObjectNode.msg
new file mode 100644
index 0000000..b2d7686
--- /dev/null
+++ b/semantic_mapping/msg/ObjectNode.msg
@@ -0,0 +1,35 @@
+# Semantic object representation for planner integration.
+#
+# Each message represents a single object in the semantic map with its
+# 3D bounding box, point cloud, and metadata for VLM interaction.
+
+std_msgs/Header header
+
+# Object identity - may contain multiple merged track IDs
+int32[] object_id
+
+# Dominant class label (by weighted vote)
+string label
+
+# Object centroid in world frame
+geometry_msgs/Point position
+
+# 3D bounding box corners (8 points in world frame)
+# Order: [min_x,min_y,min_z], [max_x,min_y,min_z], [max_x,max_y,min_z], [min_x,max_y,min_z],
+# [min_x,min_y,max_z], [max_x,min_y,max_z], [max_x,max_y,max_z], [min_x,max_y,max_z]
+geometry_msgs/Point[8] bbox3d
+
+# Object point cloud in world frame
+sensor_msgs/PointCloud2 cloud
+
+# Lifecycle status: "new", "updated", "unchanged"
+string status
+
+# Path to best saved image crop of this object
+string img_path
+
+# VLM interaction flags
+bool is_asked_vlm
+
+# Associated viewpoint ID (for viewpoint-triggered saves)
+int32 viewpoint_id
diff --git a/semantic_mapping/msg/ObjectType.msg b/semantic_mapping/msg/ObjectType.msg
new file mode 100644
index 0000000..ab02ea6
--- /dev/null
+++ b/semantic_mapping/msg/ObjectType.msg
@@ -0,0 +1,17 @@
+# VLM query/answer message for object relabeling.
+#
+# Used bidirectionally:
+# - Query (mapping -> VLM): object_id, img_path, labels[] populated
+# - Answer (VLM -> mapping): final_label populated with VLM decision
+
+# Object ID to query/update
+int32 object_id
+
+# Path to object image for VLM analysis
+string img_path
+
+# VLM-determined final label (populated in answer)
+string final_label
+
+# Candidate labels from detection (populated in query)
+string[] labels
diff --git a/semantic_mapping/msg/TargetObjectInstruction.msg b/semantic_mapping/msg/TargetObjectInstruction.msg
new file mode 100644
index 0000000..1a88005
--- /dev/null
+++ b/semantic_mapping/msg/TargetObjectInstruction.msg
@@ -0,0 +1,22 @@
+# User instruction for target object search.
+#
+# Specifies the target object to find and optional spatial/attribute
+# conditions relative to an anchor object.
+
+# Primary target object label to search for
+string target_object
+
+# Room condition (e.g., "in the kitchen")
+string room_condition
+
+# Spatial condition relative to anchor (e.g., "next to", "on top of")
+string spatial_condition
+
+# Attribute condition for target (e.g., "red", "large")
+string attribute_condition
+
+# Anchor object for spatial reference
+string anchor_object
+
+# Attribute condition for anchor object
+string attribute_condition_anchor
diff --git a/semantic_mapping/msg/ViewpointRep.msg b/semantic_mapping/msg/ViewpointRep.msg
new file mode 100644
index 0000000..5bb1bfe
--- /dev/null
+++ b/semantic_mapping/msg/ViewpointRep.msg
@@ -0,0 +1,8 @@
+# Viewpoint trigger message for image saving.
+#
+# Published by planner to request saving images at specific viewpoints.
+
+std_msgs/Header header
+
+# Unique viewpoint identifier
+int32 viewpoint_id
diff --git a/semantic_mapping/package.xml b/semantic_mapping/package.xml
new file mode 100644
index 0000000..4804d1a
--- /dev/null
+++ b/semantic_mapping/package.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0"?>
+<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
+<package format="3">
+  <name>semantic_mapping</name>
+  <version>0.0.1</version>
+  <description>Semantic 3D object mapping node with VLM integration.</description>
+  <maintainer email="">Alex Lin</maintainer>
+  <license>MIT</license>
+
+  <buildtool_depend>ament_cmake</buildtool_depend>
+
+  <build_depend>rosidl_default_generators</build_depend>
+  <exec_depend>rosidl_default_runtime</exec_depend>
+  <member_of_group>rosidl_interface_packages</member_of_group>
+
+  <depend>rclpy</depend>
+  <depend>std_msgs</depend>
+  <depend>sensor_msgs</depend>
+  <depend>geometry_msgs</depend>
+  <depend>nav_msgs</depend>
+  <depend>visualization_msgs</depend>
+  <depend>cv_bridge</depend>
+  <depend>detect_anything</depend>
+  <depend>vector_perception_utils</depend>
+
+  <export>
+    <build_type>ament_cmake</build_type>
+  </export>
+</package>
diff --git a/semantic_mapping/resource/semantic_mapping b/semantic_mapping/resource/semantic_mapping
new file mode 100644
index 0000000..e69de29
diff --git a/semantic_mapping/scripts/semantic_mapping_node b/semantic_mapping/scripts/semantic_mapping_node
new file mode 100755
index 0000000..0312e88
--- /dev/null
+++ b/semantic_mapping/scripts/semantic_mapping_node
@@ -0,0 +1,7 @@
+#!/usr/bin/env python3
+"""Entry point for semantic_mapping_node."""
+
+from semantic_mapping.mapping_node import main
+
+if __name__ == "__main__":
+ main()
diff --git a/semantic_mapping/semantic_mapping/__init__.py b/semantic_mapping/semantic_mapping/__init__.py
new file mode 100644
index 0000000..8df6f20
--- /dev/null
+++ b/semantic_mapping/semantic_mapping/__init__.py
@@ -0,0 +1 @@
+"""Semantic 3D object mapping for vector_perception_ros."""
diff --git a/semantic_mapping/semantic_mapping/map_object.py b/semantic_mapping/semantic_mapping/map_object.py
new file mode 100644
index 0000000..a9e3633
--- /dev/null
+++ b/semantic_mapping/semantic_mapping/map_object.py
@@ -0,0 +1,705 @@
+"""Data models for semantic mapping.
+
+This module defines the core data structures for the semantic mapping system:
+- VoxelFeatureManager: Manages voxelized point clouds with observation angle voting
+- MapObject: Represents a tracked object in the semantic map
+- Observation: A single detection observation
+- ObjectStatus: Enum for object lifecycle status
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Optional
+
+import numpy as np
+from scipy.spatial import cKDTree
+
+from vector_perception_utils.transform_utils import discretize_angles, normalize_angle
+from vector_perception_utils.pointcloud_utils import (
+ compute_centroid,
+ create_pointcloud,
+)
+
+
+class ObjectStatus(Enum):
+ """Object lifecycle status (ObjectDB-style tiering)."""
+
+ PENDING = "pending" # < min_detections threshold
+ PERMANENT = "permanent" # >= threshold, confirmed object
+ DELETED = "deleted" # Marked for removal
+
+
+@dataclass
+class Observation:
+ """A single detection observation to update a MapObject.
+
+ Attributes:
+ track_id: Tracker ID from detection.
+ label: Class label string.
+ confidence: Detection confidence score.
+ points: World-frame point cloud (N, 3).
+ robot_R: Robot rotation matrix (3, 3) at observation time.
+ robot_t: Robot translation vector (3,) at observation time.
+ mask: Optional 2D segmentation mask.
+ image: Optional RGB image crop for best-image selection.
+ timestamp: Observation timestamp in seconds.
+ """
+
+ track_id: int
+ label: str
+ confidence: float
+ points: np.ndarray
+ robot_R: np.ndarray
+ robot_t: np.ndarray
+ mask: Optional[np.ndarray] = None
+ image: Optional[np.ndarray] = None
+ timestamp: float = 0.0
+
+
+class VoxelFeatureManager:
+ """Manages voxelized point cloud with observation angle voting.
+
+ This class tracks 3D voxels representing an object, along with:
+ - Vote counts per voxel (how many times each voxel was observed)
+ - Observation angles (which directions each voxel was seen from)
+ - Regularization mask (which voxels pass shape filtering)
+
+ The observation angle voting helps distinguish between reliable points
+ (seen from multiple angles) and spurious points (seen from only one angle).
+
+ Attributes:
+ voxels: Point coordinates (N, 3) in world frame.
+ votes: Detection count per voxel (N,).
+ observation_angles: Binary angle histogram per voxel (N, num_bins).
+ regularized_mask: Boolean mask for valid voxels after clustering (N,).
+ voxel_size: Size of voxel grid cells in meters.
+ num_angle_bins: Number of discretized angle bins.
+ """
+
+ def __init__(
+ self,
+ points: np.ndarray,
+ voxel_size: float,
+ robot_R: np.ndarray,
+ robot_t: np.ndarray,
+ num_angle_bins: int = 15,
+ ) -> None:
+ """Initialize VoxelFeatureManager with initial points.
+
+ Args:
+ points: Initial point cloud (N, 3) in world frame.
+ voxel_size: Voxel grid cell size in meters.
+ robot_R: Robot rotation matrix (3, 3) at observation time.
+ robot_t: Robot translation vector (3,) at observation time.
+ num_angle_bins: Number of angle bins for observation tracking.
+ """
+ num_points = points.shape[0]
+
+ # Compute observation angles from robot position
+ vectors = points - robot_t
+ obs_angles = np.arctan2(vectors[:, 1], vectors[:, 0])
+ obs_bins = discretize_angles(obs_angles, num_angle_bins)
+
+ self.voxels = points.copy()
+ self.voxel_size = voxel_size
+ self.num_angle_bins = num_angle_bins
+ self._tree = cKDTree(points)
+
+ # Initialize vote counts
+ self.votes = np.ones(num_points, dtype=np.float32)
+
+ # Initialize observation angle histogram (binary: seen/not seen from each angle)
+ self.observation_angles = np.zeros((num_points, num_angle_bins), dtype=np.uint8)
+ self.observation_angles[np.arange(num_points), obs_bins] = 1
+
+ # Regularization mask (all False until regularize_shape is called)
+ self.regularized_mask = np.zeros(num_points, dtype=bool)
+
+        # Removal votes, reserved for dynamic-object clearing (currently bookkeeping only)
+ self._remove_votes = np.zeros(num_points, dtype=np.int32)
+
+ def update(
+ self,
+ new_points: np.ndarray,
+ robot_R: np.ndarray,
+ robot_t: np.ndarray,
+ ) -> None:
+ """Add new points with KDTree-based merge and angle voting.
+
+ Points within voxel_size of existing points are merged (votes incremented).
+ Points farther away are added as new voxels.
+
+ Args:
+ new_points: New point cloud (M, 3) in world frame.
+ robot_R: Robot rotation matrix (3, 3) at observation time.
+ robot_t: Robot translation vector (3,) at observation time.
+ """
+ if new_points.shape[0] == 0:
+ return
+
+ # Compute observation angles for new points
+ vectors = new_points - robot_t
+ obs_angles = np.arctan2(vectors[:, 1], vectors[:, 0])
+ obs_bins = discretize_angles(obs_angles, self.num_angle_bins)
+
+ # Find nearest existing voxels
+ distances, indices = self._tree.query(new_points)
+
+ # Merge points within voxel_size
+ merge_mask = distances < self.voxel_size
+ merge_indices = indices[merge_mask]
+ merge_bins = obs_bins[merge_mask]
+
+ self.votes[merge_indices] += 1
+ self.observation_angles[merge_indices, merge_bins] = 1
+
+ # Add new points that don't merge
+ new_mask = ~merge_mask
+ if np.any(new_mask):
+ new_voxels = new_points[new_mask]
+ new_bins = obs_bins[new_mask]
+ num_new = new_voxels.shape[0]
+
+ # Extend arrays
+ self.voxels = np.vstack([self.voxels, new_voxels])
+ self.votes = np.concatenate([self.votes, np.ones(num_new, dtype=np.float32)])
+
+ new_obs = np.zeros((num_new, self.num_angle_bins), dtype=np.uint8)
+ new_obs[np.arange(num_new), new_bins] = 1
+ self.observation_angles = np.vstack([self.observation_angles, new_obs])
+
+ self.regularized_mask = np.concatenate(
+ [self.regularized_mask, np.zeros(num_new, dtype=bool)]
+ )
+ self._remove_votes = np.concatenate(
+ [self._remove_votes, np.zeros(num_new, dtype=np.int32)]
+ )
+
+ # Rebuild KDTree
+ self._tree = cKDTree(self.voxels)
+
+ def update_observation_angles(self, voxel_indices: np.ndarray, obs_bins: np.ndarray) -> None:
+ """Update observation angle histogram for selected voxels."""
+ if voxel_indices.size == 0:
+ return
+ self.observation_angles[voxel_indices, obs_bins] = 1
+
+ def merge_from(self, other: VoxelFeatureManager) -> None:
+ """Merge voxels from another VoxelFeatureManager.
+
+ Args:
+ other: Another VoxelFeatureManager to merge from.
+ """
+ if other.voxels.shape[0] == 0:
+ return
+
+ distances, indices = self._tree.query(other.voxels)
+
+ # Merge overlapping voxels
+ merge_mask = distances < self.voxel_size
+ merge_indices = indices[merge_mask]
+
+ self.votes[merge_indices] += other.votes[merge_mask]
+ self.observation_angles[merge_indices] = np.logical_or(
+ self.observation_angles[merge_indices],
+ other.observation_angles[merge_mask],
+ ).astype(np.uint8)
+
+ # Add non-overlapping voxels
+ new_mask = ~merge_mask
+ if np.any(new_mask):
+ self.voxels = np.vstack([self.voxels, other.voxels[new_mask]])
+ self.votes = np.concatenate([self.votes, other.votes[new_mask]])
+ self.observation_angles = np.vstack(
+ [self.observation_angles, other.observation_angles[new_mask]]
+ )
+ self.regularized_mask = np.concatenate(
+ [self.regularized_mask, other.regularized_mask[new_mask]]
+ )
+ self._remove_votes = np.concatenate(
+ [self._remove_votes, np.zeros(np.sum(new_mask), dtype=np.int32)]
+ )
+ self._tree = cKDTree(self.voxels)
+
+ def get_valid_indices(
+ self,
+ diversity_percentile: float = 0.3,
+ use_regularized: bool = True,
+ ) -> np.ndarray:
+ """Get indices of valid voxels based on observation angle diversity.
+
+ Voxels are ranked by their observation angle diversity (number of angles
+        from which they were observed), weighted by vote count. The top-weighted
+        fraction of voxels is returned.
+
+ Args:
+ diversity_percentile: Fraction of total weight to include (0-1).
+ use_regularized: If True, only consider voxels in regularized_mask.
+
+ Returns:
+ Indices of valid voxels.
+ """
+ if use_regularized:
+ mask = self.regularized_mask
+ if not np.any(mask):
+ return np.array([], dtype=int)
+ obs_angles = self.observation_angles[mask]
+ votes = self.votes[mask]
+ else:
+ obs_angles = self.observation_angles
+ votes = self.votes
+
+ if len(obs_angles) == 0:
+ return np.array([], dtype=int)
+
+ # Compute diversity score (number of angles observed)
+ angle_diversity = np.sum(obs_angles, axis=1)
+
+ # Sort by diversity (ascending)
+ sorted_indices = np.argsort(angle_diversity)
+ sorted_diversity = angle_diversity[sorted_indices]
+ sorted_weights = votes[sorted_indices]
+
+ # Weight by diversity * votes
+ weighted = sorted_diversity * sorted_weights
+ total_weight = np.sum(weighted)
+
+ if total_weight == 0:
+ return np.array([], dtype=int)
+
+ # Find cutoff index for percentile
+ target_weight = (1 - diversity_percentile) * total_weight
+ cumsum = np.cumsum(weighted)
+ cutoff_idx = np.searchsorted(cumsum, target_weight)
+
+ # Return indices above cutoff (higher diversity)
+ return sorted_indices[cutoff_idx:]
+
+ def get_valid_voxels(
+ self,
+ diversity_percentile: float = 0.3,
+ use_regularized: bool = True,
+ ) -> np.ndarray:
+ """Get valid voxel coordinates.
+
+ Args:
+ diversity_percentile: Fraction of total weight to include (0-1).
+ use_regularized: If True, only consider voxels in regularized_mask.
+
+ Returns:
+ Valid voxel coordinates (M, 3).
+ """
+ indices = self.get_valid_indices(diversity_percentile, use_regularized)
+ if use_regularized and np.any(self.regularized_mask):
+ voxels = self.voxels[self.regularized_mask]
+ else:
+ voxels = self.voxels
+
+ if len(indices) == 0:
+ return np.empty((0, 3), dtype=np.float32)
+ return voxels[indices]
+
+ @property
+ def num_voxels(self) -> int:
+ """Total number of voxels."""
+ return self.voxels.shape[0]
+
+ @property
+ def num_regularized(self) -> int:
+ """Number of voxels passing regularization."""
+ return int(np.sum(self.regularized_mask))
+
+
+@dataclass
+class MapObject:
+ """Represents a tracked object in the semantic map.
+
+ This class combines patterns from VLM_ROS SingleObject and dimos Object,
+ with ObjectDB-style lifecycle management.
+
+ Attributes:
+ object_id: Unique identifier for this object (UUID string).
+ track_ids: List of associated tracker IDs (can merge multiple).
+ voxel_manager: Manages the object's voxelized point cloud.
+ class_votes: Label -> observation count mapping.
+ confidence_scores: Label -> best confidence score mapping.
+ points_count: Label -> total points count mapping.
+ robot_poses: List of robot poses when object was observed.
+ detection_count: Total number of detections.
+ inactive_frames: Frames since last update.
+ last_update_time: Timestamp of last update.
+ status: Object lifecycle status.
+ is_asked_vlm: Whether VLM has been queried for this object.
+ updated_by_vlm: Whether object was relabeled by VLM.
+ best_image_path: Path to best saved image crop.
+ best_image_score: Score of best saved image.
+ centroid: Cached centroid position (None if needs recompute).
+ bbox3d_corners: Cached 3D bbox corners (None if needs recompute).
+ """
+
+ object_id: str
+ track_ids: list[int]
+ voxel_manager: VoxelFeatureManager
+
+ # Classification (voted)
+ class_votes: dict[str, int] = field(default_factory=dict)
+ confidence_scores: dict[str, float] = field(default_factory=dict)
+ points_count: dict[str, int] = field(default_factory=dict)
+
+ # Robot poses at observation times
+ robot_poses: list[dict] = field(default_factory=list)
+
+ # Lifecycle (ObjectDB-style)
+ detection_count: int = 1
+ inactive_frames: int = 0
+ last_update_time: float = 0.0
+ status: ObjectStatus = ObjectStatus.PENDING
+
+ # VLM interaction
+ is_asked_vlm: bool = False
+ updated_by_vlm: bool = False
+
+ # Image storage
+ best_image_path: Optional[str] = None
+ best_image_score: float = 0.0
+
+ # Cached geometry (None means needs recompute)
+ _centroid: Optional[np.ndarray] = field(default=None, repr=False)
+ _bbox3d_corners: Optional[np.ndarray] = field(default=None, repr=False)
+ _needs_recompute: bool = field(default=True, repr=False)
+
+ # Configuration
+ _diversity_percentile: float = field(default=0.85, repr=False)
+ _angle_threshold: float = field(default=np.deg2rad(5), repr=False)
+ _distance_threshold: float = field(default=0.3, repr=False)
+ _needs_regularization: bool = field(default=True, repr=False)
+ _regularize_voxel_size: float = field(default=0.06, repr=False)
+
+ @classmethod
+ def from_observation(
+ cls,
+ object_id: str,
+ obs: Observation,
+ voxel_size: float = 0.03,
+ regularize_voxel_size: float = 0.06,
+ num_angle_bins: int = 15,
+ ) -> MapObject:
+ """Create a new MapObject from an initial observation.
+
+ Args:
+ object_id: Unique identifier for this object.
+ obs: Initial observation.
+ voxel_size: Voxel grid cell size in meters.
+ regularize_voxel_size: Voxel size for regularization clustering.
+ num_angle_bins: Number of angle bins for observation tracking.
+
+ Returns:
+ New MapObject instance.
+ """
+ voxel_manager = VoxelFeatureManager(
+ points=obs.points,
+ voxel_size=voxel_size,
+ robot_R=obs.robot_R,
+ robot_t=obs.robot_t,
+ num_angle_bins=num_angle_bins,
+ )
+
+ obj = cls(
+ object_id=object_id,
+ track_ids=[obs.track_id],
+ voxel_manager=voxel_manager,
+ class_votes={obs.label: 1},
+ confidence_scores={obs.label: obs.confidence},
+ points_count={obs.label: len(obs.points)},
+ robot_poses=[{"R": obs.robot_R.copy(), "t": obs.robot_t.copy()}],
+ last_update_time=obs.timestamp,
+ _regularize_voxel_size=regularize_voxel_size,
+ )
+ return obj
+
+ def _invalidate_cache(self) -> None:
+ """Mark cached geometry as needing recompute."""
+ self._needs_recompute = True
+ self._centroid = None
+ self._bbox3d_corners = None
+
+ def _is_similar_pose(self, new_R: np.ndarray, new_t: np.ndarray) -> bool:
+ """Check if a new pose is similar to existing poses.
+
+ Used to avoid redundant updates from the same viewpoint.
+ Compares observation angle and distance to object center.
+
+ Args:
+ new_R: New robot rotation matrix (3, 3). Not used but kept for API.
+ new_t: New robot translation vector (3,).
+
+ Returns:
+ True if pose is similar to an existing pose.
+ """
+ _ = new_R # Unused but kept for API consistency
+ obj_center = self.centroid
+ if obj_center is None:
+ obj_center = compute_centroid(self.voxel_manager.voxels)
+
+ for pose in self.robot_poses:
+ old_t = pose["t"] # R not used for similarity check
+
+ old_to_obj = obj_center - old_t
+ new_to_obj = obj_center - new_t
+
+ old_angle = np.arctan2(old_to_obj[1], old_to_obj[0])
+ new_angle = np.arctan2(new_to_obj[1], new_to_obj[0])
+ angle_diff = abs(normalize_angle(np.array([new_angle - old_angle]))[0])
+
+ old_dist = np.linalg.norm(old_to_obj)
+ new_dist = np.linalg.norm(new_to_obj)
+ dist_diff = abs(new_dist - old_dist)
+
+ if angle_diff < self._angle_threshold and dist_diff < self._distance_threshold:
+ return True
+
+ return False
+
+ def update(self, obs: Observation) -> None:
+ """Update object with a new observation.
+
+ Args:
+ obs: New observation to incorporate.
+ """
+ # Update voxel manager
+ self.voxel_manager.update(obs.points, obs.robot_R, obs.robot_t)
+ self.detection_count += 1
+ self.inactive_frames = 0
+ self.last_update_time = obs.timestamp
+ self._needs_regularization = True
+
+ # Check if pose is new (not similar to existing)
+ is_similar = self._is_similar_pose(obs.robot_R, obs.robot_t)
+
+ if not is_similar:
+ # Update class statistics
+ self.class_votes[obs.label] = self.class_votes.get(obs.label, 0) + 1
+ self.confidence_scores[obs.label] = max(
+ self.confidence_scores.get(obs.label, 0), obs.confidence
+ )
+ self.points_count[obs.label] = (
+ self.points_count.get(obs.label, 0) + len(obs.points)
+ )
+
+ # Add pose
+ self.robot_poses.append({"R": obs.robot_R.copy(), "t": obs.robot_t.copy()})
+
+ # Add track ID if new
+ if obs.track_id not in self.track_ids:
+ self.track_ids.append(obs.track_id)
+
+ self._invalidate_cache()
+
+ def merge(self, other: MapObject) -> None:
+ """Merge another MapObject into this one.
+
+ Args:
+ other: Object to merge from.
+ """
+ # Merge track IDs
+ for tid in other.track_ids:
+ if tid not in self.track_ids:
+ self.track_ids.append(tid)
+
+ # Merge voxel managers
+ self.voxel_manager.merge_from(other.voxel_manager)
+ self._needs_regularization = True
+
+ # Merge class statistics
+ for label, count in other.class_votes.items():
+ self.class_votes[label] = self.class_votes.get(label, 0) + count
+ self.confidence_scores[label] = max(
+ self.confidence_scores.get(label, 0),
+ other.confidence_scores.get(label, 0),
+ )
+ self.points_count[label] = (
+ self.points_count.get(label, 0) + other.points_count.get(label, 0)
+ )
+
+ # Merge poses (only unique ones)
+ for pose in other.robot_poses:
+ if not self._is_similar_pose(pose["R"], pose["t"]):
+ self.robot_poses.append(pose)
+
+ # Update lifecycle
+ self.detection_count += other.detection_count
+ self.last_update_time = max(self.last_update_time, other.last_update_time)
+ self.is_asked_vlm = self.is_asked_vlm or other.is_asked_vlm
+
+ self._invalidate_cache()
+
+ @property
+ def label(self) -> str:
+ """Get dominant class label by weighted vote.
+
+ Returns:
+ Most likely class label based on points_count * confidence.
+ """
+ if not self.class_votes:
+ return "unknown"
+
+ # Compute weighted scores
+ weighted_scores = {}
+ for lbl in self.class_votes:
+ pts = self.points_count.get(lbl, 0)
+ conf = self.confidence_scores.get(lbl, 0)
+ weighted_scores[lbl] = pts * conf
+
+ return max(weighted_scores, key=weighted_scores.get)
+
+ @property
+ def confidence(self) -> float:
+ """Get confidence of the dominant label."""
+ return self.confidence_scores.get(self.label, 0.0)
+
+ @property
+ def centroid(self) -> Optional[np.ndarray]:
+ """Compute centroid weighted by observation angles.
+
+ Returns:
+ Centroid position (3,) or None if no valid voxels.
+ """
+ if self._needs_recompute or self._centroid is None:
+ self._compute_geometry()
+ return self._centroid
+
+ @property
+ def bbox3d_corners(self) -> Optional[np.ndarray]:
+ """Compute axis-aligned 3D bounding box corners.
+
+ Returns:
+ 8 corner points (8, 3) or None if no valid voxels.
+ """
+ if self._needs_recompute or self._bbox3d_corners is None:
+ self._compute_geometry()
+ return self._bbox3d_corners
+
+ def _compute_geometry(self) -> None:
+ """Compute cached centroid and bbox from valid voxels using Open3D.
+
+ Returns:
+ None. Updates cached centroid and bbox in-place.
+ """
+ # Get valid voxels (regularized first, then fallback to all)
+ valid_voxels = self.voxel_manager.get_valid_voxels(
+ diversity_percentile=self._diversity_percentile,
+ use_regularized=True,
+ )
+ if len(valid_voxels) == 0:
+ valid_voxels = self.voxel_manager.get_valid_voxels(
+ diversity_percentile=self._diversity_percentile,
+ use_regularized=False,
+ )
+
+ if len(valid_voxels) < 4:
+ self._centroid = compute_centroid(valid_voxels) if len(valid_voxels) > 0 else None
+ self._bbox3d_corners = None
+ else:
+ pcd = create_pointcloud(valid_voxels)
+ obb = pcd.get_minimal_oriented_bounding_box()
+ self._centroid = np.asarray(obb.center)
+ half_extent = 0.5 * np.asarray(obb.extent)
+ local_corners = np.array(
+ [
+ [-half_extent[0], -half_extent[1], -half_extent[2]],
+ [half_extent[0], -half_extent[1], -half_extent[2]],
+ [half_extent[0], half_extent[1], -half_extent[2]],
+ [-half_extent[0], half_extent[1], -half_extent[2]],
+ [-half_extent[0], -half_extent[1], half_extent[2]],
+ [half_extent[0], -half_extent[1], half_extent[2]],
+ [half_extent[0], half_extent[1], half_extent[2]],
+ [-half_extent[0], half_extent[1], half_extent[2]],
+ ],
+ dtype=float,
+ )
+ self._bbox3d_corners = local_corners @ np.asarray(obb.R).T + np.asarray(obb.center)
+
+ self._needs_recompute = False
+
+ def regularize_shape(self, percentile: float = 0.85) -> None:
+ """Apply vote-weighted regularization with light outlier removal."""
+ if not self._needs_regularization:
+ return
+ voxels = self.voxel_manager.voxels
+ if len(voxels) < 3:
+ self.voxel_manager.regularized_mask = np.ones(len(voxels), dtype=bool)
+ self._needs_regularization = False
+ self._invalidate_cache()
+ return
+
+ weights = np.sum(self.voxel_manager.observation_angles, axis=1) * self.voxel_manager.votes
+ keep_ratio = percentile if percentile is not None else 1.0
+ if keep_ratio >= 1.0:
+ keep_mask = np.ones(voxels.shape[0], dtype=bool)
+ else:
+ cutoff = float(np.quantile(weights, 1.0 - keep_ratio))
+ keep_mask = weights >= cutoff
+
+ if not np.any(keep_mask):
+ keep_mask = np.ones(voxels.shape[0], dtype=bool)
+
+ selected_indices = np.nonzero(keep_mask)[0]
+ points_sel = voxels[selected_indices]
+ weights_sel = weights[selected_indices]
+ if points_sel.shape[0] >= 3:
+ voxel_size = self._regularize_voxel_size
+ keys = np.floor(points_sel / voxel_size).astype(np.int32)
+ voxel_map: dict[tuple[int, int, int], list[int]] = {}
+ for idx, key in enumerate(map(tuple, keys)):
+ voxel_map.setdefault(key, []).append(idx)
+
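+            # Flood-fill over occupied voxel cells (26-connectivity) to extract connected components.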
+ neighbor_offsets = [
+ (dx, dy, dz)
+ for dx in (-1, 0, 1)
+ for dy in (-1, 0, 1)
+ for dz in (-1, 0, 1)
+ if not (dx == 0 and dy == 0 and dz == 0)
+ ]
+ visited: set[tuple[int, int, int]] = set()
+ components: list[tuple[np.ndarray, float, np.ndarray]] = []
+ for key in voxel_map:
+ if key in visited:
+ continue
+ stack = [key]
+ visited.add(key)
+ comp_indices: list[int] = []
+ while stack:
+ current = stack.pop()
+ comp_indices.extend(voxel_map[current])
+ for dx, dy, dz in neighbor_offsets:
+ neighbor = (current[0] + dx, current[1] + dy, current[2] + dz)
+ if neighbor in voxel_map and neighbor not in visited:
+ visited.add(neighbor)
+ stack.append(neighbor)
+
+ comp_idx = np.array(comp_indices, dtype=int)
+ comp_weight = float(np.sum(weights_sel[comp_idx]))
+ comp_centroid = compute_centroid(points_sel[comp_idx])
+ components.append((comp_idx, comp_weight, comp_centroid))
+
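+            # Keep the heaviest component plus any component whose centroid lies within
+            # dist_thresh of it, so thin attached parts survive while distant speckle is dropped.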
+ if components:
+ main_idx = int(np.argmax([c[1] for c in components]))
+ main_points = points_sel[components[main_idx][0]]
+ main_centroid = components[main_idx][2]
+ extent = np.max(main_points, axis=0) - np.min(main_points, axis=0)
+ dist_thresh = max(0.3, 0.5 * float(np.max(extent)))
+
+ keep_sel_mask = np.zeros(points_sel.shape[0], dtype=bool)
+ for comp_idx, _, comp_centroid in components:
+ if np.linalg.norm(comp_centroid - main_centroid) <= dist_thresh:
+ keep_sel_mask[comp_idx] = True
+ if np.any(keep_sel_mask):
+ keep_mask = np.zeros(voxels.shape[0], dtype=bool)
+ keep_mask[selected_indices[keep_sel_mask]] = True
+
+ self.voxel_manager.regularized_mask = keep_mask
+
+ self._needs_regularization = False
+ self._invalidate_cache()
diff --git a/semantic_mapping/semantic_mapping/mapper.py b/semantic_mapping/semantic_mapping/mapper.py
new file mode 100644
index 0000000..cc67cf1
--- /dev/null
+++ b/semantic_mapping/semantic_mapping/mapper.py
@@ -0,0 +1,275 @@
+"""Object map manager for semantic mapping."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
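+    # Mode-dependent defaults: bag playback implies sim time; bag and sim sources publish compressed images.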
+import threading
+import time
+import uuid
+
+from .utils.matching import compute_bbox, compute_iou_3d_with_ratios, find_best_match
+from .map_object import MapObject, ObjectStatus, Observation
+
+
+@dataclass
+class UpdateStats:
+ """Summary of updates applied in a mapper step."""
+
+ created: int = 0
+ updated: int = 0
+ merged: int = 0
+ deleted: int = 0
+ promoted: int = 0
+ total: int = 0
+
+
+class ObjectMapper:
+ """Manages the semantic object map with two-tier state management."""
+
+ def __init__(
+ self,
+ voxel_size: float = 0.03,
+ regularize_voxel_size: float = 0.06,
+ confidence_threshold: float = 0.30,
+ distance_threshold: float = 0.5,
+ iou_threshold: float = 0.2,
+ min_detections_for_permanent: int = 3,
+ pending_ttl_s: float = 10.0,
+ num_angle_bins: int = 15,
+ ) -> None:
+ """Initialize the object mapper and its thresholds.
+
+ Args:
+ voxel_size: Voxel grid cell size in meters.
+ regularize_voxel_size: Voxel size for regularization clustering.
+ confidence_threshold: Minimum detection confidence to accept.
+ distance_threshold: Max centroid distance for spatial matching.
+ iou_threshold: IoU threshold for merge checks.
+ min_detections_for_permanent: Detections before promotion.
+ pending_ttl_s: Time-to-live for pending objects.
+ num_angle_bins: Observation angle bins for voxel manager.
+ """
+ self._pending_objects: dict[str, MapObject] = {}
+ self._permanent_objects: dict[str, MapObject] = {}
+ self._track_id_map: dict[int, str] = {}
+ self._lock = threading.RLock()
+
+ self._voxel_size = voxel_size
+ self._regularize_voxel_size = regularize_voxel_size
+ self._confidence_threshold = confidence_threshold
+ self._distance_threshold = distance_threshold
+ self._iou_threshold = iou_threshold
+ self._min_detections_for_permanent = min_detections_for_permanent
+ self._pending_ttl_s = pending_ttl_s
+ self._num_angle_bins = num_angle_bins
+
+ def update(self, observations: list[Observation], target_object: str | None = None) -> UpdateStats:
+ """Update the map with new observations.
+
+ Args:
+ observations: List of new observations to integrate.
+ target_object: Optional label to protect from pruning.
+
+ Returns:
+ UpdateStats summarizing changes applied.
+ """
+ stats = UpdateStats()
+ now = max((obs.timestamp for obs in observations), default=time.time())
+ updated_ids: set[str] = set()
+ regularized_ids: set[str] = set()
+
+ with self._lock:
+ for obs in observations:
+ if obs.confidence < self._confidence_threshold:
+ continue
+
+ obj = self._match_observation(obs)
+ if obj is None:
+ obj = self._create_object(obs)
+ self._pending_objects[obj.object_id] = obj
+ stats.created += 1
+ else:
+ obj.update(obs)
+ stats.updated += 1
+
+ updated_ids.add(obj.object_id)
+ for track_id in obj.track_ids:
+ self._track_id_map[track_id] = obj.object_id
+
+ if obj.status == ObjectStatus.PENDING:
+ if self._check_promotion(obj):
+ stats.promoted += 1
+
+ if obj.object_id not in regularized_ids and obj.inactive_frames == 0:
+ obj.regularize_shape(percentile=obj._diversity_percentile)
+ regularized_ids.add(obj.object_id)
+
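+            # Track staleness for anything not refreshed this cycle.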
+ for obj in list(self._pending_objects.values()) + list(self._permanent_objects.values()):
+ if obj.object_id not in updated_ids:
+ obj.inactive_frames += 1
+
+ deleted = self._prune_inactive(target_object, now)
+ stats.deleted = len(deleted)
+ stats.merged += self.merge_objects()
+ stats.total = len(self._permanent_objects)
+
+ return stats
+
+ def _match_observation(self, obs: Observation) -> MapObject | None:
+ """Match an observation to an existing object."""
+ if obs.track_id in self._track_id_map:
+ obj_id = self._track_id_map[obs.track_id]
+ if obj_id in self._permanent_objects:
+ return self._permanent_objects[obj_id]
+ if obj_id in self._pending_objects:
+ return self._pending_objects[obj_id]
+
+ candidates = list(self._permanent_objects.values()) + list(self._pending_objects.values())
+ return find_best_match(
+ obs=obs,
+ candidates=candidates,
+ distance_threshold=self._distance_threshold,
+ iou_threshold=self._iou_threshold,
+ )
+
+ def _create_object(self, obs: Observation) -> MapObject:
+ """Create a new MapObject from an observation."""
+ obj = MapObject.from_observation(
+ object_id=str(uuid.uuid4()),
+ obs=obs,
+ voxel_size=self._voxel_size,
+ regularize_voxel_size=self._regularize_voxel_size,
+ num_angle_bins=self._num_angle_bins,
+ )
+ self._track_id_map[obs.track_id] = obj.object_id
+ return obj
+
+ def _check_promotion(self, obj: MapObject) -> bool:
+ """Promote object from pending to permanent if threshold met."""
+ if obj.status != ObjectStatus.PENDING:
+ return False
+ if obj.detection_count < self._min_detections_for_permanent:
+ return False
+ if obj.object_id in self._pending_objects:
+ self._pending_objects.pop(obj.object_id)
+ obj.status = ObjectStatus.PERMANENT
+ self._permanent_objects[obj.object_id] = obj
+ return True
+
+ def merge_objects(self) -> int:
+ """Merge overlapping permanent objects.
+
+ Returns:
+ Number of merges performed.
+ """
+ merged = 0
+ objs = list(self._permanent_objects.values())
+ i = 0
+ while i < len(objs):
+ obj = objs[i]
+ j = i + 1
+ while j < len(objs):
+ other = objs[j]
+                should_merge = False
+
+                # Compute the centroid distance up front: it is needed for the
+                # distance-based merge check below even when track IDs already overlap.
+                if obj.centroid is None or other.centroid is None:
+                    j += 1
+                    continue
+                dist = float(((obj.centroid - other.centroid) ** 2).sum() ** 0.5)
+
+                if set(obj.track_ids) & set(other.track_ids):
+                    should_merge = True
+                elif dist > self._distance_threshold:
+                    j += 1
+                    continue
+
+ obj_points = obj.voxel_manager.get_valid_voxels(
+ diversity_percentile=obj._diversity_percentile,
+ use_regularized=True,
+ )
+ if len(obj_points) == 0:
+ obj_points = obj.voxel_manager.get_valid_voxels(
+ diversity_percentile=obj._diversity_percentile,
+ use_regularized=False,
+ )
+ other_points = other.voxel_manager.get_valid_voxels(
+ diversity_percentile=other._diversity_percentile,
+ use_regularized=True,
+ )
+ if len(other_points) == 0:
+ other_points = other.voxel_manager.get_valid_voxels(
+ diversity_percentile=other._diversity_percentile,
+ use_regularized=False,
+ )
+
+ obj_bbox = compute_bbox(obj_points) if len(obj_points) >= 3 else None
+ other_bbox = compute_bbox(other_points) if len(other_points) >= 3 else None
+ obj_corners = obj_bbox[0] if obj_bbox is not None else None
+ other_corners = other_bbox[0] if other_bbox is not None else None
+ obj_extent = obj_bbox[1] if obj_bbox is not None else None
+ other_extent = other_bbox[1] if other_bbox is not None else None
+
+ dist_merge = dist < 0.25
+ if obj_extent is not None and other_extent is not None:
+ dist_thresh = (((obj_extent / 2 + other_extent / 2) / 2) ** 2).sum() ** 0.5
+ dist_merge = dist_merge or dist < dist_thresh * 0.5
+
+ if dist_merge:
+ should_merge = True
+ elif obj_corners is not None and other_corners is not None:
+ iou, ratio_obj, ratio_other = compute_iou_3d_with_ratios(
+ obj_corners, other_corners
+ )
+ if (
+ iou > self._iou_threshold
+ or ratio_obj > 0.4
+ or ratio_other > 0.4
+ ):
+ should_merge = True
+
+ if should_merge:
+ obj.merge(other)
+ for track_id in other.track_ids:
+ self._track_id_map[track_id] = obj.object_id
+ self._permanent_objects.pop(other.object_id, None)
+ objs.pop(j)
+ merged += 1
+ else:
+ j += 1
+ i += 1
+
+ return merged
+
+ def _prune_inactive(self, target_object: str | None, now: float) -> list[MapObject]:
+ """Remove inactive pending/permanent objects."""
+ deleted: list[MapObject] = []
+
+ for obj_id, obj in list(self._pending_objects.items()):
+ if (now - obj.last_update_time) > self._pending_ttl_s:
+ deleted.append(self._pending_objects.pop(obj_id))
+
+ for obj_id, obj in list(self._permanent_objects.items()):
+ if target_object and obj.label == target_object:
+ continue
+ if obj.status == ObjectStatus.DELETED:
+ deleted.append(self._permanent_objects.pop(obj_id))
+
+ for obj in deleted:
+ for track_id in obj.track_ids:
+ if self._track_id_map.get(track_id) == obj.object_id:
+ self._track_id_map.pop(track_id, None)
+
+ return deleted
+
+ def get_objects(self, include_pending: bool = False) -> list[MapObject]:
+ """Return current objects.
+
+ Args:
+ include_pending: If True, include pending objects.
+
+ Returns:
+ List of MapObject instances.
+ """
+ if include_pending:
+ return list(self._permanent_objects.values()) + list(self._pending_objects.values())
+ return list(self._permanent_objects.values())
diff --git a/semantic_mapping/semantic_mapping/mapping_node.py b/semantic_mapping/semantic_mapping/mapping_node.py
new file mode 100644
index 0000000..16b5649
--- /dev/null
+++ b/semantic_mapping/semantic_mapping/mapping_node.py
@@ -0,0 +1,861 @@
+"""ROS2 mapping node for semantic 3D object mapping."""
+
+from __future__ import annotations
+
+from collections import deque
+from pathlib import Path
+import time
+
+import numpy as np
+import yaml
+from cv_bridge import CvBridge
+import cv2
+import rclpy
+from rclpy.node import Node
+from rclpy.qos import QoSProfile, ReliabilityPolicy
+from sensor_msgs.msg import CompressedImage, PointCloud2
+from sensor_msgs_py import point_cloud2
+from nav_msgs.msg import Odometry
+from std_msgs.msg import Header
+from visualization_msgs.msg import MarkerArray
+from scipy.spatial.transform import Rotation, Slerp
+
+from detect_anything.msg import DetectionResult
+from semantic_mapping.msg import ObjectType, ObjectNode, TargetObjectInstruction, ViewpointRep
+
+from vector_perception_utils.pointcloud_utils import (
+ project_pointcloud_to_image,
+ segment_pointclouds_from_projection,
+ transform_segmented_clouds_to_world,
+)
+from vector_perception_utils.transform_utils import discretize_angles
+
+from .map_object import Observation
+from .utils.map_object_utils import (
+ create_bbox_markers,
+ create_delete_markers,
+ create_label_markers,
+ create_object_pointcloud,
+ map_object_to_object_node,
+)
+from .mapper import ObjectMapper
+from .utils.storage import ImageSaveQueue
+
+
+class SemanticMappingNode(Node):
+ """ROS2 node for semantic object mapping.
+
+ This node fuses 2D detections and LiDAR point clouds into persistent 3D objects:
+ - Buffers detections, clouds, and odometry with timestamps
+ - Interpolates odometry at detection time (position + orientation)
+ - Segments point clouds by detection masks and projects to world frame
+ - Updates the object mapper and publishes ROS messages
+ - Queues cropped images for VLM queries
+ """
+
+ def __init__(self) -> None:
+ """Initialize the mapping node, parameters, buffers, and ROS I/O."""
+ super().__init__("semantic_mapping_node")
+ self._bridge = CvBridge()
+
+ self.declare_parameter("detection_topic", "/detection_result")
+ self.declare_parameter("cloud_topic", "/registered_scan")
+ self.declare_parameter("odom_topic", "/state_estimation")
+ self.declare_parameter("viewpoint_topic", "/viewpoint_rep_header")
+ self.declare_parameter("object_type_answer_topic", "/object_type_answer")
+ self.declare_parameter("object_type_query_topic", "/object_type_query")
+ self.declare_parameter("target_object_instruction_topic", "/target_object_instruction")
+ self.declare_parameter("object_nodes_topic", "/object_nodes")
+ self.declare_parameter("obj_points_topic", "/obj_points")
+ self.declare_parameter("obj_boxes_topic", "/obj_boxes")
+ self.declare_parameter("obj_labels_topic", "/obj_labels")
+ self.declare_parameter("map_frame", "map")
+
+ self.declare_parameter("confidence_threshold", 0.3)
+ self.declare_parameter("voxel_size", 0.03)
+ self.declare_parameter("regularize_voxel_size", 0.06)
+ self.declare_parameter("distance_threshold", 0.5)
+ self.declare_parameter("iou_threshold", 0.2)
+ self.declare_parameter("min_detections_for_permanent", 3)
+ self.declare_parameter("pending_ttl_s", 10.0)
+ self.declare_parameter("num_angle_bins", 15)
+
+ self.declare_parameter("detection_linear_state_time_bias", 0.0)
+ self.declare_parameter("detection_angular_state_time_bias", 0.0)
+ self.declare_parameter("cloud_window_before", 0.5)
+ self.declare_parameter("cloud_window_after", 0.1)
+
+ self.declare_parameter("projection", "equirect")
+ self.declare_parameter("axis_mode", "xz")
+ self.declare_parameter("image_width", 1920)
+ self.declare_parameter("image_height", 640)
+ self.declare_parameter("depth_filter", False)
+ self.declare_parameter("depth_filter_margin", 0.15)
+ self.declare_parameter("depth_jump_threshold", 0.3)
+ self.declare_parameter("sensor_to_camera_tf", "config/sensor_to_camera_tf.yaml")
+ self.declare_parameter("mask_erode_kernel", 3)
+ self.declare_parameter("mask_erode_iterations", 2)
+ self.declare_parameter("max_cloud_distance", 8.0)
+ self.declare_parameter("visualization_max_objects", 50)
+ self.declare_parameter("ground_filter", True)
+ self.declare_parameter("ground_radius", 2.0)
+ self.declare_parameter("ground_z_clearance", 0.0)
+
+ self.declare_parameter("target_object", "")
+ self.declare_parameter("save_png", False)
+ self.declare_parameter("output_dir", "output/object_images")
+
+ detection_topic = str(self.get_parameter("detection_topic").value)
+ cloud_topic = str(self.get_parameter("cloud_topic").value)
+ odom_topic = str(self.get_parameter("odom_topic").value)
+ viewpoint_topic = str(self.get_parameter("viewpoint_topic").value)
+ object_type_answer_topic = str(self.get_parameter("object_type_answer_topic").value)
+ object_type_query_topic = str(self.get_parameter("object_type_query_topic").value)
+ target_object_instruction_topic = str(self.get_parameter("target_object_instruction_topic").value)
+ object_nodes_topic = str(self.get_parameter("object_nodes_topic").value)
+ obj_points_topic = str(self.get_parameter("obj_points_topic").value)
+ obj_boxes_topic = str(self.get_parameter("obj_boxes_topic").value)
+ obj_labels_topic = str(self.get_parameter("obj_labels_topic").value)
+ self._map_frame = str(self.get_parameter("map_frame").value)
+
+ self._confidence_threshold = float(self.get_parameter("confidence_threshold").value)
+ self._detection_linear_bias = float(self.get_parameter("detection_linear_state_time_bias").value)
+ self._detection_angular_bias = float(self.get_parameter("detection_angular_state_time_bias").value)
+ self._cloud_window_before = float(self.get_parameter("cloud_window_before").value)
+ self._cloud_window_after = float(self.get_parameter("cloud_window_after").value)
+
+ self._projection = str(self.get_parameter("projection").value)
+ self._axis_mode = str(self.get_parameter("axis_mode").value)
+ self._image_width = int(self.get_parameter("image_width").value)
+ self._image_height = int(self.get_parameter("image_height").value)
+ self._depth_filter = bool(self.get_parameter("depth_filter").value)
+ self._depth_filter_margin = float(self.get_parameter("depth_filter_margin").value)
+ self._depth_jump_threshold = float(self.get_parameter("depth_jump_threshold").value)
+ self._sensor_to_camera = self._load_sensor_to_camera(
+ str(self.get_parameter("sensor_to_camera_tf").value)
+ )
+ self._mask_erode_kernel_size = max(1, int(self.get_parameter("mask_erode_kernel").value))
+ self._mask_erode_iterations = max(0, int(self.get_parameter("mask_erode_iterations").value))
+ self._mask_erode_kernel = cv2.getStructuringElement(
+ cv2.MORPH_RECT,
+ (self._mask_erode_kernel_size, self._mask_erode_kernel_size),
+ )
+ self._max_cloud_distance = float(self.get_parameter("max_cloud_distance").value)
+ self._visualization_max_objects = int(
+ self.get_parameter("visualization_max_objects").value
+ )
+ self._ground_filter = bool(self.get_parameter("ground_filter").value)
+ self._ground_radius = float(self.get_parameter("ground_radius").value)
+ self._ground_z_clearance = float(self.get_parameter("ground_z_clearance").value)
+ self._ground_percentile = 0.05
+ self._ground_min_points = 80
+
+ self._target_object = str(self.get_parameter("target_object").value)
+ self._save_png = bool(self.get_parameter("save_png").value)
+ self._output_dir = Path(str(self.get_parameter("output_dir").value))
+
+ self._mapper = ObjectMapper(
+ voxel_size=float(self.get_parameter("voxel_size").value),
+ regularize_voxel_size=float(self.get_parameter("regularize_voxel_size").value),
+ confidence_threshold=self._confidence_threshold,
+ distance_threshold=float(self.get_parameter("distance_threshold").value),
+ iou_threshold=float(self.get_parameter("iou_threshold").value),
+ min_detections_for_permanent=int(self.get_parameter("min_detections_for_permanent").value),
+ pending_ttl_s=float(self.get_parameter("pending_ttl_s").value),
+ num_angle_bins=int(self.get_parameter("num_angle_bins").value),
+ )
+
+ self._image_saver = ImageSaveQueue(
+ output_dir=self._output_dir,
+ max_queue_size=100,
+ save_png=self._save_png,
+ )
+ self._image_saver.start()
+
+ self._cloud_buffer: deque[tuple[float, np.ndarray]] = deque(maxlen=200)
+ self._odom_buffer: deque[tuple[float, dict]] = deque(maxlen=400)
+ self._detection_buffer: deque[DetectionResult] = deque(maxlen=100)
+ self._viewpoint_queue: deque[tuple[float, int]] = deque(maxlen=50)
+ self._processed_viewpoints: set[float] = set()
+ self._published_ids: set[int] = set()
+ self._mapping_busy = False
+
+ qos_cloud = QoSProfile(depth=10, reliability=ReliabilityPolicy.BEST_EFFORT)
+ qos_default = QoSProfile(depth=10, reliability=ReliabilityPolicy.RELIABLE)
+
+ self.create_subscription(DetectionResult, detection_topic, self._detection_callback, qos_default)
+ self.create_subscription(PointCloud2, cloud_topic, self._cloud_callback, qos_cloud)
+ self.create_subscription(Odometry, odom_topic, self._odom_callback, qos_cloud)
+ self.create_subscription(ViewpointRep, viewpoint_topic, self._viewpoint_callback, qos_default)
+ self.create_subscription(ObjectType, object_type_answer_topic, self._object_type_answer_callback, qos_default)
+ self.create_subscription(
+ TargetObjectInstruction, target_object_instruction_topic, self._target_object_callback, qos_default
+ )
+
+ self._object_type_query_pub = self.create_publisher(ObjectType, object_type_query_topic, qos_default)
+ self._object_nodes_pub = self.create_publisher(ObjectNode, object_nodes_topic, qos_default)
+ self._obj_points_pub = self.create_publisher(PointCloud2, obj_points_topic, qos_default)
+ self._obj_boxes_pub = self.create_publisher(MarkerArray, obj_boxes_topic, qos_default)
+ self._obj_labels_pub = self.create_publisher(MarkerArray, obj_labels_topic, qos_default)
+
+ self.create_timer(0.5, self._mapping_callback)
+
+ def destroy_node(self) -> bool:
+ """Stop background workers and destroy the ROS node."""
+ self._image_saver.stop()
+ return super().destroy_node()
+
+ def _detection_callback(self, msg: DetectionResult) -> None:
+ """Buffer detection results for later synchronization.
+
+ Args:
+ msg: DetectionResult message with bboxes, labels, and masks.
+ """
+ self._detection_buffer.append(msg)
+
+ def _cloud_callback(self, msg: PointCloud2) -> None:
+ """Buffer incoming point clouds with timestamps.
+
+ Args:
+ msg: PointCloud2 message in the sensor frame.
+ """
+ stamp = msg.header.stamp.sec + msg.header.stamp.nanosec / 1e9
+ points = point_cloud2.read_points_numpy(msg, field_names=("x", "y", "z"))
+ self._cloud_buffer.append((stamp, points))
+
+ def _odom_callback(self, msg: Odometry) -> None:
+ """Buffer odometry data for interpolation.
+
+ Args:
+ msg: Odometry message containing pose and twist.
+ """
+ stamp = msg.header.stamp.sec + msg.header.stamp.nanosec / 1e9
+ odom = {
+ "position": np.array(
+ [msg.pose.pose.position.x, msg.pose.pose.position.y, msg.pose.pose.position.z]
+ ),
+ "orientation": np.array(
+ [
+ msg.pose.pose.orientation.x,
+ msg.pose.pose.orientation.y,
+ msg.pose.pose.orientation.z,
+ msg.pose.pose.orientation.w,
+ ]
+ ),
+ "linear_velocity": np.array(
+ [msg.twist.twist.linear.x, msg.twist.twist.linear.y, msg.twist.twist.linear.z]
+ ),
+ "angular_velocity": np.array(
+ [msg.twist.twist.angular.x, msg.twist.twist.angular.y, msg.twist.twist.angular.z]
+ ),
+ }
+ self._odom_buffer.append((stamp, odom))
+
+ def _viewpoint_callback(self, msg: ViewpointRep) -> None:
+ """Queue viewpoint trigger requests.
+
+ Args:
+ msg: ViewpointRep message with timestamp and viewpoint id.
+ """
+ stamp = msg.header.stamp.sec + msg.header.stamp.nanosec / 1e9
+ if stamp in self._processed_viewpoints:
+ return
+ self._viewpoint_queue.append((stamp, msg.viewpoint_id))
+
+ def _target_object_callback(self, msg: TargetObjectInstruction) -> None:
+ """Update target object label used for VLM query decisions.
+
+ Args:
+ msg: TargetObjectInstruction message.
+ """
+ self._target_object = msg.target_object
+
+ def _object_type_answer_callback(self, msg: ObjectType) -> None:
+ """Apply VLM relabeling updates to tracked objects.
+
+ Args:
+ msg: ObjectType message with final label for a track id.
+ """
+ for obj in self._mapper.get_objects(include_pending=True):
+ if msg.object_id in obj.track_ids:
+ obj.class_votes[msg.final_label] = obj.class_votes.get(msg.final_label, 0) + 50
+ obj.confidence_scores[msg.final_label] = max(
+ obj.confidence_scores.get(msg.final_label, 0.0), 1.0
+ )
+ obj.points_count[msg.final_label] = obj.points_count.get(msg.final_label, 0)
+ obj.updated_by_vlm = True
+ obj._invalidate_cache()
+ break
+
+ def _mapping_callback(self) -> None:
+ """Main mapping loop: synchronize inputs, update map, and publish outputs."""
+ if self._mapping_busy:
+ return
+ if len(self._detection_buffer) < 1 or len(self._cloud_buffer) < 1 or len(self._odom_buffer) < 2:
+ return
+
+ self._mapping_busy = True
+ start_time = time.perf_counter()
+ try:
+ detection_msg, viewpoint_stamp, viewpoint_id = self._select_detection()
+ if detection_msg is None:
+ return
+
+ detection_stamp = detection_msg.header.stamp.sec + detection_msg.header.stamp.nanosec / 1e9
+ image = self._bridge.compressed_imgmsg_to_cv2(detection_msg.image, desired_encoding="bgr8")
+
+ odom = self._interpolate_odom(detection_stamp)
+ if odom is None:
+ return
+
+ neighbor_cloud = self._collect_cloud_window(detection_stamp)
+ if neighbor_cloud is None:
+ return
+
+ observations = self._build_observations(
+ detection_msg=detection_msg,
+ image=image,
+ odom=odom,
+ cloud=neighbor_cloud,
+ )
+ if len(observations) == 0:
+ return
+
+ stats = self._mapper.update(observations, target_object=self._target_object)
+ self.get_logger().info(
+ "Mapping stats:\n"
+ f" total: {stats.total}\n"
+ f" deleted: {stats.deleted}"
+ )
+ self._update_observation_angles(observations)
+ self._update_best_images(observations, image)
+ self._publish_map(detection_stamp, viewpoint_id)
+ self._publish_object_type_queries()
+
+ if viewpoint_stamp is not None:
+ self._processed_viewpoints.add(viewpoint_stamp)
+ finally:
+ elapsed_ms = (time.perf_counter() - start_time) * 1000.0
+ self.get_logger().info(f"Mapping loop time: {elapsed_ms:.2f} ms")
+ self._mapping_busy = False
+
+ def _select_detection(self) -> tuple[DetectionResult | None, float | None, int]:
+ """Pick the detection to process, optionally aligned to a viewpoint.
+
+ Returns:
+ Tuple of (detection_msg, viewpoint_stamp, viewpoint_id).
+ """
+ viewpoint_stamp = None
+ viewpoint_id = -1
+
+ if len(self._viewpoint_queue) > 0:
+ stamp, vid = self._viewpoint_queue[0]
+ detections = list(self._detection_buffer)
+ stamps = [msg.header.stamp.sec + msg.header.stamp.nanosec / 1e9 for msg in detections]
+ diffs = np.abs(np.array(stamps) - stamp)
+ idx = int(np.argmin(diffs))
+ if diffs[idx] <= 1.0:
+ detection_msg = detections[idx]
+ viewpoint_stamp = stamp
+ viewpoint_id = vid
+ self._viewpoint_queue.popleft()
+ return detection_msg, viewpoint_stamp, viewpoint_id
+ self._viewpoint_queue.popleft()
+
+        # Prefer the second-newest detection so its post-stamp cloud window is
+        # more likely to be fully buffered.
+        idx = -2 if len(self._detection_buffer) > 1 else -1
+ return self._detection_buffer[idx], viewpoint_stamp, viewpoint_id
+
+ def _interpolate_odom(self, detection_stamp: float) -> dict | None:
+ """Interpolate odometry at the detection timestamp.
+
+ Args:
+ detection_stamp: Timestamp in seconds.
+
+ Returns:
+ Interpolated odometry dict or None if interpolation is not possible.
+ """
+ target_stamp = detection_stamp + self._detection_linear_bias
+ angular_stamp = detection_stamp + self._detection_angular_bias
+ stamps = [s for s, _ in self._odom_buffer]
+ if len(stamps) < 2:
+ return None
+
+ if target_stamp < stamps[0] or target_stamp > stamps[-1]:
+ return None
+ if angular_stamp < stamps[0] or angular_stamp > stamps[-1]:
+ return None
+
+ right_idx = next(i for i, s in enumerate(stamps) if s >= target_stamp)
+ left_idx = max(right_idx - 1, 0)
+ angular_right_idx = next(i for i, s in enumerate(stamps) if s >= angular_stamp)
+ angular_left_idx = max(angular_right_idx - 1, 0)
+
+ left_stamp, left_odom = self._odom_buffer[left_idx]
+ right_stamp, right_odom = self._odom_buffer[right_idx]
+ angular_left_stamp, angular_left_odom = self._odom_buffer[angular_left_idx]
+ angular_right_stamp, angular_right_odom = self._odom_buffer[angular_right_idx]
+ denom = right_stamp - left_stamp
+ ratio = (target_stamp - left_stamp) / denom if denom > 0 else 0.5
+ angular_denom = angular_right_stamp - angular_left_stamp
+ angular_ratio = (angular_stamp - angular_left_stamp) / angular_denom if angular_denom > 0 else 0.5
+
+ camera_odom = {}
+ camera_odom["position"] = right_odom["position"] * ratio + left_odom["position"] * (1 - ratio)
+ camera_odom["linear_velocity"] = (
+ right_odom["linear_velocity"] * ratio + left_odom["linear_velocity"] * (1 - ratio)
+ )
+ rotations = Rotation.from_quat(
+ [angular_left_odom["orientation"], angular_right_odom["orientation"]]
+ )
+ slerp = Slerp([0, 1], rotations)
+ camera_odom["orientation"] = slerp(angular_ratio).as_quat()
+ camera_odom["angular_velocity"] = (
+ angular_right_odom["angular_velocity"] * angular_ratio
+ + angular_left_odom["angular_velocity"] * (1 - angular_ratio)
+ )
+ return camera_odom
+
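
The interpolation above blends position and linear velocity linearly and uses SciPy's `Slerp` for orientation. A minimal standalone sketch of that blending with made-up stamps and poses (values are illustrative only):

```python
import numpy as np
from scipy.spatial.transform import Rotation, Slerp

# Hypothetical odometry samples bracketing a detection at t = 10.3 s.
left_t, right_t = 10.0, 10.5
left_pos, right_pos = np.array([1.0, 0.0, 0.0]), np.array([1.5, 0.0, 0.0])
left_q = Rotation.from_euler("z", 0.0, degrees=True).as_quat()
right_q = Rotation.from_euler("z", 30.0, degrees=True).as_quat()

ratio = (10.3 - left_t) / (right_t - left_t)            # 0.6
position = right_pos * ratio + left_pos * (1 - ratio)   # [1.3, 0, 0]
slerp = Slerp([0, 1], Rotation.from_quat([left_q, right_q]))
orientation = slerp(ratio).as_quat()                    # yaw of roughly 18 degrees
```
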
+ def _collect_cloud_window(self, detection_stamp: float) -> np.ndarray | None:
+ """Aggregate point clouds around a detection timestamp.
+
+ Args:
+ detection_stamp: Timestamp in seconds.
+
+ Returns:
+ Concatenated point cloud or None if no data in window.
+ """
+ start = detection_stamp - self._cloud_window_before
+ end = detection_stamp + self._cloud_window_after
+ clouds = [pts for stamp, pts in self._cloud_buffer if start <= stamp <= end]
+ if len(clouds) == 0:
+ return None
+ return np.concatenate(clouds, axis=0)
+
+ def _compute_ground_z_threshold(
+ self,
+ points: np.ndarray,
+ robot_xy: np.ndarray,
+ ) -> float | None:
+ if not self._ground_filter:
+ return None
+ if points.shape[0] < self._ground_min_points:
+ return None
+ if self._ground_radius <= 0.0:
+ return None
+ if not 0.0 < self._ground_percentile < 1.0:
+ return None
+
+ diffs = points[:, :2] - robot_xy
+ dist2 = diffs[:, 0] ** 2 + diffs[:, 1] ** 2
+ radius2 = self._ground_radius ** 2
+ mask = dist2 <= radius2
+ if np.count_nonzero(mask) < self._ground_min_points:
+ return None
+
+ z_vals = points[mask, 2]
+ z_thresh = float(np.percentile(z_vals, self._ground_percentile * 100.0))
+ return z_thresh + self._ground_z_clearance
+
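
The ground filter above estimates the floor from a low z-percentile of points near the robot and drops anything below that height plus a clearance. A tiny numeric sketch of the rule, with made-up samples:

```python
import numpy as np

rng = np.random.default_rng(0)
# Hypothetical world-frame z values within ground_radius of the robot:
# mostly floor returns near z = 0, plus some object points higher up.
z_vals = np.concatenate([rng.normal(0.02, 0.01, 500), rng.normal(0.60, 0.05, 100)])

ground_percentile = 0.05
ground_z_clearance = 0.05

z_floor = float(np.percentile(z_vals, ground_percentile * 100.0))
z_threshold = z_floor + ground_z_clearance
keep = z_vals > z_threshold   # object points survive, floor returns are removed
print(z_floor, z_threshold, int(keep.sum()))
```
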
+ def _build_observations(
+ self,
+ detection_msg: DetectionResult,
+ image: np.ndarray,
+ odom: dict,
+ cloud: np.ndarray,
+ ) -> list[Observation]:
+ """Build Observation objects from detections and segmented point clouds.
+
+ Args:
+ detection_msg: DetectionResult with bboxes, labels, and masks.
+ image: RGB image used for masks and cropping.
+ odom: Interpolated odometry dict (position + orientation).
+ cloud: Aggregated point cloud in sensor frame.
+
+ Returns:
+ List of Observation instances for object updates.
+ """
+ det_stamp = detection_msg.header.stamp.sec + detection_msg.header.stamp.nanosec / 1e9
+ image_shape = image.shape[:2]
+
+ masks = []
+ labels = []
+ confidences = []
+ track_ids = []
+
+ for i in range(len(detection_msg.track_id)):
+ conf = float(detection_msg.confidence[i])
+ if conf < self._confidence_threshold:
+ continue
+ x1 = int(detection_msg.x1[i])
+ y1 = int(detection_msg.y1[i])
+ x2 = int(detection_msg.x2[i])
+ y2 = int(detection_msg.y2[i])
+ x1 = max(0, min(x1, image_shape[1] - 1))
+ x2 = max(0, min(x2, image_shape[1]))
+ y1 = max(0, min(y1, image_shape[0] - 1))
+ y2 = max(0, min(y2, image_shape[0]))
+ if x2 <= x1 or y2 <= y1:
+ continue
+
+ full_mask = np.zeros(image_shape, dtype=bool)
+ if i < len(detection_msg.masks):
+ mask_msg: CompressedImage = detection_msg.masks[i]
+ mask_crop = self._bridge.compressed_imgmsg_to_cv2(mask_msg)
+ if mask_crop.ndim == 3:
+ mask_crop = cv2.cvtColor(mask_crop, cv2.COLOR_BGR2GRAY)
+ mask_bool = mask_crop.astype(bool)
+ if self._mask_erode_iterations > 0:
+ mask_eroded = cv2.erode(
+ mask_bool.astype(np.uint8),
+ self._mask_erode_kernel,
+ iterations=self._mask_erode_iterations,
+ )
+ mask_bool = mask_eroded.astype(bool)
+ crop_h = min(mask_bool.shape[0], y2 - y1)
+ crop_w = min(mask_bool.shape[1], x2 - x1)
+ full_mask[y1 : y1 + crop_h, x1 : x1 + crop_w] = mask_bool[:crop_h, :crop_w]
+
+ masks.append(full_mask)
+ labels.append(detection_msg.label[i])
+ confidences.append(conf)
+ track_ids.append(int(detection_msg.track_id[i]))
+
+ if len(masks) == 0:
+ return []
+
+ R_b2w = Rotation.from_quat(odom["orientation"]).as_matrix()
+ t_b2w = np.array(odom["position"])
+ R_w2b = R_b2w.T
+ t_w2b = -R_w2b @ t_b2w
+ cloud_body = cloud @ R_w2b.T + t_w2b
+
+ _, pixel_idx = project_pointcloud_to_image(
+ points=cloud_body,
+ projection=self._projection,
+ image_width=self._image_width,
+ image_height=self._image_height,
+ axis_mode=self._axis_mode,
+ intrinsics=None,
+ depth_filter=self._depth_filter,
+ depth_filter_margin=self._depth_filter_margin,
+ transform=self._sensor_to_camera,
+ )
+ seg_clouds, seg_depths = segment_pointclouds_from_projection(
+ points=cloud_body,
+ masks=masks,
+ pixel_idx=pixel_idx,
+ )
+ if self._depth_filter:
+ filtered_clouds = []
+ filtered_depths = []
+            # Keep only points near the closest depth in each mask to reject
+            # background bleed-through; avoid shadowing the `cloud` argument.
+            for seg_cloud, seg_depth in zip(seg_clouds, seg_depths):
+                if seg_depth.shape[0] == 0:
+                    filtered_clouds.append(seg_cloud)
+                    filtered_depths.append(seg_depth)
+                    continue
+                min_depth = float(np.min(seg_depth))
+                keep = seg_depth[:, 0] <= (min_depth + self._depth_filter_margin)
+                filtered_clouds.append(seg_cloud[keep])
+                filtered_depths.append(seg_depth[keep])
+ seg_clouds = filtered_clouds
+ seg_depths = filtered_depths
+ world_clouds = transform_segmented_clouds_to_world(
+ clouds=seg_clouds,
+ depths=seg_depths,
+ R_b2w=R_b2w,
+ t_b2w=t_b2w,
+ depth_jump_threshold=self._depth_jump_threshold,
+ )
+ cloud_world = cloud_body @ R_b2w.T + t_b2w
+ ground_z_threshold = self._compute_ground_z_threshold(cloud_world, t_b2w[:2])
+
+ observations = []
+ for idx, world_cloud in enumerate(world_clouds):
+ if self._max_cloud_distance > 0:
+ dist = np.linalg.norm(world_cloud - t_b2w, axis=1)
+ world_cloud = world_cloud[dist < self._max_cloud_distance]
+ if ground_z_threshold is not None:
+ world_cloud = world_cloud[world_cloud[:, 2] > ground_z_threshold]
+ if world_cloud.shape[0] == 0:
+ continue
+ observations.append(
+ Observation(
+ track_id=track_ids[idx],
+ label=labels[idx],
+ confidence=confidences[idx],
+ points=world_cloud,
+ robot_R=R_b2w,
+ robot_t=t_b2w,
+ mask=masks[idx],
+ image=image,
+ timestamp=det_stamp,
+ )
+ )
+
+ return observations
+
+ def _update_observation_angles(self, observations: list[Observation]) -> None:
+ """Reproject object voxels onto the current mask to update angle votes."""
+ if len(observations) == 0:
+ return
+
+ objects = self._mapper.get_objects(include_pending=True)
+ track_to_obj = {}
+ for obj in objects:
+ for track_id in obj.track_ids:
+ track_to_obj[track_id] = obj
+
+ for obs in observations:
+ obj = track_to_obj.get(obs.track_id)
+ if obj is None or obs.mask is None:
+ continue
+ voxels = obj.voxel_manager.voxels
+ if voxels.shape[0] == 0:
+ continue
+
+ R_b2w = obs.robot_R
+ t_b2w = obs.robot_t
+ R_w2b = R_b2w.T
+ t_w2b = -R_w2b @ t_b2w
+ voxels_body = voxels @ R_w2b.T + t_w2b
+ _, pixel_idx = project_pointcloud_to_image(
+ points=voxels_body,
+ projection=self._projection,
+ image_width=self._image_width,
+ image_height=self._image_height,
+ axis_mode=self._axis_mode,
+ intrinsics=None,
+ depth_filter=self._depth_filter,
+ depth_filter_margin=self._depth_filter_margin,
+ transform=self._sensor_to_camera,
+ )
+ pix = pixel_idx.astype(int)
+ valid = (
+ (pix[:, 0] >= 0)
+ & (pix[:, 0] < obs.mask.shape[1])
+ & (pix[:, 1] >= 0)
+ & (pix[:, 1] < obs.mask.shape[0])
+ )
+ if not np.any(valid):
+ continue
+ pix = pix[valid]
+ voxel_indices = np.nonzero(valid)[0]
+ on_mask = obs.mask[pix[:, 1], pix[:, 0]].astype(bool)
+ if not np.any(on_mask):
+ continue
+ voxel_indices = voxel_indices[on_mask]
+ vectors = voxels[voxel_indices] - t_b2w
+ obs_angles = np.arctan2(vectors[:, 1], vectors[:, 0])
+ obs_bins = discretize_angles(obs_angles, obj.voxel_manager.num_angle_bins)
+ obj.voxel_manager.update_observation_angles(voxel_indices, obs_bins)
+ obj._invalidate_cache()
+
+ def _update_best_images(self, observations: list[Observation], image: np.ndarray) -> None:
+ """Update best image crops for objects based on mask area.
+
+ Args:
+ observations: Observations created for the current frame.
+ image: Full RGB image associated with the detections.
+ """
+ for obs in observations:
+ obj = None
+ for candidate in self._mapper.get_objects(include_pending=True):
+ if obs.track_id in candidate.track_ids:
+ obj = candidate
+ break
+ if obj is None or obs.mask is None:
+ continue
+
+ coords = np.argwhere(obs.mask)
+ if coords.size == 0:
+ continue
+ mask_area = coords.shape[0]
+ if mask_area <= obj.best_image_score:
+ continue
+
+ y0, x0 = coords.min(axis=0)
+ y1, x1 = coords.max(axis=0) + 1
+ padding = max(int(y1 - y0), int(x1 - x0), 80)
+ y0 = max(0, y0 - padding)
+ x0 = max(0, x0 - padding)
+ y1 = min(obs.mask.shape[0], y1 + padding)
+ x1 = min(obs.mask.shape[1], x1 + padding)
+
+ if obj.best_image_path:
+ image_path = Path(obj.best_image_path)
+ mask_path = image_path.with_name(image_path.stem + "_mask.npy")
+ if image_path.exists():
+ image_path.unlink()
+ if mask_path.exists():
+ mask_path.unlink()
+ if self._save_png:
+ png_path = image_path.with_suffix(".png")
+ mask_png_path = image_path.with_name(image_path.stem + "_mask.png")
+ if png_path.exists():
+ png_path.unlink()
+ if mask_png_path.exists():
+ mask_png_path.unlink()
+
+ cropped_image = image[y0:y1, x0:x1]
+ cropped_mask = obs.mask[y0:y1, x0:x1]
+
+ obj.best_image_path = self._image_saver.queue_save(
+ object_id=str(obs.track_id),
+ image=cropped_image,
+ mask=cropped_mask,
+ )
+ obj.best_image_score = float(mask_area)
+ obj.is_asked_vlm = False
+
+ def _publish_map(self, stamp: float, viewpoint_id: int) -> None:
+ """Publish object nodes, markers, and aggregated point clouds.
+
+ Args:
+ stamp: Timestamp (seconds) for message headers.
+ viewpoint_id: Viewpoint id for ObjectNode messages.
+ """
+ seconds = int(stamp)
+ nanoseconds = int((stamp - seconds) * 1e9)
+ header = Header()
+ header.stamp.sec = seconds
+ header.stamp.nanosec = nanoseconds
+ header.frame_id = self._map_frame
+
+ objects = self._mapper.get_objects(include_pending=True)
+ for obj in objects:
+ if obj.detection_count == 1:
+ status = "new"
+ elif obj.inactive_frames == 0:
+ status = "updated"
+ else:
+ status = "unchanged"
+ msg = map_object_to_object_node(
+ obj,
+ header,
+ status=status,
+ viewpoint_id=viewpoint_id,
+ target_object=self._target_object,
+ )
+ if msg is not None:
+ self._object_nodes_pub.publish(msg)
+
+ viz_objects = objects
+ if self._visualization_max_objects > 0 and len(objects) > self._visualization_max_objects:
+ viz_objects = sorted(
+ objects, key=lambda obj: obj.last_update_time, reverse=True
+ )[: self._visualization_max_objects]
+
+ bbox_markers = create_bbox_markers(viz_objects, header)
+ label_markers = create_label_markers(viz_objects, header)
+ self._obj_boxes_pub.publish(bbox_markers)
+ self._obj_labels_pub.publish(label_markers)
+
+ obj_cloud = create_object_pointcloud(viz_objects, header)
+ if obj_cloud is not None:
+ self._obj_points_pub.publish(obj_cloud)
+
+ current_ids = {int(obj.track_ids[0]) if obj.track_ids else 0 for obj in viz_objects}
+ deleted_ids = list(self._published_ids - current_ids)
+ if len(deleted_ids) > 0:
+ delete_markers = create_delete_markers(deleted_ids, header, namespace="bbox")
+ self._obj_boxes_pub.publish(delete_markers)
+ delete_labels = create_delete_markers(deleted_ids, header, namespace="labels")
+ self._obj_labels_pub.publish(delete_labels)
+ self._published_ids = current_ids
+
+ def _publish_object_type_queries(self) -> None:
+ """Publish VLM queries for target objects with good image crops."""
+ for obj in self._mapper.get_objects(include_pending=False):
+ if obj.label != self._target_object:
+ continue
+ if obj.is_asked_vlm:
+ continue
+ if obj.best_image_score <= 500:
+ continue
+ msg = ObjectType()
+ msg.object_id = int(obj.track_ids[0]) if obj.track_ids else -1
+ msg.img_path = obj.best_image_path or ""
+ msg.labels = list(obj.class_votes.keys())
+ self._object_type_query_pub.publish(msg)
+ obj.is_asked_vlm = True
+
+ def _load_sensor_to_camera(self, path: str) -> np.ndarray | None:
+ """Load sensor->camera transform from YAML.
+
+ Args:
+ path: Path to YAML config with translation and rpy.
+
+ Returns:
+ 4x4 transform matrix or None if config not found.
+ """
+ config_path = Path(path)
+ if not config_path.is_absolute():
+ config_path = Path(__file__).resolve().parent.parent / config_path
+ if not config_path.exists():
+ return None
+
+ data = yaml.safe_load(config_path.read_text()) or {}
+ tf_data = data.get("sensor_to_camera", {})
+ trans = tf_data.get("translation", {})
+ rpy = tf_data.get("rotation_rpy", {})
+ cam_offset = np.array(
+ [
+ float(trans.get("x", 0.0)),
+ float(trans.get("y", 0.0)),
+ float(trans.get("z", 0.0)),
+ ]
+ )
+ roll = float(rpy.get("roll", 0.0))
+ pitch = float(rpy.get("pitch", 0.0))
+ yaw = float(rpy.get("yaw", 0.0))
+ cos_roll = np.cos(roll)
+ sin_roll = np.sin(roll)
+ cos_pitch = np.cos(pitch)
+ sin_pitch = np.sin(pitch)
+ cos_yaw = np.cos(yaw)
+ sin_yaw = np.sin(yaw)
+ r_z = np.array(
+ [
+ [cos_yaw, -sin_yaw, 0.0],
+ [sin_yaw, cos_yaw, 0.0],
+ [0.0, 0.0, 1.0],
+ ]
+ )
+ r_y = np.array(
+ [
+ [cos_pitch, 0.0, sin_pitch],
+ [0.0, 1.0, 0.0],
+ [-sin_pitch, 0.0, cos_pitch],
+ ]
+ )
+ r_x = np.array(
+ [
+ [1.0, 0.0, 0.0],
+ [0.0, cos_roll, -sin_roll],
+ [0.0, sin_roll, cos_roll],
+ ]
+ )
+ cam_rotation = r_z @ r_y @ r_x
+ rotation = cam_rotation.T
+ translation = -rotation @ cam_offset
+ transform = np.eye(4)
+ transform[:3, :3] = rotation
+ transform[:3, 3] = translation
+ return transform
+
+
+def main(args: list[str] | None = None) -> None:
+ """Entry point for the semantic mapping node."""
+ rclpy.init(args=args)
+ node = SemanticMappingNode()
+ try:
+ rclpy.spin(node)
+ except KeyboardInterrupt:
+ pass
+ finally:
+ node.destroy_node()
+ rclpy.shutdown()
+
+
+if __name__ == "__main__":
+ main()
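
`_load_sensor_to_camera` expects a YAML file with a `sensor_to_camera` block holding a translation and roll/pitch/yaw, and stores the inverse transform (sensor frame to camera frame). A minimal sketch of a matching config and the same construction done with SciPy; the numbers are illustrative, and the `from_euler("xyz", ...)` call should reproduce the `r_z @ r_y @ r_x` product built in the node:

```python
import numpy as np
import yaml
from scipy.spatial.transform import Rotation

# Illustrative config with the layout the loader expects.
cfg = yaml.safe_load("""
sensor_to_camera:
  translation: {x: 0.10, y: 0.00, z: 0.05}
  rotation_rpy: {roll: 0.0, pitch: 0.0, yaw: 1.5708}
""")

tf = cfg["sensor_to_camera"]
offset = np.array([tf["translation"][k] for k in ("x", "y", "z")])
rpy = [tf["rotation_rpy"][k] for k in ("roll", "pitch", "yaw")]

# Camera pose in the sensor frame: extrinsic x-y-z, i.e. Rz(yaw) @ Ry(pitch) @ Rx(roll).
cam_rotation = Rotation.from_euler("xyz", rpy).as_matrix()

# The node keeps the inverse so points can be mapped sensor -> camera.
rotation = cam_rotation.T
transform = np.eye(4)
transform[:3, :3] = rotation
transform[:3, 3] = -rotation @ offset
print(transform)
```
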
diff --git a/semantic_mapping/semantic_mapping/utils/__init__.py b/semantic_mapping/semantic_mapping/utils/__init__.py
new file mode 100644
index 0000000..d949aeb
--- /dev/null
+++ b/semantic_mapping/semantic_mapping/utils/__init__.py
@@ -0,0 +1 @@
+"""Utility modules for semantic mapping."""
diff --git a/semantic_mapping/semantic_mapping/utils/map_object_utils.py b/semantic_mapping/semantic_mapping/utils/map_object_utils.py
new file mode 100644
index 0000000..5ad4163
--- /dev/null
+++ b/semantic_mapping/semantic_mapping/utils/map_object_utils.py
@@ -0,0 +1,264 @@
+"""ROS message helpers for MapObject publishers."""
+
+from __future__ import annotations
+
+import zlib
+from typing import Optional
+
+import numpy as np
+from geometry_msgs.msg import Point
+from sensor_msgs.msg import PointCloud2
+from std_msgs.msg import Header
+from visualization_msgs.msg import Marker, MarkerArray
+
+from semantic_mapping.msg import ObjectNode
+from vector_perception_utils.pointcloud_utils import create_pointcloud2_msg
+
+from ..map_object import MapObject
+
+
+def _axis_aligned_corners(points: np.ndarray) -> Optional[np.ndarray]:
+ if points.size == 0:
+ return None
+ min_bound = np.min(points, axis=0)
+ max_bound = np.max(points, axis=0)
+ return np.array(
+ [
+ [min_bound[0], min_bound[1], min_bound[2]],
+ [max_bound[0], min_bound[1], min_bound[2]],
+ [max_bound[0], max_bound[1], min_bound[2]],
+ [min_bound[0], max_bound[1], min_bound[2]],
+ [min_bound[0], min_bound[1], max_bound[2]],
+ [max_bound[0], min_bound[1], max_bound[2]],
+ [max_bound[0], max_bound[1], max_bound[2]],
+ [min_bound[0], max_bound[1], max_bound[2]],
+ ],
+ dtype=float,
+ )
+
+
+def get_object_seed(obj: MapObject) -> int:
+ """Generate a deterministic seed from a MapObject for color generation.
+
+ Args:
+ obj: MapObject instance.
+
+ Returns:
+ CRC32 hash of the object's identifier.
+ """
+ seed_key = obj.object_id or (str(obj.track_ids[0]) if obj.track_ids else obj.label)
+ return zlib.crc32(seed_key.encode("utf-8"))
+
+
+def map_object_to_object_node(
+ obj: MapObject,
+ header: Header,
+ status: str = "new",
+ viewpoint_id: int = -1,
+ target_object: str | None = None,
+) -> Optional[ObjectNode]:
+ """Convert a MapObject into an ObjectNode message.
+
+ Args:
+ obj: MapObject instance.
+ header: ROS header to attach to the message.
+ status: Lifecycle status string ("new", "updated", "unchanged").
+ viewpoint_id: Viewpoint identifier for image saving.
+ target_object: Target object label for potential labeling.
+
+ Returns:
+ ObjectNode message or None if required geometry is missing.
+ """
+ if obj.centroid is None or obj.bbox3d_corners is None:
+ return None
+
+ msg = ObjectNode()
+ msg.header = header
+ msg.object_id = obj.track_ids
+ if target_object and obj.label == target_object and not obj.is_asked_vlm:
+ msg.label = "Potential Target"
+ else:
+ msg.label = obj.label
+ msg.status = status
+ msg.position.x = float(obj.centroid[0])
+ msg.position.y = float(obj.centroid[1])
+ msg.position.z = float(obj.centroid[2])
+
+ for i in range(8):
+ msg.bbox3d[i].x = float(obj.bbox3d_corners[i, 0])
+ msg.bbox3d[i].y = float(obj.bbox3d_corners[i, 1])
+ msg.bbox3d[i].z = float(obj.bbox3d_corners[i, 2])
+
+ voxels = obj.voxel_manager.get_valid_voxels(
+ diversity_percentile=obj._diversity_percentile,
+ use_regularized=True,
+ )
+ if len(voxels) == 0:
+ voxels = obj.voxel_manager.get_valid_voxels(
+ diversity_percentile=obj._diversity_percentile,
+ use_regularized=False,
+ )
+ if len(voxels) > 0:
+ seed = get_object_seed(obj)
+ color = np.array(
+ [
+ (seed & 0xFF) / 255.0,
+ ((seed >> 8) & 0xFF) / 255.0,
+ ((seed >> 16) & 0xFF) / 255.0,
+ ],
+ dtype=np.float32,
+ )
+ colors = np.tile(color, (voxels.shape[0], 1))
+ msg.cloud = create_pointcloud2_msg(points=voxels, colors=colors, header=header)
+
+ msg.img_path = obj.best_image_path or ""
+ msg.is_asked_vlm = obj.is_asked_vlm
+ msg.viewpoint_id = -1 if obj.updated_by_vlm else viewpoint_id
+ return msg
+
+
+def create_bbox_markers(
+ objects: list[MapObject], header: Header, namespace: str = "bbox"
+) -> MarkerArray:
+ """Create wireframe bounding box markers for MapObjects."""
+ markers = MarkerArray()
+ edges = [
+ (0, 1),
+ (1, 2),
+ (2, 3),
+ (3, 0),
+ (4, 5),
+ (5, 6),
+ (6, 7),
+ (7, 4),
+ (0, 4),
+ (1, 5),
+ (2, 6),
+ (3, 7),
+ ]
+
+ for obj in objects:
+ voxels = obj.voxel_manager.get_valid_voxels(
+ diversity_percentile=obj._diversity_percentile,
+ use_regularized=True,
+ )
+ if len(voxels) == 0:
+ voxels = obj.voxel_manager.get_valid_voxels(
+ diversity_percentile=obj._diversity_percentile,
+ use_regularized=False,
+ )
+ corners = _axis_aligned_corners(voxels) if len(voxels) > 0 else None
+ if corners is None:
+ continue
+ marker = Marker()
+ marker.header = header
+ marker.ns = namespace
+ marker.id = int(obj.track_ids[0]) if obj.track_ids else 0
+ marker.type = Marker.LINE_LIST
+ marker.action = Marker.ADD
+ marker.scale.x = 0.02
+
+ seed = get_object_seed(obj)
+ marker.color.r = (seed & 0xFF) / 255.0
+ marker.color.g = ((seed >> 8) & 0xFF) / 255.0
+ marker.color.b = ((seed >> 16) & 0xFF) / 255.0
+ marker.color.a = 1.0
+
+ for i, j in edges:
+ marker.points.append(Point(x=float(corners[i, 0]), y=float(corners[i, 1]), z=float(corners[i, 2])))
+ marker.points.append(Point(x=float(corners[j, 0]), y=float(corners[j, 1]), z=float(corners[j, 2])))
+
+ markers.markers.append(marker)
+
+ return markers
+
+
+def create_label_markers(
+ objects: list[MapObject],
+ header: Header,
+ namespace: str = "labels",
+ text_height: float = 0.4,
+) -> MarkerArray:
+ """Create text label markers for MapObjects."""
+ markers = MarkerArray()
+
+ for obj in objects:
+ if obj.centroid is None:
+ continue
+ marker = Marker()
+ marker.header = header
+ marker.ns = namespace
+ marker.id = int(obj.track_ids[0]) if obj.track_ids else 0
+ marker.type = Marker.TEXT_VIEW_FACING
+ marker.action = Marker.ADD
+ marker.scale.z = text_height
+ marker.text = obj.label
+ marker.pose.position.x = float(obj.centroid[0])
+ marker.pose.position.y = float(obj.centroid[1])
+ marker.pose.position.z = float(obj.centroid[2]) + 0.3
+
+ seed = get_object_seed(obj)
+ marker.color.r = (seed & 0xFF) / 255.0
+ marker.color.g = ((seed >> 8) & 0xFF) / 255.0
+ marker.color.b = ((seed >> 16) & 0xFF) / 255.0
+ marker.color.a = 1.0
+
+ markers.markers.append(marker)
+
+ return markers
+
+
+def create_object_pointcloud(objects: list[MapObject], header: Header) -> Optional[PointCloud2]:
+ """Create a colored point cloud for a list of MapObjects."""
+ points_list = []
+ colors_list = []
+
+ for obj in objects:
+ voxels = obj.voxel_manager.get_valid_voxels(
+ diversity_percentile=obj._diversity_percentile,
+ use_regularized=True,
+ )
+ if len(voxels) == 0:
+ voxels = obj.voxel_manager.get_valid_voxels(
+ diversity_percentile=obj._diversity_percentile,
+ use_regularized=False,
+ )
+ if len(voxels) == 0:
+ continue
+
+ seed = get_object_seed(obj)
+ color = np.array(
+ [
+ (seed & 0xFF) / 255.0,
+ ((seed >> 8) & 0xFF) / 255.0,
+ ((seed >> 16) & 0xFF) / 255.0,
+ ],
+ dtype=np.float32,
+ )
+ colors = np.tile(color, (voxels.shape[0], 1))
+ points_list.append(voxels)
+ colors_list.append(colors)
+
+ if len(points_list) == 0:
+ return None
+
+ points = np.concatenate(points_list, axis=0)
+ colors = np.concatenate(colors_list, axis=0)
+ return create_pointcloud2_msg(points=points, colors=colors, header=header)
+
+
+def create_delete_markers(
+ deleted_ids: list[int],
+ header: Header,
+ namespace: str = "bbox",
+) -> MarkerArray:
+ """Create deletion markers for a list of marker IDs."""
+ markers = MarkerArray()
+ for marker_id in deleted_ids:
+ marker = Marker()
+ marker.header = header
+ marker.ns = namespace
+ marker.id = int(marker_id)
+ marker.action = Marker.DELETE
+ markers.markers.append(marker)
+ return markers
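
All the publishers above derive a stable per-object color from the CRC32 seed, so the same object keeps the same color in RViz across publishes. A small standalone sketch of that mapping:

```python
import zlib

import numpy as np


def seed_to_rgb(key: str) -> np.ndarray:
    """Map an object identifier to a deterministic RGB triple in [0, 1]."""
    seed = zlib.crc32(key.encode("utf-8"))
    return np.array(
        [seed & 0xFF, (seed >> 8) & 0xFF, (seed >> 16) & 0xFF], dtype=np.float32
    ) / 255.0


print(seed_to_rgb("chair_3"))   # the same key always yields the same color
```
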
diff --git a/semantic_mapping/semantic_mapping/utils/matching.py b/semantic_mapping/semantic_mapping/utils/matching.py
new file mode 100644
index 0000000..e5aaa19
--- /dev/null
+++ b/semantic_mapping/semantic_mapping/utils/matching.py
@@ -0,0 +1,256 @@
+"""Object matching utilities for semantic mapping."""
+
+from __future__ import annotations
+
+from typing import Optional
+
+import cv2
+import numpy as np
+
+from vector_perception_utils.pointcloud_utils import compute_centroid
+
+from ..map_object import MapObject, Observation
+
+
+def _polygon_area(poly: np.ndarray) -> float:
+ """Compute polygon area using the shoelace formula.
+
+ Args:
+ poly: Polygon vertices in order (N, 2).
+
+ Returns:
+ Polygon area.
+ """
+ if poly.shape[0] < 3:
+ return 0.0
+ x = poly[:, 0]
+ y = poly[:, 1]
+ return 0.5 * abs(float(np.dot(x, np.roll(y, -1)) - np.dot(y, np.roll(x, -1))))
+
+
+def _ensure_ccw(poly: np.ndarray) -> np.ndarray:
+ """Ensure polygon vertices are ordered counter-clockwise."""
+ if poly.shape[0] < 3:
+ return poly
+ x = poly[:, 0]
+ y = poly[:, 1]
+ signed = float(np.dot(x, np.roll(y, -1)) - np.dot(y, np.roll(x, -1)))
+ if signed < 0:
+ return poly[::-1]
+ return poly
+
+
+def compute_bbox(points: np.ndarray) -> Optional[tuple[np.ndarray, np.ndarray]]:
+ """Compute yaw-only oriented bbox corners + extents."""
+ if points.shape[0] < 3:
+ return None
+
+ xy = points[:, :2].astype(np.float32)
+ unique_xy = np.unique(xy, axis=0)
+ if unique_xy.shape[0] < 3:
+ return None
+
+ rect = cv2.minAreaRect(unique_xy)
+ (width, height) = rect[1]
+ if width == 0.0 or height == 0.0:
+ return None
+
+ box = cv2.boxPoints(rect).astype(np.float64)
+ rect_xy = _ensure_ccw(box)
+
+ zmin = float(np.min(points[:, 2]))
+ zmax = float(np.max(points[:, 2]))
+ extent = np.array([width, height, zmax - zmin], dtype=float)
+ z_center = zmax - extent[2] / 2.0
+ half_z = extent[2] / 2.0
+
+ bottom = np.hstack([rect_xy, np.full((4, 1), z_center - half_z)])
+ top = np.hstack([rect_xy, np.full((4, 1), z_center + half_z)])
+ corners = np.vstack([bottom, top])
+ return corners, extent
+
+
+def _line_intersection(p1: np.ndarray, p2: np.ndarray, cp1: np.ndarray, cp2: np.ndarray) -> np.ndarray:
+ """Intersection of line p1->p2 with line cp1->cp2.
+
+ Args:
+ p1: Line segment start (2,).
+ p2: Line segment end (2,).
+ cp1: Clip edge start (2,).
+ cp2: Clip edge end (2,).
+
+ Returns:
+ Intersection point (2,).
+ """
+ dc = cp1 - cp2
+ dp = p1 - p2
+ denom = dc[0] * dp[1] - dc[1] * dp[0]
+ if denom == 0:
+ return p2
+ n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
+ n2 = p1[0] * p2[1] - p1[1] * p2[0]
+ x = (n1 * dp[0] - n2 * dc[0]) / denom
+ y = (n1 * dp[1] - n2 * dc[1]) / denom
+ return np.array([x, y], dtype=np.float64)
+
+
+def _polygon_clip(subject: np.ndarray, clip: np.ndarray) -> np.ndarray:
+ """Clip a polygon with a convex clip polygon (Sutherland–Hodgman).
+
+ Args:
+ subject: Subject polygon vertices (N, 2).
+ clip: Clip polygon vertices (M, 2).
+
+ Returns:
+ Clipped polygon vertices (K, 2).
+ """
+ if subject.shape[0] == 0:
+ return subject
+
+ def inside(p: np.ndarray, cp1: np.ndarray, cp2: np.ndarray) -> bool:
+ return (cp2[0] - cp1[0]) * (p[1] - cp1[1]) - (cp2[1] - cp1[1]) * (
+ p[0] - cp1[0]
+ ) >= 0
+
+ output = subject
+ for i in range(len(clip)):
+ cp1 = clip[i]
+ cp2 = clip[(i + 1) % len(clip)]
+ input_list = output
+ output = []
+ if len(input_list) == 0:
+ return np.empty((0, 2), dtype=np.float64)
+ s = input_list[-1]
+ for e in input_list:
+ if inside(e, cp1, cp2):
+ if not inside(s, cp1, cp2):
+ output.append(_line_intersection(s, e, cp1, cp2))
+ output.append(e)
+ elif inside(s, cp1, cp2):
+ output.append(_line_intersection(s, e, cp1, cp2))
+ s = e
+ output = np.array(output, dtype=np.float64)
+ return np.array(output, dtype=np.float64)
+
+
+def compute_iou_3d_with_ratios(
+ bbox1: np.ndarray, bbox2: np.ndarray
+) -> tuple[float, float, float]:
+ """Compute IoU and intersection ratios using yaw-only bbox overlap.
+
+ Args:
+ bbox1: Corner points for bbox1 (N, 3).
+ bbox2: Corner points for bbox2 (N, 3).
+
+ Returns:
+ Tuple of (iou, ratio_bbox1, ratio_bbox2).
+ """
+ if bbox1.shape[0] < 4 or bbox2.shape[0] < 4:
+ return 0.0, 0.0, 0.0
+
+ rect1 = _ensure_ccw(bbox1[:4, :2])
+ rect2 = _ensure_ccw(bbox2[:4, :2])
+ if rect1.shape[0] < 3 or rect2.shape[0] < 3:
+ return 0.0, 0.0, 0.0
+
+ poly_inter = _polygon_clip(rect1, rect2)
+ inter_area = _polygon_area(poly_inter) if poly_inter.shape[0] >= 3 else 0.0
+
+ zmin1, zmax1 = float(np.min(bbox1[:, 2])), float(np.max(bbox1[:, 2]))
+ zmin2, zmax2 = float(np.min(bbox2[:, 2])), float(np.max(bbox2[:, 2]))
+ height = max(0.0, min(zmax1, zmax2) - max(zmin1, zmin2))
+ inter_vol = inter_area * height
+
+ area1 = _polygon_area(rect1)
+ area2 = _polygon_area(rect2)
+ vol1 = area1 * (zmax1 - zmin1)
+ vol2 = area2 * (zmax2 - zmin2)
+ denom = vol1 + vol2 - inter_vol
+
+ iou = inter_vol / denom if denom > 0 else 0.0
+ ratio1 = inter_vol / vol1 if vol1 > 0 else 0.0
+ ratio2 = inter_vol / vol2 if vol2 > 0 else 0.0
+ return iou, ratio1, ratio2
+
+
+def find_best_match(
+ obs: Observation,
+ candidates: list[MapObject],
+ distance_threshold: float,
+ iou_threshold: float,
+ ratio_threshold: float = 0.4,
+ min_distance_merge: float = 0.25,
+) -> Optional[MapObject]:
+ """Find best matching object by distance/IoU heuristics.
+
+ Args:
+ obs: Observation to match.
+ candidates: Candidate map objects.
+ distance_threshold: Max centroid distance to consider.
+ iou_threshold: Min IoU to accept a merge.
+ ratio_threshold: Min overlap ratio to accept a merge.
+ min_distance_merge: Absolute distance threshold for merge.
+
+ Returns:
+ Best matching object or None if no match passes thresholds.
+ """
+ if obs.points.shape[0] == 0:
+ return None
+
+ obs_center = compute_centroid(obs.points)
+ obs_bbox = compute_bbox(obs.points)
+ obs_corners = obs_bbox[0] if obs_bbox is not None else None
+ obs_extent = obs_bbox[1] if obs_bbox is not None else None
+
+ best_candidate: Optional[MapObject] = None
+ best_dist = float("inf")
+
+ for candidate in candidates:
+ if obs.track_id in candidate.track_ids:
+ return candidate
+
+ cand_center = candidate.centroid
+ if cand_center is None:
+ continue
+
+ dist = float(np.linalg.norm(obs_center - cand_center))
+ if dist > distance_threshold:
+ continue
+
+ cand_points = candidate.voxel_manager.get_valid_voxels(
+ diversity_percentile=candidate._diversity_percentile,
+ use_regularized=True,
+ )
+ if len(cand_points) == 0:
+ cand_points = candidate.voxel_manager.get_valid_voxels(
+ diversity_percentile=candidate._diversity_percentile,
+ use_regularized=False,
+ )
+ cand_bbox = compute_bbox(cand_points) if len(cand_points) >= 3 else None
+ cand_corners = cand_bbox[0] if cand_bbox is not None else None
+ cand_extent = cand_bbox[1] if cand_bbox is not None else None
+
+ dist_merge = dist < min_distance_merge
+ if obs_extent is not None and cand_extent is not None:
+ dist_thresh = np.linalg.norm((obs_extent / 2 + cand_extent / 2) / 2) * 0.5
+ dist_merge = dist_merge or dist < dist_thresh
+
+ if dist_merge:
+ if dist < best_dist:
+ best_candidate = candidate
+ best_dist = dist
+ continue
+
+ if obs_corners is None or cand_corners is None:
+ continue
+
+ iou, ratio_obs, ratio_cand = compute_iou_3d_with_ratios(
+ obs_corners, cand_corners
+ )
+
+ if iou > iou_threshold or ratio_obs > ratio_threshold or ratio_cand > ratio_threshold:
+ if dist < best_dist:
+ best_candidate = candidate
+ best_dist = dist
+
+ return best_candidate
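
As a quick sanity check on the yaw-only IoU: two unit boxes whose footprints overlap by half share an intersection volume of 0.5, giving an IoU of 1/3 and overlap ratios of 0.5. A minimal sketch, assuming the module is importable as `semantic_mapping.utils.matching`:

```python
import numpy as np

from semantic_mapping.utils.matching import compute_iou_3d_with_ratios


def make_box(x0, y0, z0, x1, y1, z1):
    """Eight axis-aligned corners, bottom face first, in CCW order."""
    return np.array(
        [
            [x0, y0, z0], [x1, y0, z0], [x1, y1, z0], [x0, y1, z0],
            [x0, y0, z1], [x1, y0, z1], [x1, y1, z1], [x0, y1, z1],
        ],
        dtype=float,
    )


box_a = make_box(0.0, 0.0, 0.0, 1.0, 1.0, 1.0)
box_b = make_box(0.5, 0.0, 0.0, 1.5, 1.0, 1.0)
iou, ratio_a, ratio_b = compute_iou_3d_with_ratios(box_a, box_b)
print(iou, ratio_a, ratio_b)    # ~0.333, 0.5, 0.5
```
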
diff --git a/semantic_mapping/semantic_mapping/utils/storage.py b/semantic_mapping/semantic_mapping/utils/storage.py
new file mode 100644
index 0000000..d2be3cc
--- /dev/null
+++ b/semantic_mapping/semantic_mapping/utils/storage.py
@@ -0,0 +1,90 @@
+"""Async image saving queue for semantic mapping."""
+
+from __future__ import annotations
+
+from pathlib import Path
+import shutil
+import queue
+import threading
+import time
+
+import cv2
+import numpy as np
+
+
+class ImageSaveQueue:
+ """Background worker for saving object images to disk."""
+
+ def __init__(self, output_dir: Path, max_queue_size: int = 100, save_png: bool = False) -> None:
+ """Initialize the image save queue.
+
+ Args:
+ output_dir: Directory where images are stored.
+ max_queue_size: Maximum number of queued save jobs.
+ save_png: If True, also save a .png copy of the image.
+ """
+ self._output_dir = Path(output_dir)
+ if self._output_dir.exists() and any(self._output_dir.iterdir()):
+ shutil.rmtree(self._output_dir)
+ self._output_dir.mkdir(parents=True, exist_ok=True)
+ self._save_png = save_png
+ self._queue: queue.Queue = queue.Queue(maxsize=max_queue_size)
+ self._stop_event = threading.Event()
+ self._thread: threading.Thread | None = None
+
+ def start(self) -> None:
+ """Start the background worker."""
+ if self._thread is not None and self._thread.is_alive():
+ return
+ self._stop_event.clear()
+ self._thread = threading.Thread(target=self._run, daemon=True)
+ self._thread.start()
+
+ def stop(self) -> None:
+ """Stop the background worker."""
+ self._stop_event.set()
+ try:
+ self._queue.put_nowait(None)
+ except queue.Full:
+ pass
+ if self._thread is not None:
+ self._thread.join(timeout=2.0)
+ self._thread = None
+
+ def queue_save(
+ self, object_id: str, image: np.ndarray, mask: np.ndarray | None = None
+ ) -> str:
+ """Queue an image (and optional mask) for saving.
+
+ Args:
+ object_id: Object identifier to embed in filename.
+ image: Image array to save.
+ mask: Optional mask array to save alongside the image.
+
+ Returns:
+ Path to the saved image (.npy).
+ """
+ timestamp_ms = int(time.time() * 1000)
+ image_path = self._output_dir / f"{object_id}_{timestamp_ms}.npy"
+ mask_path = self._output_dir / f"{object_id}_{timestamp_ms}_mask.npy"
+ self._queue.put(("save", str(image_path), str(mask_path), image, mask))
+ return str(image_path)
+
+ def _run(self) -> None:
+ while not self._stop_event.is_set():
+ item = self._queue.get()
+ if item is None:
+ self._queue.task_done()
+ break
+ op, image_path, mask_path, image, mask = item
+ if op == "save":
+ try:
+ np.save(image_path, image)
+ if mask is not None:
+ np.save(mask_path, mask)
+ if self._save_png:
+ png_path = Path(image_path).with_suffix(".png")
+ cv2.imwrite(str(png_path), image)
+ except Exception:
+ pass
+ self._queue.task_done()
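
A short usage sketch for the save queue; the output directory below is illustrative, and note that the constructor clears a non-empty output directory before use:

```python
from pathlib import Path

import numpy as np

from semantic_mapping.utils.storage import ImageSaveQueue

saver = ImageSaveQueue(output_dir=Path("/tmp/object_images"), save_png=False)
saver.start()

# Queue a BGR crop and its boolean mask; the .npy path comes back immediately
# while the actual write happens on the worker thread.
crop = np.zeros((64, 64, 3), dtype=np.uint8)
mask = np.ones((64, 64), dtype=bool)
image_path = saver.queue_save(object_id="12", image=crop, mask=mask)
print(image_path)

saver.stop()
```
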
diff --git a/vector_perception_utils/setup.cfg b/vector_perception_utils/setup.cfg
deleted file mode 100644
index 1ce0c22..0000000
--- a/vector_perception_utils/setup.cfg
+++ /dev/null
@@ -1,5 +0,0 @@
-[develop]
-script_dir=$base/lib/vector_perception_utils
-[install]
-install_scripts=$base/lib/vector_perception_utils
-
diff --git a/vector_perception_utils/vector_perception_utils/pointcloud_utils.py b/vector_perception_utils/vector_perception_utils/pointcloud_utils.py
index 835db76..e86678d 100644
--- a/vector_perception_utils/vector_perception_utils/pointcloud_utils.py
+++ b/vector_perception_utils/vector_perception_utils/pointcloud_utils.py
@@ -148,6 +148,75 @@ def remove_radius_outliers(
return points_filtered, colors_filtered, inlier_indices
+def fit_plane(
+ points: np.ndarray,
+ distance_threshold: float = 0.05,
+ ransac_n: int = 3,
+ num_iterations: int = 50,
+ max_points: int = 8000,
+ min_inlier_ratio: float = 0.0,
+ min_normal_z: float = 0.0,
+ min_above_ratio: float = 0.0,
+ prefer_up: bool = True,
+) -> Optional[np.ndarray]:
+ """Fit a plane to a point cloud using RANSAC.
+
+ Args:
+ points: Point coordinates (N, 3).
+ distance_threshold: Max distance for inliers during RANSAC.
+ ransac_n: Number of points to sample per RANSAC iteration.
+ num_iterations: RANSAC iterations.
+ max_points: Max points to sample for fitting (0 keeps all).
+ min_inlier_ratio: Minimum inlier ratio to accept the plane.
+ min_normal_z: Minimum absolute Z component of the plane normal.
+ min_above_ratio: Minimum ratio of points above the plane.
+ prefer_up: If True, flip normal to have positive Z.
+
+ Returns:
+ Plane coefficients [a, b, c, d] for ax + by + cz + d = 0, or None.
+ """
+ if points.shape[0] < 3:
+ return None
+
+ sample_points = points
+ if max_points > 0 and points.shape[0] > max_points:
+ indices = np.random.choice(points.shape[0], max_points, replace=False)
+ sample_points = points[indices]
+
+ pcd = create_pointcloud(sample_points)
+ plane_model, inliers = pcd.segment_plane(
+ distance_threshold=distance_threshold,
+ ransac_n=ransac_n,
+ num_iterations=num_iterations,
+ )
+ if len(inliers) == 0:
+ return None
+ if min_inlier_ratio > 0.0 and len(inliers) / sample_points.shape[0] < min_inlier_ratio:
+ return None
+
+ normal = np.array(plane_model[:3], dtype=float)
+ norm = float(np.linalg.norm(normal))
+ if norm == 0.0:
+ return None
+ normal /= norm
+ d = float(plane_model[3]) / norm
+
+ if min_normal_z > 0.0 and abs(normal[2]) < min_normal_z:
+ return None
+ if prefer_up and normal[2] < 0:
+ normal = -normal
+ d = -d
+
+ if min_above_ratio > 0.0:
+ check_points = sample_points
+ distances = check_points @ normal + d
+ above_ratio = float(np.mean(distances > 0.0))
+ if above_ratio < min_above_ratio:
+ return None
+
+ return np.array([normal[0], normal[1], normal[2], d], dtype=float)
+
+
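
A small sketch of `fit_plane` on a synthetic, slightly noisy ground patch; with the defaults plus a normal constraint it should return coefficients close to [0, 0, 1, 0], with `prefer_up` keeping the normal pointing up:

```python
import numpy as np

from vector_perception_utils.pointcloud_utils import fit_plane

rng = np.random.default_rng(0)
xy = rng.uniform(-2.0, 2.0, size=(2000, 2))
z = rng.normal(0.0, 0.01, size=(2000, 1))       # near-flat ground around z = 0
ground = np.hstack([xy, z])

plane = fit_plane(ground, distance_threshold=0.05, min_normal_z=0.8)
if plane is not None:
    a, b, c, d = plane                          # expect roughly [0, 0, 1, 0]
    print(a, b, c, d)
```
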
def transform_pointcloud(
points: np.ndarray,
transform: np.ndarray
@@ -199,42 +268,126 @@ def crop_pointcloud_bbox(
def compute_centroid(points: np.ndarray) -> np.ndarray:
"""
Compute centroid of point cloud.
-
+
Args:
points: Point coordinates (N, 3)
-
+
Returns:
Centroid as (3,) array
"""
return np.mean(points, axis=0)
+def get_bbox_center(bbox) -> np.ndarray:
+ """Return bounding box center for Open3D AABB/OBB variants."""
+ if hasattr(bbox, "center"):
+ return np.asarray(bbox.center)
+ if hasattr(bbox, "get_center"):
+ return np.asarray(bbox.get_center())
+ if hasattr(bbox, "get_min_bound") and hasattr(bbox, "get_max_bound"):
+ min_bound = np.asarray(bbox.get_min_bound())
+ max_bound = np.asarray(bbox.get_max_bound())
+ return (min_bound + max_bound) * 0.5
+ return np.zeros(3, dtype=float)
+
+
+def get_bbox_extent(bbox) -> np.ndarray:
+ """Return bounding box extent for Open3D AABB/OBB variants."""
+ if hasattr(bbox, "extent"):
+ return np.asarray(bbox.extent)
+ if hasattr(bbox, "get_extent"):
+ return np.asarray(bbox.get_extent())
+ if hasattr(bbox, "get_min_bound") and hasattr(bbox, "get_max_bound"):
+ min_bound = np.asarray(bbox.get_min_bound())
+ max_bound = np.asarray(bbox.get_max_bound())
+ return max_bound - min_bound
+ return np.zeros(3, dtype=float)
+
+
+def get_bbox_corners(bbox) -> np.ndarray:
+ """Return 8 bbox corners for Open3D AABB/OBB variants."""
+ if hasattr(bbox, "get_box_points"):
+ return np.asarray(bbox.get_box_points())
+ if hasattr(bbox, "get_min_bound") and hasattr(bbox, "get_max_bound"):
+ min_bound = np.asarray(bbox.get_min_bound())
+ max_bound = np.asarray(bbox.get_max_bound())
+ return np.array(
+ [
+ [min_bound[0], min_bound[1], min_bound[2]],
+ [max_bound[0], min_bound[1], min_bound[2]],
+ [max_bound[0], max_bound[1], min_bound[2]],
+ [min_bound[0], max_bound[1], min_bound[2]],
+ [min_bound[0], min_bound[1], max_bound[2]],
+ [max_bound[0], min_bound[1], max_bound[2]],
+ [max_bound[0], max_bound[1], max_bound[2]],
+ [min_bound[0], max_bound[1], max_bound[2]],
+ ],
+ dtype=float,
+ )
+ return np.zeros((0, 3), dtype=float)
+
+
+def get_oriented_bbox(
+ points: np.ndarray,
+ axis_aligned: bool = True,
+) -> Optional[Tuple[np.ndarray, np.ndarray]]:
+ """
+ Compute bounding box from point cloud.
+
+ Args:
+ points: Point coordinates (N, 3)
+ axis_aligned: If True, compute axis-aligned bounding box.
+ If False, compute minimal oriented bounding box.
+
+ Returns:
+ Tuple of (center, corners) where center is (3,) and corners is (8, 3),
+ or None if insufficient points.
+ """
+ if len(points) < 4:
+ return None
+
+ pcd = create_pointcloud(points)
+
+ try:
+ if axis_aligned:
+ bbox = pcd.get_axis_aligned_bounding_box()
+ else:
+ bbox = pcd.get_minimal_oriented_bounding_box()
+ except Exception:
+ return None
+
+ center = get_bbox_center(bbox)
+ corners = get_bbox_corners(bbox)
+ return center, corners
+
def pointcloud_to_bbox3d(points: np.ndarray, header=None):
"""
Convert point cloud to minimal oriented 3D bounding box (ROS2 message).
-
+
Args:
points: Point coordinates (N, 3)
header: Optional ROS2 header for the message
-
+
Returns:
vision_msgs/BoundingBox3D message or None if insufficient points
"""
-
+
if len(points) < 4:
return None
pcd = create_pointcloud(points)
-
+
try:
obb = pcd.get_minimal_oriented_bounding_box()
except:
return None
-
- center = obb.center
- extent = obb.extent
- quat = Rotation.from_matrix(obb.R).as_quat()
-
+ center = get_bbox_center(obb)
+ extent = get_bbox_extent(obb)
+ if hasattr(obb, "R"):
+ quat = Rotation.from_matrix(obb.R).as_quat()
+ else:
+ quat = np.array([0.0, 0.0, 0.0, 1.0], dtype=float)
+
bbox = BoundingBox3D()
bbox.center.position = Point(
x=float(center[0]), y=float(center[1]), z=float(center[2])
@@ -243,7 +396,7 @@ def pointcloud_to_bbox3d(points: np.ndarray, header=None):
x=float(quat[0]), y=float(quat[1]), z=float(quat[2]), w=float(quat[3])
)
bbox.size = Vector3(x=float(extent[0]), y=float(extent[1]), z=float(extent[2]))
-
+
return bbox
@@ -328,6 +481,192 @@ def pointcloud_to_depth(
return depth_image
+def project_pointcloud_to_image(
+ points: np.ndarray,
+ projection: str = "equirect",
+ image_width: int = 1920,
+ image_height: int = 640,
+ axis_mode: str = "xz",
+ intrinsics: Optional[np.ndarray] = None,
+ depth_filter: bool = False,
+ depth_filter_margin: float = 0.15,
+ transform: Optional[np.ndarray] = None,
+) -> Tuple[np.ndarray, np.ndarray]:
+ """
+ Project a point cloud into image pixel coordinates.
+
+ Args:
+ points: Point coordinates (N, 3).
+ projection: Projection model ("equirect" or "pinhole").
+ image_width: Output image width in pixels.
+ image_height: Output image height in pixels.
+ axis_mode: Equirect axis convention ("xz" or "xy").
+ intrinsics: Pinhole intrinsics (3, 3) if using "pinhole".
+ depth_filter: Enable depth-based occlusion filtering.
+ depth_filter_margin: Depth margin for occlusion filtering (meters).
+ transform: Optional 4x4 transform applied before projection.
+
+ Returns:
+ Tuple of (camera_points, pixel_idx) where pixel_idx is (N, 3) with (u, v, depth).
+ """
+ camera_points = transform_pointcloud(points, transform) if transform is not None else points
+
+ if projection == "pinhole":
+ if intrinsics is None:
+ raise ValueError("intrinsics required for pinhole projection")
+ u = camera_points[:, 0] * intrinsics[0, 0] / (camera_points[:, 2] + 1e-6) + intrinsics[0, 2]
+ v = camera_points[:, 1] * intrinsics[1, 1] / (camera_points[:, 2] + 1e-6) + intrinsics[1, 2]
+ return camera_points, np.stack([u, v, camera_points[:, 2]], axis=-1)
+
+ if axis_mode == "xz":
+ hori_dis = np.sqrt(camera_points[:, 0] ** 2 + camera_points[:, 2] ** 2)
+ u = (
+ image_width / (2 * np.pi) * np.arctan2(camera_points[:, 0], camera_points[:, 2])
+ + image_width / 2
+ + 1
+ )
+ v = (
+ image_width / (2 * np.pi) * np.arctan(camera_points[:, 1] / hori_dis)
+ + image_height / 2
+ + 1
+ )
+ else:
+ hori_dis = np.sqrt(camera_points[:, 0] ** 2 + camera_points[:, 1] ** 2)
+ u = (
+ -image_width / (2 * np.pi) * np.arctan2(camera_points[:, 1], camera_points[:, 0])
+ + image_width / 2
+ + 1
+ )
+ v = (
+ -image_width / (2 * np.pi) * np.arctan2(camera_points[:, 2], hori_dis)
+ + image_height / 2
+ + 1
+ )
+
+ u = np.clip(u, 0, image_width - 1)
+ v = np.clip(v, 0, image_height - 1)
+
+ if depth_filter:
+ depth_map = np.full((image_height, image_width), np.inf)
+ idx = v.astype(int) * image_width + u.astype(int)
+ np.minimum.at(depth_map.ravel(), idx, hori_dis)
+ remove_mask = hori_dis >= depth_map[v.astype(int), u.astype(int)] + depth_filter_margin
+ u = u.copy()
+ v = v.copy()
+ u[remove_mask] = -1
+ v[remove_mask] = -1
+
+ return camera_points, np.stack([u, v, hori_dis], axis=-1)
+
+
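
For intuition on the equirect ("xz") branch: the horizontal angle maps to `u` at `image_width / (2 * pi)` pixels per radian around the center column, and `v` uses the elevation angle with the same scale. A hand-worked point at the default 1920x640 resolution:

```python
import numpy as np

W, H = 1920, 640
x, y, z = 0.5, 0.0, 1.0                   # 0.5 m right, 1 m ahead, on the horizon
hori = np.sqrt(x**2 + z**2)

u = W / (2 * np.pi) * np.arctan2(x, z) + W / 2 + 1     # ~1102.7 px, right of center
v = W / (2 * np.pi) * np.arctan(y / hori) + H / 2 + 1  # 321.0 px, the horizon row
print(u, v)
```
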
+def segment_pointcloud_by_mask(
+ points: np.ndarray,
+ mask: np.ndarray,
+ pixel_idx: np.ndarray,
+) -> np.ndarray:
+ """
+ Segment a point cloud using a binary mask and projected pixels.
+
+ Args:
+ points: Point coordinates (N, 3).
+ mask: Binary mask (H, W).
+ pixel_idx: Pixel coordinates (N, 3) as (u, v, depth).
+
+ Returns:
+ Masked point cloud (M, 3).
+ """
+ h, w = mask.shape[:2]
+ pixel_idx = pixel_idx.astype(int)
+ valid = (
+ (pixel_idx[:, 0] >= 0)
+ & (pixel_idx[:, 0] < w)
+ & (pixel_idx[:, 1] >= 0)
+ & (pixel_idx[:, 1] < h)
+ )
+ pixel_idx = pixel_idx[valid]
+ points = points[valid]
+ keep = mask[pixel_idx[:, 1], pixel_idx[:, 0]].astype(bool)
+ return points[keep]
+
+
+def segment_pointclouds_from_projection(
+ points: np.ndarray,
+ masks: list[np.ndarray],
+ pixel_idx: np.ndarray,
+) -> Tuple[list[np.ndarray], list[np.ndarray]]:
+ """
+ Segment point clouds from projected pixels.
+
+ Args:
+ points: Point coordinates (N, 3).
+ masks: List of binary masks (H, W).
+ pixel_idx: Pixel coordinates (N, 3) as (u, v, depth).
+
+ Returns:
+ Tuple of (clouds, depths) where each entry corresponds to a mask.
+ """
+ if masks is None or len(masks) == 0:
+ return [], []
+
+ image_shape = masks[0].shape
+ valid = (
+ (pixel_idx[:, 0] >= 0)
+ & (pixel_idx[:, 0] < image_shape[1])
+ & (pixel_idx[:, 1] >= 0)
+ & (pixel_idx[:, 1] < image_shape[0])
+ )
+ pixel_idx = pixel_idx[valid]
+ points = points[valid]
+
+ depths = pixel_idx[:, 2]
+ pixel_idx = pixel_idx.astype(int)
+
+ obj_clouds = []
+ obj_depths = []
+ for obj_mask in masks:
+ cloud_mask = obj_mask[pixel_idx[:, 1], pixel_idx[:, 0]].astype(bool)
+ obj_clouds.append(points[cloud_mask])
+ obj_depths.append(depths[cloud_mask].reshape(-1, 1))
+
+ return obj_clouds, obj_depths
+
+
+def transform_segmented_clouds_to_world(
+ clouds: list[np.ndarray],
+ depths: list[np.ndarray],
+ R_b2w: np.ndarray,
+ t_b2w: np.ndarray,
+ depth_jump_threshold: float = 0.3,
+) -> list[np.ndarray]:
+ """
+ Prune depth jumps and transform segmented clouds to world frame.
+
+ Args:
+ clouds: List of segmented point clouds (sensor frame).
+ depths: List of depth arrays aligned with clouds.
+ R_b2w: Body-to-world rotation matrix (3, 3).
+ t_b2w: Body-to-world translation vector (3,).
+ depth_jump_threshold: Depth jump threshold for pruning (meters).
+
+ Returns:
+ List of segmented object clouds in world frame.
+ """
+ obj_cloud_world_list = []
+ for obj_cloud, obj_depth in zip(clouds, depths):
+ if obj_depth.shape[0] > 1:
+ obj_depth_diff = (obj_depth[1:] - obj_depth[:-1]).squeeze()
+ if obj_depth_diff.size > 0 and np.max(obj_depth_diff) > depth_jump_threshold:
+ idx_tmp = np.ones(len(obj_depth), dtype=bool)
+ jump_idx = int(np.argmax(obj_depth_diff))
+ idx_tmp[jump_idx + 1 :] = False
+ obj_cloud = obj_cloud[idx_tmp]
+
+ obj_cloud_world = obj_cloud[:, :3] @ R_b2w.T + t_b2w
+ obj_cloud_world_list.append(obj_cloud_world)
+
+ return obj_cloud_world_list
+
+
def create_pointcloud2_msg(points: np.ndarray, colors: np.ndarray, header):
"""
Create PointCloud2 ROS message from points and colors.
diff --git a/vector_perception_utils/vector_perception_utils/transform_utils.py b/vector_perception_utils/vector_perception_utils/transform_utils.py
index 534d8d2..ef3238e 100644
--- a/vector_perception_utils/vector_perception_utils/transform_utils.py
+++ b/vector_perception_utils/vector_perception_utils/transform_utils.py
@@ -8,7 +8,7 @@
from geometry_msgs.msg import Pose, Quaternion
-def normalize_angle(angle: float) -> float:
+def normalize_angle(angle: float | np.ndarray) -> float | np.ndarray:
"""
Normalize angle to [-pi, pi] range.
@@ -21,6 +21,26 @@ def normalize_angle(angle: float) -> float:
return np.arctan2(np.sin(angle), np.cos(angle))
+def discretize_angles(angles: np.ndarray, num_bins: int) -> np.ndarray:
+ """
+ Discretize angles into bin indices.
+
+ Normalizes angles to [-pi, pi] then maps to bin indices [0, num_bins-1].
+
+ Args:
+ angles: Angles in radians (any range).
+ num_bins: Number of angle bins.
+
+ Returns:
+ Array of bin indices (0 to num_bins-1), same shape as input.
+ """
+ angles = normalize_angle(angles)
+ # Map [-pi, pi] to [0, 2*pi] then to [0, num_bins)
+ normalized = (angles + np.pi) / (2 * np.pi)
+ bins = (normalized * num_bins).astype(int)
+ return np.clip(bins, 0, num_bins - 1)
+
+
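
A quick check of the binning: with 36 bins, -pi falls in bin 0, 0 rad in bin 18, pi/2 in bin 27, and +pi is clipped into bin 35:

```python
import numpy as np

from vector_perception_utils.transform_utils import discretize_angles

angles = np.array([-np.pi, 0.0, np.pi / 2, np.pi])
print(discretize_angles(angles, num_bins=36))   # [ 0 18 27 35]
```
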
def pose_to_matrix(pose: Pose) -> np.ndarray:
"""
Convert pose to 4x4 homogeneous transform matrix.