diff --git a/dimos/robot/unitree_webrtc/unitree_go2_blueprints.py b/dimos/robot/unitree_webrtc/unitree_go2_blueprints.py index 7629644ed6..9ed275e75a 100644 --- a/dimos/robot/unitree_webrtc/unitree_go2_blueprints.py +++ b/dimos/robot/unitree_webrtc/unitree_go2_blueprints.py @@ -43,12 +43,13 @@ from dimos.navigation.replanning_a_star.module import ( replanning_a_star_planner, ) -from dimos.perception.detection.moduleDB import ObjectDBModule, detectionDB_module +from dimos.perception.detection.module3D import Detection3DModule, detection3d_module from dimos.perception.spatial_perception import spatial_memory from dimos.protocol.mcp.mcp import MCPModule from dimos.robot.foxglove_bridge import foxglove_bridge from dimos.robot.unitree.connection.go2 import GO2Connection, go2_connection from dimos.robot.unitree_webrtc.unitree_skill_container import unitree_skills +from dimos.skills.visual_nav2 import VisNavSkills, vis_nav_skills from dimos.utils.monitoring import utilization from dimos.web.websocket_vis.websocket_vis_module import websocket_vis @@ -70,9 +71,9 @@ } ) - linux = autoconnect(foxglove_bridge()) + basic = autoconnect( go2_connection(), linux if platform.system() == "Linux" else mac, @@ -88,45 +89,47 @@ wavefront_frontier_explorer(), ).global_config(n_dask_workers=6, robot_model="unitree_go2") + detection = ( autoconnect( nav, - detectionDB_module( + detection3d_module( camera_info=GO2Connection.camera_info_static, ), + vis_nav_skills(), ) .remappings( [ - (ObjectDBModule, "pointcloud", "global_map"), + (Detection3DModule, "pointcloud", "global_map"), ] ) .transports( { # Detection 3D module outputs - ("detections", ObjectDBModule): LCMTransport( - "/detector3d/detections", Detection2DArray - ), - ("annotations", ObjectDBModule): LCMTransport( + # ("detections", ObjectDBModule): LCMTransport( + # "/detector3d/detections", Detection2DArray + # ), + ("annotations", Detection3DModule): LCMTransport( "/detector3d/annotations", ImageAnnotations ), - # 
("scene_update", ObjectDBModule): LCMTransport( + # ("scene_update", Detection3DModule): LCMTransport( # "/detector3d/scene_update", SceneUpdate # ), - ("detected_pointcloud_0", ObjectDBModule): LCMTransport( + ("detected_pointcloud_0", Detection3DModule): LCMTransport( "/detector3d/pointcloud/0", PointCloud2 ), - ("detected_pointcloud_1", ObjectDBModule): LCMTransport( + ("detected_pointcloud_1", Detection3DModule): LCMTransport( "/detector3d/pointcloud/1", PointCloud2 ), - ("detected_pointcloud_2", ObjectDBModule): LCMTransport( + ("detected_pointcloud_2", Detection3DModule): LCMTransport( "/detector3d/pointcloud/2", PointCloud2 ), - ("detected_image_0", ObjectDBModule): LCMTransport("/detector3d/image/0", Image), - ("detected_image_1", ObjectDBModule): LCMTransport("/detector3d/image/1", Image), - ("detected_image_2", ObjectDBModule): LCMTransport("/detector3d/image/2", Image), + ("detected_image_0", Detection3DModule): LCMTransport("/detector3d/image/0", Image), + ("detected_image_1", Detection3DModule): LCMTransport("/detector3d/image/1", Image), + ("detected_image_2", Detection3DModule): LCMTransport("/detector3d/image/2", Image), } ) -) +).global_config(n_dask_workers=8) spatial = autoconnect( diff --git a/dimos/skills/visual_nav2.py b/dimos/skills/visual_nav2.py new file mode 100644 index 0000000000..3ba5889ef5 --- /dev/null +++ b/dimos/skills/visual_nav2.py @@ -0,0 +1,80 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
from dataclasses import dataclass, field

from reactivex import operators as ops
from reactivex.observable import Observable

from dimos.core import In, Module, ModuleConfig, Out, rpc
from dimos.models.vl.moondream import MoondreamVLModel
from dimos.msgs.sensor_msgs import Image
from dimos.msgs.vision_msgs import Detection2DArray
from dimos.perception.detection.type import ImageDetections2D
from dimos.types.timestamped import align_timestamped
from dimos.utils.reactive import backpressure


@dataclass
class Config(ModuleConfig):
    """Configuration for :class:`VisNavSkills`."""

    # Vision-language model used to ground text queries in camera frames.
    # NOTE(review): the original annotated this as ``VlModel``, a name that
    # is never imported or defined — use the concrete imported type.
    # (``field`` was also used without being imported; fixed above.)
    vlmodel: MoondreamVLModel = field(default_factory=MoondreamVLModel)


class VisNavSkills(Module[Config]):
    """Visual-navigation skill module.

    Consumes a timestamped color-image stream and a 2D-detection stream,
    and exposes :meth:`visual_navigation`, which searches live frames for a
    text-described target via the configured vision-language model.
    """

    color_image: In[Image]
    detections: In[Detection2DArray]

    default_config = Config

    config: Config
    vlmodel: MoondreamVLModel

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)
        # Bug fix: ``config.vlmodel`` is already an instance built by the
        # dataclass ``default_factory``; the original called it like a
        # factory (``self.config.vlmodel()``), which would invoke
        # ``MoondreamVLModel.__call__`` instead of storing the model.
        self.vlmodel = self.config.vlmodel

    def start(self) -> None:
        # Log aligned (image, detections) pairs; register the subscription
        # so it is disposed with the module.
        self._disposables.add(self.detections_stream().subscribe(print))

    def visual_navigation(self, target: str) -> None:
        """Search the live image stream for *target* using the VL model.

        Bug fix: the original built this pipeline but never subscribed it,
        so no frame was ever queried. Subscribe and track the disposable.
        NOTE(review): ``rpc`` is imported but unused — presumably this
        method was meant to be an ``@rpc`` entry point; confirm with the
        module's callers before adding the decorator.
        """
        querying = self.color_image.observable().pipe(
            ops.map(lambda frame: self.vlmodel.query_detections(frame, target)),
            ops.filter(lambda result: result.detections_length > 0),
        )
        self._disposables.add(querying.subscribe())

        print(f"Navigating to {target} using visual navigation.")

    def detections_stream(self) -> Observable[ImageDetections2D]:
        """Time-aligned (image, detections) pairs as :class:`ImageDetections2D`.

        Bug fix: ``start()`` subscribes this stream, but the original left
        the whole method commented out, which would raise ``AttributeError``
        as soon as the module started.
        """
        return backpressure(
            align_timestamped(
                self.color_image.pure_observable(),
                self.detections.pure_observable().pipe(
                    # Drop empty detection arrays before pairing.
                    ops.filter(lambda d: d.detections_length > 0)  # type: ignore[attr-defined]
                ),
                match_tolerance=0.0,
                buffer_size=2.0,
            ).pipe(
                ops.map(
                    lambda pair: ImageDetections2D.from_ros_detection2d_array(  # type: ignore[misc]
                        *pair
                    )
                )
            )
        )


vis_nav_skills = VisNavSkills.blueprint


__all__ = ["VisNavSkills", "vis_nav_skills"]