diff --git a/dimos/hardware/can_activate.sh b/dimos/hardware/can_activate.sh
new file mode 100644
index 0000000000..60cc95e7ea
--- /dev/null
+++ b/dimos/hardware/can_activate.sh
@@ -0,0 +1,138 @@
+#!/bin/bash
+
+# The default CAN name can be set by the user via command-line parameters.
+DEFAULT_CAN_NAME="${1:-can0}"
+
+# The default bitrate for a single CAN module can be set by the user via command-line parameters.
+DEFAULT_BITRATE="${2:-1000000}"
+
+# USB hardware address (optional parameter)
+USB_ADDRESS="${3}"
+echo "-------------------START-----------------------"
+# Check if ethtool is installed.
+if ! dpkg -l | grep -q "ethtool"; then
+    # -e is required so the ANSI color escapes are interpreted (matches usage below).
+    echo -e "\e[31mError: ethtool not detected in the system.\e[0m"
+    echo "Please use the following command to install ethtool:"
+    echo "sudo apt update && sudo apt install ethtool"
+    exit 1
+
+# Check if can-utils is installed.
+if ! dpkg -l | grep -q "can-utils"; then
+    # -e interprets the color escapes; message fixed to name can-utils, not ethtool.
+    echo -e "\e[31mError: can-utils not detected in the system.\e[0m"
+    echo "Please use the following command to install can-utils:"
+    echo "sudo apt update && sudo apt install can-utils"
+    exit 1
+
+echo "Both ethtool and can-utils are installed."
+
+# Retrieve the number of CAN modules in the current system.
+CURRENT_CAN_COUNT=$(ip link show type can | grep -c "link/can")
+
+# Verify if the number of CAN modules in the current system matches the expected value.
+if [ "$CURRENT_CAN_COUNT" -ne "1" ]; then
+ if [ -z "$USB_ADDRESS" ]; then
+ # Iterate through all CAN interfaces.
+ for iface in $(ip -br link show type can | awk '{print $1}'); do
+ # Use ethtool to retrieve bus-info.
+ BUS_INFO=$(sudo ethtool -i "$iface" | grep "bus-info" | awk '{print $2}')
+
+ if [ -z "$BUS_INFO" ];then
+ echo "Error: Unable to retrieve bus-info for interface $iface."
+ continue
+ fi
+
+ echo "Interface $iface is inserted into USB port $BUS_INFO"
+ done
+ echo -e " \e[31m Error: The number of CAN modules detected by the system ($CURRENT_CAN_COUNT) does not match the expected number (1). \e[0m"
+ echo -e " \e[31m Please add the USB hardware address parameter, such as: \e[0m"
+ echo -e " bash can_activate.sh can0 1000000 1-2:1.0"
+ echo "-------------------ERROR-----------------------"
+ exit 1
+ fi
+fi
+
+# Load the gs_usb module.
+# sudo modprobe gs_usb
+# if [ $? -ne 0 ]; then
+# echo "Error: Unable to load the gs_usb module."
+# exit 1
+# fi
+
+if [ -n "$USB_ADDRESS" ]; then
+ echo "Detected USB hardware address parameter: $USB_ADDRESS"
+
+ # Use ethtool to find the CAN interface corresponding to the USB hardware address.
+ INTERFACE_NAME=""
+ for iface in $(ip -br link show type can | awk '{print $1}'); do
+ BUS_INFO=$(sudo ethtool -i "$iface" | grep "bus-info" | awk '{print $2}')
+ if [ "$BUS_INFO" = "$USB_ADDRESS" ]; then
+ INTERFACE_NAME="$iface"
+ break
+ fi
+ done
+
+ if [ -z "$INTERFACE_NAME" ]; then
+ echo "Error: Unable to find CAN interface corresponding to USB hardware address $USB_ADDRESS."
+ exit 1
+ else
+ echo "Found the interface corresponding to USB hardware address $USB_ADDRESS: $INTERFACE_NAME."
+ fi
+else
+ # Retrieve the unique CAN interface.
+ INTERFACE_NAME=$(ip -br link show type can | awk '{print $1}')
+
+ # Check if the interface name has been retrieved.
+ if [ -z "$INTERFACE_NAME" ]; then
+ echo "Error: Unable to detect CAN interface."
+ exit 1
+ fi
+ BUS_INFO=$(sudo ethtool -i "$INTERFACE_NAME" | grep "bus-info" | awk '{print $2}')
+ echo "Expected to configure a single CAN module, detected interface $INTERFACE_NAME with corresponding USB address $BUS_INFO."
+fi
+
+# Check if the current interface is already activated.
+IS_LINK_UP=$(ip link show "$INTERFACE_NAME" | grep -q "UP" && echo "yes" || echo "no")
+
+# Retrieve the bitrate of the current interface.
+CURRENT_BITRATE=$(ip -details link show "$INTERFACE_NAME" | grep -oP 'bitrate \K\d+')
+
+if [ "$IS_LINK_UP" = "yes" ] && [ "$CURRENT_BITRATE" -eq "$DEFAULT_BITRATE" ]; then
+ echo "Interface $INTERFACE_NAME is already activated with a bitrate of $DEFAULT_BITRATE."
+
+ # Check if the interface name matches the default name.
+ if [ "$INTERFACE_NAME" != "$DEFAULT_CAN_NAME" ]; then
+ echo "Rename interface $INTERFACE_NAME to $DEFAULT_CAN_NAME."
+ sudo ip link set "$INTERFACE_NAME" down
+ sudo ip link set "$INTERFACE_NAME" name "$DEFAULT_CAN_NAME"
+ sudo ip link set "$DEFAULT_CAN_NAME" up
+ echo "The interface has been renamed to $DEFAULT_CAN_NAME and reactivated."
+ else
+ echo "The interface name is already $DEFAULT_CAN_NAME."
+ fi
+else
+ # If the interface is not activated or the bitrate is different, configure it.
+ if [ "$IS_LINK_UP" = "yes" ]; then
+ echo "Interface $INTERFACE_NAME is already activated, but the bitrate is $CURRENT_BITRATE, which does not match the set value of $DEFAULT_BITRATE."
+ else
+ echo "Interface $INTERFACE_NAME is not activated or bitrate is not set."
+ fi
+
+ # Set the interface bitrate and activate it.
+ sudo ip link set "$INTERFACE_NAME" down
+ sudo ip link set "$INTERFACE_NAME" type can bitrate $DEFAULT_BITRATE
+ sudo ip link set "$INTERFACE_NAME" up
+ echo "Interface $INTERFACE_NAME has been reset to bitrate $DEFAULT_BITRATE and activated."
+
+ # Rename the interface to the default name.
+ if [ "$INTERFACE_NAME" != "$DEFAULT_CAN_NAME" ]; then
+ echo "Rename interface $INTERFACE_NAME to $DEFAULT_CAN_NAME."
+ sudo ip link set "$INTERFACE_NAME" down
+ sudo ip link set "$INTERFACE_NAME" name "$DEFAULT_CAN_NAME"
+ sudo ip link set "$DEFAULT_CAN_NAME" up
+ echo "The interface has been renamed to $DEFAULT_CAN_NAME and reactivated."
+ fi
+fi
+
+echo "-------------------OVER------------------------"
diff --git a/dimos/hardware/piper_arm.py b/dimos/hardware/piper_arm.py
new file mode 100644
index 0000000000..774f70b1c6
--- /dev/null
+++ b/dimos/hardware/piper_arm.py
@@ -0,0 +1,517 @@
+# Copyright 2025 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# dimos/hardware/piper_arm.py
+
+from typing import (
+ Optional,
+ Tuple,
+)
+from piper_sdk import * # from the official Piper SDK
+import numpy as np
+import time
+import subprocess
+import kinpy as kp
+import sys
+import termios
+import tty
+import select
+from scipy.spatial.transform import Rotation as R
+from dimos.utils.transform_utils import euler_to_quaternion, quaternion_to_euler
+from dimos.utils.logging_config import setup_logger
+
+import threading
+
+import pytest
+
+import dimos.core as core
+import dimos.protocol.service.lcmservice as lcmservice
+from dimos.core import In, Module, rpc
+from dimos_lcm.geometry_msgs import Pose, Vector3, Twist
+
+logger = setup_logger("dimos.hardware.piper_arm")
+
+
+class PiperArm:
+ def __init__(self, arm_name: str = "arm"):
+ self.arm = C_PiperInterface_V2()
+ self.arm.ConnectPort()
+ self.resetArm()
+ time.sleep(0.5)
+ self.resetArm()
+ time.sleep(0.5)
+ self.enable()
+ self.enable_gripper() # Enable gripper after arm is enabled
+ self.gotoZero()
+ time.sleep(1)
+ self.init_vel_controller()
+
+    def enable(self):
+        # EnablePiper() reports whether the arm is enabled; retry with a short
+        # delay instead of busy-spinning until it succeeds.
+        while not self.arm.EnablePiper():
+            time.sleep(0.01)
+        logger.info("Arm enabled")
+        # self.arm.ModeCtrl(
+        #     ctrl_mode=0x01,          # CAN command mode
+        #     move_mode=0x01,          # "Move-J", but ignored in MIT
+        #     move_spd_rate_ctrl=100,  # doesn't matter in MIT
+        #     is_mit_mode=0xAD)        # <-- the magic flag
+        self.arm.MotionCtrl_2(0x01, 0x01, 80, 0xAD)
+
+ def gotoZero(self):
+ factor = 1000
+ position = [57.0, 0.0, 215.0, 0, 90.0, 0, 0]
+ X = round(position[0] * factor)
+ Y = round(position[1] * factor)
+ Z = round(position[2] * factor)
+ RX = round(position[3] * factor)
+ RY = round(position[4] * factor)
+ RZ = round(position[5] * factor)
+ joint_6 = round(position[6] * factor)
+ logger.debug(f"Going to zero position: X={X}, Y={Y}, Z={Z}, RX={RX}, RY={RY}, RZ={RZ}")
+ self.arm.MotionCtrl_2(0x01, 0x00, 100, 0x00)
+ self.arm.EndPoseCtrl(X, Y, Z, RX, RY, RZ)
+ self.arm.GripperCtrl(0, 1000, 0x01, 0)
+
+    def gotoObserve(self):
+        # Move the end-effector to the canonical "observe" pose (units: mm / deg,
+        # scaled by 1000 for the SDK's 0.001 mm / 0.001 deg integer units).
+        factor = 1000
+        position = [57.0, 0.0, 280.0, 0, 120.0, 0, 0]
+        X = round(position[0] * factor)
+        Y = round(position[1] * factor)
+        Z = round(position[2] * factor)
+        RX = round(position[3] * factor)
+        RY = round(position[4] * factor)
+        RZ = round(position[5] * factor)
+        logger.debug(f"Going to observe position: X={X}, Y={Y}, Z={Z}, RX={RX}, RY={RY}, RZ={RZ}")
+        self.arm.MotionCtrl_2(0x01, 0x00, 100, 0x00)
+        self.arm.EndPoseCtrl(X, Y, Z, RX, RY, RZ)
+
+ def softStop(self):
+ self.gotoZero()
+ time.sleep(1)
+ self.arm.MotionCtrl_2(
+ 0x01,
+ 0x00,
+ 100,
+ )
+ self.arm.MotionCtrl_1(0x01, 0, 0)
+ time.sleep(3)
+
+ def cmd_ee_pose_values(self, x, y, z, r, p, y_, line_mode=False):
+ """Command end-effector to target pose in space (position + Euler angles)"""
+ factor = 1000
+ pose = [
+ x * factor * factor,
+ y * factor * factor,
+ z * factor * factor,
+ r * factor,
+ p * factor,
+ y_ * factor,
+ ]
+ self.arm.MotionCtrl_2(0x01, 0x02 if line_mode else 0x00, 100, 0x00)
+ self.arm.EndPoseCtrl(
+ int(pose[0]), int(pose[1]), int(pose[2]), int(pose[3]), int(pose[4]), int(pose[5])
+ )
+
+ def cmd_ee_pose(self, pose: Pose, line_mode=False):
+ """Command end-effector to target pose using Pose message"""
+ # Convert quaternion to euler angles
+ euler = quaternion_to_euler(pose.orientation, degrees=True)
+
+ # Command the pose
+ self.cmd_ee_pose_values(
+ pose.position.x,
+ pose.position.y,
+ pose.position.z,
+ euler.x,
+ euler.y,
+ euler.z,
+ line_mode,
+ )
+
+ def get_ee_pose(self):
+ """Return the current end-effector pose as Pose message with position in meters and quaternion orientation"""
+ pose = self.arm.GetArmEndPoseMsgs()
+ factor = 1000.0
+ # Extract individual pose values and convert to base units
+ # Position values are divided by 1000 to convert from SDK units to meters
+ # Rotation values are divided by 1000 to convert from SDK units to radians
+ x = pose.end_pose.X_axis / factor / factor # Convert mm to m
+ y = pose.end_pose.Y_axis / factor / factor # Convert mm to m
+ z = pose.end_pose.Z_axis / factor / factor # Convert mm to m
+ rx = pose.end_pose.RX_axis / factor
+ ry = pose.end_pose.RY_axis / factor
+ rz = pose.end_pose.RZ_axis / factor
+
+ # Create position vector (already in meters)
+ position = Vector3(x, y, z)
+
+ orientation = euler_to_quaternion(Vector3(rx, ry, rz), degrees=True)
+
+ return Pose(position, orientation)
+
+ def cmd_gripper_ctrl(self, position, effort=0.25):
+ """Command end-effector gripper"""
+ factor = 1000
+ position = position * factor * factor # meters
+ effort = effort * factor # N/m
+
+ self.arm.GripperCtrl(abs(round(position)), abs(round(effort)), 0x01, 0)
+ logger.debug(f"Commanding gripper position: {position}mm")
+
+ def enable_gripper(self):
+ """Enable the gripper using the initialization sequence"""
+ logger.info("Enabling gripper...")
+ while not self.arm.EnablePiper():
+ time.sleep(0.01)
+ self.arm.GripperCtrl(0, 1000, 0x02, 0)
+ self.arm.GripperCtrl(0, 1000, 0x01, 0)
+ logger.info("Gripper enabled")
+
+ def release_gripper(self):
+ """Release gripper by opening to 100mm (10cm)"""
+ logger.info("Releasing gripper (opening to 100mm)")
+ self.cmd_gripper_ctrl(0.1) # 0.1m = 100mm = 10cm
+
+ def get_gripper_feedback(self) -> Tuple[float, float]:
+ """
+ Get current gripper feedback.
+
+ Returns:
+ Tuple of (angle_degrees, effort) where:
+ - angle_degrees: Current gripper angle in degrees
+ - effort: Current gripper effort (0.0 to 1.0 range)
+ """
+ gripper_msg = self.arm.GetArmGripperMsgs()
+ angle_degrees = (
+ gripper_msg.gripper_state.grippers_angle / 1000.0
+ ) # Convert from SDK units to degrees
+ effort = gripper_msg.gripper_state.grippers_effort / 1000.0 # Convert from SDK units to N/m
+ return angle_degrees, effort
+
+    def close_gripper(self, commanded_effort: float = 0.5) -> None:
+        """
+        Close the gripper.
+
+        Args:
+            commanded_effort: Effort to use when closing gripper (default 0.5 N/m)
+        """
+        # Command gripper to close (0.0 position)
+        self.cmd_gripper_ctrl(0.0, effort=commanded_effort)
+        logger.info("Closing gripper")
+
+ def gripper_object_detected(self, commanded_effort: float = 0.25) -> bool:
+ """
+ Check if an object is detected in the gripper based on effort feedback.
+
+ Args:
+ commanded_effort: The effort that was used when closing gripper (default 0.25 N/m)
+
+ Returns:
+ True if object is detected in gripper, False otherwise
+ """
+ # Get gripper feedback
+ angle_degrees, actual_effort = self.get_gripper_feedback()
+
+ # Check if object is grasped (effort > 80% of commanded effort)
+ effort_threshold = 0.8 * commanded_effort
+ object_present = abs(actual_effort) > effort_threshold
+
+ if object_present:
+ logger.info(f"Object detected in gripper (effort: {actual_effort:.3f} N/m)")
+ else:
+ logger.info(f"No object detected (effort: {actual_effort:.3f} N/m)")
+
+ return object_present
+
+ def resetArm(self):
+ self.arm.MotionCtrl_1(0x02, 0, 0)
+ self.arm.MotionCtrl_2(0, 0, 0, 0x00)
+ logger.info("Resetting arm")
+
+ def init_vel_controller(self):
+ self.chain = kp.build_serial_chain_from_urdf(
+ open("dimos/hardware/piper_description.urdf"), "gripper_base"
+ )
+ self.J = self.chain.jacobian(np.zeros(6))
+ self.J_pinv = np.linalg.pinv(self.J)
+ self.dt = 0.01
+
+ def cmd_vel(self, x_dot, y_dot, z_dot, R_dot, P_dot, Y_dot):
+ joint_state = self.arm.GetArmJointMsgs().joint_state
+ # print(f"[PiperArm] Current Joints (direct): {joint_state}", type(joint_state))
+ joint_angles = np.array(
+ [
+ joint_state.joint_1,
+ joint_state.joint_2,
+ joint_state.joint_3,
+ joint_state.joint_4,
+ joint_state.joint_5,
+ joint_state.joint_6,
+ ]
+ )
+ # print(f"[PiperArm] Current Joints: {joint_angles}", type(joint_angles))
+ factor = 57295.7795 # 1000*180/3.1415926
+ joint_angles = joint_angles / factor # convert to radians
+
+ q = np.array(
+ [
+ joint_angles[0],
+ joint_angles[1],
+ joint_angles[2],
+ joint_angles[3],
+ joint_angles[4],
+ joint_angles[5],
+ ]
+ )
+ J = self.chain.jacobian(q)
+ self.J_pinv = np.linalg.pinv(J)
+ dq = self.J_pinv @ np.array([x_dot, y_dot, z_dot, R_dot, P_dot, Y_dot]) * self.dt
+ newq = q + dq
+
+ newq = newq * factor
+
+ self.arm.MotionCtrl_2(0x01, 0x01, 100, 0xAD)
+ self.arm.JointCtrl(
+ int(round(newq[0])),
+ int(round(newq[1])),
+ int(round(newq[2])),
+ int(round(newq[3])),
+ int(round(newq[4])),
+ int(round(newq[5])),
+ )
+ time.sleep(self.dt)
+ # print(f"[PiperArm] Moving to Joints to : {newq}")
+
+ def cmd_vel_ee(self, x_dot, y_dot, z_dot, RX_dot, PY_dot, YZ_dot):
+ factor = 1000
+ x_dot = x_dot * factor
+ y_dot = y_dot * factor
+ z_dot = z_dot * factor
+ RX_dot = RX_dot * factor
+ PY_dot = PY_dot * factor
+ YZ_dot = YZ_dot * factor
+
+ current_pose_msg = self.get_ee_pose()
+
+ # Convert quaternion to euler angles
+ quat = [
+ current_pose_msg.orientation.x,
+ current_pose_msg.orientation.y,
+ current_pose_msg.orientation.z,
+ current_pose_msg.orientation.w,
+ ]
+ rotation = R.from_quat(quat)
+ euler = rotation.as_euler("xyz") # Returns [rx, ry, rz] in radians
+
+ # Create current pose array [x, y, z, rx, ry, rz]
+ current_pose = np.array(
+ [
+ current_pose_msg.position.x,
+ current_pose_msg.position.y,
+ current_pose_msg.position.z,
+ euler[0],
+ euler[1],
+ euler[2],
+ ]
+ )
+
+ # Apply velocity increment
+ current_pose = (
+ current_pose + np.array([x_dot, y_dot, z_dot, RX_dot, PY_dot, YZ_dot]) * self.dt
+ )
+
+ self.cmd_ee_pose_values(
+ current_pose[0],
+ current_pose[1],
+ current_pose[2],
+ current_pose[3],
+ current_pose[4],
+ current_pose[5],
+ )
+ time.sleep(self.dt)
+
+    def disable(self):
+        self.softStop()
+
+        # DisablePiper() reports whether the arm is disabled; keep retrying
+        # until it succeeds (original loop condition was inverted).
+        while not self.arm.DisablePiper():
+            time.sleep(0.01)
+        self.arm.DisconnectPort()
+
+
+class VelocityController(Module):
+ cmd_vel: In[Twist] = None
+
+ def __init__(self, arm, period=0.01, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.arm = arm
+ self.period = period
+ self.latest_cmd = None
+ self.last_cmd_time = None
+
+    @rpc
+    def start(self):
+        """Subscribe to /cmd_vel and run a resolved-rate control loop in a daemon thread."""
+        self.cmd_vel.subscribe(self.handle_cmd_vel)
+
+        def control_loop():
+            while True:
+                # Stop the loop once commands have been absent for 1 second.
+                if self.last_cmd_time and (time.time() - self.last_cmd_time) > 1.0:
+                    logger.warning(
+                        "No velocity command received for 1 second, stopping control loop"
+                    )
+                    break
+                cmd_vel = self.latest_cmd
+                if cmd_vel is None:  # no command received yet — avoid AttributeError on None
+                    time.sleep(self.period)
+                    continue
+                joint_state = self.arm.GetArmJointMsgs().joint_state
+                joint_angles = np.array(
+                    [
+                        joint_state.joint_1,
+                        joint_state.joint_2,
+                        joint_state.joint_3,
+                        joint_state.joint_4,
+                        joint_state.joint_5,
+                        joint_state.joint_6,
+                    ]
+                )
+                factor = 57295.7795  # 1000*180/3.1415926
+                joint_angles = joint_angles / factor  # convert to radians
+                q = np.array(
+                    [
+                        joint_angles[0],
+                        joint_angles[1],
+                        joint_angles[2],
+                        joint_angles[3],
+                        joint_angles[4],
+                        joint_angles[5],
+                    ]
+                )
+
+                J = self.arm.chain.jacobian(q)  # kinematic chain lives on the PiperArm
+                self.J_pinv = np.linalg.pinv(J)
+                dq = (
+                    self.J_pinv
+                    @ np.array(
+                        [
+                            cmd_vel.linear.x,
+                            cmd_vel.linear.y,
+                            cmd_vel.linear.z,
+                            cmd_vel.angular.x,
+                            cmd_vel.angular.y,
+                            cmd_vel.angular.z,
+                        ]
+                    )
+                    * self.period
+                )
+                newq = q + dq
+
+                newq = newq * factor  # convert radians to scaled degree units for joint control
+
+                self.arm.MotionCtrl_2(0x01, 0x01, 100, 0xAD)
+                self.arm.JointCtrl(
+                    int(round(newq[0])),
+                    int(round(newq[1])),
+                    int(round(newq[2])),
+                    int(round(newq[3])),
+                    int(round(newq[4])),
+                    int(round(newq[5])),
+                )
+                time.sleep(self.period)
+
+        thread = threading.Thread(target=control_loop, daemon=True)
+        thread.start()
+
+ def handle_cmd_vel(self, cmd_vel: Twist):
+ self.latest_cmd = cmd_vel
+ self.last_cmd_time = time.time()
+
+
+@pytest.mark.tool
+def run_velocity_controller():
+ lcmservice.autoconf()
+ dimos = core.start(2)
+
+ velocity_controller = dimos.deploy(VelocityController, arm=arm, period=0.01)
+ velocity_controller.cmd_vel.transport = core.LCMTransport("/cmd_vel", Twist)
+
+ velocity_controller.start()
+
+ logger.info("Velocity controller started")
+ while True:
+ time.sleep(1)
+
+
+if __name__ == "__main__":
+ arm = PiperArm()
+
+ def get_key(timeout=0.1):
+ """Non-blocking key reader for arrow keys."""
+ fd = sys.stdin.fileno()
+ old_settings = termios.tcgetattr(fd)
+ try:
+ tty.setraw(fd)
+ rlist, _, _ = select.select([fd], [], [], timeout)
+ if rlist:
+ ch1 = sys.stdin.read(1)
+ if ch1 == "\x1b": # Arrow keys start with ESC
+ ch2 = sys.stdin.read(1)
+ if ch2 == "[":
+ ch3 = sys.stdin.read(1)
+ return ch1 + ch2 + ch3
+ else:
+ return ch1
+ return None
+ finally:
+ termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
+
+ def teleop_linear_vel(arm):
+ print("Use arrow keys to control linear velocity (x/y/z). Press 'q' to quit.")
+ print("Up/Down: +x/-x, Left/Right: +y/-y, 'w'/'s': +z/-z")
+ x_dot, y_dot, z_dot = 0.0, 0.0, 0.0
+ while True:
+ key = get_key(timeout=0.1)
+ if key == "\x1b[A": # Up arrow
+ x_dot += 0.01
+ elif key == "\x1b[B": # Down arrow
+ x_dot -= 0.01
+ elif key == "\x1b[C": # Right arrow
+ y_dot += 0.01
+ elif key == "\x1b[D": # Left arrow
+ y_dot -= 0.01
+ elif key == "w":
+ z_dot += 0.01
+ elif key == "s":
+ z_dot -= 0.01
+ elif key == "q":
+ logger.info("Exiting teleop")
+ arm.disable()
+ break
+
+ # Optionally, clamp velocities to reasonable limits
+ x_dot = max(min(x_dot, 0.5), -0.5)
+ y_dot = max(min(y_dot, 0.5), -0.5)
+ z_dot = max(min(z_dot, 0.5), -0.5)
+
+ # Only linear velocities, angular set to zero
+ arm.cmd_vel_ee(x_dot, y_dot, z_dot, 0, 0, 0)
+ logger.debug(
+ f"Current linear velocity: x={x_dot:.3f} m/s, y={y_dot:.3f} m/s, z={z_dot:.3f} m/s"
+ )
+
+ run_velocity_controller()
diff --git a/dimos/hardware/piper_description.urdf b/dimos/hardware/piper_description.urdf
new file mode 100755
index 0000000000..21209b6dbb
--- /dev/null
+++ b/dimos/hardware/piper_description.urdf
@@ -0,0 +1,497 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/dimos/hardware/zed_camera.py b/dimos/hardware/zed_camera.py
index b93d2577e6..7ee2aed634 100644
--- a/dimos/hardware/zed_camera.py
+++ b/dimos/hardware/zed_camera.py
@@ -17,6 +17,10 @@
import open3d as o3d
from typing import Optional, Tuple, Dict, Any
import logging
+import time
+import threading
+from reactivex import interval
+from reactivex import operators as ops
try:
import pyzed.sl as sl
@@ -25,8 +29,17 @@
logging.warning("ZED SDK not found. Please install pyzed to use ZED camera functionality.")
from dimos.hardware.stereo_camera import StereoCamera
+from dimos.core import Module, Out, rpc
+from dimos.utils.logging_config import setup_logger
-logger = logging.getLogger(__name__)
+# Import LCM message types
+from dimos_lcm.sensor_msgs import Image
+from dimos_lcm.sensor_msgs import CameraInfo
+from dimos_lcm.geometry_msgs import PoseStamped
+from dimos_lcm.std_msgs import Header, Time
+from dimos_lcm.geometry_msgs import Pose, Point, Quaternion
+
+logger = setup_logger(__name__)
class ZEDCamera(StereoCamera):
@@ -64,6 +77,7 @@ def __init__(
self.init_params = sl.InitParameters()
self.init_params.camera_resolution = resolution
self.init_params.depth_mode = depth_mode
+ self.init_params.coordinate_system = sl.COORDINATE_SYSTEM.RIGHT_HANDED_Z_UP_X_FWD
self.init_params.coordinate_units = sl.UNIT.METER
self.init_params.camera_fps = fps
@@ -84,6 +98,12 @@ def __init__(
self.point_cloud = sl.Mat()
self.confidence_map = sl.Mat()
+ # Positional tracking
+ self.tracking_enabled = False
+ self.tracking_params = sl.PositionalTrackingParameters()
+ self.camera_pose = sl.Pose()
+ self.sensors_data = sl.SensorsData()
+
self.is_opened = False
def open(self) -> bool:
@@ -109,12 +129,160 @@ def open(self) -> bool:
logger.error(f"Error opening ZED camera: {e}")
return False
- def close(self):
- """Close the ZED camera."""
- if self.is_opened:
- self.zed.close()
- self.is_opened = False
- logger.info("ZED camera closed")
+ def enable_positional_tracking(
+ self,
+ enable_area_memory: bool = False,
+ enable_pose_smoothing: bool = True,
+ enable_imu_fusion: bool = True,
+ set_floor_as_origin: bool = False,
+ initial_world_transform: Optional[sl.Transform] = None,
+ ) -> bool:
+ """
+ Enable positional tracking on the ZED camera.
+
+ Args:
+ enable_area_memory: Enable area learning to correct tracking drift
+ enable_pose_smoothing: Enable pose smoothing
+ enable_imu_fusion: Enable IMU fusion if available
+ set_floor_as_origin: Set the floor as origin (useful for robotics)
+ initial_world_transform: Initial world transform
+
+ Returns:
+ True if tracking enabled successfully
+ """
+ if not self.is_opened:
+ logger.error("ZED camera not opened")
+ return False
+
+ try:
+ # Configure tracking parameters
+ self.tracking_params.enable_area_memory = enable_area_memory
+ self.tracking_params.enable_pose_smoothing = enable_pose_smoothing
+ self.tracking_params.enable_imu_fusion = enable_imu_fusion
+ self.tracking_params.set_floor_as_origin = set_floor_as_origin
+
+ if initial_world_transform is not None:
+ self.tracking_params.initial_world_transform = initial_world_transform
+
+ # Enable tracking
+ err = self.zed.enable_positional_tracking(self.tracking_params)
+ if err != sl.ERROR_CODE.SUCCESS:
+ logger.error(f"Failed to enable positional tracking: {err}")
+ return False
+
+ self.tracking_enabled = True
+ logger.info("Positional tracking enabled successfully")
+ return True
+
+ except Exception as e:
+ logger.error(f"Error enabling positional tracking: {e}")
+ return False
+
+ def disable_positional_tracking(self):
+ """Disable positional tracking."""
+ if self.tracking_enabled:
+ self.zed.disable_positional_tracking()
+ self.tracking_enabled = False
+ logger.info("Positional tracking disabled")
+
+ def get_pose(
+ self, reference_frame: sl.REFERENCE_FRAME = sl.REFERENCE_FRAME.WORLD
+ ) -> Optional[Dict[str, Any]]:
+ """
+ Get the current camera pose.
+
+ Args:
+ reference_frame: Reference frame (WORLD or CAMERA)
+
+ Returns:
+ Dictionary containing:
+ - position: [x, y, z] in meters
+ - rotation: [x, y, z, w] quaternion
+ - euler_angles: [roll, pitch, yaw] in radians
+ - timestamp: Pose timestamp in nanoseconds
+ - confidence: Tracking confidence (0-100)
+ - valid: Whether pose is valid
+ """
+ if not self.tracking_enabled:
+ logger.error("Positional tracking not enabled")
+ return None
+
+ try:
+ # Get current pose
+ tracking_state = self.zed.get_position(self.camera_pose, reference_frame)
+
+ if tracking_state == sl.POSITIONAL_TRACKING_STATE.OK:
+ # Extract translation
+ translation = self.camera_pose.get_translation().get()
+
+ # Extract rotation (quaternion)
+ rotation = self.camera_pose.get_orientation().get()
+
+ # Get Euler angles
+ euler = self.camera_pose.get_euler_angles()
+
+ return {
+ "position": translation.tolist(),
+ "rotation": rotation.tolist(), # [x, y, z, w]
+ "euler_angles": euler.tolist(), # [roll, pitch, yaw]
+ "timestamp": self.camera_pose.timestamp.get_nanoseconds(),
+ "confidence": self.camera_pose.pose_confidence,
+ "valid": True,
+ "tracking_state": str(tracking_state),
+ }
+ else:
+ logger.warning(f"Tracking state: {tracking_state}")
+ return {"valid": False, "tracking_state": str(tracking_state)}
+
+ except Exception as e:
+ logger.error(f"Error getting pose: {e}")
+ return None
+
+ def get_imu_data(self) -> Optional[Dict[str, Any]]:
+ """
+ Get IMU sensor data if available.
+
+ Returns:
+ Dictionary containing:
+ - orientation: IMU orientation quaternion [x, y, z, w]
+ - angular_velocity: [x, y, z] in rad/s
+ - linear_acceleration: [x, y, z] in m/s²
+ - timestamp: IMU data timestamp
+ """
+ if not self.is_opened:
+ logger.error("ZED camera not opened")
+ return None
+
+ try:
+ # Get sensors data synchronized with images
+ if (
+ self.zed.get_sensors_data(self.sensors_data, sl.TIME_REFERENCE.IMAGE)
+ == sl.ERROR_CODE.SUCCESS
+ ):
+ imu = self.sensors_data.get_imu_data()
+
+ # Get IMU orientation
+ imu_orientation = imu.get_pose().get_orientation().get()
+
+ # Get angular velocity
+ angular_vel = imu.get_angular_velocity()
+
+ # Get linear acceleration
+ linear_accel = imu.get_linear_acceleration()
+
+ return {
+ "orientation": imu_orientation.tolist(),
+ "angular_velocity": angular_vel.tolist(),
+ "linear_acceleration": linear_accel.tolist(),
+ "timestamp": self.sensors_data.timestamp.get_nanoseconds(),
+ "temperature": self.sensors_data.temperature.get(sl.SENSOR_LOCATION.IMU),
+ }
+ else:
+ return None
+
+ except Exception as e:
+ logger.error(f"Error getting IMU data: {e}")
+ return None
def capture_frame(
self,
@@ -211,6 +379,52 @@ def capture_pointcloud(self) -> Optional[o3d.geometry.PointCloud]:
logger.error(f"Error capturing point cloud: {e}")
return None
+ def capture_frame_with_pose(
+ self,
+ ) -> Tuple[
+ Optional[np.ndarray], Optional[np.ndarray], Optional[np.ndarray], Optional[Dict[str, Any]]
+ ]:
+ """
+ Capture a frame with synchronized pose data.
+
+ Returns:
+ Tuple of (left_image, right_image, depth_map, pose_data)
+ """
+ if not self.is_opened:
+ logger.error("ZED camera not opened")
+ return None, None, None, None
+
+ try:
+ # Grab frame
+ if self.zed.grab(self.runtime_params) == sl.ERROR_CODE.SUCCESS:
+ # Get images and depth
+ left_img, right_img, depth = self.capture_frame()
+
+ # Get synchronized pose if tracking is enabled
+ pose_data = None
+ if self.tracking_enabled:
+ pose_data = self.get_pose()
+
+ return left_img, right_img, depth, pose_data
+ else:
+ logger.warning("Failed to grab frame from ZED camera")
+ return None, None, None, None
+
+ except Exception as e:
+ logger.error(f"Error capturing frame with pose: {e}")
+ return None, None, None, None
+
+ def close(self):
+ """Close the ZED camera."""
+ if self.is_opened:
+ # Disable tracking if enabled
+ if self.tracking_enabled:
+ self.disable_positional_tracking()
+
+ self.zed.close()
+ self.is_opened = False
+ logger.info("ZED camera closed")
+
def get_camera_info(self) -> Dict[str, Any]:
"""Get ZED camera information and calibration parameters."""
if not self.is_opened:
@@ -311,3 +525,344 @@ def __enter__(self):
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit."""
self.close()
+
+
class ZEDModule(Module):
    """
    Dask module for ZED camera that publishes sensor data via LCM.

    Publishes:
    - /zed/color_image: RGB camera images
    - /zed/depth_image: Depth images
    - /zed/camera_info: Camera calibration information
    - /zed/pose: Camera pose (if tracking enabled)
    """

    # Define LCM outputs (wired up by the Module framework; None until then).
    color_image: Out[Image] = None
    depth_image: Out[Image] = None
    camera_info: Out[CameraInfo] = None
    pose: Out[PoseStamped] = None

    def __init__(
        self,
        camera_id: int = 0,
        resolution: str = "HD720",
        depth_mode: str = "NEURAL",
        fps: int = 30,
        enable_tracking: bool = True,
        enable_imu_fusion: bool = True,
        set_floor_as_origin: bool = True,
        publish_rate: float = 30.0,
        frame_id: str = "zed_camera",
        **kwargs,
    ):
        """
        Initialize ZED Module.

        Args:
            camera_id: Camera ID (0 for first ZED)
            resolution: Resolution string ("HD720", "HD1080", "HD2K", "VGA")
            depth_mode: Depth mode string ("NEURAL", "ULTRA", "QUALITY", "PERFORMANCE")
            fps: Camera frame rate
            enable_tracking: Enable positional tracking
            enable_imu_fusion: Enable IMU fusion for tracking
            set_floor_as_origin: Set floor as origin for tracking
            publish_rate: Rate to publish messages (Hz)
            frame_id: TF frame ID for messages
            **kwargs: Forwarded unchanged to the base Module constructor.
        """
        super().__init__(**kwargs)

        self.camera_id = camera_id
        self.fps = fps
        self.enable_tracking = enable_tracking
        self.enable_imu_fusion = enable_imu_fusion
        self.set_floor_as_origin = set_floor_as_origin
        self.publish_rate = publish_rate
        self.frame_id = frame_id

        # Convert string parameters to ZED enums; unknown names silently fall
        # back to the defaults (HD720 / NEURAL) via getattr's third argument.
        self.resolution = getattr(sl.RESOLUTION, resolution, sl.RESOLUTION.HD720)
        self.depth_mode = getattr(sl.DEPTH_MODE, depth_mode, sl.DEPTH_MODE.NEURAL)

        # Internal state
        self.zed_camera = None  # ZEDCamera wrapper, created in start()
        self._running = False  # True while the periodic publish loop is active
        self._subscription = None  # Disposable handle for the interval timer
        self._sequence = 0  # Monotonic sequence number for message headers

        logger.info(f"ZEDModule initialized for camera {camera_id}")

    @rpc
    def start(self):
        """Start the ZED module and begin publishing data.

        Opens the camera, optionally enables positional tracking, publishes
        the calibration once, then schedules _capture_and_publish at
        publish_rate Hz. Calling start() while already running is a no-op.
        """
        if self._running:
            logger.warning("ZED module already running")
            return

        try:
            # Initialize ZED camera
            self.zed_camera = ZEDCamera(
                camera_id=self.camera_id,
                resolution=self.resolution,
                depth_mode=self.depth_mode,
                fps=self.fps,
            )

            # Open camera
            if not self.zed_camera.open():
                logger.error("Failed to open ZED camera")
                return

            # Enable tracking if requested
            if self.enable_tracking:
                success = self.zed_camera.enable_positional_tracking(
                    enable_imu_fusion=self.enable_imu_fusion,
                    set_floor_as_origin=self.set_floor_as_origin,
                    enable_pose_smoothing=True,
                    enable_area_memory=True,
                )
                if not success:
                    # Degrade gracefully: keep streaming images without pose.
                    logger.warning("Failed to enable positional tracking")
                    self.enable_tracking = False

            # Publish camera info once at startup
            self._publish_camera_info()

            # Start periodic frame capture and publishing
            self._running = True
            publish_interval = 1.0 / self.publish_rate

            # NOTE(review): assumes `interval` fires on a background scheduler
            # every publish_interval seconds — confirm scheduler semantics.
            self._subscription = interval(publish_interval).subscribe(
                lambda _: self._capture_and_publish()
            )

            logger.info(f"ZED module started, publishing at {self.publish_rate} Hz")

        except Exception as e:
            logger.error(f"Error starting ZED module: {e}")
            self._running = False

    @rpc
    def stop(self):
        """Stop the ZED module: cancel the publish timer and close the camera."""
        if not self._running:
            return

        self._running = False

        # Stop subscription
        if self._subscription:
            self._subscription.dispose()
            self._subscription = None

        # Close camera
        if self.zed_camera:
            self.zed_camera.close()
            self.zed_camera = None

        logger.info("ZED module stopped")

    def _capture_and_publish(self):
        """Capture frame and publish all data.

        Runs once per timer tick; silently drops the tick when the module is
        stopping or the grab yields no image/depth.
        """
        if not self._running or not self.zed_camera:
            return

        try:
            # Capture frame with pose
            left_img, _, depth, pose_data = self.zed_camera.capture_frame_with_pose()

            if left_img is None or depth is None:
                return

            # Get timestamp (host wall-clock, not the camera's sensor timestamp)
            timestamp_ns = time.time_ns()
            timestamp = Time(sec=timestamp_ns // 1_000_000_000, nsec=timestamp_ns % 1_000_000_000)

            # Create header shared by all messages from this tick
            header = Header(seq=self._sequence, stamp=timestamp, frame_id=self.frame_id)
            self._sequence += 1

            # Publish color image
            self._publish_color_image(left_img, header)

            # Publish depth image
            self._publish_depth_image(depth, header)

            # Publish camera info periodically
            # NOTE(review): this actually runs on every frame, not periodically
            # — consider rate-limiting if bandwidth matters.
            self._publish_camera_info()

            # Publish pose if tracking enabled and valid
            if self.enable_tracking and pose_data and pose_data.get("valid", False):
                self._publish_pose(pose_data, header)

        except Exception as e:
            logger.error(f"Error in capture and publish: {e}")

    def _publish_color_image(self, image: np.ndarray, header: Header):
        """Publish color image as LCM message.

        Args:
            image: HxWx3 BGR (converted to RGB here) or HxW mono image.
            header: Pre-built header carrying seq/stamp/frame_id.
        """
        try:
            # Convert BGR to RGB if needed
            if len(image.shape) == 3 and image.shape[2] == 3:
                image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            else:
                image_rgb = image

            # Create LCM Image message
            height, width = image_rgb.shape[:2]
            encoding = "rgb8" if len(image_rgb.shape) == 3 else "mono8"
            # Row stride in bytes: 3 bytes/pixel for RGB, 1 for mono.
            step = width * (3 if len(image_rgb.shape) == 3 else 1)
            data = image_rgb.tobytes()

            msg = Image(
                data_length=len(data),
                header=header,
                height=height,
                width=width,
                encoding=encoding,
                is_bigendian=0,
                step=step,
                data=data,
            )

            self.color_image.publish(msg)

        except Exception as e:
            logger.error(f"Error publishing color image: {e}")

    def _publish_depth_image(self, depth: np.ndarray, header: Header):
        """Publish depth image as LCM message.

        Args:
            depth: HxW depth map; cast to float32 meters before serializing.
            header: Pre-built header carrying seq/stamp/frame_id.
        """
        try:
            # Depth is float32 in meters
            height, width = depth.shape[:2]
            encoding = "32FC1"  # 32-bit float, single channel
            step = width * 4  # 4 bytes per float
            data = depth.astype(np.float32).tobytes()

            msg = Image(
                data_length=len(data),
                header=header,
                height=height,
                width=width,
                encoding=encoding,
                is_bigendian=0,
                step=step,
                data=data,
            )

            self.depth_image.publish(msg)

        except Exception as e:
            logger.error(f"Error publishing depth image: {e}")

    def _publish_camera_info(self):
        """Publish camera calibration information.

        Builds a ROS-style CameraInfo (row-major 3x3 K and R, 3x4 P,
        plumb_bob distortion) from the left camera's calibration.
        """
        try:
            info = self.zed_camera.get_camera_info()
            if not info:
                return

            # Get calibration parameters
            left_cam = info.get("left_cam", {})
            resolution = info.get("resolution", {})

            # Create CameraInfo message (whole-second stamp is sufficient here)
            header = Header(seq=0, stamp=Time(sec=int(time.time()), nsec=0), frame_id=self.frame_id)

            # Create camera matrix K (3x3)
            K = [
                left_cam.get("fx", 0),
                0,
                left_cam.get("cx", 0),
                0,
                left_cam.get("fy", 0),
                left_cam.get("cy", 0),
                0,
                0,
                1,
            ]

            # Distortion coefficients
            D = [
                left_cam.get("k1", 0),
                left_cam.get("k2", 0),
                left_cam.get("p1", 0),
                left_cam.get("p2", 0),
                left_cam.get("k3", 0),
            ]

            # Identity rotation matrix
            R = [1, 0, 0, 0, 1, 0, 0, 0, 1]

            # Projection matrix P (3x4)
            P = [
                left_cam.get("fx", 0),
                0,
                left_cam.get("cx", 0),
                0,
                0,
                left_cam.get("fy", 0),
                left_cam.get("cy", 0),
                0,
                0,
                0,
                1,
                0,
            ]

            msg = CameraInfo(
                D_length=len(D),
                header=header,
                height=resolution.get("height", 0),
                width=resolution.get("width", 0),
                distortion_model="plumb_bob",
                D=D,
                K=K,
                R=R,
                P=P,
                binning_x=0,
                binning_y=0,
            )

            self.camera_info.publish(msg)

        except Exception as e:
            logger.error(f"Error publishing camera info: {e}")

    def _publish_pose(self, pose_data: Dict[str, Any], header: Header):
        """Publish camera pose as PoseStamped message.

        Args:
            pose_data: Dict with "position" [x, y, z] and "rotation"
                quaternion [x, y, z, w]; missing keys default to identity.
            header: Pre-built header carrying seq/stamp/frame_id.
        """
        try:
            position = pose_data.get("position", [0, 0, 0])
            rotation = pose_data.get("rotation", [0, 0, 0, 1])  # quaternion [x,y,z,w]

            # Create Pose message
            pose = Pose(
                position=Point(x=position[0], y=position[1], z=position[2]),
                orientation=Quaternion(x=rotation[0], y=rotation[1], z=rotation[2], w=rotation[3]),
            )

            # Create PoseStamped message
            msg = PoseStamped(header=header, pose=pose)

            self.pose.publish(msg)

        except Exception as e:
            logger.error(f"Error publishing pose: {e}")

    @rpc
    def get_camera_info(self) -> Dict[str, Any]:
        """Get camera information and calibration parameters.

        Returns:
            The underlying camera's info dict, or {} before start().
        """
        if self.zed_camera:
            return self.zed_camera.get_camera_info()
        return {}

    @rpc
    def get_pose(self) -> Optional[Dict[str, Any]]:
        """Get current camera pose if tracking is enabled.

        Returns:
            Pose dict from the camera, or None when the camera is not
            started or tracking is disabled.
        """
        if self.zed_camera and self.enable_tracking:
            return self.zed_camera.get_pose()
        return None

    def cleanup(self):
        """Clean up resources on module destruction (delegates to stop())."""
        self.stop()
diff --git a/dimos/perception/manip_aio_pipeline.py b/dimos/manipulation/manip_aio_pipeline.py
similarity index 100%
rename from dimos/perception/manip_aio_pipeline.py
rename to dimos/manipulation/manip_aio_pipeline.py
diff --git a/dimos/perception/manip_aio_processer.py b/dimos/manipulation/manip_aio_processer.py
similarity index 100%
rename from dimos/perception/manip_aio_processer.py
rename to dimos/manipulation/manip_aio_processer.py
diff --git a/dimos/manipulation/visual_servoing/detection3d.py b/dimos/manipulation/visual_servoing/detection3d.py
new file mode 100644
index 0000000000..887fd023ab
--- /dev/null
+++ b/dimos/manipulation/visual_servoing/detection3d.py
@@ -0,0 +1,300 @@
+# Copyright 2025 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Real-time 3D object detection processor that extracts object poses from RGB-D data.
+"""
+
+from typing import List, Optional, Tuple
+import numpy as np
+import cv2
+
+from dimos.utils.logging_config import setup_logger
+from dimos.perception.segmentation.sam_2d_seg import Sam2DSegmenter
+from dimos.perception.pointcloud.utils import extract_centroids_from_masks
+from dimos.perception.detection2d.utils import calculate_object_size_from_bbox
+from dimos.perception.common.utils import bbox2d_to_corners
+
+from dimos_lcm.geometry_msgs import Pose, Vector3, Quaternion, Point
+from dimos_lcm.vision_msgs import (
+ Detection3D,
+ Detection3DArray,
+ BoundingBox3D,
+ ObjectHypothesisWithPose,
+ ObjectHypothesis,
+ Detection2D,
+ Detection2DArray,
+ BoundingBox2D,
+ Pose2D,
+ Point2D,
+)
+from dimos_lcm.std_msgs import Header
+from dimos.manipulation.visual_servoing.utils import (
+ estimate_object_depth,
+ visualize_detections_3d,
+ transform_pose,
+)
+
+logger = setup_logger("dimos.manipulation.visual_servoing.detection3d")
+
+
class Detection3DProcessor:
    """
    Real-time 3D detection processor optimized for speed.

    Uses Sam (FastSAM) for segmentation and mask generation, then extracts
    3D centroids from depth data.
    """

    def __init__(
        self,
        camera_intrinsics: List[float],  # [fx, fy, cx, cy]
        min_confidence: float = 0.6,
        min_points: int = 30,
        max_depth: float = 1.0,
        max_object_size: float = 0.15,
    ):
        """
        Initialize the real-time 3D detection processor.

        Args:
            camera_intrinsics: [fx, fy, cx, cy] camera parameters
            min_confidence: Minimum detection confidence threshold
            min_points: Minimum 3D points required for valid detection
            max_depth: Maximum valid depth in meters
            max_object_size: Size limit in meters used to reject oversized
                detections in process_frame.
        """
        self.camera_intrinsics = camera_intrinsics
        self.min_points = min_points
        self.max_depth = max_depth
        self.max_object_size = max_object_size

        # Initialize Sam segmenter: tracker and analyzer disabled, mask
        # filtering enabled (track IDs still come back from process_image).
        self.detector = Sam2DSegmenter(
            use_tracker=False,
            use_analyzer=False,
            use_filtering=True,
        )

        self.min_confidence = min_confidence

        logger.info(
            f"Initialized Detection3DProcessor with Sam segmenter, confidence={min_confidence}, "
            f"min_points={min_points}, max_depth={max_depth}m, max_object_size={max_object_size}m"
        )

    def process_frame(
        self, rgb_image: np.ndarray, depth_image: np.ndarray, transform: Optional[np.ndarray] = None
    ) -> Tuple[Detection3DArray, Detection2DArray]:
        """
        Process a single RGB-D frame to extract 3D object detections.

        Args:
            rgb_image: RGB image (H, W, 3)
            depth_image: Depth image (H, W) in meters
            transform: Optional 4x4 transformation matrix to transform objects from camera frame to desired frame

        Returns:
            Tuple of (Detection3DArray, Detection2DArray) with 3D and 2D information
        """

        # Convert RGB to BGR for Sam (OpenCV format)
        bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)

        # Run Sam segmentation (returns parallel lists, one entry per object)
        masks, bboxes, track_ids, probs, names = self.detector.process_image(bgr_image)

        # No detections: return empty, well-formed arrays rather than None.
        if not masks or len(masks) == 0:
            return Detection3DArray(
                detections_length=0, header=Header(), detections=[]
            ), Detection2DArray(detections_length=0, header=Header(), detections=[])

        # Convert CUDA tensors to numpy arrays if needed
        numpy_masks = []
        for mask in masks:
            if hasattr(mask, "cpu"):  # PyTorch tensor
                numpy_masks.append(mask.cpu().numpy())
            else:  # Already numpy array
                numpy_masks.append(mask)

        # Extract 3D centroids from masks
        poses = extract_centroids_from_masks(
            rgb_image=rgb_image,
            depth_image=depth_image,
            masks=numpy_masks,
            camera_intrinsics=self.camera_intrinsics,
        )

        detections_3d = []
        detections_2d = []
        # Index poses by mask index, keeping only those within max_depth.
        pose_dict = {p["mask_idx"]: p for p in poses if p["centroid"][2] < self.max_depth}

        for i, (bbox, name, prob, track_id) in enumerate(zip(bboxes, names, probs, track_ids)):
            if i not in pose_dict:
                continue

            pose = pose_dict[i]
            obj_cam_pos = pose["centroid"]

            # NOTE(review): redundant — pose_dict already filtered out
            # centroids at or beyond max_depth.
            if obj_cam_pos[2] > self.max_depth:
                continue

            # Calculate object size from bbox and depth
            width_m, height_m = calculate_object_size_from_bbox(
                bbox, obj_cam_pos[2], self.camera_intrinsics
            )

            # Calculate depth dimension using segmentation mask
            depth_m = estimate_object_depth(
                depth_image, numpy_masks[i] if i < len(numpy_masks) else None, bbox
            )

            size_x = max(width_m, 0.01)  # Minimum 1cm width
            size_y = max(height_m, 0.01)  # Minimum 1cm height
            size_z = max(depth_m, 0.01)  # Minimum 1cm depth

            # NOTE(review): min(...) rejects an object only when ALL three
            # dimensions exceed max_object_size; if the intent is to reject
            # anything with one oversized dimension, this should be max(...).
            if min(size_x, size_y, size_z) > self.max_object_size:
                continue

            # Transform to desired frame if transform matrix is provided
            if transform is not None:
                # Get orientation as euler angles, default to no rotation if not available
                obj_cam_orientation = pose.get(
                    "rotation", np.array([0.0, 0.0, 0.0])
                )  # Default to no rotation
                transformed_pose = transform_pose(
                    obj_cam_pos, obj_cam_orientation, transform, to_robot=True
                )
                center_pose = transformed_pose
            else:
                # If no transform, use camera coordinates
                center_pose = Pose(
                    Point(obj_cam_pos[0], obj_cam_pos[1], obj_cam_pos[2]),
                    Quaternion(0.0, 0.0, 0.0, 1.0),  # Default orientation
                )

            # Create Detection3D object
            detection = Detection3D(
                results_length=1,
                header=Header(),  # Empty header
                results=[
                    ObjectHypothesisWithPose(
                        hypothesis=ObjectHypothesis(class_id=name, score=float(prob))
                    )
                ],
                bbox=BoundingBox3D(center=center_pose, size=Vector3(size_x, size_y, size_z)),
                id=str(track_id),
            )

            detections_3d.append(detection)

            # Create corresponding Detection2D (same index/ID as the 3D entry)
            x1, y1, x2, y2 = bbox
            center_x = (x1 + x2) / 2.0
            center_y = (y1 + y2) / 2.0
            width = x2 - x1
            height = y2 - y1

            detection_2d = Detection2D(
                results_length=1,
                header=Header(),
                results=[
                    ObjectHypothesisWithPose(
                        hypothesis=ObjectHypothesis(class_id=name, score=float(prob))
                    )
                ],
                bbox=BoundingBox2D(
                    center=Pose2D(position=Point2D(center_x, center_y), theta=0.0),
                    size_x=float(width),
                    size_y=float(height),
                ),
                id=str(track_id),
            )
            detections_2d.append(detection_2d)

        # Create and return both arrays
        return (
            Detection3DArray(
                detections_length=len(detections_3d), header=Header(), detections=detections_3d
            ),
            Detection2DArray(
                detections_length=len(detections_2d), header=Header(), detections=detections_2d
            ),
        )

    def visualize_detections(
        self,
        rgb_image: np.ndarray,
        detections_3d: List[Detection3D],
        detections_2d: List[Detection2D],
        show_coordinates: bool = True,
    ) -> np.ndarray:
        """
        Visualize detections with 3D position overlay next to bounding boxes.

        Args:
            rgb_image: Original RGB image
            detections_3d: List of Detection3D objects
            detections_2d: List of Detection2D objects (must be 1:1 correspondence)
            show_coordinates: Whether to show 3D coordinates

        Returns:
            Visualization image
        """
        # Extract 2D bboxes from Detection2D objects

        bboxes_2d = []
        for det_2d in detections_2d:
            if det_2d.bbox:
                x1, y1, x2, y2 = bbox2d_to_corners(det_2d.bbox)
                bboxes_2d.append([x1, y1, x2, y2])

        return visualize_detections_3d(rgb_image, detections_3d, show_coordinates, bboxes_2d)

    def get_closest_detection(
        self, detections: List[Detection3D], class_filter: Optional[str] = None
    ) -> Optional[Detection3D]:
        """
        Get the closest detection with valid 3D data.

        Args:
            detections: List of Detection3D objects
            class_filter: Optional class name to filter by

        Returns:
            Closest Detection3D or None
        """
        valid_detections = []
        for d in detections:
            # Check if has valid bbox center position
            if d.bbox and d.bbox.center and d.bbox.center.position:
                # Check class filter if specified
                if class_filter is None or (
                    d.results_length > 0 and d.results[0].hypothesis.class_id == class_filter
                ):
                    valid_detections.append(d)

        if not valid_detections:
            return None

        # Sort by depth (Z coordinate); abs() handles frames where Z may be
        # negative toward the camera.
        def get_z_coord(d):
            return abs(d.bbox.center.position.z)

        return min(valid_detections, key=get_z_coord)

    def cleanup(self):
        """Clean up resources (delegates to the segmenter when supported)."""
        if hasattr(self.detector, "cleanup"):
            self.detector.cleanup()
        logger.info("Detection3DProcessor cleaned up")
diff --git a/dimos/manipulation/visual_servoing/manipulation_module.py b/dimos/manipulation/visual_servoing/manipulation_module.py
new file mode 100644
index 0000000000..b2724fb59a
--- /dev/null
+++ b/dimos/manipulation/visual_servoing/manipulation_module.py
@@ -0,0 +1,961 @@
+# Copyright 2025 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Manipulation module for robotic grasping with visual servoing.
+Handles grasping logic, state machine, and hardware coordination as a Dimos module.
+"""
+
+import cv2
+import time
+import threading
+from typing import Optional, Tuple, Any, Dict
+from enum import Enum
+from collections import deque
+
+import numpy as np
+
+from dimos.core import Module, In, Out, rpc
+from dimos_lcm.sensor_msgs import Image, CameraInfo
+from dimos_lcm.geometry_msgs import Vector3, Pose, Point, Quaternion
+from dimos_lcm.vision_msgs import Detection3DArray, Detection2DArray
+
+from dimos.hardware.piper_arm import PiperArm
+from dimos.manipulation.visual_servoing.detection3d import Detection3DProcessor
+from dimos.manipulation.visual_servoing.pbvs import PBVS
+from dimos.perception.common.utils import find_clicked_detection
+from dimos.manipulation.visual_servoing.utils import (
+ create_manipulation_visualization,
+ select_points_from_depth,
+ transform_points_3d,
+ update_target_grasp_pose,
+ apply_grasp_distance,
+ is_target_reached,
+)
+from dimos.utils.transform_utils import (
+ pose_to_matrix,
+ matrix_to_pose,
+ create_transform_from_6dof,
+ compose_transforms,
+)
+from dimos.utils.logging_config import setup_logger
+
+logger = setup_logger("dimos.manipulation.visual_servoing.manipulation_module")
+
+
class GraspStage(Enum):
    """Enum for different grasp stages of the pick-and-place state machine."""

    IDLE = "idle"  # No active grasp task
    PRE_GRASP = "pre_grasp"  # Approaching the standoff pose before grasping
    GRASP = "grasp"  # Moving in to the grasp pose
    CLOSE_AND_RETRACT = "close_and_retract"  # Closing gripper, then lifting
    PLACE = "place"  # Moving the held object to the place target
    RETRACT = "retract"  # Backing away after releasing the object
+
+
class Feedback:
    """Feedback data containing state information about the manipulation process."""

    def __init__(
        self,
        grasp_stage: GraspStage,
        target_tracked: bool,
        current_executed_pose: Optional[Pose] = None,
        current_ee_pose: Optional[Pose] = None,
        current_camera_pose: Optional[Pose] = None,
        target_pose: Optional[Pose] = None,
        waiting_for_reach: bool = False,
        success: Optional[bool] = None,
    ):
        # Current stage of the grasp state machine.
        self.grasp_stage = grasp_stage
        # Whether the target object is currently tracked by detection.
        self.target_tracked = target_tracked
        # Pose most recently commanded to the arm (may differ from target_pose).
        self.current_executed_pose = current_executed_pose
        # Latest end-effector pose reported by the arm.
        self.current_ee_pose = current_ee_pose
        # Latest camera pose — presumably EE pose composed with the
        # EE-to-camera transform; confirm against the producer.
        self.current_camera_pose = current_camera_pose
        # Desired target pose for the active stage.
        self.target_pose = target_pose
        # True while the controller is waiting for the arm to reach a pose.
        self.waiting_for_reach = waiting_for_reach
        # None while the task is in progress; True/False once it finishes.
        self.success = success
+
+
+class ManipulationModule(Module):
+ """
+ Manipulation module for visual servoing and grasping.
+
+ Subscribes to:
+ - ZED RGB images
+ - ZED depth images
+ - ZED camera info
+
+ Publishes:
+ - Visualization images
+
+ RPC methods:
+ - handle_keyboard_command: Process keyboard input
+ - pick_and_place: Execute pick and place task
+ """
+
+ # LCM inputs
+ rgb_image: In[Image] = None
+ depth_image: In[Image] = None
+ camera_info: In[CameraInfo] = None
+
+ # LCM outputs
+ viz_image: Out[Image] = None
+
+ def __init__(
+ self,
+ ee_to_camera_6dof: Optional[list] = None,
+ **kwargs,
+ ):
+ """
+ Initialize manipulation module.
+
+ Args:
+ ee_to_camera_6dof: EE to camera transform [x, y, z, rx, ry, rz] in meters and radians
+ workspace_min_radius: Minimum workspace radius in meters
+ workspace_max_radius: Maximum workspace radius in meters
+ min_grasp_pitch_degrees: Minimum grasp pitch angle (at max radius)
+ max_grasp_pitch_degrees: Maximum grasp pitch angle (at min radius)
+ """
+ super().__init__(**kwargs)
+
+ self.arm = PiperArm()
+
+ if ee_to_camera_6dof is None:
+ ee_to_camera_6dof = [-0.065, 0.03, -0.095, 0.0, -1.57, 0.0]
+ pos = Vector3(ee_to_camera_6dof[0], ee_to_camera_6dof[1], ee_to_camera_6dof[2])
+ rot = Vector3(ee_to_camera_6dof[3], ee_to_camera_6dof[4], ee_to_camera_6dof[5])
+ self.T_ee_to_camera = create_transform_from_6dof(pos, rot)
+
+ self.camera_intrinsics = None
+ self.detector = None
+ self.pbvs = None
+
+ # Control state
+ self.last_valid_target = None
+ self.waiting_for_reach = False
+ self.current_executed_pose = None # Track the actual pose sent to arm
+ self.target_updated = False
+ self.waiting_start_time = None
+ self.reach_pose_timeout = 20.0
+
+ # Grasp parameters
+ self.grasp_width_offset = 0.03
+ self.pregrasp_distance = 0.25
+ self.grasp_distance_range = 0.03
+ self.grasp_close_delay = 2.0
+ self.grasp_reached_time = None
+ self.gripper_max_opening = 0.07
+
+ # Workspace limits and dynamic pitch parameters
+ self.workspace_min_radius = 0.2
+ self.workspace_max_radius = 0.75
+ self.min_grasp_pitch_degrees = 5.0
+ self.max_grasp_pitch_degrees = 60.0
+
+ # Grasp stage tracking
+ self.grasp_stage = GraspStage.IDLE
+
+ # Pose stabilization tracking
+ self.pose_history_size = 4
+ self.pose_stabilization_threshold = 0.01
+ self.stabilization_timeout = 25.0
+ self.stabilization_start_time = None
+ self.reached_poses = deque(maxlen=self.pose_history_size)
+ self.adjustment_count = 0
+
+ # Pose reachability tracking
+ self.ee_pose_history = deque(maxlen=20) # Keep history of EE poses
+ self.stuck_pose_threshold = 0.001 # 1mm movement threshold
+ self.stuck_pose_adjustment_degrees = 5.0
+ self.stuck_count = 0
+ self.max_stuck_reattempts = 7
+
+ # State for visualization
+ self.current_visualization = None
+ self.last_detection_3d_array = None
+ self.last_detection_2d_array = None
+
+ # Grasp result and task tracking
+ self.pick_success = None
+ self.final_pregrasp_pose = None
+ self.task_failed = False
+ self.overall_success = None
+
+ # Task control
+ self.task_running = False
+ self.task_thread = None
+ self.stop_event = threading.Event()
+
+ # Latest sensor data
+ self.latest_rgb = None
+ self.latest_depth = None
+ self.latest_camera_info = None
+
+ # Target selection
+ self.target_click = None
+
+ # Place target position and object info
+ self.home_pose = Pose(Point(0.0, 0.0, 0.0), Quaternion(0.0, 0.0, 0.0, 1.0))
+ self.place_target_position = None
+ self.target_object_height = None
+ self.retract_distance = 0.12
+ self.place_pose = None
+ self.retract_pose = None
+ self.arm.gotoObserve()
+
+ @rpc
+ def start(self):
+ """Start the manipulation module."""
+ # Subscribe to camera data
+ self.rgb_image.subscribe(self._on_rgb_image)
+ self.depth_image.subscribe(self._on_depth_image)
+ self.camera_info.subscribe(self._on_camera_info)
+
+ logger.info("Manipulation module started")
+
+ @rpc
+ def stop(self):
+ """Stop the manipulation module."""
+ # Stop any running task
+ self.stop_event.set()
+ if self.task_thread and self.task_thread.is_alive():
+ self.task_thread.join(timeout=5.0)
+
+ self.reset_to_idle()
+ logger.info("Manipulation module stopped")
+
+ def _on_rgb_image(self, msg: Image):
+ """Handle RGB image messages."""
+ try:
+ data = np.frombuffer(msg.data, dtype=np.uint8)
+ if msg.encoding == "rgb8":
+ self.latest_rgb = data.reshape((msg.height, msg.width, 3))
+ else:
+ logger.warning(f"Unsupported RGB encoding: {msg.encoding}")
+ except Exception as e:
+ logger.error(f"Error processing RGB image: {e}")
+
+ def _on_depth_image(self, msg: Image):
+ """Handle depth image messages."""
+ try:
+ if msg.encoding == "32FC1":
+ data = np.frombuffer(msg.data, dtype=np.float32)
+ self.latest_depth = data.reshape((msg.height, msg.width))
+ else:
+ logger.warning(f"Unsupported depth encoding: {msg.encoding}")
+ except Exception as e:
+ logger.error(f"Error processing depth image: {e}")
+
+ def _on_camera_info(self, msg: CameraInfo):
+ """Handle camera info messages."""
+ try:
+ self.camera_intrinsics = [msg.K[0], msg.K[4], msg.K[2], msg.K[5]]
+
+ if self.detector is None:
+ self.detector = Detection3DProcessor(self.camera_intrinsics)
+ self.pbvs = PBVS()
+ logger.info("Initialized detection and PBVS processors")
+
+ self.latest_camera_info = msg
+ except Exception as e:
+ logger.error(f"Error processing camera info: {e}")
+
+ @rpc
+ def get_single_rgb_frame(self) -> Optional[np.ndarray]:
+ """
+ get the latest rgb frame from the camera
+ """
+ return self.latest_rgb
+
+ @rpc
+ def handle_keyboard_command(self, key: str) -> str:
+ """
+ Handle keyboard commands for robot control.
+
+ Args:
+ key: Keyboard key as string
+
+ Returns:
+ Action taken as string, or empty string if no action
+ """
+ key_code = ord(key) if len(key) == 1 else int(key)
+
+ if key_code == ord("r"):
+ self.stop_event.set()
+ self.task_running = False
+ self.reset_to_idle()
+ return "reset"
+ elif key_code == ord("s"):
+ logger.info("SOFT STOP - Emergency stopping robot!")
+ self.arm.softStop()
+ self.stop_event.set()
+ self.task_running = False
+ return "stop"
+ elif key_code == ord(" ") and self.pbvs and self.pbvs.target_grasp_pose:
+ if self.grasp_stage == GraspStage.PRE_GRASP:
+ self.set_grasp_stage(GraspStage.GRASP)
+ logger.info("Executing target pose")
+ return "execute"
+ elif key_code == ord("g"):
+ logger.info("Opening gripper")
+ self.arm.release_gripper()
+ return "release"
+
+ return ""
+
+ @rpc
+ def pick_and_place(
+ self, target_x: int = None, target_y: int = None, place_x: int = None, place_y: int = None
+ ) -> Dict[str, Any]:
+ """
+ Start a pick and place task.
+
+ Args:
+ target_x: Optional X coordinate of target object
+ target_y: Optional Y coordinate of target object
+ place_x: Optional X coordinate of place location
+ place_y: Optional Y coordinate of place location
+
+ Returns:
+ Dict with status and message
+ """
+ if self.task_running:
+ return {"status": "error", "message": "Task already running"}
+
+ if self.camera_intrinsics is None:
+ return {"status": "error", "message": "Camera not initialized"}
+
+ if target_x is not None and target_y is not None:
+ self.target_click = (target_x, target_y)
+ if place_x is not None and self.latest_depth is not None:
+ points_3d_camera = select_points_from_depth(
+ self.latest_depth,
+ (place_x, place_y),
+ self.camera_intrinsics,
+ radius=10,
+ )
+
+ if points_3d_camera.size > 0:
+ ee_pose = self.arm.get_ee_pose()
+ ee_transform = pose_to_matrix(ee_pose)
+ camera_transform = compose_transforms(ee_transform, self.T_ee_to_camera)
+
+ points_3d_world = transform_points_3d(
+ points_3d_camera,
+ camera_transform,
+ to_robot=True,
+ )
+
+ place_position = np.mean(points_3d_world, axis=0)
+ self.place_target_position = place_position
+ logger.info(
+ f"Place target set at position: ({place_position[0]:.3f}, {place_position[1]:.3f}, {place_position[2]:.3f})"
+ )
+ else:
+ logger.warning("No valid depth points found at place location")
+ self.place_target_position = None
+ else:
+ self.place_target_position = None
+
+ self.task_failed = False
+ self.stop_event.clear()
+
+ if self.task_thread and self.task_thread.is_alive():
+ self.stop_event.set()
+ self.task_thread.join(timeout=1.0)
+ self.task_thread = threading.Thread(target=self._run_pick_and_place, daemon=True)
+ self.task_thread.start()
+
+ return {"status": "started", "message": "Pick and place task started"}
+
+ def _run_pick_and_place(self):
+ """Run the pick and place task loop."""
+ self.task_running = True
+ logger.info("Starting pick and place task")
+
+ try:
+ while not self.stop_event.is_set():
+ if self.task_failed:
+ logger.error("Task failed, terminating pick and place")
+ self.stop_event.set()
+ break
+
+ feedback = self.update()
+ if feedback is None:
+ time.sleep(0.01)
+ continue
+
+ if feedback.success is not None:
+ if feedback.success:
+ logger.info("Pick and place completed successfully!")
+ else:
+ logger.warning("Pick and place failed")
+ self.reset_to_idle()
+ self.stop_event.set()
+ break
+
+ time.sleep(0.01)
+
+ except Exception as e:
+ logger.error(f"Error in pick and place task: {e}")
+ self.task_failed = True
+ finally:
+ self.task_running = False
+ logger.info("Pick and place task ended")
+
+ def set_grasp_stage(self, stage: GraspStage):
+ """Set the grasp stage."""
+ self.grasp_stage = stage
+ logger.info(f"Grasp stage: {stage.value}")
+
+ def calculate_dynamic_grasp_pitch(self, target_pose: Pose) -> float:
+ """
+ Calculate grasp pitch dynamically based on distance from robot base.
+ Maps workspace radius to grasp pitch angle.
+
+ Args:
+ target_pose: Target pose
+
+ Returns:
+ Grasp pitch angle in degrees
+ """
+ # Calculate 3D distance from robot base (assumes robot at origin)
+ position = target_pose.position
+ distance = np.sqrt(position.x**2 + position.y**2 + position.z**2)
+
+ # Clamp distance to workspace limits
+ distance = np.clip(distance, self.workspace_min_radius, self.workspace_max_radius)
+
+ # Linear interpolation: min_radius -> max_pitch, max_radius -> min_pitch
+ # Normalized distance (0 to 1)
+ normalized_dist = (distance - self.workspace_min_radius) / (
+ self.workspace_max_radius - self.workspace_min_radius
+ )
+
+ # Inverse mapping: closer objects need higher pitch
+ pitch_degrees = self.max_grasp_pitch_degrees - (
+ normalized_dist * (self.max_grasp_pitch_degrees - self.min_grasp_pitch_degrees)
+ )
+
+ return pitch_degrees
+
+ def check_within_workspace(self, target_pose: Pose) -> bool:
+ """
+ Check if pose is within workspace limits and log error if not.
+
+ Args:
+ target_pose: Target pose to validate
+
+ Returns:
+ True if within workspace, False otherwise
+ """
+ # Calculate 3D distance from robot base
+ position = target_pose.position
+ distance = np.sqrt(position.x**2 + position.y**2 + position.z**2)
+
+ if not (self.workspace_min_radius <= distance <= self.workspace_max_radius):
+ logger.error(
+ f"Target outside workspace limits: distance {distance:.3f}m not in [{self.workspace_min_radius:.2f}, {self.workspace_max_radius:.2f}]"
+ )
+ return False
+
+ return True
+
+ def _check_reach_timeout(self) -> Tuple[bool, float]:
+ """Check if robot has exceeded timeout while reaching pose.
+
+ Returns:
+ Tuple of (timed_out, time_elapsed)
+ """
+ if self.waiting_start_time:
+ time_elapsed = time.time() - self.waiting_start_time
+ if time_elapsed > self.reach_pose_timeout:
+ logger.warning(
+ f"Robot failed to reach pose within {self.reach_pose_timeout}s timeout"
+ )
+ self.task_failed = True
+ self.reset_to_idle()
+ return True, time_elapsed
+ return False, time_elapsed
+ return False, 0.0
+
+ def _check_if_stuck(self) -> bool:
+ """
+ Check if robot is stuck by analyzing pose history.
+
+ Returns:
+ Tuple of (is_stuck, max_std_dev_mm)
+ """
+ if len(self.ee_pose_history) < self.ee_pose_history.maxlen:
+ return False
+
+ # Extract positions from pose history
+ positions = np.array(
+ [[p.position.x, p.position.y, p.position.z] for p in self.ee_pose_history]
+ )
+
+ # Calculate standard deviation of positions
+ std_devs = np.std(positions, axis=0)
+ # Check if all standard deviations are below stuck threshold
+ is_stuck = np.all(std_devs < self.stuck_pose_threshold)
+
+ return is_stuck
+
+ def check_reach_and_adjust(self) -> bool:
+ """
+ Check if robot has reached the current executed pose while waiting.
+ Handles timeout internally by failing the task.
+ Also detects if the robot is stuck (not moving towards target).
+
+ Returns:
+ True if reached, False if still waiting or not in waiting state
+ """
+ if not self.waiting_for_reach or not self.current_executed_pose:
+ return False
+
+ # Get current end-effector pose
+ ee_pose = self.arm.get_ee_pose()
+ target_pose = self.current_executed_pose
+
+ # Check for timeout - this will fail task and reset if timeout occurred
+ timed_out, time_elapsed = self._check_reach_timeout()
+ if timed_out:
+ return False
+
+ self.ee_pose_history.append(ee_pose)
+
+ # Check if robot is stuck
+ is_stuck = self._check_if_stuck()
+ if is_stuck:
+ if self.grasp_stage == GraspStage.RETRACT or self.grasp_stage == GraspStage.PLACE:
+ self.waiting_for_reach = False
+ self.waiting_start_time = None
+ self.stuck_count = 0
+ self.ee_pose_history.clear()
+ return True
+ self.stuck_count += 1
+ pitch_degrees = self.calculate_dynamic_grasp_pitch(target_pose)
+ if self.stuck_count % 2 == 0:
+ pitch_degrees += self.stuck_pose_adjustment_degrees * (1 + self.stuck_count // 2)
+ else:
+ pitch_degrees -= self.stuck_pose_adjustment_degrees * (1 + self.stuck_count // 2)
+
+ pitch_degrees = max(
+ self.min_grasp_pitch_degrees, min(self.max_grasp_pitch_degrees, pitch_degrees)
+ )
+ updated_target_pose = update_target_grasp_pose(target_pose, ee_pose, 0.0, pitch_degrees)
+ self.arm.cmd_ee_pose(updated_target_pose)
+ self.current_executed_pose = updated_target_pose
+ self.ee_pose_history.clear()
+ self.waiting_for_reach = True
+ self.waiting_start_time = time.time()
+ return False
+
+ if self.stuck_count >= self.max_stuck_reattempts:
+ self.task_failed = True
+ self.reset_to_idle()
+ return False
+
+ if is_target_reached(target_pose, ee_pose, self.pbvs.target_tolerance):
+ self.waiting_for_reach = False
+ self.waiting_start_time = None
+ self.stuck_count = 0
+ self.ee_pose_history.clear()
+ return True
+ return False
+
+ def _update_tracking(self, detection_3d_array: Optional[Detection3DArray]) -> bool:
+ """Update tracking with new detections."""
+ if not detection_3d_array or not self.pbvs:
+ return False
+
+ target_tracked = self.pbvs.update_tracking(detection_3d_array)
+ if target_tracked:
+ self.target_updated = True
+ self.last_valid_target = self.pbvs.get_current_target()
+ return target_tracked
+
+ def reset_to_idle(self):
+ """Reset the manipulation system to IDLE state."""
+ if self.pbvs:
+ self.pbvs.clear_target()
+ self.grasp_stage = GraspStage.IDLE
+ self.reached_poses.clear()
+ self.ee_pose_history.clear()
+ self.adjustment_count = 0
+ self.waiting_for_reach = False
+ self.current_executed_pose = None
+ self.target_updated = False
+ self.stabilization_start_time = None
+ self.grasp_reached_time = None
+ self.waiting_start_time = None
+ self.pick_success = None
+ self.final_pregrasp_pose = None
+ self.overall_success = None
+ self.place_pose = None
+ self.retract_pose = None
+ self.stuck_count = 0
+
+ self.arm.gotoObserve()
+
+ def execute_idle(self):
+ """Execute idle stage."""
+ pass
+
    def execute_pre_grasp(self):
        """Execute pre-grasp stage: visual servoing to pre-grasp position.

        Per tick: while a motion is in flight, poll reach/stuck handling and
        collect reached poses; otherwise recompute the pre-grasp pose from the
        tracked target and either transition to GRASP (poses stabilized) or
        command a new pose. Aborts if stabilization takes too long.
        """
        if self.waiting_for_reach:
            if self.check_reach_and_adjust():
                # Reached poses feed check_target_stabilized() below.
                self.reached_poses.append(self.current_executed_pose)
                self.target_updated = False
                time.sleep(0.2)
            return
        if (
            self.stabilization_start_time
            and (time.time() - self.stabilization_start_time) > self.stabilization_timeout
        ):
            logger.warning(
                f"Failed to get stable grasp after {self.stabilization_timeout} seconds, resetting"
            )
            self.task_failed = True
            self.reset_to_idle()
            return

        ee_pose = self.arm.get_ee_pose()
        # NOTE(review): assumes pbvs.current_target is set while in PRE_GRASP;
        # its bbox.center is used as the pose for pitch computation — confirm.
        dynamic_pitch = self.calculate_dynamic_grasp_pitch(self.pbvs.current_target.bbox.center)

        _, _, _, has_target, target_pose = self.pbvs.compute_control(
            ee_pose, self.pregrasp_distance, dynamic_pitch
        )
        if target_pose and has_target:
            # Validate target pose is within workspace
            if not self.check_within_workspace(target_pose):
                self.task_failed = True
                self.reset_to_idle()
                return

            if self.check_target_stabilized():
                logger.info("Target stabilized, transitioning to GRASP")
                self.final_pregrasp_pose = self.current_executed_pose
                self.grasp_stage = GraspStage.GRASP
                self.adjustment_count = 0
                self.waiting_for_reach = False
            elif not self.waiting_for_reach and self.target_updated:
                # Command the freshly computed pre-grasp pose and wait on it.
                self.arm.cmd_ee_pose(target_pose)
                self.current_executed_pose = target_pose
                self.waiting_for_reach = True
                self.waiting_start_time = time.time()
                self.target_updated = False
                self.adjustment_count += 1
                time.sleep(0.2)
+
    def execute_grasp(self):
        """Execute grasp stage: move to final grasp position.

        Waits for the commanded grasp pose, holds for ``grasp_close_delay``
        seconds, then hands off to CLOSE_AND_RETRACT. Before a pose is in
        flight, computes a grasp pose from the last valid target and commands
        it with the gripper pre-opened to the object width.
        """
        if self.waiting_for_reach:
            # Latch the first time the grasp pose is reached.
            if self.check_reach_and_adjust() and not self.grasp_reached_time:
                self.grasp_reached_time = time.time()
            return

        if self.grasp_reached_time:
            # Let the arm settle before closing the gripper.
            if (time.time() - self.grasp_reached_time) >= self.grasp_close_delay:
                logger.info("Grasp delay completed, closing gripper")
                self.grasp_stage = GraspStage.CLOSE_AND_RETRACT
            return

        if self.last_valid_target:
            # Calculate dynamic pitch for current target
            dynamic_pitch = self.calculate_dynamic_grasp_pitch(self.last_valid_target.bbox.center)
            # Map pitch to an approach offset in [-range, +range]:
            # 0 deg -> -grasp_distance_range, 90 deg -> +grasp_distance_range.
            normalized_pitch = dynamic_pitch / 90.0
            grasp_distance = -self.grasp_distance_range + (
                2 * self.grasp_distance_range * normalized_pitch
            )

            ee_pose = self.arm.get_ee_pose()
            _, _, _, has_target, target_pose = self.pbvs.compute_control(
                ee_pose, grasp_distance, dynamic_pitch
            )

            if target_pose and has_target:
                # Validate grasp pose is within workspace
                if not self.check_within_workspace(target_pose):
                    self.task_failed = True
                    self.reset_to_idle()
                    return

                # Pre-open the gripper to the object width plus a margin,
                # clamped to [5mm, gripper_max_opening].
                object_width = self.last_valid_target.bbox.size.x
                gripper_opening = max(
                    0.005, min(object_width + self.grasp_width_offset, self.gripper_max_opening)
                )

                logger.info(f"Executing grasp: gripper={gripper_opening * 1000:.1f}mm")
                self.arm.cmd_gripper_ctrl(gripper_opening)
                self.arm.cmd_ee_pose(target_pose, line_mode=True)
                self.current_executed_pose = target_pose
                self.waiting_for_reach = True
                self.waiting_start_time = time.time()
+
    def execute_close_and_retract(self):
        """Execute the retraction sequence after gripper has been closed.

        First tick: close the gripper and command a linear retreat to the
        stored pre-grasp pose. Later ticks: once the retreat pose is reached,
        verify the grasp via the gripper's object detector and proceed to
        PLACE, finish successfully, or fail the task.
        """
        if self.waiting_for_reach and self.final_pregrasp_pose:
            if self.check_reach_and_adjust():
                logger.info("Reached pre-grasp retraction position")
                self.pick_success = self.arm.gripper_object_detected()
                if self.pick_success:
                    logger.info("Object successfully grasped!")
                    if self.place_target_position is not None:
                        logger.info("Transitioning to PLACE stage")
                        self.grasp_stage = GraspStage.PLACE
                    else:
                        self.overall_success = True
                else:
                    logger.warning("No object detected in gripper")
                    self.task_failed = True
                    self.overall_success = False
            return
        if not self.waiting_for_reach:
            logger.info("Retracting to pre-grasp position")
            # Command the retreat first, then close the gripper while moving.
            self.arm.cmd_ee_pose(self.final_pregrasp_pose, line_mode=True)
            self.current_executed_pose = self.final_pregrasp_pose
            self.arm.close_gripper()
            self.waiting_for_reach = True
            self.waiting_start_time = time.time()
+
    def execute_place(self):
        """Execute place stage: move to place position and release object.

        Commands the place pose once, then waits until it is reached, releases
        the gripper, and transitions to RETRACT.
        """
        if self.waiting_for_reach:
            # Use the already executed pose instead of recalculating
            if self.check_reach_and_adjust():
                logger.info("Reached place position, releasing gripper")
                self.arm.release_gripper()
                # Give the object time to drop before retreating.
                time.sleep(1.0)
                self.place_pose = self.current_executed_pose
                logger.info("Transitioning to RETRACT stage")
                self.grasp_stage = GraspStage.RETRACT
            return

        if not self.waiting_for_reach:
            place_pose = self.get_place_target_pose()
            if place_pose:
                logger.info("Moving to place position")
                self.arm.cmd_ee_pose(place_pose, line_mode=True)
                self.current_executed_pose = place_pose
                self.waiting_for_reach = True
                self.waiting_start_time = time.time()
            else:
                logger.error("Failed to get place target pose")
                self.task_failed = True
                self.overall_success = False
+
    def execute_retract(self):
        """Execute retract stage: retract from place position.

        First tick: derive a retract pose from the stored place pose and
        command it. Once reached, return the arm to the observe pose, close
        the gripper, and mark the task successful.
        """
        if self.waiting_for_reach and self.retract_pose:
            if self.check_reach_and_adjust():
                logger.info("Reached retract position")
                logger.info("Returning to observe position")
                self.arm.gotoObserve()
                self.arm.close_gripper()
                self.overall_success = True
                logger.info("Pick and place completed successfully!")
            return

        if not self.waiting_for_reach:
            if self.place_pose:
                # Retreat away from the place pose towards home, using the
                # same pitch mapping as the approach.
                pose_pitch = self.calculate_dynamic_grasp_pitch(self.place_pose)
                self.retract_pose = update_target_grasp_pose(
                    self.place_pose, self.home_pose, self.retract_distance, pose_pitch
                )
                logger.info("Retracting from place position")
                self.arm.cmd_ee_pose(self.retract_pose, line_mode=True)
                self.current_executed_pose = self.retract_pose
                self.waiting_for_reach = True
                self.waiting_start_time = time.time()
            else:
                logger.error("No place pose stored for retraction")
                self.task_failed = True
                self.overall_success = False
+
    def capture_and_process(
        self,
    ) -> Tuple[
        Optional[np.ndarray], Optional[Detection3DArray], Optional[Detection2DArray], Optional[Pose]
    ]:
        """Capture frame from camera data and process detections.

        Returns:
            Tuple of (rgb, detection_3d_array, detection_2d_array,
            camera_pose); all None when frames or the detector are missing.
        """
        if self.latest_rgb is None or self.latest_depth is None or self.detector is None:
            return None, None, None, None

        # Camera pose = EE pose composed with the fixed EE->camera extrinsic
        # (eye-in-hand configuration).
        ee_pose = self.arm.get_ee_pose()
        ee_transform = pose_to_matrix(ee_pose)
        camera_transform = compose_transforms(ee_transform, self.T_ee_to_camera)
        camera_pose = matrix_to_pose(camera_transform)
        detection_3d_array, detection_2d_array = self.detector.process_frame(
            self.latest_rgb, self.latest_depth, camera_transform
        )

        return self.latest_rgb, detection_3d_array, detection_2d_array, camera_pose
+
    def pick_target(self, x: int, y: int) -> bool:
        """Select a target object at the given pixel coordinates.

        Looks up the clicked 2D detection, validates the matching 3D pose
        against the workspace, hands the target to PBVS, and starts the
        PRE_GRASP stage.

        Args:
            x: Pixel column of the click.
            y: Pixel row of the click.

        Returns:
            True if a valid target was selected, False otherwise.
        """
        if not self.last_detection_2d_array or not self.last_detection_3d_array:
            logger.warning("No detections available for target selection")
            return False

        clicked_3d = find_clicked_detection(
            (x, y), self.last_detection_2d_array.detections, self.last_detection_3d_array.detections
        )
        if clicked_3d and self.pbvs:
            # Validate workspace
            if not self.check_within_workspace(clicked_3d.bbox.center):
                self.task_failed = True
                return False

            self.pbvs.set_target(clicked_3d)

            # Remember the object height for the place z-offset later.
            if clicked_3d.bbox and clicked_3d.bbox.size:
                self.target_object_height = clicked_3d.bbox.size.z
                logger.info(f"Target object height: {self.target_object_height:.3f}m")

            position = clicked_3d.bbox.center.position
            logger.info(
                f"Target selected: ID={clicked_3d.id}, pos=({position.x:.3f}, {position.y:.3f}, {position.z:.3f})"
            )
            # Reset per-attempt bookkeeping and start the pre-grasp phase.
            self.grasp_stage = GraspStage.PRE_GRASP
            self.reached_poses.clear()
            self.adjustment_count = 0
            self.waiting_for_reach = False
            self.current_executed_pose = None
            self.stabilization_start_time = time.time()
            return True
        return False
+
    def update(self) -> Optional[Feedback]:
        """Main update function that handles capture, processing, control, and visualization.

        Sequence per tick: capture + detect, consume any pending UI click,
        refresh target tracking (only while servoing and not mid-motion),
        dispatch the current stage handler, then build and publish feedback.

        Returns:
            A Feedback snapshot of the current manipulation state, or None
            when no camera frame is available.
        """
        rgb, detection_3d_array, detection_2d_array, camera_pose = self.capture_and_process()
        if rgb is None:
            return None

        self.last_detection_3d_array = detection_3d_array
        self.last_detection_2d_array = detection_2d_array
        # Consume a pending UI click, if any.
        if self.target_click:
            x, y = self.target_click
            if self.pick_target(x, y):
                self.target_click = None

        # Only refresh tracking in the visual-servoing stages, and never
        # while a motion is still in flight.
        if (
            detection_3d_array
            and self.grasp_stage in [GraspStage.PRE_GRASP, GraspStage.GRASP]
            and not self.waiting_for_reach
        ):
            self._update_tracking(detection_3d_array)
        # Dispatch to the handler for the current stage.
        stage_handlers = {
            GraspStage.IDLE: self.execute_idle,
            GraspStage.PRE_GRASP: self.execute_pre_grasp,
            GraspStage.GRASP: self.execute_grasp,
            GraspStage.CLOSE_AND_RETRACT: self.execute_close_and_retract,
            GraspStage.PLACE: self.execute_place,
            GraspStage.RETRACT: self.execute_retract,
        }
        if self.grasp_stage in stage_handlers:
            stage_handlers[self.grasp_stage]()

        target_tracked = self.pbvs.get_current_target() is not None if self.pbvs else False
        ee_pose = self.arm.get_ee_pose()
        feedback = Feedback(
            grasp_stage=self.grasp_stage,
            target_tracked=target_tracked,
            current_executed_pose=self.current_executed_pose,
            current_ee_pose=ee_pose,
            current_camera_pose=camera_pose,
            target_pose=self.pbvs.target_grasp_pose if self.pbvs else None,
            waiting_for_reach=self.waiting_for_reach,
            success=self.overall_success,
        )

        if self.task_running:
            self.current_visualization = create_manipulation_visualization(
                rgb, feedback, detection_3d_array, detection_2d_array
            )

        if self.current_visualization is not None:
            self._publish_visualization(self.current_visualization)

        return feedback
+
    def _publish_visualization(self, viz_image: np.ndarray):
        """Publish visualization image to LCM.

        Converts the BGR (OpenCV) frame to RGB and wraps it in an Image
        message. Failures are logged, never raised.

        Args:
            viz_image: Visualization frame in BGR channel order.
        """
        try:
            viz_rgb = cv2.cvtColor(viz_image, cv2.COLOR_BGR2RGB)
            height, width = viz_rgb.shape[:2]
            data = viz_rgb.tobytes()

            msg = Image(
                data_length=len(data),
                height=height,
                width=width,
                encoding="rgb8",
                is_bigendian=0,
                step=width * 3,  # tightly packed rows: 3 bytes per pixel
                data=data,
            )

            self.viz_image.publish(msg)
        except Exception as e:
            logger.error(f"Error publishing visualization: {e}")
+
+ def check_target_stabilized(self) -> bool:
+ """Check if the commanded poses have stabilized."""
+ if len(self.reached_poses) < self.reached_poses.maxlen:
+ return False
+
+ positions = np.array(
+ [[p.position.x, p.position.y, p.position.z] for p in self.reached_poses]
+ )
+ std_devs = np.std(positions, axis=0)
+ return np.all(std_devs < self.pose_stabilization_threshold)
+
    def get_place_target_pose(self) -> Optional[Pose]:
        """Get the place target pose with z-offset applied based on object height.

        Returns:
            The oriented place pose, or None when no place target is set.
        """
        if self.place_target_position is None:
            return None

        place_pos = self.place_target_position.copy()
        if self.target_object_height is not None:
            # Raise by half the object height plus a 10cm clearance so the
            # object bottom clears the support surface before release.
            z_offset = self.target_object_height / 2.0
            place_pos[2] += z_offset + 0.1

        # Identity orientation; the grasp-pose helper below sets the final one.
        place_center_pose = Pose(
            Point(place_pos[0], place_pos[1], place_pos[2]), Quaternion(0.0, 0.0, 0.0, 1.0)
        )

        ee_pose = self.arm.get_ee_pose()

        # Calculate dynamic pitch for place position
        dynamic_pitch = self.calculate_dynamic_grasp_pitch(place_center_pose)

        place_pose = update_target_grasp_pose(
            place_center_pose,
            ee_pose,
            grasp_distance=0.0,
            grasp_pitch_degrees=dynamic_pitch,
        )

        return place_pose
+
+ @rpc
+ def cleanup(self):
+ """Clean up resources on module destruction."""
+ if self.detector and hasattr(self.detector, "cleanup"):
+ self.detector.cleanup()
+ self.arm.disable()
diff --git a/dimos/manipulation/visual_servoing/pbvs.py b/dimos/manipulation/visual_servoing/pbvs.py
new file mode 100644
index 0000000000..e34ec94557
--- /dev/null
+++ b/dimos/manipulation/visual_servoing/pbvs.py
@@ -0,0 +1,490 @@
+# Copyright 2025 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Position-Based Visual Servoing (PBVS) system for robotic manipulation.
+Supports both eye-in-hand and eye-to-hand configurations.
+"""
+
+import numpy as np
+from typing import Optional, Tuple, List
+from collections import deque
+from scipy.spatial.transform import Rotation as R
+from dimos_lcm.geometry_msgs import Pose, Vector3, Quaternion, Point
+from dimos_lcm.vision_msgs import Detection3D, Detection3DArray
+from dimos.utils.logging_config import setup_logger
+from dimos.manipulation.visual_servoing.utils import (
+ update_target_grasp_pose,
+ find_best_object_match,
+ create_pbvs_visualization,
+ is_target_reached,
+)
+
+logger = setup_logger("dimos.manipulation.pbvs")
+
+
class PBVS:
    """
    High-level Position-Based Visual Servoing orchestrator.

    Handles:
    - Object tracking and target management
    - Pregrasp distance computation
    - Grasp pose generation
    - Coordination with low-level controller

    Note: This class is agnostic to camera mounting (eye-in-hand vs eye-to-hand).
    The caller is responsible for providing appropriate camera and EE poses.
    """

    def __init__(
        self,
        position_gain: float = 0.5,
        rotation_gain: float = 0.3,
        max_velocity: float = 0.1,  # m/s
        max_angular_velocity: float = 0.5,  # rad/s
        target_tolerance: float = 0.01,  # 1cm
        max_tracking_distance_threshold: float = 0.12,  # Max distance for target tracking (m)
        min_size_similarity: float = 0.6,  # Min size similarity threshold (0.0-1.0)
        direct_ee_control: bool = True,  # If True, output target poses instead of velocities
    ):
        """
        Initialize PBVS system.

        Args:
            position_gain: Proportional gain for position control
            rotation_gain: Proportional gain for rotation control
            max_velocity: Maximum linear velocity command magnitude (m/s)
            max_angular_velocity: Maximum angular velocity command magnitude (rad/s)
            target_tolerance: Distance threshold for considering target reached (m)
            max_tracking_distance_threshold: Maximum distance for valid target tracking (m)
            min_size_similarity: Minimum size similarity for valid target tracking (0.0-1.0)
            direct_ee_control: If True, output target poses instead of velocity commands
        """
        # Initialize low-level controller only if not in direct control mode
        if not direct_ee_control:
            self.controller = PBVSController(
                position_gain=position_gain,
                rotation_gain=rotation_gain,
                max_velocity=max_velocity,
                max_angular_velocity=max_angular_velocity,
                target_tolerance=target_tolerance,
            )
        else:
            self.controller = None

        # Store parameters for direct mode error computation
        self.target_tolerance = target_tolerance

        # Target tracking parameters
        self.max_tracking_distance_threshold = max_tracking_distance_threshold
        self.min_size_similarity = min_size_similarity
        self.direct_ee_control = direct_ee_control

        # Target state
        self.current_target = None
        self.target_grasp_pose = None

        # Detection history for robust tracking (rolling window of the last
        # few Detection3DArray messages)
        self.detection_history_size = 3
        self.detection_history = deque(maxlen=self.detection_history_size)

        # For direct control mode visualization
        self.last_position_error = None
        self.last_target_reached = False

        logger.info(
            f"Initialized PBVS system with controller gains: pos={position_gain}, rot={rotation_gain}, "
            f"tracking_thresholds: distance={max_tracking_distance_threshold}m, size={min_size_similarity:.2f}"
        )

    def set_target(self, target_object: Detection3D) -> bool:
        """
        Set a new target object for servoing.

        Args:
            target_object: Detection3D object

        Returns:
            True if target was set successfully
        """
        if target_object and target_object.bbox and target_object.bbox.center:
            self.current_target = target_object
            self.target_grasp_pose = None  # Will be computed when needed
            logger.info(f"New target set: ID {target_object.id}")
            return True
        return False

    def clear_target(self):
        """Clear the current target."""
        self.current_target = None
        self.target_grasp_pose = None
        self.last_position_error = None
        self.last_target_reached = False
        self.detection_history.clear()
        if self.controller:
            self.controller.clear_state()
        logger.info("Target cleared")

    def get_current_target(self) -> Optional[Detection3D]:
        """
        Get the current target object.

        Returns:
            Current target Detection3D or None if no target selected
        """
        return self.current_target

    def update_tracking(self, new_detections: Optional[Detection3DArray] = None) -> bool:
        """
        Update target tracking with new detections using a rolling window.
        If tracking is lost, keeps the old target pose.

        Args:
            new_detections: Optional new detections for target tracking

        Returns:
            True if target was successfully tracked, False if lost (but target is kept)
        """
        # Check if we have a current target
        if (
            not self.current_target
            or not self.current_target.bbox
            or not self.current_target.bbox.center
        ):
            return False

        # Add new detections to history if provided
        if new_detections is not None and new_detections.detections_length > 0:
            self.detection_history.append(new_detections)

        # If no detection history, can't track
        if not self.detection_history:
            logger.debug("No detection history for target tracking - using last known pose")
            return False

        # Collect all candidates from detection history
        all_candidates = []
        for detection_array in self.detection_history:
            all_candidates.extend(detection_array.detections)

        if not all_candidates:
            logger.debug("No candidates in detection history")
            return False

        # Use stage-dependent distance threshold
        max_distance = self.max_tracking_distance_threshold

        # Find best match across all recent detections
        match_result = find_best_object_match(
            target_obj=self.current_target,
            candidates=all_candidates,
            max_distance=max_distance,
            min_size_similarity=self.min_size_similarity,
        )

        if match_result.is_valid_match:
            self.current_target = match_result.matched_object
            self.target_grasp_pose = None  # Recompute grasp pose
            logger.debug(
                f"Target tracking successful: distance={match_result.distance:.3f}m, "
                f"size_similarity={match_result.size_similarity:.2f}, "
                f"confidence={match_result.confidence:.2f}"
            )
            return True

        logger.debug(
            f"Target tracking lost across {len(self.detection_history)} frames: "
            f"distance={match_result.distance:.3f}m, "
            f"size_similarity={match_result.size_similarity:.2f}, "
            f"thresholds: distance={max_distance:.3f}m, size={self.min_size_similarity:.2f}"
        )
        return False

    def compute_control(
        self,
        ee_pose: Pose,
        grasp_distance: float = 0.15,
        grasp_pitch_degrees: float = 45.0,
    ) -> Tuple[Optional[Vector3], Optional[Vector3], bool, bool, Optional[Pose]]:
        """
        Compute PBVS control with position and orientation servoing.

        Args:
            ee_pose: Current end-effector pose
            grasp_distance: Distance to maintain from target (meters)
            grasp_pitch_degrees: Desired grasp pitch angle (degrees)

        Returns:
            Tuple of (velocity_command, angular_velocity_command, target_reached, has_target, target_pose)
            - velocity_command: Linear velocity vector or None if no target (None in direct_ee_control mode)
            - angular_velocity_command: Angular velocity vector or None if no target (None in direct_ee_control mode)
            - target_reached: True if within target tolerance
            - has_target: True if currently tracking a target
            - target_pose: Target EE pose (only in direct_ee_control mode, otherwise None)
        """
        # Check if we have a target
        if not self.current_target:
            return None, None, False, False, None

        # Update target grasp pose with provided distance and pitch
        self.target_grasp_pose = update_target_grasp_pose(
            self.current_target.bbox.center, ee_pose, grasp_distance, grasp_pitch_degrees
        )

        if self.target_grasp_pose is None:
            logger.warning("Failed to compute grasp pose")
            return None, None, False, False, None

        # Compute errors for visualization before checking if reached (in case pose gets cleared)
        if self.direct_ee_control and self.target_grasp_pose:
            self.last_position_error = Vector3(
                self.target_grasp_pose.position.x - ee_pose.position.x,
                self.target_grasp_pose.position.y - ee_pose.position.y,
                self.target_grasp_pose.position.z - ee_pose.position.z,
            )

        # Check if target reached using our separate function
        target_reached = is_target_reached(self.target_grasp_pose, ee_pose, self.target_tolerance)

        # Return appropriate values based on control mode
        if self.direct_ee_control:
            # Direct control mode
            if self.target_grasp_pose:
                self.last_target_reached = target_reached
                # Return has_target=True since we have a target
                return None, None, target_reached, True, self.target_grasp_pose
            else:
                return None, None, False, True, None
        else:
            # Velocity control mode - use controller
            # NOTE(review): controller_reached is discarded; the reached flag
            # returned is the one from is_target_reached above — confirm intended.
            velocity_cmd, angular_velocity_cmd, controller_reached = (
                self.controller.compute_control(ee_pose, self.target_grasp_pose)
            )
            # Return has_target=True since we have a target, regardless of tracking status
            return velocity_cmd, angular_velocity_cmd, target_reached, True, None

    def create_status_overlay(
        self,
        image: np.ndarray,
        grasp_stage=None,
    ) -> np.ndarray:
        """
        Create PBVS status overlay on image.

        Args:
            image: Input image
            grasp_stage: Current grasp stage (optional)

        Returns:
            Image with PBVS status overlay
        """
        stage_value = grasp_stage.value if grasp_stage else "idle"
        return create_pbvs_visualization(
            image,
            self.current_target,
            self.last_position_error,
            self.last_target_reached,
            stage_value,
        )
+
+
class PBVSController:
    """
    Low-level Position-Based Visual Servoing controller.
    Pure control logic that computes velocity commands from poses.

    Handles:
    - Position and orientation error computation
    - Velocity command generation with gain control
    - Target reached detection
    """

    def __init__(
        self,
        position_gain: float = 0.5,
        rotation_gain: float = 0.3,
        max_velocity: float = 0.1,  # m/s
        max_angular_velocity: float = 0.5,  # rad/s
        target_tolerance: float = 0.01,  # 1cm
    ):
        """
        Initialize PBVS controller.

        Args:
            position_gain: Proportional gain for position control
            rotation_gain: Proportional gain for rotation control
            max_velocity: Maximum linear velocity command magnitude (m/s)
            max_angular_velocity: Maximum angular velocity command magnitude (rad/s)
            target_tolerance: Distance threshold for considering target reached (m)
        """
        self.position_gain = position_gain
        self.rotation_gain = rotation_gain
        self.max_velocity = max_velocity
        self.max_angular_velocity = max_angular_velocity
        self.target_tolerance = target_tolerance

        # Last computed errors/commands, kept for visualization/debugging.
        self.last_position_error = None
        self.last_rotation_error = None
        self.last_velocity_cmd = None
        self.last_angular_velocity_cmd = None
        self.last_target_reached = False

        logger.info(
            f"Initialized PBVS controller: pos_gain={position_gain}, rot_gain={rotation_gain}, "
            f"max_vel={max_velocity}m/s, max_ang_vel={max_angular_velocity}rad/s, "
            f"target_tolerance={target_tolerance}m"
        )

    def clear_state(self):
        """Clear controller state."""
        self.last_position_error = None
        self.last_rotation_error = None
        self.last_velocity_cmd = None
        self.last_angular_velocity_cmd = None
        self.last_target_reached = False

    def compute_control(
        self, ee_pose: Pose, grasp_pose: Pose
    ) -> Tuple[Optional[Vector3], Optional[Vector3], bool]:
        """
        Compute PBVS control with position and orientation servoing.

        Args:
            ee_pose: Current end-effector pose
            grasp_pose: Target grasp pose

        Returns:
            Tuple of (velocity_command, angular_velocity_command, target_reached)
            - velocity_command: Linear velocity vector
            - angular_velocity_command: Angular velocity vector
            - target_reached: True if within target tolerance
        """
        # Calculate position error (target - EE position)
        error = Vector3(
            grasp_pose.position.x - ee_pose.position.x,
            grasp_pose.position.y - ee_pose.position.y,
            grasp_pose.position.z - ee_pose.position.z,
        )
        self.last_position_error = error

        # Compute velocity command with proportional control
        velocity_cmd = Vector3(
            error.x * self.position_gain,
            error.y * self.position_gain,
            error.z * self.position_gain,
        )

        # Limit velocity magnitude (scale uniformly to preserve direction)
        vel_magnitude = np.linalg.norm([velocity_cmd.x, velocity_cmd.y, velocity_cmd.z])
        if vel_magnitude > self.max_velocity:
            scale = self.max_velocity / vel_magnitude
            velocity_cmd = Vector3(
                float(velocity_cmd.x * scale),
                float(velocity_cmd.y * scale),
                float(velocity_cmd.z * scale),
            )

        self.last_velocity_cmd = velocity_cmd

        # Compute angular velocity for orientation control
        angular_velocity_cmd = self._compute_angular_velocity(grasp_pose.orientation, ee_pose)

        # Check if target reached (position error only)
        error_magnitude = np.linalg.norm([error.x, error.y, error.z])
        target_reached = bool(error_magnitude < self.target_tolerance)
        self.last_target_reached = target_reached

        return velocity_cmd, angular_velocity_cmd, target_reached

    def _compute_angular_velocity(self, target_rot: Quaternion, current_pose: Pose) -> Vector3:
        """
        Compute angular velocity commands for orientation control.
        Uses quaternion error computation for better numerical stability.

        Args:
            target_rot: Target orientation (quaternion)
            current_pose: Current EE pose

        Returns:
            Angular velocity command as Vector3
        """
        # Use quaternion error for better numerical stability

        # Convert to scipy Rotation objects (scipy expects [x, y, z, w] order)
        target_rot_scipy = R.from_quat([target_rot.x, target_rot.y, target_rot.z, target_rot.w])
        current_rot_scipy = R.from_quat(
            [
                current_pose.orientation.x,
                current_pose.orientation.y,
                current_pose.orientation.z,
                current_pose.orientation.w,
            ]
        )

        # Compute rotation error: error = target * current^(-1)
        error_rot = target_rot_scipy * current_rot_scipy.inv()

        # Convert to axis-angle representation for control
        error_axis_angle = error_rot.as_rotvec()

        # Use axis-angle directly as angular velocity error (small angle approximation)
        roll_error = error_axis_angle[0]
        pitch_error = error_axis_angle[1]
        yaw_error = error_axis_angle[2]

        self.last_rotation_error = Vector3(roll_error, pitch_error, yaw_error)

        # Apply proportional control
        angular_velocity = Vector3(
            roll_error * self.rotation_gain,
            pitch_error * self.rotation_gain,
            yaw_error * self.rotation_gain,
        )

        # Limit angular velocity magnitude (uniform scaling preserves axis)
        ang_vel_magnitude = np.sqrt(
            angular_velocity.x**2 + angular_velocity.y**2 + angular_velocity.z**2
        )
        if ang_vel_magnitude > self.max_angular_velocity:
            scale = self.max_angular_velocity / ang_vel_magnitude
            angular_velocity = Vector3(
                angular_velocity.x * scale, angular_velocity.y * scale, angular_velocity.z * scale
            )

        self.last_angular_velocity_cmd = angular_velocity

        return angular_velocity

    def create_status_overlay(
        self,
        image: np.ndarray,
        current_target: Optional[Detection3D] = None,
    ) -> np.ndarray:
        """
        Create PBVS status overlay on image.

        Args:
            image: Input image
            current_target: Current target object Detection3D (for display)

        Returns:
            Image with PBVS status overlay
        """
        return create_pbvs_visualization(
            image,
            current_target,
            self.last_position_error,
            self.last_target_reached,
            "velocity_control",
        )
diff --git a/dimos/manipulation/visual_servoing/utils.py b/dimos/manipulation/visual_servoing/utils.py
new file mode 100644
index 0000000000..4546326ef6
--- /dev/null
+++ b/dimos/manipulation/visual_servoing/utils.py
@@ -0,0 +1,828 @@
+# Copyright 2025 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+from typing import Dict, Any, Optional, List, Tuple, Union
+from dataclasses import dataclass
+
+from dimos_lcm.geometry_msgs import Pose, Vector3, Quaternion, Point
+from dimos_lcm.vision_msgs import Detection3D, Detection2D
+import cv2
+from dimos.perception.detection2d.utils import plot_results
+from dimos.perception.common.utils import project_2d_points_to_3d
+from dimos.utils.transform_utils import (
+ optical_to_robot_frame,
+ robot_to_optical_frame,
+ pose_to_matrix,
+ matrix_to_pose,
+ euler_to_quaternion,
+ compose_transforms,
+ yaw_towards_point,
+ get_distance,
+)
+
+
def match_detection_by_id(
    detection_3d: Detection3D, detections_3d: List[Detection3D], detections_2d: List[Detection2D]
) -> Optional[Detection2D]:
    """
    Look up the Detection2D paired with a given Detection3D.

    The two lists are assumed index-aligned (1:1): the match is the 2D entry
    at the same index as the 3D entry whose id equals ``detection_3d.id``.

    Args:
        detection_3d: The Detection3D to match
        detections_3d: List of all Detection3D objects
        detections_2d: List of all Detection2D objects (index-aligned with detections_3d)

    Returns:
        Corresponding Detection2D if found, None otherwise
    """
    target_id = detection_3d.id
    for idx, candidate in enumerate(detections_3d):
        if candidate.id == target_id and idx < len(detections_2d):
            return detections_2d[idx]
    return None
+
+
def transform_pose(
    obj_pos: np.ndarray,
    obj_orientation: np.ndarray,
    transform_matrix: np.ndarray,
    to_optical: bool = False,
    to_robot: bool = False,
) -> Pose:
    """
    Transform object pose with optional frame convention conversion.

    Args:
        obj_pos: Object position [x, y, z]
        obj_orientation: Object orientation [roll, pitch, yaw] in radians
        transform_matrix: 4x4 transformation matrix from camera frame to desired frame
        to_optical: If True, input is in robot frame → convert result to optical frame
        to_robot: If True, input is in optical frame → convert to robot frame first

    Returns:
        Object pose in desired frame as Pose
    """
    # Build the source pose: euler angles -> quaternion, paired with position.
    quat = euler_to_quaternion(
        Vector3(obj_orientation[0], obj_orientation[1], obj_orientation[2])
    )
    source_pose = Pose(Point(obj_pos[0], obj_pos[1], obj_pos[2]), quat)

    # Input-side convention: optical-frame input is converted to the robot
    # convention before the rigid transform is applied.
    if to_robot:
        source_pose = optical_to_robot_frame(source_pose)

    # Chain the camera->object pose onto the camera->desired transform.
    combined = compose_transforms(transform_matrix, pose_to_matrix(source_pose))
    result_pose = matrix_to_pose(combined)

    # Output-side convention: optionally express the result optically.
    return robot_to_optical_frame(result_pose) if to_optical else result_pose
+
+
def transform_points_3d(
    points_3d: np.ndarray,
    transform_matrix: np.ndarray,
    to_optical: bool = False,
    to_robot: bool = False,
) -> np.ndarray:
    """
    Transform 3D points with optional frame convention conversion.

    Each point is run through the same pipeline as transform_pose, using an
    identity orientation.

    Args:
        points_3d: Nx3 array of 3D points [x, y, z]
        transform_matrix: 4x4 transformation matrix from camera frame to desired frame
        to_optical: If True, input is in robot frame → convert result to optical frame
        to_robot: If True, input is in optical frame → convert to robot frame first

    Returns:
        Nx3 float32 array of transformed 3D points in desired frame
    """
    if points_3d.size == 0:
        return np.zeros((0, 3), dtype=np.float32)

    pts = np.asarray(points_3d)
    if pts.ndim == 1:
        # Promote a single point to a 1xN row so the loop below is uniform.
        pts = pts.reshape(1, -1)

    results = []
    for pt in pts:
        # Wrap the point in a pose with identity orientation.
        pose_in = Pose(Point(pt[0], pt[1], pt[2]), Quaternion(0.0, 0.0, 0.0, 1.0))

        # Input-side convention conversion.
        if to_robot:
            pose_in = optical_to_robot_frame(pose_in)

        # Apply the rigid transform via matrix composition.
        pose_out = matrix_to_pose(
            compose_transforms(transform_matrix, pose_to_matrix(pose_in))
        )

        # Output-side convention conversion.
        if to_optical:
            pose_out = robot_to_optical_frame(pose_out)

        results.append([pose_out.position.x, pose_out.position.y, pose_out.position.z])

    return np.array(results, dtype=np.float32)
+
+
def select_points_from_depth(
    depth_image: np.ndarray,
    target_point: Tuple[int, int],
    camera_intrinsics: Union[List[float], np.ndarray],
    radius: int = 5,
) -> np.ndarray:
    """
    Select points around a target point within a bounding box and project them to 3D.

    Args:
        depth_image: Depth image in meters (H, W)
        target_point: (x, y) target point coordinates
        camera_intrinsics: Camera parameters as [fx, fy, cx, cy] list or 3x3 matrix
        radius: Half-width of the bounding box (so bbox size is radius*2 x radius*2)

    Returns:
        Nx3 array of 3D points (X, Y, Z) in camera frame. Invalid readings
        (non-positive or non-finite depth) are dropped, so N may be smaller
        than the window size, possibly 0.
    """
    x_target, y_target = target_point
    height, width = depth_image.shape

    # Clamp the sampling window to the image bounds; a fully off-image target
    # yields an empty range and therefore an empty result.
    x_min = max(0, x_target - radius)
    x_max = min(width, x_target + radius)
    y_min = max(0, y_target - radius)
    y_max = min(height, y_target + radius)

    # Create coordinate grids for the bounding box (vectorized)
    y_coords, x_coords = np.meshgrid(range(y_min, y_max), range(x_min, x_max), indexing="ij")

    # Flatten to get all coordinate pairs
    x_flat = x_coords.flatten()
    y_flat = y_coords.flatten()

    # Extract corresponding depth values using advanced indexing
    depth_flat = depth_image[y_flat, x_flat]

    # Keep only strictly positive, finite depth readings.
    valid_mask = (depth_flat > 0) & np.isfinite(depth_flat)

    if not np.any(valid_mask):
        return np.zeros((0, 3), dtype=np.float32)

    points_2d = np.column_stack([x_flat[valid_mask], y_flat[valid_mask]]).astype(np.float32)
    depth_values = depth_flat[valid_mask].astype(np.float32)

    points_3d = project_2d_points_to_3d(points_2d, depth_values, camera_intrinsics)

    return points_3d
+
+
def update_target_grasp_pose(
    target_pose: Pose, ee_pose: Pose, grasp_distance: float = 0.0, grasp_pitch_degrees: float = 45.0
) -> Optional[Pose]:
    """
    Update target grasp pose based on current target pose and EE pose.

    Args:
        target_pose: Target pose to grasp
        ee_pose: Current end-effector pose
        grasp_distance: Distance to maintain from target (pregrasp or grasp
            distance); 0.0 means grasp at the target position itself
        grasp_pitch_degrees: Grasp pitch angle in degrees — 0° is a level
            approach, 90° is top-down (default 45°)

    Returns:
        Target grasp pose yawed toward the end effector. (The Optional return
        type is kept for interface compatibility; this implementation always
        returns a Pose.)
    """

    target_pos = target_pose.position

    # Calculate orientation pointing from target towards EE
    yaw_to_ee = yaw_towards_point(target_pos, ee_pose.position)

    # Create target pose with proper orientation
    # Convert grasp pitch from degrees to radians with mapping:
    # 0° (level) -> π/2 (1.57 rad), 90° (top-down) -> π (3.14 rad)
    pitch_radians = 1.57 + np.radians(grasp_pitch_degrees)

    # Convert euler angles to quaternion using utility function
    euler = Vector3(0.0, pitch_radians, yaw_to_ee)  # roll=0, pitch=mapped, yaw=calculated
    target_orientation = euler_to_quaternion(euler)

    updated_pose = Pose(target_pos, target_orientation)

    # Stand off along the approach axis when a non-zero distance is requested.
    if grasp_distance > 0.0:
        return apply_grasp_distance(updated_pose, grasp_distance)
    else:
        return updated_pose
+
+
def apply_grasp_distance(target_pose: Pose, distance: float) -> Pose:
    """
    Apply grasp distance offset to target pose along its approach direction.

    The gripper is assumed to approach along its local -z axis; change the
    local vector below if your gripper approaches along a different axis.

    Args:
        target_pose: Target grasp pose
        distance: Distance to offset along the approach direction (meters)

    Returns:
        Target pose offset by the specified distance along its approach
        direction, with the original orientation preserved
    """
    # Pull the rotation out of the pose's homogeneous transform.
    rotation = pose_to_matrix(target_pose)[:3, :3]

    # Express the local -z approach axis in world coordinates.
    approach_world = rotation @ np.array([0, 0, -1])

    # Slide the position along the world-frame approach direction.
    shifted = Point(
        target_pose.position.x + distance * approach_world[0],
        target_pose.position.y + distance * approach_world[1],
        target_pose.position.z + distance * approach_world[2],
    )

    return Pose(shifted, target_pose.orientation)
+
+
def is_target_reached(target_pose: Pose, current_pose: Pose, tolerance: float = 0.01) -> bool:
    """
    Check if the target pose has been reached within tolerance.

    Only position is compared (via get_distance); orientation is ignored.

    Args:
        target_pose: Target pose to reach
        current_pose: Current pose (e.g., end-effector pose)
        tolerance: Distance threshold for considering target reached (meters, default 0.01 = 1cm)

    Returns:
        True if target is reached within tolerance, False otherwise
    """
    return get_distance(target_pose, current_pose) < tolerance
+
+
@dataclass
class ObjectMatchResult:
    """Result of object matching with confidence metrics."""

    # Best-matching detection, or None when no candidate passed the validity gates.
    matched_object: Optional[Detection3D]
    # Weighted similarity score of the accepted match (0.0 when no match).
    confidence: float
    # Euclidean center distance to the match in meters (inf when no match).
    distance: float
    # Per-dimension size similarity in [0, 1] (0.0 when no match).
    size_similarity: float
    # True iff matched_object is not None.
    is_valid_match: bool
+
+
def calculate_object_similarity(
    target_obj: Detection3D,
    candidate_obj: Detection3D,
    distance_weight: float = 0.6,
    size_weight: float = 0.4,
) -> Tuple[float, float, float]:
    """
    Calculate comprehensive similarity between two objects.

    Args:
        target_obj: Target Detection3D object
        candidate_obj: Candidate Detection3D object
        distance_weight: Weight for distance component (0-1)
        size_weight: Weight for size component (0-1)

    Returns:
        Tuple of (total_similarity, distance_m, size_similarity).
        NOTE(review): total_similarity is only bounded by [0, 1] when the two
        weights sum to 1 — confirm callers keep that invariant.
    """
    # Extract positions
    target_pos = target_obj.bbox.center.position
    candidate_pos = candidate_obj.bbox.center.position

    target_xyz = np.array([target_pos.x, target_pos.y, target_pos.z])
    candidate_xyz = np.array([candidate_pos.x, candidate_pos.y, candidate_pos.z])

    # Calculate Euclidean distance
    distance = np.linalg.norm(target_xyz - candidate_xyz)
    # Inverse falloff: 1 at d=0, tending to 0 as d grows (not exponential).
    distance_similarity = 1.0 / (1.0 + distance)

    # Calculate size similarity by comparing each dimension individually
    size_similarity = 1.0  # Default if no size info
    target_size = target_obj.bbox.size
    candidate_size = candidate_obj.bbox.size

    if target_size and candidate_size:
        # Extract dimensions
        target_dims = [target_size.x, target_size.y, target_size.z]
        candidate_dims = [candidate_size.x, candidate_size.y, candidate_size.z]

        # Calculate similarity for each dimension pair
        dim_similarities = []
        for target_dim, candidate_dim in zip(target_dims, candidate_dims):
            if target_dim == 0.0 and candidate_dim == 0.0:
                dim_similarities.append(1.0)  # Both dimensions are zero
            elif target_dim == 0.0 or candidate_dim == 0.0:
                dim_similarities.append(0.0)  # One dimension is zero, other is not
            else:
                # Calculate similarity as min/max ratio
                max_dim = max(target_dim, candidate_dim)
                min_dim = min(target_dim, candidate_dim)
                dim_similarity = min_dim / max_dim if max_dim > 0 else 0.0
                dim_similarities.append(dim_similarity)

        # Return average similarity across all dimensions
        size_similarity = np.mean(dim_similarities) if dim_similarities else 0.0

    # Weighted combination
    total_similarity = distance_weight * distance_similarity + size_weight * size_similarity

    return total_similarity, distance, size_similarity
+
+
def find_best_object_match(
    target_obj: Detection3D,
    candidates: List[Detection3D],
    max_distance: float = 0.1,
    min_size_similarity: float = 0.4,
    distance_weight: float = 0.7,
    size_weight: float = 0.3,
) -> ObjectMatchResult:
    """
    Find the best matching object from candidates using distance and size criteria.

    A candidate is eligible only when its center distance is within
    max_distance and its size similarity is at least min_size_similarity;
    among eligible candidates, the one with the highest weighted similarity
    wins.

    Args:
        target_obj: Target Detection3D to match against
        candidates: List of candidate Detection3D objects
        max_distance: Maximum allowed distance for valid match (meters)
        min_size_similarity: Minimum size similarity for valid match (0-1)
        distance_weight: Weight for distance in similarity calculation
        size_weight: Weight for size in similarity calculation

    Returns:
        ObjectMatchResult with best match and confidence metrics
    """
    if not candidates or not target_obj.bbox or not target_obj.bbox.center:
        return ObjectMatchResult(None, 0.0, float("inf"), 0.0, False)

    best_match = None
    best_score = 0.0
    best_dist = float("inf")
    best_size = 0.0

    for cand in candidates:
        # Skip malformed candidates with no usable center.
        if not cand.bbox or not cand.bbox.center:
            continue

        score, dist, size_sim = calculate_object_similarity(
            target_obj, cand, distance_weight, size_weight
        )

        # Guard clauses for the validity constraints.
        if dist > max_distance or size_sim < min_size_similarity:
            continue

        if score > best_score:
            best_match, best_score, best_dist, best_size = cand, score, dist, size_sim

    return ObjectMatchResult(
        matched_object=best_match,
        confidence=best_score,
        distance=best_dist,
        size_similarity=best_size,
        is_valid_match=best_match is not None,
    )
+
+
def parse_zed_pose(zed_pose_data: Dict[str, Any]) -> Optional[Pose]:
    """
    Parse ZED pose data dictionary into a Pose object.

    Args:
        zed_pose_data: Dictionary from ZEDCamera.get_pose() containing:
            - position: [x, y, z] in meters
            - rotation: [x, y, z, w] quaternion
            - valid: Whether pose is valid
            Extra keys (e.g. euler_angles) are ignored.

    Returns:
        Pose object with position and orientation, or None if the dict is
        empty or flagged invalid
    """
    if not zed_pose_data or not zed_pose_data.get("valid", False):
        return None

    # Default to origin / identity quaternion when a field is absent, so a
    # partially filled dict cannot raise KeyError. (Previously "position" had
    # a fallback but "rotation" used direct indexing — inconsistent.)
    position = zed_pose_data.get("position", [0, 0, 0])
    pos_vector = Point(position[0], position[1], position[2])

    quat = zed_pose_data.get("rotation", [0.0, 0.0, 0.0, 1.0])
    orientation = Quaternion(quat[0], quat[1], quat[2], quat[3])
    return Pose(pos_vector, orientation)
+
+
def estimate_object_depth(
    depth_image: np.ndarray, segmentation_mask: Optional[np.ndarray], bbox: List[float]
) -> float:
    """
    Estimate object depth dimension using segmentation mask and depth data.
    Optimized for real-time performance.

    Fix over the previous version: NaN/inf/non-positive depth readings (common
    in real depth maps) are filtered out before the percentile computation,
    which previously could yield a NaN estimate that np.clip passed through.

    Args:
        depth_image: Depth image in meters
        segmentation_mask: Binary segmentation mask for the object
        bbox: Bounding box [x1, y1, x2, y2]

    Returns:
        Estimated object depth in meters, clamped to [0.02, 0.5] when derived
        from depth data, otherwise a bbox-area heuristic (0.05/0.10/0.15)
    """
    x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])

    # Extract depth ROI once
    roi_depth = depth_image[y1:y2, x1:x2]

    if segmentation_mask is not None and segmentation_mask.size > 0:
        # Extract mask ROI efficiently (crop only when shapes differ).
        mask_roi = (
            segmentation_mask[y1:y2, x1:x2]
            if segmentation_mask.shape != roi_depth.shape
            else segmentation_mask
        )

        # Fast mask application using boolean indexing
        valid_mask = mask_roi > 0
        if np.sum(valid_mask) > 10:  # Early exit if not enough points
            masked_depths = roi_depth[valid_mask]

            # Drop invalid readings so they cannot poison the percentiles.
            masked_depths = masked_depths[
                np.isfinite(masked_depths) & (masked_depths > 0)
            ]

            if masked_depths.size > 10:
                # Robust thickness estimate: 10th-90th percentile spread.
                depth_90 = np.percentile(masked_depths, 90)
                depth_10 = np.percentile(masked_depths, 10)
                depth_range = depth_90 - depth_10

                # Clamp to reasonable physical bounds.
                return float(np.clip(depth_range, 0.02, 0.5))

    # Fast fallback using bbox area when no mask / not enough valid depth.
    bbox_area = (x2 - x1) * (y2 - y1)

    # Coarse area-based estimation: bigger on-screen objects assumed thicker.
    if bbox_area > 10000:
        return 0.15
    elif bbox_area > 5000:
        return 0.10
    else:
        return 0.05
+
+
+# ============= Visualization Functions =============
+
+
def create_manipulation_visualization(
    rgb_image: np.ndarray,
    feedback,
    detection_3d_array=None,
    detection_2d_array=None,
) -> np.ndarray:
    """
    Create simple visualization for manipulation class using feedback.

    Args:
        rgb_image: RGB image array
        feedback: Feedback object containing all state information.
            NOTE(review): usage below assumes it exposes `grasp_stage` (an
            enum whose `.value` is a string), `target_tracked`,
            `waiting_for_reach`, and `success` (bool or None while running) —
            confirm against the manipulation feedback type.
        detection_3d_array: Optional 3D detections for object visualization
        detection_2d_array: Optional 2D detections for object visualization
            (assumed index-aligned with detection_3d_array)

    Returns:
        BGR image with visualization overlays
    """
    # Convert to BGR for OpenCV drawing routines.
    viz = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)

    # Draw detections only when both arrays are available.
    if detection_3d_array and detection_2d_array:
        # Extract 2D bboxes: convert center/size format to [x1, y1, x2, y2].
        bboxes_2d = []
        for det_2d in detection_2d_array.detections:
            if det_2d.bbox:
                x1 = det_2d.bbox.center.position.x - det_2d.bbox.size_x / 2
                y1 = det_2d.bbox.center.position.y - det_2d.bbox.size_y / 2
                x2 = det_2d.bbox.center.position.x + det_2d.bbox.size_x / 2
                y2 = det_2d.bbox.center.position.y + det_2d.bbox.size_y / 2
                bboxes_2d.append([x1, y1, x2, y2])

        # Draw detections on the RGB image, then redo the BGR conversion,
        # replacing the plain conversion above.
        rgb_with_detections = visualize_detections_3d(
            rgb_image, detection_3d_array.detections, show_coordinates=True, bboxes_2d=bboxes_2d
        )
        viz = cv2.cvtColor(rgb_with_detections, cv2.COLOR_RGB2BGR)

    # Add manipulation status overlay (title line).
    status_y = 30
    cv2.putText(
        viz,
        "Eye-in-Hand Visual Servoing",
        (10, status_y),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.6,
        (0, 255, 255),
        2,
    )

    # Stage information, color-coded per stage (BGR tuples); unknown stage
    # strings fall back to white.
    stage_text = f"Stage: {feedback.grasp_stage.value.upper()}"
    stage_color = {
        "idle": (100, 100, 100),
        "pre_grasp": (0, 255, 255),
        "grasp": (0, 255, 0),
        "close_and_retract": (255, 0, 255),
        "place": (0, 150, 255),
        "retract": (255, 150, 0),
    }.get(feedback.grasp_stage.value, (255, 255, 255))

    cv2.putText(
        viz,
        stage_text,
        (10, status_y + 25),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.5,
        stage_color,
        1,
    )

    # Target tracking status: green TRACKED, red LOST (LOST only shown when
    # a task is active, i.e. stage is not idle).
    if feedback.target_tracked:
        cv2.putText(
            viz,
            "Target: TRACKED",
            (10, status_y + 45),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 255, 0),
            1,
        )
    elif feedback.grasp_stage.value != "idle":
        cv2.putText(
            viz,
            "Target: LOST",
            (10, status_y + 45),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 0, 255),
            1,
        )

    # Waiting status while the robot travels to the commanded pose.
    if feedback.waiting_for_reach:
        cv2.putText(
            viz,
            "Status: WAITING FOR ROBOT",
            (10, status_y + 65),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (255, 255, 0),
            1,
        )

    # Overall result banner once the outcome is known (success is None while
    # the task is still in progress).
    if feedback.success is not None:
        result_text = "Pick & Place: SUCCESS" if feedback.success else "Pick & Place: FAILED"
        result_color = (0, 255, 0) if feedback.success else (0, 0, 255)
        cv2.putText(
            viz,
            result_text,
            (10, status_y + 85),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            result_color,
            2,
        )

    # Control hints (bottom of image)
    hint_text = "Click object to grasp | s=STOP | r=RESET | g=RELEASE"
    cv2.putText(
        viz,
        hint_text,
        (10, viz.shape[0] - 10),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.4,
        (200, 200, 200),
        1,
    )

    return viz
+
+
def create_pbvs_visualization(
    image: np.ndarray,
    current_target=None,
    position_error=None,
    target_reached=False,
    grasp_stage="idle",
) -> np.ndarray:
    """
    Create simple PBVS visualization overlay.

    Args:
        image: Input image (RGB or BGR)
        current_target: Current target Detection3D
        position_error: Position error Vector3
        target_reached: Whether target is reached
        grasp_stage: Current grasp stage string

    Returns:
        Image with PBVS overlay (the input is not modified in place)
    """
    viz = image.copy()

    # Without a target there is nothing to report.
    if current_target is None:
        return viz

    # Bottom status panel geometry.
    height, width = viz.shape[:2]
    panel_height = 100
    panel_y = height - panel_height

    # Darken the panel strip with a 30% black overlay.
    overlay = viz.copy()
    cv2.rectangle(overlay, (0, panel_y), (width, height), (0, 0, 0), -1)
    viz = cv2.addWeighted(viz, 0.7, overlay, 0.3, 0)

    def _text(label, org, scale, color, thickness):
        # Shared wrapper so every label uses the same font family.
        cv2.putText(viz, label, org, cv2.FONT_HERSHEY_SIMPLEX, scale, color, thickness)

    # Panel title.
    y_offset = panel_y + 20
    _text("PBVS Control", (10, y_offset), 0.6, (0, 255, 255), 2)

    # Position error magnitude in cm; green once the target is reached.
    if position_error:
        error_mag = np.linalg.norm([position_error.x, position_error.y, position_error.z])
        error_color = (0, 255, 0) if target_reached else (0, 255, 255)
        _text(f"Error: {error_mag * 100:.1f}cm", (10, y_offset + 25), 0.5, error_color, 1)

    # Current grasp stage.
    _text(f"Stage: {grasp_stage}", (10, y_offset + 45), 0.5, (255, 150, 255), 1)

    # Reached indicator on the right-hand side of the panel.
    if target_reached:
        _text("TARGET REACHED", (width - 150, y_offset + 25), 0.6, (0, 255, 0), 2)

    return viz
+
+
def visualize_detections_3d(
    rgb_image: np.ndarray,
    detections: List[Detection3D],
    show_coordinates: bool = True,
    bboxes_2d: Optional[List[List[float]]] = None,
) -> np.ndarray:
    """
    Visualize detections with 3D position overlay next to bounding boxes.

    Args:
        rgb_image: Original RGB image
        detections: List of Detection3D objects
        show_coordinates: Whether to show 3D coordinates next to bounding boxes
        bboxes_2d: Optional list of [x1, y1, x2, y2] boxes index-aligned with
            `detections`; without it the function returns an untouched copy

    Returns:
        Visualization image (a copy; the input is not modified)
    """
    # Nothing to draw without detections.
    if not detections:
        return rgb_image.copy()

    # If no 2D bboxes provided, skip visualization — 3D-only detections
    # cannot be placed on the image plane.
    if bboxes_2d is None:
        return rgb_image.copy()

    # Extract data for plot_results function.
    # NOTE(review): det.id is assumed to be a string; non-numeric ids fall
    # back to the list index as the track id — confirm against the LCM type.
    bboxes = bboxes_2d
    track_ids = [int(det.id) if det.id.isdigit() else i for i, det in enumerate(detections)]
    class_ids = [i for i in range(len(detections))]
    confidences = [
        det.results[0].hypothesis.score if det.results_length > 0 else 0.0 for det in detections
    ]
    names = [
        det.results[0].hypothesis.class_id if det.results_length > 0 else "unknown"
        for det in detections
    ]

    # Use plot_results for basic box/label rendering.
    viz = plot_results(rgb_image, bboxes, track_ids, class_ids, confidences, names)

    # Add 3D position coordinates if requested
    # (the bboxes_2d check is redundant after the early return above, kept as-is).
    if show_coordinates and bboxes_2d is not None:
        for i, det in enumerate(detections):
            if det.bbox and det.bbox.center and i < len(bboxes_2d):
                position = det.bbox.center.position
                bbox = bboxes_2d[i]

                pos_xyz = np.array([position.x, position.y, position.z])

                # Get bounding box coordinates
                x1, y1, x2, y2 = map(int, bbox)

                # Add position text next to bounding box (top-right corner)
                pos_text = f"({pos_xyz[0]:.2f}, {pos_xyz[1]:.2f}, {pos_xyz[2]:.2f})"
                text_x = x2 + 5  # Right edge of bbox + small offset
                text_y = y1 + 15  # Top edge of bbox + small offset

                # Add black background rectangle for better readability
                text_size = cv2.getTextSize(pos_text, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1)[0]
                cv2.rectangle(
                    viz,
                    (text_x - 2, text_y - text_size[1] - 2),
                    (text_x + text_size[0] + 2, text_y + 2),
                    (0, 0, 0),
                    -1,
                )

                cv2.putText(
                    viz,
                    pos_text,
                    (text_x, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.4,
                    (255, 255, 255),
                    1,
                )

    return viz
diff --git a/dimos/msgs/geometry_msgs/Quaternion.py b/dimos/msgs/geometry_msgs/Quaternion.py
index 9879e1e263..a7bb5543c1 100644
--- a/dimos/msgs/geometry_msgs/Quaternion.py
+++ b/dimos/msgs/geometry_msgs/Quaternion.py
@@ -22,6 +22,7 @@
import numpy as np
from dimos_lcm.geometry_msgs import Quaternion as LCMQuaternion
from plum import dispatch
+from scipy.spatial.transform import Rotation as R
from dimos.msgs.geometry_msgs.Vector3 import Vector3
@@ -117,27 +118,12 @@ def to_euler(self) -> Vector3:
Returns:
Vector3: Euler angles as (roll, pitch, yaw) in radians
"""
- # Convert quaternion to Euler angles using ZYX convention (yaw, pitch, roll)
- # Source: https://en.wikipedia.org/wiki/Conversion_between_quaternions_and_Euler_angles
-
- # Roll (x-axis rotation)
- sinr_cosp = 2 * (self.w * self.x + self.y * self.z)
- cosr_cosp = 1 - 2 * (self.x * self.x + self.y * self.y)
- roll = np.arctan2(sinr_cosp, cosr_cosp)
-
- # Pitch (y-axis rotation)
- sinp = 2 * (self.w * self.y - self.z * self.x)
- if abs(sinp) >= 1:
- pitch = np.copysign(np.pi / 2, sinp) # Use 90 degrees if out of range
- else:
- pitch = np.arcsin(sinp)
-
- # Yaw (z-axis rotation)
- siny_cosp = 2 * (self.w * self.z + self.x * self.y)
- cosy_cosp = 1 - 2 * (self.y * self.y + self.z * self.z)
- yaw = np.arctan2(siny_cosp, cosy_cosp)
+ # Use scipy for accurate quaternion to euler conversion
+ quat = [self.x, self.y, self.z, self.w]
+ rotation = R.from_quat(quat)
+ euler_angles = rotation.as_euler("xyz") # roll, pitch, yaw
- return Vector3(roll, pitch, yaw)
+ return Vector3(euler_angles[0], euler_angles[1], euler_angles[2])
def __getitem__(self, idx: int) -> float:
"""Allow indexing into quaternion components: 0=x, 1=y, 2=z, 3=w."""
diff --git a/dimos/perception/common/utils.py b/dimos/perception/common/utils.py
index da9cc58fe0..10d05d9b4d 100644
--- a/dimos/perception/common/utils.py
+++ b/dimos/perception/common/utils.py
@@ -14,15 +14,113 @@
import cv2
import numpy as np
-from typing import List, Tuple, Optional
+from typing import List, Tuple, Optional, Any, Union
from dimos.types.manipulation import ObjectData
from dimos.types.vector import Vector
from dimos.utils.logging_config import setup_logger
+from dimos_lcm.vision_msgs import Detection3D, Detection2D, BoundingBox2D
import torch
logger = setup_logger("dimos.perception.common.utils")
def project_3d_points_to_2d(
    points_3d: np.ndarray, camera_intrinsics: Union[List[float], np.ndarray]
) -> np.ndarray:
    """
    Project 3D points to 2D image coordinates using camera intrinsics.

    Points with non-positive depth are dropped, so the output may contain
    fewer rows than the input (and rows do not track input indices).

    Args:
        points_3d: Nx3 array of 3D points (X, Y, Z)
        camera_intrinsics: Camera parameters as [fx, fy, cx, cy] list or 3x3 matrix

    Returns:
        Mx2 int32 array of 2D image coordinates (u, v), M <= N
    """
    if len(points_3d) == 0:
        return np.zeros((0, 2), dtype=np.int32)

    # Keep only points strictly in front of the camera.
    in_front = points_3d[:, 2] > 0
    if not np.any(in_front):
        return np.zeros((0, 2), dtype=np.int32)
    pts = points_3d[in_front]

    # Accept either a flat [fx, fy, cx, cy] list or a full 3x3 matrix.
    if isinstance(camera_intrinsics, list) and len(camera_intrinsics) == 4:
        fx, fy, cx, cy = camera_intrinsics
    else:
        K = np.array(camera_intrinsics)
        fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]

    # Pinhole projection. Note the int32 cast truncates toward zero rather
    # than rounding to the nearest pixel.
    u = pts[:, 0] * fx / pts[:, 2] + cx
    v = pts[:, 1] * fy / pts[:, 2] + cy
    return np.column_stack([u, v]).astype(np.int32)
+
+
def project_2d_points_to_3d(
    points_2d: np.ndarray,
    depth_values: np.ndarray,
    camera_intrinsics: Union[List[float], np.ndarray],
) -> np.ndarray:
    """
    Project 2D image points to 3D coordinates using depth values and camera intrinsics.

    Pixels with non-positive depth are dropped, so the output may contain
    fewer rows than the input.

    Args:
        points_2d: Nx2 array of 2D image coordinates (u, v)
        depth_values: N-length array of depth values (Z coordinates) for each point
        camera_intrinsics: Camera parameters as [fx, fy, cx, cy] list or 3x3 matrix

    Returns:
        Mx3 float32 array of 3D points (X, Y, Z), M <= N
    """
    if len(points_2d) == 0:
        return np.zeros((0, 3), dtype=np.float32)

    depths = np.asarray(depth_values)

    # Discard pixels with no usable depth reading.
    keep = depths > 0
    if not np.any(keep):
        return np.zeros((0, 3), dtype=np.float32)

    uv = points_2d[keep]
    z = depths[keep]

    # Accept either a flat [fx, fy, cx, cy] list or a full 3x3 matrix.
    if isinstance(camera_intrinsics, list) and len(camera_intrinsics) == 4:
        fx, fy, cx, cy = camera_intrinsics
    else:
        K = np.array(camera_intrinsics)
        fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]

    # Inverse pinhole model: X = (u - cx) * Z / fx, Y = (v - cy) * Z / fy.
    x = (uv[:, 0] - cx) * z / fx
    y = (uv[:, 1] - cy) * z / fy
    return np.column_stack([x, y, z]).astype(np.float32)
+
+
def colorize_depth(depth_img: np.ndarray, max_depth: float = 5.0) -> Optional[np.ndarray]:
"""
Normalize and colorize depth image using COLORMAP_JET.
@@ -329,3 +427,68 @@ def combine_object_data(
combined.append(obj_copy)
return combined
+
+
def point_in_bbox(point: Tuple[int, int], bbox: List[float]) -> bool:
    """
    Check if a point is inside a bounding box (edges inclusive).

    Args:
        point: (x, y) coordinates
        bbox: Bounding box [x1, y1, x2, y2]

    Returns:
        True if point is inside bbox
    """
    px, py = point
    x1, y1, x2, y2 = bbox
    return (x1 <= px <= x2) and (y1 <= py <= y2)
+
+
def bbox2d_to_corners(bbox_2d: BoundingBox2D) -> Tuple[float, float, float, float]:
    """
    Convert BoundingBox2D from center format to corner format.

    Args:
        bbox_2d: BoundingBox2D with center and size

    Returns:
        Tuple of (x1, y1, x2, y2) corner coordinates
    """
    cx = bbox_2d.center.position.x
    cy = bbox_2d.center.position.y
    half_w = bbox_2d.size_x / 2.0
    half_h = bbox_2d.size_y / 2.0
    # Top-left then bottom-right.
    return cx - half_w, cy - half_h, cx + half_w, cy + half_h
+
+
def find_clicked_detection(
    click_pos: Tuple[int, int], detections_2d: List[Detection2D], detections_3d: List[Detection3D]
) -> Optional[Detection3D]:
    """
    Find which detection was clicked based on 2D bounding boxes.

    Args:
        click_pos: (x, y) click position
        detections_2d: List of Detection2D objects
        detections_3d: List of Detection3D objects (must be 1:1 correspondence)

    Returns:
        Corresponding Detection3D object if found (first hit wins), None otherwise
    """
    for i, det_2d in enumerate(detections_2d):
        if det_2d.bbox and i < len(detections_3d):
            # Reuse the shared helpers instead of re-deriving the corner math
            # and containment test inline (consistency with point_in_bbox).
            corners = bbox2d_to_corners(det_2d.bbox)
            if point_in_bbox(click_pos, list(corners)):
                return detections_3d[i]

    return None
diff --git a/dimos/perception/detection2d/detic_2d_det.py b/dimos/perception/detection2d/detic_2d_det.py
index fc81526ad2..8bc4f9c4b0 100644
--- a/dimos/perception/detection2d/detic_2d_det.py
+++ b/dimos/perception/detection2d/detic_2d_det.py
@@ -15,6 +15,7 @@
import numpy as np
import os
import sys
+from dimos.perception.detection2d.utils import plot_results
# Add Detic to Python path
detic_path = os.path.join(os.path.dirname(__file__), "..", "..", "models", "Detic")
@@ -404,7 +405,6 @@ def visualize_results(self, image, bboxes, track_ids, class_ids, confidences, na
Returns:
Image with visualized detections
"""
- from dimos.perception.detection2d.utils import plot_results
return plot_results(image, bboxes, track_ids, class_ids, confidences, names)
diff --git a/dimos/perception/grasp_generation/utils.py b/dimos/perception/grasp_generation/utils.py
index 94377363f2..ab0cfd0d15 100644
--- a/dimos/perception/grasp_generation/utils.py
+++ b/dimos/perception/grasp_generation/utils.py
@@ -18,49 +18,7 @@
import open3d as o3d
import cv2
from typing import List, Dict, Tuple, Optional, Union
-
-
-def project_3d_points_to_2d(
- points_3d: np.ndarray, camera_intrinsics: Union[List[float], np.ndarray]
-) -> np.ndarray:
- """
- Project 3D points to 2D image coordinates using camera intrinsics.
-
- Args:
- points_3d: Nx3 array of 3D points (X, Y, Z)
- camera_intrinsics: Camera parameters as [fx, fy, cx, cy] list or 3x3 matrix
-
- Returns:
- Nx2 array of 2D image coordinates (u, v)
- """
- if len(points_3d) == 0:
- return np.zeros((0, 2), dtype=np.int32)
-
- # Filter out points with zero or negative depth
- valid_mask = points_3d[:, 2] > 0
- if not np.any(valid_mask):
- return np.zeros((0, 2), dtype=np.int32)
-
- valid_points = points_3d[valid_mask]
-
- # Extract camera parameters
- if isinstance(camera_intrinsics, list) and len(camera_intrinsics) == 4:
- fx, fy, cx, cy = camera_intrinsics
- else:
- camera_matrix = np.array(camera_intrinsics)
- fx = camera_matrix[0, 0]
- fy = camera_matrix[1, 1]
- cx = camera_matrix[0, 2]
- cy = camera_matrix[1, 2]
-
- # Project to image coordinates
- u = (valid_points[:, 0] * fx / valid_points[:, 2]) + cx
- v = (valid_points[:, 1] * fy / valid_points[:, 2]) + cy
-
- # Round to integer pixel coordinates
- points_2d = np.column_stack([u, v]).astype(np.int32)
-
- return points_2d
+from dimos.perception.common.utils import project_3d_points_to_2d, project_2d_points_to_3d
def create_gripper_geometry(
diff --git a/dimos/perception/pointcloud/utils.py b/dimos/perception/pointcloud/utils.py
index 0813c2ca0e..b3c395bfa3 100644
--- a/dimos/perception/pointcloud/utils.py
+++ b/dimos/perception/pointcloud/utils.py
@@ -24,8 +24,9 @@
import os
import cv2
import open3d as o3d
-from typing import List, Optional, Tuple, Union, Dict
+from typing import List, Optional, Tuple, Union, Dict, Any
from scipy.spatial import cKDTree
+from dimos.perception.common.utils import project_3d_points_to_2d
def load_camera_matrix_from_yaml(
@@ -304,48 +305,6 @@ def filter_point_cloud_radius(
return pcd.remove_radius_outlier(nb_points=nb_points, radius=radius)
-def project_3d_points_to_2d(
- points_3d: np.ndarray, camera_intrinsics: Union[List[float], np.ndarray]
-) -> np.ndarray:
- """
- Project 3D points to 2D image coordinates using camera intrinsics.
-
- Args:
- points_3d: Nx3 array of 3D points (X, Y, Z)
- camera_intrinsics: Camera parameters as [fx, fy, cx, cy] list or 3x3 matrix
-
- Returns:
- Nx2 array of 2D image coordinates (u, v)
- """
- if len(points_3d) == 0:
- return np.zeros((0, 2), dtype=np.int32)
-
- # Filter out points with zero or negative depth
- valid_mask = points_3d[:, 2] > 0
- if not np.any(valid_mask):
- return np.zeros((0, 2), dtype=np.int32)
-
- valid_points = points_3d[valid_mask]
-
- # Extract camera parameters
- if isinstance(camera_intrinsics, list) and len(camera_intrinsics) == 4:
- fx, fy, cx, cy = camera_intrinsics
- else:
- fx = camera_intrinsics[0, 0]
- fy = camera_intrinsics[1, 1]
- cx = camera_intrinsics[0, 2]
- cy = camera_intrinsics[1, 2]
-
- # Project to image coordinates
- u = (valid_points[:, 0] * fx / valid_points[:, 2]) + cx
- v = (valid_points[:, 1] * fy / valid_points[:, 2]) + cy
-
- # Round to integer pixel coordinates
- points_2d = np.column_stack([u, v]).astype(np.int32)
-
- return points_2d
-
-
def overlay_point_clouds_on_image(
base_image: np.ndarray,
point_clouds: List[o3d.geometry.PointCloud],
@@ -1080,3 +1039,73 @@ def combine_object_pointclouds(
combined_pcd.colors = o3d.utility.Vector3dVector(np.vstack(all_colors))
return combined_pcd
+
+
+def extract_centroids_from_masks(
+ rgb_image: np.ndarray,
+ depth_image: np.ndarray,
+ masks: List[np.ndarray],
+ camera_intrinsics: Union[List[float], np.ndarray],
+) -> List[Dict[str, Any]]:
+ """
+ Extract 3D centroids and orientations from segmentation masks.
+
+ Args:
+ rgb_image: RGB image (H, W, 3)
+ depth_image: Depth image (H, W) in meters
+ masks: List of boolean masks (H, W)
+ camera_intrinsics: Camera parameters as [fx, fy, cx, cy] or 3x3 matrix
+
+ Returns:
+ List of dictionaries containing:
+ - centroid: 3D centroid position [x, y, z] in camera frame
+ - orientation: Normalized direction vector from camera to centroid
+ - num_points: Number of valid 3D points
+ - mask_idx: Index of the mask in the input list
+ """
+ # Extract camera parameters
+ if isinstance(camera_intrinsics, list) and len(camera_intrinsics) == 4:
+ fx, fy, cx, cy = camera_intrinsics
+ else:
+ fx = camera_intrinsics[0, 0]
+ fy = camera_intrinsics[1, 1]
+ cx = camera_intrinsics[0, 2]
+ cy = camera_intrinsics[1, 2]
+
+ results = []
+
+ for mask_idx, mask in enumerate(masks):
+ if mask is None or mask.sum() == 0:
+ continue
+
+        # Get pixel coordinates where mask is True
+        y_coords, x_coords = np.where(mask)
+        depths = depth_image[y_coords, x_coords]
+        # Keep only pixels with a valid (positive) depth reading, matching
+        # the depth filtering done by project_3d_points_to_2d
+        valid = depths > 0
+        if not np.any(valid):
+            continue
+        x_coords, y_coords, depths = x_coords[valid], y_coords[valid], depths[valid]
+        # Convert to 3D points in camera frame
+        X = (x_coords - cx) * depths / fx
+        Y = (y_coords - cy) * depths / fy
+ Z = depths
+
+ # Calculate centroid
+ centroid_x = np.mean(X)
+ centroid_y = np.mean(Y)
+ centroid_z = np.mean(Z)
+ centroid = np.array([centroid_x, centroid_y, centroid_z])
+
+ # Calculate orientation as normalized direction from camera origin to centroid
+ # Camera origin is at (0, 0, 0)
+ orientation = centroid / np.linalg.norm(centroid)
+
+ results.append(
+ {
+ "centroid": centroid,
+ "orientation": orientation,
+                "num_points": int(depths.size),
+ "mask_idx": mask_idx,
+ }
+ )
+
+ return results
diff --git a/dimos/perception/segmentation/sam_2d_seg.py b/dimos/perception/segmentation/sam_2d_seg.py
index fcf27584e6..cb2acaf076 100644
--- a/dimos/perception/segmentation/sam_2d_seg.py
+++ b/dimos/perception/segmentation/sam_2d_seg.py
@@ -42,13 +42,12 @@ def __init__(
self,
model_path="models_fastsam",
model_name="FastSAM-s.onnx",
- device="cpu",
min_analysis_interval=5.0,
use_tracker=True,
use_analyzer=True,
use_rich_labeling=False,
+ use_filtering=True,
):
- self.device = device
if is_cuda_available():
logger.info("Using CUDA for SAM 2d segmenter")
if hasattr(onnxruntime, "preload_dlls"): # Handles CUDA 11 / onnxruntime-gpu<=1.18
@@ -62,6 +61,7 @@ def __init__(
self.use_tracker = use_tracker
self.use_analyzer = use_analyzer
self.use_rich_labeling = use_rich_labeling
+ self.use_filtering = use_filtering
module_dir = os.path.dirname(__file__)
self.tracker_config = os.path.join(module_dir, "config", "custom_tracker.yaml")
@@ -98,11 +98,10 @@ def process_image(self, image):
source=image,
device=self.device,
retina_masks=True,
- conf=0.6,
- iou=0.9,
+ conf=0.3,
+ iou=0.5,
persist=True,
verbose=False,
- tracker=self.tracker_config,
)
if len(results) > 0:
@@ -112,14 +111,25 @@ def process_image(self, image):
)
# Filter results
- (
- filtered_masks,
- filtered_bboxes,
- filtered_track_ids,
- filtered_probs,
- filtered_names,
- filtered_texture_values,
- ) = filter_segmentation_results(image, masks, bboxes, track_ids, probs, names, areas)
+ if self.use_filtering:
+ (
+ filtered_masks,
+ filtered_bboxes,
+ filtered_track_ids,
+ filtered_probs,
+ filtered_names,
+ filtered_texture_values,
+ ) = filter_segmentation_results(
+ image, masks, bboxes, track_ids, probs, names, areas
+ )
+ else:
+ # Use original results without filtering
+ filtered_masks = masks
+ filtered_bboxes = bboxes
+ filtered_track_ids = track_ids
+ filtered_probs = probs
+ filtered_names = names
+ filtered_texture_values = []
if self.use_tracker:
# Update tracker with filtered results
diff --git a/dimos/robot/agilex/README.md b/dimos/robot/agilex/README.md
new file mode 100644
index 0000000000..1e678cae65
--- /dev/null
+++ b/dimos/robot/agilex/README.md
@@ -0,0 +1,371 @@
+# DIMOS Manipulator Robot Development Guide
+
+This guide explains how to create robot classes, integrate agents, and use the DIMOS module system with LCM transport.
+
+## Table of Contents
+1. [Robot Class Architecture](#robot-class-architecture)
+2. [Module System & LCM Transport](#module-system--lcm-transport)
+3. [Agent Integration](#agent-integration)
+4. [Complete Example](#complete-example)
+
+## Robot Class Architecture
+
+### Basic Robot Class Structure
+
+A DIMOS robot class should follow this pattern:
+
+```python
+from typing import Optional, List
+from dimos import core
+from dimos.types.robot_capabilities import RobotCapability
+
+class YourRobot:
+ """Your robot implementation."""
+
+ def __init__(self, robot_capabilities: Optional[List[RobotCapability]] = None):
+ # Core components
+ self.dimos = None
+ self.modules = {}
+ self.skill_library = SkillLibrary()
+
+ # Define capabilities
+ self.capabilities = robot_capabilities or [
+ RobotCapability.VISION,
+ RobotCapability.MANIPULATION,
+ ]
+
+ async def start(self):
+ """Start the robot modules."""
+ # Initialize DIMOS with worker count
+ self.dimos = core.start(2) # Number of workers needed
+
+ # Deploy modules
+ # ... (see Module System section)
+
+ def stop(self):
+ """Stop all modules and clean up."""
+ # Stop modules
+ # Close DIMOS
+ if self.dimos:
+ self.dimos.close()
+```
+
+### Key Components Explained
+
+1. **Initialization**: Store references to modules, skills, and capabilities
+2. **Async Start**: Modules must be deployed asynchronously
+3. **Proper Cleanup**: Always stop modules before closing DIMOS
+
+## Module System & LCM Transport
+
+### Understanding DIMOS Modules
+
+Modules are the building blocks of DIMOS robots. They:
+- Process data streams (inputs)
+- Produce outputs
+- Can be connected together
+- Communicate via LCM (Lightweight Communications and Marshalling)
+
+### Deploying a Module
+
+```python
+# Deploy a camera module
+self.camera = self.dimos.deploy(
+ ZEDModule, # Module class
+ camera_id=0, # Module parameters
+ resolution="HD720",
+ depth_mode="NEURAL",
+ fps=30,
+ publish_rate=30.0,
+ frame_id="camera_frame"
+)
+```
+
+### Setting Up LCM Transport
+
+LCM transport enables inter-module communication:
+
+```python
+# Enable LCM auto-configuration
+from dimos.protocol import pubsub
+pubsub.lcm.autoconf()
+
+# Configure output transport
+self.camera.color_image.transport = core.LCMTransport(
+ "/camera/color_image", # Topic name
+ Image # Message type
+)
+self.camera.depth_image.transport = core.LCMTransport(
+ "/camera/depth_image",
+ Image
+)
+```
+
+### Connecting Modules
+
+Connect module outputs to inputs:
+
+```python
+# Connect manipulation module to camera outputs
+self.manipulation.rgb_image.connect(self.camera.color_image)
+self.manipulation.depth_image.connect(self.camera.depth_image)
+self.manipulation.camera_info.connect(self.camera.camera_info)
+```
+
+### Module Communication Pattern
+
+```
+┌──────────────┐ LCM ┌────────────────┐ LCM ┌──────────────┐
+│ Camera │────────▶│ Manipulation │────────▶│ Visualization│
+│ Module │ Messages│ Module │ Messages│ Output │
+└──────────────┘ └────────────────┘ └──────────────┘
+ ▲ ▲
+ │ │
+ └──────────────────────────┘
+ Direct Connection via RPC call
+```
+
+## Agent Integration
+
+### Setting Up Agent with Robot
+
+The run file pattern for agent integration:
+
+```python
+#!/usr/bin/env python3
+import asyncio
+import reactivex as rx, reactivex.operators as ops
+from dimos.agents.claude_agent import ClaudeAgent
+from dimos.web.robot_web_interface import RobotWebInterface
+
+def main():
+ # 1. Create and start robot
+ robot = YourRobot()
+ asyncio.run(robot.start())
+
+ # 2. Set up skills
+ skills = robot.get_skills()
+ skills.add(YourSkill)
+ skills.create_instance("YourSkill", robot=robot)
+
+ # 3. Set up reactive streams
+ agent_response_subject = rx.subject.Subject()
+ agent_response_stream = agent_response_subject.pipe(ops.share())
+
+ # 4. Create web interface
+ web_interface = RobotWebInterface(
+ port=5555,
+ text_streams={"agent_responses": agent_response_stream},
+ audio_subject=rx.subject.Subject()
+ )
+
+ # 5. Create agent
+ agent = ClaudeAgent(
+ dev_name="your_agent",
+ input_query_stream=web_interface.query_stream,
+ skills=skills,
+ system_query="Your system prompt here",
+ model_name="claude-3-5-haiku-latest"
+ )
+
+ # 6. Connect agent responses
+ agent.get_response_observable().subscribe(
+ lambda x: agent_response_subject.on_next(x)
+ )
+
+ # 7. Run interface
+ web_interface.run()
+```
+
+### Key Integration Points
+
+1. **Reactive Streams**: Use RxPy for event-driven communication
+2. **Web Interface**: Provides user input/output
+3. **Agent**: Processes natural language and executes skills
+4. **Skills**: Define robot capabilities as executable actions
+
+## Complete Example
+
+### Step 1: Create Robot Class (`my_robot.py`)
+
+```python
+import asyncio
+from typing import Optional, List
+from dimos import core
+from dimos.hardware.camera import CameraModule
+from dimos.manipulation.module import ManipulationModule
+from dimos.skills.skills import SkillLibrary
+from dimos.types.robot_capabilities import RobotCapability
+from dimos_lcm.sensor_msgs import Image, CameraInfo
+from dimos.protocol import pubsub
+
+class MyRobot:
+ def __init__(self, robot_capabilities: Optional[List[RobotCapability]] = None):
+ self.dimos = None
+ self.camera = None
+ self.manipulation = None
+ self.skill_library = SkillLibrary()
+
+ self.capabilities = robot_capabilities or [
+ RobotCapability.VISION,
+ RobotCapability.MANIPULATION,
+ ]
+
+ async def start(self):
+ # Start DIMOS
+ self.dimos = core.start(2)
+
+ # Enable LCM
+ pubsub.lcm.autoconf()
+
+ # Deploy camera
+ self.camera = self.dimos.deploy(
+ CameraModule,
+ camera_id=0,
+ fps=30
+ )
+
+ # Configure camera LCM
+ self.camera.color_image.transport = core.LCMTransport("/camera/rgb", Image)
+ self.camera.depth_image.transport = core.LCMTransport("/camera/depth", Image)
+ self.camera.camera_info.transport = core.LCMTransport("/camera/info", CameraInfo)
+
+ # Deploy manipulation
+ self.manipulation = self.dimos.deploy(ManipulationModule)
+
+ # Connect modules
+ self.manipulation.rgb_image.connect(self.camera.color_image)
+ self.manipulation.depth_image.connect(self.camera.depth_image)
+ self.manipulation.camera_info.connect(self.camera.camera_info)
+
+ # Configure manipulation output
+ self.manipulation.viz_image.transport = core.LCMTransport("/viz/output", Image)
+
+ # Start modules
+ self.camera.start()
+ self.manipulation.start()
+
+ await asyncio.sleep(2) # Allow initialization
+
+ def get_skills(self):
+ return self.skill_library
+
+ def stop(self):
+ if self.manipulation:
+ self.manipulation.stop()
+ if self.camera:
+ self.camera.stop()
+ if self.dimos:
+ self.dimos.close()
+```
+
+### Step 2: Create Run Script (`run.py`)
+
+```python
+#!/usr/bin/env python3
+import asyncio
+import os
+from my_robot import MyRobot
+from dimos.agents.claude_agent import ClaudeAgent
+from dimos.skills.basic import BasicSkill
+from dimos.web.robot_web_interface import RobotWebInterface
+import reactivex as rx
+import reactivex.operators as ops
+
+SYSTEM_PROMPT = """You are a helpful robot assistant."""
+
+def main():
+ # Check API key
+ if not os.getenv("ANTHROPIC_API_KEY"):
+ print("Please set ANTHROPIC_API_KEY")
+ return
+
+ # Create robot
+ robot = MyRobot()
+
+ try:
+ # Start robot
+ asyncio.run(robot.start())
+
+ # Set up skills
+ skills = robot.get_skills()
+ skills.add(BasicSkill)
+ skills.create_instance("BasicSkill", robot=robot)
+
+ # Set up streams
+ agent_response_subject = rx.subject.Subject()
+ agent_response_stream = agent_response_subject.pipe(ops.share())
+
+ # Create web interface
+ web_interface = RobotWebInterface(
+ port=5555,
+ text_streams={"agent_responses": agent_response_stream}
+ )
+
+ # Create agent
+ agent = ClaudeAgent(
+ dev_name="my_agent",
+ input_query_stream=web_interface.query_stream,
+ skills=skills,
+ system_query=SYSTEM_PROMPT
+ )
+
+ # Connect responses
+ agent.get_response_observable().subscribe(
+ lambda x: agent_response_subject.on_next(x)
+ )
+
+ print("Robot ready at http://localhost:5555")
+
+ # Run
+ web_interface.run()
+
+ finally:
+ robot.stop()
+
+if __name__ == "__main__":
+ main()
+```
+
+### Step 3: Define Skills (`skills.py`)
+
+```python
+from dimos.skills import Skill, skill
+
+@skill(
+ description="Perform a basic action",
+ parameters={
+ "action": "The action to perform"
+ }
+)
+class BasicSkill(Skill):
+ def __init__(self, robot):
+ self.robot = robot
+
+ def run(self, action: str):
+ # Implement skill logic
+ return f"Performed: {action}"
+```
+
+## Best Practices
+
+1. **Module Lifecycle**: Always start DIMOS before deploying modules
+2. **LCM Topics**: Use descriptive topic names with namespaces
+3. **Error Handling**: Wrap module operations in try-except blocks
+4. **Resource Cleanup**: Ensure proper cleanup in stop() methods
+5. **Async Operations**: Use asyncio for non-blocking operations
+6. **Stream Management**: Use RxPy for reactive programming patterns
+
+## Debugging Tips
+
+1. **Check Module Status**: Print module.io().result() to see connections
+2. **Monitor LCM**: Use Foxglove to visualize LCM messages
+3. **Log Everything**: Use dimos.utils.logging_config.setup_logger()
+4. **Test Modules Independently**: Deploy and test one module at a time
+
+## Common Issues
+
+1. **"Module not started"**: Ensure start() is called after deployment
+2. **"No data received"**: Check LCM transport configuration
+3. **"Connection failed"**: Verify input/output types match
+4. **"Cleanup errors"**: Stop modules before closing DIMOS
\ No newline at end of file
diff --git a/dimos/robot/agilex/README_CN.md b/dimos/robot/agilex/README_CN.md
new file mode 100644
index 0000000000..482a09dd6d
--- /dev/null
+++ b/dimos/robot/agilex/README_CN.md
@@ -0,0 +1,465 @@
+# DIMOS 机械臂机器人开发指南
+
+本指南介绍如何创建机器人类、集成智能体(Agent)以及使用 DIMOS 模块系统和 LCM 传输。
+
+## 目录
+1. [机器人类架构](#机器人类架构)
+2. [模块系统与 LCM 传输](#模块系统与-lcm-传输)
+3. [智能体集成](#智能体集成)
+4. [完整示例](#完整示例)
+
+## 机器人类架构
+
+### 基本机器人类结构
+
+DIMOS 机器人类应遵循以下模式:
+
+```python
+from typing import Optional, List
+from dimos import core
+from dimos.types.robot_capabilities import RobotCapability
+
+class YourRobot:
+ """您的机器人实现。"""
+
+ def __init__(self, robot_capabilities: Optional[List[RobotCapability]] = None):
+ # 核心组件
+ self.dimos = None
+ self.modules = {}
+ self.skill_library = SkillLibrary()
+
+ # 定义能力
+ self.capabilities = robot_capabilities or [
+ RobotCapability.VISION,
+ RobotCapability.MANIPULATION,
+ ]
+
+ async def start(self):
+ """启动机器人模块。"""
+ # 初始化 DIMOS,指定工作线程数
+ self.dimos = core.start(2) # 需要的工作线程数
+
+ # 部署模块
+ # ... (参见模块系统章节)
+
+ def stop(self):
+ """停止所有模块并清理资源。"""
+ # 停止模块
+ # 关闭 DIMOS
+ if self.dimos:
+ self.dimos.close()
+```
+
+### 关键组件说明
+
+1. **初始化**:存储模块、技能和能力的引用
+2. **异步启动**:模块必须异步部署
+3. **正确清理**:在关闭 DIMOS 之前始终停止模块
+
+## 模块系统与 LCM 传输
+
+### 理解 DIMOS 模块
+
+模块是 DIMOS 机器人的构建块。它们:
+- 处理数据流(输入)
+- 产生输出
+- 可以相互连接
+- 通过 LCM(轻量级通信和编组)进行通信
+
+### 部署模块
+
+```python
+# 部署相机模块
+self.camera = self.dimos.deploy(
+ ZEDModule, # 模块类
+ camera_id=0, # 模块参数
+ resolution="HD720",
+ depth_mode="NEURAL",
+ fps=30,
+ publish_rate=30.0,
+ frame_id="camera_frame"
+)
+```
+
+### 设置 LCM 传输
+
+LCM 传输实现模块间通信:
+
+```python
+# 启用 LCM 自动配置
+from dimos.protocol import pubsub
+pubsub.lcm.autoconf()
+
+# 配置输出传输
+self.camera.color_image.transport = core.LCMTransport(
+ "/camera/color_image", # 主题名称
+ Image # 消息类型
+)
+self.camera.depth_image.transport = core.LCMTransport(
+ "/camera/depth_image",
+ Image
+)
+```
+
+### 连接模块
+
+将模块输出连接到输入:
+
+```python
+# 将操作模块连接到相机输出
+self.manipulation.rgb_image.connect(self.camera.color_image) # ROS set_callback
+self.manipulation.depth_image.connect(self.camera.depth_image)
+self.manipulation.camera_info.connect(self.camera.camera_info)
+```
+
+### 模块通信模式
+
+```
+┌──────────────┐ LCM ┌────────────────┐ LCM ┌──────────────┐
+│ 相机模块 │────────▶│ 操作模块 │────────▶│ 可视化输出 │
+│ │ 消息 │ │ 消息 │ │
+└──────────────┘ └────────────────┘ └──────────────┘
+ ▲ ▲
+ │ │
+ └──────────────────────────┘
+ 直接连接(RPC指令)
+```
+
+## 智能体集成
+
+### 设置智能体与机器人
+
+运行文件的智能体集成模式:
+
+```python
+#!/usr/bin/env python3
+import asyncio
+import reactivex as rx, reactivex.operators as ops
+from dimos.agents.claude_agent import ClaudeAgent
+from dimos.web.robot_web_interface import RobotWebInterface
+
+def main():
+ # 1. 创建并启动机器人
+ robot = YourRobot()
+ asyncio.run(robot.start())
+
+ # 2. 设置技能
+ skills = robot.get_skills()
+ skills.add(YourSkill)
+ skills.create_instance("YourSkill", robot=robot)
+
+ # 3. 设置响应式流
+ agent_response_subject = rx.subject.Subject()
+ agent_response_stream = agent_response_subject.pipe(ops.share())
+
+ # 4. 创建 Web 界面
+ web_interface = RobotWebInterface(
+ port=5555,
+ text_streams={"agent_responses": agent_response_stream},
+ audio_subject=rx.subject.Subject()
+ )
+
+ # 5. 创建智能体
+ agent = ClaudeAgent(
+ dev_name="your_agent",
+ input_query_stream=web_interface.query_stream,
+ skills=skills,
+ system_query="您的系统提示词",
+ model_name="claude-3-5-haiku-latest"
+ )
+
+ # 6. 连接智能体响应
+ agent.get_response_observable().subscribe(
+ lambda x: agent_response_subject.on_next(x)
+ )
+
+ # 7. 运行界面
+ web_interface.run()
+```
+
+### 关键集成点
+
+1. **响应式流**:使用 RxPy 进行事件驱动通信
+2. **Web 界面**:提供用户输入/输出
+3. **智能体**:处理自然语言并执行技能
+4. **技能**:将机器人能力定义为可执行动作
+
+## 完整示例
+
+### 步骤 1:创建机器人类(`my_robot.py`)
+
+```python
+import asyncio
+from typing import Optional, List
+from dimos import core
+from dimos.hardware.camera import CameraModule
+from dimos.manipulation.module import ManipulationModule
+from dimos.skills.skills import SkillLibrary
+from dimos.types.robot_capabilities import RobotCapability
+from dimos_lcm.sensor_msgs import Image, CameraInfo
+from dimos.protocol import pubsub
+
+class MyRobot:
+ def __init__(self, robot_capabilities: Optional[List[RobotCapability]] = None):
+ self.dimos = None
+ self.camera = None
+ self.manipulation = None
+ self.skill_library = SkillLibrary()
+
+ self.capabilities = robot_capabilities or [
+ RobotCapability.VISION,
+ RobotCapability.MANIPULATION,
+ ]
+
+ async def start(self):
+ # 启动 DIMOS
+ self.dimos = core.start(2)
+
+ # 启用 LCM
+ pubsub.lcm.autoconf()
+
+ # 部署相机
+ self.camera = self.dimos.deploy(
+ CameraModule,
+ camera_id=0,
+ fps=30
+ )
+
+ # 配置相机 LCM
+ self.camera.color_image.transport = core.LCMTransport("/camera/rgb", Image)
+ self.camera.depth_image.transport = core.LCMTransport("/camera/depth", Image)
+ self.camera.camera_info.transport = core.LCMTransport("/camera/info", CameraInfo)
+
+ # 部署操作模块
+ self.manipulation = self.dimos.deploy(ManipulationModule)
+
+ # 连接模块
+ self.manipulation.rgb_image.connect(self.camera.color_image)
+ self.manipulation.depth_image.connect(self.camera.depth_image)
+ self.manipulation.camera_info.connect(self.camera.camera_info)
+
+ # 配置操作输出
+ self.manipulation.viz_image.transport = core.LCMTransport("/viz/output", Image)
+
+ # 启动模块
+ self.camera.start()
+ self.manipulation.start()
+
+ await asyncio.sleep(2) # 允许初始化
+
+ def get_skills(self):
+ return self.skill_library
+
+ def stop(self):
+ if self.manipulation:
+ self.manipulation.stop()
+ if self.camera:
+ self.camera.stop()
+ if self.dimos:
+ self.dimos.close()
+```
+
+### 步骤 2:创建运行脚本(`run.py`)
+
+```python
+#!/usr/bin/env python3
+import asyncio
+import os
+from my_robot import MyRobot
+from dimos.agents.claude_agent import ClaudeAgent
+from dimos.skills.basic import BasicSkill
+from dimos.web.robot_web_interface import RobotWebInterface
+import reactivex as rx
+import reactivex.operators as ops
+
+SYSTEM_PROMPT = """您是一个有用的机器人助手。"""
+
+def main():
+ # 检查 API 密钥
+ if not os.getenv("ANTHROPIC_API_KEY"):
+ print("请设置 ANTHROPIC_API_KEY")
+ return
+
+ # 创建机器人
+ robot = MyRobot()
+
+ try:
+ # 启动机器人
+ asyncio.run(robot.start())
+
+ # 设置技能
+ skills = robot.get_skills()
+ skills.add(BasicSkill)
+ skills.create_instance("BasicSkill", robot=robot)
+
+ # 设置流
+ agent_response_subject = rx.subject.Subject()
+ agent_response_stream = agent_response_subject.pipe(ops.share())
+
+ # 创建 Web 界面
+ web_interface = RobotWebInterface(
+ port=5555,
+ text_streams={"agent_responses": agent_response_stream}
+ )
+
+ # 创建智能体
+ agent = ClaudeAgent(
+ dev_name="my_agent",
+ input_query_stream=web_interface.query_stream,
+ skills=skills,
+ system_query=SYSTEM_PROMPT
+ )
+
+ # 连接响应
+ agent.get_response_observable().subscribe(
+ lambda x: agent_response_subject.on_next(x)
+ )
+
+ print("机器人就绪,访问 http://localhost:5555")
+
+ # 运行
+ web_interface.run()
+
+ finally:
+ robot.stop()
+
+if __name__ == "__main__":
+ main()
+```
+
+### 步骤 3:定义技能(`skills.py`)
+
+```python
+from dimos.skills import Skill, skill
+
+@skill(
+ description="执行一个基本动作",
+ parameters={
+ "action": "要执行的动作"
+ }
+)
+class BasicSkill(Skill):
+ def __init__(self, robot):
+ self.robot = robot
+
+ def run(self, action: str):
+ # 实现技能逻辑
+ return f"已执行:{action}"
+```
+
+## 最佳实践
+
+1. **模块生命周期**:在部署模块之前始终先启动 DIMOS
+2. **LCM 主题**:使用带命名空间的描述性主题名称
+3. **错误处理**:用 try-except 块包装模块操作
+4. **资源清理**:确保在 stop() 方法中正确清理
+5. **异步操作**:使用 asyncio 进行非阻塞操作
+6. **流管理**:使用 RxPy 进行响应式编程模式
+
+## 调试技巧
+
+1. **检查模块状态**:打印 module.io().result() 查看连接
+2. **监控 LCM**:使用 Foxglove 可视化 LCM 消息
+3. **记录一切**:使用 dimos.utils.logging_config.setup_logger()
+4. **独立测试模块**:一次部署和测试一个模块
+
+## 常见问题
+
+1. **"模块未启动"**:确保在部署后调用 start()
+2. **"未收到数据"**:检查 LCM 传输配置
+3. **"连接失败"**:验证输入/输出类型是否匹配
+4. **"清理错误"**:在关闭 DIMOS 之前停止模块
+
+## 高级主题
+
+### 自定义模块开发
+
+创建自定义模块的基本结构:
+
+```python
+from dimos.core import Module, In, Out, rpc
+
+class CustomModule(Module):
+ # 定义输入
+ input_data: In[DataType] = None
+
+ # 定义输出
+ output_data: Out[DataType] = None
+
+ def __init__(self, param1, param2, **kwargs):
+ super().__init__(**kwargs)
+ self.param1 = param1
+ self.param2 = param2
+
+ @rpc
+ def start(self):
+ """启动模块处理。"""
+ self.input_data.subscribe(self._process_data)
+
+ def _process_data(self, data):
+ """处理输入数据。"""
+ # 处理逻辑
+ result = self.process(data)
+ # 发布输出
+ self.output_data.publish(result)
+
+ @rpc
+ def stop(self):
+ """停止模块。"""
+ # 清理资源
+ pass
+```
+
+### 技能开发指南
+
+技能是机器人可执行的高级动作:
+
+```python
+from dimos.skills import Skill, skill
+from typing import Optional
+
+@skill(
+ description="复杂操作技能",
+ parameters={
+ "target": "目标对象",
+ "location": "目标位置"
+ }
+)
+class ComplexSkill(Skill):
+ def __init__(self, robot, **kwargs):
+ super().__init__(**kwargs)
+ self.robot = robot
+
+ def run(self, target: str, location: Optional[str] = None):
+ """执行技能逻辑。"""
+ try:
+ # 1. 感知阶段
+ object_info = self.robot.detect_object(target)
+
+ # 2. 规划阶段
+            plan = None
+            if location:
+                plan = self.robot.plan_movement(object_info, location)
+            # 3. 执行阶段
+ result = self.robot.execute_plan(plan)
+
+ return {
+ "success": True,
+ "message": f"成功移动 {target} 到 {location}"
+ }
+
+ except Exception as e:
+ return {
+ "success": False,
+ "error": str(e)
+ }
+```
+
+### 性能优化
+
+1. **并行处理**:使用多个工作线程处理不同模块
+2. **数据缓冲**:为高频数据流实现缓冲机制
+3. **延迟加载**:仅在需要时初始化重型模块
+4. **资源池化**:重用昂贵的资源(如神经网络模型)
+
+希望本指南能帮助您快速上手 DIMOS 机器人开发!
\ No newline at end of file
diff --git a/dimos/robot/agilex/piper_arm.py b/dimos/robot/agilex/piper_arm.py
new file mode 100644
index 0000000000..63dc419a78
--- /dev/null
+++ b/dimos/robot/agilex/piper_arm.py
@@ -0,0 +1,201 @@
+# Copyright 2025 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+import logging
+from typing import Optional, List
+
+from dimos import core
+from dimos.hardware.zed_camera import ZEDModule
+from dimos.manipulation.visual_servoing.manipulation_module import ManipulationModule
+from dimos_lcm.sensor_msgs import Image
+from dimos.protocol import pubsub
+from dimos.skills.skills import SkillLibrary
+from dimos.types.robot_capabilities import RobotCapability
+from dimos.robot.foxglove_bridge import FoxgloveBridge
+from dimos.utils.logging_config import setup_logger
+
+# Import LCM message types
+from dimos_lcm.sensor_msgs import CameraInfo
+
+logger = setup_logger("dimos.robot.agilex.piper_arm")
+
+
+class PiperArmRobot:
+ """Piper Arm robot with ZED camera and manipulation capabilities."""
+
+ def __init__(self, robot_capabilities: Optional[List[RobotCapability]] = None):
+ self.dimos = None
+ self.stereo_camera = None
+ self.manipulation_interface = None
+ self.skill_library = SkillLibrary()
+
+ # Initialize capabilities
+ self.capabilities = robot_capabilities or [
+ RobotCapability.VISION,
+ RobotCapability.MANIPULATION,
+ ]
+
+ async def start(self):
+ """Start the robot modules."""
+ # Start Dimos
+ self.dimos = core.start(2) # Need 2 workers for ZED and manipulation modules
+ self.foxglove_bridge = FoxgloveBridge()
+
+ # Enable LCM auto-configuration
+ pubsub.lcm.autoconf()
+
+ # Deploy ZED module
+ logger.info("Deploying ZED module...")
+ self.stereo_camera = self.dimos.deploy(
+ ZEDModule,
+ camera_id=0,
+ resolution="HD720",
+ depth_mode="NEURAL",
+ fps=30,
+ enable_tracking=False, # We don't need tracking for manipulation
+ publish_rate=30.0,
+ frame_id="zed_camera",
+ )
+
+ # Configure ZED LCM transports
+ self.stereo_camera.color_image.transport = core.LCMTransport("/zed/color_image", Image)
+ self.stereo_camera.depth_image.transport = core.LCMTransport("/zed/depth_image", Image)
+ self.stereo_camera.camera_info.transport = core.LCMTransport("/zed/camera_info", CameraInfo)
+
+ # Deploy manipulation module
+ logger.info("Deploying manipulation module...")
+ self.manipulation_interface = self.dimos.deploy(ManipulationModule)
+
+ # Connect manipulation inputs to ZED outputs
+ self.manipulation_interface.rgb_image.connect(self.stereo_camera.color_image)
+ self.manipulation_interface.depth_image.connect(self.stereo_camera.depth_image)
+ self.manipulation_interface.camera_info.connect(self.stereo_camera.camera_info)
+
+ # Configure manipulation output
+ self.manipulation_interface.viz_image.transport = core.LCMTransport(
+ "/manipulation/viz", Image
+ )
+
+ # Print module info
+ logger.info("Modules configured:")
+ print("\nZED Module:")
+ print(self.stereo_camera.io().result())
+ print("\nManipulation Module:")
+ print(self.manipulation_interface.io().result())
+
+ # Start modules
+ logger.info("Starting modules...")
+ self.foxglove_bridge.start()
+ self.stereo_camera.start()
+ self.manipulation_interface.start()
+
+ # Give modules time to initialize
+ await asyncio.sleep(2)
+
+ logger.info("PiperArmRobot initialized and started")
+
+ def get_skills(self):
+ """Get the robot's skill library.
+
+ Returns:
+ The robot's skill library for adding/managing skills
+ """
+ return self.skill_library
+
+ def pick_and_place(
+ self, pick_x: int, pick_y: int, place_x: Optional[int] = None, place_y: Optional[int] = None
+ ):
+ """Execute pick and place task.
+
+ Args:
+ pick_x: X coordinate for pick location
+ pick_y: Y coordinate for pick location
+ place_x: X coordinate for place location (optional)
+ place_y: Y coordinate for place location (optional)
+
+ Returns:
+ Result of the pick and place operation
+ """
+ if self.manipulation_interface:
+ return self.manipulation_interface.pick_and_place(pick_x, pick_y, place_x, place_y)
+ else:
+ logger.error("Manipulation module not initialized")
+ return False
+
+ def handle_keyboard_command(self, key: str):
+ """Pass keyboard commands to manipulation module.
+
+ Args:
+ key: Keyboard key pressed
+
+ Returns:
+ Action taken or None
+ """
+ if self.manipulation_interface:
+ return self.manipulation_interface.handle_keyboard_command(key)
+ else:
+ logger.error("Manipulation module not initialized")
+ return None
+
+ def has_capability(self, capability: RobotCapability) -> bool:
+ """Check if the robot has a specific capability.
+
+ Args:
+ capability: The capability to check for
+
+ Returns:
+ bool: True if the robot has the capability
+ """
+ return capability in self.capabilities
+
+ def stop(self):
+ """Stop all modules and clean up."""
+ logger.info("Stopping PiperArmRobot...")
+
+ try:
+ if self.manipulation_interface:
+ self.manipulation_interface.stop()
+ self.manipulation_interface.cleanup()
+
+ if self.stereo_camera:
+ self.stereo_camera.stop()
+ except Exception as e:
+ logger.warning(f"Error stopping modules: {e}")
+
+ # Close dimos last to ensure workers are available for cleanup
+ if self.dimos:
+ self.dimos.close()
+
+ logger.info("PiperArmRobot stopped")
+
+
+async def run_piper_arm():
+ """Run the Piper Arm robot."""
+ robot = PiperArmRobot()
+
+ await robot.start()
+
+ # Keep the robot running
+ try:
+ while True:
+ await asyncio.sleep(1)
+ except KeyboardInterrupt:
+ logger.info("Keyboard interrupt received")
+ finally:
+        robot.stop()
+
+
+if __name__ == "__main__":
+ asyncio.run(run_piper_arm())
diff --git a/dimos/robot/agilex/run.py b/dimos/robot/agilex/run.py
new file mode 100644
index 0000000000..c9e5a036d8
--- /dev/null
+++ b/dimos/robot/agilex/run.py
@@ -0,0 +1,196 @@
+#!/usr/bin/env python3
+# Copyright 2025 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Run script for Piper Arm robot with Claude agent integration.
+Provides manipulation capabilities with natural language interface.
+"""
+
+import asyncio
+import os
+import sys
+import time
+from dotenv import load_dotenv
+
+import reactivex as rx
+import reactivex.operators as ops
+
+from dimos.robot.agilex.piper_arm import PiperArmRobot
+from dimos.agents.claude_agent import ClaudeAgent
+from dimos.skills.manipulation.pick_and_place import PickAndPlace
+from dimos.skills.kill_skill import KillSkill
+from dimos.skills.observe import Observe
+from dimos.web.robot_web_interface import RobotWebInterface
+from dimos.stream.audio.pipelines import stt, tts
+from dimos.utils.logging_config import setup_logger
+
+logger = setup_logger("dimos.robot.agilex.run")
+
+# Load environment variables
+load_dotenv()
+
+# System prompt for the Piper Arm manipulation agent
+SYSTEM_PROMPT = """You are an intelligent robotic assistant controlling a Piper Arm robot with advanced manipulation capabilities. Your primary role is to help users with pick and place tasks using natural language understanding.
+
+## Your Capabilities:
+1. **Visual Perception**: You have access to a ZED stereo camera that provides RGB and depth information
+2. **Object Manipulation**: You can pick up and place objects using a 6-DOF robotic arm with a gripper
+3. **Language Understanding**: You use the Qwen vision-language model to identify objects and locations from natural language descriptions
+
+## Available Skills:
+- **PickAndPlace**: Execute pick and place operations based on object and location descriptions
+ - Pick only: "Pick up the red mug"
+ - Pick and place: "Move the book to the shelf"
+- **Observe**: Capture and analyze the current camera view
+- **KillSkill**: Stop any currently running skill
+
+## Guidelines:
+1. **Safety First**: Always ensure safe operation. If unsure about an object's graspability or a placement location's stability, ask for clarification
+2. **Clear Communication**: Explain what you're doing and ask for confirmation when needed
+3. **Error Handling**: If a task fails, explain why and suggest alternatives
+4. **Precision**: When users give specific object descriptions, use them exactly as provided to the vision model
+
+## Interaction Examples:
+- User: "Pick up the coffee mug"
+ You: "I'll pick up the coffee mug for you." [Execute PickAndPlace with object_query="coffee mug"]
+
+- User: "Put the toy on the table"
+ You: "I'll place the toy on the table." [Execute PickAndPlace with object_query="toy", target_query="on the table"]
+
+- User: "What do you see?"
+ You: "Let me take a look at the current view." [Execute Observe]
+
+Remember: You're here to assist with manipulation tasks. Be helpful, precise, and always prioritize safe operation of the robot."""
+
+
+def main():
+ """Main entry point."""
+ print("\n" + "=" * 60)
+ print("Piper Arm Robot with Claude Agent")
+ print("=" * 60)
+ print("\nThis system integrates:")
+ print(" - Piper Arm 6-DOF robot")
+ print(" - ZED stereo camera")
+ print(" - Claude AI for natural language understanding")
+ print(" - Qwen VLM for visual object detection")
+ print(" - Web interface with text and voice input")
+ print(" - Foxglove visualization via LCM")
+ print("\nStarting system...\n")
+
+ # Check for API key
+ if not os.getenv("ANTHROPIC_API_KEY"):
+ print("WARNING: ANTHROPIC_API_KEY not found in environment")
+ print("Please set your API key in .env file or environment")
+ sys.exit(1)
+
+ logger.info("Starting Piper Arm Robot with Agent")
+
+ # Create robot instance
+ robot = PiperArmRobot()
+
+ try:
+ # Start the robot (this is async, so we need asyncio.run)
+ logger.info("Initializing robot...")
+ asyncio.run(robot.start())
+ logger.info("Robot initialized successfully")
+
+ # Set up skill library
+ skills = robot.get_skills()
+ skills.add(PickAndPlace)
+ skills.add(Observe)
+ skills.add(KillSkill)
+
+ # Create skill instances
+ skills.create_instance("PickAndPlace", robot=robot)
+ skills.create_instance("Observe", robot=robot)
+ skills.create_instance("KillSkill", robot=robot, skill_library=skills)
+
+ logger.info(f"Skills registered: {[skill.__name__ for skill in skills.get_class_skills()]}")
+
+ # Set up streams for agent and web interface
+ agent_response_subject = rx.subject.Subject()
+ agent_response_stream = agent_response_subject.pipe(ops.share())
+ audio_subject = rx.subject.Subject()
+
+ # Set up streams for web interface
+ streams = {}
+
+ text_streams = {
+ "agent_responses": agent_response_stream,
+ }
+
+ # Create web interface first (needed for agent)
+ try:
+ web_interface = RobotWebInterface(
+ port=5555, text_streams=text_streams, audio_subject=audio_subject, **streams
+ )
+ logger.info("Web interface created successfully")
+ except Exception as e:
+ logger.error(f"Failed to create web interface: {e}")
+ raise
+
+ # Set up speech-to-text
+ stt_node = stt()
+ stt_node.consume_audio(audio_subject.pipe(ops.share()))
+
+ # Create Claude agent
+ agent = ClaudeAgent(
+ dev_name="piper_arm_agent",
+ input_query_stream=web_interface.query_stream, # Use text input from web interface
+ # input_query_stream=stt_node.emit_text(), # Uncomment to use voice input
+ skills=skills,
+ system_query=SYSTEM_PROMPT,
+ model_name="claude-3-5-haiku-latest",
+ thinking_budget_tokens=0,
+ max_output_tokens_per_request=4096,
+ )
+
+ # Subscribe to agent responses
+ agent.get_response_observable().subscribe(lambda x: agent_response_subject.on_next(x))
+
+ # Set up text-to-speech for agent responses
+ tts_node = tts()
+ tts_node.consume_text(agent.get_response_observable())
+
+ logger.info("=" * 60)
+ logger.info("Piper Arm Agent Ready!")
+ logger.info(f"Web interface available at: http://localhost:5555")
+ logger.info("Foxglove visualization available at: ws://localhost:8765")
+ logger.info("You can:")
+ logger.info(" - Type commands in the web interface")
+ logger.info(" - Use voice commands")
+ logger.info(" - Ask the robot to pick up objects")
+ logger.info(" - Ask the robot to move objects to locations")
+ logger.info("=" * 60)
+
+ # Run web interface (this blocks)
+ web_interface.run()
+
+ except KeyboardInterrupt:
+ logger.info("Keyboard interrupt received")
+ except Exception as e:
+ logger.error(f"Error running robot: {e}")
+ import traceback
+
+ traceback.print_exc()
+ finally:
+ logger.info("Shutting down...")
+        # Stop the robot (stop() is synchronous; no await/asyncio needed here)
+ robot.stop()
+ logger.info("Robot stopped")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/dimos/robot/frontier_exploration/__init__.py b/dimos/robot/frontier_exploration/__init__.py
index 2b69011a9f..8b13789179 100644
--- a/dimos/robot/frontier_exploration/__init__.py
+++ b/dimos/robot/frontier_exploration/__init__.py
@@ -1 +1 @@
-from utils import *
+
diff --git a/dimos/robot/frontier_exploration/test_wavefront_frontier_goal_selector.py b/dimos/robot/frontier_exploration/test_wavefront_frontier_goal_selector.py
index c9b75b28d8..cd344dd0b4 100644
--- a/dimos/robot/frontier_exploration/test_wavefront_frontier_goal_selector.py
+++ b/dimos/robot/frontier_exploration/test_wavefront_frontier_goal_selector.py
@@ -25,7 +25,7 @@
)
from dimos.robot.unitree_webrtc.type.lidar import LidarMessage
from dimos.robot.unitree_webrtc.type.map import Map
-from dimos.types.vector import Vector
+from dimos.msgs.geometry_msgs import Vector3 as Vector
from dimos.utils.testing import SensorReplay
@@ -130,13 +130,6 @@ def test_exploration_goal_selection():
assert isinstance(goal, Vector), "Goal should be a Vector"
print(f"Selected exploration goal: ({goal.x:.2f}, {goal.y:.2f})")
- # Verify goal is at reasonable distance from robot
- distance = np.sqrt((goal.x - robot_pose.x) ** 2 + (goal.y - robot_pose.y) ** 2)
- print(f"Goal distance from robot: {distance:.2f}m")
- assert distance >= explorer.min_distance_from_robot, (
- "Goal should respect minimum distance from robot"
- )
-
# Test that goal gets marked as explored
assert len(explorer.explored_goals) == 1, "Goal should be marked as explored"
assert explorer.explored_goals[0] == goal, "Explored goal should match selected goal"
diff --git a/dimos/robot/frontier_exploration/wavefront_frontier_goal_selector.py b/dimos/robot/frontier_exploration/wavefront_frontier_goal_selector.py
index 1aca32fc93..454a70e803 100644
--- a/dimos/robot/frontier_exploration/wavefront_frontier_goal_selector.py
+++ b/dimos/robot/frontier_exploration/wavefront_frontier_goal_selector.py
@@ -82,11 +82,9 @@ class WavefrontFrontierExplorer:
def __init__(
self,
- min_frontier_size: int = 10,
+ min_frontier_size: int = 8,
occupancy_threshold: int = 65,
- subsample_resolution: int = 2,
- min_distance_from_robot: float = 0.5,
- explored_area_buffer: float = 0.5,
+ subsample_resolution: int = 3,
min_distance_from_obstacles: float = 0.6,
info_gain_threshold: float = 0.03,
num_no_gain_attempts: int = 4,
@@ -101,8 +99,6 @@ def __init__(
min_frontier_size: Minimum number of points to consider a valid frontier
occupancy_threshold: Cost threshold above which a cell is considered occupied (0-255)
subsample_resolution: Factor by which to subsample the costmap for faster processing (1=no subsampling, 2=half resolution, 4=quarter resolution)
- min_distance_from_robot: Minimum distance frontier must be from robot (meters)
- explored_area_buffer: Buffer distance around free areas to consider as explored (meters)
min_distance_from_obstacles: Minimum distance frontier must be from obstacles (meters)
info_gain_threshold: Minimum percentage increase in costmap information required to continue exploration (0.05 = 5%)
num_no_gain_attempts: Maximum number of consecutive attempts with no information gain
@@ -113,8 +109,6 @@ def __init__(
self.min_frontier_size = min_frontier_size
self.occupancy_threshold = occupancy_threshold
self.subsample_resolution = subsample_resolution
- self.min_distance_from_robot = min_distance_from_robot
- self.explored_area_buffer = explored_area_buffer
self.min_distance_from_obstacles = min_distance_from_obstacles
self.info_gain_threshold = info_gain_threshold
self.num_no_gain_attempts = num_no_gain_attempts
@@ -513,15 +507,6 @@ def _rank_frontiers(
valid_frontiers = []
for i, frontier in enumerate(frontier_centroids):
- robot_distance = np.sqrt(
- (frontier.x - robot_pose.x) ** 2 + (frontier.y - robot_pose.y) ** 2
- )
-
- # Filter 1: Skip frontiers too close to robot
- if robot_distance < self.min_distance_from_robot:
- continue
-
- # Filter 2: Skip frontiers too close to obstacles
obstacle_distance = self._compute_distance_to_obstacles(frontier, costmap)
if obstacle_distance < self.min_distance_from_obstacles:
continue
diff --git a/dimos/robot/unitree_webrtc/connection.py b/dimos/robot/unitree_webrtc/connection.py
index 9bc1874cbe..6119cba860 100644
--- a/dimos/robot/unitree_webrtc/connection.py
+++ b/dimos/robot/unitree_webrtc/connection.py
@@ -239,7 +239,7 @@ async def accept_track(track: MediaStreamTrack) -> VideoMessage:
if stop_event.is_set():
return
frame = await track.recv()
- subject.on_next(Image.from_numpy(frame.to_ndarray(format="bgr24")))
+ subject.on_next(Image.from_numpy(frame.to_ndarray(format="rgb24")))
self.conn.video.add_track_callback(accept_track)
diff --git a/dimos/robot/unitree_webrtc/multiprocess/unitree_go2.py b/dimos/robot/unitree_webrtc/multiprocess/unitree_go2.py
index f2b701fc63..0532be8320 100644
--- a/dimos/robot/unitree_webrtc/multiprocess/unitree_go2.py
+++ b/dimos/robot/unitree_webrtc/multiprocess/unitree_go2.py
@@ -94,7 +94,9 @@ def odom_stream(self):
@functools.cache
def video_stream(self):
print("video stream start")
- video_store = TimedSensorReplay("unitree_office_walk/video", autocast=Image.from_numpy)
+ video_store = TimedSensorReplay(
+ "unitree_office_walk/video", autocast=lambda x: Image.from_numpy(x).to_rgb()
+ )
return video_store.stream()
def move(self, vector: Vector):
diff --git a/dimos/skills/manipulation/pick_and_place.py b/dimos/skills/manipulation/pick_and_place.py
new file mode 100644
index 0000000000..4306975d8d
--- /dev/null
+++ b/dimos/skills/manipulation/pick_and_place.py
@@ -0,0 +1,439 @@
+# Copyright 2025 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Pick and place skill for Piper Arm robot.
+
+This module provides a skill that uses Qwen VLM to identify pick and place
+locations based on natural language queries, then executes the manipulation.
+"""
+
+import json
+import cv2
+import os
+from typing import Optional, Tuple, Dict, Any
+import numpy as np
+from pydantic import Field
+
+from dimos.skills.manipulation.abstract_manipulation_skill import AbstractManipulationSkill
+from dimos.models.qwen.video_query import query_single_frame
+from dimos.utils.logging_config import setup_logger
+
+logger = setup_logger("dimos.skills.manipulation.pick_and_place")
+
+
+def parse_qwen_points_response(response: str) -> Optional[Tuple[Tuple[int, int], Tuple[int, int]]]:
+ """
+ Parse Qwen's response containing two points.
+
+ Args:
+ response: Qwen's response containing JSON with two points
+
+ Returns:
+ Tuple of (pick_point, place_point) where each point is (x, y), or None if parsing fails
+ """
+ try:
+ # Try to extract JSON from the response
+ start_idx = response.find("{")
+ end_idx = response.rfind("}") + 1
+
+ if start_idx >= 0 and end_idx > start_idx:
+ json_str = response[start_idx:end_idx]
+ result = json.loads(json_str)
+
+ # Extract pick and place points
+ if "pick_point" in result and "place_point" in result:
+ pick = result["pick_point"]
+ place = result["place_point"]
+
+ # Validate points have x,y coordinates
+ if (
+ isinstance(pick, (list, tuple))
+ and len(pick) >= 2
+ and isinstance(place, (list, tuple))
+ and len(place) >= 2
+ ):
+ return (int(pick[0]), int(pick[1])), (int(place[0]), int(place[1]))
+
+ except Exception as e:
+ logger.error(f"Error parsing Qwen points response: {e}")
+ logger.debug(f"Raw response: {response}")
+
+ return None
+
+
+def save_debug_image_with_points(
+ image: np.ndarray,
+ pick_point: Optional[Tuple[int, int]] = None,
+ place_point: Optional[Tuple[int, int]] = None,
+ filename_prefix: str = "qwen_debug",
+) -> str:
+ """
+ Save debug image with crosshairs marking pick and/or place points.
+
+ Args:
+ image: Image array in OpenCV BGR channel order (the caller converts
+ RGB to BGR before passing, and cv2.imwrite expects BGR) — confirm
+ for any new callers
+ pick_point: (x, y) pixel coordinates for pick location
+ place_point: (x, y) pixel coordinates for place location
+ filename_prefix: Prefix for the saved filename
+
+ Returns:
+ Path to the saved image (written into the current working directory)
+ """
+ # Create a copy to avoid modifying original
+ debug_image = image.copy()
+
+ # Draw pick point crosshair (green in BGR)
+ if pick_point:
+ x, y = pick_point
+ # Draw crosshair
+ cv2.drawMarker(debug_image, (x, y), (0, 255, 0), cv2.MARKER_CROSS, 30, 2)
+ # Draw circle
+ cv2.circle(debug_image, (x, y), 5, (0, 255, 0), -1)
+ # Add label
+ cv2.putText(
+ debug_image, "PICK", (x + 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2
+ )
+
+ # Draw place point crosshair (cyan in BGR)
+ if place_point:
+ x, y = place_point
+ # Draw crosshair
+ cv2.drawMarker(debug_image, (x, y), (255, 255, 0), cv2.MARKER_CROSS, 30, 2)
+ # Draw circle
+ cv2.circle(debug_image, (x, y), 5, (255, 255, 0), -1)
+ # Add label
+ cv2.putText(
+ debug_image, "PLACE", (x + 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2
+ )
+
+ # Draw arrow from pick to place if both exist
+ if pick_point and place_point:
+ cv2.arrowedLine(debug_image, pick_point, place_point, (255, 0, 255), 2, tipLength=0.03)
+
+ # Build output filename from the prefix. NOTE: despite earlier intent, no
+ # timestamp is included, so repeated calls overwrite the same file.
+ filename = f"{filename_prefix}.png"
+ filepath = os.path.join(os.getcwd(), filename)
+
+ # Save image
+ cv2.imwrite(filepath, debug_image)
+ logger.info(f"Debug image saved to: {filepath}")
+
+ return filepath
+
+
+def parse_qwen_single_point_response(response: str) -> Optional[Tuple[int, int]]:
+ """
+ Parse Qwen's response containing a single point.
+
+ Args:
+ response: Qwen's response containing JSON with a point
+
+ Returns:
+ Tuple of (x, y) or None if parsing fails
+ """
+ try:
+ # Try to extract JSON from the response
+ start_idx = response.find("{")
+ end_idx = response.rfind("}") + 1
+
+ if start_idx >= 0 and end_idx > start_idx:
+ json_str = response[start_idx:end_idx]
+ result = json.loads(json_str)
+
+ # Try different possible keys
+ point = None
+ for key in ["point", "location", "position", "coordinates"]:
+ if key in result:
+ point = result[key]
+ break
+
+ # Validate point has x,y coordinates
+ if point and isinstance(point, (list, tuple)) and len(point) >= 2:
+ return int(point[0]), int(point[1])
+
+ except Exception as e:
+ logger.error(f"Error parsing Qwen single point response: {e}")
+ logger.debug(f"Raw response: {response}")
+
+ return None
+
+
+class PickAndPlace(AbstractManipulationSkill):
+ """
+ A skill that performs pick and place operations using vision-language guidance.
+
+ This skill uses Qwen VLM to identify objects and locations based on natural
+ language queries, then executes pick and place operations using the robot's
+ manipulation interface.
+
+ Example usage:
+ # Just pick the object
+ skill = PickAndPlace(robot=robot, object_query="red mug")
+
+ # Pick and place the object
+ skill = PickAndPlace(robot=robot, object_query="red mug", target_query="on the coaster")
+
+ The skill uses the robot's stereo camera to capture RGB images and its manipulation
+ interface to execute the pick and place operation. It automatically handles coordinate
+ transformation from 2D pixel coordinates to 3D world coordinates.
+ """
+
+ object_query: str = Field(
+ "mug",
+ description="Natural language description of the object to pick (e.g., 'red mug', 'small box')",
+ )
+
+ target_query: Optional[str] = Field(
+ None,
+ description="Natural language description of where to place the object (e.g., 'on the table', 'in the basket'). If not provided, only pick operation will be performed.",
+ )
+
+ model_name: str = Field(
+ "qwen2.5-vl-72b-instruct", description="Qwen model to use for visual queries"
+ )
+
+ def __init__(self, robot=None, **data):
+ """
+ Initialize the PickAndPlace skill.
+
+ Args:
+ robot: The PiperArmRobot instance
+ **data: Additional configuration data
+ """
+ super().__init__(robot=robot, **data)
+
+ def _get_camera_frame(self) -> Optional[np.ndarray]:
+ """
+ Get a single RGB frame from the robot's camera.
+
+ Returns:
+ RGB image as numpy array or None if capture fails
+ """
+ if not self._robot or not self._robot.manipulation_interface:
+ logger.error("Robot or stereo camera not available")
+ return None
+
+ try:
+ # Use the RPC call to get a single RGB frame
+ rgb_frame = self._robot.manipulation_interface.get_single_rgb_frame()
+ if rgb_frame is None:
+ logger.error("Failed to capture RGB frame from camera")
+ return rgb_frame
+ except Exception as e:
+ logger.error(f"Error getting camera frame: {e}")
+ return None
+
+ def _query_pick_and_place_points(
+ self, frame: np.ndarray
+ ) -> Optional[Tuple[Tuple[int, int], Tuple[int, int]]]:
+ """
+ Query Qwen to get both pick and place points in a single query.
+
+ Args:
+ frame: RGB image array
+
+ Returns:
+ Tuple of (pick_point, place_point) or None if query fails
+ """
+ # This method is only called when both object and target are specified
+ prompt = (
+ f"Look at this image carefully. I need you to identify two specific locations:\n"
+ f"1. Find the {self.object_query} - this is the object I want to pick up\n"
+ f"2. Identify where to place it {self.target_query}\n\n"
+ "Instructions:\n"
+ "- The pick_point should be at the center or graspable part of the object\n"
+ "- The place_point should be a stable, flat surface at the target location\n"
+ "- Consider the object's size when choosing the placement point\n\n"
+ "Return ONLY a JSON object with this exact format:\n"
+ "{'pick_point': [x, y], 'place_point': [x, y]}\n"
+ "where [x, y] are pixel coordinates in the image."
+ )
+
+ try:
+ response = query_single_frame(frame, prompt, model_name=self.model_name)
+ return parse_qwen_points_response(response)
+ except Exception as e:
+ logger.error(f"Error querying Qwen for pick and place points: {e}")
+ return None
+
+ def _query_single_point(
+ self, frame: np.ndarray, query: str, point_type: str
+ ) -> Optional[Tuple[int, int]]:
+ """
+ Query Qwen to get a single point location.
+
+ Args:
+ frame: RGB image array
+ query: Natural language description of what to find
+ point_type: Type of point ('pick' or 'place') for context
+
+ Returns:
+ Tuple of (x, y) pixel coordinates or None if query fails
+ """
+ if point_type == "pick":
+ prompt = (
+ f"Look at this image carefully and find the {query}.\n\n"
+ "Instructions:\n"
+ "- Identify the exact object matching the description\n"
+ "- Choose the center point or the most graspable location on the object\n"
+ "- If multiple matching objects exist, choose the most prominent or accessible one\n"
+ "- Consider the object's shape and material when selecting the grasp point\n\n"
+ "Return ONLY a JSON object with this exact format:\n"
+ "{'point': [x, y]}\n"
+ "where [x, y] are the pixel coordinates of the optimal grasping point on the object."
+ )
+ else: # place
+ prompt = (
+ f"Look at this image and identify where to place an object {query}.\n\n"
+ "Instructions:\n"
+ "- Find a stable, flat surface at the specified location\n"
+ "- Ensure the placement spot is clear of obstacles\n"
+ "- Consider the size of the object being placed\n"
+ "- If the query specifies a container or specific spot, center the placement there\n"
+ "- Otherwise, find the most appropriate nearby surface\n\n"
+ "Return ONLY a JSON object with this exact format:\n"
+ "{'point': [x, y]}\n"
+ "where [x, y] are the pixel coordinates of the optimal placement location."
+ )
+
+ try:
+ response = query_single_frame(frame, prompt, model_name=self.model_name)
+ return parse_qwen_single_point_response(response)
+ except Exception as e:
+ logger.error(f"Error querying Qwen for {point_type} point: {e}")
+ return None
+
+ def __call__(self) -> Dict[str, Any]:
+ """
+ Execute the pick and place operation.
+
+ Returns:
+ Dictionary with operation results
+ """
+ super().__call__()
+
+ if not self._robot:
+ error_msg = "No robot instance provided to PickAndPlace skill"
+ logger.error(error_msg)
+ return {"success": False, "error": error_msg}
+
+ # Register skill as running
+ skill_library = self._robot.get_skills()
+ self.register_as_running("PickAndPlace", skill_library)
+
+ # Get camera frame
+ frame = self._get_camera_frame()
+ if frame is None:
+ return {"success": False, "error": "Failed to capture camera frame"}
+
+ # Convert RGB to BGR for OpenCV if needed
+ if len(frame.shape) == 3 and frame.shape[2] == 3:
+ frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+
+ # Get pick and place points from Qwen
+ pick_point = None
+ place_point = None
+
+ # Determine mode based on whether target_query is provided
+ if self.target_query is None:
+ # Pick only mode
+ logger.info("Pick-only mode (no target specified)")
+
+ # Query for pick point
+ pick_point = self._query_single_point(frame, self.object_query, "pick")
+ if not pick_point:
+ return {"success": False, "error": f"Failed to find {self.object_query}"}
+
+ # No place point needed for pick-only
+ place_point = None
+ else:
+ # Pick and place mode - can use either single or dual query
+ logger.info("Pick and place mode (target specified)")
+
+ # Try single query first for efficiency
+ points = self._query_pick_and_place_points(frame)
+ pick_point, place_point = points
+
+ logger.info(f"Pick point: {pick_point}, Place point: {place_point}")
+
+ # Save debug image with marked points
+ if pick_point or place_point:
+ save_debug_image_with_points(frame, pick_point, place_point)
+
+ # Execute pick (and optionally place) using the robot's interface
+ try:
+ if place_point:
+ # Pick and place
+ result = self._robot.pick_and_place(
+ pick_x=pick_point[0],
+ pick_y=pick_point[1],
+ place_x=place_point[0],
+ place_y=place_point[1],
+ )
+ else:
+ # Pick only
+ result = self._robot.pick_and_place(
+ pick_x=pick_point[0], pick_y=pick_point[1], place_x=None, place_y=None
+ )
+
+ if result:
+ if self.target_query:
+ message = (
+ f"Successfully picked {self.object_query} and placed it {self.target_query}"
+ )
+ else:
+ message = f"Successfully picked {self.object_query}"
+
+ return {
+ "success": True,
+ "pick_point": pick_point,
+ "place_point": place_point,
+ "object": self.object_query,
+ "target": self.target_query,
+ "message": message,
+ }
+ else:
+ operation = "Pick and place" if self.target_query else "Pick"
+ return {
+ "success": False,
+ "pick_point": pick_point,
+ "place_point": place_point,
+ "error": f"{operation} operation failed",
+ }
+
+ except Exception as e:
+ logger.error(f"Error executing pick and place: {e}")
+ return {
+ "success": False,
+ "error": f"Execution error: {str(e)}",
+ "pick_point": pick_point,
+ "place_point": place_point,
+ }
+ finally:
+ # Always unregister skill when done
+ self.stop()
+
+ def stop(self) -> None:
+ """
+ Stop the pick and place operation and perform cleanup.
+ """
+ logger.info("Stopping PickAndPlace skill")
+
+ # Unregister skill from skill library
+ if self._robot:
+ skill_library = self._robot.get_skills()
+ self.unregister_as_running("PickAndPlace", skill_library)
+
+ logger.info("PickAndPlace skill stopped successfully")
diff --git a/dimos/utils/transform_utils.py b/dimos/utils/transform_utils.py
index 31d3840884..eaedbcecf3 100644
--- a/dimos/utils/transform_utils.py
+++ b/dimos/utils/transform_utils.py
@@ -13,10 +13,11 @@
# limitations under the License.
import numpy as np
-from typing import Tuple, Dict, Any
+from typing import Tuple
import logging
+from scipy.spatial.transform import Rotation as R
-from dimos.types.vector import Vector
+from dimos_lcm.geometry_msgs import Pose, Point, Vector3, Quaternion
logger = logging.getLogger(__name__)
@@ -31,25 +32,216 @@ def distance_angle_to_goal_xy(distance: float, angle: float) -> Tuple[float, flo
return distance * np.cos(angle), distance * np.sin(angle)
+def pose_to_matrix(pose: Pose) -> np.ndarray:
+ """
+ Convert pose to 4x4 homogeneous transform matrix.
+
+ Args:
+ pose: Pose object with position and orientation (quaternion)
+
+ Returns:
+ 4x4 transformation matrix
+ """
+ # Extract position
+ tx, ty, tz = pose.position.x, pose.position.y, pose.position.z
+
+ # Create rotation matrix from quaternion using scipy
+ quat = [pose.orientation.x, pose.orientation.y, pose.orientation.z, pose.orientation.w]
+
+ # Check for zero norm quaternion and use identity if invalid
+ quat_norm = np.linalg.norm(quat)
+ if quat_norm == 0.0:
+ # Use identity quaternion [0, 0, 0, 1] if zero norm detected
+ quat = [0.0, 0.0, 0.0, 1.0]
+
+ rotation = R.from_quat(quat)
+ Rot = rotation.as_matrix()
+
+ # Create 4x4 transform
+ T = np.eye(4)
+ T[:3, :3] = Rot
+ T[:3, 3] = [tx, ty, tz]
+
+ return T
+
+
+def matrix_to_pose(T: np.ndarray) -> Pose:
+ """
+ Convert 4x4 transformation matrix to Pose object.
+
+ Args:
+ T: 4x4 transformation matrix
+
+ Returns:
+ Pose object with position and orientation (quaternion)
+ """
+ # Extract position
+ pos = Point(T[0, 3], T[1, 3], T[2, 3])
+
+ # Extract rotation matrix and convert to quaternion
+ Rot = T[:3, :3]
+ rotation = R.from_matrix(Rot)
+ quat = rotation.as_quat() # Returns [x, y, z, w]
+
+ orientation = Quaternion(quat[0], quat[1], quat[2], quat[3])
+
+ return Pose(pos, orientation)
+
+
+def apply_transform(pose: Pose, transform_matrix: np.ndarray) -> Pose:
+ """
+ Apply a transformation matrix to a pose.
+
+ Args:
+ pose: Input pose
+ transform_matrix: 4x4 transformation matrix to apply
+
+ Returns:
+ Transformed pose
+ """
+ # Convert pose to matrix
+ T_pose = pose_to_matrix(pose)
+
+ # Apply transform
+ T_result = transform_matrix @ T_pose
+
+ # Convert back to pose
+ return matrix_to_pose(T_result)
+
+
+def optical_to_robot_frame(pose: Pose) -> Pose:
+ """
+ Convert pose from optical camera frame to robot frame convention.
+
+ Optical Camera Frame (e.g., ZED):
+ - X: Right
+ - Y: Down
+ - Z: Forward (away from camera)
+
+ Robot Frame (ROS/REP-103):
+ - X: Forward
+ - Y: Left
+ - Z: Up
+
+ Args:
+ pose: Pose in optical camera frame
+
+ Returns:
+ Pose in robot frame
+ """
+ # Position transformation
+ robot_x = pose.position.z # Forward = Camera Z
+ robot_y = -pose.position.x # Left = -Camera X
+ robot_z = -pose.position.y # Up = -Camera Y
+
+ # Rotation transformation using quaternions
+ # First convert quaternion to rotation matrix
+ quat_optical = [pose.orientation.x, pose.orientation.y, pose.orientation.z, pose.orientation.w]
+ R_optical = R.from_quat(quat_optical).as_matrix()
+
+ # Coordinate frame transformation matrix from optical to robot
+ # X_robot = Z_optical, Y_robot = -X_optical, Z_robot = -Y_optical
+ T_frame = np.array(
+ [
+ [0, 0, 1], # X_robot = Z_optical
+ [-1, 0, 0], # Y_robot = -X_optical
+ [0, -1, 0], # Z_robot = -Y_optical
+ ]
+ )
+
+ # Transform the rotation matrix
+ R_robot = T_frame @ R_optical @ T_frame.T
+
+ # Convert back to quaternion
+ quat_robot = R.from_matrix(R_robot).as_quat() # [x, y, z, w]
+
+ return Pose(
+ Point(robot_x, robot_y, robot_z),
+ Quaternion(quat_robot[0], quat_robot[1], quat_robot[2], quat_robot[3]),
+ )
+
+
+def robot_to_optical_frame(pose: Pose) -> Pose:
+ """
+ Convert pose from robot frame to optical camera frame convention.
+ This is the inverse of optical_to_robot_frame.
+
+ Args:
+ pose: Pose in robot frame
+
+ Returns:
+ Pose in optical camera frame
+ """
+ # Position transformation (inverse)
+ optical_x = -pose.position.y # Right = -Left
+ optical_y = -pose.position.z # Down = -Up
+ optical_z = pose.position.x # Forward = Forward
+
+ # Rotation transformation using quaternions
+ quat_robot = [pose.orientation.x, pose.orientation.y, pose.orientation.z, pose.orientation.w]
+ R_robot = R.from_quat(quat_robot).as_matrix()
+
+ # Coordinate frame transformation matrix from Robot to optical (inverse of optical to Robot)
+ # This is the transpose of the forward transformation
+ T_frame_inv = np.array(
+ [
+ [0, -1, 0], # X_optical = -Y_robot
+ [0, 0, -1], # Y_optical = -Z_robot
+ [1, 0, 0], # Z_optical = X_robot
+ ]
+ )
+
+ # Transform the rotation matrix
+ R_optical = T_frame_inv @ R_robot @ T_frame_inv.T
+
+ # Convert back to quaternion
+ quat_optical = R.from_matrix(R_optical).as_quat() # [x, y, z, w]
+
+ return Pose(
+ Point(optical_x, optical_y, optical_z),
+ Quaternion(quat_optical[0], quat_optical[1], quat_optical[2], quat_optical[3]),
+ )
+
+
+def yaw_towards_point(position: Point, target_point: Point = None) -> float:
+ """
+ Calculate yaw angle from target point to position (away from target).
+ This is commonly used for object orientation in grasping applications.
+ Assumes robot frame where X is forward and Y is left.
+
+ Args:
+ position: Current position in robot frame
+ target_point: Reference point (default: origin)
+
+ Returns:
+ Yaw angle in radians pointing from target_point to position
+ """
+ if target_point is None:
+ target_point = Point(0.0, 0.0, 0.0)
+ direction_x = position.x - target_point.x
+ direction_y = position.y - target_point.y
+ return np.arctan2(direction_y, direction_x)
+
+
def transform_robot_to_map(
- robot_position: Vector, robot_rotation: Vector, position: Vector, rotation: Vector
-) -> Tuple[Vector, Vector]:
+ robot_position: Point, robot_rotation: Vector3, position: Point, rotation: Vector3
+) -> Tuple[Point, Vector3]:
"""Transform position and rotation from robot frame to map frame.
Args:
robot_position: Current robot position in map frame
robot_rotation: Current robot rotation in map frame
- position: Position in robot frame as Vector (x, y, z)
- rotation: Rotation in robot frame as Vector (roll, pitch, yaw) in radians
+ position: Position in robot frame as Point (x, y, z)
+ rotation: Rotation in robot frame as Vector3 (roll, pitch, yaw) in radians
Returns:
Tuple of (transformed_position, transformed_rotation) where:
- - transformed_position: Vector (x, y, z) in map frame
- - transformed_rotation: Vector (roll, pitch, yaw) in map frame
+ - transformed_position: Point (x, y, z) in map frame
+ - transformed_rotation: Vector3 (roll, pitch, yaw) in map frame
Example:
- obj_pos_robot = Vector(1.0, 0.5, 0.0) # 1m forward, 0.5m left of robot
- obj_rot_robot = Vector(0.0, 0.0, 0.0) # No rotation relative to robot
+ obj_pos_robot = Point(1.0, 0.5, 0.0) # 1m forward, 0.5m left of robot
+ obj_rot_robot = Vector3(0.0, 0.0, 0.0) # No rotation relative to robot
map_pos, map_rot = transform_robot_to_map(robot_position, robot_rotation, obj_pos_robot, obj_rot_robot)
"""
@@ -79,7 +271,128 @@ def transform_robot_to_map(
map_pitch = robot_rot.y + rot_pitch # Add robot's pitch
map_yaw_rot = normalize_angle(robot_yaw + rot_yaw) # Add robot's yaw and normalize
- transformed_position = Vector(map_x, map_y, map_z)
- transformed_rotation = Vector(map_roll, map_pitch, map_yaw_rot)
+ transformed_position = Point(map_x, map_y, map_z)
+ transformed_rotation = Vector3(map_roll, map_pitch, map_yaw_rot)
return transformed_position, transformed_rotation
+
+
+def create_transform_from_6dof(translation: Vector3, euler_angles: Vector3) -> np.ndarray:
+ """
+ Create a 4x4 transformation matrix from 6DOF parameters.
+
+ Args:
+ translation: Translation vector [x, y, z] in meters
+ euler_angles: Euler angles [rx, ry, rz] in radians (XYZ convention)
+
+ Returns:
+ 4x4 transformation matrix
+ """
+ # Create transformation matrix
+ T = np.eye(4)
+
+ # Set translation
+ T[0:3, 3] = [translation.x, translation.y, translation.z]
+
+ # Set rotation using scipy
+ if np.linalg.norm([euler_angles.x, euler_angles.y, euler_angles.z]) > 1e-6:
+ rotation = R.from_euler("xyz", [euler_angles.x, euler_angles.y, euler_angles.z])
+ T[0:3, 0:3] = rotation.as_matrix()
+
+ return T
+
+
+def invert_transform(T: np.ndarray) -> np.ndarray:
+ """
+ Invert a 4x4 transformation matrix efficiently.
+
+ Args:
+ T: 4x4 transformation matrix
+
+ Returns:
+ Inverted 4x4 transformation matrix
+ """
+ # For homogeneous transform matrices, we can use the special structure:
+ # [R t]^-1 = [R^T -R^T*t]
+ # [0 1] [0 1 ]
+
+ Rot = T[:3, :3]
+ t = T[:3, 3]
+
+ T_inv = np.eye(4)
+ T_inv[:3, :3] = Rot.T
+ T_inv[:3, 3] = -Rot.T @ t
+
+ return T_inv
+
+
+def compose_transforms(*transforms: np.ndarray) -> np.ndarray:
+ """
+ Compose multiple transformation matrices.
+
+ Args:
+ *transforms: Variable number of 4x4 transformation matrices
+
+ Returns:
+ Composed 4x4 transformation matrix (T1 @ T2 @ ... @ Tn)
+ """
+ result = np.eye(4)
+ for T in transforms:
+ result = result @ T
+ return result
+
+
+def euler_to_quaternion(euler_angles: Vector3, degrees: bool = False) -> Quaternion:
+ """
+ Convert euler angles to quaternion.
+
+    Args:
+        euler_angles: Euler angles as Vector3 [roll, pitch, yaw] (XYZ convention)
+        degrees: If True, interpret the angles as degrees; defaults to radians.
+ Returns:
+ Quaternion object [x, y, z, w]
+ """
+ rotation = R.from_euler(
+ "xyz", [euler_angles.x, euler_angles.y, euler_angles.z], degrees=degrees
+ )
+ quat = rotation.as_quat() # Returns [x, y, z, w]
+ return Quaternion(quat[0], quat[1], quat[2], quat[3])
+
+
+def quaternion_to_euler(quaternion: Quaternion, degrees: bool = False) -> Vector3:
+ """
+ Convert quaternion to euler angles.
+
+    Args:
+        quaternion: Quaternion object [x, y, z, w]
+        degrees: If True, return angles in degrees; otherwise radians (normalized)
+    Returns:
+        Euler angles as Vector3 [roll, pitch, yaw] (XYZ convention)
+ """
+ quat = [quaternion.x, quaternion.y, quaternion.z, quaternion.w]
+ rotation = R.from_quat(quat)
+ euler = rotation.as_euler("xyz", degrees=degrees) # Returns [roll, pitch, yaw]
+ if not degrees:
+ return Vector3(
+ normalize_angle(euler[0]), normalize_angle(euler[1]), normalize_angle(euler[2])
+ )
+ else:
+ return Vector3(euler[0], euler[1], euler[2])
+
+
+def get_distance(pose1: Pose, pose2: Pose) -> float:
+ """
+ Calculate Euclidean distance between two poses.
+
+ Args:
+ pose1: First pose
+ pose2: Second pose
+
+ Returns:
+ Euclidean distance between the two poses in meters
+ """
+ dx = pose1.position.x - pose2.position.x
+ dy = pose1.position.y - pose2.position.y
+ dz = pose1.position.z - pose2.position.z
+
+ return np.linalg.norm(np.array([dx, dy, dz]))
diff --git a/docs/modules_CN.md b/docs/modules_CN.md
new file mode 100644
index 0000000000..d8f088ef59
--- /dev/null
+++ b/docs/modules_CN.md
@@ -0,0 +1,188 @@
+# Dimensional 模块系统
+
+DimOS 模块系统使用 Dask 进行计算分布和 LCM(轻量级通信和编组)进行高性能进程间通信,实现分布式、多进程的机器人应用。
+
+## 核心概念
+
+### 1. 模块定义
+模块是继承自 `dimos.core.Module` 的 Python 类,定义输入、输出和 RPC 方法:
+
+```python
+from dimos.core import Module, In, Out, rpc
+from dimos.msgs.geometry_msgs import Vector3
+
+class MyModule(Module): # ROS Node
+ # 将输入/输出声明为初始化为 None 的类属性
+ data_in: In[Vector3] = None # ROS Subscriber
+ data_out: Out[Vector3] = None # ROS Publisher
+
+    def __init__(self):
+ # 调用父类 Module 初始化
+ super().__init__()
+
+ @rpc
+ def remote_method(self, param):
+ """使用 @rpc 装饰的方法可以远程调用"""
+ return param * 2
+```
+
+### 2. 模块部署
+使用 `dimos.deploy()` 方法在 Dask 工作进程中部署模块:
+
+```python
+from dimos import core
+
+# 启动具有 N 个工作进程的 Dask 集群
+dimos = core.start(4)
+
+# 部署模块时可以传递初始化参数
+# 在这种情况下,param1 和 param2 被传递到模块初始化中
+module = dimos.deploy(Module, param1="value1", param2=123)
+```
+
+### 3. 流连接
+模块通过使用 LCM 传输的响应式流进行通信:
+
+```python
+# 为输出配置 LCM 传输
+module1.data_out.transport = core.LCMTransport("/topic_name", MessageType)
+
+# 将模块输入连接到输出
+module2.data_in.connect(module1.data_out)
+
+# 访问底层的 Observable 流
+stream = module1.data_out.observable()
+stream.subscribe(lambda msg: print(f"接收到: {msg}"))
+```
+
+### 4. 模块生命周期
+```python
+# 启动模块以开始处理
+module.start() # 如果定义了 @rpc start() 方法,则调用它
+
+# 检查模块 I/O 配置
+print(module.io().result()) # 显示输入、输出和 RPC 方法
+
+# 优雅关闭
+dimos.shutdown()
+```
+
+## 实际示例:机器人控制系统
+
+```python
+# 连接模块封装机器人硬件/仿真
+connection = dimos.deploy(ConnectionModule, ip=robot_ip)
+connection.lidar.transport = core.LCMTransport("/lidar", LidarMessage)
+connection.video.transport = core.LCMTransport("/video", Image)
+
+# 感知模块处理传感器数据
+perception = dimos.deploy(PersonTrackingStream, camera_intrinsics=[...])
+perception.video.connect(connection.video)
+perception.tracking_data.transport = core.pLCMTransport("/person_tracking")
+
+# 开始处理
+connection.start()
+perception.start()
+
+# 通过 RPC 启用跟踪
+perception.enable_tracking()
+
+# 获取最新的跟踪数据
+data = perception.get_tracking_data()
+```
+
+## LCM 传输配置
+
+```python
+# 用于简单类型(如激光雷达)的标准 LCM 传输
+connection.lidar.transport = core.LCMTransport("/lidar", LidarMessage)
+
+# 用于复杂 Python 对象/字典的基于 pickle 的传输
+connection.tracking_data.transport = core.pLCMTransport("/person_tracking")
+
+# 自动配置 LCM 系统缓冲区(在容器中必需)
+from dimos.protocol import pubsub
+pubsub.lcm.autoconf()
+```
+
+这种架构使得能够将复杂的机器人系统构建为可组合的分布式模块,这些模块通过流和 RPC 高效通信,从单机扩展到集群。
+
+# Dimensional 安装指南
+## Python 安装(Ubuntu 22.04)
+
+```bash
+sudo apt install python3-venv
+
+# 克隆仓库(dev 分支,无子模块)
+git clone -b dev https://github.com/dimensionalOS/dimos.git
+cd dimos
+
+# 创建并激活虚拟环境
+python3 -m venv venv
+source venv/bin/activate
+
+sudo apt install portaudio19-dev python3-pyaudio
+
+# 如果尚未安装,请安装 torch 和 torchvision
+# 示例 CUDA 11.8,Pytorch 2.0.1(如果需要不同的 pytorch 版本,请替换)
+pip install torch==2.0.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
+```
+
+### 安装依赖
+```bash
+# 仅 CPU(建议首先尝试)
+pip install .[cpu,dev]
+
+# CUDA 安装
+pip install .[cuda,dev]
+
+# 复制并配置环境变量
+cp default.env .env
+```
+
+### 测试安装
+```bash
+# 运行标准测试
+pytest -s dimos/
+
+# 测试模块功能
+pytest -s -m module dimos/
+
+# 测试 LCM 通信
+pytest -s -m lcm dimos/
+```
+
+# Unitree Go2 快速开始
+
+要快速测试模块系统,您可以直接运行 Unitree Go2 多进程示例:
+
+```bash
+# 确保设置了所需的环境变量
+export ROBOT_IP=
+
+# 运行多进程 Unitree Go2 示例
+python dimos/robot/unitree_webrtc/multiprocess/unitree_go2.py
+```
+
+## 模块系统的高级特性
+
+### 分布式计算
+DimOS 模块系统建立在 Dask 之上,提供了强大的分布式计算能力:
+
+- **自动负载均衡**:模块自动分布在可用的工作进程中
+- **容错性**:如果工作进程失败,模块可以在其他工作进程上重新启动
+- **可扩展性**:从单机到集群的无缝扩展
+
+### 响应式编程模型
+使用 RxPY 实现的响应式流提供了:
+
+- **异步处理**:非阻塞的数据流处理
+- **背压处理**:自动管理快速生产者和慢速消费者
+- **操作符链**:使用 map、filter、merge 等操作符进行流转换
+
+### 性能优化
+LCM 传输针对机器人应用进行了优化:
+
+- **零拷贝**:大型消息的高效内存使用
+- **低延迟**:微秒级的消息传递
+- **多播支持**:一对多的高效通信
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 7d51aa91d4..436b9a5750 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -115,6 +115,9 @@ manipulation = [
"pyyaml>=6.0",
"contact-graspnet-pytorch @ git+https://github.com/dimensionalOS/contact_graspnet_pytorch.git",
+ # piper arm
+ "piper-sdk",
+
# Visualization (Optional)
"kaleido>=0.2.1",
"plotly>=5.9.0",
diff --git a/tests/test_manipulation_perception_pipeline.py b/tests/test_manipulation_perception_pipeline.py
index 8b333ec310..227f991650 100644
--- a/tests/test_manipulation_perception_pipeline.py
+++ b/tests/test_manipulation_perception_pipeline.py
@@ -36,7 +36,7 @@
from dimos.stream.stereo_camera_streams.zed import ZEDCameraStream
from dimos.web.robot_web_interface import RobotWebInterface
from dimos.utils.logging_config import logger
-from dimos.perception.manip_aio_pipeline import ManipulationPipeline
+from dimos.manipulation.manip_aio_pipeline import ManipulationPipeline
def monitor_grasps(pipeline):
diff --git a/tests/test_manipulation_perception_pipeline.py.py b/tests/test_manipulation_perception_pipeline.py.py
new file mode 100644
index 0000000000..227f991650
--- /dev/null
+++ b/tests/test_manipulation_perception_pipeline.py.py
@@ -0,0 +1,167 @@
+# Copyright 2025 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import time
+import threading
+from reactivex import operators as ops
+
+import tests.test_header
+
+from pyzed import sl
+from dimos.stream.stereo_camera_streams.zed import ZEDCameraStream
+from dimos.web.robot_web_interface import RobotWebInterface
+from dimos.utils.logging_config import logger
+from dimos.manipulation.manip_aio_pipeline import ManipulationPipeline
+
+
+def monitor_grasps(pipeline):
+ """Monitor and print grasp updates in a separate thread."""
+ print(" Grasp monitor started...")
+
+ last_grasp_count = 0
+ last_update_time = time.time()
+
+ while True:
+ try:
+ # Get latest grasps using the getter function
+ grasps = pipeline.get_latest_grasps(timeout=0.5)
+ current_time = time.time()
+
+ if grasps is not None:
+ current_count = len(grasps)
+ if current_count != last_grasp_count:
+ print(f" Grasps received: {current_count} (at {time.strftime('%H:%M:%S')})")
+ if current_count > 0:
+ best_score = max(grasp.get("score", 0.0) for grasp in grasps)
+ print(f" Best grasp score: {best_score:.3f}")
+ last_grasp_count = current_count
+ last_update_time = current_time
+ else:
+ # Show periodic "still waiting" message
+ if current_time - last_update_time > 10.0:
+ print(f" Still waiting for grasps... ({time.strftime('%H:%M:%S')})")
+ last_update_time = current_time
+
+ time.sleep(1.0) # Check every second
+
+ except Exception as e:
+ print(f" Error in grasp monitor: {e}")
+ time.sleep(2.0)
+
+
+def main():
+ """Test point cloud filtering with grasp generation using ManipulationPipeline."""
+ print(" Testing point cloud filtering + grasp generation with ManipulationPipeline...")
+
+ # Configuration
+ min_confidence = 0.6
+ web_port = 5555
+ grasp_server_url = "ws://18.224.39.74:8000/ws/grasp"
+
+ try:
+ # Initialize ZED camera stream
+ zed_stream = ZEDCameraStream(resolution=sl.RESOLUTION.HD1080, fps=10)
+
+ # Get camera intrinsics
+ camera_intrinsics_dict = zed_stream.get_camera_info()
+ camera_intrinsics = [
+ camera_intrinsics_dict["fx"],
+ camera_intrinsics_dict["fy"],
+ camera_intrinsics_dict["cx"],
+ camera_intrinsics_dict["cy"],
+ ]
+
+ # Create the concurrent manipulation pipeline WITH grasp generation
+ pipeline = ManipulationPipeline(
+ camera_intrinsics=camera_intrinsics,
+ min_confidence=min_confidence,
+ max_objects=10,
+ grasp_server_url=grasp_server_url,
+ enable_grasp_generation=True, # Enable grasp generation
+ )
+
+ # Create ZED stream
+ zed_frame_stream = zed_stream.create_stream().pipe(ops.share())
+
+ # Create concurrent processing streams
+ streams = pipeline.create_streams(zed_frame_stream)
+ detection_viz_stream = streams["detection_viz"]
+ pointcloud_viz_stream = streams["pointcloud_viz"]
+ grasps_stream = streams.get("grasps") # Get grasp stream if available
+ grasp_overlay_stream = streams.get("grasp_overlay") # Get grasp overlay stream if available
+
+ except ImportError:
+ print("Error: ZED SDK not installed. Please install pyzed package.")
+ sys.exit(1)
+ except RuntimeError as e:
+ print(f"Error: Failed to open ZED camera: {e}")
+ sys.exit(1)
+
+ try:
+ # Set up web interface with concurrent visualization streams
+ print("Initializing web interface...")
+ web_interface = RobotWebInterface(
+ port=web_port,
+ object_detection=detection_viz_stream,
+ pointcloud_stream=pointcloud_viz_stream,
+ grasp_overlay_stream=grasp_overlay_stream,
+ )
+
+ # Start grasp monitoring in background thread
+ grasp_monitor_thread = threading.Thread(
+ target=monitor_grasps, args=(pipeline,), daemon=True
+ )
+ grasp_monitor_thread.start()
+
+ print(f"\n Point Cloud + Grasp Generation Test Running:")
+ print(f" Web Interface: http://localhost:{web_port}")
+ print(f" Object Detection View: RGB with bounding boxes")
+ print(f" Point Cloud View: Depth with colored point clouds and 3D bounding boxes")
+ print(f" Confidence threshold: {min_confidence}")
+ print(f" Grasp server: {grasp_server_url}")
+ print(f" Available streams: {list(streams.keys())}")
+ print("\nPress Ctrl+C to stop the test\n")
+
+ # Start web server (blocking call)
+ web_interface.run()
+
+ except KeyboardInterrupt:
+ print("\nTest interrupted by user")
+ except Exception as e:
+ print(f"Error during test: {e}")
+ finally:
+ print("Cleaning up resources...")
+ if "zed_stream" in locals():
+ zed_stream.cleanup()
+ if "pipeline" in locals():
+ pipeline.cleanup()
+ print("Test completed")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tests/test_manipulation_pipeline_single_frame.py b/tests/test_manipulation_pipeline_single_frame.py
index 061eb9035e..629ba4dbee 100644
--- a/tests/test_manipulation_pipeline_single_frame.py
+++ b/tests/test_manipulation_pipeline_single_frame.py
@@ -34,7 +34,7 @@
import open3d as o3d
from dimos.perception.pointcloud.utils import visualize_clustered_point_clouds, visualize_voxel_grid
-from dimos.perception.manip_aio_processer import ManipulationProcessor
+from dimos.manipulation.manip_aio_processer import ManipulationProcessor
from dimos.perception.pointcloud.utils import (
load_camera_matrix_from_yaml,
visualize_pcd,
diff --git a/tests/test_manipulation_pipeline_single_frame_lcm.py b/tests/test_manipulation_pipeline_single_frame_lcm.py
index 635f82c9c9..7b57887ddc 100644
--- a/tests/test_manipulation_pipeline_single_frame_lcm.py
+++ b/tests/test_manipulation_pipeline_single_frame_lcm.py
@@ -42,7 +42,7 @@
from lcm_msgs.sensor_msgs import CameraInfo as LCMCameraInfo
from dimos.perception.pointcloud.utils import visualize_clustered_point_clouds, visualize_voxel_grid
-from dimos.perception.manip_aio_processer import ManipulationProcessor
+from dimos.manipulation.manip_aio_processer import ManipulationProcessor
from dimos.perception.grasp_generation.utils import visualize_grasps_3d
from dimos.perception.pointcloud.utils import visualize_pcd
from dimos.utils.logging_config import setup_logger
diff --git a/tests/test_pick_and_place_module.py b/tests/test_pick_and_place_module.py
new file mode 100644
index 0000000000..6a8470863e
--- /dev/null
+++ b/tests/test_pick_and_place_module.py
@@ -0,0 +1,355 @@
+#!/usr/bin/env python3
+# Copyright 2025 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Run script for Piper Arm robot with pick and place functionality.
+Subscribes to visualization images and handles mouse/keyboard input.
+"""
+
+import cv2
+import sys
+import asyncio
+import threading
+import time
+import numpy as np
+from typing import Optional
+
+try:
+ import pyzed.sl as sl
+except ImportError:
+ print("Error: ZED SDK not installed.")
+ sys.exit(1)
+
+from dimos.robot.agilex.piper_arm import PiperArmRobot
+from dimos.utils.logging_config import setup_logger
+
+# Import LCM message types
+from dimos_lcm.sensor_msgs import Image
+from dimos.protocol.pubsub.lcmpubsub import LCM, Topic
+
+logger = setup_logger("dimos.tests.test_pick_and_place_module")
+
+# Global for mouse events
+mouse_click = None
+camera_mouse_click = None
+current_window = None
+pick_location = None # Store pick location
+place_location = None # Store place location
+place_mode = False # Track if we're in place selection mode
+
+
+def mouse_callback(event, x, y, _flags, param):
+ global mouse_click, camera_mouse_click
+ window_name = param
+ if event == cv2.EVENT_LBUTTONDOWN:
+ if window_name == "Camera Feed":
+ camera_mouse_click = (x, y)
+ else:
+ mouse_click = (x, y)
+
+
+class VisualizationNode:
+ """Node that subscribes to visualization images and handles user input."""
+
+ def __init__(self, robot: PiperArmRobot):
+ self.lcm = LCM()
+ self.latest_viz = None
+ self.latest_camera = None
+ self._running = False
+ self.robot = robot
+
+ # Subscribe to visualization topic
+ self.viz_topic = Topic("/manipulation/viz", Image)
+ self.camera_topic = Topic("/zed/color_image", Image)
+
+ def start(self):
+ """Start the visualization node."""
+ self._running = True
+ self.lcm.start()
+
+ # Subscribe to visualization topic
+ self.lcm.subscribe(self.viz_topic, self._on_viz_image)
+ # Subscribe to camera topic for point selection
+ self.lcm.subscribe(self.camera_topic, self._on_camera_image)
+
+ logger.info("Visualization node started")
+
+ def stop(self):
+ """Stop the visualization node."""
+ self._running = False
+ cv2.destroyAllWindows()
+
+ def _on_viz_image(self, msg: Image, topic: str):
+ """Handle visualization image messages."""
+ try:
+ # Convert LCM message to numpy array
+ data = np.frombuffer(msg.data, dtype=np.uint8)
+ if msg.encoding == "rgb8":
+ image = data.reshape((msg.height, msg.width, 3))
+ # Convert RGB to BGR for OpenCV
+ image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+ self.latest_viz = image
+ except Exception as e:
+ logger.error(f"Error processing viz image: {e}")
+
+ def _on_camera_image(self, msg: Image, topic: str):
+ """Handle camera image messages."""
+ try:
+ # Convert LCM message to numpy array
+ data = np.frombuffer(msg.data, dtype=np.uint8)
+ if msg.encoding == "rgb8":
+ image = data.reshape((msg.height, msg.width, 3))
+ # Convert RGB to BGR for OpenCV
+ image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+ self.latest_camera = image
+ except Exception as e:
+ logger.error(f"Error processing camera image: {e}")
+
+ def run_visualization(self):
+ """Run the visualization loop with user interaction."""
+ global mouse_click, camera_mouse_click, pick_location, place_location, place_mode
+
+ # Setup windows
+ cv2.namedWindow("Pick and Place")
+ cv2.setMouseCallback("Pick and Place", mouse_callback, "Pick and Place")
+
+ cv2.namedWindow("Camera Feed")
+ cv2.setMouseCallback("Camera Feed", mouse_callback, "Camera Feed")
+
+ print("=== Piper Arm Robot - Pick and Place ===")
+ print("Control mode: Module-based with LCM communication")
+ print("\nPICK AND PLACE WORKFLOW:")
+ print("1. Click on an object to select PICK location")
+ print("2. Click again to select PLACE location (auto pick & place)")
+ print("3. OR press 'p' after first click for pick-only task")
+ print("\nCONTROLS:")
+ print(" 'p' - Execute pick-only task (after selecting pick location)")
+ print(" 'r' - Reset everything")
+ print(" 'q' - Quit")
+ print(" 's' - SOFT STOP (emergency stop)")
+ print(" 'g' - RELEASE GRIPPER (open gripper)")
+ print(" 'SPACE' - EXECUTE target pose (manual override)")
+ print("\nNOTE: Click on objects in the Camera Feed window!")
+
+ while self._running:
+ # Show camera feed with status overlay
+ if self.latest_camera is not None:
+ display_image = self.latest_camera.copy()
+
+ # Add status text
+ status_text = ""
+ if pick_location is None:
+ status_text = "Click to select PICK location"
+ color = (0, 255, 0)
+ elif place_location is None:
+ status_text = "Click to select PLACE location (or press 'p' for pick-only)"
+ color = (0, 255, 255)
+ else:
+ status_text = "Executing pick and place..."
+ color = (255, 0, 255)
+
+ cv2.putText(
+ display_image, status_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2
+ )
+
+ # Draw pick location marker if set
+ if pick_location is not None:
+ # Simple circle marker
+ cv2.circle(display_image, pick_location, 10, (0, 255, 0), 2)
+ cv2.circle(display_image, pick_location, 2, (0, 255, 0), -1)
+
+ # Simple label
+ cv2.putText(
+ display_image,
+ "PICK",
+ (pick_location[0] + 15, pick_location[1] + 5),
+ cv2.FONT_HERSHEY_SIMPLEX,
+ 0.6,
+ (0, 255, 0),
+ 2,
+ )
+
+ # Draw place location marker if set
+ if place_location is not None:
+ # Simple circle marker
+ cv2.circle(display_image, place_location, 10, (0, 255, 255), 2)
+ cv2.circle(display_image, place_location, 2, (0, 255, 255), -1)
+
+ # Simple label
+ cv2.putText(
+ display_image,
+ "PLACE",
+ (place_location[0] + 15, place_location[1] + 5),
+ cv2.FONT_HERSHEY_SIMPLEX,
+ 0.6,
+ (0, 255, 255),
+ 2,
+ )
+
+ # Draw simple arrow between pick and place
+ if pick_location is not None:
+ cv2.arrowedLine(
+ display_image,
+ pick_location,
+ place_location,
+ (255, 255, 0),
+ 2,
+ tipLength=0.05,
+ )
+
+ cv2.imshow("Camera Feed", display_image)
+
+ # Show visualization if available
+ if self.latest_viz is not None:
+ cv2.imshow("Pick and Place", self.latest_viz)
+
+ # Handle keyboard input
+ key = cv2.waitKey(1) & 0xFF
+ if key != 255: # Key was pressed
+ if key == ord("q"):
+ logger.info("Quit requested")
+ self._running = False
+ break
+ elif key == ord("r"):
+ # Reset everything
+ pick_location = None
+ place_location = None
+ place_mode = False
+ logger.info("Reset pick and place selections")
+ # Also send reset to robot
+ action = self.robot.handle_keyboard_command("r")
+ if action:
+ logger.info(f"Action: {action}")
+ elif key == ord("p"):
+ # Execute pick-only task if pick location is set
+ if pick_location is not None:
+ logger.info(f"Executing pick-only task at {pick_location}")
+ result = self.robot.pick_and_place(
+ pick_location[0],
+ pick_location[1],
+ None, # No place location
+ None,
+ )
+ logger.info(f"Pick task started: {result}")
+ # Clear selection after sending
+ pick_location = None
+ place_location = None
+ else:
+ logger.warning("Please select a pick location first!")
+ else:
+ # Send keyboard command to robot
+ if key in [82, 84]: # Arrow keys
+ action = self.robot.handle_keyboard_command(str(key))
+ else:
+ action = self.robot.handle_keyboard_command(chr(key))
+ if action:
+ logger.info(f"Action: {action}")
+
+ # Handle mouse clicks
+ if camera_mouse_click:
+ x, y = camera_mouse_click
+
+ if pick_location is None:
+ # First click - set pick location
+ pick_location = (x, y)
+ logger.info(f"Pick location set at ({x}, {y})")
+ elif place_location is None:
+ # Second click - set place location and execute
+ place_location = (x, y)
+ logger.info(f"Place location set at ({x}, {y})")
+ logger.info(f"Executing pick at {pick_location} and place at ({x}, {y})")
+
+ # Start pick and place task with both locations
+ result = self.robot.pick_and_place(pick_location[0], pick_location[1], x, y)
+ logger.info(f"Pick and place task started: {result}")
+
+ # Clear all points after sending mission
+ pick_location = None
+ place_location = None
+
+ camera_mouse_click = None
+
+ # Handle mouse click from Pick and Place window (if viz is running)
+ elif mouse_click and self.latest_viz is not None:
+ # Similar logic for viz window clicks
+ x, y = mouse_click
+
+ if pick_location is None:
+ # First click - set pick location
+ pick_location = (x, y)
+ logger.info(f"Pick location set at ({x}, {y}) from viz window")
+ elif place_location is None:
+ # Second click - set place location and execute
+ place_location = (x, y)
+ logger.info(f"Place location set at ({x}, {y}) from viz window")
+ logger.info(f"Executing pick at {pick_location} and place at ({x}, {y})")
+
+ # Start pick and place task with both locations
+ result = self.robot.pick_and_place(pick_location[0], pick_location[1], x, y)
+ logger.info(f"Pick and place task started: {result}")
+
+ # Clear all points after sending mission
+ pick_location = None
+ place_location = None
+
+ mouse_click = None
+
+ time.sleep(0.03) # ~30 FPS
+
+
+async def run_piper_arm_with_viz():
+ """Run the Piper Arm robot with visualization."""
+ logger.info("Starting Piper Arm Robot")
+
+ # Create robot instance
+ robot = PiperArmRobot()
+
+ try:
+ # Start the robot
+ await robot.start()
+
+ # Give modules time to fully initialize
+ await asyncio.sleep(2)
+
+ # Create and start visualization node
+ viz_node = VisualizationNode(robot)
+ viz_node.start()
+
+ # Run visualization in separate thread
+ viz_thread = threading.Thread(target=viz_node.run_visualization, daemon=True)
+ viz_thread.start()
+
+ # Keep running until visualization stops
+ while viz_node._running:
+ await asyncio.sleep(0.1)
+
+ # Stop visualization
+ viz_node.stop()
+
+ except Exception as e:
+ logger.error(f"Error running robot: {e}")
+ import traceback
+
+ traceback.print_exc()
+
+ finally:
+ # Clean up
+ robot.stop()
+ logger.info("Robot stopped")
+
+
+if __name__ == "__main__":
+ # Run the robot
+ asyncio.run(run_piper_arm_with_viz())
diff --git a/tests/test_pick_and_place_skill.py b/tests/test_pick_and_place_skill.py
new file mode 100644
index 0000000000..40cf2c23b0
--- /dev/null
+++ b/tests/test_pick_and_place_skill.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python3
+# Copyright 2025 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Run script for Piper Arm robot with pick and place functionality.
+Uses hardcoded points and the PickAndPlace skill.
+"""
+
+import sys
+import asyncio
+
+try:
+ import pyzed.sl as sl # Required for ZED camera
+except ImportError:
+ print("Error: ZED SDK not installed.")
+ sys.exit(1)
+
+from dimos.robot.agilex.piper_arm import PiperArmRobot
+from dimos.skills.manipulation.pick_and_place import PickAndPlace
+from dimos.utils.logging_config import setup_logger
+
+logger = setup_logger("dimos.robot.agilex.run_robot")
+
+
+async def run_piper_arm():
+ """Run the Piper Arm robot with pick and place skill."""
+ logger.info("Starting Piper Arm Robot")
+
+ # Create robot instance
+ robot = PiperArmRobot()
+
+ try:
+ # Start the robot
+ await robot.start()
+
+ # Give modules time to fully initialize
+ await asyncio.sleep(3)
+
+ # Add the PickAndPlace skill to the robot's skill library
+ robot.skill_library.add(PickAndPlace)
+
+ logger.info("Robot initialized successfully")
+ print("\n=== Piper Arm Robot - Pick and Place Demo ===")
+ print("This demo uses hardcoded pick and place points.")
+ print("\nCommands:")
+ print(" 1. Run pick and place with hardcoded points")
+ print(" 2. Run pick-only with hardcoded point")
+ print(" r. Reset robot to idle")
+ print(" q. Quit")
+ print("")
+
+ running = True
+ while running:
+ try:
+ # Get user input
+ command = input("\nEnter command: ").strip().lower()
+
+ if command == "q":
+ logger.info("Quit requested")
+ running = False
+ break
+
+ elif command == "r" or command == "s":
+ logger.info("Resetting robot")
+ robot.handle_keyboard_command(command)
+
+ elif command == "1":
+ # Hardcoded pick and place points
+ # These should be adjusted based on your camera view
+ print("\nExecuting pick and place with hardcoded points...")
+
+ # Create and execute the skill
+ skill = PickAndPlace(
+ robot=robot,
+ object_query="labubu doll", # Will use visual detection
+ target_query="on the keyboard", # Will use visual detection
+ )
+
+ result = skill()
+
+ if result["success"]:
+ print(f"✓ {result['message']}")
+ else:
+ print(f"✗ Failed: {result.get('error', 'Unknown error')}")
+
+ elif command == "2":
+ # Pick-only with hardcoded point
+ print("\nExecuting pick-only with hardcoded point...")
+
+ # Create and execute the skill for pick-only
+ skill = PickAndPlace(
+ robot=robot,
+ object_query="labubu doll", # Will use visual detection
+ target_query=None, # No place target - pick only
+ )
+
+ result = skill()
+
+ if result["success"]:
+ print(f"✓ {result['message']}")
+ else:
+ print(f"✗ Failed: {result.get('error', 'Unknown error')}")
+
+ else:
+ print("Invalid command. Please try again.")
+
+ # Small delay to prevent CPU spinning
+ await asyncio.sleep(0.1)
+
+ except KeyboardInterrupt:
+ logger.info("Keyboard interrupt received")
+ running = False
+ break
+ except Exception as e:
+ logger.error(f"Error in command loop: {e}")
+ print(f"Error: {e}")
+
+ except Exception as e:
+ logger.error(f"Error running robot: {e}")
+ import traceback
+
+ traceback.print_exc()
+
+ finally:
+ # Clean up
+ logger.info("Shutting down robot...")
+ await robot.stop()
+ logger.info("Robot stopped")
+
+
+def main():
+ """Main entry point."""
+ print("Starting Piper Arm Robot...")
+ print("Note: The robot will use Qwen VLM to identify objects and locations")
+ print("based on the queries specified in the code.")
+
+ # Run the robot
+ asyncio.run(run_piper_arm())
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tests/test_pointcloud_filtering.py b/tests/test_pointcloud_filtering.py
index 308b4fc6ac..57a1cb5b00 100644
--- a/tests/test_pointcloud_filtering.py
+++ b/tests/test_pointcloud_filtering.py
@@ -23,7 +23,7 @@
from dimos.stream.stereo_camera_streams.zed import ZEDCameraStream
from dimos.web.robot_web_interface import RobotWebInterface
from dimos.utils.logging_config import logger
-from dimos.perception.manip_aio_pipeline import ManipulationPipeline
+from dimos.manipulation.manip_aio_pipeline import ManipulationPipeline
def main():
diff --git a/tests/test_zed_module.py b/tests/test_zed_module.py
new file mode 100644
index 0000000000..fbc99a54a4
--- /dev/null
+++ b/tests/test_zed_module.py
@@ -0,0 +1,276 @@
+#!/usr/bin/env python3
+# Copyright 2025 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Test script for ZED Module with LCM visualization."""
+
+import asyncio
+import threading
+import time
+from typing import Optional
+import numpy as np
+import cv2
+
+from dimos import core
+from dimos.hardware.zed_camera import ZEDModule
+from dimos.protocol import pubsub
+from dimos.utils.logging_config import setup_logger
+from dimos.perception.common.utils import colorize_depth
+
+# Import LCM message types
+from dimos_lcm.sensor_msgs import Image as LCMImage
+from dimos_lcm.sensor_msgs import CameraInfo
+from dimos_lcm.geometry_msgs import PoseStamped
+from dimos.protocol.pubsub.lcmpubsub import LCM, Topic
+
+logger = setup_logger("test_zed_module")
+
+
+class ZEDVisualizationNode:
+ """Node that subscribes to ZED topics and visualizes the data."""
+
+ def __init__(self):
+ self.lcm = LCM()
+ self.latest_color = None
+ self.latest_depth = None
+ self.latest_pose = None
+ self.camera_info = None
+ self._running = False
+
+ # Subscribe to topics
+ self.color_topic = Topic("/zed/color_image", LCMImage)
+ self.depth_topic = Topic("/zed/depth_image", LCMImage)
+ self.camera_info_topic = Topic("/zed/camera_info", CameraInfo)
+ self.pose_topic = Topic("/zed/pose", PoseStamped)
+
+ def start(self):
+ """Start the visualization node."""
+ self._running = True
+ self.lcm.start()
+
+ # Subscribe to topics
+ self.lcm.subscribe(self.color_topic, self._on_color_image)
+ self.lcm.subscribe(self.depth_topic, self._on_depth_image)
+ self.lcm.subscribe(self.camera_info_topic, self._on_camera_info)
+ self.lcm.subscribe(self.pose_topic, self._on_pose)
+
+ logger.info("Visualization node started, subscribed to ZED topics")
+
+ def stop(self):
+ """Stop the visualization node."""
+ self._running = False
+ cv2.destroyAllWindows()
+
+ def _on_color_image(self, msg: LCMImage, topic: str):
+ """Handle color image messages."""
+ try:
+ # Convert LCM message to numpy array
+ data = np.frombuffer(msg.data, dtype=np.uint8)
+
+ if msg.encoding == "rgb8":
+ image = data.reshape((msg.height, msg.width, 3))
+ # Convert RGB to BGR for OpenCV
+ image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+ elif msg.encoding == "mono8":
+ image = data.reshape((msg.height, msg.width))
+ else:
+ logger.warning(f"Unsupported encoding: {msg.encoding}")
+ return
+
+ self.latest_color = image
+ logger.debug(f"Received color image: {msg.width}x{msg.height}")
+
+ except Exception as e:
+ logger.error(f"Error processing color image: {e}")
+
+ def _on_depth_image(self, msg: LCMImage, topic: str):
+ """Handle depth image messages."""
+ try:
+ # Convert LCM message to numpy array
+ if msg.encoding == "32FC1":
+ data = np.frombuffer(msg.data, dtype=np.float32)
+ depth = data.reshape((msg.height, msg.width))
+ else:
+ logger.warning(f"Unsupported depth encoding: {msg.encoding}")
+ return
+
+ self.latest_depth = depth
+ logger.debug(f"Received depth image: {msg.width}x{msg.height}")
+
+ except Exception as e:
+ logger.error(f"Error processing depth image: {e}")
+
+ def _on_camera_info(self, msg: CameraInfo, topic: str):
+ """Handle camera info messages."""
+ self.camera_info = msg
+ logger.info(
+ f"Received camera info: {msg.width}x{msg.height}, distortion model: {msg.distortion_model}"
+ )
+
+ def _on_pose(self, msg: PoseStamped, topic: str):
+ """Handle pose messages."""
+ self.latest_pose = msg
+ pos = msg.pose.position
+ ori = msg.pose.orientation
+ logger.debug(
+ f"Pose: pos=({pos.x:.2f}, {pos.y:.2f}, {pos.z:.2f}), "
+ + f"ori=({ori.x:.2f}, {ori.y:.2f}, {ori.z:.2f}, {ori.w:.2f})"
+ )
+
+ def visualize(self):
+ """Run visualization loop."""
+ while self._running:
+ # Create visualization
+ vis_images = []
+
+ # Color image
+ if self.latest_color is not None:
+ color_vis = self.latest_color.copy()
+
+ # Add pose text if available
+ if self.latest_pose is not None:
+ pos = self.latest_pose.pose.position
+ text = f"Pose: ({pos.x:.2f}, {pos.y:.2f}, {pos.z:.2f})"
+ cv2.putText(
+ color_vis, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2
+ )
+
+ vis_images.append(("ZED Color", color_vis))
+
+ # Depth image
+ if self.latest_depth is not None:
+ depth_colorized = colorize_depth(self.latest_depth, max_depth=5.0)
+ if depth_colorized is not None:
+ # Convert RGB to BGR for OpenCV
+ depth_colorized = cv2.cvtColor(depth_colorized, cv2.COLOR_RGB2BGR)
+
+ # Add depth stats
+ valid_mask = np.isfinite(self.latest_depth) & (self.latest_depth > 0)
+ if np.any(valid_mask):
+ min_depth = np.min(self.latest_depth[valid_mask])
+ max_depth = np.max(self.latest_depth[valid_mask])
+ mean_depth = np.mean(self.latest_depth[valid_mask])
+
+ text = f"Depth: min={min_depth:.2f}m, max={max_depth:.2f}m, mean={mean_depth:.2f}m"
+ cv2.putText(
+ depth_colorized,
+ text,
+ (10, 30),
+ cv2.FONT_HERSHEY_SIMPLEX,
+ 0.5,
+ (255, 255, 255),
+ 1,
+ )
+
+ vis_images.append(("ZED Depth", depth_colorized))
+
+ # Show windows
+ for name, image in vis_images:
+ cv2.imshow(name, image)
+
+ # Handle key press
+ key = cv2.waitKey(1) & 0xFF
+ if key == ord("q"):
+ logger.info("Quit requested")
+ self._running = False
+ break
+ elif key == ord("s"):
+ # Save images
+ if self.latest_color is not None:
+ cv2.imwrite("zed_color.png", self.latest_color)
+ logger.info("Saved color image to zed_color.png")
+ if self.latest_depth is not None:
+ np.save("zed_depth.npy", self.latest_depth)
+ logger.info("Saved depth data to zed_depth.npy")
+
+ time.sleep(0.03) # ~30 FPS
+
+
+async def test_zed_module():
+    """Test the ZED Module with visualization.
+
+    Deploys a ZEDModule on a local Dask worker, wires its output streams to
+    LCM topics, and runs a ZEDVisualizationNode that renders whatever it
+    receives.  Runs until the user presses 'q' in an OpenCV window; always
+    shuts the Dask cluster down on exit.
+    """
+    logger.info("Starting ZED Module test")
+
+    # Start Dask (one worker is enough for a single module)
+    dimos = core.start(1)
+
+    # Enable LCM auto-configuration
+    pubsub.lcm.autoconf()
+
+    try:
+        # Deploy ZED module
+        logger.info("Deploying ZED module...")
+        zed = dimos.deploy(
+            ZEDModule,
+            camera_id=0,
+            resolution="HD720",
+            depth_mode="NEURAL",
+            fps=30,
+            enable_tracking=True,
+            publish_rate=10.0,  # 10 Hz for testing
+            frame_id="zed_camera",
+        )
+
+        # Configure LCM transports.  Topic names must match the ones
+        # ZEDVisualizationNode subscribes to.
+        zed.color_image.transport = core.LCMTransport("/zed/color_image", LCMImage)
+        zed.depth_image.transport = core.LCMTransport("/zed/depth_image", LCMImage)
+        zed.camera_info.transport = core.LCMTransport("/zed/camera_info", CameraInfo)
+        zed.pose.transport = core.LCMTransport("/zed/pose", PoseStamped)
+
+        # Print module info (io() presumably returns a future — .result() blocks)
+        logger.info("ZED Module configured:")
+        print(zed.io().result())
+
+        # Start ZED module
+        logger.info("Starting ZED module...")
+        zed.start()
+
+        # Give module time to initialize
+        # NOTE(review): fixed 2 s delay assumes camera startup fits within it
+        # — confirm, or poll for readiness instead.
+        await asyncio.sleep(2)
+
+        # Create and start visualization node
+        viz_node = ZEDVisualizationNode()
+        viz_node.start()
+
+        # Run visualization in separate thread; daemon=True so it cannot keep
+        # the process alive if shutdown is abrupt
+        viz_thread = threading.Thread(target=viz_node.visualize, daemon=True)
+        viz_thread.start()
+
+        logger.info("ZED Module running. Press 'q' in image window to quit, 's' to save images.")
+
+        # Keep running until visualization stops ('q' pressed in the window)
+        while viz_node._running:
+            await asyncio.sleep(0.1)
+
+        # Stop ZED module
+        logger.info("Stopping ZED module...")
+        zed.stop()
+
+        # Stop visualization
+        viz_node.stop()
+
+    except Exception as e:
+        logger.error(f"Error in test: {e}")
+        import traceback
+
+        traceback.print_exc()
+
+    finally:
+        # Clean up: tear down the Dask cluster even on failure
+        dimos.close()
+        logger.info("Test completed")
+
+
+if __name__ == "__main__":
+    # Run the end-to-end test under a fresh asyncio event loop.
+    asyncio.run(test_zed_module())