From 37bc2e86237ef415f01fd8e1a328abfcc3d3cac7 Mon Sep 17 00:00:00 2001 From: lesh Date: Sat, 9 Aug 2025 10:58:37 -0700 Subject: [PATCH 01/48] agents2 --- dimos/agents2/__init__.py | 8 + dimos/agents2/main.py | 78 +++++ dimos/agents2/test_main.py | 51 +++ dimos/core/module.py | 21 +- dimos/protocol/skill/__init__.py | 1 - dimos/protocol/skill/comms.py | 18 +- dimos/protocol/skill/coordinator.py | 395 ++++++++++++++++++++++ dimos/protocol/skill/schema.py | 103 ++++++ dimos/protocol/skill/skill.py | 29 +- dimos/protocol/skill/test_coordinator.py | 105 ++++++ dimos/protocol/skill/test_skill.py | 28 +- dimos/protocol/skill/testing_utils.py | 28 ++ dimos/protocol/skill/type.py | 146 ++++++++ dimos/utils/cli/agentspy/agentspy.py | 65 ++-- dimos/utils/cli/agentspy/demo_agentspy.py | 21 +- 15 files changed, 1022 insertions(+), 75 deletions(-) create mode 100644 dimos/agents2/__init__.py create mode 100644 dimos/agents2/main.py create mode 100644 dimos/agents2/test_main.py create mode 100644 dimos/protocol/skill/coordinator.py create mode 100644 dimos/protocol/skill/schema.py create mode 100644 dimos/protocol/skill/test_coordinator.py create mode 100644 dimos/protocol/skill/testing_utils.py create mode 100644 dimos/protocol/skill/type.py diff --git a/dimos/agents2/__init__.py b/dimos/agents2/__init__.py new file mode 100644 index 0000000000..6a756fbaab --- /dev/null +++ b/dimos/agents2/__init__.py @@ -0,0 +1,8 @@ +from langchain_core.messages import ( + AIMessage, + HumanMessage, + MessageLikeRepresentation, + SystemMessage, + ToolCall, + ToolMessage, +) diff --git a/dimos/agents2/main.py b/dimos/agents2/main.py new file mode 100644 index 0000000000..8e2da24903 --- /dev/null +++ b/dimos/agents2/main.py @@ -0,0 +1,78 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import asyncio +from pprint import pprint + +from langchain.chat_models import init_chat_model +from langchain_core.language_models.chat_models import BaseChatModel +from langchain_core.messages import ( + AIMessage, + HumanMessage, + MessageLikeRepresentation, + SystemMessage, + ToolCall, + ToolMessage, +) + +from dimos.core import Module, rpc +from dimos.protocol.skill import skill +from dimos.protocol.skill.coordinator import SkillCoordinator, SkillState +from dimos.utils.logging_config import setup_logger + +logger = setup_logger("dimos.protocol.agents2") + + +class Agent(SkillCoordinator): + def __init__(self, model: str = "gpt-4o", model_provider: str = "openai", *args, **kwargs): + super().__init__(*args, **kwargs) + + self.messages = [] + self._llm = init_chat_model( + model=model, + model_provider=model_provider, + ) + + async def agent_loop(self, seed_query: str = ""): + self.messages.append(HumanMessage(seed_query)) + try: + while True: + tools = self.get_tools() + self._llm = self._llm.bind_tools(tools) + + msg = self._llm.invoke(self.messages) + self.messages.append(msg) + + logger.info(f"Agent response: {msg.content}") + if msg.tool_calls: + self.execute_tool_calls(msg.tool_calls) + + if not self.has_active_skills(): + logger.info("No active tasks, exiting agent loop.") + return + + await self.wait_for_updates() + + for call_id, update in self.generate_snapshot(clear=True).items(): + self.messages.append(update.agent_encode()) + + except Exception as e: + logger.error(f"Error in agent loop: {e}") + import traceback + + traceback.print_exc() + + @rpc + def query(self, query: str): + asyncio.ensure_future(self.agent_loop(query), loop=self._loop) diff --git a/dimos/agents2/test_main.py b/dimos/agents2/test_main.py new file mode 100644 index 0000000000..755666b070 --- /dev/null +++ b/dimos/agents2/test_main.py @@ -0,0 +1,51 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import asyncio +import time + +import pytest + +from dimos.agents2.main import Agent +from dimos.core import start +from dimos.protocol.skill import SkillContainer, skill + + +class TestContainer(SkillContainer): + @skill() + def add(self, x: int, y: int) -> int: + """Adds two integers.""" + time.sleep(0.3) + return x + y + + @skill() + def sub(self, x: int, y: int) -> int: + """Subs two integers.""" + time.sleep(0.3) + return x - y + + +@pytest.mark.asyncio +async def test_agent_init(): + # dimos = start(2) + # agent = dimos.deploy(Agent) + agent = Agent() + agent.register_skills(TestContainer()) + agent.start() + + agent.query( + "hi there, use add tool to add 124181112 and 124124. don't sum yourself, use a tool I provided" + ) + + await asyncio.sleep(5) diff --git a/dimos/core/module.py b/dimos/core/module.py index e30df27a68..7cb2161fb8 100644 --- a/dimos/core/module.py +++ b/dimos/core/module.py @@ -11,13 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import asyncio import inspect -from enum import Enum from typing import ( Any, Callable, Optional, - TypeVar, get_args, get_origin, get_type_hints, @@ -50,18 +49,32 @@ class ModuleBase: _rpc: Optional[RPCSpec] = None _agent: Optional[SkillCommsSpec] = None _tf: Optional[TFSpec] = None + _loop: asyncio.AbstractEventLoop = None def __init__(self, *args, **kwargs): # we can completely override comms protocols if we want if kwargs.get("comms", None) is not None: self.comms = kwargs["comms"] try: - get_worker() + # here we attempt to figure out if we are running on a dask worker + # if so we use the dask worker _loop as ours, + # and we register our RPC server + worker = get_worker() + self._loop = worker.loop if worker else None self.rpc = self.comms.rpc() self.rpc.serve_module_rpc(self) self.rpc.start() except ValueError: - return + ... + + # assuming we are not running on a dask worker, + # it's our job to determine or create the event loop + if not self._loop: + try: + self._loop = asyncio.get_running_loop() + except RuntimeError: + self._loop = asyncio.new_event_loop() + asyncio.set_event_loop(self._loop) @property def tf(self): diff --git a/dimos/protocol/skill/__init__.py b/dimos/protocol/skill/__init__.py index 85b6146f56..15ebf0b59c 100644 --- a/dimos/protocol/skill/__init__.py +++ b/dimos/protocol/skill/__init__.py @@ -1,2 +1 @@ -from dimos.protocol.skill.agent_interface import AgentInterface, SkillState from dimos.protocol.skill.skill import SkillContainer, skill diff --git a/dimos/protocol/skill/comms.py b/dimos/protocol/skill/comms.py index d6e9e73bf0..7703eda3e1 100644 --- a/dimos/protocol/skill/comms.py +++ b/dimos/protocol/skill/comms.py @@ -11,27 +11,25 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations -import time from abc import abstractmethod from dataclasses import dataclass -from enum import Enum from typing import Callable, Generic, Optional, TypeVar, Union -from dimos.protocol.pubsub.lcmpubsub import PickleLCM, Topic +from dimos.protocol.pubsub.lcmpubsub import PickleLCM from dimos.protocol.pubsub.spec import PubSub from dimos.protocol.service import Service -from dimos.protocol.skill.types import AgentMsg, Call, MsgType, Reducer, SkillConfig, Stream -from dimos.types.timestamped import Timestamped +from dimos.protocol.skill.type import SkillMsg # defines a protocol for communication between skills and agents class SkillCommsSpec: @abstractmethod - def publish(self, msg: AgentMsg) -> None: ... + def publish(self, msg: SkillMsg) -> None: ... @abstractmethod - def subscribe(self, cb: Callable[[AgentMsg], None]) -> None: ... + def subscribe(self, cb: Callable[[SkillMsg], None]) -> None: ... @abstractmethod def start(self) -> None: ... @@ -74,15 +72,15 @@ def start(self) -> None: def stop(self): self.pubsub.stop() - def publish(self, msg: AgentMsg) -> None: + def publish(self, msg: SkillMsg) -> None: self.pubsub.publish(self.config.topic, msg) - def subscribe(self, cb: Callable[[AgentMsg], None]) -> None: + def subscribe(self, cb: Callable[[SkillMsg], None]) -> None: self.pubsub.subscribe(self.config.topic, lambda msg, topic: cb(msg)) @dataclass -class LCMCommsConfig(PubSubCommsConfig[str, AgentMsg]): +class LCMCommsConfig(PubSubCommsConfig[str, SkillMsg]): topic: str = "/agent" pubsub: Union[type[PubSub], PubSub, None] = PickleLCM # lcm needs to be started only if receiving diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py new file mode 100644 index 0000000000..4b0f5d27f2 --- /dev/null +++ b/dimos/protocol/skill/coordinator.py @@ -0,0 +1,395 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import asyncio +from copy import copy +from dataclasses import dataclass +from enum import Enum +from typing import Any, List, Optional + +from langchain_core.tools import tool as langchain_tool +from rich.console import Console +from rich.table import Table +from rich.text import Text + +from dimos.agents2 import ToolCall, ToolMessage +from dimos.core import Module, rpc +from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec +from dimos.protocol.skill.skill import SkillConfig, SkillContainer +from dimos.protocol.skill.type import MsgType, Reducer, Return, SkillMsg, Stream +from dimos.types.timestamped import TimestampedCollection +from dimos.utils.logging_config import setup_logger + +logger = setup_logger("dimos.protocol.skill.coordinator") + + +@dataclass +class AgentInputConfig: + agent_comms: type[SkillCommsSpec] = LCMSkillComms + + +class SkillStateEnum(Enum): + pending = 0 + running = 1 + completed = 2 + error = 3 + + def colored_name(self) -> Text: + """Return the state name as a rich Text object with color.""" + colors = { + SkillStateEnum.pending: "yellow", + SkillStateEnum.running: "blue", + SkillStateEnum.completed: "green", + SkillStateEnum.error: "red", + } + return Text(self.name, style=colors.get(self, "white")) + + +# TODO pending timeout, running timeout, etc. +# This object maintains the state of a skill run +# It is used to track the skill's progress, messages, and state +class SkillState(TimestampedCollection): + call_id: str + name: str + state: SkillStateEnum + skill_config: SkillConfig + + def __init__(self, call_id: str, name: str, skill_config: Optional[SkillConfig] = None) -> None: + super().__init__() + self.skill_config = skill_config or SkillConfig( + name=name, stream=Stream.none, ret=Return.none, reducer=Reducer.none, schema={} + ) + + self.state = SkillStateEnum.pending + self.call_id = call_id + self.name = name + + def agent_encode(self) -> ToolMessage: + last_msg = self._items[-1] + return ToolMessage(last_msg.content, name=self.name, tool_call_id=self.call_id) + + # returns True if the agent should be called for this message + def handle_msg(self, msg: SkillMsg) -> bool: + self.add(msg) + + if msg.type == MsgType.stream: + if ( + self.skill_config.stream == Stream.none + or self.skill_config.stream == Stream.passive + ): + return False + + if self.skill_config.stream == Stream.call_agent: + return True + + if msg.type == MsgType.ret: + self.state = SkillStateEnum.completed + if self.skill_config.ret == Return.call_agent: + return True + return False + + if msg.type == MsgType.error: + self.state = SkillStateEnum.error + return True + + if msg.type == MsgType.start: + self.state = SkillStateEnum.running + return False + + return False + + def __str__(self) -> str: + # For standard string representation, we'll use rich's Console to render the colored text + console = Console(force_terminal=True, legacy_windows=False) + colored_state = self.state.colored_name() + + # Build the parts of the string + parts = [Text(f"SkillState({self.name} "), colored_state, Text(f", call_id={self.call_id}")] + + if self.state == SkillStateEnum.completed or self.state == SkillStateEnum.error: + parts.append(Text(", ran for=")) + else: + parts.append(Text(", running for=")) + + parts.append(Text(f"{self.duration():.2f}s")) + + if len(self): + parts.append(Text(f", last_msg={self._items[-1]})")) + else: + parts.append(Text(", No Messages)")) + + # Combine all parts into a single Text object + combined = Text() + for part in parts: + combined.append(part) + + # Render to string with console + with console.capture() as capture: + console.print(combined, end="") + return capture.get() + + +class SkillStateDict(dict[str, SkillState]): + """Custom dict for skill states with better string representation.""" + + def __str__(self) -> str: + if not self: + return "SkillStates empty" + + lines = [] + + for call_id, skill_state in self.items(): + # Use the SkillState's own __str__ method for individual items + lines.append(f"{skill_state}") + + return "\n".join(lines) + + +class SkillCoordinator(SkillContainer, Module): + empty: bool = True + + _static_containers: list[SkillContainer] + _dynamic_containers: list[SkillContainer] + _skill_state: SkillStateDict # key is call_id, not skill_name + _skills: dict[str, SkillConfig] + _updates_available: asyncio.Event + _loop: Optional[asyncio.AbstractEventLoop] + + def __init__(self) -> None: + Module.__init__(self) + SkillContainer.__init__(self) + self._static_containers = [] + self._dynamic_containers = [] + self._skills = {} + self._skill_state = SkillStateDict() + self._updates_available = asyncio.Event() + + @rpc + def start(self) -> None: + self.agent_comms.start() + self.agent_comms.subscribe(self.handle_message) + + @rpc + def stop(self) -> None: + self.agent_comms.stop() + + def len(self) -> int: + return len(self._skills) + + def __len__(self) -> int: + return self.len() + + # this can be converted to non-langchain json schema output + # and langchain takes this output as well + # just faster for now + def get_tools(self) -> list[dict]: + # return [skill.schema for skill in self.skills().values()] + + ret = [] + for name, skill_config in self.skills().items(): + # print(f"Tool {name} config: {skill_config}, {skill_config.f}") + ret.append(langchain_tool(skill_config.f)) + + return ret + + # Used by agent to execute tool calls + def execute_tool_calls(self, tool_calls: List[ToolCall]) -> None: + """Execute a list of tool calls from the agent.""" + for tool_call in tool_calls: + logger.info(f"executing skill call {tool_call}") + self.call( + tool_call.get("id"), + tool_call.get("name"), + tool_call.get("args"), + ) + + # internal skill call + def call(self, call_id: str, skill_name: str, args: dict[str, Any]) -> None: + skill_config = self.get_skill_config(skill_name) + if not skill_config: + logger.error( + f"Skill {skill_name} not found in registered skills, but agent tried to call it (did a dynamic skill expire?)" + ) + return + + # This initializes the skill state if it doesn't exist + self._skill_state[call_id] = SkillState( + name=skill_name, skill_config=skill_config, call_id=call_id + ) + return skill_config.call(call_id, *args.get("args", []), **args.get("kwargs", {})) + + # Receives a message from active skill + # Updates local skill state (appends to streamed data if needed etc) + # + # Checks if agent needs to be notified (if ToolConfig has Return=call_agent or Stream=call_agent) + def handle_message(self, msg: SkillMsg) -> None: + logger.info(f"{msg.skill_name}, {msg.call_id} - {msg}") + + if self._skill_state.get(msg.call_id) is None: + logger.warn( + f"Skill state for {msg.skill_name} (call_id={msg.call_id}) not found, (skill not called by our agent?) initializing. (message received: {msg})" + ) + self._skill_state[msg.call_id] = SkillState(call_id=msg.call_id, name=msg.skill_name) + + should_notify = self._skill_state[msg.call_id].handle_msg(msg) + + if should_notify: + self._loop.call_soon_threadsafe(self._updates_available.set) + + def has_active_skills(self) -> bool: + # check if dict is empty + if self._skill_state == {}: + return False + return True + + async def wait_for_updates(self, timeout: Optional[float] = None) -> True: + """Wait for skill updates to become available. + + This method should be called by the agent when it's ready to receive updates. + It will block until updates are available or timeout is reached. + + Args: + timeout: Optional timeout in seconds + + Returns: + True if updates are available, False on timeout + """ + try: + if timeout: + await asyncio.wait_for(self._updates_available.wait(), timeout=timeout) + else: + await self._updates_available.wait() + return True + except asyncio.TimeoutError: + return False + + def generate_snapshot(self, clear: bool = True) -> SkillStateDict: + """Generate a fresh snapshot of completed skills and optionally clear them.""" + ret = copy(self._skill_state) + + if clear: + self._updates_available.clear() + to_delete = [] + # Since snapshot is being sent to agent, we can clear the finished skill runs + for call_id, skill_run in self._skill_state.items(): + if skill_run.state == SkillStateEnum.completed: + logger.info(f"Skill {skill_run.name} (call_id={call_id}) finished") + to_delete.append(call_id) + if skill_run.state == SkillStateEnum.error: + logger.error(f"Skill run error for {skill_run.name} (call_id={call_id})") + to_delete.append(call_id) + + for call_id in to_delete: + logger.debug(f"Call {call_id} finished, removing from state") + del self._skill_state[call_id] + + return ret + + def __str__(self): + console = Console(force_terminal=True, legacy_windows=False) + + # Create main table without any header + table = Table(show_header=False) + + # Add containers section + containers_table = Table(show_header=True, show_edge=False, box=None) + containers_table.add_column("Type", style="cyan") + containers_table.add_column("Container", style="white") + + # Add static containers + for container in self._static_containers: + containers_table.add_row("Static", str(container)) + + # Add dynamic containers + for container in self._dynamic_containers: + containers_table.add_row("Dynamic", str(container)) + + if not self._static_containers and not self._dynamic_containers: + containers_table.add_row("", "[dim]No containers registered[/dim]") + + # Add skill states section + states_table = Table(show_header=True, show_edge=False, box=None) + states_table.add_column("Call ID", style="dim", width=12) + states_table.add_column("Skill", style="white") + states_table.add_column("State", style="white") + states_table.add_column("Duration", style="yellow") + states_table.add_column("Messages", style="dim") + + for call_id, skill_state in self._skill_state.items(): + # Get colored state name + state_text = skill_state.state.colored_name() + + # Duration formatting + if ( + skill_state.state == SkillStateEnum.completed + or skill_state.state == SkillStateEnum.error + ): + duration = f"{skill_state.duration():.2f}s" + else: + duration = f"{skill_state.duration():.2f}s..." + + # Messages info + msg_count = str(len(skill_state)) + + states_table.add_row( + call_id[:8] + "...", skill_state.name, state_text, duration, msg_count + ) + + if not self._skill_state: + states_table.add_row("", "[dim]No active skills[/dim]", "", "", "") + + # Combine into main table + table.add_column("Section", style="bold") + table.add_column("Details", style="none") + table.add_row("Containers", containers_table) + table.add_row("Skills", states_table) + + # Render to string with title above + with console.capture() as capture: + console.print(Text(" SkillCoordinator", style="bold blue")) + console.print(table) + return capture.get().strip() + + # Given skillcontainers can run remotely, we are + # Caching available skills from static containers + # + # Dynamic containers will be queried at runtime via + # .skills() method + def register_skills(self, container: SkillContainer): + self.empty = False + if not container.dynamic_skills: + logger.info(f"Registering static skill container, {container}") + self._static_containers.append(container) + for name, skill_config in container.skills().items(): + self._skills[name] = skill_config.bind(getattr(container, name)) + else: + logger.info(f"Registering dynamic skill container, {container}") + self._dynamic_containers.append(container) + + def get_skill_config(self, skill_name: str) -> Optional[SkillConfig]: + skill_config = self._skills.get(skill_name) + if not skill_config: + skill_config = self.skills().get(skill_name) + return skill_config + + def skills(self) -> dict[str, SkillConfig]: + # Static container skilling is already cached + all_skills: dict[str, SkillConfig] = {**self._skills} + + # Then aggregate skills from dynamic containers + for container in self._dynamic_containers: + for skill_name, skill_config in container.skills().items(): + all_skills[skill_name] = skill_config.bind(getattr(container, skill_name)) + + return all_skills diff --git a/dimos/protocol/skill/schema.py b/dimos/protocol/skill/schema.py new file mode 100644 index 0000000000..37a6e6fac1 --- /dev/null +++ b/dimos/protocol/skill/schema.py @@ -0,0 +1,103 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect +from typing import Dict, List, Union, get_args, get_origin + + +def python_type_to_json_schema(python_type) -> dict: + """Convert Python type annotations to JSON Schema format.""" + # Handle None/NoneType + if python_type is type(None) or python_type is None: + return {"type": "null"} + + # Handle Union types (including Optional) + origin = get_origin(python_type) + if origin is Union: + args = get_args(python_type) + # Handle Optional[T] which is Union[T, None] + if len(args) == 2 and type(None) in args: + non_none_type = args[0] if args[1] is type(None) else args[1] + schema = python_type_to_json_schema(non_none_type) + # For OpenAI function calling, we don't use anyOf for optional params + return schema + else: + # For other Union types, use anyOf + return {"anyOf": [python_type_to_json_schema(arg) for arg in args]} + + # Handle List/list types + if origin in (list, List): + args = get_args(python_type) + if args: + return {"type": "array", "items": python_type_to_json_schema(args[0])} + return {"type": "array"} + + # Handle Dict/dict types + if origin in (dict, Dict): + return {"type": "object"} + + # Handle basic types + type_map = { + str: {"type": "string"}, + int: {"type": "integer"}, + float: {"type": "number"}, + bool: {"type": "boolean"}, + list: {"type": "array"}, + dict: {"type": "object"}, + } + + return type_map.get(python_type, {"type": "string"}) + + +def function_to_schema(func) -> dict: + """Convert a function to OpenAI function schema format.""" + try: + signature = inspect.signature(func) + except ValueError as e: + raise ValueError(f"Failed to get signature for function {func.__name__}: {str(e)}") + + properties = {} + required = [] + + for param_name, param in signature.parameters.items(): + # Skip 'self' parameter for methods + if param_name == "self": + continue + + # Get the type annotation + if param.annotation != inspect.Parameter.empty: + param_schema = python_type_to_json_schema(param.annotation) + else: + # Default to string if no type annotation + param_schema = {"type": "string"} + + # Add description from docstring if available (would need more sophisticated parsing) + properties[param_name] = param_schema + + # Add to required list if no default value + if param.default == inspect.Parameter.empty: + required.append(param_name) + + return { + "type": "function", + "function": { + "name": func.__name__, + "description": (func.__doc__ or "").strip(), + "parameters": { + "type": "object", + "properties": properties, + "required": required, + }, + }, + } diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index e0f868b5f9..f612ec7c83 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -17,12 +17,13 @@ from dimos.core import rpc from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec -from dimos.protocol.skill.types import ( - AgentMsg, +from dimos.protocol.skill.schema import function_to_schema +from dimos.protocol.skill.type import ( MsgType, Reducer, Return, SkillConfig, + SkillMsg, Stream, ) @@ -32,16 +33,19 @@ def decorator(f: Callable[..., Any]) -> Any: def wrapper(self, *args, **kwargs): skill = f"{f.__name__}" - if kwargs.get("skillcall"): - del kwargs["skillcall"] + call_id = kwargs.get("call_id", None) + if call_id: + del kwargs["call_id"] def run_function(): - self.agent_comms.publish(AgentMsg(skill, None, type=MsgType.start)) + self.agent_comms.publish(SkillMsg(call_id, skill, None, type=MsgType.start)) try: val = f(self, *args, **kwargs) - self.agent_comms.publish(AgentMsg(skill, val, type=MsgType.ret)) + self.agent_comms.publish(SkillMsg(call_id, skill, val, type=MsgType.ret)) except Exception as e: - self.agent_comms.publish(AgentMsg(skill, str(e), type=MsgType.error)) + self.agent_comms.publish( + SkillMsg(call_id, skill, str(e), type=MsgType.error) + ) thread = threading.Thread(target=run_function) thread.start() @@ -49,7 +53,16 @@ def run_function(): return f(self, *args, **kwargs) - skill_config = SkillConfig(name=f.__name__, reducer=reducer, stream=stream, ret=ret) + # sig = inspect.signature(f) + # params = list(sig.parameters.values()) + # if params and params[0].name == "self": + # params = params[1:] # Remove first parameter 'self' + + # wrapper.__signature__ = sig.replace(parameters=params) + + skill_config = SkillConfig( + name=f.__name__, reducer=reducer, stream=stream, ret=ret, schema=function_to_schema(f) + ) # implicit RPC call as well wrapper.__rpc__ = True # type: ignore[attr-defined] diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py new file mode 100644 index 0000000000..0b6d4d54a5 --- /dev/null +++ b/dimos/protocol/skill/test_coordinator.py @@ -0,0 +1,105 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import asyncio +import time +from pprint import pprint + +import pytest + +from dimos.protocol.skill.coordinator import SkillCoordinator +from dimos.protocol.skill.skill import SkillContainer, skill +from dimos.protocol.skill.testing_utils import TestContainer + +# def test_coordinator_skill_export(): +# skillCoordinator = SkillCoordinator() +# skillCoordinator.register_skills(TestContainer()) + +# assert skillCoordinator.get_tools() == [ +# { +# "function": { +# "description": "", +# "name": "add", +# "parameters": { +# "properties": {"x": {"type": "integer"}, "y": {"type": "integer"}}, +# "required": ["x", "y"], +# "type": "object", +# }, +# }, +# "type": "function", +# }, +# { +# "function": { +# "description": "", +# "name": "delayadd", +# "parameters": { +# "properties": {"x": {"type": "integer"}, "y": {"type": "integer"}}, +# "required": ["x", "y"], +# "type": "object", +# }, +# }, +# "type": "function", +# }, +# ] + +# print(pprint(skillCoordinator.get_tools())) + + +class TestContainer2(SkillContainer): + @skill() + def add(self, x: int, y: int) -> int: + # time.sleep(0.25) + return x + y + + @skill() + def delayadd(self, x: int, y: int) -> int: + time.sleep(0.5) + return x + y + + +@pytest.mark.asyncio +async def test_coordinator_generator(): + skillCoordinator = SkillCoordinator() + skillCoordinator.register_skills(TestContainer()) + + skillCoordinator.start() + + skillCoordinator.call("test-call-0", "delayadd", {"args": [1, 2]}) + + time.sleep(0.1) + + cnt = 0 + while await skillCoordinator.wait_for_updates(1): + print(skillCoordinator) + + skillstates = skillCoordinator.generate_snapshot() + + tool_msg = skillstates[f"test-call-{cnt}"].agent_encode() + tool_msg.content == cnt + 1 + + cnt += 1 + if cnt < 5: + skillCoordinator.call( + f"test-call-{cnt}-delay", + "delayadd", + {"args": [cnt, 2]}, + ) + skillCoordinator.call( + f"test-call-{cnt}", + "add", + {"args": [cnt, 2]}, + ) + + time.sleep(0.1 * cnt) + + print("All updates processed successfully.") diff --git a/dimos/protocol/skill/test_skill.py b/dimos/protocol/skill/test_skill.py index 9bf7e85a35..836f316ca3 100644 --- a/dimos/protocol/skill/test_skill.py +++ b/dimos/protocol/skill/test_skill.py @@ -14,19 +14,9 @@ import time -from dimos.protocol.skill.agent_interface import AgentInterface +from dimos.protocol.skill.coordinator import SkillCoordinator from dimos.protocol.skill.skill import SkillContainer, skill - - -class TestContainer(SkillContainer): - @skill() - def add(self, x: int, y: int) -> int: - return x + y - - @skill() - def delayadd(self, x: int, y: int) -> int: - time.sleep(0.5) - return x + y +from dimos.protocol.skill.testing_utils import TestContainer def test_introspect_skill(): @@ -35,7 +25,7 @@ def test_introspect_skill(): def test_internals(): - agentInterface = AgentInterface() + agentInterface = SkillCoordinator() agentInterface.start() testContainer = TestContainer() @@ -45,7 +35,7 @@ def test_internals(): # skillcall=True makes the skill function exit early, # it doesn't behave like a blocking function, # - # return is passed as AgentMsg to the agent topic + # return is passed as SkillMsg to the agent topic testContainer.delayadd(2, 4, skillcall=True) testContainer.add(1, 2, skillcall=True) @@ -61,7 +51,7 @@ def test_internals(): print(agentInterface) - agentInterface.execute_skill("delayadd", 1, 2) + agentInterface.call("test-call-1", "delayadd", 1, 2) time.sleep(0.25) print(agentInterface) @@ -71,7 +61,7 @@ def test_internals(): def test_standard_usage(): - agentInterface = AgentInterface(agent_callback=print) + agentInterface = SkillCoordinator() agentInterface.start() testContainer = TestContainer() @@ -82,7 +72,7 @@ def test_standard_usage(): print(agentInterface.skills()) # we can execute a skill - agentInterface.execute_skill("delayadd", 1, 2) + agentInterface.call("test-call-2", "delayadd", 1, 2) # while skill is executing, we can introspect the state # (we see that the skill is running) @@ -108,7 +98,7 @@ def add(self, x: int, y: int) -> int: time.sleep(0.5) return x * y - agentInterface = AgentInterface(agent_callback=print) + agentInterface = SkillCoordinator() agentInterface.start() dimos = start(1) @@ -117,7 +107,7 @@ def add(self, x: int, y: int) -> int: agentInterface.register_skills(mock_module) # we can execute a skill - agentInterface.execute_skill("add", 1, 2) + agentInterface.call("test-call-3", "add", 1, 2) # while skill is executing, we can introspect the state # (we see that the skill is running) diff --git a/dimos/protocol/skill/testing_utils.py b/dimos/protocol/skill/testing_utils.py new file mode 100644 index 0000000000..fda4c27591 --- /dev/null +++ b/dimos/protocol/skill/testing_utils.py @@ -0,0 +1,28 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import time + +from dimos.protocol.skill.skill import SkillContainer, skill + + +class TestContainer(SkillContainer): + @skill() + def add(self, x: int, y: int) -> int: + return x + y + + @skill() + def delayadd(self, x: int, y: int) -> int: + time.sleep(0.3) + return x + y diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py new file mode 100644 index 0000000000..0ed1c91ad3 --- /dev/null +++ b/dimos/protocol/skill/type.py @@ -0,0 +1,146 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import time +from dataclasses import dataclass +from enum import Enum +from typing import Any, Callable + +from dimos.types.timestamped import Timestamped + + +class Call(Enum): + Implicit = 0 + Explicit = 1 + + +class Reducer(Enum): + none = 0 + all = 1 + latest = 2 + average = 3 + + +class Stream(Enum): + # no streaming + none = 0 + # passive stream, doesn't schedule an agent call, but returns the value to the agent + passive = 1 + # calls the agent with every value emitted, schedules an agent call + call_agent = 2 + + +class Return(Enum): + # doesn't return anything to an agent + none = 0 + # returns the value to the agent, but doesn't schedule an agent call + passive = 1 + # calls the agent with the value, scheduling an agent call + call_agent = 2 + + +@dataclass +class SkillConfig: + name: str + reducer: Reducer + stream: Stream + ret: Return + schema: dict[str, Any] + f: Callable | None = None + autostart: bool = False + + def bind(self, f: Callable) -> "SkillConfig": + self.f = f + return self + + def call(self, call_id, *args, **kwargs) -> Any: + if self.f is None: + raise ValueError( + "Function is not bound to the SkillConfig. This should be called only within AgentListener." + ) + + return self.f(*args, **kwargs, call_id=call_id) + + def __str__(self): + parts = [f"name={self.name}"] + + # Only show reducer if stream is not none (streaming is happening) + if self.stream != Stream.none: + reducer_name = "unknown" + if self.reducer == Reducer.latest: + reducer_name = "latest" + elif self.reducer == Reducer.all: + reducer_name = "all" + elif self.reducer == Reducer.average: + reducer_name = "average" + parts.append(f"reducer={reducer_name}") + parts.append(f"stream={self.stream.name}") + + # Always show return mode + parts.append(f"ret={self.ret.name}") + return f"Skill({', '.join(parts)})" + + +class MsgType(Enum): + pending = 0 + start = 1 + stream = 2 + ret = 3 + error = 4 + + +class SkillMsg(Timestamped): + ts: float + type: MsgType + call_id: str + skill_name: str + content: str | int | float | dict | list + + def __init__( + self, + call_id: str, + skill_name: str, + content: str | int | float | dict | list, + type: MsgType = MsgType.ret, + ) -> None: + self.ts = time.time() + self.call_id = call_id + self.skill_name = skill_name + self.content = content + self.type = type + + def __repr__(self): + return self.__str__() + + @property + def end(self) -> bool: + return self.type == MsgType.ret or self.type == MsgType.error + + @property + def start(self) -> bool: + return self.type == MsgType.start + + def __str__(self): + time_ago = time.time() - self.ts + + if self.type == MsgType.start: + return f"Start({time_ago:.1f}s ago)" + if self.type == MsgType.ret: + return f"Ret({time_ago:.1f}s ago, val={self.content})" + if self.type == MsgType.error: + return f"Error({time_ago:.1f}s ago, val={self.content})" + if self.type == MsgType.pending: + return f"Pending({time_ago:.1f}s ago)" + if self.type == MsgType.stream: + return f"Stream({time_ago:.1f}s ago, val={self.content})" diff --git a/dimos/utils/cli/agentspy/agentspy.py b/dimos/utils/cli/agentspy/agentspy.py index 0c25a89612..2c58ab4cf3 100644 --- a/dimos/utils/cli/agentspy/agentspy.py +++ b/dimos/utils/cli/agentspy/agentspy.py @@ -14,7 +14,6 @@ from __future__ import annotations -import asyncio import logging import threading import time @@ -23,20 +22,20 @@ from rich.text import Text from textual.app import App, ComposeResult from textual.binding import Binding -from textual.containers import Container, Horizontal, Vertical +from textual.containers import Vertical from textual.reactive import reactive -from textual.widgets import DataTable, Footer, Header, RichLog +from textual.widgets import DataTable, Footer, RichLog -from dimos.protocol.skill.agent_interface import AgentInterface, SkillState, SkillStateEnum -from dimos.protocol.skill.comms import AgentMsg, LCMSkillComms -from dimos.protocol.skill.types import MsgType +from dimos.protocol.skill.comms import SkillMsg +from dimos.protocol.skill.coordinator import SkillCoordinator, SkillState, SkillStateEnum +from dimos.protocol.skill.type import MsgType class AgentSpy: """Spy on agent skill executions via LCM messages.""" def __init__(self): - self.agent_interface = AgentInterface() + self.agent_interface = SkillCoordinator() self.message_callbacks: list[Callable[[Dict[str, SkillState]], None]] = [] self._lock = threading.Lock() self._latest_state: Dict[str, SkillState] = {} @@ -53,14 +52,14 @@ def stop(self): """Stop spying.""" self.agent_interface.stop() - def _handle_message(self, msg: AgentMsg): - """Handle incoming agent messages.""" + def _handle_message(self, msg: SkillMsg): + """Handle incoming skill messages.""" # Small delay to ensure agent_interface has processed the message def delayed_update(): time.sleep(0.1) with self._lock: - self._latest_state = self.agent_interface.state_snapshot(clear=False) + self._latest_state = self.agent_interface.generate_snapshot(clear=False) for callback in self.message_callbacks: callback(self._latest_state) @@ -83,7 +82,7 @@ def state_color(state: SkillStateEnum) -> str: return "yellow" elif state == SkillStateEnum.running: return "green" - elif state == SkillStateEnum.returned: + elif state == SkillStateEnum.completed: return "cyan" elif state == SkillStateEnum.error: return "red" @@ -181,11 +180,12 @@ def __init__(self, *args, **kwargs): self.spy = AgentSpy() self.table: Optional[DataTable] = None self.log_view: Optional[RichLog] = None - self.skill_history: list[tuple[str, SkillState, float]] = [] # (name, state, start_time) + self.skill_history: list[tuple[str, SkillState, float]] = [] # (call_id, state, start_time) self.log_handler: Optional[TextualLogHandler] = None def compose(self) -> ComposeResult: self.table = DataTable(zebra_stripes=False, cursor_type=None) + self.table.add_column("Call ID") self.table.add_column("Skill Name") self.table.add_column("State") self.table.add_column("Duration") @@ -219,12 +219,23 @@ def on_mount(self): if self.log_view: self.log_handler = TextualLogHandler(self.log_view) - # Custom formatter that shortens the logger name + # Custom formatter that shortens the logger name and highlights call_ids class ShortNameFormatter(logging.Formatter): def format(self, record): # Remove the common prefix from logger names if record.name.startswith("dimos.protocol.skill."): record.name = record.name.replace("dimos.protocol.skill.", "") + + # Highlight call_ids in the message + msg = record.getMessage() + if "call_id=" in msg: + # Extract and colorize call_id + import re + + msg = re.sub(r"call_id=([^\s\)]+)", r"call_id=\033[94m\1\033[0m", msg) + record.msg = msg + record.args = () + return super().format(record) self.log_handler.setFormatter( @@ -246,7 +257,7 @@ def format(self, record): self.spy.start() # Also set up periodic refresh to update durations - self.set_interval(0.5, self.refresh_table) + self.set_interval(1.0, self.refresh_table) def on_unmount(self): """Stop the spy when app unmounts.""" @@ -257,18 +268,18 @@ def on_unmount(self): root_logger.removeHandler(self.log_handler) def update_state(self, state: Dict[str, SkillState]): - """Update state from spy callback.""" + """Update state from spy callback. State dict is keyed by call_id.""" # Update history with current state current_time = time.time() # Add new skills or update existing ones - for skill_name, skill_state in state.items(): - # Find if skill already in history + for call_id, skill_state in state.items(): + # Find if this call_id already in history found = False - for i, (name, old_state, start_time) in enumerate(self.skill_history): - if name == skill_name: + for i, (existing_call_id, old_state, start_time) in enumerate(self.skill_history): + if existing_call_id == call_id: # Update existing entry - self.skill_history[i] = (skill_name, skill_state, start_time) + self.skill_history[i] = (call_id, skill_state, start_time) found = True break @@ -278,7 +289,7 @@ def update_state(self, state: Dict[str, SkillState]): if len(skill_state) > 0: # Use first message timestamp if available start_time = skill_state._items[0].ts - self.skill_history.append((skill_name, skill_state, start_time)) + self.skill_history.append((call_id, skill_state, start_time)) # Schedule UI update self.call_from_thread(self.refresh_table) @@ -299,7 +310,7 @@ def refresh_table(self): max_rows = max(1, height) # Show only top N entries - for skill_name, skill_state, start_time in sorted_history[:max_rows]: + for call_id, skill_state, start_time in sorted_history[:max_rows]: # Calculate how long ago it started time_ago = time.time() - start_time start_str = format_duration(time_ago) + " ago" @@ -317,7 +328,7 @@ def refresh_table(self): last_msg = skill_state._items[-1] if last_msg.type == MsgType.error: details = str(last_msg.content)[:40] - elif skill_state.state == SkillStateEnum.returned and msg_count > 0: + elif skill_state.state == SkillStateEnum.completed and msg_count > 0: # Show return value last_msg = skill_state._items[-1] if last_msg.type == MsgType.ret: @@ -326,9 +337,15 @@ def refresh_table(self): # Show progress indicator details = "⋯ " + "▸" * min(int(time_ago), 20) + # Format call_id for display (truncate if too long) + display_call_id = call_id + if len(call_id) > 16: + display_call_id = call_id[:13] + "..." + # Add row with colored state self.table.add_row( - Text(skill_name, style="white"), + Text(display_call_id, style="bright_blue"), + Text(skill_state.name, style="white"), Text(skill_state.state.name, style=state_color(skill_state.state)), Text(duration_str, style="dim"), Text(start_str, style="dim"), diff --git a/dimos/utils/cli/agentspy/demo_agentspy.py b/dimos/utils/cli/agentspy/demo_agentspy.py index 2b39674a7b..fcd71d99ef 100644 --- a/dimos/utils/cli/agentspy/demo_agentspy.py +++ b/dimos/utils/cli/agentspy/demo_agentspy.py @@ -17,7 +17,7 @@ import time import threading -from dimos.protocol.skill.agent_interface import AgentInterface +from dimos.protocol.skill.coordinator import SkillCoordinator from dimos.protocol.skill.skill import SkillContainer, skill @@ -56,7 +56,7 @@ def quick_task(self, name: str) -> str: def run_demo_skills(): """Run demo skills in background.""" # Create and start agent interface - agent_interface = AgentInterface() + agent_interface = SkillCoordinator() agent_interface.start() # Register skills @@ -69,18 +69,21 @@ def skill_runner(): while True: time.sleep(2) + # Generate unique call_id for each invocation + call_id = f"demo-{counter}" + # Run different skills based on counter if counter % 4 == 0: - demo_skills.count_to(3, skillcall=True) + # Run multiple count_to in parallel to show parallel execution + agent_interface.call(f"{call_id}-count-1", "count_to", 3) + agent_interface.call(f"{call_id}-count-2", "count_to", 5) + agent_interface.call(f"{call_id}-count-3", "count_to", 2) elif counter % 4 == 1: - demo_skills.compute_fibonacci(10, skillcall=True) + agent_interface.call(f"{call_id}-fib", "compute_fibonacci", 10) elif counter % 4 == 2: - demo_skills.quick_task(f"task-{counter}", skillcall=True) + agent_interface.call(f"{call_id}-quick", "quick_task", f"task-{counter}") else: - try: - demo_skills.simulate_error(skillcall=True) - except: - pass # Expected to fail + agent_interface.call(f"{call_id}-error", "simulate_error") counter += 1 From 29d2538d2cc782ed022b1ff56c99386a80aaed6a Mon Sep 17 00:00:00 2001 From: lesh Date: Sat, 9 Aug 2025 11:04:18 -0700 Subject: [PATCH 02/48] fix skill tests --- dimos/protocol/skill/test_coordinator.py | 34 ------- dimos/protocol/skill/test_skill.py | 120 ----------------------- 2 files changed, 154 deletions(-) delete mode 100644 dimos/protocol/skill/test_skill.py diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 0b6d4d54a5..a75ea85e55 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -21,44 +21,10 @@ from dimos.protocol.skill.skill import SkillContainer, skill from dimos.protocol.skill.testing_utils import TestContainer -# def test_coordinator_skill_export(): -# skillCoordinator = SkillCoordinator() -# skillCoordinator.register_skills(TestContainer()) - -# assert skillCoordinator.get_tools() == [ -# { -# "function": { -# "description": "", -# "name": "add", -# "parameters": { -# "properties": {"x": {"type": "integer"}, "y": {"type": "integer"}}, -# "required": ["x", "y"], -# "type": "object", -# }, -# }, -# "type": "function", -# }, -# { -# "function": { -# "description": "", -# "name": "delayadd", -# "parameters": { -# "properties": {"x": {"type": "integer"}, "y": {"type": "integer"}}, -# "required": ["x", "y"], -# "type": "object", -# }, -# }, -# "type": "function", -# }, -# ] - -# print(pprint(skillCoordinator.get_tools())) - class TestContainer2(SkillContainer): @skill() def add(self, x: int, y: int) -> int: - # time.sleep(0.25) return x + y @skill() diff --git a/dimos/protocol/skill/test_skill.py b/dimos/protocol/skill/test_skill.py deleted file mode 100644 index 836f316ca3..0000000000 --- a/dimos/protocol/skill/test_skill.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import time - -from dimos.protocol.skill.coordinator import SkillCoordinator -from dimos.protocol.skill.skill import SkillContainer, skill -from dimos.protocol.skill.testing_utils import TestContainer - - -def test_introspect_skill(): - testContainer = TestContainer() - print(testContainer.skills()) - - -def test_internals(): - agentInterface = SkillCoordinator() - agentInterface.start() - - testContainer = TestContainer() - - agentInterface.register_skills(testContainer) - - # skillcall=True makes the skill function exit early, - # it doesn't behave like a blocking function, - # - # return is passed as SkillMsg to the agent topic - testContainer.delayadd(2, 4, skillcall=True) - testContainer.add(1, 2, skillcall=True) - - time.sleep(0.25) - print(agentInterface) - - time.sleep(0.75) - print(agentInterface) - - print(agentInterface.state_snapshot()) - - print(agentInterface.skills()) - - print(agentInterface) - - agentInterface.call("test-call-1", "delayadd", 1, 2) - - time.sleep(0.25) - print(agentInterface) - time.sleep(0.75) - - print(agentInterface) - - -def test_standard_usage(): - agentInterface = SkillCoordinator() - agentInterface.start() - - testContainer = TestContainer() - - agentInterface.register_skills(testContainer) - - # we can investigate skills - print(agentInterface.skills()) - - # we can execute a skill - agentInterface.call("test-call-2", "delayadd", 1, 2) - - # while skill is executing, we can introspect the state - # (we see that the skill is running) - time.sleep(0.25) - print(agentInterface) - time.sleep(0.75) - - # after the skill has finished, we can see the result - # and the skill state - print(agentInterface) - - -def test_module(): - from dimos.core import Module, start - - class MockModule(Module, SkillContainer): - def __init__(self): - super().__init__() - SkillContainer.__init__(self) - - @skill() - def add(self, x: int, y: int) -> int: - time.sleep(0.5) - return x * y - - agentInterface = SkillCoordinator() - agentInterface.start() - - dimos = start(1) - mock_module = dimos.deploy(MockModule) - - agentInterface.register_skills(mock_module) - - # we can execute a skill - agentInterface.call("test-call-3", "add", 1, 2) - - # while skill is executing, we can introspect the state - # (we see that the skill is running) - time.sleep(0.25) - print(agentInterface) - time.sleep(0.75) - - # after the skill has finished, we can see the result - # and the skill state - print(agentInterface) From 9b7a2be2b6992c58eb212148ab6174ab56d832a9 Mon Sep 17 00:00:00 2001 From: lesh Date: Sat, 9 Aug 2025 12:15:23 -0700 Subject: [PATCH 03/48] cleanup --- dimos/agents2/__init__.py | 3 + dimos/agents2/{main.py => agent.py} | 33 ++- dimos/agents2/spec.py | 143 +++++++++++ dimos/agents2/{test_main.py => test_agent.py} | 7 +- dimos/protocol/skill/agent_interface.py | 236 ------------------ dimos/protocol/skill/coordinator.py | 9 +- dimos/protocol/skill/types.py | 140 ----------- 7 files changed, 183 insertions(+), 388 deletions(-) rename dimos/agents2/{main.py => agent.py} (75%) create mode 100644 dimos/agents2/spec.py rename dimos/agents2/{test_main.py => test_agent.py} (85%) delete mode 100644 dimos/protocol/skill/agent_interface.py delete mode 100644 dimos/protocol/skill/types.py diff --git a/dimos/agents2/__init__.py b/dimos/agents2/__init__.py index 6a756fbaab..c4776ceec9 100644 --- a/dimos/agents2/__init__.py +++ b/dimos/agents2/__init__.py @@ -6,3 +6,6 @@ ToolCall, ToolMessage, ) + +from dimos.agents2.agent import Agent +from dimos.agents2.spec import AgentSpec diff --git a/dimos/agents2/main.py b/dimos/agents2/agent.py similarity index 75% rename from dimos/agents2/main.py rename to dimos/agents2/agent.py index 8e2da24903..11336602d9 100644 --- a/dimos/agents2/main.py +++ b/dimos/agents2/agent.py @@ -14,6 +14,7 @@ import asyncio from pprint import pprint +from typing import Optional from langchain.chat_models import init_chat_model from langchain_core.language_models.chat_models import BaseChatModel @@ -26,6 +27,7 @@ ToolMessage, ) +from dimos.agents2.spec import AgentSpec from dimos.core import Module, rpc from dimos.protocol.skill import skill from dimos.protocol.skill.coordinator import SkillCoordinator, SkillState @@ -34,15 +36,32 @@ logger = setup_logger("dimos.protocol.agents2") -class Agent(SkillCoordinator): - def __init__(self, model: str = "gpt-4o", model_provider: str = "openai", *args, **kwargs): - super().__init__(*args, **kwargs) +class Agent(AgentSpec, SkillCoordinator): + def __init__( + self, + *args, + **kwargs, + ): + AgentSpec.__init__(self, *args, **kwargs) + SkillCoordinator.__init__(self) self.messages = [] - self._llm = init_chat_model( - model=model, - model_provider=model_provider, - ) + + if self.config.system_prompt: + if isinstance(self.config.system_prompt, str): + self.messages.append(self.config.system_prompt) + else: + self.messages.append(self.config.system_prompt) + + self._llm = init_chat_model(model_provider=self.config.provider, model=self.config.model) + + @rpc + def start(self): + SkillCoordinator.start(self) + + @rpc + def stop(self): + SkillCoordinator.stop(self) async def agent_loop(self, seed_query: str = ""): self.messages.append(HumanMessage(seed_query)) diff --git a/dimos/agents2/spec.py b/dimos/agents2/spec.py new file mode 100644 index 0000000000..1a4cb463c2 --- /dev/null +++ b/dimos/agents2/spec.py @@ -0,0 +1,143 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Base agent module that wraps BaseAgent for DimOS module usage.""" + +from dataclasses import dataclass +from enum import Enum +from typing import Optional, Tuple, Union + +from langchain.chat_models.base import _SUPPORTED_PROVIDERS +from langchain_core.messages import ( + SystemMessage, +) + +from dimos.core import rpc +from dimos.protocol.service import Service +from dimos.protocol.skill.skill import SkillContainer +from dimos.utils.logging_config import setup_logger + +logger = setup_logger("dimos.agents.modules.base_agent") + + +# Dynamically create ModelProvider enum from LangChain's supported providers +Provider = Enum( + "Provider", {provider.upper(): provider for provider in _SUPPORTED_PROVIDERS}, type=str +) + + +class Model(str, Enum): + """Common model names across providers. + + Note: This is not exhaustive as model names change frequently. + Based on langchain's _attempt_infer_model_provider patterns. + """ + + # OpenAI models (prefix: gpt-3, gpt-4, o1, o3) + GPT_4O = "gpt-4o" + GPT_4O_MINI = "gpt-4o-mini" + GPT_4_TURBO = "gpt-4-turbo" + GPT_4_TURBO_PREVIEW = "gpt-4-turbo-preview" + GPT_4 = "gpt-4" + GPT_35_TURBO = "gpt-3.5-turbo" + GPT_35_TURBO_16K = "gpt-3.5-turbo-16k" + O1_PREVIEW = "o1-preview" + O1_MINI = "o1-mini" + O3_MINI = "o3-mini" + + # Anthropic models (prefix: claude) + CLAUDE_3_OPUS = "claude-3-opus-20240229" + CLAUDE_3_SONNET = "claude-3-sonnet-20240229" + CLAUDE_3_HAIKU = "claude-3-haiku-20240307" + CLAUDE_35_SONNET = "claude-3-5-sonnet-20241022" + CLAUDE_35_SONNET_LATEST = "claude-3-5-sonnet-latest" + CLAUDE_3_7_SONNET = "claude-3-7-sonnet-20250219" + + # Google models (prefix: gemini) + GEMINI_20_FLASH = "gemini-2.0-flash" + GEMINI_15_PRO = "gemini-1.5-pro" + GEMINI_15_FLASH = "gemini-1.5-flash" + GEMINI_10_PRO = "gemini-1.0-pro" + + # Amazon Bedrock models (prefix: amazon) + AMAZON_TITAN_EXPRESS = "amazon.titan-text-express-v1" + AMAZON_TITAN_LITE = "amazon.titan-text-lite-v1" + + # Cohere models (prefix: command) + COMMAND_R_PLUS = "command-r-plus" + COMMAND_R = "command-r" + COMMAND = "command" + COMMAND_LIGHT = "command-light" + + # Fireworks models (prefix: accounts/fireworks) + FIREWORKS_LLAMA_V3_70B = "accounts/fireworks/models/llama-v3-70b-instruct" + FIREWORKS_MIXTRAL_8X7B = "accounts/fireworks/models/mixtral-8x7b-instruct" + + # Mistral models (prefix: mistral) + MISTRAL_LARGE = "mistral-large" + MISTRAL_MEDIUM = "mistral-medium" + MISTRAL_SMALL = "mistral-small" + MIXTRAL_8X7B = "mixtral-8x7b" + MIXTRAL_8X22B = "mixtral-8x22b" + MISTRAL_7B = "mistral-7b" + + # DeepSeek models (prefix: deepseek) + DEEPSEEK_CHAT = "deepseek-chat" + DEEPSEEK_CODER = "deepseek-coder" + DEEPSEEK_R1_DISTILL_LLAMA_70B = "deepseek-r1-distill-llama-70b" + + # xAI models (prefix: grok) + GROK_1 = "grok-1" + GROK_2 = "grok-2" + + # Perplexity models (prefix: sonar) + SONAR_SMALL_CHAT = "sonar-small-chat" + SONAR_MEDIUM_CHAT = "sonar-medium-chat" + SONAR_LARGE_CHAT = "sonar-large-chat" + + # Meta Llama models (various providers) + LLAMA_3_70B = "llama-3-70b" + LLAMA_3_8B = "llama-3-8b" + LLAMA_31_70B = "llama-3.1-70b" + LLAMA_31_8B = "llama-3.1-8b" + LLAMA_33_70B = "llama-3.3-70b" + LLAMA_2_70B = "llama-2-70b" + LLAMA_2_13B = "llama-2-13b" + LLAMA_2_7B = "llama-2-7b" + + +@dataclass +class AgentConfig: + system_prompt: Optional[str | SystemMessage] = None + skills: Optional[SkillContainer | list[SkillContainer]] = None + model: Model = Model.GPT_4O + provider: Provider = Provider.OPENAI + + +class AgentSpec( + Service[AgentConfig], +): + default_config: type[AgentConfig] = AgentConfig + + @rpc + def start(self): ... + + @rpc + def stop(self): ... + + @rpc + def clear_history(self): ... + + @rpc + def query(self, query: str): ... diff --git a/dimos/agents2/test_main.py b/dimos/agents2/test_agent.py similarity index 85% rename from dimos/agents2/test_main.py rename to dimos/agents2/test_agent.py index 755666b070..78c81a56d6 100644 --- a/dimos/agents2/test_main.py +++ b/dimos/agents2/test_agent.py @@ -17,8 +17,7 @@ import pytest -from dimos.agents2.main import Agent -from dimos.core import start +from dimos.agents2.agent import Agent from dimos.protocol.skill import SkillContainer, skill @@ -45,7 +44,7 @@ async def test_agent_init(): agent.start() agent.query( - "hi there, use add tool to add 124181112 and 124124. don't sum yourself, use a tool I provided" + "hi there, please tell me what's your name, and use add tool to add 124181112 and 124124. don't sum yourself, use a tool I provided" ) - await asyncio.sleep(5) + await asyncio.sleep(10) diff --git a/dimos/protocol/skill/agent_interface.py b/dimos/protocol/skill/agent_interface.py deleted file mode 100644 index 8a9926d028..0000000000 --- a/dimos/protocol/skill/agent_interface.py +++ /dev/null @@ -1,236 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from copy import copy -from dataclasses import dataclass -from enum import Enum -from pprint import pformat -from typing import Any, Callable, Optional - -from dimos.protocol.skill.comms import AgentMsg, LCMSkillComms, MsgType, SkillCommsSpec -from dimos.protocol.skill.skill import SkillConfig, SkillContainer -from dimos.protocol.skill.types import Reducer, Return, Stream -from dimos.types.timestamped import TimestampedCollection -from dimos.utils.logging_config import setup_logger - -logger = setup_logger("dimos.protocol.skill.agent_interface") - - -@dataclass -class AgentInputConfig: - agent_comms: type[SkillCommsSpec] = LCMSkillComms - - -class SkillStateEnum(Enum): - pending = 0 - running = 1 - returned = 2 - error = 3 - - -# TODO pending timeout, running timeout, etc. -class SkillState(TimestampedCollection): - name: str - state: SkillStateEnum - skill_config: SkillConfig - - def __init__(self, name: str, skill_config: Optional[SkillConfig] = None) -> None: - super().__init__() - self.skill_config = skill_config or SkillConfig( - name=name, stream=Stream.none, ret=Return.none, reducer=Reducer.none - ) - - self.state = SkillStateEnum.pending - self.name = name - - # returns True if the agent should be called for this message - def handle_msg(self, msg: AgentMsg) -> bool: - self.add(msg) - - if msg.type == MsgType.stream: - if ( - self.skill_config.stream == Stream.none - or self.skill_config.stream == Stream.passive - ): - return False - if self.skill_config.stream == Stream.call_agent: - return True - - if msg.type == MsgType.ret: - self.state = SkillStateEnum.returned - if self.skill_config.ret == Return.call_agent: - return True - return False - - if msg.type == MsgType.error: - self.state = SkillStateEnum.error - return True - - if msg.type == MsgType.start: - self.state = SkillStateEnum.running - return False - - return False - - def __str__(self) -> str: - head = f"SkillState(state={self.state}" - - if self.state == SkillStateEnum.returned or self.state == SkillStateEnum.error: - head += ", ran for=" - else: - head += ", running for=" - - head += f"{self.duration():.2f}s" - - if len(self): - return head + f", messages={list(self._items)})" - return head + ", No Messages)" - - -class AgentInterface(SkillContainer): - _static_containers: list[SkillContainer] - _dynamic_containers: list[SkillContainer] - _skill_state: dict[str, SkillState] - _skills: dict[str, SkillConfig] - _agent_callback: Optional[Callable[[dict[str, SkillState]], Any]] = None - - # Agent callback is called with a state snapshot once system decides - # that agents needs to be woken up, according to inputs from active skills - def __init__( - self, agent_callback: Optional[Callable[[dict[str, SkillState]], Any]] = None - ) -> None: - super().__init__() - self._agent_callback = agent_callback - self._static_containers = [] - self._dynamic_containers = [] - self._skills = {} - self._skill_state = {} - - def start(self) -> None: - self.agent_comms.start() - self.agent_comms.subscribe(self.handle_message) - - def stop(self) -> None: - self.agent_comms.stop() - - # This is used by agent to call skills - def execute_skill(self, skill_name: str, *args, **kwargs) -> None: - skill_config = self.get_skill_config(skill_name) - if not skill_config: - logger.error( - f"Skill {skill_name} not found in registered skills, but agent tried to call it (did a dynamic skill expire?)" - ) - return - - # This initializes the skill state if it doesn't exist - self._skill_state[skill_name] = SkillState(name=skill_name, skill_config=skill_config) - return skill_config.call(*args, **kwargs) - - # Receives a message from active skill - # Updates local skill state (appends to streamed data if needed etc) - # - # Checks if agent needs to be called (if ToolConfig has Return=call_agent or Stream=call_agent) - def handle_message(self, msg: AgentMsg) -> None: - logger.info(f"{msg.skill_name} - {msg}") - - if self._skill_state.get(msg.skill_name) is None: - logger.warn( - f"Skill state for {msg.skill_name} not found, (skill not called by our agent?) initializing. (message received: {msg})" - ) - self._skill_state[msg.skill_name] = SkillState(name=msg.skill_name) - - should_call_agent = self._skill_state[msg.skill_name].handle_msg(msg) - if should_call_agent: - self.call_agent() - - # Returns a snapshot of the current state of skill runs. - # - # If clear=True, it will assume the snapshot is being sent to an agent - # and will clear the finished skill runs from the state - def state_snapshot(self, clear: bool = True) -> dict[str, SkillState]: - if not clear: - return self._skill_state - - ret = copy(self._skill_state) - - to_delete = [] - # Since state is exported, we can clear the finished skill runs - for skill_name, skill_run in self._skill_state.items(): - if skill_run.state == SkillStateEnum.returned: - logger.info(f"Skill {skill_name} finished") - to_delete.append(skill_name) - if skill_run.state == SkillStateEnum.error: - logger.error(f"Skill run error for {skill_name}") - to_delete.append(skill_name) - - for skill_name in to_delete: - logger.debug(f"{skill_name} finished, removing from state") - del self._skill_state[skill_name] - - return ret - - def call_agent(self) -> None: - """Call the agent with the current state of skill runs.""" - logger.info(f"Calling agent with current skill state: {self.state_snapshot(clear=False)}") - - state = self.state_snapshot(clear=True) - - if self._agent_callback: - self._agent_callback(state) - - def __str__(self): - # Convert objects to their string representations - def stringify_value(obj): - if isinstance(obj, dict): - return {k: stringify_value(v) for k, v in obj.items()} - elif isinstance(obj, list): - return [stringify_value(item) for item in obj] - else: - return str(obj) - - ret = stringify_value(self._skill_state) - - return f"AgentInput({pformat(ret, indent=2, depth=3, width=120, compact=True)})" - - # Given skillcontainers can run remotely, we are - # Caching available skills from static containers - # - # Dynamic containers will be queried at runtime via - # .skills() method - def register_skills(self, container: SkillContainer): - if not container.dynamic_skills: - logger.info(f"Registering static skill container, {container}") - self._static_containers.append(container) - for name, skill_config in container.skills().items(): - self._skills[name] = skill_config.bind(getattr(container, name)) - else: - logger.info(f"Registering dynamic skill container, {container}") - self._dynamic_containers.append(container) - - def get_skill_config(self, skill_name: str) -> Optional[SkillConfig]: - skill_config = self._skills.get(skill_name) - if not skill_config: - skill_config = self.skills().get(skill_name) - return skill_config - - def skills(self) -> dict[str, SkillConfig]: - # Static container skilling is already cached - all_skills: dict[str, SkillConfig] = {**self._skills} - - # Then aggregate skills from dynamic containers - for container in self._dynamic_containers: - for skill_name, skill_config in container.skills().items(): - all_skills[skill_name] = skill_config.bind(getattr(container, skill_name)) - - return all_skills diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 4b0f5d27f2..3ec56308b2 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -18,12 +18,19 @@ from enum import Enum from typing import Any, List, Optional +from langchain_core.messages import ( + AIMessage, + HumanMessage, + MessageLikeRepresentation, + SystemMessage, + ToolCall, + ToolMessage, +) from langchain_core.tools import tool as langchain_tool from rich.console import Console from rich.table import Table from rich.text import Text -from dimos.agents2 import ToolCall, ToolMessage from dimos.core import Module, rpc from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec from dimos.protocol.skill.skill import SkillConfig, SkillContainer diff --git a/dimos/protocol/skill/types.py b/dimos/protocol/skill/types.py deleted file mode 100644 index e4b09a7ef9..0000000000 --- a/dimos/protocol/skill/types.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import time -from dataclasses import dataclass -from enum import Enum -from typing import Any, Callable, Generic, Optional, TypeVar - -from dimos.types.timestamped import Timestamped - - -class Call(Enum): - Implicit = 0 - Explicit = 1 - - -class Reducer(Enum): - none = 0 - all = 1 - latest = 2 - average = 3 - - -class Stream(Enum): - # no streaming - none = 0 - # passive stream, doesn't schedule an agent call, but returns the value to the agent - passive = 1 - # calls the agent with every value emitted, schedules an agent call - call_agent = 2 - - -class Return(Enum): - # doesn't return anything to an agent - none = 0 - # returns the value to the agent, but doesn't schedule an agent call - passive = 1 - # calls the agent with the value, scheduling an agent call - call_agent = 2 - - -@dataclass -class SkillConfig: - name: str - reducer: Reducer - stream: Stream - ret: Return - f: Callable | None = None - autostart: bool = False - - def bind(self, f: Callable) -> "SkillConfig": - self.f = f - return self - - def call(self, *args, **kwargs) -> Any: - if self.f is None: - raise ValueError( - "Function is not bound to the SkillConfig. This should be called only within AgentListener." - ) - - return self.f(*args, **kwargs, skillcall=True) - - def __str__(self): - parts = [f"name={self.name}"] - - # Only show reducer if stream is not none (streaming is happening) - if self.stream != Stream.none: - reducer_name = "unknown" - if self.reducer == Reducer.latest: - reducer_name = "latest" - elif self.reducer == Reducer.all: - reducer_name = "all" - elif self.reducer == Reducer.average: - reducer_name = "average" - parts.append(f"reducer={reducer_name}") - parts.append(f"stream={self.stream.name}") - - # Always show return mode - parts.append(f"ret={self.ret.name}") - return f"Skill({', '.join(parts)})" - - -class MsgType(Enum): - pending = 0 - start = 1 - stream = 2 - ret = 3 - error = 4 - - -class AgentMsg(Timestamped): - ts: float - type: MsgType - - def __init__( - self, - skill_name: str, - content: str | int | float | dict | list, - type: MsgType = MsgType.ret, - ) -> None: - self.ts = time.time() - self.skill_name = skill_name - self.content = content - self.type = type - - def __repr__(self): - return self.__str__() - - @property - def end(self) -> bool: - return self.type == MsgType.ret or self.type == MsgType.error - - @property - def start(self) -> bool: - return self.type == MsgType.start - - def __str__(self): - time_ago = time.time() - self.ts - - if self.type == MsgType.start: - return f"Start({time_ago:.1f}s ago)" - if self.type == MsgType.ret: - return f"Ret({time_ago:.1f}s ago, val={self.content})" - if self.type == MsgType.error: - return f"Error({time_ago:.1f}s ago, val={self.content})" - if self.type == MsgType.pending: - return f"Pending({time_ago:.1f}s ago)" - if self.type == MsgType.stream: - return f"Stream({time_ago:.1f}s ago, val={self.content})" From 8ea2455fe4672bb4869fba3d77a98ff1a047b364 Mon Sep 17 00:00:00 2001 From: lesh Date: Sat, 9 Aug 2025 16:51:54 -0700 Subject: [PATCH 04/48] small fixes, restructure of configurable --- dimos/agents2/agent.py | 27 ++++++++++++++++---------- dimos/agents2/spec.py | 9 ++++++--- dimos/agents2/test_agent.py | 19 +++++++++++++----- dimos/core/module.py | 30 ++++++++++++----------------- dimos/protocol/pubsub/lcmpubsub.py | 2 +- dimos/protocol/pubsub/spec.py | 4 ++-- dimos/protocol/service/__init__.py | 2 +- dimos/protocol/service/spec.py | 12 +++++------- dimos/protocol/service/test_spec.py | 18 +++++++++++++++++ dimos/protocol/skill/skill.py | 26 ++++++++++++------------- 10 files changed, 89 insertions(+), 60 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index 11336602d9..4095f1ea50 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -36,15 +36,15 @@ logger = setup_logger("dimos.protocol.agents2") -class Agent(AgentSpec, SkillCoordinator): +class Agent(AgentSpec): def __init__( self, *args, **kwargs, ): AgentSpec.__init__(self, *args, **kwargs) - SkillCoordinator.__init__(self) + self.coordinator = SkillCoordinator() self.messages = [] if self.config.system_prompt: @@ -57,11 +57,15 @@ def __init__( @rpc def start(self): - SkillCoordinator.start(self) + self.coordinator.start() @rpc def stop(self): - SkillCoordinator.stop(self) + self.coordinator.stop() + + @rpc + def clear_history(self): + self.messages.clear() async def agent_loop(self, seed_query: str = ""): self.messages.append(HumanMessage(seed_query)) @@ -75,15 +79,15 @@ async def agent_loop(self, seed_query: str = ""): logger.info(f"Agent response: {msg.content}") if msg.tool_calls: - self.execute_tool_calls(msg.tool_calls) + self.coordinator.execute_tool_calls(msg.tool_calls) - if not self.has_active_skills(): + if not self.coordinator.has_active_skills(): logger.info("No active tasks, exiting agent loop.") - return + return msg.content - await self.wait_for_updates() + await self.coordinator.wait_for_updates() - for call_id, update in self.generate_snapshot(clear=True).items(): + for call_id, update in self.coordinator.generate_snapshot(clear=True).items(): self.messages.append(update.agent_encode()) except Exception as e: @@ -93,5 +97,8 @@ async def agent_loop(self, seed_query: str = ""): traceback.print_exc() @rpc + def query_async(self, query: str): + return asyncio.ensure_future(self.agent_loop(query), loop=self._loop) + def query(self, query: str): - asyncio.ensure_future(self.agent_loop(query), loop=self._loop) + return asyncio.run_coroutine_threadsafe(self.agent_loop(query), self._loop).result() diff --git a/dimos/agents2/spec.py b/dimos/agents2/spec.py index 1a4cb463c2..7ecab4bbf4 100644 --- a/dimos/agents2/spec.py +++ b/dimos/agents2/spec.py @@ -14,6 +14,7 @@ """Base agent module that wraps BaseAgent for DimOS module usage.""" +from abc import ABC, abstractmethod from dataclasses import dataclass from enum import Enum from typing import Optional, Tuple, Union @@ -125,19 +126,21 @@ class AgentConfig: provider: Provider = Provider.OPENAI -class AgentSpec( - Service[AgentConfig], -): +class AgentSpec(Service[AgentConfig], ABC): default_config: type[AgentConfig] = AgentConfig @rpc + @abstractmethod def start(self): ... @rpc + @abstractmethod def stop(self): ... @rpc + @abstractmethod def clear_history(self): ... @rpc + @abstractmethod def query(self, query: str): ... diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index 78c81a56d6..268aac6be6 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -37,14 +37,23 @@ def sub(self, x: int, y: int) -> int: @pytest.mark.asyncio async def test_agent_init(): - # dimos = start(2) - # agent = dimos.deploy(Agent) - agent = Agent() + from dimos.core import start + + dimos = start(2) + agent = dimos.deploy( + Agent, + system_prompt="Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate", + ) + # agent = Agent( + # system_prompt="Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate" + # ) agent.register_skills(TestContainer()) agent.start() - agent.query( - "hi there, please tell me what's your name, and use add tool to add 124181112 and 124124. don't sum yourself, use a tool I provided" + print( + agent.query_async( + "hi there, please tell me what's your name, and use add tool to add 124181112 and 124124." + ) ) await asyncio.sleep(10) diff --git a/dimos/core/module.py b/dimos/core/module.py index 7cb2161fb8..f30cbd16a2 100644 --- a/dimos/core/module.py +++ b/dimos/core/module.py @@ -13,6 +13,7 @@ # limitations under the License. import asyncio import inspect +from dataclasses import dataclass from typing import ( Any, Callable, @@ -28,40 +29,33 @@ from dimos.core.core import T, rpc from dimos.core.stream import In, Out, RemoteIn, RemoteOut, Transport from dimos.protocol.rpc import LCMRPC, RPCSpec -from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec +from dimos.protocol.service import Configurable from dimos.protocol.tf import LCMTF, TFSpec -class CommsSpec: - rpc: type[RPCSpec] - agent: type[SkillCommsSpec] - tf: type[TFSpec] +@dataclass +class ModuleConfig: + rpc_transport: type[RPCSpec] = LCMRPC + tf_transport: type[TFSpec] = LCMTF -class LCMComms(CommsSpec): - rpc = LCMRPC - agent = LCMSkillComms - tf = LCMTF - - -class ModuleBase: - comms: CommsSpec = LCMComms +class ModuleBase(Configurable[ModuleConfig]): _rpc: Optional[RPCSpec] = None - _agent: Optional[SkillCommsSpec] = None _tf: Optional[TFSpec] = None _loop: asyncio.AbstractEventLoop = None + default_config = ModuleConfig + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) # we can completely override comms protocols if we want - if kwargs.get("comms", None) is not None: - self.comms = kwargs["comms"] try: # here we attempt to figure out if we are running on a dask worker # if so we use the dask worker _loop as ours, # and we register our RPC server worker = get_worker() self._loop = worker.loop if worker else None - self.rpc = self.comms.rpc() + self.rpc = self.config.rpc_transport() self.rpc.serve_module_rpc(self) self.rpc.start() except ValueError: @@ -79,7 +73,7 @@ def __init__(self, *args, **kwargs): @property def tf(self): if self._tf is None: - self._tf = self.comms.tf() + self._tf = self.config.tf_transport() return self._tf @tf.setter diff --git a/dimos/protocol/pubsub/lcmpubsub.py b/dimos/protocol/pubsub/lcmpubsub.py index b01ae40cca..5f15467800 100644 --- a/dimos/protocol/pubsub/lcmpubsub.py +++ b/dimos/protocol/pubsub/lcmpubsub.py @@ -54,7 +54,7 @@ def __str__(self) -> str: return f"{self.topic}#{self.lcm_type.msg_name}" -class LCMPubSubBase(PubSub[Topic, Any], LCMService): +class LCMPubSubBase(LCMService, PubSub[Topic, Any]): default_config = LCMConfig lc: lcm.LCM _stop_event: threading.Event diff --git a/dimos/protocol/pubsub/spec.py b/dimos/protocol/pubsub/spec.py index d7a0798557..81db8a0669 100644 --- a/dimos/protocol/pubsub/spec.py +++ b/dimos/protocol/pubsub/spec.py @@ -24,7 +24,7 @@ TopicT = TypeVar("TopicT") -class PubSub(ABC, Generic[TopicT, MsgT]): +class PubSub(Generic[TopicT, MsgT], ABC): """Abstract base class for pub/sub implementations with sugar methods.""" @abstractmethod @@ -91,7 +91,7 @@ def _queue_cb(msg: MsgT, topic: TopicT): unsubscribe_fn() -class PubSubEncoderMixin(ABC, Generic[TopicT, MsgT]): +class PubSubEncoderMixin(Generic[TopicT, MsgT], ABC): """Mixin that encodes messages before publishing and decodes them after receiving. Usage: Just specify encoder and decoder as a subclass: diff --git a/dimos/protocol/service/__init__.py b/dimos/protocol/service/__init__.py index ce8a823f86..4726ad5f83 100644 --- a/dimos/protocol/service/__init__.py +++ b/dimos/protocol/service/__init__.py @@ -1,2 +1,2 @@ from dimos.protocol.service.lcmservice import LCMService -from dimos.protocol.service.spec import Service +from dimos.protocol.service.spec import Configurable, Service diff --git a/dimos/protocol/service/spec.py b/dimos/protocol/service/spec.py index 0f52fd8a18..c79b8d57ba 100644 --- a/dimos/protocol/service/spec.py +++ b/dimos/protocol/service/spec.py @@ -19,18 +19,16 @@ ConfigT = TypeVar("ConfigT") -class Service(ABC, Generic[ConfigT]): +class Configurable(Generic[ConfigT]): default_config: Type[ConfigT] def __init__(self, **kwargs) -> None: self.config: ConfigT = self.default_config(**kwargs) + +class Service(Configurable[ConfigT], ABC): @abstractmethod - def start(self) -> None: - """Start the service.""" - ... + def start(self) -> None: ... @abstractmethod - def stop(self) -> None: - """Stop the service.""" - ... + def stop(self) -> None: ... diff --git a/dimos/protocol/service/test_spec.py b/dimos/protocol/service/test_spec.py index cad531ad1e..0706af5112 100644 --- a/dimos/protocol/service/test_spec.py +++ b/dimos/protocol/service/test_spec.py @@ -84,3 +84,21 @@ def test_complete_configuration_override(): assert service.config.timeout == 60.0 assert service.config.max_connections == 50 assert service.config.ssl_enabled is True + + +def test_service_subclassing(): + @dataclass + class ExtraConfig(DatabaseConfig): + extra_param: str = "default_value" + + class ExtraDatabaseService(DatabaseService): + default_config = ExtraConfig + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + bla = ExtraDatabaseService(host="custom-host2", extra_param="extra_value") + + assert bla.config.host == "custom-host2" + assert bla.config.extra_param == "extra_value" + assert bla.config.port == 5432 # Default value from DatabaseConfig diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index f612ec7c83..f7f87e4857 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -13,9 +13,11 @@ # limitations under the License. import threading +from dataclasses import dataclass from typing import Any, Callable, Optional from dimos.core import rpc +from dimos.protocol.service import Configurable from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec from dimos.protocol.skill.schema import function_to_schema from dimos.protocol.skill.type import ( @@ -74,19 +76,17 @@ def run_function(): return decorator -class CommsSpec: - agent: type[SkillCommsSpec] - - -class LCMComms(CommsSpec): - agent: type[SkillCommsSpec] = LCMSkillComms +@dataclass +class SkillContainerConfig: + skill_transport: type[SkillCommsSpec] = LCMSkillComms # here we can have also dynamic skills potentially # agent can check .skills each time when introspecting -class SkillContainer: - comms: CommsSpec = LCMComms - _agent_comms: Optional[SkillCommsSpec] = None +class SkillContainer(Configurable[SkillContainerConfig]): + default_config = SkillContainerConfig + _skill_transport: Optional[SkillCommsSpec] = None + dynamic_skills = False def __str__(self) -> str: @@ -104,7 +104,7 @@ def skills(self) -> dict[str, SkillConfig]: } @property - def agent_comms(self) -> SkillCommsSpec: - if self._agent_comms is None: - self._agent_comms = self.comms.agent() - return self._agent_comms + def skill_transport(self) -> SkillCommsSpec: + if self._skill_transport is None: + self._skill_transport = self.config.skill_transport() + return self._skill_transport From 4796a5d4c120db9d82dd8ecf6560e7d335428e6a Mon Sep 17 00:00:00 2001 From: lesh Date: Sat, 9 Aug 2025 17:04:14 -0700 Subject: [PATCH 05/48] bugfixes --- dimos/agents2/agent.py | 7 +++++++ dimos/agents2/spec.py | 7 ++++--- dimos/agents2/test_agent.py | 4 +--- dimos/protocol/skill/coordinator.py | 16 ++++++++-------- dimos/protocol/skill/skill.py | 8 +++++--- dimos/utils/cli/agentspy/agentspy.py | 2 +- 6 files changed, 26 insertions(+), 18 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index 4095f1ea50..eb575cce3b 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -102,3 +102,10 @@ def query_async(self, query: str): def query(self, query: str): return asyncio.run_coroutine_threadsafe(self.agent_loop(query), self._loop).result() + + @rpc + def register_skills(self, container): + return self.coordinator.register_skills(container) + + def get_tools(self): + return self.coordinator.get_tools() diff --git a/dimos/agents2/spec.py b/dimos/agents2/spec.py index 7ecab4bbf4..79cdd2fdb4 100644 --- a/dimos/agents2/spec.py +++ b/dimos/agents2/spec.py @@ -24,7 +24,8 @@ SystemMessage, ) -from dimos.core import rpc +from dimos.core import Module, rpc +from dimos.core.module import ModuleConfig from dimos.protocol.service import Service from dimos.protocol.skill.skill import SkillContainer from dimos.utils.logging_config import setup_logger @@ -119,14 +120,14 @@ class Model(str, Enum): @dataclass -class AgentConfig: +class AgentConfig(ModuleConfig): system_prompt: Optional[str | SystemMessage] = None skills: Optional[SkillContainer | list[SkillContainer]] = None model: Model = Model.GPT_4O provider: Provider = Provider.OPENAI -class AgentSpec(Service[AgentConfig], ABC): +class AgentSpec(Service[AgentConfig], Module, ABC): default_config: type[AgentConfig] = AgentConfig @rpc diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index 268aac6be6..0a4ffd9d1b 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -51,9 +51,7 @@ async def test_agent_init(): agent.start() print( - agent.query_async( + agent.query( "hi there, please tell me what's your name, and use add tool to add 124181112 and 124124." ) ) - - await asyncio.sleep(10) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 3ec56308b2..be42c11c48 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -31,7 +31,7 @@ from rich.table import Table from rich.text import Text -from dimos.core import Module, rpc +from dimos.core import rpc from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec from dimos.protocol.skill.skill import SkillConfig, SkillContainer from dimos.protocol.skill.type import MsgType, Reducer, Return, SkillMsg, Stream @@ -42,8 +42,8 @@ @dataclass -class AgentInputConfig: - agent_comms: type[SkillCommsSpec] = LCMSkillComms +class SkillCoordinatorConfig: + skill_transport: type[SkillCommsSpec] = LCMSkillComms class SkillStateEnum(Enum): @@ -163,7 +163,8 @@ def __str__(self) -> str: return "\n".join(lines) -class SkillCoordinator(SkillContainer, Module): +class SkillCoordinator(SkillContainer): + default_config = SkillCoordinatorConfig empty: bool = True _static_containers: list[SkillContainer] @@ -174,7 +175,6 @@ class SkillCoordinator(SkillContainer, Module): _loop: Optional[asyncio.AbstractEventLoop] def __init__(self) -> None: - Module.__init__(self) SkillContainer.__init__(self) self._static_containers = [] self._dynamic_containers = [] @@ -184,12 +184,12 @@ def __init__(self) -> None: @rpc def start(self) -> None: - self.agent_comms.start() - self.agent_comms.subscribe(self.handle_message) + self.skill_transport.start() + self.skill_transport.subscribe(self.handle_message) @rpc def stop(self) -> None: - self.agent_comms.stop() + self.skill_transport.stop() def len(self) -> int: return len(self._skills) diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index f7f87e4857..085a352327 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -40,12 +40,14 @@ def wrapper(self, *args, **kwargs): del kwargs["call_id"] def run_function(): - self.agent_comms.publish(SkillMsg(call_id, skill, None, type=MsgType.start)) + self.skill_transport.publish(SkillMsg(call_id, skill, None, type=MsgType.start)) try: val = f(self, *args, **kwargs) - self.agent_comms.publish(SkillMsg(call_id, skill, val, type=MsgType.ret)) + self.skill_transport.publish( + SkillMsg(call_id, skill, val, type=MsgType.ret) + ) except Exception as e: - self.agent_comms.publish( + self.skill_transport.publish( SkillMsg(call_id, skill, str(e), type=MsgType.error) ) diff --git a/dimos/utils/cli/agentspy/agentspy.py b/dimos/utils/cli/agentspy/agentspy.py index 2c58ab4cf3..3f51afc968 100644 --- a/dimos/utils/cli/agentspy/agentspy.py +++ b/dimos/utils/cli/agentspy/agentspy.py @@ -46,7 +46,7 @@ def start(self): self.agent_interface.start() # Subscribe to the agent interface's comms - self.agent_interface.agent_comms.subscribe(self._handle_message) + self.agent_interface.skill_transport.subscribe(self._handle_message) def stop(self): """Stop spying.""" From 21f82fe6803a846f3eb51f99f1f287d84934794d Mon Sep 17 00:00:00 2001 From: lesh Date: Sat, 9 Aug 2025 17:12:55 -0700 Subject: [PATCH 06/48] get_loop functionality --- dimos/agents2/test_agent.py | 18 ++++++++++-------- dimos/core/module.py | 21 +++++++++++++++++++++ dimos/protocol/skill/coordinator.py | 3 +++ 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index 0a4ffd9d1b..e17e5a88c9 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -39,19 +39,21 @@ def sub(self, x: int, y: int) -> int: async def test_agent_init(): from dimos.core import start - dimos = start(2) - agent = dimos.deploy( - Agent, - system_prompt="Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate", - ) - # agent = Agent( - # system_prompt="Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate" + # dimos = start(2) + # agent = dimos.deploy( + # Agent, + # system_prompt="Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate", # ) + agent = Agent( + system_prompt="Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate" + ) agent.register_skills(TestContainer()) agent.start() print( - agent.query( + agent.query_async( "hi there, please tell me what's your name, and use add tool to add 124181112 and 124124." ) ) + + await asyncio.sleep(5) diff --git a/dimos/core/module.py b/dimos/core/module.py index f30cbd16a2..01abfcdb8a 100644 --- a/dimos/core/module.py +++ b/dimos/core/module.py @@ -33,6 +33,26 @@ from dimos.protocol.tf import LCMTF, TFSpec +def get_loop() -> asyncio.AbstractEventLoop: + try: + # here we attempt to figure out if we are running on a dask worker + # if so we use the dask worker _loop as ours, + # and we register our RPC server + worker = get_worker() + if worker.loop: + return worker.loop + + except ValueError: + ... + + try: + return asyncio.get_running_loop() + except RuntimeError: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + return loop + + @dataclass class ModuleConfig: rpc_transport: type[RPCSpec] = LCMRPC @@ -48,6 +68,7 @@ class ModuleBase(Configurable[ModuleConfig]): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + self._loop = get_loop() # we can completely override comms protocols if we want try: # here we attempt to figure out if we are running on a dask worker diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index be42c11c48..c948028e91 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -32,6 +32,7 @@ from rich.text import Text from dimos.core import rpc +from dimos.core.module import get_loop from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec from dimos.protocol.skill.skill import SkillConfig, SkillContainer from dimos.protocol.skill.type import MsgType, Reducer, Return, SkillMsg, Stream @@ -74,6 +75,7 @@ class SkillState(TimestampedCollection): def __init__(self, call_id: str, name: str, skill_config: Optional[SkillConfig] = None) -> None: super().__init__() + self.skill_config = skill_config or SkillConfig( name=name, stream=Stream.none, ret=Return.none, reducer=Reducer.none, schema={} ) @@ -176,6 +178,7 @@ class SkillCoordinator(SkillContainer): def __init__(self) -> None: SkillContainer.__init__(self) + self._loop = get_loop() self._static_containers = [] self._dynamic_containers = [] self._skills = {} From 0d455ae22ac5906ba2e4fccfcee6f2dc4480053e Mon Sep 17 00:00:00 2001 From: lesh Date: Sat, 9 Aug 2025 18:10:12 -0700 Subject: [PATCH 07/48] langchain dep --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fcc62bf476..30038ac143 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,9 +43,10 @@ dependencies = [ "sse-starlette>=2.2.1", "uvicorn>=0.34.0", - # Agent Memory + # Agents "langchain-chroma>=0.1.4", "langchain-openai>=0.2.14", + "langchain==0.3.27", # Class Extraction "pydantic", From b92446f0624eceefb184f6ca9eafce1e6f7daf59 Mon Sep 17 00:00:00 2001 From: lesh Date: Sat, 9 Aug 2025 22:19:43 -0700 Subject: [PATCH 08/48] plucked ci changes from agent-refactor --- .github/workflows/docker.yml | 16 +++++++++++----- .github/workflows/tests.yml | 4 ++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 929462d8ae..0c6abff68d 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -132,7 +132,9 @@ jobs: uses: ./.github/workflows/_docker-build-template.yml with: should-run: ${{ - needs.check-changes.result == 'success' && ((needs.ros-python.result == 'success') || (needs.ros-python.result == 'skipped')) && (needs.check-changes.outputs.dev == 'true') + needs.check-changes.result == 'success' && + (needs.check-changes.outputs.dev == 'true' || + (needs.ros-python.result == 'success' && (needs.check-changes.outputs.python == 'true' || needs.check-changes.outputs.ros == 'true'))) }} from-image: ghcr.io/dimensionalos/ros-python:${{ needs.ros-python.result == 'success' && needs.check-changes.outputs.branch-tag || 'dev' }} to-image: ghcr.io/dimensionalos/ros-dev:${{ needs.check-changes.outputs.branch-tag }} @@ -142,6 +144,7 @@ jobs: needs: [check-changes, ros-dev] if: always() uses: ./.github/workflows/tests.yml + secrets: inherit with: should-run: ${{ needs.check-changes.result == 'success' && @@ -150,12 +153,13 @@ jobs: needs.check-changes.outputs.tests == 'true')) }} cmd: "pytest && pytest -m ros" # run tests that depend on ros as well - dev-image: ros-dev:${{ needs.check-changes.outputs.dev == 'true' && needs.ros-dev.result == 'success' && needs.check-changes.outputs.branch-tag || 'dev' }} + dev-image: ros-dev:${{ (needs.check-changes.outputs.python == 'true' || needs.check-changes.outputs.dev == 'true' || needs.check-changes.outputs.ros == 'true') && needs.ros-dev.result == 'success' && needs.check-changes.outputs.branch-tag || 'dev' }} run-tests: needs: [check-changes, dev] if: always() uses: ./.github/workflows/tests.yml + secrets: inherit with: should-run: ${{ needs.check-changes.result == 'success' && @@ -164,13 +168,14 @@ jobs: needs.check-changes.outputs.tests == 'true')) }} cmd: "pytest" - dev-image: dev:${{ needs.check-changes.outputs.dev == 'true' && needs.dev.result == 'success' && needs.check-changes.outputs.branch-tag || 'dev' }} + dev-image: dev:${{ (needs.check-changes.outputs.python == 'true' || needs.check-changes.outputs.dev == 'true') && needs.dev.result == 'success' && needs.check-changes.outputs.branch-tag || 'dev' }} # we run in parallel with normal tests for speed run-heavy-tests: needs: [check-changes, dev] if: always() uses: ./.github/workflows/tests.yml + secrets: inherit with: should-run: ${{ needs.check-changes.result == 'success' && @@ -179,12 +184,13 @@ jobs: needs.check-changes.outputs.tests == 'true')) }} cmd: "pytest -m heavy" - dev-image: dev:${{ needs.check-changes.outputs.dev == 'true' && needs.dev.result == 'success' && needs.check-changes.outputs.branch-tag || 'dev' }} + dev-image: dev:${{ (needs.check-changes.outputs.python == 'true' || needs.check-changes.outputs.dev == 'true') && needs.dev.result == 'success' && needs.check-changes.outputs.branch-tag || 'dev' }} run-lcm-tests: needs: [check-changes, dev] if: always() uses: ./.github/workflows/tests.yml + secrets: inherit with: should-run: ${{ needs.check-changes.result == 'success' && @@ -193,7 +199,7 @@ jobs: needs.check-changes.outputs.tests == 'true')) }} cmd: "pytest -m lcm" - dev-image: dev:${{ needs.check-changes.outputs.dev == 'true' && needs.dev.result == 'success' && needs.check-changes.outputs.branch-tag || 'dev' }} + dev-image: dev:${{ (needs.check-changes.outputs.python == 'true' || needs.check-changes.outputs.dev == 'true') && needs.dev.result == 'success' && needs.check-changes.outputs.branch-tag || 'dev' }} # Run module tests directly to avoid pytest forking issues # run-module-tests: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2d9b917f0e..a94839a505 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -40,6 +40,10 @@ jobs: runs-on: [self-hosted, Linux] container: image: ghcr.io/dimensionalos/${{ inputs.dev-image }} + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + ALIBABA_API_KEY: ${{ secrets.ALIBABA_API_KEY }} steps: - uses: actions/checkout@v4 From b6bf28c034899ddd7e7dced20bd35970d869940d Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 14 Aug 2025 15:00:37 +0300 Subject: [PATCH 09/48] skillcontainer hosts skill execution --- dimos/protocol/skill/coordinator.py | 16 ++++++++-- dimos/protocol/skill/skill.py | 37 +++++++++++++++++------- dimos/protocol/skill/test_coordinator.py | 7 ++--- 3 files changed, 43 insertions(+), 17 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index c948028e91..b711452e74 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -72,6 +72,7 @@ class SkillState(TimestampedCollection): name: str state: SkillStateEnum skill_config: SkillConfig + value: Optional[Any] = None def __init__(self, call_id: str, name: str, skill_config: Optional[SkillConfig] = None) -> None: super().__init__() @@ -104,11 +105,13 @@ def handle_msg(self, msg: SkillMsg) -> bool: if msg.type == MsgType.ret: self.state = SkillStateEnum.completed + self.value = msg.content if self.skill_config.ret == Return.call_agent: return True return False if msg.type == MsgType.error: + self.value = msg.content self.state = SkillStateEnum.error return True @@ -218,14 +221,14 @@ def execute_tool_calls(self, tool_calls: List[ToolCall]) -> None: """Execute a list of tool calls from the agent.""" for tool_call in tool_calls: logger.info(f"executing skill call {tool_call}") - self.call( + self.call_skill( tool_call.get("id"), tool_call.get("name"), tool_call.get("args"), ) # internal skill call - def call(self, call_id: str, skill_name: str, args: dict[str, Any]) -> None: + def call_skill(self, call_id: str, skill_name: str, args: dict[str, Any]) -> None: skill_config = self.get_skill_config(skill_name) if not skill_config: logger.error( @@ -237,6 +240,7 @@ def call(self, call_id: str, skill_name: str, args: dict[str, Any]) -> None: self._skill_state[call_id] = SkillState( name=skill_name, skill_config=skill_config, call_id=call_id ) + return skill_config.call(call_id, *args.get("args", []), **args.get("kwargs", {})) # Receives a message from active skill @@ -297,7 +301,13 @@ def generate_snapshot(self, clear: bool = True) -> SkillStateDict: logger.info(f"Skill {skill_run.name} (call_id={call_id}) finished") to_delete.append(call_id) if skill_run.state == SkillStateEnum.error: - logger.error(f"Skill run error for {skill_run.name} (call_id={call_id})") + error_msg = skill_run.value.get("msg", "Unknown error") + error_traceback = skill_run.value.get("traceback", "No traceback available") + + logger.error( + f"Skill error for {skill_run.name} (call_id={call_id}): {error_msg}" + ) + print(error_traceback) to_delete.append(call_id) for call_id in to_delete: diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index 085a352327..a5230acc49 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -40,16 +40,7 @@ def wrapper(self, *args, **kwargs): del kwargs["call_id"] def run_function(): - self.skill_transport.publish(SkillMsg(call_id, skill, None, type=MsgType.start)) - try: - val = f(self, *args, **kwargs) - self.skill_transport.publish( - SkillMsg(call_id, skill, val, type=MsgType.ret) - ) - except Exception as e: - self.skill_transport.publish( - SkillMsg(call_id, skill, str(e), type=MsgType.error) - ) + return self.call_skill(call_id, skill, args, kwargs) thread = threading.Thread(target=run_function) thread.start() @@ -94,6 +85,32 @@ class SkillContainer(Configurable[SkillContainerConfig]): def __str__(self) -> str: return f"SkillContainer({self.__class__.__name__})" + def call_skill( + self, call_id: str, skill_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] + ) -> None: + f = getattr(self, skill_name, None) + + if f is None: + raise ValueError(f"Skill '{skill_name}' not found in {self.__class__.__name__}") + + self.skill_transport.publish(SkillMsg(call_id, skill, None, type=MsgType.start)) + try: + val = f(*args, **kwargs) + self.skill_transport.publish(SkillMsg(call_id, skill, val, type=MsgType.ret)) + except Exception as e: + import traceback + + formatted_traceback = "".join(traceback.TracebackException.from_exception(e).format()) + + self.skill_transport.publish( + SkillMsg( + call_id, + skill, + {"msg": str(e), "traceback": formatted_traceback}, + type=MsgType.error, + ) + ) + @rpc def skills(self) -> dict[str, SkillConfig]: # Avoid recursion by excluding this property itself diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index a75ea85e55..c58b506505 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -39,8 +39,7 @@ async def test_coordinator_generator(): skillCoordinator.register_skills(TestContainer()) skillCoordinator.start() - - skillCoordinator.call("test-call-0", "delayadd", {"args": [1, 2]}) + skillCoordinator.call_skill("test-call-0", "delayadd", {"args": [1, 2]}) time.sleep(0.1) @@ -55,12 +54,12 @@ async def test_coordinator_generator(): cnt += 1 if cnt < 5: - skillCoordinator.call( + skillCoordinator.call_skill( f"test-call-{cnt}-delay", "delayadd", {"args": [cnt, 2]}, ) - skillCoordinator.call( + skillCoordinator.call_skill( f"test-call-{cnt}", "add", {"args": [cnt, 2]}, From c37f61181e32e91b1ce8dbd23e6e686b66b19d54 Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 14 Aug 2025 15:10:53 +0300 Subject: [PATCH 10/48] better documentation --- dimos/protocol/skill/coordinator.py | 10 ++++++-- dimos/protocol/skill/skill.py | 40 +++++++++++++++++++++++++++-- dimos/protocol/skill/type.py | 5 ++-- 3 files changed, 48 insertions(+), 7 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index b711452e74..73dd8a79ab 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -65,8 +65,8 @@ def colored_name(self) -> Text: # TODO pending timeout, running timeout, etc. -# This object maintains the state of a skill run -# It is used to track the skill's progress, messages, and state +# +# This object maintains the state of a skill run on a caller end class SkillState(TimestampedCollection): call_id: str name: str @@ -152,6 +152,7 @@ def __str__(self) -> str: return capture.get() +# subclassed the dict just to have a better string representation class SkillStateDict(dict[str, SkillState]): """Custom dict for skill states with better string representation.""" @@ -168,6 +169,11 @@ def __str__(self) -> str: return "\n".join(lines) +# This class is responsible for managing the lifecycle of skills, +# handling skill calls, and coordinating communication between the agent and skills. +# +# It aggregates skills from static and dynamic containers, manages skill states, +# and decides when to notify the agent about updates. class SkillCoordinator(SkillContainer): default_config = SkillCoordinatorConfig empty: bool = True diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index a5230acc49..2a916ec080 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -29,6 +29,31 @@ Stream, ) +# skill is a decorator that allows us to specify a skill behaviour for a function. +# +# there are several parameters that can be specified: +# - ret: how to return the value from the skill, can be one of: +# +# Return.none: doesn't return anything to an agent +# Return.passive: doesn't schedule an agent call but +# returns the value to the agent when agent is called +# Return.call_agent: calls the agent with the value, scheduling an agent call +# +# - stream: if the skill streams values, it can behave in several ways: +# +# Stream.none: no streaming, skill doesn't emit any values +# Stream.passive: doesn't schedule an agent call upon emitting a value, +# returns the streamed value to the agent when agent is called +# Stream.call_agent: calls the agent with every value emitted, scheduling an agent call +# +# - reducer: defines an optional strategy for passive streams and how we collapse potential +# multiple values into something meaningful for the agent +# +# Reducer.none: no reduction, every emitted value is returned to the agent +# Reducer.latest: only the latest value is returned to the agent +# Reducer.average: assumes the skill emits a number, +# the average of all values is returned to the agent + def skill(reducer=Reducer.latest, stream=Stream.none, ret=Return.call_agent): def decorator(f: Callable[..., Any]) -> Any: @@ -74,8 +99,19 @@ class SkillContainerConfig: skill_transport: type[SkillCommsSpec] = LCMSkillComms -# here we can have also dynamic skills potentially -# agent can check .skills each time when introspecting +# Inherited by any class that wants to provide skills +# (This component works standalone but commonly used by DimOS modules) +# +# - It allows us to specify a communication layer for skills (LCM for now by default) +# - introspection of available skills via the `skills` RPC method +# - ability to provide dynamic context dependant skills with dynamic_skills flag +# for this you'll need to override the `skills` method to return a dynamic set of skills +# SkillCoordinator will call this method to get the skills available upon every request to +# the agent +# +# +# Hosts the function execution and handles correct publishing of skill messages +# according to the skill decorator configuration class SkillContainer(Configurable[SkillContainerConfig]): default_config = SkillContainerConfig _skill_transport: Optional[SkillCommsSpec] = None diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index 0ed1c91ad3..f9c7e8f377 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -27,9 +27,8 @@ class Call(Enum): class Reducer(Enum): none = 0 - all = 1 - latest = 2 - average = 3 + latest = 1 + average = 2 class Stream(Enum): From 34da5511f9dfdb0628d07d51b75d3bee152152be Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 14 Aug 2025 15:20:03 +0300 Subject: [PATCH 11/48] skillcoordinator handles threading --- dimos/protocol/skill/comms.py | 7 +++++-- dimos/protocol/skill/skill.py | 32 +++++++++++++++++++++++--------- dimos/protocol/skill/type.py | 2 ++ 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/dimos/protocol/skill/comms.py b/dimos/protocol/skill/comms.py index 7703eda3e1..67fa47f31c 100644 --- a/dimos/protocol/skill/comms.py +++ b/dimos/protocol/skill/comms.py @@ -22,8 +22,10 @@ from dimos.protocol.service import Service from dimos.protocol.skill.type import SkillMsg - # defines a protocol for communication between skills and agents +# it has simple requirements of pub/sub semantics capable of sending and receiving SkillMsg objects + + class SkillCommsSpec: @abstractmethod def publish(self, msg: SkillMsg) -> None: ... @@ -44,11 +46,12 @@ def stop(self) -> None: ... @dataclass class PubSubCommsConfig(Generic[TopicT, MsgT]): - topic: Optional[TopicT] = None # Required field but needs default for dataclass inheritance + topic: Optional[TopicT] = None pubsub: Union[type[PubSub[TopicT, MsgT]], PubSub[TopicT, MsgT], None] = None autostart: bool = True +# implementation of the SkillComms using any standard PubSub mechanism class PubSubComms(Service[PubSubCommsConfig], SkillCommsSpec): default_config: type[PubSubCommsConfig] = PubSubCommsConfig diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index 2a916ec080..baa3c7afb8 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -64,12 +64,13 @@ def wrapper(self, *args, **kwargs): if call_id: del kwargs["call_id"] - def run_function(): - return self.call_skill(call_id, skill, args, kwargs) - - thread = threading.Thread(target=run_function) - thread.start() - return None + return self.call_skill(call_id, skill, args, kwargs) + # def run_function(): + # return self.call_skill(call_id, skill, args, kwargs) + # + # thread = threading.Thread(target=run_function) + # thread.start() + # return None return f(self, *args, **kwargs) @@ -99,9 +100,23 @@ class SkillContainerConfig: skill_transport: type[SkillCommsSpec] = LCMSkillComms +def threaded(f: Callable[..., Any]) -> Callable[..., None]: + """Decorator to run a function in a separate thread.""" + + def wrapper(self, *args, **kwargs): + thread = threading.Thread(target=f, args=(self, *args), kwargs=kwargs) + thread.start() + return None + + return wrapper + + # Inherited by any class that wants to provide skills # (This component works standalone but commonly used by DimOS modules) # +# Hosts the function execution and handles correct publishing of skill messages +# according to the individual skill decorator configuration +# # - It allows us to specify a communication layer for skills (LCM for now by default) # - introspection of available skills via the `skills` RPC method # - ability to provide dynamic context dependant skills with dynamic_skills flag @@ -109,9 +124,6 @@ class SkillContainerConfig: # SkillCoordinator will call this method to get the skills available upon every request to # the agent # -# -# Hosts the function execution and handles correct publishing of skill messages -# according to the skill decorator configuration class SkillContainer(Configurable[SkillContainerConfig]): default_config = SkillContainerConfig _skill_transport: Optional[SkillCommsSpec] = None @@ -121,6 +133,8 @@ class SkillContainer(Configurable[SkillContainerConfig]): def __str__(self) -> str: return f"SkillContainer({self.__class__.__name__})" + # same interface as coordinator call_skill + @threaded def call_skill( self, call_id: str, skill_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] ) -> None: diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index f9c7e8f377..bec3f7a3ab 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -19,6 +19,8 @@ from dimos.types.timestamped import Timestamped +# defines protocol messages used for communication between skills and agents + class Call(Enum): Implicit = 0 From c7a7446d4ef7176bd49942264ee406c0469bd773 Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 14 Aug 2025 15:27:06 +0300 Subject: [PATCH 12/48] streaming skill sketch, async skill sketch --- dimos/protocol/skill/skill.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index baa3c7afb8..b496c934be 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import asyncio import threading from dataclasses import dataclass from typing import Any, Callable, Optional @@ -87,7 +88,7 @@ def wrapper(self, *args, **kwargs): # implicit RPC call as well wrapper.__rpc__ = True # type: ignore[attr-defined] - wrapper._skill = skill_config # type: ignore[attr-defined] + wrapper._skill_config = skill_config # type: ignore[attr-defined] wrapper.__name__ = f.__name__ # Preserve original function name wrapper.__doc__ = f.__doc__ # Preserve original docstring return wrapper @@ -133,7 +134,8 @@ class SkillContainer(Configurable[SkillContainerConfig]): def __str__(self) -> str: return f"SkillContainer({self.__class__.__name__})" - # same interface as coordinator call_skill + # TODO: figure out standard args/kwargs passing format, + # use same interface as skill coordinator call_skill method @threaded def call_skill( self, call_id: str, skill_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] @@ -141,11 +143,27 @@ def call_skill( f = getattr(self, skill_name, None) if f is None: - raise ValueError(f"Skill '{skill_name}' not found in {self.__class__.__name__}") + raise ValueError(f"Function '{skill_name}' not found in {self.__class__.__name__}") + config = getattr(f, "_skill_config", None) + if config is None: + raise ValueError(f"Function '{skill_name}' in {self.__class__.__name__} is not a skill") + + # we notify the skill transport about the start of the skill call self.skill_transport.publish(SkillMsg(call_id, skill, None, type=MsgType.start)) + try: val = f(*args, **kwargs) + + # check if the skill returned a coroutine, if it is, block until it resolves + if isinstance(val, asyncio.Future): + val = asyncio.run(val) + + # check if the skill is a generator, if it is, we need to iterate over it + if hasattr(val, "__iter__") and not isinstance(val, str): + for v in val: + self.skill_transport.publish(SkillMsg(call_id, skill, v, type=MsgType.stream)) + self.skill_transport.publish(SkillMsg(call_id, skill, val, type=MsgType.ret)) except Exception as e: import traceback @@ -165,11 +183,11 @@ def call_skill( def skills(self) -> dict[str, SkillConfig]: # Avoid recursion by excluding this property itself return { - name: getattr(self, name)._skill + name: getattr(self, name)._skill_config for name in dir(self) if not name.startswith("_") and name != "skills" - and hasattr(getattr(self, name), "_skill") + and hasattr(getattr(self, name), "_skill_config") } @property From c29e88758bac0f7f65f735443b17e9f7fd3317a1 Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 14 Aug 2025 16:04:02 +0300 Subject: [PATCH 13/48] initial streaming implemented --- dimos/protocol/skill/coordinator.py | 11 +++++-- dimos/protocol/skill/skill.py | 13 ++++++-- dimos/protocol/skill/test_coordinator.py | 39 +++++++++++++++--------- dimos/protocol/skill/testing_utils.py | 10 ++++++ 4 files changed, 54 insertions(+), 19 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 73dd8a79ab..2c9854ddbc 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -85,8 +85,13 @@ def __init__(self, call_id: str, name: str, skill_config: Optional[SkillConfig] self.call_id = call_id self.name = name + @property + def messages(self) -> List[SkillMsg]: + return self._items + def agent_encode(self) -> ToolMessage: - last_msg = self._items[-1] + # here we need to process streamed messages depending on the reducer + last_msg = self.messages[-1] return ToolMessage(last_msg.content, name=self.name, tool_call_id=self.call_id) # returns True if the agent should be called for this message @@ -137,7 +142,7 @@ def __str__(self) -> str: parts.append(Text(f"{self.duration():.2f}s")) if len(self): - parts.append(Text(f", last_msg={self._items[-1]})")) + parts.append(Text(f", last_msg={self.messages[-1]})")) else: parts.append(Text(", No Messages)")) @@ -254,7 +259,7 @@ def call_skill(self, call_id: str, skill_name: str, args: dict[str, Any]) -> Non # # Checks if agent needs to be notified (if ToolConfig has Return=call_agent or Stream=call_agent) def handle_message(self, msg: SkillMsg) -> None: - logger.info(f"{msg.skill_name}, {msg.call_id} - {msg}") + logger.info(f"SkillMsg from {msg.skill_name}, {msg.call_id} - {msg}") if self._skill_state.get(msg.call_id) is None: logger.warn( diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index b496c934be..ac6280b97f 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -161,10 +161,19 @@ def call_skill( # check if the skill is a generator, if it is, we need to iterate over it if hasattr(val, "__iter__") and not isinstance(val, str): + last_value = None for v in val: - self.skill_transport.publish(SkillMsg(call_id, skill, v, type=MsgType.stream)) + last_value = v + self.skill_transport.publish( + SkillMsg(call_id, skill_name, v, type=MsgType.stream) + ) + self.skill_transport.publish( + SkillMsg(call_id, skill_name, last_value, type=MsgType.ret) + ) + + else: + self.skill_transport.publish(SkillMsg(call_id, skill_name, val, type=MsgType.ret)) - self.skill_transport.publish(SkillMsg(call_id, skill, val, type=MsgType.ret)) except Exception as e: import traceback diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index c58b506505..5ca8e109ab 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -18,23 +18,11 @@ import pytest from dimos.protocol.skill.coordinator import SkillCoordinator -from dimos.protocol.skill.skill import SkillContainer, skill from dimos.protocol.skill.testing_utils import TestContainer -class TestContainer2(SkillContainer): - @skill() - def add(self, x: int, y: int) -> int: - return x + y - - @skill() - def delayadd(self, x: int, y: int) -> int: - time.sleep(0.5) - return x + y - - @pytest.mark.asyncio -async def test_coordinator_generator(): +async def test_coordinator_parallel_calls(): skillCoordinator = SkillCoordinator() skillCoordinator.register_skills(TestContainer()) @@ -67,4 +55,27 @@ async def test_coordinator_generator(): time.sleep(0.1 * cnt) - print("All updates processed successfully.") + +@pytest.mark.asyncio +async def test_coordinator_generator(): + skillCoordinator = SkillCoordinator() + skillCoordinator.register_skills(TestContainer()) + + skillCoordinator.start() + skillCoordinator.call_skill("test-call-0", "counter", {"args": [10]}) + + skillstate = None + while await skillCoordinator.wait_for_updates(1): + skillstate = skillCoordinator.generate_snapshot(clear=True) + print("Skill State:", skillstate) + print("Agent update:", skillstate["test-call-0"].agent_encode()) + # we simulate agent thinking + await asyncio.sleep(0.25) + + print("Skill lifecycle finished") + print( + "All messages:" + + "".join( + map(lambda x: f"\n {x}", skillstate["test-call-0"].messages), + ), + ) diff --git a/dimos/protocol/skill/testing_utils.py b/dimos/protocol/skill/testing_utils.py index fda4c27591..caf1f54f2b 100644 --- a/dimos/protocol/skill/testing_utils.py +++ b/dimos/protocol/skill/testing_utils.py @@ -13,8 +13,10 @@ # limitations under the License. import time +from typing import Generator, Optional from dimos.protocol.skill.skill import SkillContainer, skill +from dimos.protocol.skill.type import Reducer, Return, Stream class TestContainer(SkillContainer): @@ -26,3 +28,11 @@ def add(self, x: int, y: int) -> int: def delayadd(self, x: int, y: int) -> int: time.sleep(0.3) return x + y + + @skill(stream=Stream.call_agent) + def counter(self, count_to: int, delay: Optional[float] = 0.1) -> Generator[int, None, None]: + """Counts from 1 to count_to, with an optional delay between counts.""" + for i in range(1, count_to + 1): + if delay > 0: + time.sleep(delay) + yield i From b45b83cc2ba279726eadc7a4776837c438ce5f0b Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 14 Aug 2025 17:19:21 +0300 Subject: [PATCH 14/48] type fixes, work on reducers --- dimos/protocol/skill/coordinator.py | 2 ++ dimos/protocol/skill/type.py | 30 ++++++++++++++++++++++------- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 2c9854ddbc..42f688bd6e 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -91,6 +91,8 @@ def messages(self) -> List[SkillMsg]: def agent_encode(self) -> ToolMessage: # here we need to process streamed messages depending on the reducer + # we also want to reduce the messages we are storing so that long running streams + # don't fill up the memory last_msg = self.messages[-1] return ToolMessage(last_msg.content, name=self.name, tool_call_id=self.call_id) diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index bec3f7a3ab..f210e8dc75 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -15,11 +15,11 @@ import time from dataclasses import dataclass from enum import Enum -from typing import Any, Callable +from typing import Any, Callable, Generic, Literal, Optional, TypeVar from dimos.types.timestamped import Timestamped -# defines protocol messages used for communication between skills and agents +# This file defines protocol messages used for communication between skills and agents class Call(Enum): @@ -97,13 +97,17 @@ class MsgType(Enum): pending = 0 start = 1 stream = 2 - ret = 3 - error = 4 + reduced = 3 + ret = 4 + error = 5 -class SkillMsg(Timestamped): +M = TypeVar("M", bound="MsgType") + + +class SkillMsg(Timestamped, Generic[M]): ts: float - type: MsgType + type: M call_id: str skill_name: str content: str | int | float | dict | list @@ -113,7 +117,7 @@ def __init__( call_id: str, skill_name: str, content: str | int | float | dict | list, - type: MsgType = MsgType.ret, + type: M, ) -> None: self.ts = time.time() self.call_id = call_id @@ -145,3 +149,15 @@ def __str__(self): return f"Pending({time_ago:.1f}s ago)" if self.type == MsgType.stream: return f"Stream({time_ago:.1f}s ago, val={self.content})" + + +# Reducers take stream messages, combine them and return a reduced message. +type ReducerFunction = Callable[ + [ + list[ + SkillMsg[Literal[MsgType.Stream]], + Optional[SkillMsg[Literal[MsgType.Reduced]]], + ], + SkillMsg[Literal[MsgType.Reduced]], + ] +] From 5c1cbfb2778ab7f40ed3a6f8de49acdeefa12fd6 Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 14 Aug 2025 17:21:07 +0300 Subject: [PATCH 15/48] skill state bugfix --- dimos/protocol/skill/coordinator.py | 2 +- dimos/protocol/skill/skill.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 42f688bd6e..60d0fcff41 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -251,7 +251,7 @@ def call_skill(self, call_id: str, skill_name: str, args: dict[str, Any]) -> Non # This initializes the skill state if it doesn't exist self._skill_state[call_id] = SkillState( - name=skill_name, skill_config=skill_config, call_id=call_id + call_id=call_id, name=skill_name, skill_config=skill_config ) return skill_config.call(call_id, *args.get("args", []), **args.get("kwargs", {})) diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index ac6280b97f..9336f35d82 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -150,7 +150,7 @@ def call_skill( raise ValueError(f"Function '{skill_name}' in {self.__class__.__name__} is not a skill") # we notify the skill transport about the start of the skill call - self.skill_transport.publish(SkillMsg(call_id, skill, None, type=MsgType.start)) + self.skill_transport.publish(SkillMsg(call_id, skill_name, None, type=MsgType.start)) try: val = f(*args, **kwargs) @@ -182,7 +182,7 @@ def call_skill( self.skill_transport.publish( SkillMsg( call_id, - skill, + skill_name, {"msg": str(e), "traceback": formatted_traceback}, type=MsgType.error, ) From 07b153a8608dccf34f13202c557e3a5d79e95b62 Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 14 Aug 2025 18:48:47 +0300 Subject: [PATCH 16/48] test bugfix --- dimos/protocol/skill/test_coordinator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 5ca8e109ab..6c924ece5d 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -62,13 +62,13 @@ async def test_coordinator_generator(): skillCoordinator.register_skills(TestContainer()) skillCoordinator.start() - skillCoordinator.call_skill("test-call-0", "counter", {"args": [10]}) + skillCoordinator.call_skill("test-gen-0", "counter", {"args": [10]}) skillstate = None while await skillCoordinator.wait_for_updates(1): skillstate = skillCoordinator.generate_snapshot(clear=True) print("Skill State:", skillstate) - print("Agent update:", skillstate["test-call-0"].agent_encode()) + print("Agent update:", skillstate["test-gen-0"].agent_encode()) # we simulate agent thinking await asyncio.sleep(0.25) @@ -76,6 +76,6 @@ async def test_coordinator_generator(): print( "All messages:" + "".join( - map(lambda x: f"\n {x}", skillstate["test-call-0"].messages), + map(lambda x: f"\n {x}", skillstate["test-gen-0"].messages), ), ) From b1f5d9d38c7fc12cc1c01659938e0848a887dd3f Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 17 Aug 2025 14:46:03 +0300 Subject: [PATCH 17/48] reducer rewrite starting --- dimos/protocol/skill/test_coordinator.py | 39 ++++++++++++++++++++++-- dimos/protocol/skill/testing_utils.py | 38 ----------------------- dimos/protocol/skill/type.py | 5 --- 3 files changed, 37 insertions(+), 45 deletions(-) delete mode 100644 dimos/protocol/skill/testing_utils.py diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 6c924ece5d..11bea14e33 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -13,12 +13,42 @@ # limitations under the License. import asyncio import time -from pprint import pprint +from typing import Generator, Optional import pytest from dimos.protocol.skill.coordinator import SkillCoordinator -from dimos.protocol.skill.testing_utils import TestContainer +from dimos.protocol.skill.skill import SkillContainer, skill +from dimos.protocol.skill.type import Reducer, Return, Stream + + +class TestContainer(SkillContainer): + @skill() + def add(self, x: int, y: int) -> int: + return x + y + + @skill() + def delayadd(self, x: int, y: int) -> int: + time.sleep(0.3) + return x + y + + @skill(stream=Stream.call_agent) + def counter(self, count_to: int, delay: Optional[float] = 0.1) -> Generator[int, None, None]: + """Counts from 1 to count_to, with an optional delay between counts.""" + for i in range(1, count_to + 1): + if delay > 0: + time.sleep(delay) + yield i + + @skill(stream=Stream.passive) + def counter_passive( + self, count_to: int, delay: Optional[float] = 0.1 + ) -> Generator[int, None, None]: + """Counts from 1 to count_to, with an optional delay between counts.""" + for i in range(1, count_to + 1): + if delay > 0: + time.sleep(delay) + yield i @pytest.mark.asyncio @@ -62,11 +92,16 @@ async def test_coordinator_generator(): skillCoordinator.register_skills(TestContainer()) skillCoordinator.start() + + # here we call a skill that generates a sequence of messages skillCoordinator.call_skill("test-gen-0", "counter", {"args": [10]}) skillstate = None + # periodically agent is stopping it's thinking cycle and asks for updates while await skillCoordinator.wait_for_updates(1): skillstate = skillCoordinator.generate_snapshot(clear=True) + + # reducer is generating a summary print("Skill State:", skillstate) print("Agent update:", skillstate["test-gen-0"].agent_encode()) # we simulate agent thinking diff --git a/dimos/protocol/skill/testing_utils.py b/dimos/protocol/skill/testing_utils.py deleted file mode 100644 index caf1f54f2b..0000000000 --- a/dimos/protocol/skill/testing_utils.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import time -from typing import Generator, Optional - -from dimos.protocol.skill.skill import SkillContainer, skill -from dimos.protocol.skill.type import Reducer, Return, Stream - - -class TestContainer(SkillContainer): - @skill() - def add(self, x: int, y: int) -> int: - return x + y - - @skill() - def delayadd(self, x: int, y: int) -> int: - time.sleep(0.3) - return x + y - - @skill(stream=Stream.call_agent) - def counter(self, count_to: int, delay: Optional[float] = 0.1) -> Generator[int, None, None]: - """Counts from 1 to count_to, with an optional delay between counts.""" - for i in range(1, count_to + 1): - if delay > 0: - time.sleep(delay) - yield i diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index f210e8dc75..79973245c8 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -22,11 +22,6 @@ # This file defines protocol messages used for communication between skills and agents -class Call(Enum): - Implicit = 0 - Explicit = 1 - - class Reducer(Enum): none = 0 latest = 1 From 5cc4ebfeab1ff2105693964ff41ebbae81ad5271 Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 17 Aug 2025 15:30:50 +0300 Subject: [PATCH 18/48] new reducer structure implemented --- dimos/protocol/skill/coordinator.py | 2 +- dimos/protocol/skill/skill.py | 2 +- dimos/protocol/skill/test_coordinator.py | 2 +- dimos/protocol/skill/type.py | 39 +++++++----------------- 4 files changed, 14 insertions(+), 31 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 60d0fcff41..9f6e43a561 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -78,7 +78,7 @@ def __init__(self, call_id: str, name: str, skill_config: Optional[SkillConfig] super().__init__() self.skill_config = skill_config or SkillConfig( - name=name, stream=Stream.none, ret=Return.none, reducer=Reducer.none, schema={} + name=name, stream=Stream.none, ret=Return.none, reducer=Reducer.all, schema={} ) self.state = SkillStateEnum.pending diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index 9336f35d82..81e1be469f 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -20,10 +20,10 @@ from dimos.core import rpc from dimos.protocol.service import Configurable from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec +from dimos.protocol.skill.reducer import Reducer from dimos.protocol.skill.schema import function_to_schema from dimos.protocol.skill.type import ( MsgType, - Reducer, Return, SkillConfig, SkillMsg, diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 11bea14e33..614bafe395 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -32,7 +32,7 @@ def delayadd(self, x: int, y: int) -> int: time.sleep(0.3) return x + y - @skill(stream=Stream.call_agent) + @skill(stream=Stream.call_agent, reducer=Reducer.all) def counter(self, count_to: int, delay: Optional[float] = 0.1) -> Generator[int, None, None]: """Counts from 1 to count_to, with an optional delay between counts.""" for i in range(1, count_to + 1): diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index 79973245c8..df0baf4332 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -15,19 +15,14 @@ import time from dataclasses import dataclass from enum import Enum -from typing import Any, Callable, Generic, Literal, Optional, TypeVar +from typing import TYPE_CHECKING, Any, Callable, Generic, Literal, Optional, TypeVar +from dimos.protocol.skill.reducer import Reducer from dimos.types.timestamped import Timestamped # This file defines protocol messages used for communication between skills and agents -class Reducer(Enum): - none = 0 - latest = 1 - average = 2 - - class Stream(Enum): # no streaming none = 0 @@ -73,14 +68,14 @@ def __str__(self): # Only show reducer if stream is not none (streaming is happening) if self.stream != Stream.none: - reducer_name = "unknown" - if self.reducer == Reducer.latest: - reducer_name = "latest" - elif self.reducer == Reducer.all: - reducer_name = "all" - elif self.reducer == Reducer.average: - reducer_name = "average" - parts.append(f"reducer={reducer_name}") + # reducer_name = "unknown" + # if self.reducer == Reducer.latest: + # reducer_name = "latest" + # elif self.reducer == Reducer.all: + # reducer_name = "all" + # elif self.reducer == Reducer.average: + # reducer_name = "average" + # parts.append(f"reducer={reducer_name}") parts.append(f"stream={self.stream.name}") # Always show return mode @@ -92,7 +87,7 @@ class MsgType(Enum): pending = 0 start = 1 stream = 2 - reduced = 3 + reduced_stream = 3 ret = 4 error = 5 @@ -144,15 +139,3 @@ def __str__(self): return f"Pending({time_ago:.1f}s ago)" if self.type == MsgType.stream: return f"Stream({time_ago:.1f}s ago, val={self.content})" - - -# Reducers take stream messages, combine them and return a reduced message. -type ReducerFunction = Callable[ - [ - list[ - SkillMsg[Literal[MsgType.Stream]], - Optional[SkillMsg[Literal[MsgType.Reduced]]], - ], - SkillMsg[Literal[MsgType.Reduced]], - ] -] From 6359e5c18c43123a884a35ce650a012442aa5174 Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 17 Aug 2025 15:56:34 +0300 Subject: [PATCH 19/48] reducer restructure checkpoint, tests passing --- dimos/protocol/skill/coordinator.py | 54 +++++++++++++++-------- dimos/protocol/skill/test_coordinator.py | 8 +--- dimos/protocol/skill/type.py | 56 +++++++++++++++++++++++- 3 files changed, 91 insertions(+), 27 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 9f6e43a561..3bd27442b7 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -13,10 +13,11 @@ # limitations under the License. import asyncio +import time from copy import copy from dataclasses import dataclass from enum import Enum -from typing import Any, List, Optional +from typing import Any, List, Literal, Optional from langchain_core.messages import ( AIMessage, @@ -67,12 +68,19 @@ def colored_name(self) -> Text: # TODO pending timeout, running timeout, etc. # # This object maintains the state of a skill run on a caller end -class SkillState(TimestampedCollection): +class SkillState: call_id: str name: str state: SkillStateEnum skill_config: SkillConfig - value: Optional[Any] = None + + msg_count: int = 0 + + start_msg: SkillMsg[Literal[MsgType.start]] = None + end_msg: SkillMsg[Literal[MsgType.ret]] = None + error_msg: SkillMsg[Literal[MsgType.error]] = None + ret_msg: SkillMsg[Literal[MsgType.ret]] = None + reduced_stream_msg: List[SkillMsg[Literal[MsgType.reduced_stream]]] = None def __init__(self, call_id: str, name: str, skill_config: Optional[SkillConfig] = None) -> None: super().__init__() @@ -85,22 +93,26 @@ def __init__(self, call_id: str, name: str, skill_config: Optional[SkillConfig] self.call_id = call_id self.name = name - @property - def messages(self) -> List[SkillMsg]: - return self._items + def duration(self) -> float: + """Calculate the duration of the skill run.""" + if self.start_msg and self.end_msg: + return self.end_msg.ts - self.start_msg.ts + elif self.start_msg: + return time.time() - self.start_msg.ts + else: + return 0.0 def agent_encode(self) -> ToolMessage: - # here we need to process streamed messages depending on the reducer - # we also want to reduce the messages we are storing so that long running streams - # don't fill up the memory - last_msg = self.messages[-1] - return ToolMessage(last_msg.content, name=self.name, tool_call_id=self.call_id) + # last_msg = self.messages[-1] + # return ToolMessage(last_msg.content, name=self.name, tool_call_id=self.call_id) + return ToolMessage("something smart", name=self.name, tool_call_id=self.call_id) # returns True if the agent should be called for this message def handle_msg(self, msg: SkillMsg) -> bool: - self.add(msg) - + self.msg_count += 1 if msg.type == MsgType.stream: + self.reduced_stream_msg = self.skill_config.reducer(self.reduced_stream_msg, msg) + if ( self.skill_config.stream == Stream.none or self.skill_config.stream == Stream.passive @@ -112,22 +124,26 @@ def handle_msg(self, msg: SkillMsg) -> bool: if msg.type == MsgType.ret: self.state = SkillStateEnum.completed - self.value = msg.content + self.ret_msg = msg if self.skill_config.ret == Return.call_agent: return True return False if msg.type == MsgType.error: - self.value = msg.content self.state = SkillStateEnum.error + self.error_msg = msg return True if msg.type == MsgType.start: self.state = SkillStateEnum.running + self.start_msg = msg return False return False + def __len__(self) -> int: + return self.msg_count + def __str__(self) -> str: # For standard string representation, we'll use rich's Console to render the colored text console = Console(force_terminal=True, legacy_windows=False) @@ -144,7 +160,7 @@ def __str__(self) -> str: parts.append(Text(f"{self.duration():.2f}s")) if len(self): - parts.append(Text(f", last_msg={self.messages[-1]})")) + parts.append(Text(f", msg_count={self.msg_count})")) else: parts.append(Text(", No Messages)")) @@ -314,8 +330,10 @@ def generate_snapshot(self, clear: bool = True) -> SkillStateDict: logger.info(f"Skill {skill_run.name} (call_id={call_id}) finished") to_delete.append(call_id) if skill_run.state == SkillStateEnum.error: - error_msg = skill_run.value.get("msg", "Unknown error") - error_traceback = skill_run.value.get("traceback", "No traceback available") + error_msg = skill_run.error_msg.content.get("msg", "Unknown error") + error_traceback = skill_run.error_msg.content.get( + "traceback", "No traceback available" + ) logger.error( f"Skill error for {skill_run.name} (call_id={call_id}): {error_msg}" diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 614bafe395..33a35e1602 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -40,7 +40,7 @@ def counter(self, count_to: int, delay: Optional[float] = 0.1) -> Generator[int, time.sleep(delay) yield i - @skill(stream=Stream.passive) + @skill(stream=Stream.passive, reducer=Reducer.sum) def counter_passive( self, count_to: int, delay: Optional[float] = 0.1 ) -> Generator[int, None, None]: @@ -108,9 +108,3 @@ async def test_coordinator_generator(): await asyncio.sleep(0.25) print("Skill lifecycle finished") - print( - "All messages:" - + "".join( - map(lambda x: f"\n {x}", skillstate["test-gen-0"].messages), - ), - ) diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index df0baf4332..e9f5f64696 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -11,13 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations import time from dataclasses import dataclass from enum import Enum from typing import TYPE_CHECKING, Any, Callable, Generic, Literal, Optional, TypeVar -from dimos.protocol.skill.reducer import Reducer from dimos.types.timestamped import Timestamped # This file defines protocol messages used for communication between skills and agents @@ -44,7 +44,7 @@ class Return(Enum): @dataclass class SkillConfig: name: str - reducer: Reducer + reducer: "ReducerF" stream: Stream ret: Return schema: dict[str, Any] @@ -139,3 +139,55 @@ def __str__(self): return f"Pending({time_ago:.1f}s ago)" if self.type == MsgType.stream: return f"Stream({time_ago:.1f}s ago, val={self.content})" + + +# typing looks complex but it's a standard reducer function signature, using SkillMsgs +# (Optional[accumulator], msg) -> accumulator +type ReducerF = Callable[ + [Optional[SkillMsg[Literal[MsgType.reduced_stream]]], SkillMsg[Literal[MsgType.stream]]], + SkillMsg[Literal[MsgType.reduced_stream]], +] + + +C = TypeVar("C") # content type +A = TypeVar("A") # accumulator type +# define a naive reducer function type that's generic in terms of the accumulator type +type SimpleReducerF[A, C] = Callable[[Optional[A], C], A] + + +def make_reducer(simple_reducer: SimpleReducerF) -> ReducerF: + """ + Converts a naive reducer function into a standard reducer function. + The naive reducer function should accept an accumulator and a message, + and return the updated accumulator. + """ + + def reducer( + accumulator: Optional[SkillMsg[Literal[MsgType.reduced_stream]]], + msg: SkillMsg[Literal[MsgType.stream]], + ) -> SkillMsg[Literal[MsgType.reduced_stream]]: + # Extract the content from the accumulator if it exists + acc_value = accumulator.content if accumulator else None + + # Apply the simple reducer to get the new accumulated value + new_value = simple_reducer(acc_value, msg.content) + + # Wrap the result in a SkillMsg with reduced_stream type + return SkillMsg( + call_id=msg.call_id, + skill_name=msg.skill_name, + content=new_value, + type=MsgType.reduced_stream, + ) + + return reducer + + +class Reducer: + sum = staticmethod(make_reducer(lambda x, y: x + y if x else y)) + latest = staticmethod(make_reducer(lambda x, y: y)) + all = staticmethod(make_reducer(lambda x, y: x + [y] if x else [y])) + + +# Create singleton instance +Reducer = Reducer() From c665a44e67c1674a756ca65152a02af37c1f7a5d Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 17 Aug 2025 16:14:50 +0300 Subject: [PATCH 20/48] finished reducing --- dimos/protocol/skill/coordinator.py | 29 +++++++++++++++++++++--- dimos/protocol/skill/test_coordinator.py | 17 +++++++------- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 3bd27442b7..b955d9a4b4 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -103,9 +103,32 @@ def duration(self) -> float: return 0.0 def agent_encode(self) -> ToolMessage: - # last_msg = self.messages[-1] - # return ToolMessage(last_msg.content, name=self.name, tool_call_id=self.call_id) - return ToolMessage("something smart", name=self.name, tool_call_id=self.call_id) + agent_data = {"state": self.state.name, "ran_for": f"{round(self.duration())} seconds"} + + if self.state == SkillStateEnum.running: + if self.reduced_stream_msg: + agent_data["stream_data"] = self.reduced_stream_msg.content + + if self.state == SkillStateEnum.completed: + if self.reduced_stream_msg: + agent_data["return_value"] = self.reduced_stream_msg.content + else: + agent_data["return_value"] = self.ret_msg.content + + if self.state == SkillStateEnum.error: + agent_data["return_value"] = self.error_msg.content + if self.reduced_stream_msg: + agent_data["stream_data"] = self.reduced_stream_msg.content + + if self.error_msg: + if self.reduced_stream_msg: + agent_data["stream_data"] = self.reduced_stream_msg.content + agent_data["error"] = { + "msg": self.error_msg.content.get("msg", "Unknown error"), + "traceback": self.error_msg.content.get("traceback", "No traceback available"), + } + + return ToolMessage(agent_data, name=self.name, tool_call_id=self.call_id) # returns True if the agent should be called for this message def handle_msg(self, msg: SkillMsg) -> bool: diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 33a35e1602..543e4745c7 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -33,7 +33,7 @@ def delayadd(self, x: int, y: int) -> int: return x + y @skill(stream=Stream.call_agent, reducer=Reducer.all) - def counter(self, count_to: int, delay: Optional[float] = 0.1) -> Generator[int, None, None]: + def counter(self, count_to: int, delay: Optional[float] = 0.05) -> Generator[int, None, None]: """Counts from 1 to count_to, with an optional delay between counts.""" for i in range(1, count_to + 1): if delay > 0: @@ -41,8 +41,8 @@ def counter(self, count_to: int, delay: Optional[float] = 0.1) -> Generator[int, yield i @skill(stream=Stream.passive, reducer=Reducer.sum) - def counter_passive( - self, count_to: int, delay: Optional[float] = 0.1 + def counter_passive_sum( + self, count_to: int, delay: Optional[float] = 0.05 ) -> Generator[int, None, None]: """Counts from 1 to count_to, with an optional delay between counts.""" for i in range(1, count_to + 1): @@ -95,16 +95,17 @@ async def test_coordinator_generator(): # here we call a skill that generates a sequence of messages skillCoordinator.call_skill("test-gen-0", "counter", {"args": [10]}) + skillCoordinator.call_skill("test-gen-1", "counter_passive_sum", {"args": [5]}) skillstate = None # periodically agent is stopping it's thinking cycle and asks for updates - while await skillCoordinator.wait_for_updates(1): - skillstate = skillCoordinator.generate_snapshot(clear=True) + while await skillCoordinator.wait_for_updates(2): + print(skillCoordinator) # reducer is generating a summary - print("Skill State:", skillstate) - print("Agent update:", skillstate["test-gen-0"].agent_encode()) + skillstate = skillCoordinator.generate_snapshot(clear=True) + print("Agent update:", skillstate) # we simulate agent thinking - await asyncio.sleep(0.25) + await asyncio.sleep(0.125) print("Skill lifecycle finished") From c6479607d0971df9134b7a0f20992e00102867ab Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 17 Aug 2025 16:22:32 +0300 Subject: [PATCH 21/48] __str__ for coordinator and skill state --- dimos/protocol/skill/coordinator.py | 78 +++++++++++++----------- dimos/protocol/skill/test_coordinator.py | 7 +-- 2 files changed, 42 insertions(+), 43 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index b955d9a4b4..23cb14ff97 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -202,17 +202,47 @@ def __str__(self) -> str: class SkillStateDict(dict[str, SkillState]): """Custom dict for skill states with better string representation.""" - def __str__(self) -> str: - if not self: - return "SkillStates empty" - - lines = [] + def table(self) -> Table: + # Add skill states section + states_table = Table(show_header=True) + states_table.add_column("Call ID", style="dim", width=12) + states_table.add_column("Skill", style="white") + states_table.add_column("State", style="white") + states_table.add_column("Duration", style="yellow") + states_table.add_column("Messages", style="dim") for call_id, skill_state in self.items(): - # Use the SkillState's own __str__ method for individual items - lines.append(f"{skill_state}") + # Get colored state name + state_text = skill_state.state.colored_name() + + # Duration formatting + if ( + skill_state.state == SkillStateEnum.completed + or skill_state.state == SkillStateEnum.error + ): + duration = f"{skill_state.duration():.2f}s" + else: + duration = f"{skill_state.duration():.2f}s..." + + # Messages info + msg_count = str(len(skill_state)) + + states_table.add_row( + call_id[:8] + "...", skill_state.name, state_text, duration, msg_count + ) + + if not self: + states_table.add_row("", "[dim]No active skills[/dim]", "", "", "") + return states_table + + def __str__(self): + console = Console(force_terminal=True, legacy_windows=False) - return "\n".join(lines) + # Render to string with title above + with console.capture() as capture: + console.print(Text(" SkillState", style="bold blue")) + console.print(self.table()) + return capture.get().strip() # This class is responsible for managing the lifecycle of skills, @@ -393,35 +423,9 @@ def __str__(self): containers_table.add_row("", "[dim]No containers registered[/dim]") # Add skill states section - states_table = Table(show_header=True, show_edge=False, box=None) - states_table.add_column("Call ID", style="dim", width=12) - states_table.add_column("Skill", style="white") - states_table.add_column("State", style="white") - states_table.add_column("Duration", style="yellow") - states_table.add_column("Messages", style="dim") - - for call_id, skill_state in self._skill_state.items(): - # Get colored state name - state_text = skill_state.state.colored_name() - - # Duration formatting - if ( - skill_state.state == SkillStateEnum.completed - or skill_state.state == SkillStateEnum.error - ): - duration = f"{skill_state.duration():.2f}s" - else: - duration = f"{skill_state.duration():.2f}s..." - - # Messages info - msg_count = str(len(skill_state)) - - states_table.add_row( - call_id[:8] + "...", skill_state.name, state_text, duration, msg_count - ) - - if not self._skill_state: - states_table.add_row("", "[dim]No active skills[/dim]", "", "", "") + states_table = self._skill_state.table() + states_table.show_edge = False + states_table.box = None # Combine into main table table.add_column("Section", style="bold") diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 543e4745c7..b0fdc20c84 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -97,15 +97,10 @@ async def test_coordinator_generator(): skillCoordinator.call_skill("test-gen-0", "counter", {"args": [10]}) skillCoordinator.call_skill("test-gen-1", "counter_passive_sum", {"args": [5]}) - skillstate = None # periodically agent is stopping it's thinking cycle and asks for updates while await skillCoordinator.wait_for_updates(2): print(skillCoordinator) - - # reducer is generating a summary - skillstate = skillCoordinator.generate_snapshot(clear=True) - print("Agent update:", skillstate) - # we simulate agent thinking + agent_update = skillCoordinator.generate_snapshot(clear=True) await asyncio.sleep(0.125) print("Skill lifecycle finished") From 547e7d3a5d91c0a427ff4912760c5f41d9a13dd8 Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 17 Aug 2025 16:28:26 +0300 Subject: [PATCH 22/48] cleanup --- dimos/protocol/skill/coordinator.py | 4 ++++ dimos/protocol/skill/skill.py | 9 +-------- dimos/protocol/skill/test_coordinator.py | 4 ++-- dimos/protocol/skill/type.py | 19 ++++--------------- 4 files changed, 11 insertions(+), 25 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 23cb14ff97..82ae90f133 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -202,6 +202,10 @@ def __str__(self) -> str: class SkillStateDict(dict[str, SkillState]): """Custom dict for skill states with better string representation.""" + def agent_encode(self) -> list[ToolMessage]: + """Encode all skill states into a list of ToolMessages for the agent.""" + return [skill_state.agent_encode() for skill_state in self.values()] + def table(self) -> Table: # Add skill states section states_table = Table(show_header=True) diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index 81e1be469f..44963d326b 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -20,15 +20,8 @@ from dimos.core import rpc from dimos.protocol.service import Configurable from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec -from dimos.protocol.skill.reducer import Reducer from dimos.protocol.skill.schema import function_to_schema -from dimos.protocol.skill.type import ( - MsgType, - Return, - SkillConfig, - SkillMsg, - Stream, -) +from dimos.protocol.skill.type import MsgType, Reducer, Return, SkillConfig, SkillMsg, Stream # skill is a decorator that allows us to specify a skill behaviour for a function. # diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index b0fdc20c84..3b37d9ffe9 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -19,7 +19,7 @@ from dimos.protocol.skill.coordinator import SkillCoordinator from dimos.protocol.skill.skill import SkillContainer, skill -from dimos.protocol.skill.type import Reducer, Return, Stream +from dimos.protocol.skill.type import Reducer, Stream class TestContainer(SkillContainer): @@ -90,7 +90,6 @@ async def test_coordinator_parallel_calls(): async def test_coordinator_generator(): skillCoordinator = SkillCoordinator() skillCoordinator.register_skills(TestContainer()) - skillCoordinator.start() # here we call a skill that generates a sequence of messages @@ -101,6 +100,7 @@ async def test_coordinator_generator(): while await skillCoordinator.wait_for_updates(2): print(skillCoordinator) agent_update = skillCoordinator.generate_snapshot(clear=True) + print(agent_update.agent_encode()) await asyncio.sleep(0.125) print("Skill lifecycle finished") diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index e9f5f64696..acf3028848 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -68,14 +68,6 @@ def __str__(self): # Only show reducer if stream is not none (streaming is happening) if self.stream != Stream.none: - # reducer_name = "unknown" - # if self.reducer == Reducer.latest: - # reducer_name = "latest" - # elif self.reducer == Reducer.all: - # reducer_name = "all" - # elif self.reducer == Reducer.average: - # reducer_name = "average" - # parts.append(f"reducer={reducer_name}") parts.append(f"stream={self.stream.name}") # Always show return mode @@ -183,11 +175,8 @@ def reducer( return reducer +# just a convinience class to hold reducer functions class Reducer: - sum = staticmethod(make_reducer(lambda x, y: x + y if x else y)) - latest = staticmethod(make_reducer(lambda x, y: y)) - all = staticmethod(make_reducer(lambda x, y: x + [y] if x else [y])) - - -# Create singleton instance -Reducer = Reducer() + sum = make_reducer(lambda x, y: x + y if x else y) + latest = make_reducer(lambda x, y: y) + all = make_reducer(lambda x, y: x + [y] if x else [y]) From a05d1c01b6b7c8d329b3284a45017c3fe6cb53de Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 17 Aug 2025 16:38:52 +0300 Subject: [PATCH 23/48] passive skills tests --- dimos/agents2/agent.py | 26 ++++++++++++++++++++++-- dimos/agents2/test_agent.py | 17 +++------------- dimos/protocol/skill/coordinator.py | 11 ---------- dimos/protocol/skill/test_coordinator.py | 10 +++++++++ 4 files changed, 37 insertions(+), 27 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index eb575cce3b..e110143b39 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -14,7 +14,7 @@ import asyncio from pprint import pprint -from typing import Optional +from typing import List, Optional from langchain.chat_models import init_chat_model from langchain_core.language_models.chat_models import BaseChatModel @@ -37,6 +37,8 @@ class Agent(AgentSpec): + implicit_skill_counter: int = 0 + def __init__( self, *args, @@ -67,6 +69,26 @@ def stop(self): def clear_history(self): self.messages.clear() + # Used by agent to execute tool calls + def execute_tool_calls(self, tool_calls: List[ToolCall]) -> None: + """Execute a list of tool calls from the agent.""" + for tool_call in tool_calls: + logger.info(f"executing skill call {tool_call}") + self.coordinator.call_skill( + tool_call.get("id"), + tool_call.get("name"), + tool_call.get("args"), + ) + + # used to inject skill calls into the agent loop without agent asking for it + def run_implicit_skill(self, skill_name: str, *args, **kwargs) -> None: + self.coordinator.call_skill( + f"implicit-skill-{self.implicit_skill_counter}", + skill_name, + {"args": args, "kwargs": kwargs}, + ) + self.implicit_skill_counter += 1 + async def agent_loop(self, seed_query: str = ""): self.messages.append(HumanMessage(seed_query)) try: @@ -79,7 +101,7 @@ async def agent_loop(self, seed_query: str = ""): logger.info(f"Agent response: {msg.content}") if msg.tool_calls: - self.coordinator.execute_tool_calls(msg.tool_calls) + self.execute_tool_calls(msg.tool_calls) if not self.coordinator.has_active_skills(): logger.info("No active tasks, exiting agent loop.") diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index e17e5a88c9..336cd988df 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -19,20 +19,7 @@ from dimos.agents2.agent import Agent from dimos.protocol.skill import SkillContainer, skill - - -class TestContainer(SkillContainer): - @skill() - def add(self, x: int, y: int) -> int: - """Adds two integers.""" - time.sleep(0.3) - return x + y - - @skill() - def sub(self, x: int, y: int) -> int: - """Subs two integers.""" - time.sleep(0.3) - return x - y +from dimos.protocol.skill.test_coordinator import TestContainer @pytest.mark.asyncio @@ -48,6 +35,8 @@ async def test_agent_init(): system_prompt="Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate" ) agent.register_skills(TestContainer()) + agent.run_implicit_skill("passive_time", frequency=1) + agent.start() print( diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 82ae90f133..b1dd1487f3 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -302,17 +302,6 @@ def get_tools(self) -> list[dict]: return ret - # Used by agent to execute tool calls - def execute_tool_calls(self, tool_calls: List[ToolCall]) -> None: - """Execute a list of tool calls from the agent.""" - for tool_call in tool_calls: - logger.info(f"executing skill call {tool_call}") - self.call_skill( - tool_call.get("id"), - tool_call.get("name"), - tool_call.get("args"), - ) - # internal skill call def call_skill(self, call_id: str, skill_name: str, args: dict[str, Any]) -> None: skill_config = self.get_skill_config(skill_name) diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 3b37d9ffe9..022293e51e 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import asyncio +import datetime import time from typing import Generator, Optional @@ -25,10 +26,12 @@ class TestContainer(SkillContainer): @skill() def add(self, x: int, y: int) -> int: + """adds x and y.""" return x + y @skill() def delayadd(self, x: int, y: int) -> int: + """waits 0.3 seconds before adding x and y.""" time.sleep(0.3) return x + y @@ -50,6 +53,13 @@ def counter_passive_sum( time.sleep(delay) yield i + @skill(stream=Stream.passive, reducer=Reducer.latest) + def passive_time(self, frequency: Optional[float] = 10) -> Generator[str, None, None]: + """Provides current time.""" + while True: + time.sleep(1 / frequency) + yield str(datetime.datetime.now()) + @pytest.mark.asyncio async def test_coordinator_parallel_calls(): From 95b250e40e08bb4dfb9795033c6de976c5edbb53 Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 17 Aug 2025 16:59:01 +0300 Subject: [PATCH 24/48] ToolMessage/situational awareness msg ordering --- dimos/agents2/agent.py | 2 +- dimos/agents2/test_agent.py | 22 ++++++------ dimos/protocol/skill/coordinator.py | 56 ++++++++++++++++++++--------- 3 files changed, 53 insertions(+), 27 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index e110143b39..acbb730e72 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -83,7 +83,7 @@ def execute_tool_calls(self, tool_calls: List[ToolCall]) -> None: # used to inject skill calls into the agent loop without agent asking for it def run_implicit_skill(self, skill_name: str, *args, **kwargs) -> None: self.coordinator.call_skill( - f"implicit-skill-{self.implicit_skill_counter}", + False, skill_name, {"args": args, "kwargs": kwargs}, ) diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index 336cd988df..e8f7057f49 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -18,23 +18,25 @@ import pytest from dimos.agents2.agent import Agent -from dimos.protocol.skill import SkillContainer, skill from dimos.protocol.skill.test_coordinator import TestContainer @pytest.mark.asyncio async def test_agent_init(): - from dimos.core import start + system_prompt = ( + "Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate" + ) + ## Uncomment the following lines to use a real module system + # from dimos.core import start # dimos = start(2) - # agent = dimos.deploy( - # Agent, - # system_prompt="Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate", - # ) - agent = Agent( - system_prompt="Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate" - ) - agent.register_skills(TestContainer()) + # testcontainer = dimos.deploy(TestContainer) + # agent = dimos.deploy(Agent, system_prompt=system_prompt) + + testcontainer = TestContainer() + agent = Agent(system_prompt=system_prompt) + + agent.register_skills(testcontainer) agent.run_implicit_skill("passive_time", frequency=1) agent.start() diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index b1dd1487f3..879381b1c2 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -13,11 +13,12 @@ # limitations under the License. import asyncio +import json import time from copy import copy from dataclasses import dataclass from enum import Enum -from typing import Any, List, Literal, Optional +from typing import Any, List, Literal, Optional, Union from langchain_core.messages import ( AIMessage, @@ -37,7 +38,6 @@ from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec from dimos.protocol.skill.skill import SkillConfig, SkillContainer from dimos.protocol.skill.type import MsgType, Reducer, Return, SkillMsg, Stream -from dimos.types.timestamped import TimestampedCollection from dimos.utils.logging_config import setup_logger logger = setup_logger("dimos.protocol.skill.coordinator") @@ -65,8 +65,6 @@ def colored_name(self) -> Text: return Text(self.name, style=colors.get(self, "white")) -# TODO pending timeout, running timeout, etc. -# # This object maintains the state of a skill run on a caller end class SkillState: call_id: str @@ -75,6 +73,7 @@ class SkillState: skill_config: SkillConfig msg_count: int = 0 + sent_tool_msg: bool = False start_msg: SkillMsg[Literal[MsgType.start]] = None end_msg: SkillMsg[Literal[MsgType.ret]] = None @@ -102,33 +101,49 @@ def duration(self) -> float: else: return 0.0 - def agent_encode(self) -> ToolMessage: + def agent_encode(self) -> Union[ToolMessage, HumanMessage]: + # any tool output can be a custom type that knows how to encode itself + # like a costmap, path, transform etc could be translatable into strings + def maybe_encode(something: Any) -> str: + if getattr(something, "agent_encode", None): + return something.agent_encode() + return str(something) + agent_data = {"state": self.state.name, "ran_for": f"{round(self.duration())} seconds"} if self.state == SkillStateEnum.running: if self.reduced_stream_msg: - agent_data["stream_data"] = self.reduced_stream_msg.content + agent_data["stream_data"] = maybe_encode(self.reduced_stream_msg.content) if self.state == SkillStateEnum.completed: if self.reduced_stream_msg: - agent_data["return_value"] = self.reduced_stream_msg.content + agent_data["return_value"] = maybe_encode(self.reduced_stream_msg.content) else: - agent_data["return_value"] = self.ret_msg.content + agent_data["return_value"] = maybe_encode(self.ret_msg.content) if self.state == SkillStateEnum.error: - agent_data["return_value"] = self.error_msg.content + agent_data["return_value"] = maybe_encode(self.error_msg.content) if self.reduced_stream_msg: - agent_data["stream_data"] = self.reduced_stream_msg.content + agent_data["stream_data"] = maybe_encode(self.reduced_stream_msg.content) if self.error_msg: if self.reduced_stream_msg: - agent_data["stream_data"] = self.reduced_stream_msg.content + agent_data["stream_data"] = maybe_encode(self.reduced_stream_msg.content) agent_data["error"] = { "msg": self.error_msg.content.get("msg", "Unknown error"), "traceback": self.error_msg.content.get("traceback", "No traceback available"), } - return ToolMessage(agent_data, name=self.name, tool_call_id=self.call_id) + # tool call can emit a single ToolMessage + # subsequent messages are considered SituationalAwarenessMessages, + # those are collapsed into a HumanMessage, that's artificially prepended to history + if not self.sent_tool_msg: + self.sent_tool_msg = True + return ToolMessage(agent_data, name=self.name, tool_call_id=self.call_id) + else: + return HumanMessage( + content=json.dumps(agent_data), + ) # returns True if the agent should be called for this message def handle_msg(self, msg: SkillMsg) -> bool: @@ -303,7 +318,9 @@ def get_tools(self) -> list[dict]: return ret # internal skill call - def call_skill(self, call_id: str, skill_name: str, args: dict[str, Any]) -> None: + def call_skill( + self, call_id: Union[str | Literal[False]], skill_name: str, args: dict[str, Any] + ) -> None: skill_config = self.get_skill_config(skill_name) if not skill_config: logger.error( @@ -312,9 +329,16 @@ def call_skill(self, call_id: str, skill_name: str, args: dict[str, Any]) -> Non return # This initializes the skill state if it doesn't exist - self._skill_state[call_id] = SkillState( - call_id=call_id, name=skill_name, skill_config=skill_config - ) + if call_id: + self._skill_state[call_id] = SkillState( + call_id=call_id, name=skill_name, skill_config=skill_config + ) + else: + call_id = time.time() + self._skill_state[call_id] = SkillState( + call_id=call_id, name=skill_name, skill_config=skill_config + ) + self._skill_state[call_id].sent_tool_msg = True return skill_config.call(call_id, *args.get("args", []), **args.get("kwargs", {})) From 237c4c6b30dcc27ec8a5f4f3e753d370b42844da Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 17 Aug 2025 19:40:57 +0300 Subject: [PATCH 25/48] agent initial working version --- dimos/agents2/agent.py | 60 +++++++++++++++++++++--- dimos/agents2/test_agent.py | 5 +- dimos/protocol/skill/coordinator.py | 60 ++++++++++++++---------- dimos/protocol/skill/skill.py | 21 +++++++-- dimos/protocol/skill/test_coordinator.py | 21 +++++++-- dimos/protocol/skill/type.py | 8 ++++ 6 files changed, 133 insertions(+), 42 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index acbb730e72..677a2eadd5 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import asyncio from pprint import pprint from typing import List, Optional @@ -26,6 +25,9 @@ ToolCall, ToolMessage, ) +from rich.console import Console +from rich.table import Table +from rich.text import Text from dimos.agents2.spec import AgentSpec from dimos.core import Module, rpc @@ -36,9 +38,12 @@ logger = setup_logger("dimos.protocol.agents2") -class Agent(AgentSpec): - implicit_skill_counter: int = 0 +SYSTEM_MSG_APPEND = """ +Your message history will always be appended with a System Overview message that provides situational awareness. +""" + +class Agent(AgentSpec): def __init__( self, *args, @@ -51,8 +56,9 @@ def __init__( if self.config.system_prompt: if isinstance(self.config.system_prompt, str): - self.messages.append(self.config.system_prompt) + self.messages.append(SystemMessage(self.config.system_prompt + SYSTEM_MSG_APPEND)) else: + self.config.system_prompt.content += SYSTEM_MSG_APPEND self.messages.append(self.config.system_prompt) self._llm = init_chat_model(model_provider=self.config.provider, model=self.config.model) @@ -80,6 +86,45 @@ def execute_tool_calls(self, tool_calls: List[ToolCall]) -> None: tool_call.get("args"), ) + def __str__(self) -> str: + console = Console(force_terminal=True, legacy_windows=False) + + table = Table(title="Agent History", show_header=True) + + table.add_column("Message Type", style="cyan", no_wrap=True) + table.add_column("Content") + + for message in self.messages: + if isinstance(message, HumanMessage): + table.add_row(Text("Human", style="green"), Text(message.content, style="green")) + elif isinstance(message, AIMessage): + table.add_row( + Text("Agent", style="magenta"), Text(message.content, style="magenta") + ) + + for tool_call in message.tool_calls: + table.add_row( + "Tool Call", + Text( + f"{tool_call.get('name')}({tool_call.get('args').get('args')})", + style="bold magenta", + ), + ) + elif isinstance(message, ToolMessage): + table.add_row( + "Tool Response", Text(f"{message.name}() -> {message.content}"), style="red" + ) + elif isinstance(message, SystemMessage): + table.add_row("System", Text(message.content, style="yellow")) + else: + table.add_row("Unknown", str(message)) + + # Render to string with title above + with console.capture() as capture: + console.print(Text(" Agent", style="bold blue")) + console.print(table) + return capture.get().strip() + # used to inject skill calls into the agent loop without agent asking for it def run_implicit_skill(self, skill_name: str, *args, **kwargs) -> None: self.coordinator.call_skill( @@ -87,7 +132,6 @@ def run_implicit_skill(self, skill_name: str, *args, **kwargs) -> None: skill_name, {"args": args, "kwargs": kwargs}, ) - self.implicit_skill_counter += 1 async def agent_loop(self, seed_query: str = ""): self.messages.append(HumanMessage(seed_query)) @@ -109,8 +153,10 @@ async def agent_loop(self, seed_query: str = ""): await self.coordinator.wait_for_updates() - for call_id, update in self.coordinator.generate_snapshot(clear=True).items(): - self.messages.append(update.agent_encode()) + update = self.coordinator.generate_snapshot(clear=True) + self.messages = self.messages + update.agent_encode() + print(self) + print(self.coordinator) except Exception as e: logger.error(f"Error in agent loop: {e}") diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index e8f7057f49..ddb916af6c 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -37,14 +37,15 @@ async def test_agent_init(): agent = Agent(system_prompt=system_prompt) agent.register_skills(testcontainer) - agent.run_implicit_skill("passive_time", frequency=1) + agent.run_implicit_skill("uptime_seconds", frequency=1) agent.start() print( agent.query_async( - "hi there, please tell me what's your name, and use add tool to add 124181112 and 124124." + "hi there, please tell me what's your name and current date, and how much is 124181112 + 124124?" ) ) await asyncio.sleep(5) + print(agent) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 879381b1c2..6e22630533 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -37,7 +37,7 @@ from dimos.core.module import get_loop from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec from dimos.protocol.skill.skill import SkillConfig, SkillContainer -from dimos.protocol.skill.type import MsgType, Reducer, Return, SkillMsg, Stream +from dimos.protocol.skill.type import MsgType, Reducer, Return, ReturnType, SkillMsg, Stream from dimos.utils.logging_config import setup_logger logger = setup_logger("dimos.protocol.skill.coordinator") @@ -101,7 +101,7 @@ def duration(self) -> float: else: return 0.0 - def agent_encode(self) -> Union[ToolMessage, HumanMessage]: + def content(self) -> str: # any tool output can be a custom type that knows how to encode itself # like a costmap, path, transform etc could be translatable into strings def maybe_encode(something: Any) -> str: @@ -109,46 +109,38 @@ def maybe_encode(something: Any) -> str: return something.agent_encode() return str(something) - agent_data = {"state": self.state.name, "ran_for": f"{round(self.duration())} seconds"} - if self.state == SkillStateEnum.running: if self.reduced_stream_msg: - agent_data["stream_data"] = maybe_encode(self.reduced_stream_msg.content) + return maybe_encode(self.reduced_stream_msg.content) if self.state == SkillStateEnum.completed: - if self.reduced_stream_msg: - agent_data["return_value"] = maybe_encode(self.reduced_stream_msg.content) - else: - agent_data["return_value"] = maybe_encode(self.ret_msg.content) + if self.reduced_stream_msg: # are we a streaming skill? + return maybe_encode(self.reduced_stream_msg.content) + return maybe_encode(self.ret_msg.content) if self.state == SkillStateEnum.error: - agent_data["return_value"] = maybe_encode(self.error_msg.content) if self.reduced_stream_msg: - agent_data["stream_data"] = maybe_encode(self.reduced_stream_msg.content) - - if self.error_msg: - if self.reduced_stream_msg: - agent_data["stream_data"] = maybe_encode(self.reduced_stream_msg.content) - agent_data["error"] = { - "msg": self.error_msg.content.get("msg", "Unknown error"), - "traceback": self.error_msg.content.get("traceback", "No traceback available"), - } + ( + maybe_encode(self.reduced_stream_msg.content) + + "\n" + + maybe_encode(self.error_msg.content) + ) + def agent_encode(self) -> Union[ToolMessage, str]: # tool call can emit a single ToolMessage # subsequent messages are considered SituationalAwarenessMessages, # those are collapsed into a HumanMessage, that's artificially prepended to history if not self.sent_tool_msg: self.sent_tool_msg = True - return ToolMessage(agent_data, name=self.name, tool_call_id=self.call_id) + return ToolMessage(self.content(), name=self.name, tool_call_id=self.call_id) else: - return HumanMessage( - content=json.dumps(agent_data), - ) + return self.name + ": " + json.dumps(self.content()) # returns True if the agent should be called for this message def handle_msg(self, msg: SkillMsg) -> bool: self.msg_count += 1 if msg.type == MsgType.stream: + self.state = SkillStateEnum.running self.reduced_stream_msg = self.skill_config.reducer(self.reduced_stream_msg, msg) if ( @@ -219,7 +211,23 @@ class SkillStateDict(dict[str, SkillState]): def agent_encode(self) -> list[ToolMessage]: """Encode all skill states into a list of ToolMessages for the agent.""" - return [skill_state.agent_encode() for skill_state in self.values()] + tool_responses = [] + overview_msg = [] + + for skill_state in self.values(): + response = skill_state.agent_encode() + if isinstance(response, ToolMessage): + tool_responses.append(response) + else: + overview_msg.append(response) + + if overview_msg: + state = AIMessage( + "System Overview:\n" + "\n".join(overview_msg), + metadata={"state": True}, + ) + return tool_responses + [state] + return tool_responses def table(self) -> Table: # Add skill states section @@ -334,7 +342,7 @@ def call_skill( call_id=call_id, name=skill_name, skill_config=skill_config ) else: - call_id = time.time() + call_id = str(time.time()) self._skill_state[call_id] = SkillState( call_id=call_id, name=skill_name, skill_config=skill_config ) @@ -347,7 +355,7 @@ def call_skill( # # Checks if agent needs to be notified (if ToolConfig has Return=call_agent or Stream=call_agent) def handle_message(self, msg: SkillMsg) -> None: - logger.info(f"SkillMsg from {msg.skill_name}, {msg.call_id} - {msg}") + # logger.info(f"SkillMsg from {msg.skill_name}, {msg.call_id} - {msg}") if self._skill_state.get(msg.call_id) is None: logger.warn( diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index 44963d326b..386b20270d 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -21,7 +21,15 @@ from dimos.protocol.service import Configurable from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec from dimos.protocol.skill.schema import function_to_schema -from dimos.protocol.skill.type import MsgType, Reducer, Return, SkillConfig, SkillMsg, Stream +from dimos.protocol.skill.type import ( + MsgType, + Reducer, + Return, + ReturnType, + SkillConfig, + SkillMsg, + Stream, +) # skill is a decorator that allows us to specify a skill behaviour for a function. # @@ -49,7 +57,9 @@ # the average of all values is returned to the agent -def skill(reducer=Reducer.latest, stream=Stream.none, ret=Return.call_agent): +def skill( + reducer=Reducer.latest, stream=Stream.none, ret=Return.call_agent, ret_type=ReturnType.auto +) -> Callable: def decorator(f: Callable[..., Any]) -> Any: def wrapper(self, *args, **kwargs): skill = f"{f.__name__}" @@ -76,7 +86,12 @@ def wrapper(self, *args, **kwargs): # wrapper.__signature__ = sig.replace(parameters=params) skill_config = SkillConfig( - name=f.__name__, reducer=reducer, stream=stream, ret=ret, schema=function_to_schema(f) + name=f.__name__, + reducer=reducer, + stream=stream, + ret=ret, + schema=function_to_schema(f), + ret_type=ret_type, ) # implicit RPC call as well diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 022293e51e..bd737eaf63 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -20,7 +20,7 @@ from dimos.protocol.skill.coordinator import SkillCoordinator from dimos.protocol.skill.skill import SkillContainer, skill -from dimos.protocol.skill.type import Reducer, Stream +from dimos.protocol.skill.type import Reducer, Return, ReturnType, Stream class TestContainer(SkillContainer): @@ -53,12 +53,25 @@ def counter_passive_sum( time.sleep(delay) yield i - @skill(stream=Stream.passive, reducer=Reducer.latest) - def passive_time(self, frequency: Optional[float] = 10) -> Generator[str, None, None]: + @skill(stream=Stream.passive, reducer=Reducer.latest, ret_type=ReturnType.passthrough) + def current_time(self, frequency: Optional[float] = 10) -> Generator[str, None, None]: """Provides current time.""" while True: - time.sleep(1 / frequency) yield str(datetime.datetime.now()) + time.sleep(1 / frequency) + + @skill(stream=Stream.passive, reducer=Reducer.latest, ret_type=ReturnType.passthrough) + def uptime_seconds(self, frequency: Optional[float] = 10) -> Generator[float, None, None]: + """Provides current uptime.""" + start_time = datetime.datetime.now() + while True: + yield (datetime.datetime.now() - start_time).total_seconds() + time.sleep(1 / frequency) + + @skill(ret_type=ReturnType.passthrough) + def current_date(self, frequency: Optional[float] = 10) -> str: + """Provides current date.""" + return str(datetime.datetime.now()) @pytest.mark.asyncio diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index acf3028848..fe9ddf51a2 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -39,6 +39,13 @@ class Return(Enum): passive = 1 # calls the agent with the value, scheduling an agent call call_agent = 2 + # calls the function to get a value, when the agent is being called + callback = 3 # TODO: this is a work in progress, not implemented yet + + +class ReturnType(Enum): + auto = 0 + passthrough = 1 @dataclass @@ -48,6 +55,7 @@ class SkillConfig: stream: Stream ret: Return schema: dict[str, Any] + ret_type: ReturnType = ReturnType.auto f: Callable | None = None autostart: bool = False From 5561c0f4ffc4e5a78effec5fb81c51efa4062bf8 Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 17 Aug 2025 20:08:20 +0300 Subject: [PATCH 26/48] coordinator -> agent interface still needs work --- dimos/agents2/agent.py | 12 +++++++--- dimos/agents2/test_agent.py | 2 +- dimos/protocol/skill/coordinator.py | 29 ++++++++++++++++++------ dimos/protocol/skill/test_coordinator.py | 7 +++--- 4 files changed, 36 insertions(+), 14 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index 677a2eadd5..c92fbd4828 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -98,9 +98,15 @@ def __str__(self) -> str: if isinstance(message, HumanMessage): table.add_row(Text("Human", style="green"), Text(message.content, style="green")) elif isinstance(message, AIMessage): - table.add_row( - Text("Agent", style="magenta"), Text(message.content, style="magenta") - ) + if hasattr(message, "metadata") and message.metadata.get("state"): + table.add_row( + Text("State Summary", style="blue"), + Text(message.content, style="blue"), + ) + else: + table.add_row( + Text("Agent", style="magenta"), Text(message.content, style="magenta") + ) for tool_call in message.tool_calls: table.add_row( diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index ddb916af6c..9a965802b8 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -47,5 +47,5 @@ async def test_agent_init(): ) ) - await asyncio.sleep(5) + await asyncio.sleep(20) print(agent) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 6e22630533..7ab7ab3d2e 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -120,20 +120,34 @@ def maybe_encode(something: Any) -> str: if self.state == SkillStateEnum.error: if self.reduced_stream_msg: - ( - maybe_encode(self.reduced_stream_msg.content) - + "\n" - + maybe_encode(self.error_msg.content) - ) + (maybe_encode(self.reduced_stream_msg.content) + "\n" + self.error_msg.content) + else: + return self.error_msg.content def agent_encode(self) -> Union[ToolMessage, str]: # tool call can emit a single ToolMessage # subsequent messages are considered SituationalAwarenessMessages, # those are collapsed into a HumanMessage, that's artificially prepended to history + if not self.sent_tool_msg: self.sent_tool_msg = True - return ToolMessage(self.content(), name=self.name, tool_call_id=self.call_id) + return ToolMessage( + self.content() or "Querying, please wait, you will receive a response soon.", + name=self.name, + tool_call_id=self.call_id, + ) else: + if self.skill_config.ret_type == ReturnType.auto: + # if we are not a streaming skill, we return a string + return json.dumps( + { + "name": self.name, + "call_id": self.call_id, + "state": self.state.name, + "data": self.content(), + "ran_for": self.duration(), + } + ) return self.name + ": " + json.dumps(self.content()) # returns True if the agent should be called for this message @@ -327,7 +341,7 @@ def get_tools(self) -> list[dict]: # internal skill call def call_skill( - self, call_id: Union[str | Literal[False]], skill_name: str, args: dict[str, Any] + self, call_id: Union[str | Literal[False]], skill_name: str, args: dict[str, Any] = {} ) -> None: skill_config = self.get_skill_config(skill_name) if not skill_config: @@ -348,6 +362,7 @@ def call_skill( ) self._skill_state[call_id].sent_tool_msg = True + print("ARGS ARE", args) return skill_config.call(call_id, *args.get("args", []), **args.get("kwargs", {})) # Receives a message from active skill diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index bd737eaf63..5d8c1f214b 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -53,14 +53,14 @@ def counter_passive_sum( time.sleep(delay) yield i - @skill(stream=Stream.passive, reducer=Reducer.latest, ret_type=ReturnType.passthrough) + @skill(stream=Stream.passive, reducer=Reducer.latest, ret_type=ReturnType.auto) def current_time(self, frequency: Optional[float] = 10) -> Generator[str, None, None]: """Provides current time.""" while True: yield str(datetime.datetime.now()) time.sleep(1 / frequency) - @skill(stream=Stream.passive, reducer=Reducer.latest, ret_type=ReturnType.passthrough) + @skill(stream=Stream.passive, reducer=Reducer.latest, ret_type=ReturnType.auto) def uptime_seconds(self, frequency: Optional[float] = 10) -> Generator[float, None, None]: """Provides current uptime.""" start_time = datetime.datetime.now() @@ -68,9 +68,10 @@ def uptime_seconds(self, frequency: Optional[float] = 10) -> Generator[float, No yield (datetime.datetime.now() - start_time).total_seconds() time.sleep(1 / frequency) - @skill(ret_type=ReturnType.passthrough) + @skill(ret_type=ReturnType.auto) def current_date(self, frequency: Optional[float] = 10) -> str: """Provides current date.""" + time.sleep(3) return str(datetime.datetime.now()) From 789d8f6678dc1ad89f15eae3a424cc8daaf846a9 Mon Sep 17 00:00:00 2001 From: lesh Date: Mon, 18 Aug 2025 16:13:58 +0300 Subject: [PATCH 27/48] major agent cleanup --- dimos/agents2/agent.py | 164 +++++++++++++--------- dimos/agents2/spec.py | 62 +++++++- dimos/agents2/test_agent.py | 2 - dimos/protocol/skill/comms.py | 2 +- dimos/protocol/skill/coordinator.py | 75 ++++------ dimos/protocol/skill/skill.py | 8 +- dimos/protocol/skill/test_coordinator.py | 9 +- dimos/protocol/skill/type.py | 6 - dimos/utils/cli/agentspy/agentspy.py | 27 ++-- dimos/utils/cli/agentspy/demo_agentspy.py | 14 +- 10 files changed, 216 insertions(+), 153 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index c92fbd4828..1efe14379c 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. import asyncio +import json +from functools import reduce from pprint import pprint -from typing import List, Optional +from typing import Any, Dict, List, Optional, Tuple, Union from langchain.chat_models import init_chat_model from langchain_core.language_models.chat_models import BaseChatModel @@ -25,14 +27,11 @@ ToolCall, ToolMessage, ) -from rich.console import Console -from rich.table import Table -from rich.text import Text from dimos.agents2.spec import AgentSpec from dimos.core import Module, rpc from dimos.protocol.skill import skill -from dimos.protocol.skill.coordinator import SkillCoordinator, SkillState +from dimos.protocol.skill.coordinator import SkillCoordinator, SkillState, SkillStateDict from dimos.utils.logging_config import setup_logger logger = setup_logger("dimos.protocol.agents2") @@ -43,7 +42,70 @@ """ +def toolmsg_from_state(state: SkillState) -> ToolMessage: + return ToolMessage( + # if agent call has been triggered by another skill, + # but this specific skill didn't finish yet so we don't have data for a tool call response + state.content() + or "Loading, you will be called with an update, no need for subsequent tool calls", + name=state.name, + tool_call_id=state.call_id, + ) + + +def summary_from_state(state: SkillState) -> Dict[str, Any]: + return { + "name": state.name, + "call_id": state.call_id, + "state": state.state.name, + "data": state.content(), + } + + +def snapshot_to_messages( + state: SkillStateDict, + tool_calls: List[ToolCall], +) -> Tuple[List[ToolMessage], Optional[AIMessage]]: + # tool call ids from a previous agent call + tool_call_ids = set( + map( + lambda tool_call: tool_call.get("id"), + tool_calls, + ) + ) + + # we build a tool msg responses + tool_msgs: list[ToolMessage] = [] + + # we build a general skill state overview (for longer running skills) + state_overview: list[Dict[str, Any]] = [] + + for skill_state in sorted( + state.values(), + key=lambda skill_state: skill_state.duration(), + ): + if skill_state.call_id in tool_call_ids: + tool_msgs.append(toolmsg_from_state(skill_state)) + continue + + state_overview.append(summary_from_state(skill_state)) + + if state_overview: + state_msg = AIMessage( + "State Overview:\n" + "\n".join(map(json.dumps, state_overview)), + metadata={"state": True}, + ) + + return tool_msgs, state_msg + + return tool_msgs, None + + +# Agent class job is to glue skill coordinator state to agent messages class Agent(AgentSpec): + system_message: SystemMessage + state_message: Optional[AIMessage] = None + def __init__( self, *args, @@ -52,14 +114,14 @@ def __init__( AgentSpec.__init__(self, *args, **kwargs) self.coordinator = SkillCoordinator() - self.messages = [] + self._history = [] if self.config.system_prompt: if isinstance(self.config.system_prompt, str): - self.messages.append(SystemMessage(self.config.system_prompt + SYSTEM_MSG_APPEND)) + self.system_message = SystemMessage(self.config.system_prompt + SYSTEM_MSG_APPEND) else: self.config.system_prompt.content += SYSTEM_MSG_APPEND - self.messages.append(self.config.system_prompt) + self.system_message = self.config.system_prompt self._llm = init_chat_model(model_provider=self.config.provider, model=self.config.model) @@ -73,7 +135,17 @@ def stop(self): @rpc def clear_history(self): - self.messages.clear() + self._history.clear() + + def append_history(self, *msgs: List[Union[AIMessage, HumanMessage]]): + self._history.extend(msgs) + + def history(self): + return ( + [self.system_message] + + self._history + + ([self.state_message] if self.state_message else []) + ) # Used by agent to execute tool calls def execute_tool_calls(self, tool_calls: List[ToolCall]) -> None: @@ -86,70 +158,25 @@ def execute_tool_calls(self, tool_calls: List[ToolCall]) -> None: tool_call.get("args"), ) - def __str__(self) -> str: - console = Console(force_terminal=True, legacy_windows=False) - - table = Table(title="Agent History", show_header=True) - - table.add_column("Message Type", style="cyan", no_wrap=True) - table.add_column("Content") - - for message in self.messages: - if isinstance(message, HumanMessage): - table.add_row(Text("Human", style="green"), Text(message.content, style="green")) - elif isinstance(message, AIMessage): - if hasattr(message, "metadata") and message.metadata.get("state"): - table.add_row( - Text("State Summary", style="blue"), - Text(message.content, style="blue"), - ) - else: - table.add_row( - Text("Agent", style="magenta"), Text(message.content, style="magenta") - ) - - for tool_call in message.tool_calls: - table.add_row( - "Tool Call", - Text( - f"{tool_call.get('name')}({tool_call.get('args').get('args')})", - style="bold magenta", - ), - ) - elif isinstance(message, ToolMessage): - table.add_row( - "Tool Response", Text(f"{message.name}() -> {message.content}"), style="red" - ) - elif isinstance(message, SystemMessage): - table.add_row("System", Text(message.content, style="yellow")) - else: - table.add_row("Unknown", str(message)) - - # Render to string with title above - with console.capture() as capture: - console.print(Text(" Agent", style="bold blue")) - console.print(table) - return capture.get().strip() - # used to inject skill calls into the agent loop without agent asking for it def run_implicit_skill(self, skill_name: str, *args, **kwargs) -> None: - self.coordinator.call_skill( - False, - skill_name, - {"args": args, "kwargs": kwargs}, - ) + self.coordinator.call_skill(False, skill_name, {"args": args, "kwargs": kwargs}) async def agent_loop(self, seed_query: str = ""): - self.messages.append(HumanMessage(seed_query)) + self.append_history(HumanMessage(seed_query)) + try: while True: tools = self.get_tools() self._llm = self._llm.bind_tools(tools) - msg = self._llm.invoke(self.messages) - self.messages.append(msg) + # history() call ensures we include latest system state + # and system message in our invocation + msg = self._llm.invoke(self.history()) + self.append_history(msg) logger.info(f"Agent response: {msg.content}") + if msg.tool_calls: self.execute_tool_calls(msg.tool_calls) @@ -157,10 +184,21 @@ async def agent_loop(self, seed_query: str = ""): logger.info("No active tasks, exiting agent loop.") return msg.content + # coordinator will continue once a skill state has changed in + # such a way that agent call needs to be executed await self.coordinator.wait_for_updates() + # we build a full snapshot of currently running skills + # we also remove finished/errored out skills from subsequent snapshots (clear=True) update = self.coordinator.generate_snapshot(clear=True) - self.messages = self.messages + update.agent_encode() + + # generate tool_msgs and general state update message, + # depending on a skill is a tool call from previous interaction or not + tool_msgs, state_msg = snapshot_to_messages(update, msg.tool_calls) + + self.state_message = state_msg + self.append_history(*tool_msgs) + print(self) print(self.coordinator) diff --git a/dimos/agents2/spec.py b/dimos/agents2/spec.py index 79cdd2fdb4..9ccc131b3b 100644 --- a/dimos/agents2/spec.py +++ b/dimos/agents2/spec.py @@ -17,12 +17,20 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from enum import Enum -from typing import Optional, Tuple, Union +from typing import List, Optional, Tuple, Union from langchain.chat_models.base import _SUPPORTED_PROVIDERS from langchain_core.messages import ( + AIMessage, + HumanMessage, + MessageLikeRepresentation, SystemMessage, + ToolCall, + ToolMessage, ) +from rich.console import Console +from rich.table import Table +from rich.text import Text from dimos.core import Module, rpc from dimos.core.module import ModuleConfig @@ -142,6 +150,58 @@ def stop(self): ... @abstractmethod def clear_history(self): ... + @abstractmethod + def append_history(self, *msgs: List[Union[AIMessage, HumanMessage]]): + self._history.extend(msgs) + + @abstractmethod + def history(self) -> List[Union[SystemMessage, ToolMessage, AIMessage, HumanMessage]]: ... + @rpc @abstractmethod def query(self, query: str): ... + + def __str__(self) -> str: + console = Console(force_terminal=True, legacy_windows=False) + + table = Table(title="Agent History", show_header=True) + + table.add_column("Message Type", style="cyan", no_wrap=True) + table.add_column("Content") + + for message in self.history(): + if isinstance(message, HumanMessage): + table.add_row(Text("Human", style="green"), Text(message.content, style="green")) + elif isinstance(message, AIMessage): + if hasattr(message, "metadata") and message.metadata.get("state"): + table.add_row( + Text("State Summary", style="blue"), + Text(message.content, style="blue"), + ) + else: + table.add_row( + Text("Agent", style="magenta"), Text(message.content, style="magenta") + ) + + for tool_call in message.tool_calls: + table.add_row( + "Tool Call", + Text( + f"{tool_call.get('name')}({tool_call.get('args').get('args')})", + style="bold magenta", + ), + ) + elif isinstance(message, ToolMessage): + table.add_row( + "Tool Response", Text(f"{message.name}() -> {message.content}"), style="red" + ) + elif isinstance(message, SystemMessage): + table.add_row("System", Text(message.content, style="yellow")) + else: + table.add_row("Unknown", str(message)) + + # Render to string with title above + with console.capture() as capture: + console.print(Text(" Agent", style="bold blue")) + console.print(table) + return capture.get().strip() diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index 9a965802b8..e1bc5781ce 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -13,7 +13,6 @@ # limitations under the License. import asyncio -import time import pytest @@ -37,7 +36,6 @@ async def test_agent_init(): agent = Agent(system_prompt=system_prompt) agent.register_skills(testcontainer) - agent.run_implicit_skill("uptime_seconds", frequency=1) agent.start() diff --git a/dimos/protocol/skill/comms.py b/dimos/protocol/skill/comms.py index 67fa47f31c..09273c36c0 100644 --- a/dimos/protocol/skill/comms.py +++ b/dimos/protocol/skill/comms.py @@ -84,7 +84,7 @@ def subscribe(self, cb: Callable[[SkillMsg], None]) -> None: @dataclass class LCMCommsConfig(PubSubCommsConfig[str, SkillMsg]): - topic: str = "/agent" + topic: str = "/skill" pubsub: Union[type[PubSub], PubSub, None] = PickleLCM # lcm needs to be started only if receiving # skill comms are broadcast only in modules so we don't autostart diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 7ab7ab3d2e..9e634e889d 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -37,7 +37,7 @@ from dimos.core.module import get_loop from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec from dimos.protocol.skill.skill import SkillConfig, SkillContainer -from dimos.protocol.skill.type import MsgType, Reducer, Return, ReturnType, SkillMsg, Stream +from dimos.protocol.skill.type import MsgType, Reducer, Return, SkillMsg, Stream from dimos.utils.logging_config import setup_logger logger = setup_logger("dimos.protocol.skill.coordinator") @@ -101,13 +101,13 @@ def duration(self) -> float: else: return 0.0 - def content(self) -> str: + def content(self) -> dict[str, Any] | str | int | float | None: # any tool output can be a custom type that knows how to encode itself # like a costmap, path, transform etc could be translatable into strings def maybe_encode(something: Any) -> str: if getattr(something, "agent_encode", None): - return something.agent_encode() - return str(something) + something = something.agent_encode() + return something if self.state == SkillStateEnum.running: if self.reduced_stream_msg: @@ -137,18 +137,15 @@ def agent_encode(self) -> Union[ToolMessage, str]: tool_call_id=self.call_id, ) else: - if self.skill_config.ret_type == ReturnType.auto: - # if we are not a streaming skill, we return a string - return json.dumps( - { - "name": self.name, - "call_id": self.call_id, - "state": self.state.name, - "data": self.content(), - "ran_for": self.duration(), - } - ) - return self.name + ": " + json.dumps(self.content()) + return json.dumps( + { + "name": self.name, + "call_id": self.call_id, + "state": self.state.name, + "data": self.content(), + "ran_for": self.duration(), + } + ) # returns True if the agent should be called for this message def handle_msg(self, msg: SkillMsg) -> bool: @@ -223,26 +220,6 @@ def __str__(self) -> str: class SkillStateDict(dict[str, SkillState]): """Custom dict for skill states with better string representation.""" - def agent_encode(self) -> list[ToolMessage]: - """Encode all skill states into a list of ToolMessages for the agent.""" - tool_responses = [] - overview_msg = [] - - for skill_state in self.values(): - response = skill_state.agent_encode() - if isinstance(response, ToolMessage): - tool_responses.append(response) - else: - overview_msg.append(response) - - if overview_msg: - state = AIMessage( - "System Overview:\n" + "\n".join(overview_msg), - metadata={"state": True}, - ) - return tool_responses + [state] - return tool_responses - def table(self) -> Table: # Add skill states section states_table = Table(show_header=True) @@ -341,7 +318,7 @@ def get_tools(self) -> list[dict]: # internal skill call def call_skill( - self, call_id: Union[str | Literal[False]], skill_name: str, args: dict[str, Any] = {} + self, call_id: Union[str | Literal[False]], skill_name: str, args: dict[str, Any] ) -> None: skill_config = self.get_skill_config(skill_name) if not skill_config: @@ -350,20 +327,18 @@ def call_skill( ) return - # This initializes the skill state if it doesn't exist - if call_id: - self._skill_state[call_id] = SkillState( - call_id=call_id, name=skill_name, skill_config=skill_config - ) - else: - call_id = str(time.time()) - self._skill_state[call_id] = SkillState( - call_id=call_id, name=skill_name, skill_config=skill_config - ) - self._skill_state[call_id].sent_tool_msg = True + self._skill_state[call_id] = SkillState( + call_id=call_id, name=skill_name, skill_config=skill_config + ) + + # TODO agent often calls the skill again if previous response is still loading. + # maybe create a new skill_state linked to a previous one? not sure - print("ARGS ARE", args) - return skill_config.call(call_id, *args.get("args", []), **args.get("kwargs", {})) + return skill_config.call( + call_id, + *(args.get("args") or []), + **(args.get("kwargs") or {}), + ) # Receives a message from active skill # Updates local skill state (appends to streamed data if needed etc) diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index 386b20270d..b130734e99 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -25,7 +25,6 @@ MsgType, Reducer, Return, - ReturnType, SkillConfig, SkillMsg, Stream, @@ -58,7 +57,9 @@ def skill( - reducer=Reducer.latest, stream=Stream.none, ret=Return.call_agent, ret_type=ReturnType.auto + reducer=Reducer.latest, + stream=Stream.none, + ret=Return.call_agent, ) -> Callable: def decorator(f: Callable[..., Any]) -> Any: def wrapper(self, *args, **kwargs): @@ -82,7 +83,6 @@ def wrapper(self, *args, **kwargs): # params = list(sig.parameters.values()) # if params and params[0].name == "self": # params = params[1:] # Remove first parameter 'self' - # wrapper.__signature__ = sig.replace(parameters=params) skill_config = SkillConfig( @@ -91,10 +91,8 @@ def wrapper(self, *args, **kwargs): stream=stream, ret=ret, schema=function_to_schema(f), - ret_type=ret_type, ) - # implicit RPC call as well wrapper.__rpc__ = True # type: ignore[attr-defined] wrapper._skill_config = skill_config # type: ignore[attr-defined] wrapper.__name__ = f.__name__ # Preserve original function name diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 5d8c1f214b..46e3fc78a6 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -18,9 +18,10 @@ import pytest +from dimos.core import Module from dimos.protocol.skill.coordinator import SkillCoordinator from dimos.protocol.skill.skill import SkillContainer, skill -from dimos.protocol.skill.type import Reducer, Return, ReturnType, Stream +from dimos.protocol.skill.type import Reducer, Return, Stream class TestContainer(SkillContainer): @@ -53,14 +54,14 @@ def counter_passive_sum( time.sleep(delay) yield i - @skill(stream=Stream.passive, reducer=Reducer.latest, ret_type=ReturnType.auto) + @skill(stream=Stream.passive, reducer=Reducer.latest) def current_time(self, frequency: Optional[float] = 10) -> Generator[str, None, None]: """Provides current time.""" while True: yield str(datetime.datetime.now()) time.sleep(1 / frequency) - @skill(stream=Stream.passive, reducer=Reducer.latest, ret_type=ReturnType.auto) + @skill(stream=Stream.passive, reducer=Reducer.latest) def uptime_seconds(self, frequency: Optional[float] = 10) -> Generator[float, None, None]: """Provides current uptime.""" start_time = datetime.datetime.now() @@ -68,7 +69,7 @@ def uptime_seconds(self, frequency: Optional[float] = 10) -> Generator[float, No yield (datetime.datetime.now() - start_time).total_seconds() time.sleep(1 / frequency) - @skill(ret_type=ReturnType.auto) + @skill() def current_date(self, frequency: Optional[float] = 10) -> str: """Provides current date.""" time.sleep(3) diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index fe9ddf51a2..7891141693 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -43,11 +43,6 @@ class Return(Enum): callback = 3 # TODO: this is a work in progress, not implemented yet -class ReturnType(Enum): - auto = 0 - passthrough = 1 - - @dataclass class SkillConfig: name: str @@ -55,7 +50,6 @@ class SkillConfig: stream: Stream ret: Return schema: dict[str, Any] - ret_type: ReturnType = ReturnType.auto f: Callable | None = None autostart: bool = False diff --git a/dimos/utils/cli/agentspy/agentspy.py b/dimos/utils/cli/agentspy/agentspy.py index 3f51afc968..8255f72587 100644 --- a/dimos/utils/cli/agentspy/agentspy.py +++ b/dimos/utils/cli/agentspy/agentspy.py @@ -189,7 +189,6 @@ def compose(self) -> ComposeResult: self.table.add_column("Skill Name") self.table.add_column("State") self.table.add_column("Duration") - self.table.add_column("Start Time") self.table.add_column("Messages") self.table.add_column("Details") @@ -286,9 +285,9 @@ def update_state(self, state: Dict[str, SkillState]): if not found: # Add new entry with current time as start start_time = current_time - if len(skill_state) > 0: - # Use first message timestamp if available - start_time = skill_state._items[0].ts + if skill_state.start_msg: + # Use start message timestamp if available + start_time = skill_state.start_msg.ts self.skill_history.append((call_id, skill_state, start_time)) # Schedule UI update @@ -311,9 +310,8 @@ def refresh_table(self): # Show only top N entries for call_id, skill_state, start_time in sorted_history[:max_rows]: - # Calculate how long ago it started + # Calculate how long ago it started (for progress indicator) time_ago = time.time() - start_time - start_str = format_duration(time_ago) + " ago" # Duration duration_str = format_duration(skill_state.duration()) @@ -323,16 +321,16 @@ def refresh_table(self): # Details based on state and last message details = "" - if skill_state.state == SkillStateEnum.error and msg_count > 0: + if skill_state.state == SkillStateEnum.error and skill_state.error_msg: # Show error message - last_msg = skill_state._items[-1] - if last_msg.type == MsgType.error: - details = str(last_msg.content)[:40] - elif skill_state.state == SkillStateEnum.completed and msg_count > 0: + error_content = skill_state.error_msg.content + if isinstance(error_content, dict): + details = error_content.get("msg", "Error")[:40] + else: + details = str(error_content)[:40] + elif skill_state.state == SkillStateEnum.completed and skill_state.ret_msg: # Show return value - last_msg = skill_state._items[-1] - if last_msg.type == MsgType.ret: - details = f"→ {str(last_msg.content)[:37]}" + details = f"→ {str(skill_state.ret_msg.content)[:37]}" elif skill_state.state == SkillStateEnum.running: # Show progress indicator details = "⋯ " + "▸" * min(int(time_ago), 20) @@ -348,7 +346,6 @@ def refresh_table(self): Text(skill_state.name, style="white"), Text(skill_state.state.name, style=state_color(skill_state.state)), Text(duration_str, style="dim"), - Text(start_str, style="dim"), Text(str(msg_count), style="dim"), Text(details, style="dim white"), ) diff --git a/dimos/utils/cli/agentspy/demo_agentspy.py b/dimos/utils/cli/agentspy/demo_agentspy.py index fcd71d99ef..3ec3829794 100644 --- a/dimos/utils/cli/agentspy/demo_agentspy.py +++ b/dimos/utils/cli/agentspy/demo_agentspy.py @@ -75,15 +75,17 @@ def skill_runner(): # Run different skills based on counter if counter % 4 == 0: # Run multiple count_to in parallel to show parallel execution - agent_interface.call(f"{call_id}-count-1", "count_to", 3) - agent_interface.call(f"{call_id}-count-2", "count_to", 5) - agent_interface.call(f"{call_id}-count-3", "count_to", 2) + agent_interface.call_skill(f"{call_id}-count-1", "count_to", {"args": [3]}) + agent_interface.call_skill(f"{call_id}-count-2", "count_to", {"args": [5]}) + agent_interface.call_skill(f"{call_id}-count-3", "count_to", {"args": [2]}) elif counter % 4 == 1: - agent_interface.call(f"{call_id}-fib", "compute_fibonacci", 10) + agent_interface.call_skill(f"{call_id}-fib", "compute_fibonacci", {"args": [10]}) elif counter % 4 == 2: - agent_interface.call(f"{call_id}-quick", "quick_task", f"task-{counter}") + agent_interface.call_skill( + f"{call_id}-quick", "quick_task", {"args": [f"task-{counter}"]} + ) else: - agent_interface.call(f"{call_id}-error", "simulate_error") + agent_interface.call_skill(f"{call_id}-error", "simulate_error", {}) counter += 1 From fa270e646c1cb52c3415c0084c2538c83772f612 Mon Sep 17 00:00:00 2001 From: lesh Date: Mon, 18 Aug 2025 16:44:41 +0300 Subject: [PATCH 28/48] agent publishes messages exchanged, for observability --- dimos/agents2/agent.py | 12 +++++++++--- dimos/agents2/spec.py | 30 +++++++++++++++++++++++------- dimos/agents2/test_agent.py | 1 - 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index 1efe14379c..dddf304375 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -138,6 +138,9 @@ def clear_history(self): self._history.clear() def append_history(self, *msgs: List[Union[AIMessage, HumanMessage]]): + for msg in msgs: + self.publish(msg) + self._history.extend(msgs) def history(self): @@ -172,6 +175,9 @@ async def agent_loop(self, seed_query: str = ""): # history() call ensures we include latest system state # and system message in our invocation + if self.state_message: + self.publish(self.state_message) + msg = self._llm.invoke(self.history()) self.append_history(msg) @@ -180,6 +186,9 @@ async def agent_loop(self, seed_query: str = ""): if msg.tool_calls: self.execute_tool_calls(msg.tool_calls) + print(self) + print(self.coordinator) + if not self.coordinator.has_active_skills(): logger.info("No active tasks, exiting agent loop.") return msg.content @@ -199,9 +208,6 @@ async def agent_loop(self, seed_query: str = ""): self.state_message = state_msg self.append_history(*tool_msgs) - print(self) - print(self.coordinator) - except Exception as e: logger.error(f"Error in agent loop: {e}") import traceback diff --git a/dimos/agents2/spec.py b/dimos/agents2/spec.py index 9ccc131b3b..92e771b380 100644 --- a/dimos/agents2/spec.py +++ b/dimos/agents2/spec.py @@ -15,9 +15,9 @@ """Base agent module that wraps BaseAgent for DimOS module usage.""" from abc import ABC, abstractmethod -from dataclasses import dataclass +from dataclasses import dataclass, field from enum import Enum -from typing import List, Optional, Tuple, Union +from typing import Any, List, Optional, Tuple, Union from langchain.chat_models.base import _SUPPORTED_PROVIDERS from langchain_core.messages import ( @@ -34,6 +34,7 @@ from dimos.core import Module, rpc from dimos.core.module import ModuleConfig +from dimos.protocol.pubsub import PubSub, lcm from dimos.protocol.service import Service from dimos.protocol.skill.skill import SkillContainer from dimos.utils.logging_config import setup_logger @@ -134,10 +135,27 @@ class AgentConfig(ModuleConfig): model: Model = Model.GPT_4O provider: Provider = Provider.OPENAI + agent_transport: type[PubSub] = lcm.PickleLCM + agent_topic: Any = field(default_factory=lambda: lcm.Topic("/agent")) + + +type AnyMessage = Union[SystemMessage, ToolMessage, AIMessage, HumanMessage] + class AgentSpec(Service[AgentConfig], Module, ABC): default_config: type[AgentConfig] = AgentConfig + def __init__(self, *args, **kwargs): + Service.__init__(self, *args, **kwargs) + Module.__init__(self, *args, **kwargs) + + if self.config.agent_transport: + self.transport = self.config.agent_transport() + + def publish(self, msg: AnyMessage): + if self.transport: + self.transport.publish(self.config.agent_topic, msg) + @rpc @abstractmethod def start(self): ... @@ -151,11 +169,10 @@ def stop(self): ... def clear_history(self): ... @abstractmethod - def append_history(self, *msgs: List[Union[AIMessage, HumanMessage]]): - self._history.extend(msgs) + def append_history(self, *msgs: List[Union[AIMessage, HumanMessage]]): ... @abstractmethod - def history(self) -> List[Union[SystemMessage, ToolMessage, AIMessage, HumanMessage]]: ... + def history(self) -> List[AnyMessage]: ... @rpc @abstractmethod @@ -163,8 +180,7 @@ def query(self, query: str): ... def __str__(self) -> str: console = Console(force_terminal=True, legacy_windows=False) - - table = Table(title="Agent History", show_header=True) + table = Table(show_header=True) table.add_column("Message Type", style="cyan", no_wrap=True) table.add_column("Content") diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index e1bc5781ce..a5e1002c81 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -46,4 +46,3 @@ async def test_agent_init(): ) await asyncio.sleep(20) - print(agent) From f970c95feb58dfc897c3af6c3bd1c35037dafad6 Mon Sep 17 00:00:00 2001 From: lesh Date: Mon, 18 Aug 2025 17:02:33 +0300 Subject: [PATCH 29/48] agentspy renamed to skillspy --- dimos/agents2/agent.py | 3 +++ .../{agentspy/demo_agentspy.py => skillspy/demo_skillspy.py} | 0 dimos/utils/cli/{agentspy/agentspy.py => skillspy/skillspy.py} | 0 pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 1 deletion(-) rename dimos/utils/cli/{agentspy/demo_agentspy.py => skillspy/demo_skillspy.py} (100%) rename dimos/utils/cli/{agentspy/agentspy.py => skillspy/skillspy.py} (100%) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index dddf304375..381ac89493 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -46,6 +46,7 @@ def toolmsg_from_state(state: SkillState) -> ToolMessage: return ToolMessage( # if agent call has been triggered by another skill, # but this specific skill didn't finish yet so we don't have data for a tool call response + # we generate an informative message instead state.content() or "Loading, you will be called with an update, no need for subsequent tool calls", name=state.name, @@ -62,6 +63,8 @@ def summary_from_state(state: SkillState) -> Dict[str, Any]: } +# we take overview of running skills from the coorindator +# and build messages to be sent to an agent def snapshot_to_messages( state: SkillStateDict, tool_calls: List[ToolCall], diff --git a/dimos/utils/cli/agentspy/demo_agentspy.py b/dimos/utils/cli/skillspy/demo_skillspy.py similarity index 100% rename from dimos/utils/cli/agentspy/demo_agentspy.py rename to dimos/utils/cli/skillspy/demo_skillspy.py diff --git a/dimos/utils/cli/agentspy/agentspy.py b/dimos/utils/cli/skillspy/skillspy.py similarity index 100% rename from dimos/utils/cli/agentspy/agentspy.py rename to dimos/utils/cli/skillspy/skillspy.py diff --git a/pyproject.toml b/pyproject.toml index 30038ac143..aacd7c05d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,7 +101,7 @@ dependencies = [ [project.scripts] lcmspy = "dimos.utils.cli.lcmspy.run_lcmspy:main" foxglove-bridge = "dimos.utils.cli.foxglove_bridge.run_foxglove_bridge:main" -agentspy = "dimos.utils.cli.agentspy.agentspy:main" +skillspy = "dimos.utils.cli.skillspy.skillspy:main" [project.optional-dependencies] manipulation = [ From 2d8ecc4ec29795cf300da5966d932e7b703dabc5 Mon Sep 17 00:00:00 2001 From: lesh Date: Mon, 18 Aug 2025 17:42:17 +0300 Subject: [PATCH 30/48] agentspy --- dimos/agents2/agent.py | 11 +- dimos/utils/cli/agentspy/agentspy.py | 242 ++++++++++++++++++++++ dimos/utils/cli/agentspy/demo_agentspy.py | 65 ++++++ pyproject.toml | 1 + 4 files changed, 313 insertions(+), 6 deletions(-) create mode 100644 dimos/utils/cli/agentspy/agentspy.py create mode 100755 dimos/utils/cli/agentspy/demo_agentspy.py diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index 381ac89493..e1d4af34bc 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -37,18 +37,16 @@ logger = setup_logger("dimos.protocol.agents2") -SYSTEM_MSG_APPEND = """ -Your message history will always be appended with a System Overview message that provides situational awareness. -""" +SYSTEM_MSG_APPEND = "\nYour message history will always be appended with a System Overview message that provides situational awareness." def toolmsg_from_state(state: SkillState) -> ToolMessage: return ToolMessage( # if agent call has been triggered by another skill, - # but this specific skill didn't finish yet so we don't have data for a tool call response - # we generate an informative message instead + # and this specific skill didn't finish yet but we need a tool call response + # we return a message explaining that execution is still ongoing state.content() - or "Loading, you will be called with an update, no need for subsequent tool calls", + or "Running, you will be called with an update, no need for subsequent tool calls", name=state.name, tool_call_id=state.call_id, ) @@ -126,6 +124,7 @@ def __init__( self.config.system_prompt.content += SYSTEM_MSG_APPEND self.system_message = self.config.system_prompt + self.publish(self.system_message) self._llm = init_chat_model(model_provider=self.config.provider, model=self.config.model) @rpc diff --git a/dimos/utils/cli/agentspy/agentspy.py b/dimos/utils/cli/agentspy/agentspy.py new file mode 100644 index 0000000000..de784f4719 --- /dev/null +++ b/dimos/utils/cli/agentspy/agentspy.py @@ -0,0 +1,242 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import time +from collections import deque +from dataclasses import dataclass +from typing import Any, Deque, Dict, List, Optional, Union + +from langchain_core.messages import ( + AIMessage, + HumanMessage, + SystemMessage, + ToolMessage, +) +from rich.console import Console +from rich.table import Table +from rich.text import Text +from textual.app import App, ComposeResult +from textual.binding import Binding +from textual.containers import Container, ScrollableContainer +from textual.reactive import reactive +from textual.widgets import Footer, RichLog + +from dimos.protocol.pubsub import lcm +from dimos.protocol.pubsub.lcmpubsub import PickleLCM +from dimos.utils.logging_config import setup_logger + +# Type alias for all message types we might receive +AnyMessage = Union[SystemMessage, ToolMessage, AIMessage, HumanMessage] + + +@dataclass +class MessageEntry: + """Store a single message with metadata.""" + + timestamp: float + message: AnyMessage + + def __post_init__(self): + """Initialize timestamp if not provided.""" + if self.timestamp is None: + self.timestamp = time.time() + + +class AgentMessageMonitor: + """Monitor agent messages published via LCM.""" + + def __init__(self, topic: str = "/agent", max_messages: int = 1000): + self.topic = topic + self.max_messages = max_messages + self.messages: Deque[MessageEntry] = deque(maxlen=max_messages) + self.transport = PickleLCM() + self.transport.start() + self.callbacks: List[callable] = [] + pass + + def start(self): + """Start monitoring messages.""" + self.transport.subscribe(self.topic, self._handle_message) + + def stop(self): + """Stop monitoring.""" + # PickleLCM doesn't have explicit stop method + pass + + def _handle_message(self, msg: Any, topic: str): + """Handle incoming messages.""" + # Check if it's one of the message types we care about + if isinstance(msg, (SystemMessage, ToolMessage, AIMessage, HumanMessage)): + entry = MessageEntry(timestamp=time.time(), message=msg) + self.messages.append(entry) + + # Notify callbacks + for callback in self.callbacks: + callback(entry) + else: + pass + + def subscribe(self, callback: callable): + """Subscribe to new messages.""" + self.callbacks.append(callback) + + def get_messages(self) -> List[MessageEntry]: + """Get all stored messages.""" + return list(self.messages) + + +def format_timestamp(timestamp: float) -> str: + """Format timestamp as HH:MM:SS.mmm.""" + return ( + time.strftime("%H:%M:%S", time.localtime(timestamp)) + f".{int((timestamp % 1) * 1000):03d}" + ) + + +def get_message_type_and_style(msg: AnyMessage) -> tuple[str, str]: + """Get message type name and style color.""" + if isinstance(msg, HumanMessage): + return "Human ", "green" + elif isinstance(msg, AIMessage): + if hasattr(msg, "metadata") and msg.metadata.get("state"): + return "State ", "blue" + return "Agent ", "yellow" + elif isinstance(msg, ToolMessage): + return "Tool ", "red" + elif isinstance(msg, SystemMessage): + return "System", "red" + else: + return "Unkn ", "white" + + +def format_message_content(msg: AnyMessage) -> str: + """Format message content for display.""" + if isinstance(msg, ToolMessage): + return f"{msg.name}() -> {msg.content}" + elif isinstance(msg, AIMessage) and msg.tool_calls: + # Include tool calls in content + tool_info = [] + for tc in msg.tool_calls: + args_str = str(tc.get("args", {})) + tool_info.append(f"{tc.get('name')}({args_str})") + content = msg.content or "" + if content and tool_info: + return f"{content}\n[Tool Calls: {', '.join(tool_info)}]" + elif tool_info: + return f"[Tool Calls: {', '.join(tool_info)}]" + return content + else: + return str(msg.content) if hasattr(msg, "content") else str(msg) + + +class AgentSpyApp(App): + """TUI application for monitoring agent messages.""" + + CSS = """ + Screen { + layout: vertical; + background: black; + } + + RichLog { + height: 1fr; + border: none; + background: black; + padding: 0 1; + } + + Footer { + dock: bottom; + height: 1; + } + """ + + BINDINGS = [ + Binding("q", "quit", "Quit"), + Binding("c", "clear", "Clear"), + Binding("ctrl+c", "quit", show=False), + ] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.monitor = AgentMessageMonitor() + self.message_log: Optional[RichLog] = None + + def compose(self) -> ComposeResult: + """Compose the UI.""" + self.message_log = RichLog(wrap=True, highlight=True, markup=True) + yield self.message_log + yield Footer() + + def on_mount(self): + """Start monitoring when app mounts.""" + self.theme = "flexoki" + + # Subscribe to new messages + self.monitor.subscribe(self.on_new_message) + self.monitor.start() + + # Write existing messages to the log + for entry in self.monitor.get_messages(): + self.on_new_message(entry) + + def on_unmount(self): + """Stop monitoring when app unmounts.""" + self.monitor.stop() + + def on_new_message(self, entry: MessageEntry): + """Handle new messages.""" + if self.message_log: + msg = entry.message + msg_type, style = get_message_type_and_style(msg) + content = format_message_content(msg) + + # Format the message for the log + timestamp = format_timestamp(entry.timestamp) + self.message_log.write( + f"[dim white]{timestamp}[/dim white] | " + f"[bold {style}]{msg_type}[/bold {style}] | " + f"[{style}]{content}[/{style}]" + ) + + def refresh_display(self): + """Refresh the message display.""" + # Not needed anymore as messages are written directly to the log + + def action_clear(self): + """Clear message history.""" + self.monitor.messages.clear() + if self.message_log: + self.message_log.clear() + + +def main(): + """Main entry point for agentspy.""" + import sys + + if len(sys.argv) > 1 and sys.argv[1] == "web": + import os + + from textual_serve.server import Server + + server = Server(f"python {os.path.abspath(__file__)}") + server.serve() + else: + app = AgentSpyApp() + app.run() + + +if __name__ == "__main__": + main() diff --git a/dimos/utils/cli/agentspy/demo_agentspy.py b/dimos/utils/cli/agentspy/demo_agentspy.py new file mode 100755 index 0000000000..1e3a0d4f3b --- /dev/null +++ b/dimos/utils/cli/agentspy/demo_agentspy.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Demo script to test agent message publishing and agentspy reception.""" + +import time +from langchain_core.messages import ( + AIMessage, + HumanMessage, + SystemMessage, + ToolMessage, +) +from dimos.protocol.pubsub.lcmpubsub import PickleLCM +from dimos.protocol.pubsub import lcm + + +def test_publish_messages(): + """Publish test messages to verify agentspy is working.""" + print("Starting agent message publisher demo...") + + # Create transport + transport = PickleLCM() + topic = lcm.Topic("/agent") + + print(f"Publishing to topic: {topic}") + + # Test messages + messages = [ + SystemMessage("System initialized for testing"), + HumanMessage("Hello agent, can you help me?"), + AIMessage( + "Of course! I'm here to help.", + tool_calls=[{"name": "get_info", "args": {"query": "test"}, "id": "1"}], + ), + ToolMessage(name="get_info", content="Test result: success", tool_call_id="1"), + AIMessage("The test was successful!", metadata={"state": True}), + ] + + # Publish messages with delays + for i, msg in enumerate(messages): + print(f"\nPublishing message {i + 1}: {type(msg).__name__}") + print(f"Content: {msg.content if hasattr(msg, 'content') else msg}") + + transport.publish(topic, msg) + time.sleep(1) # Wait 1 second between messages + + print("\nAll messages published! Check agentspy to see if they were received.") + print("Keeping publisher alive for 10 more seconds...") + time.sleep(10) + + +if __name__ == "__main__": + test_publish_messages() diff --git a/pyproject.toml b/pyproject.toml index aacd7c05d8..a8adfd18ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,6 +102,7 @@ dependencies = [ lcmspy = "dimos.utils.cli.lcmspy.run_lcmspy:main" foxglove-bridge = "dimos.utils.cli.foxglove_bridge.run_foxglove_bridge:main" skillspy = "dimos.utils.cli.skillspy.skillspy:main" +agentspy = "dimos.utils.cli.agentspy.agentspy:main" [project.optional-dependencies] manipulation = [ From 6c420690cb1efd132b5fee00952926a2ed823cef Mon Sep 17 00:00:00 2001 From: lesh Date: Mon, 18 Aug 2025 18:28:06 +0300 Subject: [PATCH 31/48] implicit skills --- dimos/agents2/test_agent.py | 10 +++------- dimos/protocol/skill/coordinator.py | 2 ++ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index a5e1002c81..9029d6d8ac 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -34,15 +34,11 @@ async def test_agent_init(): testcontainer = TestContainer() agent = Agent(system_prompt=system_prompt) - agent.register_skills(testcontainer) - agent.start() - - print( - agent.query_async( - "hi there, please tell me what's your name and current date, and how much is 124181112 + 124124?" - ) + agent.run_implicit_skill("uptime_seconds") + agent.query_async( + "hi there, please tell me what's your name and current date, and how much is 124181112 + 124124?" ) await asyncio.sleep(20) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 9e634e889d..6fad1f7be7 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -320,6 +320,8 @@ def get_tools(self) -> list[dict]: def call_skill( self, call_id: Union[str | Literal[False]], skill_name: str, args: dict[str, Any] ) -> None: + if not call_id: + call_id = str(round(time.time())) skill_config = self.get_skill_config(skill_name) if not skill_config: logger.error( From 89be5d891ead48d31e16d48d8c96efad47ef6c70 Mon Sep 17 00:00:00 2001 From: lesh Date: Mon, 18 Aug 2025 18:54:54 +0300 Subject: [PATCH 32/48] tests fix --- dimos/agents2/test_agent.py | 1 + dimos/protocol/skill/test_coordinator.py | 2 +- dimos/protocol/skill/type.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index 9029d6d8ac..6662589d96 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -20,6 +20,7 @@ from dimos.protocol.skill.test_coordinator import TestContainer +@pytest.mark.tool @pytest.mark.asyncio async def test_agent_init(): system_prompt = ( diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 46e3fc78a6..27ec420a22 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -125,7 +125,7 @@ async def test_coordinator_generator(): while await skillCoordinator.wait_for_updates(2): print(skillCoordinator) agent_update = skillCoordinator.generate_snapshot(clear=True) - print(agent_update.agent_encode()) + print(agent_update) await asyncio.sleep(0.125) print("Skill lifecycle finished") diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index 7891141693..c10b2459ad 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -137,7 +137,7 @@ def __str__(self): # typing looks complex but it's a standard reducer function signature, using SkillMsgs # (Optional[accumulator], msg) -> accumulator -type ReducerF = Callable[ +ReducerF = Callable[ [Optional[SkillMsg[Literal[MsgType.reduced_stream]]], SkillMsg[Literal[MsgType.stream]]], SkillMsg[Literal[MsgType.reduced_stream]], ] From c2b4c7fe432c086a0b70869a5996c87b321d9922 Mon Sep 17 00:00:00 2001 From: lesh Date: Tue, 19 Aug 2025 03:27:14 +0300 Subject: [PATCH 33/48] small comments cleanup --- dimos/agents2/agent.py | 39 +++++++++++------------- dimos/protocol/skill/test_coordinator.py | 3 +- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index e1d4af34bc..29afdea1cb 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -13,24 +13,20 @@ # limitations under the License. import asyncio import json -from functools import reduce -from pprint import pprint +from operator import itemgetter from typing import Any, Dict, List, Optional, Tuple, Union from langchain.chat_models import init_chat_model -from langchain_core.language_models.chat_models import BaseChatModel from langchain_core.messages import ( AIMessage, HumanMessage, - MessageLikeRepresentation, SystemMessage, ToolCall, ToolMessage, ) from dimos.agents2.spec import AgentSpec -from dimos.core import Module, rpc -from dimos.protocol.skill import skill +from dimos.core import rpc from dimos.protocol.skill.coordinator import SkillCoordinator, SkillState, SkillStateDict from dimos.utils.logging_config import setup_logger @@ -61,24 +57,21 @@ def summary_from_state(state: SkillState) -> Dict[str, Any]: } -# we take overview of running skills from the coorindator -# and build messages to be sent to an agent +# takes an overview of running skills from the coorindator +# and builds messages to be sent to an agent def snapshot_to_messages( state: SkillStateDict, tool_calls: List[ToolCall], ) -> Tuple[List[ToolMessage], Optional[AIMessage]]: - # tool call ids from a previous agent call + # builds a set of tool call ids from a previous agent request tool_call_ids = set( - map( - lambda tool_call: tool_call.get("id"), - tool_calls, - ) + map(itemgetter("id"), tool_calls), ) - # we build a tool msg responses + # build a tool msg responses tool_msgs: list[ToolMessage] = [] - # we build a general skill state overview (for longer running skills) + # build a general skill state overview (for longer running skills) state_overview: list[Dict[str, Any]] = [] for skill_state in sorted( @@ -102,7 +95,7 @@ def snapshot_to_messages( return tool_msgs, None -# Agent class job is to glue skill coordinator state to agent messages +# Agent class job is to glue skill coordinator state to an agent, builds langchain messages class Agent(AgentSpec): system_message: SystemMessage state_message: Optional[AIMessage] = None @@ -172,14 +165,17 @@ async def agent_loop(self, seed_query: str = ""): try: while True: + # we are getting tools from the coordinator on each turn + # since this allows for skillcontainers to dynamically provide new skills tools = self.get_tools() self._llm = self._llm.bind_tools(tools) - # history() call ensures we include latest system state - # and system message in our invocation + # publish to /agent topic for observability if self.state_message: self.publish(self.state_message) + # history() builds our message history dynamically + # ensures we include latest system state, but not old ones. msg = self._llm.invoke(self.history()) self.append_history(msg) @@ -199,12 +195,13 @@ async def agent_loop(self, seed_query: str = ""): # such a way that agent call needs to be executed await self.coordinator.wait_for_updates() - # we build a full snapshot of currently running skills - # we also remove finished/errored out skills from subsequent snapshots (clear=True) + # we request a full snapshot of currently running, finished or errored out skills + # we ask for removal of finished skills from subsequent snapshots (clear=True) update = self.coordinator.generate_snapshot(clear=True) # generate tool_msgs and general state update message, - # depending on a skill is a tool call from previous interaction or not + # depending on a skill having associated tool call from previous interaction + # we will return a tool message, and not a general state message tool_msgs, state_msg = snapshot_to_messages(update, msg.tool_calls) self.state_message = state_msg diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 27ec420a22..7419408521 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -18,10 +18,9 @@ import pytest -from dimos.core import Module from dimos.protocol.skill.coordinator import SkillCoordinator from dimos.protocol.skill.skill import SkillContainer, skill -from dimos.protocol.skill.type import Reducer, Return, Stream +from dimos.protocol.skill.type import Reducer, Stream class TestContainer(SkillContainer): From 7c84d5ad1a8bb45b80c19b7a804fa051fd0c60fe Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 20 Aug 2025 14:23:55 +0300 Subject: [PATCH 34/48] ci tests fix --- dimos/protocol/skill/type.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index c10b2459ad..8334453b18 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -146,7 +146,7 @@ def __str__(self): C = TypeVar("C") # content type A = TypeVar("A") # accumulator type # define a naive reducer function type that's generic in terms of the accumulator type -type SimpleReducerF[A, C] = Callable[[Optional[A], C], A] +SimpleReducerF = Callable[[Optional[A], C], A] def make_reducer(simple_reducer: SimpleReducerF) -> ReducerF: From e98bf09e53789cece2ade0e6c8cc9bc8d6e2fc13 Mon Sep 17 00:00:00 2001 From: lesh Date: Tue, 26 Aug 2025 17:19:24 +0300 Subject: [PATCH 35/48] initial image implementation --- dimos/agents2/agent.py | 64 ++++++++++++++-------- dimos/agents2/spec.py | 6 ++- dimos/agents2/test_agent.py | 2 +- dimos/msgs/sensor_msgs/Image.py | 67 +++++++++++++++--------- dimos/protocol/skill/coordinator.py | 9 +++- dimos/protocol/skill/skill.py | 9 ++-- dimos/protocol/skill/test_coordinator.py | 14 +++-- dimos/protocol/skill/type.py | 7 +++ 8 files changed, 123 insertions(+), 55 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index 29afdea1cb..f2efe37dda 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -14,7 +14,7 @@ import asyncio import json from operator import itemgetter -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, TypedDict, Union from langchain.chat_models import init_chat_model from langchain_core.messages import ( @@ -27,7 +27,9 @@ from dimos.agents2.spec import AgentSpec from dimos.core import rpc +from dimos.msgs.sensor_msgs import Image from dimos.protocol.skill.coordinator import SkillCoordinator, SkillState, SkillStateDict +from dimos.protocol.skill.type import Output from dimos.utils.logging_config import setup_logger logger = setup_logger("dimos.protocol.agents2") @@ -41,19 +43,33 @@ def toolmsg_from_state(state: SkillState) -> ToolMessage: # if agent call has been triggered by another skill, # and this specific skill didn't finish yet but we need a tool call response # we return a message explaining that execution is still ongoing - state.content() + content=state.content() or "Running, you will be called with an update, no need for subsequent tool calls", name=state.name, tool_call_id=state.call_id, ) -def summary_from_state(state: SkillState) -> Dict[str, Any]: +class SkillStateSummary(TypedDict): + name: str + call_id: str + state: str + data: Any + + +def summary_from_state(state: SkillState, special_data: bool = False) -> SkillStateSummary: + content = state.content() + if isinstance(content, dict): + content = json.dumps(content) + + if not isinstance(content, str): + content = str(content) + return { "name": state.name, "call_id": state.call_id, "state": state.state.name, - "data": state.content(), + "data": state.content() if not special_data else "data will be in a separate message", } @@ -72,7 +88,11 @@ def snapshot_to_messages( tool_msgs: list[ToolMessage] = [] # build a general skill state overview (for longer running skills) - state_overview: list[Dict[str, Any]] = [] + state_overview: list[Dict[str, SkillStateSummary]] = [] + + # for special skills that want to return a separate message + # (images for example, requires to be a HumanMessage) + special_msgs: List[HumanMessage] = [] for skill_state in sorted( state.values(), @@ -82,23 +102,28 @@ def snapshot_to_messages( tool_msgs.append(toolmsg_from_state(skill_state)) continue - state_overview.append(summary_from_state(skill_state)) + special_data = skill_state.skill_config.output != Output.standard + if special_data: + print("special data from skill", skill_state.name, skill_state.content()) + special_msgs.append(HumanMessage(content=[skill_state.content()])) + + state_overview.append(summary_from_state(skill_state, special_data)) if state_overview: state_msg = AIMessage( "State Overview:\n" + "\n".join(map(json.dumps, state_overview)), - metadata={"state": True}, ) - return tool_msgs, state_msg - - return tool_msgs, None + return { + "tool_msgs": tool_msgs if tool_msgs else [], + "state_msgs": ([state_msg] if state_msg else []) + special_msgs, + } # Agent class job is to glue skill coordinator state to an agent, builds langchain messages class Agent(AgentSpec): system_message: SystemMessage - state_message: Optional[AIMessage] = None + state_messages: List[Union[AIMessage, HumanMessage]] def __init__( self, @@ -107,6 +132,7 @@ def __init__( ): AgentSpec.__init__(self, *args, **kwargs) + self.state_messages = [] self.coordinator = SkillCoordinator() self._history = [] @@ -139,11 +165,7 @@ def append_history(self, *msgs: List[Union[AIMessage, HumanMessage]]): self._history.extend(msgs) def history(self): - return ( - [self.system_message] - + self._history - + ([self.state_message] if self.state_message else []) - ) + return [self.system_message] + self._history + self.state_messages # Used by agent to execute tool calls def execute_tool_calls(self, tool_calls: List[ToolCall]) -> None: @@ -171,8 +193,8 @@ async def agent_loop(self, seed_query: str = ""): self._llm = self._llm.bind_tools(tools) # publish to /agent topic for observability - if self.state_message: - self.publish(self.state_message) + for state_msg in self.state_messages: + self.publish(state_msg) # history() builds our message history dynamically # ensures we include latest system state, but not old ones. @@ -202,10 +224,10 @@ async def agent_loop(self, seed_query: str = ""): # generate tool_msgs and general state update message, # depending on a skill having associated tool call from previous interaction # we will return a tool message, and not a general state message - tool_msgs, state_msg = snapshot_to_messages(update, msg.tool_calls) + snapshot_msgs = snapshot_to_messages(update, msg.tool_calls) - self.state_message = state_msg - self.append_history(*tool_msgs) + self.state_messages = snapshot_msgs.get("state_msgs", []) + self.append_history(*snapshot_msgs.get("tool_msgs", [])) except Exception as e: logger.error(f"Error in agent loop: {e}") diff --git a/dimos/agents2/spec.py b/dimos/agents2/spec.py index 92e771b380..1e5e9eaecd 100644 --- a/dimos/agents2/spec.py +++ b/dimos/agents2/spec.py @@ -187,7 +187,11 @@ def __str__(self) -> str: for message in self.history(): if isinstance(message, HumanMessage): - table.add_row(Text("Human", style="green"), Text(message.content, style="green")) + content = message.content + if not isinstance(content, str): + content = "" + + table.add_row(Text("Human", style="green"), Text(content, style="green")) elif isinstance(message, AIMessage): if hasattr(message, "metadata") and message.metadata.get("state"): table.add_row( diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index 6662589d96..0df5e7b634 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -39,7 +39,7 @@ async def test_agent_init(): agent.start() agent.run_implicit_skill("uptime_seconds") agent.query_async( - "hi there, please tell me what's your name and current date, and how much is 124181112 + 124124?" + "hi there, I have 4 questions for you: Please tell me what's your name and current date, and how much is 124181112 + 124124, and what do you see on the camera?" ) await asyncio.sleep(20) diff --git a/dimos/msgs/sensor_msgs/Image.py b/dimos/msgs/sensor_msgs/Image.py index fb57cfcd3e..d1aff49a42 100644 --- a/dimos/msgs/sensor_msgs/Image.py +++ b/dimos/msgs/sensor_msgs/Image.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import base64 import time from dataclasses import dataclass, field from enum import Enum -from typing import Optional, Tuple +from typing import Literal, Optional, Tuple, TypedDict import cv2 import numpy as np @@ -40,6 +41,15 @@ class ImageFormat(Enum): DEPTH16 = "DEPTH16" # 16-bit Integer Depth (millimeters) +class AgentImageMessage(TypedDict): + """Type definition for agent-compatible image representation.""" + + type: Literal["image"] + source_type: Literal["base64"] + mime_type: Literal["image/jpeg", "image/png"] + data: str # Base64 encoded image data + + @dataclass class Image(Timestamped): """Standardized image type with LCM integration.""" @@ -285,6 +295,38 @@ def save(self, filepath: str) -> bool: cv_image = self.to_opencv() return cv2.imwrite(filepath, cv_image) + def to_base64(self, max_width: int = 640, max_height: int = 480) -> str: + """Encode image to base64 JPEG format for agent processing. + + Args: + max_width: Maximum width for resizing (default 640) + max_height: Maximum height for resizing (default 480) + + Returns: + Base64 encoded JPEG string suitable for LLM/agent consumption. + """ + bgr_image = self.to_bgr() + + # Encode as JPEG + encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 80] # 80% quality + success, buffer = cv2.imencode(".jpg", bgr_image.data, encode_param) + + if not success: + raise ValueError("Failed to encode image as JPEG") + + # Convert to base64 + + jpeg_bytes = buffer.tobytes() + base64_str = base64.b64encode(jpeg_bytes).decode("utf-8") + + return base64_str + + def agent_encode(self) -> AgentImageMessage: + return { + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{self.to_base64()}"}, + } + def lcm_encode(self, frame_id: Optional[str] = None) -> LCMImage: """Convert to LCM Image message.""" msg = LCMImage() @@ -433,26 +475,3 @@ def __eq__(self, other) -> bool: def __len__(self) -> int: """Return total number of pixels.""" return self.height * self.width - - def agent_encode(self) -> str: - """Encode image to base64 JPEG format for agent processing. - - Returns: - Base64 encoded JPEG string suitable for LLM/agent consumption. - """ - bgr_image = self.to_bgr() - - # Encode as JPEG - encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 95] # 95% quality - success, buffer = cv2.imencode(".jpg", bgr_image.data, encode_param) - - if not success: - raise ValueError("Failed to encode image as JPEG") - - # Convert to base64 - import base64 - - jpeg_bytes = buffer.tobytes() - base64_str = base64.b64encode(jpeg_bytes).decode("utf-8") - - return base64_str diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 6fad1f7be7..c41ed15e9e 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -106,7 +106,14 @@ def content(self) -> dict[str, Any] | str | int | float | None: # like a costmap, path, transform etc could be translatable into strings def maybe_encode(something: Any) -> str: if getattr(something, "agent_encode", None): - something = something.agent_encode() + return something.agent_encode() + + # if isinstance(something, dict): + # something = json.dumps(something) + + # if not isinstance(something, str): + # something = str(something) + return something if self.state == SkillStateEnum.running: diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index b130734e99..7a01c9546d 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -23,6 +23,7 @@ from dimos.protocol.skill.schema import function_to_schema from dimos.protocol.skill.type import ( MsgType, + Output, Reducer, Return, SkillConfig, @@ -57,9 +58,10 @@ def skill( - reducer=Reducer.latest, - stream=Stream.none, - ret=Return.call_agent, + reducer: Reducer = Reducer.latest, + stream: Stream = Stream.none, + ret: Return = Return.call_agent, + output: Output = Output.standard, ) -> Callable: def decorator(f: Callable[..., Any]) -> Any: def wrapper(self, *args, **kwargs): @@ -90,6 +92,7 @@ def wrapper(self, *args, **kwargs): reducer=reducer, stream=stream, ret=ret, + output=output, schema=function_to_schema(f), ) diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 7419408521..244380e1d7 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -18,9 +18,11 @@ import pytest +from dimos.msgs.sensor_msgs import Image from dimos.protocol.skill.coordinator import SkillCoordinator from dimos.protocol.skill.skill import SkillContainer, skill -from dimos.protocol.skill.type import Reducer, Stream +from dimos.protocol.skill.type import Output, Reducer, Stream +from dimos.utils.data import get_data class TestContainer(SkillContainer): @@ -57,7 +59,7 @@ def counter_passive_sum( def current_time(self, frequency: Optional[float] = 10) -> Generator[str, None, None]: """Provides current time.""" while True: - yield str(datetime.datetime.now()) + yield datetime.datetime.now() time.sleep(1 / frequency) @skill(stream=Stream.passive, reducer=Reducer.latest) @@ -71,8 +73,12 @@ def uptime_seconds(self, frequency: Optional[float] = 10) -> Generator[float, No @skill() def current_date(self, frequency: Optional[float] = 10) -> str: """Provides current date.""" - time.sleep(3) - return str(datetime.datetime.now()) + return datetime.datetime.now() + + @skill(output=Output.image) + def take_photo(self) -> str: + """Takes a camera photo""" + return Image.from_file(get_data("cafe.jpg")) @pytest.mark.asyncio diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index 8334453b18..84b912f303 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -23,6 +23,12 @@ # This file defines protocol messages used for communication between skills and agents +class Output(Enum): + standard = 0 + separate_message = 1 # e.g., for images, videos, files, etc. + image = 2 # this is same as separate_message, but maybe clearer for users + + class Stream(Enum): # no streaming none = 0 @@ -49,6 +55,7 @@ class SkillConfig: reducer: "ReducerF" stream: Stream ret: Return + output: Output schema: dict[str, Any] f: Callable | None = None autostart: bool = False From 709056428bafc2c49960b595c70e85cc47890410 Mon Sep 17 00:00:00 2001 From: dimensional5 Date: Wed, 27 Aug 2025 01:41:29 -0700 Subject: [PATCH 36/48] Remove type alias not supported in python 3.10 --- dimos/agents2/spec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dimos/agents2/spec.py b/dimos/agents2/spec.py index 1e5e9eaecd..1ed5e00327 100644 --- a/dimos/agents2/spec.py +++ b/dimos/agents2/spec.py @@ -139,7 +139,7 @@ class AgentConfig(ModuleConfig): agent_topic: Any = field(default_factory=lambda: lcm.Topic("/agent")) -type AnyMessage = Union[SystemMessage, ToolMessage, AIMessage, HumanMessage] +AnyMessage = Union[SystemMessage, ToolMessage, AIMessage, HumanMessage] class AgentSpec(Service[AgentConfig], Module, ABC): From 5094483dd0a8320765315f0d3694fc35cee00a28 Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 27 Aug 2025 19:00:15 +0300 Subject: [PATCH 37/48] mock agent implementation --- dimos/agents2/agent.py | 12 +++- dimos/agents2/spec.py | 9 ++- dimos/agents2/test_fake_agent.py | 70 ++++++++++++++++++++++++ dimos/protocol/skill/coordinator.py | 9 ++- dimos/protocol/skill/test_coordinator.py | 4 +- 5 files changed, 97 insertions(+), 7 deletions(-) create mode 100644 dimos/agents2/test_fake_agent.py diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index f2efe37dda..6dc8a6a3d5 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -94,6 +94,9 @@ def snapshot_to_messages( # (images for example, requires to be a HumanMessage) special_msgs: List[HumanMessage] = [] + # Initialize state_msg + state_msg = None + for skill_state in sorted( state.values(), key=lambda skill_state: skill_state.duration(), @@ -144,7 +147,14 @@ def __init__( self.system_message = self.config.system_prompt self.publish(self.system_message) - self._llm = init_chat_model(model_provider=self.config.provider, model=self.config.model) + + # Use provided model instance if available, otherwise initialize from config + if self.config.model_instance: + self._llm = self.config.model_instance + else: + self._llm = init_chat_model( + model_provider=self.config.provider, model=self.config.model + ) @rpc def start(self): diff --git a/dimos/agents2/spec.py b/dimos/agents2/spec.py index 1ed5e00327..894d1812b2 100644 --- a/dimos/agents2/spec.py +++ b/dimos/agents2/spec.py @@ -20,6 +20,7 @@ from typing import Any, List, Optional, Tuple, Union from langchain.chat_models.base import _SUPPORTED_PROVIDERS +from langchain_core.language_models.chat_models import BaseChatModel from langchain_core.messages import ( AIMessage, HumanMessage, @@ -43,9 +44,8 @@ # Dynamically create ModelProvider enum from LangChain's supported providers -Provider = Enum( - "Provider", {provider.upper(): provider for provider in _SUPPORTED_PROVIDERS}, type=str -) +_providers = {provider.upper(): provider for provider in _SUPPORTED_PROVIDERS} +Provider = Enum("Provider", _providers, type=str) class Model(str, Enum): @@ -132,8 +132,11 @@ class Model(str, Enum): class AgentConfig(ModuleConfig): system_prompt: Optional[str | SystemMessage] = None skills: Optional[SkillContainer | list[SkillContainer]] = None + + # we can provide model/provvider enums or instantiated model_instance model: Model = Model.GPT_4O provider: Provider = Provider.OPENAI + model_instance: Optional[BaseChatModel] = None agent_transport: type[PubSub] = lcm.PickleLCM agent_topic: Any = field(default_factory=lambda: lcm.Topic("/agent")) diff --git a/dimos/agents2/test_fake_agent.py b/dimos/agents2/test_fake_agent.py new file mode 100644 index 0000000000..feacd68339 --- /dev/null +++ b/dimos/agents2/test_fake_agent.py @@ -0,0 +1,70 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Test agent with FakeChatModel for unit testing.""" + +import os + +import pytest +from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolCall + +from dimos.agents2.agent import Agent +from dimos.agents2.spec import AgentConfig, Model, Provider +from dimos.agents2.testing import ToolCallFakeChatModel +from dimos.protocol.skill import skill +from dimos.protocol.skill.skill import SkillContainer +from dimos.protocol.skill.test_coordinator import TestContainer + + +class TestFakeAgent: + """Test suite for Agent with FakeChatModel.""" + + async def test_fake_agent_with_tool_call(self): + """Test agent initialization and tool call execution.""" + # Create a fake model that will respond with tool calls + fake_model = ToolCallFakeChatModel( + responses=[ + AIMessage( + content="I'll add those numbers for you.", + tool_calls=[ + { + "name": "add", + "args": {"args": [], "kwargs": {"x": 5, "y": 3}}, + "id": "tool_call_1", + } + ], + ), + AIMessage(content="The result of adding 5 and 3 is 8."), + ] + ) + + # Create agent with the fake model + agent = Agent( + model_instance=fake_model, + system_prompt="You are a helpful robot assistant with math skills.", + ) + + # Register skills with coordinator + skills = TestContainer() + agent.coordinator.register_skills(skills) + agent.start() + # Query the agent + await agent.query_async("Please add 5 and 3") + + # Check that tools were bound + assert fake_model.tools is not None + assert len(fake_model.tools) > 0 + + # Verify the model was called and history updated + assert len(agent._history) > 0 diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index c41ed15e9e..0561aa7d9a 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -37,7 +37,7 @@ from dimos.core.module import get_loop from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec from dimos.protocol.skill.skill import SkillConfig, SkillContainer -from dimos.protocol.skill.type import MsgType, Reducer, Return, SkillMsg, Stream +from dimos.protocol.skill.type import MsgType, Output, Reducer, Return, SkillMsg, Stream from dimos.utils.logging_config import setup_logger logger = setup_logger("dimos.protocol.skill.coordinator") @@ -85,7 +85,12 @@ def __init__(self, call_id: str, name: str, skill_config: Optional[SkillConfig] super().__init__() self.skill_config = skill_config or SkillConfig( - name=name, stream=Stream.none, ret=Return.none, reducer=Reducer.all, schema={} + name=name, + stream=Stream.none, + ret=Return.none, + reducer=Reducer.all, + output=Output.standard, + schema={}, ) self.state = SkillStateEnum.pending diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 244380e1d7..842dc252eb 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -125,6 +125,7 @@ async def test_coordinator_generator(): # here we call a skill that generates a sequence of messages skillCoordinator.call_skill("test-gen-0", "counter", {"args": [10]}) skillCoordinator.call_skill("test-gen-1", "counter_passive_sum", {"args": [5]}) + skillCoordinator.call_skill("test-gen-2", "take_photo", {"args": []}) # periodically agent is stopping it's thinking cycle and asks for updates while await skillCoordinator.wait_for_updates(2): @@ -133,4 +134,5 @@ async def test_coordinator_generator(): print(agent_update) await asyncio.sleep(0.125) - print("Skill lifecycle finished") + print("coordinator loop finished") + print(skillCoordinator) From 5aea816de588ca2edac79097c0d6f81881239d6f Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 27 Aug 2025 19:19:12 +0300 Subject: [PATCH 38/48] mock agent testing, image calls --- dimos/agents2/test_fake_agent.py | 70 ------------------- dimos/agents2/test_mock_agent.py | 114 +++++++++++++++++++++++++++++++ dimos/agents2/testing.py | 105 ++++++++++++++++++++++++++++ 3 files changed, 219 insertions(+), 70 deletions(-) delete mode 100644 dimos/agents2/test_fake_agent.py create mode 100644 dimos/agents2/test_mock_agent.py create mode 100644 dimos/agents2/testing.py diff --git a/dimos/agents2/test_fake_agent.py b/dimos/agents2/test_fake_agent.py deleted file mode 100644 index feacd68339..0000000000 --- a/dimos/agents2/test_fake_agent.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Test agent with FakeChatModel for unit testing.""" - -import os - -import pytest -from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolCall - -from dimos.agents2.agent import Agent -from dimos.agents2.spec import AgentConfig, Model, Provider -from dimos.agents2.testing import ToolCallFakeChatModel -from dimos.protocol.skill import skill -from dimos.protocol.skill.skill import SkillContainer -from dimos.protocol.skill.test_coordinator import TestContainer - - -class TestFakeAgent: - """Test suite for Agent with FakeChatModel.""" - - async def test_fake_agent_with_tool_call(self): - """Test agent initialization and tool call execution.""" - # Create a fake model that will respond with tool calls - fake_model = ToolCallFakeChatModel( - responses=[ - AIMessage( - content="I'll add those numbers for you.", - tool_calls=[ - { - "name": "add", - "args": {"args": [], "kwargs": {"x": 5, "y": 3}}, - "id": "tool_call_1", - } - ], - ), - AIMessage(content="The result of adding 5 and 3 is 8."), - ] - ) - - # Create agent with the fake model - agent = Agent( - model_instance=fake_model, - system_prompt="You are a helpful robot assistant with math skills.", - ) - - # Register skills with coordinator - skills = TestContainer() - agent.coordinator.register_skills(skills) - agent.start() - # Query the agent - await agent.query_async("Please add 5 and 3") - - # Check that tools were bound - assert fake_model.tools is not None - assert len(fake_model.tools) > 0 - - # Verify the model was called and history updated - assert len(agent._history) > 0 diff --git a/dimos/agents2/test_mock_agent.py b/dimos/agents2/test_mock_agent.py new file mode 100644 index 0000000000..5ba1902923 --- /dev/null +++ b/dimos/agents2/test_mock_agent.py @@ -0,0 +1,114 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Test agent with FakeChatModel for unit testing.""" + +import os + +import pytest +from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolCall + +from dimos.agents2.agent import Agent +from dimos.agents2.testing import MockModel +from dimos.protocol.skill.test_coordinator import TestContainer + + +async def test_tool_call(): + """Test agent initialization and tool call execution.""" + # Create a fake model that will respond with tool calls + fake_model = MockModel( + responses=[ + AIMessage( + content="I'll add those numbers for you.", + tool_calls=[ + { + "name": "add", + "args": {"args": [], "kwargs": {"x": 5, "y": 3}}, + "id": "tool_call_1", + } + ], + ), + AIMessage(content="The result of adding 5 and 3 is 8."), + ] + ) + + # Create agent with the fake model + agent = Agent( + model_instance=fake_model, + system_prompt="You are a helpful robot assistant with math skills.", + ) + + # Register skills with coordinator + skills = TestContainer() + agent.coordinator.register_skills(skills) + agent.start() + # Query the agent + await agent.query_async("Please add 5 and 3") + + # Check that tools were bound + assert fake_model.tools is not None + assert len(fake_model.tools) > 0 + + # Verify the model was called and history updated + assert len(agent._history) > 0 + + +async def test_image_tool_call(): + """Test agent with image tool call execution.""" + # Create a fake model that will respond with image tool calls + fake_model = MockModel( + responses=[ + AIMessage( + content="I'll take a photo for you.", + tool_calls=[ + { + "name": "take_photo", + "args": {"args": [], "kwargs": {}}, + "id": "tool_call_image_1", + } + ], + ), + AIMessage(content="I've taken the photo. The image shows a cafe scene."), + ] + ) + + # Create agent with the fake model + agent = Agent( + model_instance=fake_model, + system_prompt="You are a helpful robot assistant with camera capabilities.", + ) + + # Register skills with coordinator + skills = TestContainer() + agent.coordinator.register_skills(skills) + agent.start() + + # Query the agent + await agent.query_async("Please take a photo") + + # Check that tools were bound + assert fake_model.tools is not None + assert len(fake_model.tools) > 0 + + # Verify the model was called and history updated + assert len(agent._history) > 0 + + # Check that image was handled specially + # Look for HumanMessage with image content in history + human_messages_with_images = [ + msg + for msg in agent._history + if isinstance(msg, HumanMessage) and msg.content and isinstance(msg.content, list) + ] + assert len(human_messages_with_images) >= 0 # May have image messages diff --git a/dimos/agents2/testing.py b/dimos/agents2/testing.py new file mode 100644 index 0000000000..f7ea8d4d3d --- /dev/null +++ b/dimos/agents2/testing.py @@ -0,0 +1,105 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Testing utilities for agents.""" + +from typing import Any, Dict, Iterator, List, Optional, Sequence, Union + +from langchain_core.callbacks.manager import CallbackManagerForLLMRun +from langchain_core.language_models.chat_models import SimpleChatModel +from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage +from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult +from langchain_core.runnables import Runnable + + +class MockModel(SimpleChatModel): + """Custom fake chat model that supports tool calls for testing.""" + + responses: List[Union[str, AIMessage]] = [] + i: int = 0 + + def __init__(self, **kwargs): + # Extract responses before calling super().__init__ + responses = kwargs.pop("responses", []) + super().__init__(**kwargs) + self.responses = responses + self.i = 0 + self._bound_tools: Optional[Sequence[Any]] = None + + @property + def _llm_type(self) -> str: + return "tool-call-fake-chat-model" + + def _call( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + """Not used in _generate.""" + return "" + + def _generate( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> ChatResult: + """Generate a response using predefined responses.""" + if self.i >= len(self.responses): + self.i = 0 # Wrap around + + response = self.responses[self.i] + self.i += 1 + + # Handle different response types + if isinstance(response, AIMessage): + message = response + else: + # It's a string + message = AIMessage(content=str(response)) + + generation = ChatGeneration(message=message) + return ChatResult(generations=[generation]) + + def _stream( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[ChatGenerationChunk]: + """Stream not implemented for testing.""" + result = self._generate(messages, stop, run_manager, **kwargs) + message = result.generations[0].message + chunk = AIMessageChunk(content=message.content) + yield ChatGenerationChunk(message=chunk) + + def bind_tools( + self, + tools: Sequence[Union[dict[str, Any], type, Any]], + *, + tool_choice: Optional[str] = None, + **kwargs: Any, + ) -> Runnable: + """Store tools and return self.""" + self._bound_tools = tools + return self + + @property + def tools(self) -> Optional[Sequence[Any]]: + """Get bound tools for inspection.""" + return self._bound_tools From 63572e5abef98b57ae3645f982ca29f5565e5dc1 Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 27 Aug 2025 22:09:36 +0300 Subject: [PATCH 39/48] reducers are pickleable --- dimos/protocol/skill/type.py | 46 +++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index 84b912f303..0e7e902fb3 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -185,7 +185,47 @@ def reducer( # just a convinience class to hold reducer functions +def _make_skill_msg( + msg: SkillMsg[Literal[MsgType.stream]], content: Any +) -> SkillMsg[Literal[MsgType.reduced_stream]]: + """Helper to create a reduced stream message with new content.""" + return SkillMsg( + call_id=msg.call_id, + skill_name=msg.skill_name, + content=content, + type=MsgType.reduced_stream, + ) + + +def sum_reducer( + accumulator: Optional[SkillMsg[Literal[MsgType.reduced_stream]]], + msg: SkillMsg[Literal[MsgType.stream]], +) -> SkillMsg[Literal[MsgType.reduced_stream]]: + """Sum reducer that adds values together.""" + acc_value = accumulator.content if accumulator else None + new_value = acc_value + msg.content if acc_value else msg.content + return _make_skill_msg(msg, new_value) + + +def latest_reducer( + accumulator: Optional[SkillMsg[Literal[MsgType.reduced_stream]]], + msg: SkillMsg[Literal[MsgType.stream]], +) -> SkillMsg[Literal[MsgType.reduced_stream]]: + """Latest reducer that keeps only the most recent value.""" + return _make_skill_msg(msg, msg.content) + + +def all_reducer( + accumulator: Optional[SkillMsg[Literal[MsgType.reduced_stream]]], + msg: SkillMsg[Literal[MsgType.stream]], +) -> SkillMsg[Literal[MsgType.reduced_stream]]: + """All reducer that collects all values into a list.""" + acc_value = accumulator.content if accumulator else None + new_value = acc_value + [msg.content] if acc_value else [msg.content] + return _make_skill_msg(msg, new_value) + + class Reducer: - sum = make_reducer(lambda x, y: x + y if x else y) - latest = make_reducer(lambda x, y: y) - all = make_reducer(lambda x, y: x + [y] if x else [y]) + sum = sum_reducer + latest = latest_reducer + all = all_reducer From 3400b24cf19da5b4c034ab2bfeedbb5836a8ea74 Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 27 Aug 2025 22:10:32 +0300 Subject: [PATCH 40/48] mock agent, rpc client inherits docstrings, all modules are skillcontainers --- dimos/agents2/agent.py | 3 - dimos/agents2/test_agent.py | 16 ++--- dimos/agents2/test_mock_agent.py | 92 +++++++++++++----------- dimos/core/__init__.py | 16 ++++- dimos/core/module.py | 4 +- dimos/protocol/pubsub/spec.py | 9 ++- dimos/protocol/skill/coordinator.py | 9 ++- dimos/protocol/skill/skill.py | 21 ++++-- dimos/protocol/skill/test_coordinator.py | 14 ++-- 9 files changed, 110 insertions(+), 74 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index 6dc8a6a3d5..c0b9eafd2e 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -164,7 +164,6 @@ def start(self): def stop(self): self.coordinator.stop() - @rpc def clear_history(self): self._history.clear() @@ -245,14 +244,12 @@ async def agent_loop(self, seed_query: str = ""): traceback.print_exc() - @rpc def query_async(self, query: str): return asyncio.ensure_future(self.agent_loop(query), loop=self._loop) def query(self, query: str): return asyncio.run_coroutine_threadsafe(self.agent_loop(query), self._loop).result() - @rpc def register_skills(self, container): return self.coordinator.register_skills(container) diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index 0df5e7b634..16a3819111 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -17,7 +17,8 @@ import pytest from dimos.agents2.agent import Agent -from dimos.protocol.skill.test_coordinator import TestContainer +from dimos.core import start +from dimos.protocol.skill.test_coordinator import SkillContainerTest @pytest.mark.tool @@ -27,14 +28,13 @@ async def test_agent_init(): "Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate" ) - ## Uncomment the following lines to use a real module system - # from dimos.core import start - # dimos = start(2) - # testcontainer = dimos.deploy(TestContainer) - # agent = dimos.deploy(Agent, system_prompt=system_prompt) - - testcontainer = TestContainer() + # # Uncomment the following lines to use a real module system + dimos = start(2) + testcontainer = dimos.deploy(SkillContainerTest) agent = Agent(system_prompt=system_prompt) + + # testcontainer = TestContainer() + # agent = Agent(system_prompt=system_prompt) agent.register_skills(testcontainer) agent.start() agent.run_implicit_skill("uptime_seconds") diff --git a/dimos/agents2/test_mock_agent.py b/dimos/agents2/test_mock_agent.py index 5ba1902923..8e17b737e1 100644 --- a/dimos/agents2/test_mock_agent.py +++ b/dimos/agents2/test_mock_agent.py @@ -21,51 +21,54 @@ from dimos.agents2.agent import Agent from dimos.agents2.testing import MockModel -from dimos.protocol.skill.test_coordinator import TestContainer - - -async def test_tool_call(): - """Test agent initialization and tool call execution.""" - # Create a fake model that will respond with tool calls - fake_model = MockModel( - responses=[ - AIMessage( - content="I'll add those numbers for you.", - tool_calls=[ - { - "name": "add", - "args": {"args": [], "kwargs": {"x": 5, "y": 3}}, - "id": "tool_call_1", - } - ], - ), - AIMessage(content="The result of adding 5 and 3 is 8."), - ] - ) - - # Create agent with the fake model - agent = Agent( - model_instance=fake_model, - system_prompt="You are a helpful robot assistant with math skills.", - ) - - # Register skills with coordinator - skills = TestContainer() - agent.coordinator.register_skills(skills) - agent.start() - # Query the agent - await agent.query_async("Please add 5 and 3") - - # Check that tools were bound - assert fake_model.tools is not None - assert len(fake_model.tools) > 0 - - # Verify the model was called and history updated - assert len(agent._history) > 0 +from dimos.core import start +from dimos.protocol.skill.test_coordinator import SkillContainerTest + +# async def test_tool_call(): +# """Test agent initialization and tool call execution.""" +# # Create a fake model that will respond with tool calls +# fake_model = MockModel( +# responses=[ +# AIMessage( +# content="I'll add those numbers for you.", +# tool_calls=[ +# { +# "name": "add", +# "args": {"args": [], "kwargs": {"x": 5, "y": 3}}, +# "id": "tool_call_1", +# } +# ], +# ), +# AIMessage(content="The result of adding 5 and 3 is 8."), +# ] +# ) + +# # Create agent with the fake model +# agent = Agent( +# model_instance=fake_model, +# system_prompt="You are a helpful robot assistant with math skills.", +# ) + +# # Register skills with coordinator +# skills = SkillContainerTest() +# agent.coordinator.register_skills(skills) +# agent.start() +# # Query the agent +# await agent.query_async("Please add 5 and 3") + +# # Check that tools were bound +# assert fake_model.tools is not None +# assert len(fake_model.tools) > 0 + +# # Verify the model was called and history updated +# assert len(agent._history) > 0 + +# agent.stop() async def test_image_tool_call(): """Test agent with image tool call execution.""" + dimos = start(2) # Create a fake model that will respond with image tool calls fake_model = MockModel( responses=[ @@ -73,6 +76,8 @@ async def test_image_tool_call(): content="I'll take a photo for you.", tool_calls=[ { + # "name": "add", + # "args": {"args": [], "kwargs": {"x": 5, "y": 3}}, "name": "take_photo", "args": {"args": [], "kwargs": {}}, "id": "tool_call_image_1", @@ -90,8 +95,8 @@ async def test_image_tool_call(): ) # Register skills with coordinator - skills = TestContainer() - agent.coordinator.register_skills(skills) + skills = dimos.deploy(SkillContainerTest) + agent.register_skills(skills) agent.start() # Query the agent @@ -112,3 +117,4 @@ async def test_image_tool_call(): if isinstance(msg, HumanMessage) and msg.content and isinstance(msg.content, list) ] assert len(human_messages_with_images) >= 0 # May have image messages + agent.stop() diff --git a/dimos/core/__init__.py b/dimos/core/__init__.py index 0b7755e2e3..1e6eccaaed 100644 --- a/dimos/core/__init__.py +++ b/dimos/core/__init__.py @@ -53,9 +53,19 @@ def __getattr__(self, name: str): raise AttributeError(f"{name} is not found.") if name in self.rpcs: - return lambda *args, **kwargs: self.rpc.call_sync( - f"{self.remote_name}/{name}", (args, kwargs) - ) + # Get the original method to preserve its docstring + original_method = getattr(self.actor_class, name, None) + + def rpc_call(*args, **kwargs): + return self.rpc.call_sync(f"{self.remote_name}/{name}", (args, kwargs)) + + # Copy docstring and other attributes from original method + if original_method: + rpc_call.__doc__ = original_method.__doc__ + rpc_call.__name__ = original_method.__name__ + rpc_call.__qualname__ = f"{self.__class__.__name__}.{original_method.__name__}" + + return rpc_call # return super().__getattr__(name) # Try to avoid recursion by directly accessing attributes that are known diff --git a/dimos/core/module.py b/dimos/core/module.py index 01abfcdb8a..15abbe52bd 100644 --- a/dimos/core/module.py +++ b/dimos/core/module.py @@ -30,6 +30,8 @@ from dimos.core.stream import In, Out, RemoteIn, RemoteOut, Transport from dimos.protocol.rpc import LCMRPC, RPCSpec from dimos.protocol.service import Configurable +from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec +from dimos.protocol.skill.skill import SkillContainer from dimos.protocol.tf import LCMTF, TFSpec @@ -59,7 +61,7 @@ class ModuleConfig: tf_transport: type[TFSpec] = LCMTF -class ModuleBase(Configurable[ModuleConfig]): +class ModuleBase(Configurable[ModuleConfig], SkillContainer): _rpc: Optional[RPCSpec] = None _tf: Optional[TFSpec] = None _loop: asyncio.AbstractEventLoop = None diff --git a/dimos/protocol/pubsub/spec.py b/dimos/protocol/pubsub/spec.py index 81db8a0669..1d38cc74bd 100644 --- a/dimos/protocol/pubsub/spec.py +++ b/dimos/protocol/pubsub/spec.py @@ -132,7 +132,14 @@ def wrapper_cb(encoded_data: bytes, topic: TopicT): class PickleEncoderMixin(PubSubEncoderMixin[TopicT, MsgT]): def encode(self, msg: MsgT, *_: TopicT) -> bytes: - return pickle.dumps(msg) + try: + return pickle.dumps(msg) + except Exception as e: + print("Pickle encoding error:", e) + import traceback + + traceback.print_exc() + print("Tried to pickle:", msg) def decode(self, msg: bytes, _: TopicT) -> MsgT: return pickle.loads(msg) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 0561aa7d9a..c0d6a3eb60 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -275,12 +275,15 @@ def __str__(self): return capture.get().strip() +from dimos.core.module import Module + + # This class is responsible for managing the lifecycle of skills, # handling skill calls, and coordinating communication between the agent and skills. # # It aggregates skills from static and dynamic containers, manages skill states, # and decides when to notify the agent about updates. -class SkillCoordinator(SkillContainer): +class SkillCoordinator(Module): default_config = SkillCoordinatorConfig empty: bool = True @@ -323,7 +326,7 @@ def get_tools(self) -> list[dict]: ret = [] for name, skill_config in self.skills().items(): - # print(f"Tool {name} config: {skill_config}, {skill_config.f}") + print(f"Tool {name} config: {skill_config}, {skill_config.f}") ret.append(langchain_tool(skill_config.f)) return ret @@ -475,7 +478,7 @@ def __str__(self): # .skills() method def register_skills(self, container: SkillContainer): self.empty = False - if not container.dynamic_skills: + if not container.dynamic_skills(): logger.info(f"Registering static skill container, {container}") self._static_containers.append(container) for name, skill_config in container.skills().items(): diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index 7a01c9546d..151029447a 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -17,8 +17,7 @@ from dataclasses import dataclass from typing import Any, Callable, Optional -from dimos.core import rpc -from dimos.protocol.service import Configurable +# from dimos.core.core import rpc from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec from dimos.protocol.skill.schema import function_to_schema from dimos.protocol.skill.type import ( @@ -57,6 +56,11 @@ # the average of all values is returned to the agent +def rpc(fn: Callable[..., Any]) -> Callable[..., Any]: + fn.__rpc__ = True # type: ignore[attr-defined] + return fn + + def skill( reducer: Reducer = Reducer.latest, stream: Stream = Stream.none, @@ -133,12 +137,15 @@ def wrapper(self, *args, **kwargs): # for this you'll need to override the `skills` method to return a dynamic set of skills # SkillCoordinator will call this method to get the skills available upon every request to # the agent -# -class SkillContainer(Configurable[SkillContainerConfig]): - default_config = SkillContainerConfig + + +class SkillContainer: + skill_transport_class: type[SkillCommsSpec] = LCMSkillComms _skill_transport: Optional[SkillCommsSpec] = None - dynamic_skills = False + @rpc + def dynamic_skills(self): + return False def __str__(self) -> str: return f"SkillContainer({self.__class__.__name__})" @@ -211,5 +218,5 @@ def skills(self) -> dict[str, SkillConfig]: @property def skill_transport(self) -> SkillCommsSpec: if self._skill_transport is None: - self._skill_transport = self.config.skill_transport() + self._skill_transport = self.skill_transport_class() return self._skill_transport diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 842dc252eb..a4e4d813df 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -18,14 +18,15 @@ import pytest +from dimos.core import Module from dimos.msgs.sensor_msgs import Image from dimos.protocol.skill.coordinator import SkillCoordinator -from dimos.protocol.skill.skill import SkillContainer, skill +from dimos.protocol.skill.skill import skill from dimos.protocol.skill.type import Output, Reducer, Stream from dimos.utils.data import get_data -class TestContainer(SkillContainer): +class SkillContainerTest(Module): @skill() def add(self, x: int, y: int) -> int: """adds x and y.""" @@ -78,13 +79,16 @@ def current_date(self, frequency: Optional[float] = 10) -> str: @skill(output=Output.image) def take_photo(self) -> str: """Takes a camera photo""" - return Image.from_file(get_data("cafe.jpg")) + print("Taking photo...") + img = Image.from_file(get_data("cafe.jpg")) + print("Photo taken.") + return img @pytest.mark.asyncio async def test_coordinator_parallel_calls(): skillCoordinator = SkillCoordinator() - skillCoordinator.register_skills(TestContainer()) + skillCoordinator.register_skills(SkillContainerTest()) skillCoordinator.start() skillCoordinator.call_skill("test-call-0", "delayadd", {"args": [1, 2]}) @@ -119,7 +123,7 @@ async def test_coordinator_parallel_calls(): @pytest.mark.asyncio async def test_coordinator_generator(): skillCoordinator = SkillCoordinator() - skillCoordinator.register_skills(TestContainer()) + skillCoordinator.register_skills(SkillContainerTest()) skillCoordinator.start() # here we call a skill that generates a sequence of messages From 1af4bf4a79e99bcd4f0a5dcf919d407fdda81cc3 Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 27 Aug 2025 22:14:48 +0300 Subject: [PATCH 41/48] enabled single-process mock agent test --- dimos/agents2/test_mock_agent.py | 83 ++++++++++++++++---------------- 1 file changed, 41 insertions(+), 42 deletions(-) diff --git a/dimos/agents2/test_mock_agent.py b/dimos/agents2/test_mock_agent.py index 8e17b737e1..7f03e964a0 100644 --- a/dimos/agents2/test_mock_agent.py +++ b/dimos/agents2/test_mock_agent.py @@ -24,46 +24,47 @@ from dimos.core import start from dimos.protocol.skill.test_coordinator import SkillContainerTest -# async def test_tool_call(): -# """Test agent initialization and tool call execution.""" -# # Create a fake model that will respond with tool calls -# fake_model = MockModel( -# responses=[ -# AIMessage( -# content="I'll add those numbers for you.", -# tool_calls=[ -# { -# "name": "add", -# "args": {"args": [], "kwargs": {"x": 5, "y": 3}}, -# "id": "tool_call_1", -# } -# ], -# ), -# AIMessage(content="The result of adding 5 and 3 is 8."), -# ] -# ) - -# # Create agent with the fake model -# agent = Agent( -# model_instance=fake_model, -# system_prompt="You are a helpful robot assistant with math skills.", -# ) - -# # Register skills with coordinator -# skills = SkillContainerTest() -# agent.coordinator.register_skills(skills) -# agent.start() -# # Query the agent -# await agent.query_async("Please add 5 and 3") - -# # Check that tools were bound -# assert fake_model.tools is not None -# assert len(fake_model.tools) > 0 - -# # Verify the model was called and history updated -# assert len(agent._history) > 0 - -# agent.stop() + +async def test_tool_call(): + """Test agent initialization and tool call execution.""" + # Create a fake model that will respond with tool calls + fake_model = MockModel( + responses=[ + AIMessage( + content="I'll add those numbers for you.", + tool_calls=[ + { + "name": "add", + "args": {"args": [], "kwargs": {"x": 5, "y": 3}}, + "id": "tool_call_1", + } + ], + ), + AIMessage(content="The result of adding 5 and 3 is 8."), + ] + ) + + # Create agent with the fake model + agent = Agent( + model_instance=fake_model, + system_prompt="You are a helpful robot assistant with math skills.", + ) + + # Register skills with coordinator + skills = SkillContainerTest() + agent.coordinator.register_skills(skills) + agent.start() + # Query the agent + await agent.query_async("Please add 5 and 3") + + # Check that tools were bound + assert fake_model.tools is not None + assert len(fake_model.tools) > 0 + + # Verify the model was called and history updated + assert len(agent._history) > 0 + + agent.stop() async def test_image_tool_call(): @@ -76,8 +77,6 @@ async def test_image_tool_call(): content="I'll take a photo for you.", tool_calls=[ { - # "name": "add", - # "args": {"args": [], "kwargs": {"x": 5, "y": 3}}, "name": "take_photo", "args": {"args": [], "kwargs": {}}, "id": "tool_call_image_1", From 60b492c2aea3ffcbf6b586ccd3b5f7f9e13a5aa6 Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 27 Aug 2025 22:59:59 +0300 Subject: [PATCH 42/48] agent encoding happens before message is sent from a skillcontainer --- dimos/protocol/skill/coordinator.py | 22 ++++------------------ dimos/protocol/skill/type.py | 10 +++++++++- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index c0d6a3eb60..4ba62fa5a8 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -107,32 +107,18 @@ def duration(self) -> float: return 0.0 def content(self) -> dict[str, Any] | str | int | float | None: - # any tool output can be a custom type that knows how to encode itself - # like a costmap, path, transform etc could be translatable into strings - def maybe_encode(something: Any) -> str: - if getattr(something, "agent_encode", None): - return something.agent_encode() - - # if isinstance(something, dict): - # something = json.dumps(something) - - # if not isinstance(something, str): - # something = str(something) - - return something - if self.state == SkillStateEnum.running: if self.reduced_stream_msg: - return maybe_encode(self.reduced_stream_msg.content) + return self.reduced_stream_msg.content if self.state == SkillStateEnum.completed: if self.reduced_stream_msg: # are we a streaming skill? - return maybe_encode(self.reduced_stream_msg.content) - return maybe_encode(self.ret_msg.content) + return self.reduced_stream_msg.content + return self.ret_msg.content if self.state == SkillStateEnum.error: if self.reduced_stream_msg: - (maybe_encode(self.reduced_stream_msg.content) + "\n" + self.error_msg.content) + (self.reduced_stream_msg.content + "\n" + self.error_msg.content) else: return self.error_msg.content diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index 0e7e902fb3..ec82e4a576 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -96,6 +96,11 @@ class MsgType(Enum): M = TypeVar("M", bound="MsgType") +def maybe_encode(something: Any) -> str: + if getattr(something, "agent_encode", None): + return something.agent_encode() + + class SkillMsg(Timestamped, Generic[M]): ts: float type: M @@ -113,7 +118,10 @@ def __init__( self.ts = time.time() self.call_id = call_id self.skill_name = skill_name - self.content = content + # any tool output can be a custom type that knows how to encode itself + # like a costmap, path, transform etc could be translatable into strings + + self.content = maybe_encode(content) self.type = type def __repr__(self): From 00d21be557fd52806437490d73913921536bfe80 Mon Sep 17 00:00:00 2001 From: lesh Date: Sat, 30 Aug 2025 23:50:47 +0300 Subject: [PATCH 43/48] test fix --- dimos/protocol/skill/test_coordinator.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index a4e4d813df..f6860c3747 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -30,6 +30,7 @@ class SkillContainerTest(Module): @skill() def add(self, x: int, y: int) -> int: """adds x and y.""" + time.sleep(2) return x + y @skill() @@ -91,7 +92,7 @@ async def test_coordinator_parallel_calls(): skillCoordinator.register_skills(SkillContainerTest()) skillCoordinator.start() - skillCoordinator.call_skill("test-call-0", "delayadd", {"args": [1, 2]}) + skillCoordinator.call_skill("test-call-0", "add", {"args": [0, 2]}) time.sleep(0.1) @@ -101,8 +102,9 @@ async def test_coordinator_parallel_calls(): skillstates = skillCoordinator.generate_snapshot() - tool_msg = skillstates[f"test-call-{cnt}"].agent_encode() - tool_msg.content == cnt + 1 + skill_id = f"test-call-{cnt}" + tool_msg = skillstates[skill_id].agent_encode() + assert tool_msg.content == cnt + 2 cnt += 1 if cnt < 5: @@ -119,6 +121,8 @@ async def test_coordinator_parallel_calls(): time.sleep(0.1 * cnt) + skillCoordinator.stop() + @pytest.mark.asyncio async def test_coordinator_generator(): @@ -140,3 +144,4 @@ async def test_coordinator_generator(): print("coordinator loop finished") print(skillCoordinator) + skillCoordinator.stop() From 4016e64da86b2680331d18ad676265ca0c165ab9 Mon Sep 17 00:00:00 2001 From: leshy <681516+leshy@users.noreply.github.com> Date: Sat, 30 Aug 2025 20:53:53 +0000 Subject: [PATCH 44/48] CI code cleanup --- dimos/msgs/sensor_msgs/Image.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dimos/msgs/sensor_msgs/Image.py b/dimos/msgs/sensor_msgs/Image.py index 1eed56af3f..ba66661eab 100644 --- a/dimos/msgs/sensor_msgs/Image.py +++ b/dimos/msgs/sensor_msgs/Image.py @@ -515,7 +515,6 @@ def __len__(self) -> int: """Return total number of pixels.""" return self.height * self.width - def agent_encode(self) -> str: """Encode image to base64 JPEG format for agent processing. @@ -554,4 +553,3 @@ def find_best(*argv): return rx.interval(1.0 / target_frequency).pipe( ops.observe_on(thread_scheduler), ops.map(find_best) ) - From 86d05dc5a0abf47b24de4aa46407c4e183e88daf Mon Sep 17 00:00:00 2001 From: lesh Date: Sat, 30 Aug 2025 23:56:05 +0300 Subject: [PATCH 45/48] skill threading bugfix --- dimos/protocol/skill/skill.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index 151029447a..8fa774e3b0 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -14,6 +14,7 @@ import asyncio import threading +from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass from typing import Any, Callable, Optional @@ -114,12 +115,14 @@ class SkillContainerConfig: skill_transport: type[SkillCommsSpec] = LCMSkillComms +_skill_thread_pool = ThreadPoolExecutor(max_workers=50, thread_name_prefix="skill_worker") + + def threaded(f: Callable[..., Any]) -> Callable[..., None]: - """Decorator to run a function in a separate thread.""" + """Decorator to run a function in a thread pool.""" def wrapper(self, *args, **kwargs): - thread = threading.Thread(target=f, args=(self, *args), kwargs=kwargs) - thread.start() + _skill_thread_pool.submit(f, self, *args, **kwargs) return None return wrapper From a84b89c8a49f35f0e86ecd4202ded796528e4cd5 Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 31 Aug 2025 09:45:18 +0300 Subject: [PATCH 46/48] core test fix --- dimos/core/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dimos/core/test_core.py b/dimos/core/test_core.py index a67d164b00..32433987d7 100644 --- a/dimos/core/test_core.py +++ b/dimos/core/test_core.py @@ -90,7 +90,7 @@ def test_classmethods(): # Check that we have the expected RPC methods assert "navigate_to" in class_rpcs, "navigate_to should be in rpcs" assert "start" in class_rpcs, "start should be in rpcs" - assert len(class_rpcs) == 3 + assert len(class_rpcs) == 5 # Check that the values are callable assert callable(class_rpcs["navigate_to"]), "navigate_to should be callable" From 812949cf51f38f5f77fc998d6744f4322640e3de Mon Sep 17 00:00:00 2001 From: dimensional5 Date: Tue, 2 Sep 2025 14:25:50 -0700 Subject: [PATCH 47/48] Added temp development tests and files --- dimos/agents2/agent_refactor.md | 391 ++++++++++++++++++ dimos/agents2/temp/run_unitree_agents2.py | 267 ++++++++++++ dimos/agents2/temp/run_unitree_async.py | 180 ++++++++ dimos/agents2/temp/test_agent_query.py | 213 ++++++++++ dimos/agents2/temp/test_event_loop.py | 121 ++++++ dimos/agents2/temp/test_simple_query.py | 72 ++++ dimos/agents2/temp/test_tornado_fix.py | 76 ++++ dimos/agents2/temp/test_unitree_skills.py | 168 ++++++++ .../unitree_webrtc/unitree_skill_container.py | 169 ++++++++ 9 files changed, 1657 insertions(+) create mode 100644 dimos/agents2/agent_refactor.md create mode 100644 dimos/agents2/temp/run_unitree_agents2.py create mode 100644 dimos/agents2/temp/run_unitree_async.py create mode 100644 dimos/agents2/temp/test_agent_query.py create mode 100644 dimos/agents2/temp/test_event_loop.py create mode 100644 dimos/agents2/temp/test_simple_query.py create mode 100644 dimos/agents2/temp/test_tornado_fix.py create mode 100644 dimos/agents2/temp/test_unitree_skills.py create mode 100644 dimos/robot/unitree_webrtc/unitree_skill_container.py diff --git a/dimos/agents2/agent_refactor.md b/dimos/agents2/agent_refactor.md new file mode 100644 index 0000000000..9ed3deb568 --- /dev/null +++ b/dimos/agents2/agent_refactor.md @@ -0,0 +1,391 @@ +# DimOS Agents2: LangChain-Based Agent Refactor + +## Overview + +The `agents2` module represents a complete refactor of the DimOS agent system, migrating from a custom implementation to a LangChain-based architecture. This refactor provides better integration with modern LLM frameworks, standardized tool calling, and improved message handling. + +## Architecture + +### Core Components + +#### 1. **AgentSpec** (`spec.py`) +- Abstract base class defining the agent interface +- Inherits from `Service[AgentConfig]` and `Module` +- Provides transport layer for publishing agent messages via LCM +- Defines abstract methods that all agents must implement: + - `start()`, `stop()`, `clear_history()` + - `append_history()`, `history()` + - `query()` - main interaction method +- Rich console output for debugging agent conversations + +#### 2. **Agent** (`agent.py`) +- Concrete implementation of `AgentSpec` +- Integrates with `SkillCoordinator` for tool/skill management +- Uses LangChain's `init_chat_model` for LLM interaction +- Key features: + - Dynamic tool binding per conversation turn + - Asynchronous agent loop with skill state management + - Support for implicit skill execution + - Message snapshot system for long-running skills + +#### 3. **Message Types** +- Leverages LangChain's message types: + - `SystemMessage` - system prompts + - `HumanMessage` - user inputs + - `AIMessage` - agent responses + - `ToolMessage` - tool execution results + - `ToolCall` - tool invocation requests + +#### 4. **Configuration** +- `AgentConfig` dataclass with: + - Model selection (extensive enum of supported models) + - Provider selection (dynamically generated from LangChain) + - System prompt configuration + - Transport configuration (LCM by default) + - Skills/tools configuration + +### Key Differences from Old Agent System + +| Aspect | Old System (`dimos/agents`) | New System (`dimos/agents2`) | +|--------|------------------------------|-------------------------------| +| **Framework** | Custom implementation | LangChain-based | +| **Message Handling** | Custom `AgentMessage` class | LangChain message types | +| **Tool Integration** | Custom `AbstractSkill` | LangChain tools + SkillCoordinator | +| **Model Support** | Manual provider implementations | LangChain's unified interface | +| **Streaming** | Custom stream handling | Integrated with SkillCoordinator | +| **Memory** | Custom `AbstractAgentSemanticMemory` | Not yet implemented (TODO) | +| **Configuration** | Multiple parameters | Unified `AgentConfig` dataclass | + +## Migration Guide + +### For Agent Users + +**Old way:** +```python +from dimos.agents.modules.base_agent import BaseAgentModule + +agent = BaseAgentModule( + model="openai::gpt-4o-mini", + system_prompt="You are a helpful assistant", + skills=skill_library, + temperature=0.0 +) +``` + +**New way:** +```python +from dimos.agents2 import Agent, AgentSpec +from dimos.agents2.spec import Model, Provider + +agent = Agent( + system_prompt="You are a helpful assistant", + model=Model.GPT_4O_MINI, + provider=Provider.OPENAI +) +agent.register_skills(skill_container) +``` + +### For Skill Developers + +**Old way:** +```python +from dimos.skills.skills import AbstractSkill + +class MySkill(AbstractSkill): + def execute(self, *args, **kwargs): + return result +``` + +**New way:** +```python +from dimos.protocol.skill.skill import SkillContainer, skill + +class MySkillContainer(SkillContainer): + @skill() + def my_skill(self, arg1: int, arg2: str) -> str: + """Skill description for LLM.""" + return result +``` + +## Current Issues & TODOs + +### Immediate Issues + +1. **Python Version Compatibility** + - ✅ Fixed: `type` alias syntax incompatible with Python 3.10 + - Solution: Use simple assignment `AnyMessage = Union[...]` instead of `type AnyMessage = ...` + +### TODO Items + +1. **Memory/RAG Integration** + - Old system had `AbstractAgentSemanticMemory` for semantic search + - New system needs LangChain memory integration + - Consider using LangChain's memory abstractions + +2. **Streaming Improvements** + - Better handling of streaming responses + - Integration with LangChain's streaming capabilities + +3. **Testing** + - Expand test coverage beyond basic `test_agent.py` + - Add integration tests with real LLM providers + - Test skill coordination edge cases + +4. **Documentation** + - Add docstrings to all public methods + - Create usage examples + - Document skill development patterns + +5. **Performance** + - Profile agent loop performance + - Optimize message history management + - Consider caching strategies for tools + +6. **Error Handling** + - Improve error recovery in agent loop + - Better error messages for skill failures + - Timeout handling for long-running skills + +## Testing Strategy + +### Unit Tests +- Test message handling and transformation +- Test skill registration and tool generation +- Test configuration parsing + +### Integration Tests +- Test with mock LLM providers +- Test skill execution flow +- Test error scenarios + +### System Tests +- End-to-end conversation flow +- Multi-turn interactions with tools +- Long-running skill management + +## Code Quality Notes + +### Strengths +- Clean separation of concerns (spec vs implementation) +- Good use of type hints and dataclasses +- Leverages established LangChain patterns +- Modular skill system + +### Areas for Improvement +- Add comprehensive error handling +- Implement proper logging throughout +- Add metrics/observability +- Consider adding middleware support + +## Performance Considerations + +1. **Message History**: Currently keeps full history in memory + - Consider implementing sliding window + - Add history persistence option + +2. **Tool Binding**: Re-binds tools on each turn + - Could cache if tool set is stable + - Profile impact on latency + +3. **Async Handling**: Good use of async/await + - Consider adding connection pooling for LLM calls + - Implement proper backpressure handling + +## Security Considerations + +1. **Input Validation**: Need to validate tool arguments +2. **Prompt Injection**: Consider adding guards +3. **Rate Limiting**: Add support for rate limiting LLM calls +4. **Secrets Management**: Ensure API keys are handled securely + +## Compatibility Matrix + +| Python Version | Status | Notes | +|----------------|--------|-------| +| 3.10 | ✅ Supported | Use `AnyMessage = Union[...]` syntax | +| 3.11 | ✅ Supported | Same as 3.10 | +| 3.12+ | ✅ Supported | Could use `type` keyword but not required | + +## Dependencies + +### Required +- `langchain-core`: Core LangChain functionality +- `langchain`: Chat model initialization +- `rich`: Console output formatting +- `dimos.protocol.skill`: Skill coordination system +- `dimos.core`: DimOS module system + +### Optional (Provider-Specific) +- `langchain-openai`: For OpenAI models +- `langchain-anthropic`: For Claude models +- `langchain-google-genai`: For Gemini models +- etc. + +## Next Steps + +1. **Immediate** + - ✅ Fix Python 3.10 compatibility + - Add proper error handling to agent loop + - Implement basic memory support + +2. **Short-term** + - Expand test coverage + - Add more comprehensive examples + - Document migration path for existing agents + +3. **Long-term** + - Full feature parity with old agent system + - Performance optimizations + - Advanced features (multi-agent coordination, etc.) + +## Implementation Progress + +### Completed Tasks + +#### 1. UnitreeSkillContainer Creation (✅ Complete) +- **File**: `dimos/robot/unitree_webrtc/unitree_skill_container.py` +- **Status**: Successfully converted all Unitree skills to new framework +- **Changes Made**: + - Converted from `AbstractSkill`/`AbstractRobotSkill` to `SkillContainer` with `@skill` decorators + - Migrated all movement skills (move, wait) + - Migrated navigation skills (navigate_with_text, get_pose, navigate_to_goal, explore) + - Migrated speech skill (speak with OpenAI TTS) + - Migrated all Unitree control skills (damp, stand_up, sit, dance, flip, etc.) + - Added proper type hints and docstrings for LangChain compatibility + - Implemented helper methods for WebRTC communication + +#### Key Skill Migration Patterns Applied: +1. **Simple Skills**: Direct conversion with `@skill()` decorator + ```python + # Old: class Wait(AbstractSkill) + # New: + @skill() + def wait(self, seconds: float) -> str: + ``` + +2. **Robot Skills**: Maintain robot reference in container init + ```python + def __init__(self, robot: Optional['UnitreeGo2'] = None): + self._robot = robot + ``` + +3. **Streaming Skills**: Use Stream and Reducer parameters + ```python + @skill(stream=Stream.passive, reducer=Reducer.latest) + def explore(...) -> Generator[dict, None, None]: + ``` + +4. **Image Output Skills**: Use Output parameter + ```python + @skill(output=Output.image) + def take_photo(self) -> Image: + ``` + +### Testing Complete (✅) +- Test file created: `dimos/agents2/temp/test_unitree_skills.py` +- Run file created: `dimos/agents2/temp/run_unitree_agents2.py` +- **43 skills successfully registered** (41 dynamic + 2 explicit) +- Skills have proper LangChain-compatible schemas + +### Dynamic Skill Generation Implementation (✅) +- **File**: `dimos/robot/unitree_webrtc/unitree_skill_container.py` +- **Method**: Dynamically generates skills from `UNITREE_WEBRTC_CONTROLS` list +- **Pattern**: + ```python + def _create_dynamic_skill(self, skill_name, api_id, description, original_name): + def dynamic_skill_func(self) -> str: + return self._execute_sport_command(api_id, original_name) + decorated_skill = skill()(dynamic_skill_func) + setattr(self, skill_name, decorated_skill.__get__(self, self.__class__)) + ``` + +### Skills Successfully Migrated: +**Explicit Skills (2)**: +- `move` - Direct velocity control with duration +- `wait` - Time delay + +**Dynamic Skills (41)** - Generated from UNITREE_WEBRTC_CONTROLS: +- **Basic Movement**: damp, balance_stand, stand_up, stand_down, recovery_stand, sit, rise_sit +- **Gaits**: switch_gait, continuous_gait, economic_gait +- **Actions**: hello, stretch, wallow, scrape, pose +- **Dance**: dance1, dance2, wiggle_hips, moon_walk +- **Advanced**: front_flip, back_flip, left_flip, right_flip, front_jump, front_pounce, handstand, bound +- **Settings**: body_height, foot_raise_height, speed_level, trigger +- **And more...** + +### Ready for Integration Testing +The system is now ready to test with: +- Real robot hardware (UnitreeGo2) +- Live LLM API calls (OpenAI GPT-4 or similar) +- Web interface integration + +## Event Loop Fix (✅ Resolved) + +### Final Solution: +The Tornado `AsyncIOMainLoop` used by Dask wraps an asyncio loop. We access the underlying loop via `asyncio_loop` attribute: + +```python +# In query_async() and query() +if loop_type == "AsyncIOMainLoop": + actual_loop = self._loop.asyncio_loop # Get the wrapped asyncio loop + return asyncio.ensure_future(self.agent_loop(query), loop=actual_loop) +``` + +### Fixed Issues: +1. **AsyncIOMainLoop.create_task Error**: Fixed by using the wrapped asyncio loop +2. **AsyncIOMainLoop.is_running Error**: Fixed by checking loop type before calling +3. **Event Loop Management**: + - Tornado AsyncIOMainLoop: Use wrapped `asyncio_loop` attribute + - Standard asyncio loop: Use directly, start in thread if needed + +### Known Limitation: +**Dynamic Skills & Dask**: Dynamically generated skills have pickle issues when sent over network. +**Workaround**: Create the container locally on the same worker as the agent. + +## Event Loop Implementation Details + +### The Issue: +- Module class creates `self._loop` but doesn't run it +- `agent.query()` uses `asyncio.run_coroutine_threadsafe()` which requires a running loop +- This caused queries to hang or fail + +### The Solution: +- Added event loop startup in `Agent.start()` method +- Automatically starts loop in background thread if not running +- Now `agent.query()` works immediately after `agent.start()` + +### Clean Usage: +```python +agent = Agent(...) +agent.register_skills(container) +agent.start() # This ensures event loop is running +result = agent.query("Hello!") # Works without any thread management +``` + +### Two Clean Approaches: +1. **Sync API** (run_unitree_agents2.py): Use `agent.start()` then `agent.query()` +2. **Async API** (run_unitree_async.py): Use `async`/`await` throughout + +## Current Status (August 2025) + +### Working Features: +- ✅ LangChain-based agent with tool binding +- ✅ SkillCoordinator integration +- ✅ UnitreeSkillContainer with 43 skills (41 dynamic + 2 explicit) +- ✅ Event loop compatibility (Tornado AsyncIOMainLoop & standard asyncio) +- ✅ Both sync and async query methods +- ✅ Skill streaming and implicit skills +- ✅ Message snapshot system + +### Test Files (in agents2/temp/): +- `test_unitree_skills.py` - Tests skill registration +- `run_unitree_agents2.py` - Sync approach for running agent +- `run_unitree_async.py` - Async approach +- `test_simple_query.py` - Basic query testing +- `test_event_loop.py` - Event loop testing +- `test_agent_query.py` - Agent query testing +- `test_tornado_fix.py` - Tornado compatibility testing + +## Conclusion + +The agents2 refactor successfully modernizes the DimOS agent system by adopting LangChain, providing better standardization and ecosystem compatibility. The Unitree robot skills have been fully migrated with dynamic generation, and event loop issues have been resolved. The foundation is solid and ready for integration testing with actual hardware. \ No newline at end of file diff --git a/dimos/agents2/temp/run_unitree_agents2.py b/dimos/agents2/temp/run_unitree_agents2.py new file mode 100644 index 0000000000..b3a6e29b9d --- /dev/null +++ b/dimos/agents2/temp/run_unitree_agents2.py @@ -0,0 +1,267 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Run script for Unitree Go2 robot with agents2 framework. +This is the migrated version using the new LangChain-based agent system. +""" + +import os +import sys +import time +from pathlib import Path +from dotenv import load_dotenv +from typing import Optional + +# Add parent directories to path +sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) + +from dimos.robot.unitree_webrtc.unitree_go2 import UnitreeGo2 +from dimos.robot.unitree_webrtc.unitree_skill_container import UnitreeSkillContainer +from dimos.agents2 import Agent +from dimos.agents2.spec import AgentConfig, Model, Provider, SystemMessage +from dimos.utils.logging_config import setup_logger + +# For web interface (simplified for now) +from dimos.web.robot_web_interface import RobotWebInterface +import reactivex as rx +import reactivex.operators as ops + +logger = setup_logger("dimos.agents2.run_unitree") + +# Load environment variables +load_dotenv() + +# System prompt path +SYSTEM_PROMPT_PATH = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "assets/agent/prompt.txt", +) + + +class UnitreeAgentRunner: + """Manages the Unitree robot with the new agents2 framework.""" + + def __init__(self): + self.robot = None + self.agent = None + self.web_interface = None + self.agent_thread = None + self.running = False + + def setup_robot(self) -> UnitreeGo2: + """Initialize the robot connection.""" + logger.info("Initializing Unitree Go2 robot...") + + robot = UnitreeGo2( + ip=os.getenv("ROBOT_IP"), + connection_type=os.getenv("CONNECTION_TYPE", "webrtc"), + ) + + robot.start() + time.sleep(3) + + logger.info("Robot initialized successfully") + return robot + + def setup_agent(self, robot: UnitreeGo2, system_prompt: str) -> Agent: + """Create and configure the agent with skills.""" + logger.info("Setting up agent with skills...") + + # Create skill container with robot reference + skill_container = UnitreeSkillContainer(robot=robot) + + # Create agent + # Note: For Claude/Anthropic support, we'd need to extend the Agent class + # For now, using OpenAI as a placeholder + agent = Agent( + system_prompt=system_prompt, + model=Model.GPT_4O, # Could add CLAUDE models to enum + provider=Provider.OPENAI, # Would need ANTHROPIC provider + ) + + # Register skills + agent.register_skills(skill_container) + + # Start agent + agent.start() + # Log available skills + tools = agent.get_tools() + logger.info(f"Agent configured with {len(tools)} skills:") + for tool in tools: # Show first 5 + logger.info(f" - {tool.name}") + + return agent + + def setup_web_interface(self) -> RobotWebInterface: + """Setup web interface for text input.""" + logger.info("Setting up web interface...") + + # Create stream subjects for web interface + agent_response_subject = rx.subject.Subject() + agent_response_stream = agent_response_subject.pipe(ops.share()) + + text_streams = { + "agent_responses": agent_response_stream, + } + + web_interface = RobotWebInterface( + port=5555, + text_streams=text_streams, + audio_subject=rx.subject.Subject(), + ) + + # Store subject for later use + self.agent_response_subject = agent_response_subject + + logger.info("Web interface created on port 5555") + return web_interface + + def handle_queries(self): + """Handle incoming queries from web interface.""" + if not self.web_interface or not self.agent: + return + + # Subscribe to query stream from web interface + def process_query(query_text): + if not query_text or not self.running: + return + + logger.info(f"Received query: {query_text}") + + try: + # Process query with agent (blocking call) + response = self.agent.query(query_text) + + # Send response back through web interface + if response and self.agent_response_subject: + self.agent_response_subject.on_next(response) + logger.info( + f"Agent response: {response[:100]}..." + if len(response) > 100 + else f"Agent response: {response}" + ) + + except Exception as e: + logger.error(f"Error processing query: {e}") + if self.agent_response_subject: + self.agent_response_subject.on_next(f"Error: {str(e)}") + + # Subscribe to web interface query stream + if hasattr(self.web_interface, "query_stream"): + self.web_interface.query_stream.subscribe(process_query) + logger.info("Subscribed to web interface queries") + + def run(self): + """Main run loop.""" + print("\n" + "=" * 60) + print("Unitree Go2 Robot with agents2 Framework") + print("=" * 60) + print("\nThis system integrates:") + print(" - Unitree Go2 quadruped robot") + print(" - WebRTC communication interface") + print(" - LangChain-based agent system (agents2)") + print(" - Converted skill system with @skill decorators") + print(" - Web interface for text input") + print("\nStarting system...\n") + + # Check for API key (would need ANTHROPIC_API_KEY for Claude) + if not os.getenv("OPENAI_API_KEY"): + print("WARNING: OPENAI_API_KEY not found in environment") + print("Please set your API key in .env file or environment") + print("(Note: Full Claude support would require ANTHROPIC_API_KEY)") + sys.exit(1) + + # Load system prompt + try: + with open(SYSTEM_PROMPT_PATH, "r") as f: + system_prompt = f.read() + except FileNotFoundError: + logger.warning(f"System prompt file not found at {SYSTEM_PROMPT_PATH}") + system_prompt = """You are a helpful robot assistant controlling a Unitree Go2 quadruped robot. +You can move, navigate, speak, and perform various actions. Be helpful and friendly.""" + + try: + # Setup components + self.robot = self.setup_robot() + self.agent = self.setup_agent(self.robot, system_prompt) + self.web_interface = self.setup_web_interface() + + # Start handling queries + self.running = True + self.handle_queries() + + logger.info("=" * 60) + logger.info("Unitree Go2 Agent Ready (agents2 framework)!") + logger.info(f"Web interface available at: http://localhost:5555") + logger.info("You can:") + logger.info(" - Type commands in the web interface") + logger.info(" - Ask the robot to move or navigate") + logger.info(" - Ask the robot to perform actions (sit, stand, dance, etc.)") + logger.info(" - Ask the robot to speak text") + logger.info("=" * 60) + + # Test query - agent.start() now handles the event loop + try: + logger.info("Testing agent query...") + result = self.agent.query("Hello, what can you do?") + logger.info(f"Agent query result: {result}") + except Exception as e: + logger.error(f"Error during test query: {e}") + # Continue anyway - the web interface will handle future queries + + # Run web interface (blocks) + self.web_interface.run() + + except KeyboardInterrupt: + logger.info("Keyboard interrupt received") + except Exception as e: + logger.error(f"Error running robot: {e}") + import traceback + + traceback.print_exc() + finally: + self.shutdown() + + def shutdown(self): + """Clean shutdown of all components.""" + logger.info("Shutting down...") + self.running = False + + if self.agent: + try: + self.agent.stop() + logger.info("Agent stopped") + except Exception as e: + logger.error(f"Error stopping agent: {e}") + + if self.robot: + try: + # WebRTC robot doesn't have a stop method + logger.info("Robot connection closed") + except Exception as e: + logger.error(f"Error stopping robot: {e}") + + logger.info("Shutdown complete") + + +def main(): + """Entry point for the application.""" + runner = UnitreeAgentRunner() + runner.run() + + +if __name__ == "__main__": + main() diff --git a/dimos/agents2/temp/run_unitree_async.py b/dimos/agents2/temp/run_unitree_async.py new file mode 100644 index 0000000000..cb870096da --- /dev/null +++ b/dimos/agents2/temp/run_unitree_async.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Async version of the Unitree run file for agents2. +Properly handles the async nature of the agent. +""" + +import asyncio +import os +import sys +from pathlib import Path +from dotenv import load_dotenv + +# Add parent directories to path +sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) + +from dimos.robot.unitree_webrtc.unitree_go2 import UnitreeGo2 +from dimos.robot.unitree_webrtc.unitree_skill_container import UnitreeSkillContainer +from dimos.agents2 import Agent +from dimos.agents2.spec import Model, Provider +from dimos.utils.logging_config import setup_logger + +logger = setup_logger("run_unitree_async") + +# Load environment variables +load_dotenv() + +# System prompt path +SYSTEM_PROMPT_PATH = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "assets/agent/prompt.txt", +) + + +async def handle_query(agent, query_text): + """Handle a single query asynchronously.""" + logger.info(f"Processing query: {query_text}") + + try: + # Use query_async which returns a Future + future = agent.query_async(query_text) + + # Wait for the result (with timeout) + await asyncio.wait_for(asyncio.wrap_future(future), timeout=30.0) + + # Get the result + if future.done(): + result = future.result() + logger.info(f"Agent response: {result}") + return result + else: + logger.warning("Query did not complete") + return "Query timeout" + + except asyncio.TimeoutError: + logger.error("Query timed out after 30 seconds") + return "Query timeout" + except Exception as e: + logger.error(f"Error processing query: {e}") + return f"Error: {str(e)}" + + +async def interactive_loop(agent): + """Run an interactive query loop.""" + print("\n" + "=" * 60) + print("Interactive Agent Mode") + print("Type your commands or 'quit' to exit") + print("=" * 60 + "\n") + + while True: + try: + # Get user input + query = input("\nYou: ").strip() + + if query.lower() in ["quit", "exit", "q"]: + break + + if not query: + continue + + # Process query + response = await handle_query(agent, query) + print(f"\nAgent: {response}") + + except KeyboardInterrupt: + break + except Exception as e: + logger.error(f"Error in interactive loop: {e}") + + +async def main(): + """Main async function.""" + print("\n" + "=" * 60) + print("Unitree Go2 Robot with agents2 Framework (Async)") + print("=" * 60) + + # Check for API key + if not os.getenv("OPENAI_API_KEY"): + print("ERROR: OPENAI_API_KEY not found") + print("Set your API key in .env file or environment") + sys.exit(1) + + # Load system prompt + try: + with open(SYSTEM_PROMPT_PATH, "r") as f: + system_prompt = f.read() + except FileNotFoundError: + system_prompt = """You are a helpful robot assistant controlling a Unitree Go2 robot. +You have access to various movement and control skills. Be helpful and concise.""" + + # Initialize robot (optional - comment out if no robot) + robot = None + if os.getenv("ROBOT_IP"): + try: + logger.info("Connecting to robot...") + robot = UnitreeGo2( + ip=os.getenv("ROBOT_IP"), + connection_type=os.getenv("CONNECTION_TYPE", "webrtc"), + ) + robot.start() + await asyncio.sleep(3) + logger.info("Robot connected") + except Exception as e: + logger.warning(f"Could not connect to robot: {e}") + logger.info("Continuing without robot...") + + # Create skill container + skill_container = UnitreeSkillContainer(robot=robot) + + # Create agent + agent = Agent( + system_prompt=system_prompt, + model=Model.GPT_4O_MINI, # Using mini for faster responses + provider=Provider.OPENAI, + ) + + # Register skills and start + agent.register_skills(skill_container) + agent.start() + + # Log available skills + skills = skill_container.skills() + logger.info(f"Agent initialized with {len(skills)} skills") + + # Test query + print("\n--- Testing agent query ---") + test_response = await handle_query(agent, "Hello! Can you list 5 of your movement skills?") + print(f"Test response: {test_response}\n") + + # Run interactive loop + try: + await interactive_loop(agent) + except KeyboardInterrupt: + logger.info("Interrupted by user") + + # Clean up + logger.info("Shutting down...") + agent.stop() + if robot: + logger.info("Robot disconnected") + + print("\nGoodbye!") + + +if __name__ == "__main__": + # Run the async main function + asyncio.run(main()) diff --git a/dimos/agents2/temp/test_agent_query.py b/dimos/agents2/temp/test_agent_query.py new file mode 100644 index 0000000000..226ce4da8e --- /dev/null +++ b/dimos/agents2/temp/test_agent_query.py @@ -0,0 +1,213 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Test script to debug agent query issues. +Shows different ways to call the agent and handle async. +""" + +import asyncio +import os +import sys +import time +from pathlib import Path +from dotenv import load_dotenv + +# Add parent directories to path +sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) + +from dimos.robot.unitree_webrtc.unitree_skill_container import UnitreeSkillContainer +from dimos.agents2 import Agent +from dimos.agents2.spec import Model, Provider +from dimos.utils.logging_config import setup_logger + +logger = setup_logger("test_agent_query") + +# Load environment variables +load_dotenv() + + +async def test_async_query(): + """Test agent query using async/await pattern.""" + print("\n=== Testing Async Query ===\n") + + # Create skill container + container = UnitreeSkillContainer(robot=None) + + # Create agent + agent = Agent( + system_prompt="You are a helpful robot assistant. List 3 skills you can do.", + model=Model.GPT_4O_MINI, + provider=Provider.OPENAI, + ) + + # Register skills and start + agent.register_skills(container) + agent.start() + + # Query asynchronously + logger.info("Sending async query...") + future = agent.query_async("Hello! What skills do you have?") + + # Wait for result + logger.info("Waiting for response...") + await asyncio.sleep(10) # Give it time to process + + # Check if future is done + if hasattr(future, "done") and future.done(): + try: + result = future.result() + logger.info(f"Got result: {result}") + except Exception as e: + logger.error(f"Future failed: {e}") + else: + logger.warning("Future not completed yet") + + # Clean up + agent.stop() + + return future + + +def test_sync_query_with_thread(): + """Test agent query using threading for the event loop.""" + print("\n=== Testing Sync Query with Thread ===\n") + + import threading + + # Create skill container + container = UnitreeSkillContainer(robot=None) + + # Create agent + agent = Agent( + system_prompt="You are a helpful robot assistant. List 3 skills you can do.", + model=Model.GPT_4O_MINI, + provider=Provider.OPENAI, + ) + + # Register skills and start + agent.register_skills(container) + agent.start() + + # The agent's event loop should be running in the Module's thread + # Let's check if it's running + if agent._loop and agent._loop.is_running(): + logger.info("Agent's event loop is running") + else: + logger.warning("Agent's event loop is NOT running - this is the problem!") + + # Try to run the loop in a thread + def run_loop(): + asyncio.set_event_loop(agent._loop) + agent._loop.run_forever() + + thread = threading.Thread(target=run_loop, daemon=True) + thread.start() + time.sleep(1) # Give loop time to start + logger.info("Started event loop in thread") + + # Now try the query + try: + logger.info("Sending sync query...") + result = agent.query("Hello! What skills do you have?") + logger.info(f"Got result: {result}") + except Exception as e: + logger.error(f"Query failed: {e}") + import traceback + + traceback.print_exc() + + # Clean up + agent.stop() + + +def test_with_real_module_system(): + """Test using the real DimOS module system (like in test_agent.py).""" + print("\n=== Testing with Module System ===\n") + + from dimos.core import start + + # Start the DimOS system + dimos = start(2) + + # Deploy container and agent as modules + container = dimos.deploy(UnitreeSkillContainer, robot=None) + agent = dimos.deploy( + Agent, + system_prompt="You are a helpful robot assistant. List 3 skills you can do.", + model=Model.GPT_4O_MINI, + provider=Provider.OPENAI, + ) + + # Register skills + agent.register_skills(container) + agent.start() + + # Query + try: + logger.info("Sending query through module system...") + future = agent.query_async("Hello! What skills do you have?") + + # In the module system, the loop should be running + time.sleep(5) # Wait for processing + + if hasattr(future, "result"): + result = future.result(timeout=10) + logger.info(f"Got result: {result}") + except Exception as e: + logger.error(f"Query failed: {e}") + + # Clean up + agent.stop() + dimos.stop() + + +def main(): + """Run tests based on available API key.""" + + if not os.getenv("OPENAI_API_KEY"): + print("ERROR: OPENAI_API_KEY not set") + print("Please set your OpenAI API key to test the agent") + sys.exit(1) + + print("=" * 60) + print("Agent Query Testing") + print("=" * 60) + + # Test 1: Async query + try: + asyncio.run(test_async_query()) + except Exception as e: + logger.error(f"Async test failed: {e}") + + # Test 2: Sync query with threading + try: + test_sync_query_with_thread() + except Exception as e: + logger.error(f"Sync test failed: {e}") + + # Test 3: Module system (optional - more complex) + # try: + # test_with_real_module_system() + # except Exception as e: + # logger.error(f"Module test failed: {e}") + + print("\n" + "=" * 60) + print("Testing complete") + print("=" * 60) + + +if __name__ == "__main__": + main() diff --git a/dimos/agents2/temp/test_event_loop.py b/dimos/agents2/temp/test_event_loop.py new file mode 100644 index 0000000000..0af5523a56 --- /dev/null +++ b/dimos/agents2/temp/test_event_loop.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Test that event loop handling works correctly in both Dask and non-Dask environments. +""" + +import os +import sys +from pathlib import Path + +# Add parent directories to path +sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) + +from dimos.robot.unitree_webrtc.unitree_skill_container import UnitreeSkillContainer +from dimos.agents2 import Agent +from dimos.agents2.spec import Model, Provider + + +def test_non_dask(): + """Test agent outside of Dask.""" + print("\n=== Testing Non-Dask Environment ===") + + # Mock API key to avoid that error + os.environ["OPENAI_API_KEY"] = "test-key-12345" + + try: + container = UnitreeSkillContainer(robot=None) + agent = Agent(system_prompt="Test agent", model=Model.GPT_4O_MINI, provider=Provider.OPENAI) + agent.register_skills(container) + + print("Starting agent (should start event loop in thread)...") + agent.start() + + # Check if loop is set + if agent._loop: + print(f"Event loop type: {type(agent._loop).__name__}") + if hasattr(agent._loop, "is_running"): + print(f"Event loop running: {agent._loop.is_running()}") + + print("✓ Non-Dask test passed") + agent.stop() + + except Exception as e: + print(f"✗ Non-Dask test failed: {e}") + finally: + # Clean up mock key + del os.environ["OPENAI_API_KEY"] + + +def test_with_dask(): + """Test agent inside Dask.""" + print("\n=== Testing Dask Environment ===") + + # Mock API key + os.environ["OPENAI_API_KEY"] = "test-key-12345" + + try: + from dimos.core import start + + print("Starting Dask cluster...") + dimos = start(2) + + # Create container directly (not a Module) + container = UnitreeSkillContainer(robot=None) + + print("Deploying agent as Module...") + agent = dimos.deploy( + Agent, system_prompt="Test agent", model=Model.GPT_4O_MINI, provider=Provider.OPENAI + ) + + print("Registering skills and starting agent...") + agent.register_skills(container) + agent.start() + + print("✓ Dask test passed - no AsyncIOMainLoop error!") + + agent.stop() + dimos.stop() + + except Exception as e: + print(f"✗ Dask test failed: {e}") + import traceback + + traceback.print_exc() + finally: + # Clean up + if "OPENAI_API_KEY" in os.environ: + del os.environ["OPENAI_API_KEY"] + + +def main(): + print("=" * 60) + print("Event Loop Handling Test") + print("=" * 60) + + # Test 1: Outside Dask + test_non_dask() + + # Test 2: Inside Dask + test_with_dask() + + print("\n" + "=" * 60) + print("All tests complete!") + print("=" * 60) + + +if __name__ == "__main__": + main() diff --git a/dimos/agents2/temp/test_simple_query.py b/dimos/agents2/temp/test_simple_query.py new file mode 100644 index 0000000000..4e0918b280 --- /dev/null +++ b/dimos/agents2/temp/test_simple_query.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Simple test to verify the agent query works with minimal setup. +""" + +import os +import sys +from pathlib import Path +from dotenv import load_dotenv + +# Add parent directories to path +sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) + +from dimos.robot.unitree_webrtc.unitree_skill_container import UnitreeSkillContainer +from dimos.agents2 import Agent +from dimos.agents2.spec import Model, Provider + +# Load environment variables +load_dotenv() + + +def main(): + """Simple sync test.""" + + if not os.getenv("OPENAI_API_KEY"): + print("ERROR: OPENAI_API_KEY not set") + return + + print("Creating agent...") + + # Create container and agent + container = UnitreeSkillContainer(robot=None) + agent = Agent( + system_prompt="You are a helpful robot. Answer concisely.", + model=Model.GPT_4O_MINI, + provider=Provider.OPENAI, + ) + + # Register and start + agent.register_skills(container) + agent.start() # This now ensures the event loop is running + + print("Agent started. Testing query...") + + # Simple sync query - should just work now + try: + result = agent.query("What are 3 skills you can do?") + print(f"\nAgent response:\n{result}") + except Exception as e: + print(f"Query failed: {e}") + + # Clean up + agent.stop() + print("\nDone!") + + +if __name__ == "__main__": + main() diff --git a/dimos/agents2/temp/test_tornado_fix.py b/dimos/agents2/temp/test_tornado_fix.py new file mode 100644 index 0000000000..69308957f9 --- /dev/null +++ b/dimos/agents2/temp/test_tornado_fix.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Test that the Tornado AsyncIOMainLoop fix works. +""" + +import os +import sys +from pathlib import Path + +# Add parent directories to path +sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) + +os.environ["OPENAI_API_KEY"] = "test-key" + +from dimos.core import start +from dimos.agents2 import Agent +from dimos.agents2.spec import Model, Provider +from dimos.protocol.skill.test_coordinator import TestContainer + + +def test_dask_deployment(): + print("Testing Dask deployment with Tornado AsyncIOMainLoop...") + + # Start Dask cluster + dimos = start(2) + + try: + # Create TestContainer locally + testcontainer = TestContainer() + + # Deploy agent + print("Deploying agent...") + agent = dimos.deploy( + Agent, system_prompt="Test agent", model=Model.GPT_4O_MINI, provider=Provider.OPENAI + ) + + print("Registering skills...") + agent.register_skills(testcontainer) + + print("Starting agent...") + agent.start() + + print("Testing query_async...") + future = agent.query_async("What is 2+2?") + print(f"Query started, future type: {type(future)}") + + # Note: Can't easily wait for result in this test without proper async context + # But if no error occurs, the fix is working + + print("✓ Test passed - no AsyncIOMainLoop errors!") + + except Exception as e: + print(f"✗ Test failed: {e}") + import traceback + + traceback.print_exc() + finally: + dimos.stop() + + +if __name__ == "__main__": + test_dask_deployment() diff --git a/dimos/agents2/temp/test_unitree_skills.py b/dimos/agents2/temp/test_unitree_skills.py new file mode 100644 index 0000000000..d0ad7bc355 --- /dev/null +++ b/dimos/agents2/temp/test_unitree_skills.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Test file for UnitreeSkillContainer with agents2 framework. +Tests skill registration and basic functionality. +""" + +import asyncio +import sys +import os +from pathlib import Path + +# Add parent directories to path +sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) + +from dimos.agents2 import Agent +from dimos.agents2.spec import AgentConfig, Model, Provider +from dimos.robot.unitree_webrtc.unitree_skill_container import UnitreeSkillContainer +from dimos.utils.logging_config import setup_logger + +logger = setup_logger("test_unitree_skills") + + +def test_skill_container_creation(): + """Test that the skill container can be created and skills are registered.""" + print("\n=== Testing UnitreeSkillContainer Creation ===") + + # Create container without robot (for testing) + container = UnitreeSkillContainer(robot=None) + + # Get available skills from the container + skills = container.skills() + + print(f"Number of skills registered: {len(skills)}") + print("\nAvailable skills:") + for name, skill_config in list(skills.items())[:10]: # Show first 10 + print( + f" - {name}: {skill_config.description if hasattr(skill_config, 'description') else 'No description'}" + ) + if len(skills) > 10: + print(f" ... and {len(skills) - 10} more skills") + + return container, skills + + +def test_agent_with_skills(): + """Test that an agent can be created with the skill container.""" + print("\n=== Testing Agent with Skills ===") + + # Create skill container + container = UnitreeSkillContainer(robot=None) + + # Create agent with configuration passed directly + agent = Agent( + system_prompt="You are a helpful robot assistant that can control a Unitree Go2 robot.", + model=Model.GPT_4O_MINI, + provider=Provider.OPENAI, + ) + + # Register skills + agent.register_skills(container) + + print("Agent created and skills registered successfully!") + + # Get tools to verify + tools = agent.get_tools() + print(f"Agent has access to {len(tools)} tools") + + return agent + + +async def test_simple_query(): + """Test a simple query to the agent.""" + print("\n=== Testing Simple Query ===") + + # Create container and agent + container = UnitreeSkillContainer(robot=None) + agent = Agent( + system_prompt="You are a test robot. When asked to wait, use the wait skill.", + model=Model.GPT_4O_MINI, + provider=Provider.OPENAI, + ) + agent.register_skills(container) + + # Start the agent + agent.start() + + # Test query (this would normally interact with the LLM) + print("Testing agent query system...") + # Note: Actual query would require API keys and LLM interaction + # For now, just verify the system is set up correctly + + print("Query system ready (would require API keys for actual test)") + + # Clean up + agent.stop() + + +def test_skill_schemas(): + """Test that skill schemas are properly generated for LangChain.""" + print("\n=== Testing Skill Schemas ===") + + container = UnitreeSkillContainer(robot=None) + skills = container.skills() + + # Check a few key skills (using snake_case names now) + skill_names = ["move", "wait", "stand_up", "sit", "front_flip", "dance1"] + + for name in skill_names: + if name in skills: + skill_config = skills[name] + print(f"\n{name} skill:") + print(f" Config: {skill_config}") + if hasattr(skill_config, "schema"): + print( + f" Schema keys: {skill_config.schema.keys() if skill_config.schema else 'None'}" + ) + else: + print(f"\nWARNING: Skill '{name}' not found!") + + +def main(): + """Run all tests.""" + print("=" * 60) + print("Testing UnitreeSkillContainer with agents2 Framework") + print("=" * 60) + + try: + # Test 1: Container creation + container, skills = test_skill_container_creation() + + # Test 2: Agent with skills + agent = test_agent_with_skills() + + # Test 3: Skill schemas + test_skill_schemas() + + # Test 4: Simple query (async) + # asyncio.run(test_simple_query()) + print("\n=== Async query test skipped (would require running agent) ===") + + print("\n" + "=" * 60) + print("All tests completed successfully!") + print("=" * 60) + + except Exception as e: + print(f"\nERROR during testing: {e}") + import traceback + + traceback.print_exc() + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/dimos/robot/unitree_webrtc/unitree_skill_container.py b/dimos/robot/unitree_webrtc/unitree_skill_container.py new file mode 100644 index 0000000000..aae2547d57 --- /dev/null +++ b/dimos/robot/unitree_webrtc/unitree_skill_container.py @@ -0,0 +1,169 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Unitree skill container for the new agents2 framework. +Dynamically generates skills from UNITREE_WEBRTC_CONTROLS list. +""" + +from __future__ import annotations +import time +from typing import Optional, TYPE_CHECKING + +from dimos.protocol.skill.skill import SkillContainer, skill +from dimos.msgs.geometry_msgs import Vector3 +from dimos.utils.logging_config import setup_logger +from dimos.protocol.skill.type import Output, Reducer, Stream +import datetime + +if TYPE_CHECKING: + from dimos.robot.unitree_webrtc.unitree_go2 import UnitreeGo2 + +logger = setup_logger("dimos.robot.unitree_webrtc.unitree_skill_container") + +# Import constants from unitree_skills +from dimos.robot.unitree_webrtc.unitree_skills import UNITREE_WEBRTC_CONTROLS +from go2_webrtc_driver.constants import RTC_TOPIC + + +class UnitreeSkillContainer(SkillContainer): + """Container for Unitree Go2 robot skills using the new framework.""" + + def __init__(self, robot: Optional["UnitreeGo2"] = None): + """Initialize the skill container with robot reference. + + Args: + robot: The UnitreeGo2 robot instance + """ + super().__init__() + self._robot = robot + + # Dynamically generate skills from UNITREE_WEBRTC_CONTROLS + self._generate_unitree_skills() + + def _generate_unitree_skills(self): + """Dynamically generate skills from the UNITREE_WEBRTC_CONTROLS list.""" + logger.info(f"Generating {len(UNITREE_WEBRTC_CONTROLS)} dynamic Unitree skills") + + for name, api_id, description in UNITREE_WEBRTC_CONTROLS: + if name not in ["Reverse", "Spin"]: # Exclude reverse and spin as in original + # Convert CamelCase to snake_case for method name + skill_name = self._convert_to_snake_case(name) + self._create_dynamic_skill(skill_name, api_id, description, name) + + def _convert_to_snake_case(self, name: str) -> str: + """Convert CamelCase to snake_case. + + Examples: + StandUp -> stand_up + RecoveryStand -> recovery_stand + FrontFlip -> front_flip + """ + result = [] + for i, char in enumerate(name): + if i > 0 and char.isupper(): + result.append("_") + result.append(char.lower()) + return "".join(result) + + def _create_dynamic_skill( + self, skill_name: str, api_id: int, description: str, original_name: str + ): + """Create a dynamic skill method with the @skill decorator. + + Args: + skill_name: Snake_case name for the method + api_id: The API command ID + description: Human-readable description + original_name: Original CamelCase name for display + """ + + # Define the skill function + def dynamic_skill_func(self) -> str: + """Dynamic skill function.""" + return self._execute_sport_command(api_id, original_name) + + # Set the function's metadata + dynamic_skill_func.__name__ = skill_name + dynamic_skill_func.__doc__ = description + + # Apply the @skill decorator + decorated_skill = skill()(dynamic_skill_func) + + # Bind the method to the instance + bound_method = decorated_skill.__get__(self, self.__class__) + + # Add it as an attribute + setattr(self, skill_name, bound_method) + + logger.debug(f"Generated skill: {skill_name} (API ID: {api_id})") + + # ========== Explicit Skills ========== + + @skill() + def move(self, x: float, y: float = 0.0, yaw: float = 0.0, duration: float = 0.0) -> str: + """Move the robot using direct velocity commands. Determine duration required based on user distance instructions. + + Args: + x: Forward velocity (m/s) + y: Left/right velocity (m/s) + yaw: Rotational velocity (rad/s) + duration: How long to move (seconds) + """ + if self._robot is None: + return "Error: Robot not connected" + + self._robot.move(Vector3(x, y, yaw), duration=duration) + return f"Started moving with velocity=({x}, {y}, {yaw}) for {duration} seconds" + + @skill() + def wait(self, seconds: float) -> str: + """Wait for a specified amount of time. + + Args: + seconds: Seconds to wait + """ + time.sleep(seconds) + return f"Wait completed with length={seconds}s" + + @skill(stream=Stream.passive, reducer=Reducer.latest) + def current_time(self, frequency: Optional[float] = 10) -> Generator[str, None, None]: + """Provides current time.""" + while True: + yield datetime.datetime.now() + time.sleep(1 / frequency) + + # ========== Helper Methods ========== + + def _execute_sport_command(self, api_id: int, name: str) -> str: + """Execute a sport command through WebRTC interface. + + Args: + api_id: The API command ID + name: Human-readable name of the command + """ + if self._robot is None: + return f"Error: Robot not connected (cannot execute {name})" + + try: + result = self._robot.connection.publish_request( + RTC_TOPIC["SPORT_MOD"], {"api_id": api_id} + ) + message = f"{name} command executed successfully (id={api_id})" + logger.info(message) + return message + except Exception as e: + error_msg = f"Failed to execute {name}: {e}" + logger.error(error_msg) + return error_msg From f2c82a8c5cd2308230e5ee32b1a0661b04c7e8a8 Mon Sep 17 00:00:00 2001 From: stash Date: Tue, 2 Sep 2025 17:44:01 -0700 Subject: [PATCH 48/48] Rewrote LM generated tests, deleted reduntant tests --- dimos/agents2/temp/run_unitree_agents2.py | 18 +-- dimos/agents2/temp/test_event_loop.py | 121 ------------------ dimos/agents2/temp/test_simple_query.py | 72 ----------- dimos/agents2/temp/test_tornado_fix.py | 76 ----------- ...t_query.py => test_unitree_agent_query.py} | 78 +++++------ ...lls.py => test_unitree_skill_container.py} | 0 6 files changed, 48 insertions(+), 317 deletions(-) delete mode 100644 dimos/agents2/temp/test_event_loop.py delete mode 100644 dimos/agents2/temp/test_simple_query.py delete mode 100644 dimos/agents2/temp/test_tornado_fix.py rename dimos/agents2/temp/{test_agent_query.py => test_unitree_agent_query.py} (80%) rename dimos/agents2/temp/{test_unitree_skills.py => test_unitree_skill_container.py} (100%) diff --git a/dimos/agents2/temp/run_unitree_agents2.py b/dimos/agents2/temp/run_unitree_agents2.py index b3a6e29b9d..4f50c3aaa6 100644 --- a/dimos/agents2/temp/run_unitree_agents2.py +++ b/dimos/agents2/temp/run_unitree_agents2.py @@ -143,7 +143,7 @@ def process_query(query_text): try: # Process query with agent (blocking call) - response = self.agent.query(query_text) + response = self.agent.query_async(query_text) # Send response back through web interface if response and self.agent_response_subject: @@ -213,14 +213,14 @@ def run(self): logger.info(" - Ask the robot to speak text") logger.info("=" * 60) - # Test query - agent.start() now handles the event loop - try: - logger.info("Testing agent query...") - result = self.agent.query("Hello, what can you do?") - logger.info(f"Agent query result: {result}") - except Exception as e: - logger.error(f"Error during test query: {e}") - # Continue anyway - the web interface will handle future queries + # # Test query - agent.start() now handles the event loop + # try: + # logger.info("Testing agent query...") + # result = self.agent.query("Hello, what can you do?") + # logger.info(f"Agent query result: {result}") + # except Exception as e: + # logger.error(f"Error during test query: {e}") + # # Continue anyway - the web interface will handle future queries # Run web interface (blocks) self.web_interface.run() diff --git a/dimos/agents2/temp/test_event_loop.py b/dimos/agents2/temp/test_event_loop.py deleted file mode 100644 index 0af5523a56..0000000000 --- a/dimos/agents2/temp/test_event_loop.py +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Test that event loop handling works correctly in both Dask and non-Dask environments. -""" - -import os -import sys -from pathlib import Path - -# Add parent directories to path -sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) - -from dimos.robot.unitree_webrtc.unitree_skill_container import UnitreeSkillContainer -from dimos.agents2 import Agent -from dimos.agents2.spec import Model, Provider - - -def test_non_dask(): - """Test agent outside of Dask.""" - print("\n=== Testing Non-Dask Environment ===") - - # Mock API key to avoid that error - os.environ["OPENAI_API_KEY"] = "test-key-12345" - - try: - container = UnitreeSkillContainer(robot=None) - agent = Agent(system_prompt="Test agent", model=Model.GPT_4O_MINI, provider=Provider.OPENAI) - agent.register_skills(container) - - print("Starting agent (should start event loop in thread)...") - agent.start() - - # Check if loop is set - if agent._loop: - print(f"Event loop type: {type(agent._loop).__name__}") - if hasattr(agent._loop, "is_running"): - print(f"Event loop running: {agent._loop.is_running()}") - - print("✓ Non-Dask test passed") - agent.stop() - - except Exception as e: - print(f"✗ Non-Dask test failed: {e}") - finally: - # Clean up mock key - del os.environ["OPENAI_API_KEY"] - - -def test_with_dask(): - """Test agent inside Dask.""" - print("\n=== Testing Dask Environment ===") - - # Mock API key - os.environ["OPENAI_API_KEY"] = "test-key-12345" - - try: - from dimos.core import start - - print("Starting Dask cluster...") - dimos = start(2) - - # Create container directly (not a Module) - container = UnitreeSkillContainer(robot=None) - - print("Deploying agent as Module...") - agent = dimos.deploy( - Agent, system_prompt="Test agent", model=Model.GPT_4O_MINI, provider=Provider.OPENAI - ) - - print("Registering skills and starting agent...") - agent.register_skills(container) - agent.start() - - print("✓ Dask test passed - no AsyncIOMainLoop error!") - - agent.stop() - dimos.stop() - - except Exception as e: - print(f"✗ Dask test failed: {e}") - import traceback - - traceback.print_exc() - finally: - # Clean up - if "OPENAI_API_KEY" in os.environ: - del os.environ["OPENAI_API_KEY"] - - -def main(): - print("=" * 60) - print("Event Loop Handling Test") - print("=" * 60) - - # Test 1: Outside Dask - test_non_dask() - - # Test 2: Inside Dask - test_with_dask() - - print("\n" + "=" * 60) - print("All tests complete!") - print("=" * 60) - - -if __name__ == "__main__": - main() diff --git a/dimos/agents2/temp/test_simple_query.py b/dimos/agents2/temp/test_simple_query.py deleted file mode 100644 index 4e0918b280..0000000000 --- a/dimos/agents2/temp/test_simple_query.py +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Simple test to verify the agent query works with minimal setup. -""" - -import os -import sys -from pathlib import Path -from dotenv import load_dotenv - -# Add parent directories to path -sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) - -from dimos.robot.unitree_webrtc.unitree_skill_container import UnitreeSkillContainer -from dimos.agents2 import Agent -from dimos.agents2.spec import Model, Provider - -# Load environment variables -load_dotenv() - - -def main(): - """Simple sync test.""" - - if not os.getenv("OPENAI_API_KEY"): - print("ERROR: OPENAI_API_KEY not set") - return - - print("Creating agent...") - - # Create container and agent - container = UnitreeSkillContainer(robot=None) - agent = Agent( - system_prompt="You are a helpful robot. Answer concisely.", - model=Model.GPT_4O_MINI, - provider=Provider.OPENAI, - ) - - # Register and start - agent.register_skills(container) - agent.start() # This now ensures the event loop is running - - print("Agent started. Testing query...") - - # Simple sync query - should just work now - try: - result = agent.query("What are 3 skills you can do?") - print(f"\nAgent response:\n{result}") - except Exception as e: - print(f"Query failed: {e}") - - # Clean up - agent.stop() - print("\nDone!") - - -if __name__ == "__main__": - main() diff --git a/dimos/agents2/temp/test_tornado_fix.py b/dimos/agents2/temp/test_tornado_fix.py deleted file mode 100644 index 69308957f9..0000000000 --- a/dimos/agents2/temp/test_tornado_fix.py +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Test that the Tornado AsyncIOMainLoop fix works. -""" - -import os -import sys -from pathlib import Path - -# Add parent directories to path -sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) - -os.environ["OPENAI_API_KEY"] = "test-key" - -from dimos.core import start -from dimos.agents2 import Agent -from dimos.agents2.spec import Model, Provider -from dimos.protocol.skill.test_coordinator import TestContainer - - -def test_dask_deployment(): - print("Testing Dask deployment with Tornado AsyncIOMainLoop...") - - # Start Dask cluster - dimos = start(2) - - try: - # Create TestContainer locally - testcontainer = TestContainer() - - # Deploy agent - print("Deploying agent...") - agent = dimos.deploy( - Agent, system_prompt="Test agent", model=Model.GPT_4O_MINI, provider=Provider.OPENAI - ) - - print("Registering skills...") - agent.register_skills(testcontainer) - - print("Starting agent...") - agent.start() - - print("Testing query_async...") - future = agent.query_async("What is 2+2?") - print(f"Query started, future type: {type(future)}") - - # Note: Can't easily wait for result in this test without proper async context - # But if no error occurs, the fix is working - - print("✓ Test passed - no AsyncIOMainLoop errors!") - - except Exception as e: - print(f"✗ Test failed: {e}") - import traceback - - traceback.print_exc() - finally: - dimos.stop() - - -if __name__ == "__main__": - test_dask_deployment() diff --git a/dimos/agents2/temp/test_agent_query.py b/dimos/agents2/temp/test_unitree_agent_query.py similarity index 80% rename from dimos/agents2/temp/test_agent_query.py rename to dimos/agents2/temp/test_unitree_agent_query.py index 226ce4da8e..19446d8cf2 100644 --- a/dimos/agents2/temp/test_agent_query.py +++ b/dimos/agents2/temp/test_unitree_agent_query.py @@ -133,45 +133,45 @@ def run_loop(): agent.stop() -def test_with_real_module_system(): - """Test using the real DimOS module system (like in test_agent.py).""" - print("\n=== Testing with Module System ===\n") - - from dimos.core import start - - # Start the DimOS system - dimos = start(2) - - # Deploy container and agent as modules - container = dimos.deploy(UnitreeSkillContainer, robot=None) - agent = dimos.deploy( - Agent, - system_prompt="You are a helpful robot assistant. List 3 skills you can do.", - model=Model.GPT_4O_MINI, - provider=Provider.OPENAI, - ) - - # Register skills - agent.register_skills(container) - agent.start() - - # Query - try: - logger.info("Sending query through module system...") - future = agent.query_async("Hello! What skills do you have?") - - # In the module system, the loop should be running - time.sleep(5) # Wait for processing - - if hasattr(future, "result"): - result = future.result(timeout=10) - logger.info(f"Got result: {result}") - except Exception as e: - logger.error(f"Query failed: {e}") - - # Clean up - agent.stop() - dimos.stop() +# def test_with_real_module_system(): +# """Test using the real DimOS module system (like in test_agent.py).""" +# print("\n=== Testing with Module System ===\n") + +# from dimos.core import start + +# # Start the DimOS system +# dimos = start(2) + +# # Deploy container and agent as modules +# container = dimos.deploy(UnitreeSkillContainer, robot=None) +# agent = dimos.deploy( +# Agent, +# system_prompt="You are a helpful robot assistant. List 3 skills you can do.", +# model=Model.GPT_4O_MINI, +# provider=Provider.OPENAI, +# ) + +# # Register skills +# agent.register_skills(container) +# agent.start() + +# # Query +# try: +# logger.info("Sending query through module system...") +# future = agent.query_async("Hello! What skills do you have?") + +# # In the module system, the loop should be running +# time.sleep(5) # Wait for processing + +# if hasattr(future, "result"): +# result = future.result(timeout=10) +# logger.info(f"Got result: {result}") +# except Exception as e: +# logger.error(f"Query failed: {e}") + +# # Clean up +# agent.stop() +# dimos.stop() def main(): diff --git a/dimos/agents2/temp/test_unitree_skills.py b/dimos/agents2/temp/test_unitree_skill_container.py similarity index 100% rename from dimos/agents2/temp/test_unitree_skills.py rename to dimos/agents2/temp/test_unitree_skill_container.py