From 37bc2e86237ef415f01fd8e1a328abfcc3d3cac7 Mon Sep 17 00:00:00 2001 From: lesh Date: Sat, 9 Aug 2025 10:58:37 -0700 Subject: [PATCH 01/59] agents2 --- dimos/agents2/__init__.py | 8 + dimos/agents2/main.py | 78 +++++ dimos/agents2/test_main.py | 51 +++ dimos/core/module.py | 21 +- dimos/protocol/skill/__init__.py | 1 - dimos/protocol/skill/comms.py | 18 +- dimos/protocol/skill/coordinator.py | 395 ++++++++++++++++++++++ dimos/protocol/skill/schema.py | 103 ++++++ dimos/protocol/skill/skill.py | 29 +- dimos/protocol/skill/test_coordinator.py | 105 ++++++ dimos/protocol/skill/test_skill.py | 28 +- dimos/protocol/skill/testing_utils.py | 28 ++ dimos/protocol/skill/type.py | 146 ++++++++ dimos/utils/cli/agentspy/agentspy.py | 65 ++-- dimos/utils/cli/agentspy/demo_agentspy.py | 21 +- 15 files changed, 1022 insertions(+), 75 deletions(-) create mode 100644 dimos/agents2/__init__.py create mode 100644 dimos/agents2/main.py create mode 100644 dimos/agents2/test_main.py create mode 100644 dimos/protocol/skill/coordinator.py create mode 100644 dimos/protocol/skill/schema.py create mode 100644 dimos/protocol/skill/test_coordinator.py create mode 100644 dimos/protocol/skill/testing_utils.py create mode 100644 dimos/protocol/skill/type.py diff --git a/dimos/agents2/__init__.py b/dimos/agents2/__init__.py new file mode 100644 index 0000000000..6a756fbaab --- /dev/null +++ b/dimos/agents2/__init__.py @@ -0,0 +1,8 @@ +from langchain_core.messages import ( + AIMessage, + HumanMessage, + MessageLikeRepresentation, + SystemMessage, + ToolCall, + ToolMessage, +) diff --git a/dimos/agents2/main.py b/dimos/agents2/main.py new file mode 100644 index 0000000000..8e2da24903 --- /dev/null +++ b/dimos/agents2/main.py @@ -0,0 +1,78 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import asyncio +from pprint import pprint + +from langchain.chat_models import init_chat_model +from langchain_core.language_models.chat_models import BaseChatModel +from langchain_core.messages import ( + AIMessage, + HumanMessage, + MessageLikeRepresentation, + SystemMessage, + ToolCall, + ToolMessage, +) + +from dimos.core import Module, rpc +from dimos.protocol.skill import skill +from dimos.protocol.skill.coordinator import SkillCoordinator, SkillState +from dimos.utils.logging_config import setup_logger + +logger = setup_logger("dimos.protocol.agents2") + + +class Agent(SkillCoordinator): + def __init__(self, model: str = "gpt-4o", model_provider: str = "openai", *args, **kwargs): + super().__init__(*args, **kwargs) + + self.messages = [] + self._llm = init_chat_model( + model=model, + model_provider=model_provider, + ) + + async def agent_loop(self, seed_query: str = ""): + self.messages.append(HumanMessage(seed_query)) + try: + while True: + tools = self.get_tools() + self._llm = self._llm.bind_tools(tools) + + msg = self._llm.invoke(self.messages) + self.messages.append(msg) + + logger.info(f"Agent response: {msg.content}") + if msg.tool_calls: + self.execute_tool_calls(msg.tool_calls) + + if not self.has_active_skills(): + logger.info("No active tasks, exiting agent loop.") + return + + await self.wait_for_updates() + + for call_id, update in self.generate_snapshot(clear=True).items(): + self.messages.append(update.agent_encode()) + + except Exception as e: + logger.error(f"Error in agent loop: {e}") + import traceback + + traceback.print_exc() + + @rpc + def query(self, query: str): + asyncio.ensure_future(self.agent_loop(query), loop=self._loop) diff --git a/dimos/agents2/test_main.py b/dimos/agents2/test_main.py new file mode 100644 index 0000000000..755666b070 --- /dev/null +++ b/dimos/agents2/test_main.py @@ -0,0 +1,51 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import asyncio +import time + +import pytest + +from dimos.agents2.main import Agent +from dimos.core import start +from dimos.protocol.skill import SkillContainer, skill + + +class TestContainer(SkillContainer): + @skill() + def add(self, x: int, y: int) -> int: + """Adds two integers.""" + time.sleep(0.3) + return x + y + + @skill() + def sub(self, x: int, y: int) -> int: + """Subs two integers.""" + time.sleep(0.3) + return x - y + + +@pytest.mark.asyncio +async def test_agent_init(): + # dimos = start(2) + # agent = dimos.deploy(Agent) + agent = Agent() + agent.register_skills(TestContainer()) + agent.start() + + agent.query( + "hi there, use add tool to add 124181112 and 124124. don't sum yourself, use a tool I provided" + ) + + await asyncio.sleep(5) diff --git a/dimos/core/module.py b/dimos/core/module.py index e30df27a68..7cb2161fb8 100644 --- a/dimos/core/module.py +++ b/dimos/core/module.py @@ -11,13 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import asyncio import inspect -from enum import Enum from typing import ( Any, Callable, Optional, - TypeVar, get_args, get_origin, get_type_hints, @@ -50,18 +49,32 @@ class ModuleBase: _rpc: Optional[RPCSpec] = None _agent: Optional[SkillCommsSpec] = None _tf: Optional[TFSpec] = None + _loop: asyncio.AbstractEventLoop = None def __init__(self, *args, **kwargs): # we can completely override comms protocols if we want if kwargs.get("comms", None) is not None: self.comms = kwargs["comms"] try: - get_worker() + # here we attempt to figure out if we are running on a dask worker + # if so we use the dask worker _loop as ours, + # and we register our RPC server + worker = get_worker() + self._loop = worker.loop if worker else None self.rpc = self.comms.rpc() self.rpc.serve_module_rpc(self) self.rpc.start() except ValueError: - return + ... + + # assuming we are not running on a dask worker, + # it's our job to determine or create the event loop + if not self._loop: + try: + self._loop = asyncio.get_running_loop() + except RuntimeError: + self._loop = asyncio.new_event_loop() + asyncio.set_event_loop(self._loop) @property def tf(self): diff --git a/dimos/protocol/skill/__init__.py b/dimos/protocol/skill/__init__.py index 85b6146f56..15ebf0b59c 100644 --- a/dimos/protocol/skill/__init__.py +++ b/dimos/protocol/skill/__init__.py @@ -1,2 +1 @@ -from dimos.protocol.skill.agent_interface import AgentInterface, SkillState from dimos.protocol.skill.skill import SkillContainer, skill diff --git a/dimos/protocol/skill/comms.py b/dimos/protocol/skill/comms.py index d6e9e73bf0..7703eda3e1 100644 --- a/dimos/protocol/skill/comms.py +++ b/dimos/protocol/skill/comms.py @@ -11,27 +11,25 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations -import time from abc import abstractmethod from dataclasses import dataclass -from enum import Enum from typing import Callable, Generic, Optional, TypeVar, Union -from dimos.protocol.pubsub.lcmpubsub import PickleLCM, Topic +from dimos.protocol.pubsub.lcmpubsub import PickleLCM from dimos.protocol.pubsub.spec import PubSub from dimos.protocol.service import Service -from dimos.protocol.skill.types import AgentMsg, Call, MsgType, Reducer, SkillConfig, Stream -from dimos.types.timestamped import Timestamped +from dimos.protocol.skill.type import SkillMsg # defines a protocol for communication between skills and agents class SkillCommsSpec: @abstractmethod - def publish(self, msg: AgentMsg) -> None: ... + def publish(self, msg: SkillMsg) -> None: ... @abstractmethod - def subscribe(self, cb: Callable[[AgentMsg], None]) -> None: ... + def subscribe(self, cb: Callable[[SkillMsg], None]) -> None: ... @abstractmethod def start(self) -> None: ... @@ -74,15 +72,15 @@ def start(self) -> None: def stop(self): self.pubsub.stop() - def publish(self, msg: AgentMsg) -> None: + def publish(self, msg: SkillMsg) -> None: self.pubsub.publish(self.config.topic, msg) - def subscribe(self, cb: Callable[[AgentMsg], None]) -> None: + def subscribe(self, cb: Callable[[SkillMsg], None]) -> None: self.pubsub.subscribe(self.config.topic, lambda msg, topic: cb(msg)) @dataclass -class LCMCommsConfig(PubSubCommsConfig[str, AgentMsg]): +class LCMCommsConfig(PubSubCommsConfig[str, SkillMsg]): topic: str = "/agent" pubsub: Union[type[PubSub], PubSub, None] = PickleLCM # lcm needs to be started only if receiving diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py new file mode 100644 index 0000000000..4b0f5d27f2 --- /dev/null +++ b/dimos/protocol/skill/coordinator.py @@ -0,0 +1,395 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import asyncio +from copy import copy +from dataclasses import dataclass +from enum import Enum +from typing import Any, List, Optional + +from langchain_core.tools import tool as langchain_tool +from rich.console import Console +from rich.table import Table +from rich.text import Text + +from dimos.agents2 import ToolCall, ToolMessage +from dimos.core import Module, rpc +from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec +from dimos.protocol.skill.skill import SkillConfig, SkillContainer +from dimos.protocol.skill.type import MsgType, Reducer, Return, SkillMsg, Stream +from dimos.types.timestamped import TimestampedCollection +from dimos.utils.logging_config import setup_logger + +logger = setup_logger("dimos.protocol.skill.coordinator") + + +@dataclass +class AgentInputConfig: + agent_comms: type[SkillCommsSpec] = LCMSkillComms + + +class SkillStateEnum(Enum): + pending = 0 + running = 1 + completed = 2 + error = 3 + + def colored_name(self) -> Text: + """Return the state name as a rich Text object with color.""" + colors = { + SkillStateEnum.pending: "yellow", + SkillStateEnum.running: "blue", + SkillStateEnum.completed: "green", + SkillStateEnum.error: "red", + } + return Text(self.name, style=colors.get(self, "white")) + + +# TODO pending timeout, running timeout, etc. +# This object maintains the state of a skill run +# It is used to track the skill's progress, messages, and state +class SkillState(TimestampedCollection): + call_id: str + name: str + state: SkillStateEnum + skill_config: SkillConfig + + def __init__(self, call_id: str, name: str, skill_config: Optional[SkillConfig] = None) -> None: + super().__init__() + self.skill_config = skill_config or SkillConfig( + name=name, stream=Stream.none, ret=Return.none, reducer=Reducer.none, schema={} + ) + + self.state = SkillStateEnum.pending + self.call_id = call_id + self.name = name + + def agent_encode(self) -> ToolMessage: + last_msg = self._items[-1] + return ToolMessage(last_msg.content, name=self.name, tool_call_id=self.call_id) + + # returns True if the agent should be called for this message + def handle_msg(self, msg: SkillMsg) -> bool: + self.add(msg) + + if msg.type == MsgType.stream: + if ( + self.skill_config.stream == Stream.none + or self.skill_config.stream == Stream.passive + ): + return False + + if self.skill_config.stream == Stream.call_agent: + return True + + if msg.type == MsgType.ret: + self.state = SkillStateEnum.completed + if self.skill_config.ret == Return.call_agent: + return True + return False + + if msg.type == MsgType.error: + self.state = SkillStateEnum.error + return True + + if msg.type == MsgType.start: + self.state = SkillStateEnum.running + return False + + return False + + def __str__(self) -> str: + # For standard string representation, we'll use rich's Console to render the colored text + console = Console(force_terminal=True, legacy_windows=False) + colored_state = self.state.colored_name() + + # Build the parts of the string + parts = [Text(f"SkillState({self.name} "), colored_state, Text(f", call_id={self.call_id}")] + + if self.state == SkillStateEnum.completed or self.state == SkillStateEnum.error: + parts.append(Text(", ran for=")) + else: + parts.append(Text(", running for=")) + + parts.append(Text(f"{self.duration():.2f}s")) + + if len(self): + parts.append(Text(f", last_msg={self._items[-1]})")) + else: + parts.append(Text(", No Messages)")) + + # Combine all parts into a single Text object + combined = Text() + for part in parts: + combined.append(part) + + # Render to string with console + with console.capture() as capture: + console.print(combined, end="") + return capture.get() + + +class SkillStateDict(dict[str, SkillState]): + """Custom dict for skill states with better string representation.""" + + def __str__(self) -> str: + if not self: + return "SkillStates empty" + + lines = [] + + for call_id, skill_state in self.items(): + # Use the SkillState's own __str__ method for individual items + lines.append(f"{skill_state}") + + return "\n".join(lines) + + +class SkillCoordinator(SkillContainer, Module): + empty: bool = True + + _static_containers: list[SkillContainer] + _dynamic_containers: list[SkillContainer] + _skill_state: SkillStateDict # key is call_id, not skill_name + _skills: dict[str, SkillConfig] + _updates_available: asyncio.Event + _loop: Optional[asyncio.AbstractEventLoop] + + def __init__(self) -> None: + Module.__init__(self) + SkillContainer.__init__(self) + self._static_containers = [] + self._dynamic_containers = [] + self._skills = {} + self._skill_state = SkillStateDict() + self._updates_available = asyncio.Event() + + @rpc + def start(self) -> None: + self.agent_comms.start() + self.agent_comms.subscribe(self.handle_message) + + @rpc + def stop(self) -> None: + self.agent_comms.stop() + + def len(self) -> int: + return len(self._skills) + + def __len__(self) -> int: + return self.len() + + # this can be converted to non-langchain json schema output + # and langchain takes this output as well + # just faster for now + def get_tools(self) -> list[dict]: + # return [skill.schema for skill in self.skills().values()] + + ret = [] + for name, skill_config in self.skills().items(): + # print(f"Tool {name} config: {skill_config}, {skill_config.f}") + ret.append(langchain_tool(skill_config.f)) + + return ret + + # Used by agent to execute tool calls + def execute_tool_calls(self, tool_calls: List[ToolCall]) -> None: + """Execute a list of tool calls from the agent.""" + for tool_call in tool_calls: + logger.info(f"executing skill call {tool_call}") + self.call( + tool_call.get("id"), + tool_call.get("name"), + tool_call.get("args"), + ) + + # internal skill call + def call(self, call_id: str, skill_name: str, args: dict[str, Any]) -> None: + skill_config = self.get_skill_config(skill_name) + if not skill_config: + logger.error( + f"Skill {skill_name} not found in registered skills, but agent tried to call it (did a dynamic skill expire?)" + ) + return + + # This initializes the skill state if it doesn't exist + self._skill_state[call_id] = SkillState( + name=skill_name, skill_config=skill_config, call_id=call_id + ) + return skill_config.call(call_id, *args.get("args", []), **args.get("kwargs", {})) + + # Receives a message from active skill + # Updates local skill state (appends to streamed data if needed etc) + # + # Checks if agent needs to be notified (if ToolConfig has Return=call_agent or Stream=call_agent) + def handle_message(self, msg: SkillMsg) -> None: + logger.info(f"{msg.skill_name}, {msg.call_id} - {msg}") + + if self._skill_state.get(msg.call_id) is None: + logger.warn( + f"Skill state for {msg.skill_name} (call_id={msg.call_id}) not found, (skill not called by our agent?) initializing. (message received: {msg})" + ) + self._skill_state[msg.call_id] = SkillState(call_id=msg.call_id, name=msg.skill_name) + + should_notify = self._skill_state[msg.call_id].handle_msg(msg) + + if should_notify: + self._loop.call_soon_threadsafe(self._updates_available.set) + + def has_active_skills(self) -> bool: + # check if dict is empty + if self._skill_state == {}: + return False + return True + + async def wait_for_updates(self, timeout: Optional[float] = None) -> True: + """Wait for skill updates to become available. + + This method should be called by the agent when it's ready to receive updates. + It will block until updates are available or timeout is reached. + + Args: + timeout: Optional timeout in seconds + + Returns: + True if updates are available, False on timeout + """ + try: + if timeout: + await asyncio.wait_for(self._updates_available.wait(), timeout=timeout) + else: + await self._updates_available.wait() + return True + except asyncio.TimeoutError: + return False + + def generate_snapshot(self, clear: bool = True) -> SkillStateDict: + """Generate a fresh snapshot of completed skills and optionally clear them.""" + ret = copy(self._skill_state) + + if clear: + self._updates_available.clear() + to_delete = [] + # Since snapshot is being sent to agent, we can clear the finished skill runs + for call_id, skill_run in self._skill_state.items(): + if skill_run.state == SkillStateEnum.completed: + logger.info(f"Skill {skill_run.name} (call_id={call_id}) finished") + to_delete.append(call_id) + if skill_run.state == SkillStateEnum.error: + logger.error(f"Skill run error for {skill_run.name} (call_id={call_id})") + to_delete.append(call_id) + + for call_id in to_delete: + logger.debug(f"Call {call_id} finished, removing from state") + del self._skill_state[call_id] + + return ret + + def __str__(self): + console = Console(force_terminal=True, legacy_windows=False) + + # Create main table without any header + table = Table(show_header=False) + + # Add containers section + containers_table = Table(show_header=True, show_edge=False, box=None) + containers_table.add_column("Type", style="cyan") + containers_table.add_column("Container", style="white") + + # Add static containers + for container in self._static_containers: + containers_table.add_row("Static", str(container)) + + # Add dynamic containers + for container in self._dynamic_containers: + containers_table.add_row("Dynamic", str(container)) + + if not self._static_containers and not self._dynamic_containers: + containers_table.add_row("", "[dim]No containers registered[/dim]") + + # Add skill states section + states_table = Table(show_header=True, show_edge=False, box=None) + states_table.add_column("Call ID", style="dim", width=12) + states_table.add_column("Skill", style="white") + states_table.add_column("State", style="white") + states_table.add_column("Duration", style="yellow") + states_table.add_column("Messages", style="dim") + + for call_id, skill_state in self._skill_state.items(): + # Get colored state name + state_text = skill_state.state.colored_name() + + # Duration formatting + if ( + skill_state.state == SkillStateEnum.completed + or skill_state.state == SkillStateEnum.error + ): + duration = f"{skill_state.duration():.2f}s" + else: + duration = f"{skill_state.duration():.2f}s..." + + # Messages info + msg_count = str(len(skill_state)) + + states_table.add_row( + call_id[:8] + "...", skill_state.name, state_text, duration, msg_count + ) + + if not self._skill_state: + states_table.add_row("", "[dim]No active skills[/dim]", "", "", "") + + # Combine into main table + table.add_column("Section", style="bold") + table.add_column("Details", style="none") + table.add_row("Containers", containers_table) + table.add_row("Skills", states_table) + + # Render to string with title above + with console.capture() as capture: + console.print(Text(" SkillCoordinator", style="bold blue")) + console.print(table) + return capture.get().strip() + + # Given skillcontainers can run remotely, we are + # Caching available skills from static containers + # + # Dynamic containers will be queried at runtime via + # .skills() method + def register_skills(self, container: SkillContainer): + self.empty = False + if not container.dynamic_skills: + logger.info(f"Registering static skill container, {container}") + self._static_containers.append(container) + for name, skill_config in container.skills().items(): + self._skills[name] = skill_config.bind(getattr(container, name)) + else: + logger.info(f"Registering dynamic skill container, {container}") + self._dynamic_containers.append(container) + + def get_skill_config(self, skill_name: str) -> Optional[SkillConfig]: + skill_config = self._skills.get(skill_name) + if not skill_config: + skill_config = self.skills().get(skill_name) + return skill_config + + def skills(self) -> dict[str, SkillConfig]: + # Static container skilling is already cached + all_skills: dict[str, SkillConfig] = {**self._skills} + + # Then aggregate skills from dynamic containers + for container in self._dynamic_containers: + for skill_name, skill_config in container.skills().items(): + all_skills[skill_name] = skill_config.bind(getattr(container, skill_name)) + + return all_skills diff --git a/dimos/protocol/skill/schema.py b/dimos/protocol/skill/schema.py new file mode 100644 index 0000000000..37a6e6fac1 --- /dev/null +++ b/dimos/protocol/skill/schema.py @@ -0,0 +1,103 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect +from typing import Dict, List, Union, get_args, get_origin + + +def python_type_to_json_schema(python_type) -> dict: + """Convert Python type annotations to JSON Schema format.""" + # Handle None/NoneType + if python_type is type(None) or python_type is None: + return {"type": "null"} + + # Handle Union types (including Optional) + origin = get_origin(python_type) + if origin is Union: + args = get_args(python_type) + # Handle Optional[T] which is Union[T, None] + if len(args) == 2 and type(None) in args: + non_none_type = args[0] if args[1] is type(None) else args[1] + schema = python_type_to_json_schema(non_none_type) + # For OpenAI function calling, we don't use anyOf for optional params + return schema + else: + # For other Union types, use anyOf + return {"anyOf": [python_type_to_json_schema(arg) for arg in args]} + + # Handle List/list types + if origin in (list, List): + args = get_args(python_type) + if args: + return {"type": "array", "items": python_type_to_json_schema(args[0])} + return {"type": "array"} + + # Handle Dict/dict types + if origin in (dict, Dict): + return {"type": "object"} + + # Handle basic types + type_map = { + str: {"type": "string"}, + int: {"type": "integer"}, + float: {"type": "number"}, + bool: {"type": "boolean"}, + list: {"type": "array"}, + dict: {"type": "object"}, + } + + return type_map.get(python_type, {"type": "string"}) + + +def function_to_schema(func) -> dict: + """Convert a function to OpenAI function schema format.""" + try: + signature = inspect.signature(func) + except ValueError as e: + raise ValueError(f"Failed to get signature for function {func.__name__}: {str(e)}") + + properties = {} + required = [] + + for param_name, param in signature.parameters.items(): + # Skip 'self' parameter for methods + if param_name == "self": + continue + + # Get the type annotation + if param.annotation != inspect.Parameter.empty: + param_schema = python_type_to_json_schema(param.annotation) + else: + # Default to string if no type annotation + param_schema = {"type": "string"} + + # Add description from docstring if available (would need more sophisticated parsing) + properties[param_name] = param_schema + + # Add to required list if no default value + if param.default == inspect.Parameter.empty: + required.append(param_name) + + return { + "type": "function", + "function": { + "name": func.__name__, + "description": (func.__doc__ or "").strip(), + "parameters": { + "type": "object", + "properties": properties, + "required": required, + }, + }, + } diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index e0f868b5f9..f612ec7c83 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -17,12 +17,13 @@ from dimos.core import rpc from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec -from dimos.protocol.skill.types import ( - AgentMsg, +from dimos.protocol.skill.schema import function_to_schema +from dimos.protocol.skill.type import ( MsgType, Reducer, Return, SkillConfig, + SkillMsg, Stream, ) @@ -32,16 +33,19 @@ def decorator(f: Callable[..., Any]) -> Any: def wrapper(self, *args, **kwargs): skill = f"{f.__name__}" - if kwargs.get("skillcall"): - del kwargs["skillcall"] + call_id = kwargs.get("call_id", None) + if call_id: + del kwargs["call_id"] def run_function(): - self.agent_comms.publish(AgentMsg(skill, None, type=MsgType.start)) + self.agent_comms.publish(SkillMsg(call_id, skill, None, type=MsgType.start)) try: val = f(self, *args, **kwargs) - self.agent_comms.publish(AgentMsg(skill, val, type=MsgType.ret)) + self.agent_comms.publish(SkillMsg(call_id, skill, val, type=MsgType.ret)) except Exception as e: - self.agent_comms.publish(AgentMsg(skill, str(e), type=MsgType.error)) + self.agent_comms.publish( + SkillMsg(call_id, skill, str(e), type=MsgType.error) + ) thread = threading.Thread(target=run_function) thread.start() @@ -49,7 +53,16 @@ def run_function(): return f(self, *args, **kwargs) - skill_config = SkillConfig(name=f.__name__, reducer=reducer, stream=stream, ret=ret) + # sig = inspect.signature(f) + # params = list(sig.parameters.values()) + # if params and params[0].name == "self": + # params = params[1:] # Remove first parameter 'self' + + # wrapper.__signature__ = sig.replace(parameters=params) + + skill_config = SkillConfig( + name=f.__name__, reducer=reducer, stream=stream, ret=ret, schema=function_to_schema(f) + ) # implicit RPC call as well wrapper.__rpc__ = True # type: ignore[attr-defined] diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py new file mode 100644 index 0000000000..0b6d4d54a5 --- /dev/null +++ b/dimos/protocol/skill/test_coordinator.py @@ -0,0 +1,105 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import asyncio +import time +from pprint import pprint + +import pytest + +from dimos.protocol.skill.coordinator import SkillCoordinator +from dimos.protocol.skill.skill import SkillContainer, skill +from dimos.protocol.skill.testing_utils import TestContainer + +# def test_coordinator_skill_export(): +# skillCoordinator = SkillCoordinator() +# skillCoordinator.register_skills(TestContainer()) + +# assert skillCoordinator.get_tools() == [ +# { +# "function": { +# "description": "", +# "name": "add", +# "parameters": { +# "properties": {"x": {"type": "integer"}, "y": {"type": "integer"}}, +# "required": ["x", "y"], +# "type": "object", +# }, +# }, +# "type": "function", +# }, +# { +# "function": { +# "description": "", +# "name": "delayadd", +# "parameters": { +# "properties": {"x": {"type": "integer"}, "y": {"type": "integer"}}, +# "required": ["x", "y"], +# "type": "object", +# }, +# }, +# "type": "function", +# }, +# ] + +# print(pprint(skillCoordinator.get_tools())) + + +class TestContainer2(SkillContainer): + @skill() + def add(self, x: int, y: int) -> int: + # time.sleep(0.25) + return x + y + + @skill() + def delayadd(self, x: int, y: int) -> int: + time.sleep(0.5) + return x + y + + +@pytest.mark.asyncio +async def test_coordinator_generator(): + skillCoordinator = SkillCoordinator() + skillCoordinator.register_skills(TestContainer()) + + skillCoordinator.start() + + skillCoordinator.call("test-call-0", "delayadd", {"args": [1, 2]}) + + time.sleep(0.1) + + cnt = 0 + while await skillCoordinator.wait_for_updates(1): + print(skillCoordinator) + + skillstates = skillCoordinator.generate_snapshot() + + tool_msg = skillstates[f"test-call-{cnt}"].agent_encode() + tool_msg.content == cnt + 1 + + cnt += 1 + if cnt < 5: + skillCoordinator.call( + f"test-call-{cnt}-delay", + "delayadd", + {"args": [cnt, 2]}, + ) + skillCoordinator.call( + f"test-call-{cnt}", + "add", + {"args": [cnt, 2]}, + ) + + time.sleep(0.1 * cnt) + + print("All updates processed successfully.") diff --git a/dimos/protocol/skill/test_skill.py b/dimos/protocol/skill/test_skill.py index 9bf7e85a35..836f316ca3 100644 --- a/dimos/protocol/skill/test_skill.py +++ b/dimos/protocol/skill/test_skill.py @@ -14,19 +14,9 @@ import time -from dimos.protocol.skill.agent_interface import AgentInterface +from dimos.protocol.skill.coordinator import SkillCoordinator from dimos.protocol.skill.skill import SkillContainer, skill - - -class TestContainer(SkillContainer): - @skill() - def add(self, x: int, y: int) -> int: - return x + y - - @skill() - def delayadd(self, x: int, y: int) -> int: - time.sleep(0.5) - return x + y +from dimos.protocol.skill.testing_utils import TestContainer def test_introspect_skill(): @@ -35,7 +25,7 @@ def test_introspect_skill(): def test_internals(): - agentInterface = AgentInterface() + agentInterface = SkillCoordinator() agentInterface.start() testContainer = TestContainer() @@ -45,7 +35,7 @@ def test_internals(): # skillcall=True makes the skill function exit early, # it doesn't behave like a blocking function, # - # return is passed as AgentMsg to the agent topic + # return is passed as SkillMsg to the agent topic testContainer.delayadd(2, 4, skillcall=True) testContainer.add(1, 2, skillcall=True) @@ -61,7 +51,7 @@ def test_internals(): print(agentInterface) - agentInterface.execute_skill("delayadd", 1, 2) + agentInterface.call("test-call-1", "delayadd", 1, 2) time.sleep(0.25) print(agentInterface) @@ -71,7 +61,7 @@ def test_internals(): def test_standard_usage(): - agentInterface = AgentInterface(agent_callback=print) + agentInterface = SkillCoordinator() agentInterface.start() testContainer = TestContainer() @@ -82,7 +72,7 @@ def test_standard_usage(): print(agentInterface.skills()) # we can execute a skill - agentInterface.execute_skill("delayadd", 1, 2) + agentInterface.call("test-call-2", "delayadd", 1, 2) # while skill is executing, we can introspect the state # (we see that the skill is running) @@ -108,7 +98,7 @@ def add(self, x: int, y: int) -> int: time.sleep(0.5) return x * y - agentInterface = AgentInterface(agent_callback=print) + agentInterface = SkillCoordinator() agentInterface.start() dimos = start(1) @@ -117,7 +107,7 @@ def add(self, x: int, y: int) -> int: agentInterface.register_skills(mock_module) # we can execute a skill - agentInterface.execute_skill("add", 1, 2) + agentInterface.call("test-call-3", "add", 1, 2) # while skill is executing, we can introspect the state # (we see that the skill is running) diff --git a/dimos/protocol/skill/testing_utils.py b/dimos/protocol/skill/testing_utils.py new file mode 100644 index 0000000000..fda4c27591 --- /dev/null +++ b/dimos/protocol/skill/testing_utils.py @@ -0,0 +1,28 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import time + +from dimos.protocol.skill.skill import SkillContainer, skill + + +class TestContainer(SkillContainer): + @skill() + def add(self, x: int, y: int) -> int: + return x + y + + @skill() + def delayadd(self, x: int, y: int) -> int: + time.sleep(0.3) + return x + y diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py new file mode 100644 index 0000000000..0ed1c91ad3 --- /dev/null +++ b/dimos/protocol/skill/type.py @@ -0,0 +1,146 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import time +from dataclasses import dataclass +from enum import Enum +from typing import Any, Callable + +from dimos.types.timestamped import Timestamped + + +class Call(Enum): + Implicit = 0 + Explicit = 1 + + +class Reducer(Enum): + none = 0 + all = 1 + latest = 2 + average = 3 + + +class Stream(Enum): + # no streaming + none = 0 + # passive stream, doesn't schedule an agent call, but returns the value to the agent + passive = 1 + # calls the agent with every value emitted, schedules an agent call + call_agent = 2 + + +class Return(Enum): + # doesn't return anything to an agent + none = 0 + # returns the value to the agent, but doesn't schedule an agent call + passive = 1 + # calls the agent with the value, scheduling an agent call + call_agent = 2 + + +@dataclass +class SkillConfig: + name: str + reducer: Reducer + stream: Stream + ret: Return + schema: dict[str, Any] + f: Callable | None = None + autostart: bool = False + + def bind(self, f: Callable) -> "SkillConfig": + self.f = f + return self + + def call(self, call_id, *args, **kwargs) -> Any: + if self.f is None: + raise ValueError( + "Function is not bound to the SkillConfig. This should be called only within AgentListener." + ) + + return self.f(*args, **kwargs, call_id=call_id) + + def __str__(self): + parts = [f"name={self.name}"] + + # Only show reducer if stream is not none (streaming is happening) + if self.stream != Stream.none: + reducer_name = "unknown" + if self.reducer == Reducer.latest: + reducer_name = "latest" + elif self.reducer == Reducer.all: + reducer_name = "all" + elif self.reducer == Reducer.average: + reducer_name = "average" + parts.append(f"reducer={reducer_name}") + parts.append(f"stream={self.stream.name}") + + # Always show return mode + parts.append(f"ret={self.ret.name}") + return f"Skill({', '.join(parts)})" + + +class MsgType(Enum): + pending = 0 + start = 1 + stream = 2 + ret = 3 + error = 4 + + +class SkillMsg(Timestamped): + ts: float + type: MsgType + call_id: str + skill_name: str + content: str | int | float | dict | list + + def __init__( + self, + call_id: str, + skill_name: str, + content: str | int | float | dict | list, + type: MsgType = MsgType.ret, + ) -> None: + self.ts = time.time() + self.call_id = call_id + self.skill_name = skill_name + self.content = content + self.type = type + + def __repr__(self): + return self.__str__() + + @property + def end(self) -> bool: + return self.type == MsgType.ret or self.type == MsgType.error + + @property + def start(self) -> bool: + return self.type == MsgType.start + + def __str__(self): + time_ago = time.time() - self.ts + + if self.type == MsgType.start: + return f"Start({time_ago:.1f}s ago)" + if self.type == MsgType.ret: + return f"Ret({time_ago:.1f}s ago, val={self.content})" + if self.type == MsgType.error: + return f"Error({time_ago:.1f}s ago, val={self.content})" + if self.type == MsgType.pending: + return f"Pending({time_ago:.1f}s ago)" + if self.type == MsgType.stream: + return f"Stream({time_ago:.1f}s ago, val={self.content})" diff --git a/dimos/utils/cli/agentspy/agentspy.py b/dimos/utils/cli/agentspy/agentspy.py index 0c25a89612..2c58ab4cf3 100644 --- a/dimos/utils/cli/agentspy/agentspy.py +++ b/dimos/utils/cli/agentspy/agentspy.py @@ -14,7 +14,6 @@ from __future__ import annotations -import asyncio import logging import threading import time @@ -23,20 +22,20 @@ from rich.text import Text from textual.app import App, ComposeResult from textual.binding import Binding -from textual.containers import Container, Horizontal, Vertical +from textual.containers import Vertical from textual.reactive import reactive -from textual.widgets import DataTable, Footer, Header, RichLog +from textual.widgets import DataTable, Footer, RichLog -from dimos.protocol.skill.agent_interface import AgentInterface, SkillState, SkillStateEnum -from dimos.protocol.skill.comms import AgentMsg, LCMSkillComms -from dimos.protocol.skill.types import MsgType +from dimos.protocol.skill.comms import SkillMsg +from dimos.protocol.skill.coordinator import SkillCoordinator, SkillState, SkillStateEnum +from dimos.protocol.skill.type import MsgType class AgentSpy: """Spy on agent skill executions via LCM messages.""" def __init__(self): - self.agent_interface = AgentInterface() + self.agent_interface = SkillCoordinator() self.message_callbacks: list[Callable[[Dict[str, SkillState]], None]] = [] self._lock = threading.Lock() self._latest_state: Dict[str, SkillState] = {} @@ -53,14 +52,14 @@ def stop(self): """Stop spying.""" self.agent_interface.stop() - def _handle_message(self, msg: AgentMsg): - """Handle incoming agent messages.""" + def _handle_message(self, msg: SkillMsg): + """Handle incoming skill messages.""" # Small delay to ensure agent_interface has processed the message def delayed_update(): time.sleep(0.1) with self._lock: - self._latest_state = self.agent_interface.state_snapshot(clear=False) + self._latest_state = self.agent_interface.generate_snapshot(clear=False) for callback in self.message_callbacks: callback(self._latest_state) @@ -83,7 +82,7 @@ def state_color(state: SkillStateEnum) -> str: return "yellow" elif state == SkillStateEnum.running: return "green" - elif state == SkillStateEnum.returned: + elif state == SkillStateEnum.completed: return "cyan" elif state == SkillStateEnum.error: return "red" @@ -181,11 +180,12 @@ def __init__(self, *args, **kwargs): self.spy = AgentSpy() self.table: Optional[DataTable] = None self.log_view: Optional[RichLog] = None - self.skill_history: list[tuple[str, SkillState, float]] = [] # (name, state, start_time) + self.skill_history: list[tuple[str, SkillState, float]] = [] # (call_id, state, start_time) self.log_handler: Optional[TextualLogHandler] = None def compose(self) -> ComposeResult: self.table = DataTable(zebra_stripes=False, cursor_type=None) + self.table.add_column("Call ID") self.table.add_column("Skill Name") self.table.add_column("State") self.table.add_column("Duration") @@ -219,12 +219,23 @@ def on_mount(self): if self.log_view: self.log_handler = TextualLogHandler(self.log_view) - # Custom formatter that shortens the logger name + # Custom formatter that shortens the logger name and highlights call_ids class ShortNameFormatter(logging.Formatter): def format(self, record): # Remove the common prefix from logger names if record.name.startswith("dimos.protocol.skill."): record.name = record.name.replace("dimos.protocol.skill.", "") + + # Highlight call_ids in the message + msg = record.getMessage() + if "call_id=" in msg: + # Extract and colorize call_id + import re + + msg = re.sub(r"call_id=([^\s\)]+)", r"call_id=\033[94m\1\033[0m", msg) + record.msg = msg + record.args = () + return super().format(record) self.log_handler.setFormatter( @@ -246,7 +257,7 @@ def format(self, record): self.spy.start() # Also set up periodic refresh to update durations - self.set_interval(0.5, self.refresh_table) + self.set_interval(1.0, self.refresh_table) def on_unmount(self): """Stop the spy when app unmounts.""" @@ -257,18 +268,18 @@ def on_unmount(self): root_logger.removeHandler(self.log_handler) def update_state(self, state: Dict[str, SkillState]): - """Update state from spy callback.""" + """Update state from spy callback. State dict is keyed by call_id.""" # Update history with current state current_time = time.time() # Add new skills or update existing ones - for skill_name, skill_state in state.items(): - # Find if skill already in history + for call_id, skill_state in state.items(): + # Find if this call_id already in history found = False - for i, (name, old_state, start_time) in enumerate(self.skill_history): - if name == skill_name: + for i, (existing_call_id, old_state, start_time) in enumerate(self.skill_history): + if existing_call_id == call_id: # Update existing entry - self.skill_history[i] = (skill_name, skill_state, start_time) + self.skill_history[i] = (call_id, skill_state, start_time) found = True break @@ -278,7 +289,7 @@ def update_state(self, state: Dict[str, SkillState]): if len(skill_state) > 0: # Use first message timestamp if available start_time = skill_state._items[0].ts - self.skill_history.append((skill_name, skill_state, start_time)) + self.skill_history.append((call_id, skill_state, start_time)) # Schedule UI update self.call_from_thread(self.refresh_table) @@ -299,7 +310,7 @@ def refresh_table(self): max_rows = max(1, height) # Show only top N entries - for skill_name, skill_state, start_time in sorted_history[:max_rows]: + for call_id, skill_state, start_time in sorted_history[:max_rows]: # Calculate how long ago it started time_ago = time.time() - start_time start_str = format_duration(time_ago) + " ago" @@ -317,7 +328,7 @@ def refresh_table(self): last_msg = skill_state._items[-1] if last_msg.type == MsgType.error: details = str(last_msg.content)[:40] - elif skill_state.state == SkillStateEnum.returned and msg_count > 0: + elif skill_state.state == SkillStateEnum.completed and msg_count > 0: # Show return value last_msg = skill_state._items[-1] if last_msg.type == MsgType.ret: @@ -326,9 +337,15 @@ def refresh_table(self): # Show progress indicator details = "⋯ " + "▸" * min(int(time_ago), 20) + # Format call_id for display (truncate if too long) + display_call_id = call_id + if len(call_id) > 16: + display_call_id = call_id[:13] + "..." + # Add row with colored state self.table.add_row( - Text(skill_name, style="white"), + Text(display_call_id, style="bright_blue"), + Text(skill_state.name, style="white"), Text(skill_state.state.name, style=state_color(skill_state.state)), Text(duration_str, style="dim"), Text(start_str, style="dim"), diff --git a/dimos/utils/cli/agentspy/demo_agentspy.py b/dimos/utils/cli/agentspy/demo_agentspy.py index 2b39674a7b..fcd71d99ef 100644 --- a/dimos/utils/cli/agentspy/demo_agentspy.py +++ b/dimos/utils/cli/agentspy/demo_agentspy.py @@ -17,7 +17,7 @@ import time import threading -from dimos.protocol.skill.agent_interface import AgentInterface +from dimos.protocol.skill.coordinator import SkillCoordinator from dimos.protocol.skill.skill import SkillContainer, skill @@ -56,7 +56,7 @@ def quick_task(self, name: str) -> str: def run_demo_skills(): """Run demo skills in background.""" # Create and start agent interface - agent_interface = AgentInterface() + agent_interface = SkillCoordinator() agent_interface.start() # Register skills @@ -69,18 +69,21 @@ def skill_runner(): while True: time.sleep(2) + # Generate unique call_id for each invocation + call_id = f"demo-{counter}" + # Run different skills based on counter if counter % 4 == 0: - demo_skills.count_to(3, skillcall=True) + # Run multiple count_to in parallel to show parallel execution + agent_interface.call(f"{call_id}-count-1", "count_to", 3) + agent_interface.call(f"{call_id}-count-2", "count_to", 5) + agent_interface.call(f"{call_id}-count-3", "count_to", 2) elif counter % 4 == 1: - demo_skills.compute_fibonacci(10, skillcall=True) + agent_interface.call(f"{call_id}-fib", "compute_fibonacci", 10) elif counter % 4 == 2: - demo_skills.quick_task(f"task-{counter}", skillcall=True) + agent_interface.call(f"{call_id}-quick", "quick_task", f"task-{counter}") else: - try: - demo_skills.simulate_error(skillcall=True) - except: - pass # Expected to fail + agent_interface.call(f"{call_id}-error", "simulate_error") counter += 1 From 29d2538d2cc782ed022b1ff56c99386a80aaed6a Mon Sep 17 00:00:00 2001 From: lesh Date: Sat, 9 Aug 2025 11:04:18 -0700 Subject: [PATCH 02/59] fix skill tests --- dimos/protocol/skill/test_coordinator.py | 34 ------- dimos/protocol/skill/test_skill.py | 120 ----------------------- 2 files changed, 154 deletions(-) delete mode 100644 dimos/protocol/skill/test_skill.py diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 0b6d4d54a5..a75ea85e55 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -21,44 +21,10 @@ from dimos.protocol.skill.skill import SkillContainer, skill from dimos.protocol.skill.testing_utils import TestContainer -# def test_coordinator_skill_export(): -# skillCoordinator = SkillCoordinator() -# skillCoordinator.register_skills(TestContainer()) - -# assert skillCoordinator.get_tools() == [ -# { -# "function": { -# "description": "", -# "name": "add", -# "parameters": { -# "properties": {"x": {"type": "integer"}, "y": {"type": "integer"}}, -# "required": ["x", "y"], -# "type": "object", -# }, -# }, -# "type": "function", -# }, -# { -# "function": { -# "description": "", -# "name": "delayadd", -# "parameters": { -# "properties": {"x": {"type": "integer"}, "y": {"type": "integer"}}, -# "required": ["x", "y"], -# "type": "object", -# }, -# }, -# "type": "function", -# }, -# ] - -# print(pprint(skillCoordinator.get_tools())) - class TestContainer2(SkillContainer): @skill() def add(self, x: int, y: int) -> int: - # time.sleep(0.25) return x + y @skill() diff --git a/dimos/protocol/skill/test_skill.py b/dimos/protocol/skill/test_skill.py deleted file mode 100644 index 836f316ca3..0000000000 --- a/dimos/protocol/skill/test_skill.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import time - -from dimos.protocol.skill.coordinator import SkillCoordinator -from dimos.protocol.skill.skill import SkillContainer, skill -from dimos.protocol.skill.testing_utils import TestContainer - - -def test_introspect_skill(): - testContainer = TestContainer() - print(testContainer.skills()) - - -def test_internals(): - agentInterface = SkillCoordinator() - agentInterface.start() - - testContainer = TestContainer() - - agentInterface.register_skills(testContainer) - - # skillcall=True makes the skill function exit early, - # it doesn't behave like a blocking function, - # - # return is passed as SkillMsg to the agent topic - testContainer.delayadd(2, 4, skillcall=True) - testContainer.add(1, 2, skillcall=True) - - time.sleep(0.25) - print(agentInterface) - - time.sleep(0.75) - print(agentInterface) - - print(agentInterface.state_snapshot()) - - print(agentInterface.skills()) - - print(agentInterface) - - agentInterface.call("test-call-1", "delayadd", 1, 2) - - time.sleep(0.25) - print(agentInterface) - time.sleep(0.75) - - print(agentInterface) - - -def test_standard_usage(): - agentInterface = SkillCoordinator() - agentInterface.start() - - testContainer = TestContainer() - - agentInterface.register_skills(testContainer) - - # we can investigate skills - print(agentInterface.skills()) - - # we can execute a skill - agentInterface.call("test-call-2", "delayadd", 1, 2) - - # while skill is executing, we can introspect the state - # (we see that the skill is running) - time.sleep(0.25) - print(agentInterface) - time.sleep(0.75) - - # after the skill has finished, we can see the result - # and the skill state - print(agentInterface) - - -def test_module(): - from dimos.core import Module, start - - class MockModule(Module, SkillContainer): - def __init__(self): - super().__init__() - SkillContainer.__init__(self) - - @skill() - def add(self, x: int, y: int) -> int: - time.sleep(0.5) - return x * y - - agentInterface = SkillCoordinator() - agentInterface.start() - - dimos = start(1) - mock_module = dimos.deploy(MockModule) - - agentInterface.register_skills(mock_module) - - # we can execute a skill - agentInterface.call("test-call-3", "add", 1, 2) - - # while skill is executing, we can introspect the state - # (we see that the skill is running) - time.sleep(0.25) - print(agentInterface) - time.sleep(0.75) - - # after the skill has finished, we can see the result - # and the skill state - print(agentInterface) From 9b7a2be2b6992c58eb212148ab6174ab56d832a9 Mon Sep 17 00:00:00 2001 From: lesh Date: Sat, 9 Aug 2025 12:15:23 -0700 Subject: [PATCH 03/59] cleanup --- dimos/agents2/__init__.py | 3 + dimos/agents2/{main.py => agent.py} | 33 ++- dimos/agents2/spec.py | 143 +++++++++++ dimos/agents2/{test_main.py => test_agent.py} | 7 +- dimos/protocol/skill/agent_interface.py | 236 ------------------ dimos/protocol/skill/coordinator.py | 9 +- dimos/protocol/skill/types.py | 140 ----------- 7 files changed, 183 insertions(+), 388 deletions(-) rename dimos/agents2/{main.py => agent.py} (75%) create mode 100644 dimos/agents2/spec.py rename dimos/agents2/{test_main.py => test_agent.py} (85%) delete mode 100644 dimos/protocol/skill/agent_interface.py delete mode 100644 dimos/protocol/skill/types.py diff --git a/dimos/agents2/__init__.py b/dimos/agents2/__init__.py index 6a756fbaab..c4776ceec9 100644 --- a/dimos/agents2/__init__.py +++ b/dimos/agents2/__init__.py @@ -6,3 +6,6 @@ ToolCall, ToolMessage, ) + +from dimos.agents2.agent import Agent +from dimos.agents2.spec import AgentSpec diff --git a/dimos/agents2/main.py b/dimos/agents2/agent.py similarity index 75% rename from dimos/agents2/main.py rename to dimos/agents2/agent.py index 8e2da24903..11336602d9 100644 --- a/dimos/agents2/main.py +++ b/dimos/agents2/agent.py @@ -14,6 +14,7 @@ import asyncio from pprint import pprint +from typing import Optional from langchain.chat_models import init_chat_model from langchain_core.language_models.chat_models import BaseChatModel @@ -26,6 +27,7 @@ ToolMessage, ) +from dimos.agents2.spec import AgentSpec from dimos.core import Module, rpc from dimos.protocol.skill import skill from dimos.protocol.skill.coordinator import SkillCoordinator, SkillState @@ -34,15 +36,32 @@ logger = setup_logger("dimos.protocol.agents2") -class Agent(SkillCoordinator): - def __init__(self, model: str = "gpt-4o", model_provider: str = "openai", *args, **kwargs): - super().__init__(*args, **kwargs) +class Agent(AgentSpec, SkillCoordinator): + def __init__( + self, + *args, + **kwargs, + ): + AgentSpec.__init__(self, *args, **kwargs) + SkillCoordinator.__init__(self) self.messages = [] - self._llm = init_chat_model( - model=model, - model_provider=model_provider, - ) + + if self.config.system_prompt: + if isinstance(self.config.system_prompt, str): + self.messages.append(self.config.system_prompt) + else: + self.messages.append(self.config.system_prompt) + + self._llm = init_chat_model(model_provider=self.config.provider, model=self.config.model) + + @rpc + def start(self): + SkillCoordinator.start(self) + + @rpc + def stop(self): + SkillCoordinator.stop(self) async def agent_loop(self, seed_query: str = ""): self.messages.append(HumanMessage(seed_query)) diff --git a/dimos/agents2/spec.py b/dimos/agents2/spec.py new file mode 100644 index 0000000000..1a4cb463c2 --- /dev/null +++ b/dimos/agents2/spec.py @@ -0,0 +1,143 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Base agent module that wraps BaseAgent for DimOS module usage.""" + +from dataclasses import dataclass +from enum import Enum +from typing import Optional, Tuple, Union + +from langchain.chat_models.base import _SUPPORTED_PROVIDERS +from langchain_core.messages import ( + SystemMessage, +) + +from dimos.core import rpc +from dimos.protocol.service import Service +from dimos.protocol.skill.skill import SkillContainer +from dimos.utils.logging_config import setup_logger + +logger = setup_logger("dimos.agents.modules.base_agent") + + +# Dynamically create ModelProvider enum from LangChain's supported providers +Provider = Enum( + "Provider", {provider.upper(): provider for provider in _SUPPORTED_PROVIDERS}, type=str +) + + +class Model(str, Enum): + """Common model names across providers. + + Note: This is not exhaustive as model names change frequently. + Based on langchain's _attempt_infer_model_provider patterns. + """ + + # OpenAI models (prefix: gpt-3, gpt-4, o1, o3) + GPT_4O = "gpt-4o" + GPT_4O_MINI = "gpt-4o-mini" + GPT_4_TURBO = "gpt-4-turbo" + GPT_4_TURBO_PREVIEW = "gpt-4-turbo-preview" + GPT_4 = "gpt-4" + GPT_35_TURBO = "gpt-3.5-turbo" + GPT_35_TURBO_16K = "gpt-3.5-turbo-16k" + O1_PREVIEW = "o1-preview" + O1_MINI = "o1-mini" + O3_MINI = "o3-mini" + + # Anthropic models (prefix: claude) + CLAUDE_3_OPUS = "claude-3-opus-20240229" + CLAUDE_3_SONNET = "claude-3-sonnet-20240229" + CLAUDE_3_HAIKU = "claude-3-haiku-20240307" + CLAUDE_35_SONNET = "claude-3-5-sonnet-20241022" + CLAUDE_35_SONNET_LATEST = "claude-3-5-sonnet-latest" + CLAUDE_3_7_SONNET = "claude-3-7-sonnet-20250219" + + # Google models (prefix: gemini) + GEMINI_20_FLASH = "gemini-2.0-flash" + GEMINI_15_PRO = "gemini-1.5-pro" + GEMINI_15_FLASH = "gemini-1.5-flash" + GEMINI_10_PRO = "gemini-1.0-pro" + + # Amazon Bedrock models (prefix: amazon) + AMAZON_TITAN_EXPRESS = "amazon.titan-text-express-v1" + AMAZON_TITAN_LITE = "amazon.titan-text-lite-v1" + + # Cohere models (prefix: command) + COMMAND_R_PLUS = "command-r-plus" + COMMAND_R = "command-r" + COMMAND = "command" + COMMAND_LIGHT = "command-light" + + # Fireworks models (prefix: accounts/fireworks) + FIREWORKS_LLAMA_V3_70B = "accounts/fireworks/models/llama-v3-70b-instruct" + FIREWORKS_MIXTRAL_8X7B = "accounts/fireworks/models/mixtral-8x7b-instruct" + + # Mistral models (prefix: mistral) + MISTRAL_LARGE = "mistral-large" + MISTRAL_MEDIUM = "mistral-medium" + MISTRAL_SMALL = "mistral-small" + MIXTRAL_8X7B = "mixtral-8x7b" + MIXTRAL_8X22B = "mixtral-8x22b" + MISTRAL_7B = "mistral-7b" + + # DeepSeek models (prefix: deepseek) + DEEPSEEK_CHAT = "deepseek-chat" + DEEPSEEK_CODER = "deepseek-coder" + DEEPSEEK_R1_DISTILL_LLAMA_70B = "deepseek-r1-distill-llama-70b" + + # xAI models (prefix: grok) + GROK_1 = "grok-1" + GROK_2 = "grok-2" + + # Perplexity models (prefix: sonar) + SONAR_SMALL_CHAT = "sonar-small-chat" + SONAR_MEDIUM_CHAT = "sonar-medium-chat" + SONAR_LARGE_CHAT = "sonar-large-chat" + + # Meta Llama models (various providers) + LLAMA_3_70B = "llama-3-70b" + LLAMA_3_8B = "llama-3-8b" + LLAMA_31_70B = "llama-3.1-70b" + LLAMA_31_8B = "llama-3.1-8b" + LLAMA_33_70B = "llama-3.3-70b" + LLAMA_2_70B = "llama-2-70b" + LLAMA_2_13B = "llama-2-13b" + LLAMA_2_7B = "llama-2-7b" + + +@dataclass +class AgentConfig: + system_prompt: Optional[str | SystemMessage] = None + skills: Optional[SkillContainer | list[SkillContainer]] = None + model: Model = Model.GPT_4O + provider: Provider = Provider.OPENAI + + +class AgentSpec( + Service[AgentConfig], +): + default_config: type[AgentConfig] = AgentConfig + + @rpc + def start(self): ... + + @rpc + def stop(self): ... + + @rpc + def clear_history(self): ... + + @rpc + def query(self, query: str): ... diff --git a/dimos/agents2/test_main.py b/dimos/agents2/test_agent.py similarity index 85% rename from dimos/agents2/test_main.py rename to dimos/agents2/test_agent.py index 755666b070..78c81a56d6 100644 --- a/dimos/agents2/test_main.py +++ b/dimos/agents2/test_agent.py @@ -17,8 +17,7 @@ import pytest -from dimos.agents2.main import Agent -from dimos.core import start +from dimos.agents2.agent import Agent from dimos.protocol.skill import SkillContainer, skill @@ -45,7 +44,7 @@ async def test_agent_init(): agent.start() agent.query( - "hi there, use add tool to add 124181112 and 124124. don't sum yourself, use a tool I provided" + "hi there, please tell me what's your name, and use add tool to add 124181112 and 124124. don't sum yourself, use a tool I provided" ) - await asyncio.sleep(5) + await asyncio.sleep(10) diff --git a/dimos/protocol/skill/agent_interface.py b/dimos/protocol/skill/agent_interface.py deleted file mode 100644 index 8a9926d028..0000000000 --- a/dimos/protocol/skill/agent_interface.py +++ /dev/null @@ -1,236 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from copy import copy -from dataclasses import dataclass -from enum import Enum -from pprint import pformat -from typing import Any, Callable, Optional - -from dimos.protocol.skill.comms import AgentMsg, LCMSkillComms, MsgType, SkillCommsSpec -from dimos.protocol.skill.skill import SkillConfig, SkillContainer -from dimos.protocol.skill.types import Reducer, Return, Stream -from dimos.types.timestamped import TimestampedCollection -from dimos.utils.logging_config import setup_logger - -logger = setup_logger("dimos.protocol.skill.agent_interface") - - -@dataclass -class AgentInputConfig: - agent_comms: type[SkillCommsSpec] = LCMSkillComms - - -class SkillStateEnum(Enum): - pending = 0 - running = 1 - returned = 2 - error = 3 - - -# TODO pending timeout, running timeout, etc. -class SkillState(TimestampedCollection): - name: str - state: SkillStateEnum - skill_config: SkillConfig - - def __init__(self, name: str, skill_config: Optional[SkillConfig] = None) -> None: - super().__init__() - self.skill_config = skill_config or SkillConfig( - name=name, stream=Stream.none, ret=Return.none, reducer=Reducer.none - ) - - self.state = SkillStateEnum.pending - self.name = name - - # returns True if the agent should be called for this message - def handle_msg(self, msg: AgentMsg) -> bool: - self.add(msg) - - if msg.type == MsgType.stream: - if ( - self.skill_config.stream == Stream.none - or self.skill_config.stream == Stream.passive - ): - return False - if self.skill_config.stream == Stream.call_agent: - return True - - if msg.type == MsgType.ret: - self.state = SkillStateEnum.returned - if self.skill_config.ret == Return.call_agent: - return True - return False - - if msg.type == MsgType.error: - self.state = SkillStateEnum.error - return True - - if msg.type == MsgType.start: - self.state = SkillStateEnum.running - return False - - return False - - def __str__(self) -> str: - head = f"SkillState(state={self.state}" - - if self.state == SkillStateEnum.returned or self.state == SkillStateEnum.error: - head += ", ran for=" - else: - head += ", running for=" - - head += f"{self.duration():.2f}s" - - if len(self): - return head + f", messages={list(self._items)})" - return head + ", No Messages)" - - -class AgentInterface(SkillContainer): - _static_containers: list[SkillContainer] - _dynamic_containers: list[SkillContainer] - _skill_state: dict[str, SkillState] - _skills: dict[str, SkillConfig] - _agent_callback: Optional[Callable[[dict[str, SkillState]], Any]] = None - - # Agent callback is called with a state snapshot once system decides - # that agents needs to be woken up, according to inputs from active skills - def __init__( - self, agent_callback: Optional[Callable[[dict[str, SkillState]], Any]] = None - ) -> None: - super().__init__() - self._agent_callback = agent_callback - self._static_containers = [] - self._dynamic_containers = [] - self._skills = {} - self._skill_state = {} - - def start(self) -> None: - self.agent_comms.start() - self.agent_comms.subscribe(self.handle_message) - - def stop(self) -> None: - self.agent_comms.stop() - - # This is used by agent to call skills - def execute_skill(self, skill_name: str, *args, **kwargs) -> None: - skill_config = self.get_skill_config(skill_name) - if not skill_config: - logger.error( - f"Skill {skill_name} not found in registered skills, but agent tried to call it (did a dynamic skill expire?)" - ) - return - - # This initializes the skill state if it doesn't exist - self._skill_state[skill_name] = SkillState(name=skill_name, skill_config=skill_config) - return skill_config.call(*args, **kwargs) - - # Receives a message from active skill - # Updates local skill state (appends to streamed data if needed etc) - # - # Checks if agent needs to be called (if ToolConfig has Return=call_agent or Stream=call_agent) - def handle_message(self, msg: AgentMsg) -> None: - logger.info(f"{msg.skill_name} - {msg}") - - if self._skill_state.get(msg.skill_name) is None: - logger.warn( - f"Skill state for {msg.skill_name} not found, (skill not called by our agent?) initializing. (message received: {msg})" - ) - self._skill_state[msg.skill_name] = SkillState(name=msg.skill_name) - - should_call_agent = self._skill_state[msg.skill_name].handle_msg(msg) - if should_call_agent: - self.call_agent() - - # Returns a snapshot of the current state of skill runs. - # - # If clear=True, it will assume the snapshot is being sent to an agent - # and will clear the finished skill runs from the state - def state_snapshot(self, clear: bool = True) -> dict[str, SkillState]: - if not clear: - return self._skill_state - - ret = copy(self._skill_state) - - to_delete = [] - # Since state is exported, we can clear the finished skill runs - for skill_name, skill_run in self._skill_state.items(): - if skill_run.state == SkillStateEnum.returned: - logger.info(f"Skill {skill_name} finished") - to_delete.append(skill_name) - if skill_run.state == SkillStateEnum.error: - logger.error(f"Skill run error for {skill_name}") - to_delete.append(skill_name) - - for skill_name in to_delete: - logger.debug(f"{skill_name} finished, removing from state") - del self._skill_state[skill_name] - - return ret - - def call_agent(self) -> None: - """Call the agent with the current state of skill runs.""" - logger.info(f"Calling agent with current skill state: {self.state_snapshot(clear=False)}") - - state = self.state_snapshot(clear=True) - - if self._agent_callback: - self._agent_callback(state) - - def __str__(self): - # Convert objects to their string representations - def stringify_value(obj): - if isinstance(obj, dict): - return {k: stringify_value(v) for k, v in obj.items()} - elif isinstance(obj, list): - return [stringify_value(item) for item in obj] - else: - return str(obj) - - ret = stringify_value(self._skill_state) - - return f"AgentInput({pformat(ret, indent=2, depth=3, width=120, compact=True)})" - - # Given skillcontainers can run remotely, we are - # Caching available skills from static containers - # - # Dynamic containers will be queried at runtime via - # .skills() method - def register_skills(self, container: SkillContainer): - if not container.dynamic_skills: - logger.info(f"Registering static skill container, {container}") - self._static_containers.append(container) - for name, skill_config in container.skills().items(): - self._skills[name] = skill_config.bind(getattr(container, name)) - else: - logger.info(f"Registering dynamic skill container, {container}") - self._dynamic_containers.append(container) - - def get_skill_config(self, skill_name: str) -> Optional[SkillConfig]: - skill_config = self._skills.get(skill_name) - if not skill_config: - skill_config = self.skills().get(skill_name) - return skill_config - - def skills(self) -> dict[str, SkillConfig]: - # Static container skilling is already cached - all_skills: dict[str, SkillConfig] = {**self._skills} - - # Then aggregate skills from dynamic containers - for container in self._dynamic_containers: - for skill_name, skill_config in container.skills().items(): - all_skills[skill_name] = skill_config.bind(getattr(container, skill_name)) - - return all_skills diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 4b0f5d27f2..3ec56308b2 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -18,12 +18,19 @@ from enum import Enum from typing import Any, List, Optional +from langchain_core.messages import ( + AIMessage, + HumanMessage, + MessageLikeRepresentation, + SystemMessage, + ToolCall, + ToolMessage, +) from langchain_core.tools import tool as langchain_tool from rich.console import Console from rich.table import Table from rich.text import Text -from dimos.agents2 import ToolCall, ToolMessage from dimos.core import Module, rpc from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec from dimos.protocol.skill.skill import SkillConfig, SkillContainer diff --git a/dimos/protocol/skill/types.py b/dimos/protocol/skill/types.py deleted file mode 100644 index e4b09a7ef9..0000000000 --- a/dimos/protocol/skill/types.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import time -from dataclasses import dataclass -from enum import Enum -from typing import Any, Callable, Generic, Optional, TypeVar - -from dimos.types.timestamped import Timestamped - - -class Call(Enum): - Implicit = 0 - Explicit = 1 - - -class Reducer(Enum): - none = 0 - all = 1 - latest = 2 - average = 3 - - -class Stream(Enum): - # no streaming - none = 0 - # passive stream, doesn't schedule an agent call, but returns the value to the agent - passive = 1 - # calls the agent with every value emitted, schedules an agent call - call_agent = 2 - - -class Return(Enum): - # doesn't return anything to an agent - none = 0 - # returns the value to the agent, but doesn't schedule an agent call - passive = 1 - # calls the agent with the value, scheduling an agent call - call_agent = 2 - - -@dataclass -class SkillConfig: - name: str - reducer: Reducer - stream: Stream - ret: Return - f: Callable | None = None - autostart: bool = False - - def bind(self, f: Callable) -> "SkillConfig": - self.f = f - return self - - def call(self, *args, **kwargs) -> Any: - if self.f is None: - raise ValueError( - "Function is not bound to the SkillConfig. This should be called only within AgentListener." - ) - - return self.f(*args, **kwargs, skillcall=True) - - def __str__(self): - parts = [f"name={self.name}"] - - # Only show reducer if stream is not none (streaming is happening) - if self.stream != Stream.none: - reducer_name = "unknown" - if self.reducer == Reducer.latest: - reducer_name = "latest" - elif self.reducer == Reducer.all: - reducer_name = "all" - elif self.reducer == Reducer.average: - reducer_name = "average" - parts.append(f"reducer={reducer_name}") - parts.append(f"stream={self.stream.name}") - - # Always show return mode - parts.append(f"ret={self.ret.name}") - return f"Skill({', '.join(parts)})" - - -class MsgType(Enum): - pending = 0 - start = 1 - stream = 2 - ret = 3 - error = 4 - - -class AgentMsg(Timestamped): - ts: float - type: MsgType - - def __init__( - self, - skill_name: str, - content: str | int | float | dict | list, - type: MsgType = MsgType.ret, - ) -> None: - self.ts = time.time() - self.skill_name = skill_name - self.content = content - self.type = type - - def __repr__(self): - return self.__str__() - - @property - def end(self) -> bool: - return self.type == MsgType.ret or self.type == MsgType.error - - @property - def start(self) -> bool: - return self.type == MsgType.start - - def __str__(self): - time_ago = time.time() - self.ts - - if self.type == MsgType.start: - return f"Start({time_ago:.1f}s ago)" - if self.type == MsgType.ret: - return f"Ret({time_ago:.1f}s ago, val={self.content})" - if self.type == MsgType.error: - return f"Error({time_ago:.1f}s ago, val={self.content})" - if self.type == MsgType.pending: - return f"Pending({time_ago:.1f}s ago)" - if self.type == MsgType.stream: - return f"Stream({time_ago:.1f}s ago, val={self.content})" From 8ea2455fe4672bb4869fba3d77a98ff1a047b364 Mon Sep 17 00:00:00 2001 From: lesh Date: Sat, 9 Aug 2025 16:51:54 -0700 Subject: [PATCH 04/59] small fixes, restructure of configurable --- dimos/agents2/agent.py | 27 ++++++++++++++++---------- dimos/agents2/spec.py | 9 ++++++--- dimos/agents2/test_agent.py | 19 +++++++++++++----- dimos/core/module.py | 30 ++++++++++++----------------- dimos/protocol/pubsub/lcmpubsub.py | 2 +- dimos/protocol/pubsub/spec.py | 4 ++-- dimos/protocol/service/__init__.py | 2 +- dimos/protocol/service/spec.py | 12 +++++------- dimos/protocol/service/test_spec.py | 18 +++++++++++++++++ dimos/protocol/skill/skill.py | 26 ++++++++++++------------- 10 files changed, 89 insertions(+), 60 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index 11336602d9..4095f1ea50 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -36,15 +36,15 @@ logger = setup_logger("dimos.protocol.agents2") -class Agent(AgentSpec, SkillCoordinator): +class Agent(AgentSpec): def __init__( self, *args, **kwargs, ): AgentSpec.__init__(self, *args, **kwargs) - SkillCoordinator.__init__(self) + self.coordinator = SkillCoordinator() self.messages = [] if self.config.system_prompt: @@ -57,11 +57,15 @@ def __init__( @rpc def start(self): - SkillCoordinator.start(self) + self.coordinator.start() @rpc def stop(self): - SkillCoordinator.stop(self) + self.coordinator.stop() + + @rpc + def clear_history(self): + self.messages.clear() async def agent_loop(self, seed_query: str = ""): self.messages.append(HumanMessage(seed_query)) @@ -75,15 +79,15 @@ async def agent_loop(self, seed_query: str = ""): logger.info(f"Agent response: {msg.content}") if msg.tool_calls: - self.execute_tool_calls(msg.tool_calls) + self.coordinator.execute_tool_calls(msg.tool_calls) - if not self.has_active_skills(): + if not self.coordinator.has_active_skills(): logger.info("No active tasks, exiting agent loop.") - return + return msg.content - await self.wait_for_updates() + await self.coordinator.wait_for_updates() - for call_id, update in self.generate_snapshot(clear=True).items(): + for call_id, update in self.coordinator.generate_snapshot(clear=True).items(): self.messages.append(update.agent_encode()) except Exception as e: @@ -93,5 +97,8 @@ async def agent_loop(self, seed_query: str = ""): traceback.print_exc() @rpc + def query_async(self, query: str): + return asyncio.ensure_future(self.agent_loop(query), loop=self._loop) + def query(self, query: str): - asyncio.ensure_future(self.agent_loop(query), loop=self._loop) + return asyncio.run_coroutine_threadsafe(self.agent_loop(query), self._loop).result() diff --git a/dimos/agents2/spec.py b/dimos/agents2/spec.py index 1a4cb463c2..7ecab4bbf4 100644 --- a/dimos/agents2/spec.py +++ b/dimos/agents2/spec.py @@ -14,6 +14,7 @@ """Base agent module that wraps BaseAgent for DimOS module usage.""" +from abc import ABC, abstractmethod from dataclasses import dataclass from enum import Enum from typing import Optional, Tuple, Union @@ -125,19 +126,21 @@ class AgentConfig: provider: Provider = Provider.OPENAI -class AgentSpec( - Service[AgentConfig], -): +class AgentSpec(Service[AgentConfig], ABC): default_config: type[AgentConfig] = AgentConfig @rpc + @abstractmethod def start(self): ... @rpc + @abstractmethod def stop(self): ... @rpc + @abstractmethod def clear_history(self): ... @rpc + @abstractmethod def query(self, query: str): ... diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index 78c81a56d6..268aac6be6 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -37,14 +37,23 @@ def sub(self, x: int, y: int) -> int: @pytest.mark.asyncio async def test_agent_init(): - # dimos = start(2) - # agent = dimos.deploy(Agent) - agent = Agent() + from dimos.core import start + + dimos = start(2) + agent = dimos.deploy( + Agent, + system_prompt="Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate", + ) + # agent = Agent( + # system_prompt="Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate" + # ) agent.register_skills(TestContainer()) agent.start() - agent.query( - "hi there, please tell me what's your name, and use add tool to add 124181112 and 124124. don't sum yourself, use a tool I provided" + print( + agent.query_async( + "hi there, please tell me what's your name, and use add tool to add 124181112 and 124124." + ) ) await asyncio.sleep(10) diff --git a/dimos/core/module.py b/dimos/core/module.py index 7cb2161fb8..f30cbd16a2 100644 --- a/dimos/core/module.py +++ b/dimos/core/module.py @@ -13,6 +13,7 @@ # limitations under the License. import asyncio import inspect +from dataclasses import dataclass from typing import ( Any, Callable, @@ -28,40 +29,33 @@ from dimos.core.core import T, rpc from dimos.core.stream import In, Out, RemoteIn, RemoteOut, Transport from dimos.protocol.rpc import LCMRPC, RPCSpec -from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec +from dimos.protocol.service import Configurable from dimos.protocol.tf import LCMTF, TFSpec -class CommsSpec: - rpc: type[RPCSpec] - agent: type[SkillCommsSpec] - tf: type[TFSpec] +@dataclass +class ModuleConfig: + rpc_transport: type[RPCSpec] = LCMRPC + tf_transport: type[TFSpec] = LCMTF -class LCMComms(CommsSpec): - rpc = LCMRPC - agent = LCMSkillComms - tf = LCMTF - - -class ModuleBase: - comms: CommsSpec = LCMComms +class ModuleBase(Configurable[ModuleConfig]): _rpc: Optional[RPCSpec] = None - _agent: Optional[SkillCommsSpec] = None _tf: Optional[TFSpec] = None _loop: asyncio.AbstractEventLoop = None + default_config = ModuleConfig + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) # we can completely override comms protocols if we want - if kwargs.get("comms", None) is not None: - self.comms = kwargs["comms"] try: # here we attempt to figure out if we are running on a dask worker # if so we use the dask worker _loop as ours, # and we register our RPC server worker = get_worker() self._loop = worker.loop if worker else None - self.rpc = self.comms.rpc() + self.rpc = self.config.rpc_transport() self.rpc.serve_module_rpc(self) self.rpc.start() except ValueError: @@ -79,7 +73,7 @@ def __init__(self, *args, **kwargs): @property def tf(self): if self._tf is None: - self._tf = self.comms.tf() + self._tf = self.config.tf_transport() return self._tf @tf.setter diff --git a/dimos/protocol/pubsub/lcmpubsub.py b/dimos/protocol/pubsub/lcmpubsub.py index b01ae40cca..5f15467800 100644 --- a/dimos/protocol/pubsub/lcmpubsub.py +++ b/dimos/protocol/pubsub/lcmpubsub.py @@ -54,7 +54,7 @@ def __str__(self) -> str: return f"{self.topic}#{self.lcm_type.msg_name}" -class LCMPubSubBase(PubSub[Topic, Any], LCMService): +class LCMPubSubBase(LCMService, PubSub[Topic, Any]): default_config = LCMConfig lc: lcm.LCM _stop_event: threading.Event diff --git a/dimos/protocol/pubsub/spec.py b/dimos/protocol/pubsub/spec.py index d7a0798557..81db8a0669 100644 --- a/dimos/protocol/pubsub/spec.py +++ b/dimos/protocol/pubsub/spec.py @@ -24,7 +24,7 @@ TopicT = TypeVar("TopicT") -class PubSub(ABC, Generic[TopicT, MsgT]): +class PubSub(Generic[TopicT, MsgT], ABC): """Abstract base class for pub/sub implementations with sugar methods.""" @abstractmethod @@ -91,7 +91,7 @@ def _queue_cb(msg: MsgT, topic: TopicT): unsubscribe_fn() -class PubSubEncoderMixin(ABC, Generic[TopicT, MsgT]): +class PubSubEncoderMixin(Generic[TopicT, MsgT], ABC): """Mixin that encodes messages before publishing and decodes them after receiving. Usage: Just specify encoder and decoder as a subclass: diff --git a/dimos/protocol/service/__init__.py b/dimos/protocol/service/__init__.py index ce8a823f86..4726ad5f83 100644 --- a/dimos/protocol/service/__init__.py +++ b/dimos/protocol/service/__init__.py @@ -1,2 +1,2 @@ from dimos.protocol.service.lcmservice import LCMService -from dimos.protocol.service.spec import Service +from dimos.protocol.service.spec import Configurable, Service diff --git a/dimos/protocol/service/spec.py b/dimos/protocol/service/spec.py index 0f52fd8a18..c79b8d57ba 100644 --- a/dimos/protocol/service/spec.py +++ b/dimos/protocol/service/spec.py @@ -19,18 +19,16 @@ ConfigT = TypeVar("ConfigT") -class Service(ABC, Generic[ConfigT]): +class Configurable(Generic[ConfigT]): default_config: Type[ConfigT] def __init__(self, **kwargs) -> None: self.config: ConfigT = self.default_config(**kwargs) + +class Service(Configurable[ConfigT], ABC): @abstractmethod - def start(self) -> None: - """Start the service.""" - ... + def start(self) -> None: ... @abstractmethod - def stop(self) -> None: - """Stop the service.""" - ... + def stop(self) -> None: ... diff --git a/dimos/protocol/service/test_spec.py b/dimos/protocol/service/test_spec.py index cad531ad1e..0706af5112 100644 --- a/dimos/protocol/service/test_spec.py +++ b/dimos/protocol/service/test_spec.py @@ -84,3 +84,21 @@ def test_complete_configuration_override(): assert service.config.timeout == 60.0 assert service.config.max_connections == 50 assert service.config.ssl_enabled is True + + +def test_service_subclassing(): + @dataclass + class ExtraConfig(DatabaseConfig): + extra_param: str = "default_value" + + class ExtraDatabaseService(DatabaseService): + default_config = ExtraConfig + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + bla = ExtraDatabaseService(host="custom-host2", extra_param="extra_value") + + assert bla.config.host == "custom-host2" + assert bla.config.extra_param == "extra_value" + assert bla.config.port == 5432 # Default value from DatabaseConfig diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index f612ec7c83..f7f87e4857 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -13,9 +13,11 @@ # limitations under the License. import threading +from dataclasses import dataclass from typing import Any, Callable, Optional from dimos.core import rpc +from dimos.protocol.service import Configurable from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec from dimos.protocol.skill.schema import function_to_schema from dimos.protocol.skill.type import ( @@ -74,19 +76,17 @@ def run_function(): return decorator -class CommsSpec: - agent: type[SkillCommsSpec] - - -class LCMComms(CommsSpec): - agent: type[SkillCommsSpec] = LCMSkillComms +@dataclass +class SkillContainerConfig: + skill_transport: type[SkillCommsSpec] = LCMSkillComms # here we can have also dynamic skills potentially # agent can check .skills each time when introspecting -class SkillContainer: - comms: CommsSpec = LCMComms - _agent_comms: Optional[SkillCommsSpec] = None +class SkillContainer(Configurable[SkillContainerConfig]): + default_config = SkillContainerConfig + _skill_transport: Optional[SkillCommsSpec] = None + dynamic_skills = False def __str__(self) -> str: @@ -104,7 +104,7 @@ def skills(self) -> dict[str, SkillConfig]: } @property - def agent_comms(self) -> SkillCommsSpec: - if self._agent_comms is None: - self._agent_comms = self.comms.agent() - return self._agent_comms + def skill_transport(self) -> SkillCommsSpec: + if self._skill_transport is None: + self._skill_transport = self.config.skill_transport() + return self._skill_transport From 4796a5d4c120db9d82dd8ecf6560e7d335428e6a Mon Sep 17 00:00:00 2001 From: lesh Date: Sat, 9 Aug 2025 17:04:14 -0700 Subject: [PATCH 05/59] bugfixes --- dimos/agents2/agent.py | 7 +++++++ dimos/agents2/spec.py | 7 ++++--- dimos/agents2/test_agent.py | 4 +--- dimos/protocol/skill/coordinator.py | 16 ++++++++-------- dimos/protocol/skill/skill.py | 8 +++++--- dimos/utils/cli/agentspy/agentspy.py | 2 +- 6 files changed, 26 insertions(+), 18 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index 4095f1ea50..eb575cce3b 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -102,3 +102,10 @@ def query_async(self, query: str): def query(self, query: str): return asyncio.run_coroutine_threadsafe(self.agent_loop(query), self._loop).result() + + @rpc + def register_skills(self, container): + return self.coordinator.register_skills(container) + + def get_tools(self): + return self.coordinator.get_tools() diff --git a/dimos/agents2/spec.py b/dimos/agents2/spec.py index 7ecab4bbf4..79cdd2fdb4 100644 --- a/dimos/agents2/spec.py +++ b/dimos/agents2/spec.py @@ -24,7 +24,8 @@ SystemMessage, ) -from dimos.core import rpc +from dimos.core import Module, rpc +from dimos.core.module import ModuleConfig from dimos.protocol.service import Service from dimos.protocol.skill.skill import SkillContainer from dimos.utils.logging_config import setup_logger @@ -119,14 +120,14 @@ class Model(str, Enum): @dataclass -class AgentConfig: +class AgentConfig(ModuleConfig): system_prompt: Optional[str | SystemMessage] = None skills: Optional[SkillContainer | list[SkillContainer]] = None model: Model = Model.GPT_4O provider: Provider = Provider.OPENAI -class AgentSpec(Service[AgentConfig], ABC): +class AgentSpec(Service[AgentConfig], Module, ABC): default_config: type[AgentConfig] = AgentConfig @rpc diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index 268aac6be6..0a4ffd9d1b 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -51,9 +51,7 @@ async def test_agent_init(): agent.start() print( - agent.query_async( + agent.query( "hi there, please tell me what's your name, and use add tool to add 124181112 and 124124." ) ) - - await asyncio.sleep(10) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 3ec56308b2..be42c11c48 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -31,7 +31,7 @@ from rich.table import Table from rich.text import Text -from dimos.core import Module, rpc +from dimos.core import rpc from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec from dimos.protocol.skill.skill import SkillConfig, SkillContainer from dimos.protocol.skill.type import MsgType, Reducer, Return, SkillMsg, Stream @@ -42,8 +42,8 @@ @dataclass -class AgentInputConfig: - agent_comms: type[SkillCommsSpec] = LCMSkillComms +class SkillCoordinatorConfig: + skill_transport: type[SkillCommsSpec] = LCMSkillComms class SkillStateEnum(Enum): @@ -163,7 +163,8 @@ def __str__(self) -> str: return "\n".join(lines) -class SkillCoordinator(SkillContainer, Module): +class SkillCoordinator(SkillContainer): + default_config = SkillCoordinatorConfig empty: bool = True _static_containers: list[SkillContainer] @@ -174,7 +175,6 @@ class SkillCoordinator(SkillContainer, Module): _loop: Optional[asyncio.AbstractEventLoop] def __init__(self) -> None: - Module.__init__(self) SkillContainer.__init__(self) self._static_containers = [] self._dynamic_containers = [] @@ -184,12 +184,12 @@ def __init__(self) -> None: @rpc def start(self) -> None: - self.agent_comms.start() - self.agent_comms.subscribe(self.handle_message) + self.skill_transport.start() + self.skill_transport.subscribe(self.handle_message) @rpc def stop(self) -> None: - self.agent_comms.stop() + self.skill_transport.stop() def len(self) -> int: return len(self._skills) diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index f7f87e4857..085a352327 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -40,12 +40,14 @@ def wrapper(self, *args, **kwargs): del kwargs["call_id"] def run_function(): - self.agent_comms.publish(SkillMsg(call_id, skill, None, type=MsgType.start)) + self.skill_transport.publish(SkillMsg(call_id, skill, None, type=MsgType.start)) try: val = f(self, *args, **kwargs) - self.agent_comms.publish(SkillMsg(call_id, skill, val, type=MsgType.ret)) + self.skill_transport.publish( + SkillMsg(call_id, skill, val, type=MsgType.ret) + ) except Exception as e: - self.agent_comms.publish( + self.skill_transport.publish( SkillMsg(call_id, skill, str(e), type=MsgType.error) ) diff --git a/dimos/utils/cli/agentspy/agentspy.py b/dimos/utils/cli/agentspy/agentspy.py index 2c58ab4cf3..3f51afc968 100644 --- a/dimos/utils/cli/agentspy/agentspy.py +++ b/dimos/utils/cli/agentspy/agentspy.py @@ -46,7 +46,7 @@ def start(self): self.agent_interface.start() # Subscribe to the agent interface's comms - self.agent_interface.agent_comms.subscribe(self._handle_message) + self.agent_interface.skill_transport.subscribe(self._handle_message) def stop(self): """Stop spying.""" From 21f82fe6803a846f3eb51f99f1f287d84934794d Mon Sep 17 00:00:00 2001 From: lesh Date: Sat, 9 Aug 2025 17:12:55 -0700 Subject: [PATCH 06/59] get_loop functionality --- dimos/agents2/test_agent.py | 18 ++++++++++-------- dimos/core/module.py | 21 +++++++++++++++++++++ dimos/protocol/skill/coordinator.py | 3 +++ 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index 0a4ffd9d1b..e17e5a88c9 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -39,19 +39,21 @@ def sub(self, x: int, y: int) -> int: async def test_agent_init(): from dimos.core import start - dimos = start(2) - agent = dimos.deploy( - Agent, - system_prompt="Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate", - ) - # agent = Agent( - # system_prompt="Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate" + # dimos = start(2) + # agent = dimos.deploy( + # Agent, + # system_prompt="Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate", # ) + agent = Agent( + system_prompt="Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate" + ) agent.register_skills(TestContainer()) agent.start() print( - agent.query( + agent.query_async( "hi there, please tell me what's your name, and use add tool to add 124181112 and 124124." ) ) + + await asyncio.sleep(5) diff --git a/dimos/core/module.py b/dimos/core/module.py index f30cbd16a2..01abfcdb8a 100644 --- a/dimos/core/module.py +++ b/dimos/core/module.py @@ -33,6 +33,26 @@ from dimos.protocol.tf import LCMTF, TFSpec +def get_loop() -> asyncio.AbstractEventLoop: + try: + # here we attempt to figure out if we are running on a dask worker + # if so we use the dask worker _loop as ours, + # and we register our RPC server + worker = get_worker() + if worker.loop: + return worker.loop + + except ValueError: + ... + + try: + return asyncio.get_running_loop() + except RuntimeError: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + return loop + + @dataclass class ModuleConfig: rpc_transport: type[RPCSpec] = LCMRPC @@ -48,6 +68,7 @@ class ModuleBase(Configurable[ModuleConfig]): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + self._loop = get_loop() # we can completely override comms protocols if we want try: # here we attempt to figure out if we are running on a dask worker diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index be42c11c48..c948028e91 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -32,6 +32,7 @@ from rich.text import Text from dimos.core import rpc +from dimos.core.module import get_loop from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec from dimos.protocol.skill.skill import SkillConfig, SkillContainer from dimos.protocol.skill.type import MsgType, Reducer, Return, SkillMsg, Stream @@ -74,6 +75,7 @@ class SkillState(TimestampedCollection): def __init__(self, call_id: str, name: str, skill_config: Optional[SkillConfig] = None) -> None: super().__init__() + self.skill_config = skill_config or SkillConfig( name=name, stream=Stream.none, ret=Return.none, reducer=Reducer.none, schema={} ) @@ -176,6 +178,7 @@ class SkillCoordinator(SkillContainer): def __init__(self) -> None: SkillContainer.__init__(self) + self._loop = get_loop() self._static_containers = [] self._dynamic_containers = [] self._skills = {} From 0d455ae22ac5906ba2e4fccfcee6f2dc4480053e Mon Sep 17 00:00:00 2001 From: lesh Date: Sat, 9 Aug 2025 18:10:12 -0700 Subject: [PATCH 07/59] langchain dep --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fcc62bf476..30038ac143 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,9 +43,10 @@ dependencies = [ "sse-starlette>=2.2.1", "uvicorn>=0.34.0", - # Agent Memory + # Agents "langchain-chroma>=0.1.4", "langchain-openai>=0.2.14", + "langchain==0.3.27", # Class Extraction "pydantic", From b92446f0624eceefb184f6ca9eafce1e6f7daf59 Mon Sep 17 00:00:00 2001 From: lesh Date: Sat, 9 Aug 2025 22:19:43 -0700 Subject: [PATCH 08/59] plucked ci changes from agent-refactor --- .github/workflows/docker.yml | 16 +++++++++++----- .github/workflows/tests.yml | 4 ++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 929462d8ae..0c6abff68d 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -132,7 +132,9 @@ jobs: uses: ./.github/workflows/_docker-build-template.yml with: should-run: ${{ - needs.check-changes.result == 'success' && ((needs.ros-python.result == 'success') || (needs.ros-python.result == 'skipped')) && (needs.check-changes.outputs.dev == 'true') + needs.check-changes.result == 'success' && + (needs.check-changes.outputs.dev == 'true' || + (needs.ros-python.result == 'success' && (needs.check-changes.outputs.python == 'true' || needs.check-changes.outputs.ros == 'true'))) }} from-image: ghcr.io/dimensionalos/ros-python:${{ needs.ros-python.result == 'success' && needs.check-changes.outputs.branch-tag || 'dev' }} to-image: ghcr.io/dimensionalos/ros-dev:${{ needs.check-changes.outputs.branch-tag }} @@ -142,6 +144,7 @@ jobs: needs: [check-changes, ros-dev] if: always() uses: ./.github/workflows/tests.yml + secrets: inherit with: should-run: ${{ needs.check-changes.result == 'success' && @@ -150,12 +153,13 @@ jobs: needs.check-changes.outputs.tests == 'true')) }} cmd: "pytest && pytest -m ros" # run tests that depend on ros as well - dev-image: ros-dev:${{ needs.check-changes.outputs.dev == 'true' && needs.ros-dev.result == 'success' && needs.check-changes.outputs.branch-tag || 'dev' }} + dev-image: ros-dev:${{ (needs.check-changes.outputs.python == 'true' || needs.check-changes.outputs.dev == 'true' || needs.check-changes.outputs.ros == 'true') && needs.ros-dev.result == 'success' && needs.check-changes.outputs.branch-tag || 'dev' }} run-tests: needs: [check-changes, dev] if: always() uses: ./.github/workflows/tests.yml + secrets: inherit with: should-run: ${{ needs.check-changes.result == 'success' && @@ -164,13 +168,14 @@ jobs: needs.check-changes.outputs.tests == 'true')) }} cmd: "pytest" - dev-image: dev:${{ needs.check-changes.outputs.dev == 'true' && needs.dev.result == 'success' && needs.check-changes.outputs.branch-tag || 'dev' }} + dev-image: dev:${{ (needs.check-changes.outputs.python == 'true' || needs.check-changes.outputs.dev == 'true') && needs.dev.result == 'success' && needs.check-changes.outputs.branch-tag || 'dev' }} # we run in parallel with normal tests for speed run-heavy-tests: needs: [check-changes, dev] if: always() uses: ./.github/workflows/tests.yml + secrets: inherit with: should-run: ${{ needs.check-changes.result == 'success' && @@ -179,12 +184,13 @@ jobs: needs.check-changes.outputs.tests == 'true')) }} cmd: "pytest -m heavy" - dev-image: dev:${{ needs.check-changes.outputs.dev == 'true' && needs.dev.result == 'success' && needs.check-changes.outputs.branch-tag || 'dev' }} + dev-image: dev:${{ (needs.check-changes.outputs.python == 'true' || needs.check-changes.outputs.dev == 'true') && needs.dev.result == 'success' && needs.check-changes.outputs.branch-tag || 'dev' }} run-lcm-tests: needs: [check-changes, dev] if: always() uses: ./.github/workflows/tests.yml + secrets: inherit with: should-run: ${{ needs.check-changes.result == 'success' && @@ -193,7 +199,7 @@ jobs: needs.check-changes.outputs.tests == 'true')) }} cmd: "pytest -m lcm" - dev-image: dev:${{ needs.check-changes.outputs.dev == 'true' && needs.dev.result == 'success' && needs.check-changes.outputs.branch-tag || 'dev' }} + dev-image: dev:${{ (needs.check-changes.outputs.python == 'true' || needs.check-changes.outputs.dev == 'true') && needs.dev.result == 'success' && needs.check-changes.outputs.branch-tag || 'dev' }} # Run module tests directly to avoid pytest forking issues # run-module-tests: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2d9b917f0e..a94839a505 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -40,6 +40,10 @@ jobs: runs-on: [self-hosted, Linux] container: image: ghcr.io/dimensionalos/${{ inputs.dev-image }} + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + ALIBABA_API_KEY: ${{ secrets.ALIBABA_API_KEY }} steps: - uses: actions/checkout@v4 From b6bf28c034899ddd7e7dced20bd35970d869940d Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 14 Aug 2025 15:00:37 +0300 Subject: [PATCH 09/59] skillcontainer hosts skill execution --- dimos/protocol/skill/coordinator.py | 16 ++++++++-- dimos/protocol/skill/skill.py | 37 +++++++++++++++++------- dimos/protocol/skill/test_coordinator.py | 7 ++--- 3 files changed, 43 insertions(+), 17 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index c948028e91..b711452e74 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -72,6 +72,7 @@ class SkillState(TimestampedCollection): name: str state: SkillStateEnum skill_config: SkillConfig + value: Optional[Any] = None def __init__(self, call_id: str, name: str, skill_config: Optional[SkillConfig] = None) -> None: super().__init__() @@ -104,11 +105,13 @@ def handle_msg(self, msg: SkillMsg) -> bool: if msg.type == MsgType.ret: self.state = SkillStateEnum.completed + self.value = msg.content if self.skill_config.ret == Return.call_agent: return True return False if msg.type == MsgType.error: + self.value = msg.content self.state = SkillStateEnum.error return True @@ -218,14 +221,14 @@ def execute_tool_calls(self, tool_calls: List[ToolCall]) -> None: """Execute a list of tool calls from the agent.""" for tool_call in tool_calls: logger.info(f"executing skill call {tool_call}") - self.call( + self.call_skill( tool_call.get("id"), tool_call.get("name"), tool_call.get("args"), ) # internal skill call - def call(self, call_id: str, skill_name: str, args: dict[str, Any]) -> None: + def call_skill(self, call_id: str, skill_name: str, args: dict[str, Any]) -> None: skill_config = self.get_skill_config(skill_name) if not skill_config: logger.error( @@ -237,6 +240,7 @@ def call(self, call_id: str, skill_name: str, args: dict[str, Any]) -> None: self._skill_state[call_id] = SkillState( name=skill_name, skill_config=skill_config, call_id=call_id ) + return skill_config.call(call_id, *args.get("args", []), **args.get("kwargs", {})) # Receives a message from active skill @@ -297,7 +301,13 @@ def generate_snapshot(self, clear: bool = True) -> SkillStateDict: logger.info(f"Skill {skill_run.name} (call_id={call_id}) finished") to_delete.append(call_id) if skill_run.state == SkillStateEnum.error: - logger.error(f"Skill run error for {skill_run.name} (call_id={call_id})") + error_msg = skill_run.value.get("msg", "Unknown error") + error_traceback = skill_run.value.get("traceback", "No traceback available") + + logger.error( + f"Skill error for {skill_run.name} (call_id={call_id}): {error_msg}" + ) + print(error_traceback) to_delete.append(call_id) for call_id in to_delete: diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index 085a352327..a5230acc49 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -40,16 +40,7 @@ def wrapper(self, *args, **kwargs): del kwargs["call_id"] def run_function(): - self.skill_transport.publish(SkillMsg(call_id, skill, None, type=MsgType.start)) - try: - val = f(self, *args, **kwargs) - self.skill_transport.publish( - SkillMsg(call_id, skill, val, type=MsgType.ret) - ) - except Exception as e: - self.skill_transport.publish( - SkillMsg(call_id, skill, str(e), type=MsgType.error) - ) + return self.call_skill(call_id, skill, args, kwargs) thread = threading.Thread(target=run_function) thread.start() @@ -94,6 +85,32 @@ class SkillContainer(Configurable[SkillContainerConfig]): def __str__(self) -> str: return f"SkillContainer({self.__class__.__name__})" + def call_skill( + self, call_id: str, skill_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] + ) -> None: + f = getattr(self, skill_name, None) + + if f is None: + raise ValueError(f"Skill '{skill_name}' not found in {self.__class__.__name__}") + + self.skill_transport.publish(SkillMsg(call_id, skill, None, type=MsgType.start)) + try: + val = f(*args, **kwargs) + self.skill_transport.publish(SkillMsg(call_id, skill, val, type=MsgType.ret)) + except Exception as e: + import traceback + + formatted_traceback = "".join(traceback.TracebackException.from_exception(e).format()) + + self.skill_transport.publish( + SkillMsg( + call_id, + skill, + {"msg": str(e), "traceback": formatted_traceback}, + type=MsgType.error, + ) + ) + @rpc def skills(self) -> dict[str, SkillConfig]: # Avoid recursion by excluding this property itself diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index a75ea85e55..c58b506505 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -39,8 +39,7 @@ async def test_coordinator_generator(): skillCoordinator.register_skills(TestContainer()) skillCoordinator.start() - - skillCoordinator.call("test-call-0", "delayadd", {"args": [1, 2]}) + skillCoordinator.call_skill("test-call-0", "delayadd", {"args": [1, 2]}) time.sleep(0.1) @@ -55,12 +54,12 @@ async def test_coordinator_generator(): cnt += 1 if cnt < 5: - skillCoordinator.call( + skillCoordinator.call_skill( f"test-call-{cnt}-delay", "delayadd", {"args": [cnt, 2]}, ) - skillCoordinator.call( + skillCoordinator.call_skill( f"test-call-{cnt}", "add", {"args": [cnt, 2]}, From c37f61181e32e91b1ce8dbd23e6e686b66b19d54 Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 14 Aug 2025 15:10:53 +0300 Subject: [PATCH 10/59] better documentation --- dimos/protocol/skill/coordinator.py | 10 ++++++-- dimos/protocol/skill/skill.py | 40 +++++++++++++++++++++++++++-- dimos/protocol/skill/type.py | 5 ++-- 3 files changed, 48 insertions(+), 7 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index b711452e74..73dd8a79ab 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -65,8 +65,8 @@ def colored_name(self) -> Text: # TODO pending timeout, running timeout, etc. -# This object maintains the state of a skill run -# It is used to track the skill's progress, messages, and state +# +# This object maintains the state of a skill run on a caller end class SkillState(TimestampedCollection): call_id: str name: str @@ -152,6 +152,7 @@ def __str__(self) -> str: return capture.get() +# subclassed the dict just to have a better string representation class SkillStateDict(dict[str, SkillState]): """Custom dict for skill states with better string representation.""" @@ -168,6 +169,11 @@ def __str__(self) -> str: return "\n".join(lines) +# This class is responsible for managing the lifecycle of skills, +# handling skill calls, and coordinating communication between the agent and skills. +# +# It aggregates skills from static and dynamic containers, manages skill states, +# and decides when to notify the agent about updates. class SkillCoordinator(SkillContainer): default_config = SkillCoordinatorConfig empty: bool = True diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index a5230acc49..2a916ec080 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -29,6 +29,31 @@ Stream, ) +# skill is a decorator that allows us to specify a skill behaviour for a function. +# +# there are several parameters that can be specified: +# - ret: how to return the value from the skill, can be one of: +# +# Return.none: doesn't return anything to an agent +# Return.passive: doesn't schedule an agent call but +# returns the value to the agent when agent is called +# Return.call_agent: calls the agent with the value, scheduling an agent call +# +# - stream: if the skill streams values, it can behave in several ways: +# +# Stream.none: no streaming, skill doesn't emit any values +# Stream.passive: doesn't schedule an agent call upon emitting a value, +# returns the streamed value to the agent when agent is called +# Stream.call_agent: calls the agent with every value emitted, scheduling an agent call +# +# - reducer: defines an optional strategy for passive streams and how we collapse potential +# multiple values into something meaningful for the agent +# +# Reducer.none: no reduction, every emitted value is returned to the agent +# Reducer.latest: only the latest value is returned to the agent +# Reducer.average: assumes the skill emits a number, +# the average of all values is returned to the agent + def skill(reducer=Reducer.latest, stream=Stream.none, ret=Return.call_agent): def decorator(f: Callable[..., Any]) -> Any: @@ -74,8 +99,19 @@ class SkillContainerConfig: skill_transport: type[SkillCommsSpec] = LCMSkillComms -# here we can have also dynamic skills potentially -# agent can check .skills each time when introspecting +# Inherited by any class that wants to provide skills +# (This component works standalone but commonly used by DimOS modules) +# +# - It allows us to specify a communication layer for skills (LCM for now by default) +# - introspection of available skills via the `skills` RPC method +# - ability to provide dynamic context dependant skills with dynamic_skills flag +# for this you'll need to override the `skills` method to return a dynamic set of skills +# SkillCoordinator will call this method to get the skills available upon every request to +# the agent +# +# +# Hosts the function execution and handles correct publishing of skill messages +# according to the skill decorator configuration class SkillContainer(Configurable[SkillContainerConfig]): default_config = SkillContainerConfig _skill_transport: Optional[SkillCommsSpec] = None diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index 0ed1c91ad3..f9c7e8f377 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -27,9 +27,8 @@ class Call(Enum): class Reducer(Enum): none = 0 - all = 1 - latest = 2 - average = 3 + latest = 1 + average = 2 class Stream(Enum): From 34da5511f9dfdb0628d07d51b75d3bee152152be Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 14 Aug 2025 15:20:03 +0300 Subject: [PATCH 11/59] skillcoordinator handles threading --- dimos/protocol/skill/comms.py | 7 +++++-- dimos/protocol/skill/skill.py | 32 +++++++++++++++++++++++--------- dimos/protocol/skill/type.py | 2 ++ 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/dimos/protocol/skill/comms.py b/dimos/protocol/skill/comms.py index 7703eda3e1..67fa47f31c 100644 --- a/dimos/protocol/skill/comms.py +++ b/dimos/protocol/skill/comms.py @@ -22,8 +22,10 @@ from dimos.protocol.service import Service from dimos.protocol.skill.type import SkillMsg - # defines a protocol for communication between skills and agents +# it has simple requirements of pub/sub semantics capable of sending and receiving SkillMsg objects + + class SkillCommsSpec: @abstractmethod def publish(self, msg: SkillMsg) -> None: ... @@ -44,11 +46,12 @@ def stop(self) -> None: ... @dataclass class PubSubCommsConfig(Generic[TopicT, MsgT]): - topic: Optional[TopicT] = None # Required field but needs default for dataclass inheritance + topic: Optional[TopicT] = None pubsub: Union[type[PubSub[TopicT, MsgT]], PubSub[TopicT, MsgT], None] = None autostart: bool = True +# implementation of the SkillComms using any standard PubSub mechanism class PubSubComms(Service[PubSubCommsConfig], SkillCommsSpec): default_config: type[PubSubCommsConfig] = PubSubCommsConfig diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index 2a916ec080..baa3c7afb8 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -64,12 +64,13 @@ def wrapper(self, *args, **kwargs): if call_id: del kwargs["call_id"] - def run_function(): - return self.call_skill(call_id, skill, args, kwargs) - - thread = threading.Thread(target=run_function) - thread.start() - return None + return self.call_skill(call_id, skill, args, kwargs) + # def run_function(): + # return self.call_skill(call_id, skill, args, kwargs) + # + # thread = threading.Thread(target=run_function) + # thread.start() + # return None return f(self, *args, **kwargs) @@ -99,9 +100,23 @@ class SkillContainerConfig: skill_transport: type[SkillCommsSpec] = LCMSkillComms +def threaded(f: Callable[..., Any]) -> Callable[..., None]: + """Decorator to run a function in a separate thread.""" + + def wrapper(self, *args, **kwargs): + thread = threading.Thread(target=f, args=(self, *args), kwargs=kwargs) + thread.start() + return None + + return wrapper + + # Inherited by any class that wants to provide skills # (This component works standalone but commonly used by DimOS modules) # +# Hosts the function execution and handles correct publishing of skill messages +# according to the individual skill decorator configuration +# # - It allows us to specify a communication layer for skills (LCM for now by default) # - introspection of available skills via the `skills` RPC method # - ability to provide dynamic context dependant skills with dynamic_skills flag @@ -109,9 +124,6 @@ class SkillContainerConfig: # SkillCoordinator will call this method to get the skills available upon every request to # the agent # -# -# Hosts the function execution and handles correct publishing of skill messages -# according to the skill decorator configuration class SkillContainer(Configurable[SkillContainerConfig]): default_config = SkillContainerConfig _skill_transport: Optional[SkillCommsSpec] = None @@ -121,6 +133,8 @@ class SkillContainer(Configurable[SkillContainerConfig]): def __str__(self) -> str: return f"SkillContainer({self.__class__.__name__})" + # same interface as coordinator call_skill + @threaded def call_skill( self, call_id: str, skill_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] ) -> None: diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index f9c7e8f377..bec3f7a3ab 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -19,6 +19,8 @@ from dimos.types.timestamped import Timestamped +# defines protocol messages used for communication between skills and agents + class Call(Enum): Implicit = 0 From c7a7446d4ef7176bd49942264ee406c0469bd773 Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 14 Aug 2025 15:27:06 +0300 Subject: [PATCH 12/59] streaming skill sketch, async skill sketch --- dimos/protocol/skill/skill.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index baa3c7afb8..b496c934be 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import asyncio import threading from dataclasses import dataclass from typing import Any, Callable, Optional @@ -87,7 +88,7 @@ def wrapper(self, *args, **kwargs): # implicit RPC call as well wrapper.__rpc__ = True # type: ignore[attr-defined] - wrapper._skill = skill_config # type: ignore[attr-defined] + wrapper._skill_config = skill_config # type: ignore[attr-defined] wrapper.__name__ = f.__name__ # Preserve original function name wrapper.__doc__ = f.__doc__ # Preserve original docstring return wrapper @@ -133,7 +134,8 @@ class SkillContainer(Configurable[SkillContainerConfig]): def __str__(self) -> str: return f"SkillContainer({self.__class__.__name__})" - # same interface as coordinator call_skill + # TODO: figure out standard args/kwargs passing format, + # use same interface as skill coordinator call_skill method @threaded def call_skill( self, call_id: str, skill_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] @@ -141,11 +143,27 @@ def call_skill( f = getattr(self, skill_name, None) if f is None: - raise ValueError(f"Skill '{skill_name}' not found in {self.__class__.__name__}") + raise ValueError(f"Function '{skill_name}' not found in {self.__class__.__name__}") + config = getattr(f, "_skill_config", None) + if config is None: + raise ValueError(f"Function '{skill_name}' in {self.__class__.__name__} is not a skill") + + # we notify the skill transport about the start of the skill call self.skill_transport.publish(SkillMsg(call_id, skill, None, type=MsgType.start)) + try: val = f(*args, **kwargs) + + # check if the skill returned a coroutine, if it is, block until it resolves + if isinstance(val, asyncio.Future): + val = asyncio.run(val) + + # check if the skill is a generator, if it is, we need to iterate over it + if hasattr(val, "__iter__") and not isinstance(val, str): + for v in val: + self.skill_transport.publish(SkillMsg(call_id, skill, v, type=MsgType.stream)) + self.skill_transport.publish(SkillMsg(call_id, skill, val, type=MsgType.ret)) except Exception as e: import traceback @@ -165,11 +183,11 @@ def call_skill( def skills(self) -> dict[str, SkillConfig]: # Avoid recursion by excluding this property itself return { - name: getattr(self, name)._skill + name: getattr(self, name)._skill_config for name in dir(self) if not name.startswith("_") and name != "skills" - and hasattr(getattr(self, name), "_skill") + and hasattr(getattr(self, name), "_skill_config") } @property From c29e88758bac0f7f65f735443b17e9f7fd3317a1 Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 14 Aug 2025 16:04:02 +0300 Subject: [PATCH 13/59] initial streaming implemented --- dimos/protocol/skill/coordinator.py | 11 +++++-- dimos/protocol/skill/skill.py | 13 ++++++-- dimos/protocol/skill/test_coordinator.py | 39 +++++++++++++++--------- dimos/protocol/skill/testing_utils.py | 10 ++++++ 4 files changed, 54 insertions(+), 19 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 73dd8a79ab..2c9854ddbc 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -85,8 +85,13 @@ def __init__(self, call_id: str, name: str, skill_config: Optional[SkillConfig] self.call_id = call_id self.name = name + @property + def messages(self) -> List[SkillMsg]: + return self._items + def agent_encode(self) -> ToolMessage: - last_msg = self._items[-1] + # here we need to process streamed messages depending on the reducer + last_msg = self.messages[-1] return ToolMessage(last_msg.content, name=self.name, tool_call_id=self.call_id) # returns True if the agent should be called for this message @@ -137,7 +142,7 @@ def __str__(self) -> str: parts.append(Text(f"{self.duration():.2f}s")) if len(self): - parts.append(Text(f", last_msg={self._items[-1]})")) + parts.append(Text(f", last_msg={self.messages[-1]})")) else: parts.append(Text(", No Messages)")) @@ -254,7 +259,7 @@ def call_skill(self, call_id: str, skill_name: str, args: dict[str, Any]) -> Non # # Checks if agent needs to be notified (if ToolConfig has Return=call_agent or Stream=call_agent) def handle_message(self, msg: SkillMsg) -> None: - logger.info(f"{msg.skill_name}, {msg.call_id} - {msg}") + logger.info(f"SkillMsg from {msg.skill_name}, {msg.call_id} - {msg}") if self._skill_state.get(msg.call_id) is None: logger.warn( diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index b496c934be..ac6280b97f 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -161,10 +161,19 @@ def call_skill( # check if the skill is a generator, if it is, we need to iterate over it if hasattr(val, "__iter__") and not isinstance(val, str): + last_value = None for v in val: - self.skill_transport.publish(SkillMsg(call_id, skill, v, type=MsgType.stream)) + last_value = v + self.skill_transport.publish( + SkillMsg(call_id, skill_name, v, type=MsgType.stream) + ) + self.skill_transport.publish( + SkillMsg(call_id, skill_name, last_value, type=MsgType.ret) + ) + + else: + self.skill_transport.publish(SkillMsg(call_id, skill_name, val, type=MsgType.ret)) - self.skill_transport.publish(SkillMsg(call_id, skill, val, type=MsgType.ret)) except Exception as e: import traceback diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index c58b506505..5ca8e109ab 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -18,23 +18,11 @@ import pytest from dimos.protocol.skill.coordinator import SkillCoordinator -from dimos.protocol.skill.skill import SkillContainer, skill from dimos.protocol.skill.testing_utils import TestContainer -class TestContainer2(SkillContainer): - @skill() - def add(self, x: int, y: int) -> int: - return x + y - - @skill() - def delayadd(self, x: int, y: int) -> int: - time.sleep(0.5) - return x + y - - @pytest.mark.asyncio -async def test_coordinator_generator(): +async def test_coordinator_parallel_calls(): skillCoordinator = SkillCoordinator() skillCoordinator.register_skills(TestContainer()) @@ -67,4 +55,27 @@ async def test_coordinator_generator(): time.sleep(0.1 * cnt) - print("All updates processed successfully.") + +@pytest.mark.asyncio +async def test_coordinator_generator(): + skillCoordinator = SkillCoordinator() + skillCoordinator.register_skills(TestContainer()) + + skillCoordinator.start() + skillCoordinator.call_skill("test-call-0", "counter", {"args": [10]}) + + skillstate = None + while await skillCoordinator.wait_for_updates(1): + skillstate = skillCoordinator.generate_snapshot(clear=True) + print("Skill State:", skillstate) + print("Agent update:", skillstate["test-call-0"].agent_encode()) + # we simulate agent thinking + await asyncio.sleep(0.25) + + print("Skill lifecycle finished") + print( + "All messages:" + + "".join( + map(lambda x: f"\n {x}", skillstate["test-call-0"].messages), + ), + ) diff --git a/dimos/protocol/skill/testing_utils.py b/dimos/protocol/skill/testing_utils.py index fda4c27591..caf1f54f2b 100644 --- a/dimos/protocol/skill/testing_utils.py +++ b/dimos/protocol/skill/testing_utils.py @@ -13,8 +13,10 @@ # limitations under the License. import time +from typing import Generator, Optional from dimos.protocol.skill.skill import SkillContainer, skill +from dimos.protocol.skill.type import Reducer, Return, Stream class TestContainer(SkillContainer): @@ -26,3 +28,11 @@ def add(self, x: int, y: int) -> int: def delayadd(self, x: int, y: int) -> int: time.sleep(0.3) return x + y + + @skill(stream=Stream.call_agent) + def counter(self, count_to: int, delay: Optional[float] = 0.1) -> Generator[int, None, None]: + """Counts from 1 to count_to, with an optional delay between counts.""" + for i in range(1, count_to + 1): + if delay > 0: + time.sleep(delay) + yield i From b45b83cc2ba279726eadc7a4776837c438ce5f0b Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 14 Aug 2025 17:19:21 +0300 Subject: [PATCH 14/59] type fixes, work on reducers --- dimos/protocol/skill/coordinator.py | 2 ++ dimos/protocol/skill/type.py | 30 ++++++++++++++++++++++------- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 2c9854ddbc..42f688bd6e 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -91,6 +91,8 @@ def messages(self) -> List[SkillMsg]: def agent_encode(self) -> ToolMessage: # here we need to process streamed messages depending on the reducer + # we also want to reduce the messages we are storing so that long running streams + # don't fill up the memory last_msg = self.messages[-1] return ToolMessage(last_msg.content, name=self.name, tool_call_id=self.call_id) diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index bec3f7a3ab..f210e8dc75 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -15,11 +15,11 @@ import time from dataclasses import dataclass from enum import Enum -from typing import Any, Callable +from typing import Any, Callable, Generic, Literal, Optional, TypeVar from dimos.types.timestamped import Timestamped -# defines protocol messages used for communication between skills and agents +# This file defines protocol messages used for communication between skills and agents class Call(Enum): @@ -97,13 +97,17 @@ class MsgType(Enum): pending = 0 start = 1 stream = 2 - ret = 3 - error = 4 + reduced = 3 + ret = 4 + error = 5 -class SkillMsg(Timestamped): +M = TypeVar("M", bound="MsgType") + + +class SkillMsg(Timestamped, Generic[M]): ts: float - type: MsgType + type: M call_id: str skill_name: str content: str | int | float | dict | list @@ -113,7 +117,7 @@ def __init__( call_id: str, skill_name: str, content: str | int | float | dict | list, - type: MsgType = MsgType.ret, + type: M, ) -> None: self.ts = time.time() self.call_id = call_id @@ -145,3 +149,15 @@ def __str__(self): return f"Pending({time_ago:.1f}s ago)" if self.type == MsgType.stream: return f"Stream({time_ago:.1f}s ago, val={self.content})" + + +# Reducers take stream messages, combine them and return a reduced message. +type ReducerFunction = Callable[ + [ + list[ + SkillMsg[Literal[MsgType.Stream]], + Optional[SkillMsg[Literal[MsgType.Reduced]]], + ], + SkillMsg[Literal[MsgType.Reduced]], + ] +] From 5c1cbfb2778ab7f40ed3a6f8de49acdeefa12fd6 Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 14 Aug 2025 17:21:07 +0300 Subject: [PATCH 15/59] skill state bugfix --- dimos/protocol/skill/coordinator.py | 2 +- dimos/protocol/skill/skill.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 42f688bd6e..60d0fcff41 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -251,7 +251,7 @@ def call_skill(self, call_id: str, skill_name: str, args: dict[str, Any]) -> Non # This initializes the skill state if it doesn't exist self._skill_state[call_id] = SkillState( - name=skill_name, skill_config=skill_config, call_id=call_id + call_id=call_id, name=skill_name, skill_config=skill_config ) return skill_config.call(call_id, *args.get("args", []), **args.get("kwargs", {})) diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index ac6280b97f..9336f35d82 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -150,7 +150,7 @@ def call_skill( raise ValueError(f"Function '{skill_name}' in {self.__class__.__name__} is not a skill") # we notify the skill transport about the start of the skill call - self.skill_transport.publish(SkillMsg(call_id, skill, None, type=MsgType.start)) + self.skill_transport.publish(SkillMsg(call_id, skill_name, None, type=MsgType.start)) try: val = f(*args, **kwargs) @@ -182,7 +182,7 @@ def call_skill( self.skill_transport.publish( SkillMsg( call_id, - skill, + skill_name, {"msg": str(e), "traceback": formatted_traceback}, type=MsgType.error, ) From 07b153a8608dccf34f13202c557e3a5d79e95b62 Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 14 Aug 2025 18:48:47 +0300 Subject: [PATCH 16/59] test bugfix --- dimos/protocol/skill/test_coordinator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 5ca8e109ab..6c924ece5d 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -62,13 +62,13 @@ async def test_coordinator_generator(): skillCoordinator.register_skills(TestContainer()) skillCoordinator.start() - skillCoordinator.call_skill("test-call-0", "counter", {"args": [10]}) + skillCoordinator.call_skill("test-gen-0", "counter", {"args": [10]}) skillstate = None while await skillCoordinator.wait_for_updates(1): skillstate = skillCoordinator.generate_snapshot(clear=True) print("Skill State:", skillstate) - print("Agent update:", skillstate["test-call-0"].agent_encode()) + print("Agent update:", skillstate["test-gen-0"].agent_encode()) # we simulate agent thinking await asyncio.sleep(0.25) @@ -76,6 +76,6 @@ async def test_coordinator_generator(): print( "All messages:" + "".join( - map(lambda x: f"\n {x}", skillstate["test-call-0"].messages), + map(lambda x: f"\n {x}", skillstate["test-gen-0"].messages), ), ) From b1f5d9d38c7fc12cc1c01659938e0848a887dd3f Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 17 Aug 2025 14:46:03 +0300 Subject: [PATCH 17/59] reducer rewrite starting --- dimos/protocol/skill/test_coordinator.py | 39 ++++++++++++++++++++++-- dimos/protocol/skill/testing_utils.py | 38 ----------------------- dimos/protocol/skill/type.py | 5 --- 3 files changed, 37 insertions(+), 45 deletions(-) delete mode 100644 dimos/protocol/skill/testing_utils.py diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 6c924ece5d..11bea14e33 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -13,12 +13,42 @@ # limitations under the License. import asyncio import time -from pprint import pprint +from typing import Generator, Optional import pytest from dimos.protocol.skill.coordinator import SkillCoordinator -from dimos.protocol.skill.testing_utils import TestContainer +from dimos.protocol.skill.skill import SkillContainer, skill +from dimos.protocol.skill.type import Reducer, Return, Stream + + +class TestContainer(SkillContainer): + @skill() + def add(self, x: int, y: int) -> int: + return x + y + + @skill() + def delayadd(self, x: int, y: int) -> int: + time.sleep(0.3) + return x + y + + @skill(stream=Stream.call_agent) + def counter(self, count_to: int, delay: Optional[float] = 0.1) -> Generator[int, None, None]: + """Counts from 1 to count_to, with an optional delay between counts.""" + for i in range(1, count_to + 1): + if delay > 0: + time.sleep(delay) + yield i + + @skill(stream=Stream.passive) + def counter_passive( + self, count_to: int, delay: Optional[float] = 0.1 + ) -> Generator[int, None, None]: + """Counts from 1 to count_to, with an optional delay between counts.""" + for i in range(1, count_to + 1): + if delay > 0: + time.sleep(delay) + yield i @pytest.mark.asyncio @@ -62,11 +92,16 @@ async def test_coordinator_generator(): skillCoordinator.register_skills(TestContainer()) skillCoordinator.start() + + # here we call a skill that generates a sequence of messages skillCoordinator.call_skill("test-gen-0", "counter", {"args": [10]}) skillstate = None + # periodically agent is stopping it's thinking cycle and asks for updates while await skillCoordinator.wait_for_updates(1): skillstate = skillCoordinator.generate_snapshot(clear=True) + + # reducer is generating a summary print("Skill State:", skillstate) print("Agent update:", skillstate["test-gen-0"].agent_encode()) # we simulate agent thinking diff --git a/dimos/protocol/skill/testing_utils.py b/dimos/protocol/skill/testing_utils.py deleted file mode 100644 index caf1f54f2b..0000000000 --- a/dimos/protocol/skill/testing_utils.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import time -from typing import Generator, Optional - -from dimos.protocol.skill.skill import SkillContainer, skill -from dimos.protocol.skill.type import Reducer, Return, Stream - - -class TestContainer(SkillContainer): - @skill() - def add(self, x: int, y: int) -> int: - return x + y - - @skill() - def delayadd(self, x: int, y: int) -> int: - time.sleep(0.3) - return x + y - - @skill(stream=Stream.call_agent) - def counter(self, count_to: int, delay: Optional[float] = 0.1) -> Generator[int, None, None]: - """Counts from 1 to count_to, with an optional delay between counts.""" - for i in range(1, count_to + 1): - if delay > 0: - time.sleep(delay) - yield i diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index f210e8dc75..79973245c8 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -22,11 +22,6 @@ # This file defines protocol messages used for communication between skills and agents -class Call(Enum): - Implicit = 0 - Explicit = 1 - - class Reducer(Enum): none = 0 latest = 1 From 5cc4ebfeab1ff2105693964ff41ebbae81ad5271 Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 17 Aug 2025 15:30:50 +0300 Subject: [PATCH 18/59] new reducer structure implemented --- dimos/protocol/skill/coordinator.py | 2 +- dimos/protocol/skill/skill.py | 2 +- dimos/protocol/skill/test_coordinator.py | 2 +- dimos/protocol/skill/type.py | 39 +++++++----------------- 4 files changed, 14 insertions(+), 31 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 60d0fcff41..9f6e43a561 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -78,7 +78,7 @@ def __init__(self, call_id: str, name: str, skill_config: Optional[SkillConfig] super().__init__() self.skill_config = skill_config or SkillConfig( - name=name, stream=Stream.none, ret=Return.none, reducer=Reducer.none, schema={} + name=name, stream=Stream.none, ret=Return.none, reducer=Reducer.all, schema={} ) self.state = SkillStateEnum.pending diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index 9336f35d82..81e1be469f 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -20,10 +20,10 @@ from dimos.core import rpc from dimos.protocol.service import Configurable from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec +from dimos.protocol.skill.reducer import Reducer from dimos.protocol.skill.schema import function_to_schema from dimos.protocol.skill.type import ( MsgType, - Reducer, Return, SkillConfig, SkillMsg, diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 11bea14e33..614bafe395 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -32,7 +32,7 @@ def delayadd(self, x: int, y: int) -> int: time.sleep(0.3) return x + y - @skill(stream=Stream.call_agent) + @skill(stream=Stream.call_agent, reducer=Reducer.all) def counter(self, count_to: int, delay: Optional[float] = 0.1) -> Generator[int, None, None]: """Counts from 1 to count_to, with an optional delay between counts.""" for i in range(1, count_to + 1): diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index 79973245c8..df0baf4332 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -15,19 +15,14 @@ import time from dataclasses import dataclass from enum import Enum -from typing import Any, Callable, Generic, Literal, Optional, TypeVar +from typing import TYPE_CHECKING, Any, Callable, Generic, Literal, Optional, TypeVar +from dimos.protocol.skill.reducer import Reducer from dimos.types.timestamped import Timestamped # This file defines protocol messages used for communication between skills and agents -class Reducer(Enum): - none = 0 - latest = 1 - average = 2 - - class Stream(Enum): # no streaming none = 0 @@ -73,14 +68,14 @@ def __str__(self): # Only show reducer if stream is not none (streaming is happening) if self.stream != Stream.none: - reducer_name = "unknown" - if self.reducer == Reducer.latest: - reducer_name = "latest" - elif self.reducer == Reducer.all: - reducer_name = "all" - elif self.reducer == Reducer.average: - reducer_name = "average" - parts.append(f"reducer={reducer_name}") + # reducer_name = "unknown" + # if self.reducer == Reducer.latest: + # reducer_name = "latest" + # elif self.reducer == Reducer.all: + # reducer_name = "all" + # elif self.reducer == Reducer.average: + # reducer_name = "average" + # parts.append(f"reducer={reducer_name}") parts.append(f"stream={self.stream.name}") # Always show return mode @@ -92,7 +87,7 @@ class MsgType(Enum): pending = 0 start = 1 stream = 2 - reduced = 3 + reduced_stream = 3 ret = 4 error = 5 @@ -144,15 +139,3 @@ def __str__(self): return f"Pending({time_ago:.1f}s ago)" if self.type == MsgType.stream: return f"Stream({time_ago:.1f}s ago, val={self.content})" - - -# Reducers take stream messages, combine them and return a reduced message. -type ReducerFunction = Callable[ - [ - list[ - SkillMsg[Literal[MsgType.Stream]], - Optional[SkillMsg[Literal[MsgType.Reduced]]], - ], - SkillMsg[Literal[MsgType.Reduced]], - ] -] From 6359e5c18c43123a884a35ce650a012442aa5174 Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 17 Aug 2025 15:56:34 +0300 Subject: [PATCH 19/59] reducer restructure checkpoint, tests passing --- dimos/protocol/skill/coordinator.py | 54 +++++++++++++++-------- dimos/protocol/skill/test_coordinator.py | 8 +--- dimos/protocol/skill/type.py | 56 +++++++++++++++++++++++- 3 files changed, 91 insertions(+), 27 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 9f6e43a561..3bd27442b7 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -13,10 +13,11 @@ # limitations under the License. import asyncio +import time from copy import copy from dataclasses import dataclass from enum import Enum -from typing import Any, List, Optional +from typing import Any, List, Literal, Optional from langchain_core.messages import ( AIMessage, @@ -67,12 +68,19 @@ def colored_name(self) -> Text: # TODO pending timeout, running timeout, etc. # # This object maintains the state of a skill run on a caller end -class SkillState(TimestampedCollection): +class SkillState: call_id: str name: str state: SkillStateEnum skill_config: SkillConfig - value: Optional[Any] = None + + msg_count: int = 0 + + start_msg: SkillMsg[Literal[MsgType.start]] = None + end_msg: SkillMsg[Literal[MsgType.ret]] = None + error_msg: SkillMsg[Literal[MsgType.error]] = None + ret_msg: SkillMsg[Literal[MsgType.ret]] = None + reduced_stream_msg: List[SkillMsg[Literal[MsgType.reduced_stream]]] = None def __init__(self, call_id: str, name: str, skill_config: Optional[SkillConfig] = None) -> None: super().__init__() @@ -85,22 +93,26 @@ def __init__(self, call_id: str, name: str, skill_config: Optional[SkillConfig] self.call_id = call_id self.name = name - @property - def messages(self) -> List[SkillMsg]: - return self._items + def duration(self) -> float: + """Calculate the duration of the skill run.""" + if self.start_msg and self.end_msg: + return self.end_msg.ts - self.start_msg.ts + elif self.start_msg: + return time.time() - self.start_msg.ts + else: + return 0.0 def agent_encode(self) -> ToolMessage: - # here we need to process streamed messages depending on the reducer - # we also want to reduce the messages we are storing so that long running streams - # don't fill up the memory - last_msg = self.messages[-1] - return ToolMessage(last_msg.content, name=self.name, tool_call_id=self.call_id) + # last_msg = self.messages[-1] + # return ToolMessage(last_msg.content, name=self.name, tool_call_id=self.call_id) + return ToolMessage("something smart", name=self.name, tool_call_id=self.call_id) # returns True if the agent should be called for this message def handle_msg(self, msg: SkillMsg) -> bool: - self.add(msg) - + self.msg_count += 1 if msg.type == MsgType.stream: + self.reduced_stream_msg = self.skill_config.reducer(self.reduced_stream_msg, msg) + if ( self.skill_config.stream == Stream.none or self.skill_config.stream == Stream.passive @@ -112,22 +124,26 @@ def handle_msg(self, msg: SkillMsg) -> bool: if msg.type == MsgType.ret: self.state = SkillStateEnum.completed - self.value = msg.content + self.ret_msg = msg if self.skill_config.ret == Return.call_agent: return True return False if msg.type == MsgType.error: - self.value = msg.content self.state = SkillStateEnum.error + self.error_msg = msg return True if msg.type == MsgType.start: self.state = SkillStateEnum.running + self.start_msg = msg return False return False + def __len__(self) -> int: + return self.msg_count + def __str__(self) -> str: # For standard string representation, we'll use rich's Console to render the colored text console = Console(force_terminal=True, legacy_windows=False) @@ -144,7 +160,7 @@ def __str__(self) -> str: parts.append(Text(f"{self.duration():.2f}s")) if len(self): - parts.append(Text(f", last_msg={self.messages[-1]})")) + parts.append(Text(f", msg_count={self.msg_count})")) else: parts.append(Text(", No Messages)")) @@ -314,8 +330,10 @@ def generate_snapshot(self, clear: bool = True) -> SkillStateDict: logger.info(f"Skill {skill_run.name} (call_id={call_id}) finished") to_delete.append(call_id) if skill_run.state == SkillStateEnum.error: - error_msg = skill_run.value.get("msg", "Unknown error") - error_traceback = skill_run.value.get("traceback", "No traceback available") + error_msg = skill_run.error_msg.content.get("msg", "Unknown error") + error_traceback = skill_run.error_msg.content.get( + "traceback", "No traceback available" + ) logger.error( f"Skill error for {skill_run.name} (call_id={call_id}): {error_msg}" diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 614bafe395..33a35e1602 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -40,7 +40,7 @@ def counter(self, count_to: int, delay: Optional[float] = 0.1) -> Generator[int, time.sleep(delay) yield i - @skill(stream=Stream.passive) + @skill(stream=Stream.passive, reducer=Reducer.sum) def counter_passive( self, count_to: int, delay: Optional[float] = 0.1 ) -> Generator[int, None, None]: @@ -108,9 +108,3 @@ async def test_coordinator_generator(): await asyncio.sleep(0.25) print("Skill lifecycle finished") - print( - "All messages:" - + "".join( - map(lambda x: f"\n {x}", skillstate["test-gen-0"].messages), - ), - ) diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index df0baf4332..e9f5f64696 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -11,13 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations import time from dataclasses import dataclass from enum import Enum from typing import TYPE_CHECKING, Any, Callable, Generic, Literal, Optional, TypeVar -from dimos.protocol.skill.reducer import Reducer from dimos.types.timestamped import Timestamped # This file defines protocol messages used for communication between skills and agents @@ -44,7 +44,7 @@ class Return(Enum): @dataclass class SkillConfig: name: str - reducer: Reducer + reducer: "ReducerF" stream: Stream ret: Return schema: dict[str, Any] @@ -139,3 +139,55 @@ def __str__(self): return f"Pending({time_ago:.1f}s ago)" if self.type == MsgType.stream: return f"Stream({time_ago:.1f}s ago, val={self.content})" + + +# typing looks complex but it's a standard reducer function signature, using SkillMsgs +# (Optional[accumulator], msg) -> accumulator +type ReducerF = Callable[ + [Optional[SkillMsg[Literal[MsgType.reduced_stream]]], SkillMsg[Literal[MsgType.stream]]], + SkillMsg[Literal[MsgType.reduced_stream]], +] + + +C = TypeVar("C") # content type +A = TypeVar("A") # accumulator type +# define a naive reducer function type that's generic in terms of the accumulator type +type SimpleReducerF[A, C] = Callable[[Optional[A], C], A] + + +def make_reducer(simple_reducer: SimpleReducerF) -> ReducerF: + """ + Converts a naive reducer function into a standard reducer function. + The naive reducer function should accept an accumulator and a message, + and return the updated accumulator. + """ + + def reducer( + accumulator: Optional[SkillMsg[Literal[MsgType.reduced_stream]]], + msg: SkillMsg[Literal[MsgType.stream]], + ) -> SkillMsg[Literal[MsgType.reduced_stream]]: + # Extract the content from the accumulator if it exists + acc_value = accumulator.content if accumulator else None + + # Apply the simple reducer to get the new accumulated value + new_value = simple_reducer(acc_value, msg.content) + + # Wrap the result in a SkillMsg with reduced_stream type + return SkillMsg( + call_id=msg.call_id, + skill_name=msg.skill_name, + content=new_value, + type=MsgType.reduced_stream, + ) + + return reducer + + +class Reducer: + sum = staticmethod(make_reducer(lambda x, y: x + y if x else y)) + latest = staticmethod(make_reducer(lambda x, y: y)) + all = staticmethod(make_reducer(lambda x, y: x + [y] if x else [y])) + + +# Create singleton instance +Reducer = Reducer() From c665a44e67c1674a756ca65152a02af37c1f7a5d Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 17 Aug 2025 16:14:50 +0300 Subject: [PATCH 20/59] finished reducing --- dimos/protocol/skill/coordinator.py | 29 +++++++++++++++++++++--- dimos/protocol/skill/test_coordinator.py | 17 +++++++------- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 3bd27442b7..b955d9a4b4 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -103,9 +103,32 @@ def duration(self) -> float: return 0.0 def agent_encode(self) -> ToolMessage: - # last_msg = self.messages[-1] - # return ToolMessage(last_msg.content, name=self.name, tool_call_id=self.call_id) - return ToolMessage("something smart", name=self.name, tool_call_id=self.call_id) + agent_data = {"state": self.state.name, "ran_for": f"{round(self.duration())} seconds"} + + if self.state == SkillStateEnum.running: + if self.reduced_stream_msg: + agent_data["stream_data"] = self.reduced_stream_msg.content + + if self.state == SkillStateEnum.completed: + if self.reduced_stream_msg: + agent_data["return_value"] = self.reduced_stream_msg.content + else: + agent_data["return_value"] = self.ret_msg.content + + if self.state == SkillStateEnum.error: + agent_data["return_value"] = self.error_msg.content + if self.reduced_stream_msg: + agent_data["stream_data"] = self.reduced_stream_msg.content + + if self.error_msg: + if self.reduced_stream_msg: + agent_data["stream_data"] = self.reduced_stream_msg.content + agent_data["error"] = { + "msg": self.error_msg.content.get("msg", "Unknown error"), + "traceback": self.error_msg.content.get("traceback", "No traceback available"), + } + + return ToolMessage(agent_data, name=self.name, tool_call_id=self.call_id) # returns True if the agent should be called for this message def handle_msg(self, msg: SkillMsg) -> bool: diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 33a35e1602..543e4745c7 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -33,7 +33,7 @@ def delayadd(self, x: int, y: int) -> int: return x + y @skill(stream=Stream.call_agent, reducer=Reducer.all) - def counter(self, count_to: int, delay: Optional[float] = 0.1) -> Generator[int, None, None]: + def counter(self, count_to: int, delay: Optional[float] = 0.05) -> Generator[int, None, None]: """Counts from 1 to count_to, with an optional delay between counts.""" for i in range(1, count_to + 1): if delay > 0: @@ -41,8 +41,8 @@ def counter(self, count_to: int, delay: Optional[float] = 0.1) -> Generator[int, yield i @skill(stream=Stream.passive, reducer=Reducer.sum) - def counter_passive( - self, count_to: int, delay: Optional[float] = 0.1 + def counter_passive_sum( + self, count_to: int, delay: Optional[float] = 0.05 ) -> Generator[int, None, None]: """Counts from 1 to count_to, with an optional delay between counts.""" for i in range(1, count_to + 1): @@ -95,16 +95,17 @@ async def test_coordinator_generator(): # here we call a skill that generates a sequence of messages skillCoordinator.call_skill("test-gen-0", "counter", {"args": [10]}) + skillCoordinator.call_skill("test-gen-1", "counter_passive_sum", {"args": [5]}) skillstate = None # periodically agent is stopping it's thinking cycle and asks for updates - while await skillCoordinator.wait_for_updates(1): - skillstate = skillCoordinator.generate_snapshot(clear=True) + while await skillCoordinator.wait_for_updates(2): + print(skillCoordinator) # reducer is generating a summary - print("Skill State:", skillstate) - print("Agent update:", skillstate["test-gen-0"].agent_encode()) + skillstate = skillCoordinator.generate_snapshot(clear=True) + print("Agent update:", skillstate) # we simulate agent thinking - await asyncio.sleep(0.25) + await asyncio.sleep(0.125) print("Skill lifecycle finished") From c6479607d0971df9134b7a0f20992e00102867ab Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 17 Aug 2025 16:22:32 +0300 Subject: [PATCH 21/59] __str__ for coordinator and skill state --- dimos/protocol/skill/coordinator.py | 78 +++++++++++++----------- dimos/protocol/skill/test_coordinator.py | 7 +-- 2 files changed, 42 insertions(+), 43 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index b955d9a4b4..23cb14ff97 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -202,17 +202,47 @@ def __str__(self) -> str: class SkillStateDict(dict[str, SkillState]): """Custom dict for skill states with better string representation.""" - def __str__(self) -> str: - if not self: - return "SkillStates empty" - - lines = [] + def table(self) -> Table: + # Add skill states section + states_table = Table(show_header=True) + states_table.add_column("Call ID", style="dim", width=12) + states_table.add_column("Skill", style="white") + states_table.add_column("State", style="white") + states_table.add_column("Duration", style="yellow") + states_table.add_column("Messages", style="dim") for call_id, skill_state in self.items(): - # Use the SkillState's own __str__ method for individual items - lines.append(f"{skill_state}") + # Get colored state name + state_text = skill_state.state.colored_name() + + # Duration formatting + if ( + skill_state.state == SkillStateEnum.completed + or skill_state.state == SkillStateEnum.error + ): + duration = f"{skill_state.duration():.2f}s" + else: + duration = f"{skill_state.duration():.2f}s..." + + # Messages info + msg_count = str(len(skill_state)) + + states_table.add_row( + call_id[:8] + "...", skill_state.name, state_text, duration, msg_count + ) + + if not self: + states_table.add_row("", "[dim]No active skills[/dim]", "", "", "") + return states_table + + def __str__(self): + console = Console(force_terminal=True, legacy_windows=False) - return "\n".join(lines) + # Render to string with title above + with console.capture() as capture: + console.print(Text(" SkillState", style="bold blue")) + console.print(self.table()) + return capture.get().strip() # This class is responsible for managing the lifecycle of skills, @@ -393,35 +423,9 @@ def __str__(self): containers_table.add_row("", "[dim]No containers registered[/dim]") # Add skill states section - states_table = Table(show_header=True, show_edge=False, box=None) - states_table.add_column("Call ID", style="dim", width=12) - states_table.add_column("Skill", style="white") - states_table.add_column("State", style="white") - states_table.add_column("Duration", style="yellow") - states_table.add_column("Messages", style="dim") - - for call_id, skill_state in self._skill_state.items(): - # Get colored state name - state_text = skill_state.state.colored_name() - - # Duration formatting - if ( - skill_state.state == SkillStateEnum.completed - or skill_state.state == SkillStateEnum.error - ): - duration = f"{skill_state.duration():.2f}s" - else: - duration = f"{skill_state.duration():.2f}s..." - - # Messages info - msg_count = str(len(skill_state)) - - states_table.add_row( - call_id[:8] + "...", skill_state.name, state_text, duration, msg_count - ) - - if not self._skill_state: - states_table.add_row("", "[dim]No active skills[/dim]", "", "", "") + states_table = self._skill_state.table() + states_table.show_edge = False + states_table.box = None # Combine into main table table.add_column("Section", style="bold") diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 543e4745c7..b0fdc20c84 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -97,15 +97,10 @@ async def test_coordinator_generator(): skillCoordinator.call_skill("test-gen-0", "counter", {"args": [10]}) skillCoordinator.call_skill("test-gen-1", "counter_passive_sum", {"args": [5]}) - skillstate = None # periodically agent is stopping it's thinking cycle and asks for updates while await skillCoordinator.wait_for_updates(2): print(skillCoordinator) - - # reducer is generating a summary - skillstate = skillCoordinator.generate_snapshot(clear=True) - print("Agent update:", skillstate) - # we simulate agent thinking + agent_update = skillCoordinator.generate_snapshot(clear=True) await asyncio.sleep(0.125) print("Skill lifecycle finished") From 547e7d3a5d91c0a427ff4912760c5f41d9a13dd8 Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 17 Aug 2025 16:28:26 +0300 Subject: [PATCH 22/59] cleanup --- dimos/protocol/skill/coordinator.py | 4 ++++ dimos/protocol/skill/skill.py | 9 +-------- dimos/protocol/skill/test_coordinator.py | 4 ++-- dimos/protocol/skill/type.py | 19 ++++--------------- 4 files changed, 11 insertions(+), 25 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 23cb14ff97..82ae90f133 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -202,6 +202,10 @@ def __str__(self) -> str: class SkillStateDict(dict[str, SkillState]): """Custom dict for skill states with better string representation.""" + def agent_encode(self) -> list[ToolMessage]: + """Encode all skill states into a list of ToolMessages for the agent.""" + return [skill_state.agent_encode() for skill_state in self.values()] + def table(self) -> Table: # Add skill states section states_table = Table(show_header=True) diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index 81e1be469f..44963d326b 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -20,15 +20,8 @@ from dimos.core import rpc from dimos.protocol.service import Configurable from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec -from dimos.protocol.skill.reducer import Reducer from dimos.protocol.skill.schema import function_to_schema -from dimos.protocol.skill.type import ( - MsgType, - Return, - SkillConfig, - SkillMsg, - Stream, -) +from dimos.protocol.skill.type import MsgType, Reducer, Return, SkillConfig, SkillMsg, Stream # skill is a decorator that allows us to specify a skill behaviour for a function. # diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index b0fdc20c84..3b37d9ffe9 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -19,7 +19,7 @@ from dimos.protocol.skill.coordinator import SkillCoordinator from dimos.protocol.skill.skill import SkillContainer, skill -from dimos.protocol.skill.type import Reducer, Return, Stream +from dimos.protocol.skill.type import Reducer, Stream class TestContainer(SkillContainer): @@ -90,7 +90,6 @@ async def test_coordinator_parallel_calls(): async def test_coordinator_generator(): skillCoordinator = SkillCoordinator() skillCoordinator.register_skills(TestContainer()) - skillCoordinator.start() # here we call a skill that generates a sequence of messages @@ -101,6 +100,7 @@ async def test_coordinator_generator(): while await skillCoordinator.wait_for_updates(2): print(skillCoordinator) agent_update = skillCoordinator.generate_snapshot(clear=True) + print(agent_update.agent_encode()) await asyncio.sleep(0.125) print("Skill lifecycle finished") diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index e9f5f64696..acf3028848 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -68,14 +68,6 @@ def __str__(self): # Only show reducer if stream is not none (streaming is happening) if self.stream != Stream.none: - # reducer_name = "unknown" - # if self.reducer == Reducer.latest: - # reducer_name = "latest" - # elif self.reducer == Reducer.all: - # reducer_name = "all" - # elif self.reducer == Reducer.average: - # reducer_name = "average" - # parts.append(f"reducer={reducer_name}") parts.append(f"stream={self.stream.name}") # Always show return mode @@ -183,11 +175,8 @@ def reducer( return reducer +# just a convinience class to hold reducer functions class Reducer: - sum = staticmethod(make_reducer(lambda x, y: x + y if x else y)) - latest = staticmethod(make_reducer(lambda x, y: y)) - all = staticmethod(make_reducer(lambda x, y: x + [y] if x else [y])) - - -# Create singleton instance -Reducer = Reducer() + sum = make_reducer(lambda x, y: x + y if x else y) + latest = make_reducer(lambda x, y: y) + all = make_reducer(lambda x, y: x + [y] if x else [y]) From a05d1c01b6b7c8d329b3284a45017c3fe6cb53de Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 17 Aug 2025 16:38:52 +0300 Subject: [PATCH 23/59] passive skills tests --- dimos/agents2/agent.py | 26 ++++++++++++++++++++++-- dimos/agents2/test_agent.py | 17 +++------------- dimos/protocol/skill/coordinator.py | 11 ---------- dimos/protocol/skill/test_coordinator.py | 10 +++++++++ 4 files changed, 37 insertions(+), 27 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index eb575cce3b..e110143b39 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -14,7 +14,7 @@ import asyncio from pprint import pprint -from typing import Optional +from typing import List, Optional from langchain.chat_models import init_chat_model from langchain_core.language_models.chat_models import BaseChatModel @@ -37,6 +37,8 @@ class Agent(AgentSpec): + implicit_skill_counter: int = 0 + def __init__( self, *args, @@ -67,6 +69,26 @@ def stop(self): def clear_history(self): self.messages.clear() + # Used by agent to execute tool calls + def execute_tool_calls(self, tool_calls: List[ToolCall]) -> None: + """Execute a list of tool calls from the agent.""" + for tool_call in tool_calls: + logger.info(f"executing skill call {tool_call}") + self.coordinator.call_skill( + tool_call.get("id"), + tool_call.get("name"), + tool_call.get("args"), + ) + + # used to inject skill calls into the agent loop without agent asking for it + def run_implicit_skill(self, skill_name: str, *args, **kwargs) -> None: + self.coordinator.call_skill( + f"implicit-skill-{self.implicit_skill_counter}", + skill_name, + {"args": args, "kwargs": kwargs}, + ) + self.implicit_skill_counter += 1 + async def agent_loop(self, seed_query: str = ""): self.messages.append(HumanMessage(seed_query)) try: @@ -79,7 +101,7 @@ async def agent_loop(self, seed_query: str = ""): logger.info(f"Agent response: {msg.content}") if msg.tool_calls: - self.coordinator.execute_tool_calls(msg.tool_calls) + self.execute_tool_calls(msg.tool_calls) if not self.coordinator.has_active_skills(): logger.info("No active tasks, exiting agent loop.") diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index e17e5a88c9..336cd988df 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -19,20 +19,7 @@ from dimos.agents2.agent import Agent from dimos.protocol.skill import SkillContainer, skill - - -class TestContainer(SkillContainer): - @skill() - def add(self, x: int, y: int) -> int: - """Adds two integers.""" - time.sleep(0.3) - return x + y - - @skill() - def sub(self, x: int, y: int) -> int: - """Subs two integers.""" - time.sleep(0.3) - return x - y +from dimos.protocol.skill.test_coordinator import TestContainer @pytest.mark.asyncio @@ -48,6 +35,8 @@ async def test_agent_init(): system_prompt="Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate" ) agent.register_skills(TestContainer()) + agent.run_implicit_skill("passive_time", frequency=1) + agent.start() print( diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 82ae90f133..b1dd1487f3 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -302,17 +302,6 @@ def get_tools(self) -> list[dict]: return ret - # Used by agent to execute tool calls - def execute_tool_calls(self, tool_calls: List[ToolCall]) -> None: - """Execute a list of tool calls from the agent.""" - for tool_call in tool_calls: - logger.info(f"executing skill call {tool_call}") - self.call_skill( - tool_call.get("id"), - tool_call.get("name"), - tool_call.get("args"), - ) - # internal skill call def call_skill(self, call_id: str, skill_name: str, args: dict[str, Any]) -> None: skill_config = self.get_skill_config(skill_name) diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 3b37d9ffe9..022293e51e 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import asyncio +import datetime import time from typing import Generator, Optional @@ -25,10 +26,12 @@ class TestContainer(SkillContainer): @skill() def add(self, x: int, y: int) -> int: + """adds x and y.""" return x + y @skill() def delayadd(self, x: int, y: int) -> int: + """waits 0.3 seconds before adding x and y.""" time.sleep(0.3) return x + y @@ -50,6 +53,13 @@ def counter_passive_sum( time.sleep(delay) yield i + @skill(stream=Stream.passive, reducer=Reducer.latest) + def passive_time(self, frequency: Optional[float] = 10) -> Generator[str, None, None]: + """Provides current time.""" + while True: + time.sleep(1 / frequency) + yield str(datetime.datetime.now()) + @pytest.mark.asyncio async def test_coordinator_parallel_calls(): From 95b250e40e08bb4dfb9795033c6de976c5edbb53 Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 17 Aug 2025 16:59:01 +0300 Subject: [PATCH 24/59] ToolMessage/situational awareness msg ordering --- dimos/agents2/agent.py | 2 +- dimos/agents2/test_agent.py | 22 ++++++------ dimos/protocol/skill/coordinator.py | 56 ++++++++++++++++++++--------- 3 files changed, 53 insertions(+), 27 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index e110143b39..acbb730e72 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -83,7 +83,7 @@ def execute_tool_calls(self, tool_calls: List[ToolCall]) -> None: # used to inject skill calls into the agent loop without agent asking for it def run_implicit_skill(self, skill_name: str, *args, **kwargs) -> None: self.coordinator.call_skill( - f"implicit-skill-{self.implicit_skill_counter}", + False, skill_name, {"args": args, "kwargs": kwargs}, ) diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index 336cd988df..e8f7057f49 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -18,23 +18,25 @@ import pytest from dimos.agents2.agent import Agent -from dimos.protocol.skill import SkillContainer, skill from dimos.protocol.skill.test_coordinator import TestContainer @pytest.mark.asyncio async def test_agent_init(): - from dimos.core import start + system_prompt = ( + "Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate" + ) + ## Uncomment the following lines to use a real module system + # from dimos.core import start # dimos = start(2) - # agent = dimos.deploy( - # Agent, - # system_prompt="Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate", - # ) - agent = Agent( - system_prompt="Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate" - ) - agent.register_skills(TestContainer()) + # testcontainer = dimos.deploy(TestContainer) + # agent = dimos.deploy(Agent, system_prompt=system_prompt) + + testcontainer = TestContainer() + agent = Agent(system_prompt=system_prompt) + + agent.register_skills(testcontainer) agent.run_implicit_skill("passive_time", frequency=1) agent.start() diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index b1dd1487f3..879381b1c2 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -13,11 +13,12 @@ # limitations under the License. import asyncio +import json import time from copy import copy from dataclasses import dataclass from enum import Enum -from typing import Any, List, Literal, Optional +from typing import Any, List, Literal, Optional, Union from langchain_core.messages import ( AIMessage, @@ -37,7 +38,6 @@ from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec from dimos.protocol.skill.skill import SkillConfig, SkillContainer from dimos.protocol.skill.type import MsgType, Reducer, Return, SkillMsg, Stream -from dimos.types.timestamped import TimestampedCollection from dimos.utils.logging_config import setup_logger logger = setup_logger("dimos.protocol.skill.coordinator") @@ -65,8 +65,6 @@ def colored_name(self) -> Text: return Text(self.name, style=colors.get(self, "white")) -# TODO pending timeout, running timeout, etc. -# # This object maintains the state of a skill run on a caller end class SkillState: call_id: str @@ -75,6 +73,7 @@ class SkillState: skill_config: SkillConfig msg_count: int = 0 + sent_tool_msg: bool = False start_msg: SkillMsg[Literal[MsgType.start]] = None end_msg: SkillMsg[Literal[MsgType.ret]] = None @@ -102,33 +101,49 @@ def duration(self) -> float: else: return 0.0 - def agent_encode(self) -> ToolMessage: + def agent_encode(self) -> Union[ToolMessage, HumanMessage]: + # any tool output can be a custom type that knows how to encode itself + # like a costmap, path, transform etc could be translatable into strings + def maybe_encode(something: Any) -> str: + if getattr(something, "agent_encode", None): + return something.agent_encode() + return str(something) + agent_data = {"state": self.state.name, "ran_for": f"{round(self.duration())} seconds"} if self.state == SkillStateEnum.running: if self.reduced_stream_msg: - agent_data["stream_data"] = self.reduced_stream_msg.content + agent_data["stream_data"] = maybe_encode(self.reduced_stream_msg.content) if self.state == SkillStateEnum.completed: if self.reduced_stream_msg: - agent_data["return_value"] = self.reduced_stream_msg.content + agent_data["return_value"] = maybe_encode(self.reduced_stream_msg.content) else: - agent_data["return_value"] = self.ret_msg.content + agent_data["return_value"] = maybe_encode(self.ret_msg.content) if self.state == SkillStateEnum.error: - agent_data["return_value"] = self.error_msg.content + agent_data["return_value"] = maybe_encode(self.error_msg.content) if self.reduced_stream_msg: - agent_data["stream_data"] = self.reduced_stream_msg.content + agent_data["stream_data"] = maybe_encode(self.reduced_stream_msg.content) if self.error_msg: if self.reduced_stream_msg: - agent_data["stream_data"] = self.reduced_stream_msg.content + agent_data["stream_data"] = maybe_encode(self.reduced_stream_msg.content) agent_data["error"] = { "msg": self.error_msg.content.get("msg", "Unknown error"), "traceback": self.error_msg.content.get("traceback", "No traceback available"), } - return ToolMessage(agent_data, name=self.name, tool_call_id=self.call_id) + # tool call can emit a single ToolMessage + # subsequent messages are considered SituationalAwarenessMessages, + # those are collapsed into a HumanMessage, that's artificially prepended to history + if not self.sent_tool_msg: + self.sent_tool_msg = True + return ToolMessage(agent_data, name=self.name, tool_call_id=self.call_id) + else: + return HumanMessage( + content=json.dumps(agent_data), + ) # returns True if the agent should be called for this message def handle_msg(self, msg: SkillMsg) -> bool: @@ -303,7 +318,9 @@ def get_tools(self) -> list[dict]: return ret # internal skill call - def call_skill(self, call_id: str, skill_name: str, args: dict[str, Any]) -> None: + def call_skill( + self, call_id: Union[str | Literal[False]], skill_name: str, args: dict[str, Any] + ) -> None: skill_config = self.get_skill_config(skill_name) if not skill_config: logger.error( @@ -312,9 +329,16 @@ def call_skill(self, call_id: str, skill_name: str, args: dict[str, Any]) -> Non return # This initializes the skill state if it doesn't exist - self._skill_state[call_id] = SkillState( - call_id=call_id, name=skill_name, skill_config=skill_config - ) + if call_id: + self._skill_state[call_id] = SkillState( + call_id=call_id, name=skill_name, skill_config=skill_config + ) + else: + call_id = time.time() + self._skill_state[call_id] = SkillState( + call_id=call_id, name=skill_name, skill_config=skill_config + ) + self._skill_state[call_id].sent_tool_msg = True return skill_config.call(call_id, *args.get("args", []), **args.get("kwargs", {})) From 237c4c6b30dcc27ec8a5f4f3e753d370b42844da Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 17 Aug 2025 19:40:57 +0300 Subject: [PATCH 25/59] agent initial working version --- dimos/agents2/agent.py | 60 +++++++++++++++++++++--- dimos/agents2/test_agent.py | 5 +- dimos/protocol/skill/coordinator.py | 60 ++++++++++++++---------- dimos/protocol/skill/skill.py | 21 +++++++-- dimos/protocol/skill/test_coordinator.py | 21 +++++++-- dimos/protocol/skill/type.py | 8 ++++ 6 files changed, 133 insertions(+), 42 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index acbb730e72..677a2eadd5 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import asyncio from pprint import pprint from typing import List, Optional @@ -26,6 +25,9 @@ ToolCall, ToolMessage, ) +from rich.console import Console +from rich.table import Table +from rich.text import Text from dimos.agents2.spec import AgentSpec from dimos.core import Module, rpc @@ -36,9 +38,12 @@ logger = setup_logger("dimos.protocol.agents2") -class Agent(AgentSpec): - implicit_skill_counter: int = 0 +SYSTEM_MSG_APPEND = """ +Your message history will always be appended with a System Overview message that provides situational awareness. +""" + +class Agent(AgentSpec): def __init__( self, *args, @@ -51,8 +56,9 @@ def __init__( if self.config.system_prompt: if isinstance(self.config.system_prompt, str): - self.messages.append(self.config.system_prompt) + self.messages.append(SystemMessage(self.config.system_prompt + SYSTEM_MSG_APPEND)) else: + self.config.system_prompt.content += SYSTEM_MSG_APPEND self.messages.append(self.config.system_prompt) self._llm = init_chat_model(model_provider=self.config.provider, model=self.config.model) @@ -80,6 +86,45 @@ def execute_tool_calls(self, tool_calls: List[ToolCall]) -> None: tool_call.get("args"), ) + def __str__(self) -> str: + console = Console(force_terminal=True, legacy_windows=False) + + table = Table(title="Agent History", show_header=True) + + table.add_column("Message Type", style="cyan", no_wrap=True) + table.add_column("Content") + + for message in self.messages: + if isinstance(message, HumanMessage): + table.add_row(Text("Human", style="green"), Text(message.content, style="green")) + elif isinstance(message, AIMessage): + table.add_row( + Text("Agent", style="magenta"), Text(message.content, style="magenta") + ) + + for tool_call in message.tool_calls: + table.add_row( + "Tool Call", + Text( + f"{tool_call.get('name')}({tool_call.get('args').get('args')})", + style="bold magenta", + ), + ) + elif isinstance(message, ToolMessage): + table.add_row( + "Tool Response", Text(f"{message.name}() -> {message.content}"), style="red" + ) + elif isinstance(message, SystemMessage): + table.add_row("System", Text(message.content, style="yellow")) + else: + table.add_row("Unknown", str(message)) + + # Render to string with title above + with console.capture() as capture: + console.print(Text(" Agent", style="bold blue")) + console.print(table) + return capture.get().strip() + # used to inject skill calls into the agent loop without agent asking for it def run_implicit_skill(self, skill_name: str, *args, **kwargs) -> None: self.coordinator.call_skill( @@ -87,7 +132,6 @@ def run_implicit_skill(self, skill_name: str, *args, **kwargs) -> None: skill_name, {"args": args, "kwargs": kwargs}, ) - self.implicit_skill_counter += 1 async def agent_loop(self, seed_query: str = ""): self.messages.append(HumanMessage(seed_query)) @@ -109,8 +153,10 @@ async def agent_loop(self, seed_query: str = ""): await self.coordinator.wait_for_updates() - for call_id, update in self.coordinator.generate_snapshot(clear=True).items(): - self.messages.append(update.agent_encode()) + update = self.coordinator.generate_snapshot(clear=True) + self.messages = self.messages + update.agent_encode() + print(self) + print(self.coordinator) except Exception as e: logger.error(f"Error in agent loop: {e}") diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index e8f7057f49..ddb916af6c 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -37,14 +37,15 @@ async def test_agent_init(): agent = Agent(system_prompt=system_prompt) agent.register_skills(testcontainer) - agent.run_implicit_skill("passive_time", frequency=1) + agent.run_implicit_skill("uptime_seconds", frequency=1) agent.start() print( agent.query_async( - "hi there, please tell me what's your name, and use add tool to add 124181112 and 124124." + "hi there, please tell me what's your name and current date, and how much is 124181112 + 124124?" ) ) await asyncio.sleep(5) + print(agent) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 879381b1c2..6e22630533 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -37,7 +37,7 @@ from dimos.core.module import get_loop from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec from dimos.protocol.skill.skill import SkillConfig, SkillContainer -from dimos.protocol.skill.type import MsgType, Reducer, Return, SkillMsg, Stream +from dimos.protocol.skill.type import MsgType, Reducer, Return, ReturnType, SkillMsg, Stream from dimos.utils.logging_config import setup_logger logger = setup_logger("dimos.protocol.skill.coordinator") @@ -101,7 +101,7 @@ def duration(self) -> float: else: return 0.0 - def agent_encode(self) -> Union[ToolMessage, HumanMessage]: + def content(self) -> str: # any tool output can be a custom type that knows how to encode itself # like a costmap, path, transform etc could be translatable into strings def maybe_encode(something: Any) -> str: @@ -109,46 +109,38 @@ def maybe_encode(something: Any) -> str: return something.agent_encode() return str(something) - agent_data = {"state": self.state.name, "ran_for": f"{round(self.duration())} seconds"} - if self.state == SkillStateEnum.running: if self.reduced_stream_msg: - agent_data["stream_data"] = maybe_encode(self.reduced_stream_msg.content) + return maybe_encode(self.reduced_stream_msg.content) if self.state == SkillStateEnum.completed: - if self.reduced_stream_msg: - agent_data["return_value"] = maybe_encode(self.reduced_stream_msg.content) - else: - agent_data["return_value"] = maybe_encode(self.ret_msg.content) + if self.reduced_stream_msg: # are we a streaming skill? + return maybe_encode(self.reduced_stream_msg.content) + return maybe_encode(self.ret_msg.content) if self.state == SkillStateEnum.error: - agent_data["return_value"] = maybe_encode(self.error_msg.content) if self.reduced_stream_msg: - agent_data["stream_data"] = maybe_encode(self.reduced_stream_msg.content) - - if self.error_msg: - if self.reduced_stream_msg: - agent_data["stream_data"] = maybe_encode(self.reduced_stream_msg.content) - agent_data["error"] = { - "msg": self.error_msg.content.get("msg", "Unknown error"), - "traceback": self.error_msg.content.get("traceback", "No traceback available"), - } + ( + maybe_encode(self.reduced_stream_msg.content) + + "\n" + + maybe_encode(self.error_msg.content) + ) + def agent_encode(self) -> Union[ToolMessage, str]: # tool call can emit a single ToolMessage # subsequent messages are considered SituationalAwarenessMessages, # those are collapsed into a HumanMessage, that's artificially prepended to history if not self.sent_tool_msg: self.sent_tool_msg = True - return ToolMessage(agent_data, name=self.name, tool_call_id=self.call_id) + return ToolMessage(self.content(), name=self.name, tool_call_id=self.call_id) else: - return HumanMessage( - content=json.dumps(agent_data), - ) + return self.name + ": " + json.dumps(self.content()) # returns True if the agent should be called for this message def handle_msg(self, msg: SkillMsg) -> bool: self.msg_count += 1 if msg.type == MsgType.stream: + self.state = SkillStateEnum.running self.reduced_stream_msg = self.skill_config.reducer(self.reduced_stream_msg, msg) if ( @@ -219,7 +211,23 @@ class SkillStateDict(dict[str, SkillState]): def agent_encode(self) -> list[ToolMessage]: """Encode all skill states into a list of ToolMessages for the agent.""" - return [skill_state.agent_encode() for skill_state in self.values()] + tool_responses = [] + overview_msg = [] + + for skill_state in self.values(): + response = skill_state.agent_encode() + if isinstance(response, ToolMessage): + tool_responses.append(response) + else: + overview_msg.append(response) + + if overview_msg: + state = AIMessage( + "System Overview:\n" + "\n".join(overview_msg), + metadata={"state": True}, + ) + return tool_responses + [state] + return tool_responses def table(self) -> Table: # Add skill states section @@ -334,7 +342,7 @@ def call_skill( call_id=call_id, name=skill_name, skill_config=skill_config ) else: - call_id = time.time() + call_id = str(time.time()) self._skill_state[call_id] = SkillState( call_id=call_id, name=skill_name, skill_config=skill_config ) @@ -347,7 +355,7 @@ def call_skill( # # Checks if agent needs to be notified (if ToolConfig has Return=call_agent or Stream=call_agent) def handle_message(self, msg: SkillMsg) -> None: - logger.info(f"SkillMsg from {msg.skill_name}, {msg.call_id} - {msg}") + # logger.info(f"SkillMsg from {msg.skill_name}, {msg.call_id} - {msg}") if self._skill_state.get(msg.call_id) is None: logger.warn( diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index 44963d326b..386b20270d 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -21,7 +21,15 @@ from dimos.protocol.service import Configurable from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec from dimos.protocol.skill.schema import function_to_schema -from dimos.protocol.skill.type import MsgType, Reducer, Return, SkillConfig, SkillMsg, Stream +from dimos.protocol.skill.type import ( + MsgType, + Reducer, + Return, + ReturnType, + SkillConfig, + SkillMsg, + Stream, +) # skill is a decorator that allows us to specify a skill behaviour for a function. # @@ -49,7 +57,9 @@ # the average of all values is returned to the agent -def skill(reducer=Reducer.latest, stream=Stream.none, ret=Return.call_agent): +def skill( + reducer=Reducer.latest, stream=Stream.none, ret=Return.call_agent, ret_type=ReturnType.auto +) -> Callable: def decorator(f: Callable[..., Any]) -> Any: def wrapper(self, *args, **kwargs): skill = f"{f.__name__}" @@ -76,7 +86,12 @@ def wrapper(self, *args, **kwargs): # wrapper.__signature__ = sig.replace(parameters=params) skill_config = SkillConfig( - name=f.__name__, reducer=reducer, stream=stream, ret=ret, schema=function_to_schema(f) + name=f.__name__, + reducer=reducer, + stream=stream, + ret=ret, + schema=function_to_schema(f), + ret_type=ret_type, ) # implicit RPC call as well diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 022293e51e..bd737eaf63 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -20,7 +20,7 @@ from dimos.protocol.skill.coordinator import SkillCoordinator from dimos.protocol.skill.skill import SkillContainer, skill -from dimos.protocol.skill.type import Reducer, Stream +from dimos.protocol.skill.type import Reducer, Return, ReturnType, Stream class TestContainer(SkillContainer): @@ -53,12 +53,25 @@ def counter_passive_sum( time.sleep(delay) yield i - @skill(stream=Stream.passive, reducer=Reducer.latest) - def passive_time(self, frequency: Optional[float] = 10) -> Generator[str, None, None]: + @skill(stream=Stream.passive, reducer=Reducer.latest, ret_type=ReturnType.passthrough) + def current_time(self, frequency: Optional[float] = 10) -> Generator[str, None, None]: """Provides current time.""" while True: - time.sleep(1 / frequency) yield str(datetime.datetime.now()) + time.sleep(1 / frequency) + + @skill(stream=Stream.passive, reducer=Reducer.latest, ret_type=ReturnType.passthrough) + def uptime_seconds(self, frequency: Optional[float] = 10) -> Generator[float, None, None]: + """Provides current uptime.""" + start_time = datetime.datetime.now() + while True: + yield (datetime.datetime.now() - start_time).total_seconds() + time.sleep(1 / frequency) + + @skill(ret_type=ReturnType.passthrough) + def current_date(self, frequency: Optional[float] = 10) -> str: + """Provides current date.""" + return str(datetime.datetime.now()) @pytest.mark.asyncio diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index acf3028848..fe9ddf51a2 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -39,6 +39,13 @@ class Return(Enum): passive = 1 # calls the agent with the value, scheduling an agent call call_agent = 2 + # calls the function to get a value, when the agent is being called + callback = 3 # TODO: this is a work in progress, not implemented yet + + +class ReturnType(Enum): + auto = 0 + passthrough = 1 @dataclass @@ -48,6 +55,7 @@ class SkillConfig: stream: Stream ret: Return schema: dict[str, Any] + ret_type: ReturnType = ReturnType.auto f: Callable | None = None autostart: bool = False From 5561c0f4ffc4e5a78effec5fb81c51efa4062bf8 Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 17 Aug 2025 20:08:20 +0300 Subject: [PATCH 26/59] coordinator -> agent interface still needs work --- dimos/agents2/agent.py | 12 +++++++--- dimos/agents2/test_agent.py | 2 +- dimos/protocol/skill/coordinator.py | 29 ++++++++++++++++++------ dimos/protocol/skill/test_coordinator.py | 7 +++--- 4 files changed, 36 insertions(+), 14 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index 677a2eadd5..c92fbd4828 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -98,9 +98,15 @@ def __str__(self) -> str: if isinstance(message, HumanMessage): table.add_row(Text("Human", style="green"), Text(message.content, style="green")) elif isinstance(message, AIMessage): - table.add_row( - Text("Agent", style="magenta"), Text(message.content, style="magenta") - ) + if hasattr(message, "metadata") and message.metadata.get("state"): + table.add_row( + Text("State Summary", style="blue"), + Text(message.content, style="blue"), + ) + else: + table.add_row( + Text("Agent", style="magenta"), Text(message.content, style="magenta") + ) for tool_call in message.tool_calls: table.add_row( diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index ddb916af6c..9a965802b8 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -47,5 +47,5 @@ async def test_agent_init(): ) ) - await asyncio.sleep(5) + await asyncio.sleep(20) print(agent) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 6e22630533..7ab7ab3d2e 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -120,20 +120,34 @@ def maybe_encode(something: Any) -> str: if self.state == SkillStateEnum.error: if self.reduced_stream_msg: - ( - maybe_encode(self.reduced_stream_msg.content) - + "\n" - + maybe_encode(self.error_msg.content) - ) + (maybe_encode(self.reduced_stream_msg.content) + "\n" + self.error_msg.content) + else: + return self.error_msg.content def agent_encode(self) -> Union[ToolMessage, str]: # tool call can emit a single ToolMessage # subsequent messages are considered SituationalAwarenessMessages, # those are collapsed into a HumanMessage, that's artificially prepended to history + if not self.sent_tool_msg: self.sent_tool_msg = True - return ToolMessage(self.content(), name=self.name, tool_call_id=self.call_id) + return ToolMessage( + self.content() or "Querying, please wait, you will receive a response soon.", + name=self.name, + tool_call_id=self.call_id, + ) else: + if self.skill_config.ret_type == ReturnType.auto: + # if we are not a streaming skill, we return a string + return json.dumps( + { + "name": self.name, + "call_id": self.call_id, + "state": self.state.name, + "data": self.content(), + "ran_for": self.duration(), + } + ) return self.name + ": " + json.dumps(self.content()) # returns True if the agent should be called for this message @@ -327,7 +341,7 @@ def get_tools(self) -> list[dict]: # internal skill call def call_skill( - self, call_id: Union[str | Literal[False]], skill_name: str, args: dict[str, Any] + self, call_id: Union[str | Literal[False]], skill_name: str, args: dict[str, Any] = {} ) -> None: skill_config = self.get_skill_config(skill_name) if not skill_config: @@ -348,6 +362,7 @@ def call_skill( ) self._skill_state[call_id].sent_tool_msg = True + print("ARGS ARE", args) return skill_config.call(call_id, *args.get("args", []), **args.get("kwargs", {})) # Receives a message from active skill diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index bd737eaf63..5d8c1f214b 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -53,14 +53,14 @@ def counter_passive_sum( time.sleep(delay) yield i - @skill(stream=Stream.passive, reducer=Reducer.latest, ret_type=ReturnType.passthrough) + @skill(stream=Stream.passive, reducer=Reducer.latest, ret_type=ReturnType.auto) def current_time(self, frequency: Optional[float] = 10) -> Generator[str, None, None]: """Provides current time.""" while True: yield str(datetime.datetime.now()) time.sleep(1 / frequency) - @skill(stream=Stream.passive, reducer=Reducer.latest, ret_type=ReturnType.passthrough) + @skill(stream=Stream.passive, reducer=Reducer.latest, ret_type=ReturnType.auto) def uptime_seconds(self, frequency: Optional[float] = 10) -> Generator[float, None, None]: """Provides current uptime.""" start_time = datetime.datetime.now() @@ -68,9 +68,10 @@ def uptime_seconds(self, frequency: Optional[float] = 10) -> Generator[float, No yield (datetime.datetime.now() - start_time).total_seconds() time.sleep(1 / frequency) - @skill(ret_type=ReturnType.passthrough) + @skill(ret_type=ReturnType.auto) def current_date(self, frequency: Optional[float] = 10) -> str: """Provides current date.""" + time.sleep(3) return str(datetime.datetime.now()) From 789d8f6678dc1ad89f15eae3a424cc8daaf846a9 Mon Sep 17 00:00:00 2001 From: lesh Date: Mon, 18 Aug 2025 16:13:58 +0300 Subject: [PATCH 27/59] major agent cleanup --- dimos/agents2/agent.py | 164 +++++++++++++--------- dimos/agents2/spec.py | 62 +++++++- dimos/agents2/test_agent.py | 2 - dimos/protocol/skill/comms.py | 2 +- dimos/protocol/skill/coordinator.py | 75 ++++------ dimos/protocol/skill/skill.py | 8 +- dimos/protocol/skill/test_coordinator.py | 9 +- dimos/protocol/skill/type.py | 6 - dimos/utils/cli/agentspy/agentspy.py | 27 ++-- dimos/utils/cli/agentspy/demo_agentspy.py | 14 +- 10 files changed, 216 insertions(+), 153 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index c92fbd4828..1efe14379c 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. import asyncio +import json +from functools import reduce from pprint import pprint -from typing import List, Optional +from typing import Any, Dict, List, Optional, Tuple, Union from langchain.chat_models import init_chat_model from langchain_core.language_models.chat_models import BaseChatModel @@ -25,14 +27,11 @@ ToolCall, ToolMessage, ) -from rich.console import Console -from rich.table import Table -from rich.text import Text from dimos.agents2.spec import AgentSpec from dimos.core import Module, rpc from dimos.protocol.skill import skill -from dimos.protocol.skill.coordinator import SkillCoordinator, SkillState +from dimos.protocol.skill.coordinator import SkillCoordinator, SkillState, SkillStateDict from dimos.utils.logging_config import setup_logger logger = setup_logger("dimos.protocol.agents2") @@ -43,7 +42,70 @@ """ +def toolmsg_from_state(state: SkillState) -> ToolMessage: + return ToolMessage( + # if agent call has been triggered by another skill, + # but this specific skill didn't finish yet so we don't have data for a tool call response + state.content() + or "Loading, you will be called with an update, no need for subsequent tool calls", + name=state.name, + tool_call_id=state.call_id, + ) + + +def summary_from_state(state: SkillState) -> Dict[str, Any]: + return { + "name": state.name, + "call_id": state.call_id, + "state": state.state.name, + "data": state.content(), + } + + +def snapshot_to_messages( + state: SkillStateDict, + tool_calls: List[ToolCall], +) -> Tuple[List[ToolMessage], Optional[AIMessage]]: + # tool call ids from a previous agent call + tool_call_ids = set( + map( + lambda tool_call: tool_call.get("id"), + tool_calls, + ) + ) + + # we build a tool msg responses + tool_msgs: list[ToolMessage] = [] + + # we build a general skill state overview (for longer running skills) + state_overview: list[Dict[str, Any]] = [] + + for skill_state in sorted( + state.values(), + key=lambda skill_state: skill_state.duration(), + ): + if skill_state.call_id in tool_call_ids: + tool_msgs.append(toolmsg_from_state(skill_state)) + continue + + state_overview.append(summary_from_state(skill_state)) + + if state_overview: + state_msg = AIMessage( + "State Overview:\n" + "\n".join(map(json.dumps, state_overview)), + metadata={"state": True}, + ) + + return tool_msgs, state_msg + + return tool_msgs, None + + +# Agent class job is to glue skill coordinator state to agent messages class Agent(AgentSpec): + system_message: SystemMessage + state_message: Optional[AIMessage] = None + def __init__( self, *args, @@ -52,14 +114,14 @@ def __init__( AgentSpec.__init__(self, *args, **kwargs) self.coordinator = SkillCoordinator() - self.messages = [] + self._history = [] if self.config.system_prompt: if isinstance(self.config.system_prompt, str): - self.messages.append(SystemMessage(self.config.system_prompt + SYSTEM_MSG_APPEND)) + self.system_message = SystemMessage(self.config.system_prompt + SYSTEM_MSG_APPEND) else: self.config.system_prompt.content += SYSTEM_MSG_APPEND - self.messages.append(self.config.system_prompt) + self.system_message = self.config.system_prompt self._llm = init_chat_model(model_provider=self.config.provider, model=self.config.model) @@ -73,7 +135,17 @@ def stop(self): @rpc def clear_history(self): - self.messages.clear() + self._history.clear() + + def append_history(self, *msgs: List[Union[AIMessage, HumanMessage]]): + self._history.extend(msgs) + + def history(self): + return ( + [self.system_message] + + self._history + + ([self.state_message] if self.state_message else []) + ) # Used by agent to execute tool calls def execute_tool_calls(self, tool_calls: List[ToolCall]) -> None: @@ -86,70 +158,25 @@ def execute_tool_calls(self, tool_calls: List[ToolCall]) -> None: tool_call.get("args"), ) - def __str__(self) -> str: - console = Console(force_terminal=True, legacy_windows=False) - - table = Table(title="Agent History", show_header=True) - - table.add_column("Message Type", style="cyan", no_wrap=True) - table.add_column("Content") - - for message in self.messages: - if isinstance(message, HumanMessage): - table.add_row(Text("Human", style="green"), Text(message.content, style="green")) - elif isinstance(message, AIMessage): - if hasattr(message, "metadata") and message.metadata.get("state"): - table.add_row( - Text("State Summary", style="blue"), - Text(message.content, style="blue"), - ) - else: - table.add_row( - Text("Agent", style="magenta"), Text(message.content, style="magenta") - ) - - for tool_call in message.tool_calls: - table.add_row( - "Tool Call", - Text( - f"{tool_call.get('name')}({tool_call.get('args').get('args')})", - style="bold magenta", - ), - ) - elif isinstance(message, ToolMessage): - table.add_row( - "Tool Response", Text(f"{message.name}() -> {message.content}"), style="red" - ) - elif isinstance(message, SystemMessage): - table.add_row("System", Text(message.content, style="yellow")) - else: - table.add_row("Unknown", str(message)) - - # Render to string with title above - with console.capture() as capture: - console.print(Text(" Agent", style="bold blue")) - console.print(table) - return capture.get().strip() - # used to inject skill calls into the agent loop without agent asking for it def run_implicit_skill(self, skill_name: str, *args, **kwargs) -> None: - self.coordinator.call_skill( - False, - skill_name, - {"args": args, "kwargs": kwargs}, - ) + self.coordinator.call_skill(False, skill_name, {"args": args, "kwargs": kwargs}) async def agent_loop(self, seed_query: str = ""): - self.messages.append(HumanMessage(seed_query)) + self.append_history(HumanMessage(seed_query)) + try: while True: tools = self.get_tools() self._llm = self._llm.bind_tools(tools) - msg = self._llm.invoke(self.messages) - self.messages.append(msg) + # history() call ensures we include latest system state + # and system message in our invocation + msg = self._llm.invoke(self.history()) + self.append_history(msg) logger.info(f"Agent response: {msg.content}") + if msg.tool_calls: self.execute_tool_calls(msg.tool_calls) @@ -157,10 +184,21 @@ async def agent_loop(self, seed_query: str = ""): logger.info("No active tasks, exiting agent loop.") return msg.content + # coordinator will continue once a skill state has changed in + # such a way that agent call needs to be executed await self.coordinator.wait_for_updates() + # we build a full snapshot of currently running skills + # we also remove finished/errored out skills from subsequent snapshots (clear=True) update = self.coordinator.generate_snapshot(clear=True) - self.messages = self.messages + update.agent_encode() + + # generate tool_msgs and general state update message, + # depending on a skill is a tool call from previous interaction or not + tool_msgs, state_msg = snapshot_to_messages(update, msg.tool_calls) + + self.state_message = state_msg + self.append_history(*tool_msgs) + print(self) print(self.coordinator) diff --git a/dimos/agents2/spec.py b/dimos/agents2/spec.py index 79cdd2fdb4..9ccc131b3b 100644 --- a/dimos/agents2/spec.py +++ b/dimos/agents2/spec.py @@ -17,12 +17,20 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from enum import Enum -from typing import Optional, Tuple, Union +from typing import List, Optional, Tuple, Union from langchain.chat_models.base import _SUPPORTED_PROVIDERS from langchain_core.messages import ( + AIMessage, + HumanMessage, + MessageLikeRepresentation, SystemMessage, + ToolCall, + ToolMessage, ) +from rich.console import Console +from rich.table import Table +from rich.text import Text from dimos.core import Module, rpc from dimos.core.module import ModuleConfig @@ -142,6 +150,58 @@ def stop(self): ... @abstractmethod def clear_history(self): ... + @abstractmethod + def append_history(self, *msgs: List[Union[AIMessage, HumanMessage]]): + self._history.extend(msgs) + + @abstractmethod + def history(self) -> List[Union[SystemMessage, ToolMessage, AIMessage, HumanMessage]]: ... + @rpc @abstractmethod def query(self, query: str): ... + + def __str__(self) -> str: + console = Console(force_terminal=True, legacy_windows=False) + + table = Table(title="Agent History", show_header=True) + + table.add_column("Message Type", style="cyan", no_wrap=True) + table.add_column("Content") + + for message in self.history(): + if isinstance(message, HumanMessage): + table.add_row(Text("Human", style="green"), Text(message.content, style="green")) + elif isinstance(message, AIMessage): + if hasattr(message, "metadata") and message.metadata.get("state"): + table.add_row( + Text("State Summary", style="blue"), + Text(message.content, style="blue"), + ) + else: + table.add_row( + Text("Agent", style="magenta"), Text(message.content, style="magenta") + ) + + for tool_call in message.tool_calls: + table.add_row( + "Tool Call", + Text( + f"{tool_call.get('name')}({tool_call.get('args').get('args')})", + style="bold magenta", + ), + ) + elif isinstance(message, ToolMessage): + table.add_row( + "Tool Response", Text(f"{message.name}() -> {message.content}"), style="red" + ) + elif isinstance(message, SystemMessage): + table.add_row("System", Text(message.content, style="yellow")) + else: + table.add_row("Unknown", str(message)) + + # Render to string with title above + with console.capture() as capture: + console.print(Text(" Agent", style="bold blue")) + console.print(table) + return capture.get().strip() diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index 9a965802b8..e1bc5781ce 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -13,7 +13,6 @@ # limitations under the License. import asyncio -import time import pytest @@ -37,7 +36,6 @@ async def test_agent_init(): agent = Agent(system_prompt=system_prompt) agent.register_skills(testcontainer) - agent.run_implicit_skill("uptime_seconds", frequency=1) agent.start() diff --git a/dimos/protocol/skill/comms.py b/dimos/protocol/skill/comms.py index 67fa47f31c..09273c36c0 100644 --- a/dimos/protocol/skill/comms.py +++ b/dimos/protocol/skill/comms.py @@ -84,7 +84,7 @@ def subscribe(self, cb: Callable[[SkillMsg], None]) -> None: @dataclass class LCMCommsConfig(PubSubCommsConfig[str, SkillMsg]): - topic: str = "/agent" + topic: str = "/skill" pubsub: Union[type[PubSub], PubSub, None] = PickleLCM # lcm needs to be started only if receiving # skill comms are broadcast only in modules so we don't autostart diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 7ab7ab3d2e..9e634e889d 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -37,7 +37,7 @@ from dimos.core.module import get_loop from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec from dimos.protocol.skill.skill import SkillConfig, SkillContainer -from dimos.protocol.skill.type import MsgType, Reducer, Return, ReturnType, SkillMsg, Stream +from dimos.protocol.skill.type import MsgType, Reducer, Return, SkillMsg, Stream from dimos.utils.logging_config import setup_logger logger = setup_logger("dimos.protocol.skill.coordinator") @@ -101,13 +101,13 @@ def duration(self) -> float: else: return 0.0 - def content(self) -> str: + def content(self) -> dict[str, Any] | str | int | float | None: # any tool output can be a custom type that knows how to encode itself # like a costmap, path, transform etc could be translatable into strings def maybe_encode(something: Any) -> str: if getattr(something, "agent_encode", None): - return something.agent_encode() - return str(something) + something = something.agent_encode() + return something if self.state == SkillStateEnum.running: if self.reduced_stream_msg: @@ -137,18 +137,15 @@ def agent_encode(self) -> Union[ToolMessage, str]: tool_call_id=self.call_id, ) else: - if self.skill_config.ret_type == ReturnType.auto: - # if we are not a streaming skill, we return a string - return json.dumps( - { - "name": self.name, - "call_id": self.call_id, - "state": self.state.name, - "data": self.content(), - "ran_for": self.duration(), - } - ) - return self.name + ": " + json.dumps(self.content()) + return json.dumps( + { + "name": self.name, + "call_id": self.call_id, + "state": self.state.name, + "data": self.content(), + "ran_for": self.duration(), + } + ) # returns True if the agent should be called for this message def handle_msg(self, msg: SkillMsg) -> bool: @@ -223,26 +220,6 @@ def __str__(self) -> str: class SkillStateDict(dict[str, SkillState]): """Custom dict for skill states with better string representation.""" - def agent_encode(self) -> list[ToolMessage]: - """Encode all skill states into a list of ToolMessages for the agent.""" - tool_responses = [] - overview_msg = [] - - for skill_state in self.values(): - response = skill_state.agent_encode() - if isinstance(response, ToolMessage): - tool_responses.append(response) - else: - overview_msg.append(response) - - if overview_msg: - state = AIMessage( - "System Overview:\n" + "\n".join(overview_msg), - metadata={"state": True}, - ) - return tool_responses + [state] - return tool_responses - def table(self) -> Table: # Add skill states section states_table = Table(show_header=True) @@ -341,7 +318,7 @@ def get_tools(self) -> list[dict]: # internal skill call def call_skill( - self, call_id: Union[str | Literal[False]], skill_name: str, args: dict[str, Any] = {} + self, call_id: Union[str | Literal[False]], skill_name: str, args: dict[str, Any] ) -> None: skill_config = self.get_skill_config(skill_name) if not skill_config: @@ -350,20 +327,18 @@ def call_skill( ) return - # This initializes the skill state if it doesn't exist - if call_id: - self._skill_state[call_id] = SkillState( - call_id=call_id, name=skill_name, skill_config=skill_config - ) - else: - call_id = str(time.time()) - self._skill_state[call_id] = SkillState( - call_id=call_id, name=skill_name, skill_config=skill_config - ) - self._skill_state[call_id].sent_tool_msg = True + self._skill_state[call_id] = SkillState( + call_id=call_id, name=skill_name, skill_config=skill_config + ) + + # TODO agent often calls the skill again if previous response is still loading. + # maybe create a new skill_state linked to a previous one? not sure - print("ARGS ARE", args) - return skill_config.call(call_id, *args.get("args", []), **args.get("kwargs", {})) + return skill_config.call( + call_id, + *(args.get("args") or []), + **(args.get("kwargs") or {}), + ) # Receives a message from active skill # Updates local skill state (appends to streamed data if needed etc) diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index 386b20270d..b130734e99 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -25,7 +25,6 @@ MsgType, Reducer, Return, - ReturnType, SkillConfig, SkillMsg, Stream, @@ -58,7 +57,9 @@ def skill( - reducer=Reducer.latest, stream=Stream.none, ret=Return.call_agent, ret_type=ReturnType.auto + reducer=Reducer.latest, + stream=Stream.none, + ret=Return.call_agent, ) -> Callable: def decorator(f: Callable[..., Any]) -> Any: def wrapper(self, *args, **kwargs): @@ -82,7 +83,6 @@ def wrapper(self, *args, **kwargs): # params = list(sig.parameters.values()) # if params and params[0].name == "self": # params = params[1:] # Remove first parameter 'self' - # wrapper.__signature__ = sig.replace(parameters=params) skill_config = SkillConfig( @@ -91,10 +91,8 @@ def wrapper(self, *args, **kwargs): stream=stream, ret=ret, schema=function_to_schema(f), - ret_type=ret_type, ) - # implicit RPC call as well wrapper.__rpc__ = True # type: ignore[attr-defined] wrapper._skill_config = skill_config # type: ignore[attr-defined] wrapper.__name__ = f.__name__ # Preserve original function name diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 5d8c1f214b..46e3fc78a6 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -18,9 +18,10 @@ import pytest +from dimos.core import Module from dimos.protocol.skill.coordinator import SkillCoordinator from dimos.protocol.skill.skill import SkillContainer, skill -from dimos.protocol.skill.type import Reducer, Return, ReturnType, Stream +from dimos.protocol.skill.type import Reducer, Return, Stream class TestContainer(SkillContainer): @@ -53,14 +54,14 @@ def counter_passive_sum( time.sleep(delay) yield i - @skill(stream=Stream.passive, reducer=Reducer.latest, ret_type=ReturnType.auto) + @skill(stream=Stream.passive, reducer=Reducer.latest) def current_time(self, frequency: Optional[float] = 10) -> Generator[str, None, None]: """Provides current time.""" while True: yield str(datetime.datetime.now()) time.sleep(1 / frequency) - @skill(stream=Stream.passive, reducer=Reducer.latest, ret_type=ReturnType.auto) + @skill(stream=Stream.passive, reducer=Reducer.latest) def uptime_seconds(self, frequency: Optional[float] = 10) -> Generator[float, None, None]: """Provides current uptime.""" start_time = datetime.datetime.now() @@ -68,7 +69,7 @@ def uptime_seconds(self, frequency: Optional[float] = 10) -> Generator[float, No yield (datetime.datetime.now() - start_time).total_seconds() time.sleep(1 / frequency) - @skill(ret_type=ReturnType.auto) + @skill() def current_date(self, frequency: Optional[float] = 10) -> str: """Provides current date.""" time.sleep(3) diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index fe9ddf51a2..7891141693 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -43,11 +43,6 @@ class Return(Enum): callback = 3 # TODO: this is a work in progress, not implemented yet -class ReturnType(Enum): - auto = 0 - passthrough = 1 - - @dataclass class SkillConfig: name: str @@ -55,7 +50,6 @@ class SkillConfig: stream: Stream ret: Return schema: dict[str, Any] - ret_type: ReturnType = ReturnType.auto f: Callable | None = None autostart: bool = False diff --git a/dimos/utils/cli/agentspy/agentspy.py b/dimos/utils/cli/agentspy/agentspy.py index 3f51afc968..8255f72587 100644 --- a/dimos/utils/cli/agentspy/agentspy.py +++ b/dimos/utils/cli/agentspy/agentspy.py @@ -189,7 +189,6 @@ def compose(self) -> ComposeResult: self.table.add_column("Skill Name") self.table.add_column("State") self.table.add_column("Duration") - self.table.add_column("Start Time") self.table.add_column("Messages") self.table.add_column("Details") @@ -286,9 +285,9 @@ def update_state(self, state: Dict[str, SkillState]): if not found: # Add new entry with current time as start start_time = current_time - if len(skill_state) > 0: - # Use first message timestamp if available - start_time = skill_state._items[0].ts + if skill_state.start_msg: + # Use start message timestamp if available + start_time = skill_state.start_msg.ts self.skill_history.append((call_id, skill_state, start_time)) # Schedule UI update @@ -311,9 +310,8 @@ def refresh_table(self): # Show only top N entries for call_id, skill_state, start_time in sorted_history[:max_rows]: - # Calculate how long ago it started + # Calculate how long ago it started (for progress indicator) time_ago = time.time() - start_time - start_str = format_duration(time_ago) + " ago" # Duration duration_str = format_duration(skill_state.duration()) @@ -323,16 +321,16 @@ def refresh_table(self): # Details based on state and last message details = "" - if skill_state.state == SkillStateEnum.error and msg_count > 0: + if skill_state.state == SkillStateEnum.error and skill_state.error_msg: # Show error message - last_msg = skill_state._items[-1] - if last_msg.type == MsgType.error: - details = str(last_msg.content)[:40] - elif skill_state.state == SkillStateEnum.completed and msg_count > 0: + error_content = skill_state.error_msg.content + if isinstance(error_content, dict): + details = error_content.get("msg", "Error")[:40] + else: + details = str(error_content)[:40] + elif skill_state.state == SkillStateEnum.completed and skill_state.ret_msg: # Show return value - last_msg = skill_state._items[-1] - if last_msg.type == MsgType.ret: - details = f"→ {str(last_msg.content)[:37]}" + details = f"→ {str(skill_state.ret_msg.content)[:37]}" elif skill_state.state == SkillStateEnum.running: # Show progress indicator details = "⋯ " + "▸" * min(int(time_ago), 20) @@ -348,7 +346,6 @@ def refresh_table(self): Text(skill_state.name, style="white"), Text(skill_state.state.name, style=state_color(skill_state.state)), Text(duration_str, style="dim"), - Text(start_str, style="dim"), Text(str(msg_count), style="dim"), Text(details, style="dim white"), ) diff --git a/dimos/utils/cli/agentspy/demo_agentspy.py b/dimos/utils/cli/agentspy/demo_agentspy.py index fcd71d99ef..3ec3829794 100644 --- a/dimos/utils/cli/agentspy/demo_agentspy.py +++ b/dimos/utils/cli/agentspy/demo_agentspy.py @@ -75,15 +75,17 @@ def skill_runner(): # Run different skills based on counter if counter % 4 == 0: # Run multiple count_to in parallel to show parallel execution - agent_interface.call(f"{call_id}-count-1", "count_to", 3) - agent_interface.call(f"{call_id}-count-2", "count_to", 5) - agent_interface.call(f"{call_id}-count-3", "count_to", 2) + agent_interface.call_skill(f"{call_id}-count-1", "count_to", {"args": [3]}) + agent_interface.call_skill(f"{call_id}-count-2", "count_to", {"args": [5]}) + agent_interface.call_skill(f"{call_id}-count-3", "count_to", {"args": [2]}) elif counter % 4 == 1: - agent_interface.call(f"{call_id}-fib", "compute_fibonacci", 10) + agent_interface.call_skill(f"{call_id}-fib", "compute_fibonacci", {"args": [10]}) elif counter % 4 == 2: - agent_interface.call(f"{call_id}-quick", "quick_task", f"task-{counter}") + agent_interface.call_skill( + f"{call_id}-quick", "quick_task", {"args": [f"task-{counter}"]} + ) else: - agent_interface.call(f"{call_id}-error", "simulate_error") + agent_interface.call_skill(f"{call_id}-error", "simulate_error", {}) counter += 1 From fa270e646c1cb52c3415c0084c2538c83772f612 Mon Sep 17 00:00:00 2001 From: lesh Date: Mon, 18 Aug 2025 16:44:41 +0300 Subject: [PATCH 28/59] agent publishes messages exchanged, for observability --- dimos/agents2/agent.py | 12 +++++++++--- dimos/agents2/spec.py | 30 +++++++++++++++++++++++------- dimos/agents2/test_agent.py | 1 - 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index 1efe14379c..dddf304375 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -138,6 +138,9 @@ def clear_history(self): self._history.clear() def append_history(self, *msgs: List[Union[AIMessage, HumanMessage]]): + for msg in msgs: + self.publish(msg) + self._history.extend(msgs) def history(self): @@ -172,6 +175,9 @@ async def agent_loop(self, seed_query: str = ""): # history() call ensures we include latest system state # and system message in our invocation + if self.state_message: + self.publish(self.state_message) + msg = self._llm.invoke(self.history()) self.append_history(msg) @@ -180,6 +186,9 @@ async def agent_loop(self, seed_query: str = ""): if msg.tool_calls: self.execute_tool_calls(msg.tool_calls) + print(self) + print(self.coordinator) + if not self.coordinator.has_active_skills(): logger.info("No active tasks, exiting agent loop.") return msg.content @@ -199,9 +208,6 @@ async def agent_loop(self, seed_query: str = ""): self.state_message = state_msg self.append_history(*tool_msgs) - print(self) - print(self.coordinator) - except Exception as e: logger.error(f"Error in agent loop: {e}") import traceback diff --git a/dimos/agents2/spec.py b/dimos/agents2/spec.py index 9ccc131b3b..92e771b380 100644 --- a/dimos/agents2/spec.py +++ b/dimos/agents2/spec.py @@ -15,9 +15,9 @@ """Base agent module that wraps BaseAgent for DimOS module usage.""" from abc import ABC, abstractmethod -from dataclasses import dataclass +from dataclasses import dataclass, field from enum import Enum -from typing import List, Optional, Tuple, Union +from typing import Any, List, Optional, Tuple, Union from langchain.chat_models.base import _SUPPORTED_PROVIDERS from langchain_core.messages import ( @@ -34,6 +34,7 @@ from dimos.core import Module, rpc from dimos.core.module import ModuleConfig +from dimos.protocol.pubsub import PubSub, lcm from dimos.protocol.service import Service from dimos.protocol.skill.skill import SkillContainer from dimos.utils.logging_config import setup_logger @@ -134,10 +135,27 @@ class AgentConfig(ModuleConfig): model: Model = Model.GPT_4O provider: Provider = Provider.OPENAI + agent_transport: type[PubSub] = lcm.PickleLCM + agent_topic: Any = field(default_factory=lambda: lcm.Topic("/agent")) + + +type AnyMessage = Union[SystemMessage, ToolMessage, AIMessage, HumanMessage] + class AgentSpec(Service[AgentConfig], Module, ABC): default_config: type[AgentConfig] = AgentConfig + def __init__(self, *args, **kwargs): + Service.__init__(self, *args, **kwargs) + Module.__init__(self, *args, **kwargs) + + if self.config.agent_transport: + self.transport = self.config.agent_transport() + + def publish(self, msg: AnyMessage): + if self.transport: + self.transport.publish(self.config.agent_topic, msg) + @rpc @abstractmethod def start(self): ... @@ -151,11 +169,10 @@ def stop(self): ... def clear_history(self): ... @abstractmethod - def append_history(self, *msgs: List[Union[AIMessage, HumanMessage]]): - self._history.extend(msgs) + def append_history(self, *msgs: List[Union[AIMessage, HumanMessage]]): ... @abstractmethod - def history(self) -> List[Union[SystemMessage, ToolMessage, AIMessage, HumanMessage]]: ... + def history(self) -> List[AnyMessage]: ... @rpc @abstractmethod @@ -163,8 +180,7 @@ def query(self, query: str): ... def __str__(self) -> str: console = Console(force_terminal=True, legacy_windows=False) - - table = Table(title="Agent History", show_header=True) + table = Table(show_header=True) table.add_column("Message Type", style="cyan", no_wrap=True) table.add_column("Content") diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index e1bc5781ce..a5e1002c81 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -46,4 +46,3 @@ async def test_agent_init(): ) await asyncio.sleep(20) - print(agent) From f970c95feb58dfc897c3af6c3bd1c35037dafad6 Mon Sep 17 00:00:00 2001 From: lesh Date: Mon, 18 Aug 2025 17:02:33 +0300 Subject: [PATCH 29/59] agentspy renamed to skillspy --- dimos/agents2/agent.py | 3 +++ .../{agentspy/demo_agentspy.py => skillspy/demo_skillspy.py} | 0 dimos/utils/cli/{agentspy/agentspy.py => skillspy/skillspy.py} | 0 pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 1 deletion(-) rename dimos/utils/cli/{agentspy/demo_agentspy.py => skillspy/demo_skillspy.py} (100%) rename dimos/utils/cli/{agentspy/agentspy.py => skillspy/skillspy.py} (100%) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index dddf304375..381ac89493 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -46,6 +46,7 @@ def toolmsg_from_state(state: SkillState) -> ToolMessage: return ToolMessage( # if agent call has been triggered by another skill, # but this specific skill didn't finish yet so we don't have data for a tool call response + # we generate an informative message instead state.content() or "Loading, you will be called with an update, no need for subsequent tool calls", name=state.name, @@ -62,6 +63,8 @@ def summary_from_state(state: SkillState) -> Dict[str, Any]: } +# we take overview of running skills from the coorindator +# and build messages to be sent to an agent def snapshot_to_messages( state: SkillStateDict, tool_calls: List[ToolCall], diff --git a/dimos/utils/cli/agentspy/demo_agentspy.py b/dimos/utils/cli/skillspy/demo_skillspy.py similarity index 100% rename from dimos/utils/cli/agentspy/demo_agentspy.py rename to dimos/utils/cli/skillspy/demo_skillspy.py diff --git a/dimos/utils/cli/agentspy/agentspy.py b/dimos/utils/cli/skillspy/skillspy.py similarity index 100% rename from dimos/utils/cli/agentspy/agentspy.py rename to dimos/utils/cli/skillspy/skillspy.py diff --git a/pyproject.toml b/pyproject.toml index 30038ac143..aacd7c05d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,7 +101,7 @@ dependencies = [ [project.scripts] lcmspy = "dimos.utils.cli.lcmspy.run_lcmspy:main" foxglove-bridge = "dimos.utils.cli.foxglove_bridge.run_foxglove_bridge:main" -agentspy = "dimos.utils.cli.agentspy.agentspy:main" +skillspy = "dimos.utils.cli.skillspy.skillspy:main" [project.optional-dependencies] manipulation = [ From 2d8ecc4ec29795cf300da5966d932e7b703dabc5 Mon Sep 17 00:00:00 2001 From: lesh Date: Mon, 18 Aug 2025 17:42:17 +0300 Subject: [PATCH 30/59] agentspy --- dimos/agents2/agent.py | 11 +- dimos/utils/cli/agentspy/agentspy.py | 242 ++++++++++++++++++++++ dimos/utils/cli/agentspy/demo_agentspy.py | 65 ++++++ pyproject.toml | 1 + 4 files changed, 313 insertions(+), 6 deletions(-) create mode 100644 dimos/utils/cli/agentspy/agentspy.py create mode 100755 dimos/utils/cli/agentspy/demo_agentspy.py diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index 381ac89493..e1d4af34bc 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -37,18 +37,16 @@ logger = setup_logger("dimos.protocol.agents2") -SYSTEM_MSG_APPEND = """ -Your message history will always be appended with a System Overview message that provides situational awareness. -""" +SYSTEM_MSG_APPEND = "\nYour message history will always be appended with a System Overview message that provides situational awareness." def toolmsg_from_state(state: SkillState) -> ToolMessage: return ToolMessage( # if agent call has been triggered by another skill, - # but this specific skill didn't finish yet so we don't have data for a tool call response - # we generate an informative message instead + # and this specific skill didn't finish yet but we need a tool call response + # we return a message explaining that execution is still ongoing state.content() - or "Loading, you will be called with an update, no need for subsequent tool calls", + or "Running, you will be called with an update, no need for subsequent tool calls", name=state.name, tool_call_id=state.call_id, ) @@ -126,6 +124,7 @@ def __init__( self.config.system_prompt.content += SYSTEM_MSG_APPEND self.system_message = self.config.system_prompt + self.publish(self.system_message) self._llm = init_chat_model(model_provider=self.config.provider, model=self.config.model) @rpc diff --git a/dimos/utils/cli/agentspy/agentspy.py b/dimos/utils/cli/agentspy/agentspy.py new file mode 100644 index 0000000000..de784f4719 --- /dev/null +++ b/dimos/utils/cli/agentspy/agentspy.py @@ -0,0 +1,242 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import time +from collections import deque +from dataclasses import dataclass +from typing import Any, Deque, Dict, List, Optional, Union + +from langchain_core.messages import ( + AIMessage, + HumanMessage, + SystemMessage, + ToolMessage, +) +from rich.console import Console +from rich.table import Table +from rich.text import Text +from textual.app import App, ComposeResult +from textual.binding import Binding +from textual.containers import Container, ScrollableContainer +from textual.reactive import reactive +from textual.widgets import Footer, RichLog + +from dimos.protocol.pubsub import lcm +from dimos.protocol.pubsub.lcmpubsub import PickleLCM +from dimos.utils.logging_config import setup_logger + +# Type alias for all message types we might receive +AnyMessage = Union[SystemMessage, ToolMessage, AIMessage, HumanMessage] + + +@dataclass +class MessageEntry: + """Store a single message with metadata.""" + + timestamp: float + message: AnyMessage + + def __post_init__(self): + """Initialize timestamp if not provided.""" + if self.timestamp is None: + self.timestamp = time.time() + + +class AgentMessageMonitor: + """Monitor agent messages published via LCM.""" + + def __init__(self, topic: str = "/agent", max_messages: int = 1000): + self.topic = topic + self.max_messages = max_messages + self.messages: Deque[MessageEntry] = deque(maxlen=max_messages) + self.transport = PickleLCM() + self.transport.start() + self.callbacks: List[callable] = [] + pass + + def start(self): + """Start monitoring messages.""" + self.transport.subscribe(self.topic, self._handle_message) + + def stop(self): + """Stop monitoring.""" + # PickleLCM doesn't have explicit stop method + pass + + def _handle_message(self, msg: Any, topic: str): + """Handle incoming messages.""" + # Check if it's one of the message types we care about + if isinstance(msg, (SystemMessage, ToolMessage, AIMessage, HumanMessage)): + entry = MessageEntry(timestamp=time.time(), message=msg) + self.messages.append(entry) + + # Notify callbacks + for callback in self.callbacks: + callback(entry) + else: + pass + + def subscribe(self, callback: callable): + """Subscribe to new messages.""" + self.callbacks.append(callback) + + def get_messages(self) -> List[MessageEntry]: + """Get all stored messages.""" + return list(self.messages) + + +def format_timestamp(timestamp: float) -> str: + """Format timestamp as HH:MM:SS.mmm.""" + return ( + time.strftime("%H:%M:%S", time.localtime(timestamp)) + f".{int((timestamp % 1) * 1000):03d}" + ) + + +def get_message_type_and_style(msg: AnyMessage) -> tuple[str, str]: + """Get message type name and style color.""" + if isinstance(msg, HumanMessage): + return "Human ", "green" + elif isinstance(msg, AIMessage): + if hasattr(msg, "metadata") and msg.metadata.get("state"): + return "State ", "blue" + return "Agent ", "yellow" + elif isinstance(msg, ToolMessage): + return "Tool ", "red" + elif isinstance(msg, SystemMessage): + return "System", "red" + else: + return "Unkn ", "white" + + +def format_message_content(msg: AnyMessage) -> str: + """Format message content for display.""" + if isinstance(msg, ToolMessage): + return f"{msg.name}() -> {msg.content}" + elif isinstance(msg, AIMessage) and msg.tool_calls: + # Include tool calls in content + tool_info = [] + for tc in msg.tool_calls: + args_str = str(tc.get("args", {})) + tool_info.append(f"{tc.get('name')}({args_str})") + content = msg.content or "" + if content and tool_info: + return f"{content}\n[Tool Calls: {', '.join(tool_info)}]" + elif tool_info: + return f"[Tool Calls: {', '.join(tool_info)}]" + return content + else: + return str(msg.content) if hasattr(msg, "content") else str(msg) + + +class AgentSpyApp(App): + """TUI application for monitoring agent messages.""" + + CSS = """ + Screen { + layout: vertical; + background: black; + } + + RichLog { + height: 1fr; + border: none; + background: black; + padding: 0 1; + } + + Footer { + dock: bottom; + height: 1; + } + """ + + BINDINGS = [ + Binding("q", "quit", "Quit"), + Binding("c", "clear", "Clear"), + Binding("ctrl+c", "quit", show=False), + ] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.monitor = AgentMessageMonitor() + self.message_log: Optional[RichLog] = None + + def compose(self) -> ComposeResult: + """Compose the UI.""" + self.message_log = RichLog(wrap=True, highlight=True, markup=True) + yield self.message_log + yield Footer() + + def on_mount(self): + """Start monitoring when app mounts.""" + self.theme = "flexoki" + + # Subscribe to new messages + self.monitor.subscribe(self.on_new_message) + self.monitor.start() + + # Write existing messages to the log + for entry in self.monitor.get_messages(): + self.on_new_message(entry) + + def on_unmount(self): + """Stop monitoring when app unmounts.""" + self.monitor.stop() + + def on_new_message(self, entry: MessageEntry): + """Handle new messages.""" + if self.message_log: + msg = entry.message + msg_type, style = get_message_type_and_style(msg) + content = format_message_content(msg) + + # Format the message for the log + timestamp = format_timestamp(entry.timestamp) + self.message_log.write( + f"[dim white]{timestamp}[/dim white] | " + f"[bold {style}]{msg_type}[/bold {style}] | " + f"[{style}]{content}[/{style}]" + ) + + def refresh_display(self): + """Refresh the message display.""" + # Not needed anymore as messages are written directly to the log + + def action_clear(self): + """Clear message history.""" + self.monitor.messages.clear() + if self.message_log: + self.message_log.clear() + + +def main(): + """Main entry point for agentspy.""" + import sys + + if len(sys.argv) > 1 and sys.argv[1] == "web": + import os + + from textual_serve.server import Server + + server = Server(f"python {os.path.abspath(__file__)}") + server.serve() + else: + app = AgentSpyApp() + app.run() + + +if __name__ == "__main__": + main() diff --git a/dimos/utils/cli/agentspy/demo_agentspy.py b/dimos/utils/cli/agentspy/demo_agentspy.py new file mode 100755 index 0000000000..1e3a0d4f3b --- /dev/null +++ b/dimos/utils/cli/agentspy/demo_agentspy.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Demo script to test agent message publishing and agentspy reception.""" + +import time +from langchain_core.messages import ( + AIMessage, + HumanMessage, + SystemMessage, + ToolMessage, +) +from dimos.protocol.pubsub.lcmpubsub import PickleLCM +from dimos.protocol.pubsub import lcm + + +def test_publish_messages(): + """Publish test messages to verify agentspy is working.""" + print("Starting agent message publisher demo...") + + # Create transport + transport = PickleLCM() + topic = lcm.Topic("/agent") + + print(f"Publishing to topic: {topic}") + + # Test messages + messages = [ + SystemMessage("System initialized for testing"), + HumanMessage("Hello agent, can you help me?"), + AIMessage( + "Of course! I'm here to help.", + tool_calls=[{"name": "get_info", "args": {"query": "test"}, "id": "1"}], + ), + ToolMessage(name="get_info", content="Test result: success", tool_call_id="1"), + AIMessage("The test was successful!", metadata={"state": True}), + ] + + # Publish messages with delays + for i, msg in enumerate(messages): + print(f"\nPublishing message {i + 1}: {type(msg).__name__}") + print(f"Content: {msg.content if hasattr(msg, 'content') else msg}") + + transport.publish(topic, msg) + time.sleep(1) # Wait 1 second between messages + + print("\nAll messages published! Check agentspy to see if they were received.") + print("Keeping publisher alive for 10 more seconds...") + time.sleep(10) + + +if __name__ == "__main__": + test_publish_messages() diff --git a/pyproject.toml b/pyproject.toml index aacd7c05d8..a8adfd18ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,6 +102,7 @@ dependencies = [ lcmspy = "dimos.utils.cli.lcmspy.run_lcmspy:main" foxglove-bridge = "dimos.utils.cli.foxglove_bridge.run_foxglove_bridge:main" skillspy = "dimos.utils.cli.skillspy.skillspy:main" +agentspy = "dimos.utils.cli.agentspy.agentspy:main" [project.optional-dependencies] manipulation = [ From 6c420690cb1efd132b5fee00952926a2ed823cef Mon Sep 17 00:00:00 2001 From: lesh Date: Mon, 18 Aug 2025 18:28:06 +0300 Subject: [PATCH 31/59] implicit skills --- dimos/agents2/test_agent.py | 10 +++------- dimos/protocol/skill/coordinator.py | 2 ++ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index a5e1002c81..9029d6d8ac 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -34,15 +34,11 @@ async def test_agent_init(): testcontainer = TestContainer() agent = Agent(system_prompt=system_prompt) - agent.register_skills(testcontainer) - agent.start() - - print( - agent.query_async( - "hi there, please tell me what's your name and current date, and how much is 124181112 + 124124?" - ) + agent.run_implicit_skill("uptime_seconds") + agent.query_async( + "hi there, please tell me what's your name and current date, and how much is 124181112 + 124124?" ) await asyncio.sleep(20) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 9e634e889d..6fad1f7be7 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -320,6 +320,8 @@ def get_tools(self) -> list[dict]: def call_skill( self, call_id: Union[str | Literal[False]], skill_name: str, args: dict[str, Any] ) -> None: + if not call_id: + call_id = str(round(time.time())) skill_config = self.get_skill_config(skill_name) if not skill_config: logger.error( From 89be5d891ead48d31e16d48d8c96efad47ef6c70 Mon Sep 17 00:00:00 2001 From: lesh Date: Mon, 18 Aug 2025 18:54:54 +0300 Subject: [PATCH 32/59] tests fix --- dimos/agents2/test_agent.py | 1 + dimos/protocol/skill/test_coordinator.py | 2 +- dimos/protocol/skill/type.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index 9029d6d8ac..6662589d96 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -20,6 +20,7 @@ from dimos.protocol.skill.test_coordinator import TestContainer +@pytest.mark.tool @pytest.mark.asyncio async def test_agent_init(): system_prompt = ( diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 46e3fc78a6..27ec420a22 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -125,7 +125,7 @@ async def test_coordinator_generator(): while await skillCoordinator.wait_for_updates(2): print(skillCoordinator) agent_update = skillCoordinator.generate_snapshot(clear=True) - print(agent_update.agent_encode()) + print(agent_update) await asyncio.sleep(0.125) print("Skill lifecycle finished") diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index 7891141693..c10b2459ad 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -137,7 +137,7 @@ def __str__(self): # typing looks complex but it's a standard reducer function signature, using SkillMsgs # (Optional[accumulator], msg) -> accumulator -type ReducerF = Callable[ +ReducerF = Callable[ [Optional[SkillMsg[Literal[MsgType.reduced_stream]]], SkillMsg[Literal[MsgType.stream]]], SkillMsg[Literal[MsgType.reduced_stream]], ] From c2b4c7fe432c086a0b70869a5996c87b321d9922 Mon Sep 17 00:00:00 2001 From: lesh Date: Tue, 19 Aug 2025 03:27:14 +0300 Subject: [PATCH 33/59] small comments cleanup --- dimos/agents2/agent.py | 39 +++++++++++------------- dimos/protocol/skill/test_coordinator.py | 3 +- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index e1d4af34bc..29afdea1cb 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -13,24 +13,20 @@ # limitations under the License. import asyncio import json -from functools import reduce -from pprint import pprint +from operator import itemgetter from typing import Any, Dict, List, Optional, Tuple, Union from langchain.chat_models import init_chat_model -from langchain_core.language_models.chat_models import BaseChatModel from langchain_core.messages import ( AIMessage, HumanMessage, - MessageLikeRepresentation, SystemMessage, ToolCall, ToolMessage, ) from dimos.agents2.spec import AgentSpec -from dimos.core import Module, rpc -from dimos.protocol.skill import skill +from dimos.core import rpc from dimos.protocol.skill.coordinator import SkillCoordinator, SkillState, SkillStateDict from dimos.utils.logging_config import setup_logger @@ -61,24 +57,21 @@ def summary_from_state(state: SkillState) -> Dict[str, Any]: } -# we take overview of running skills from the coorindator -# and build messages to be sent to an agent +# takes an overview of running skills from the coorindator +# and builds messages to be sent to an agent def snapshot_to_messages( state: SkillStateDict, tool_calls: List[ToolCall], ) -> Tuple[List[ToolMessage], Optional[AIMessage]]: - # tool call ids from a previous agent call + # builds a set of tool call ids from a previous agent request tool_call_ids = set( - map( - lambda tool_call: tool_call.get("id"), - tool_calls, - ) + map(itemgetter("id"), tool_calls), ) - # we build a tool msg responses + # build a tool msg responses tool_msgs: list[ToolMessage] = [] - # we build a general skill state overview (for longer running skills) + # build a general skill state overview (for longer running skills) state_overview: list[Dict[str, Any]] = [] for skill_state in sorted( @@ -102,7 +95,7 @@ def snapshot_to_messages( return tool_msgs, None -# Agent class job is to glue skill coordinator state to agent messages +# Agent class job is to glue skill coordinator state to an agent, builds langchain messages class Agent(AgentSpec): system_message: SystemMessage state_message: Optional[AIMessage] = None @@ -172,14 +165,17 @@ async def agent_loop(self, seed_query: str = ""): try: while True: + # we are getting tools from the coordinator on each turn + # since this allows for skillcontainers to dynamically provide new skills tools = self.get_tools() self._llm = self._llm.bind_tools(tools) - # history() call ensures we include latest system state - # and system message in our invocation + # publish to /agent topic for observability if self.state_message: self.publish(self.state_message) + # history() builds our message history dynamically + # ensures we include latest system state, but not old ones. msg = self._llm.invoke(self.history()) self.append_history(msg) @@ -199,12 +195,13 @@ async def agent_loop(self, seed_query: str = ""): # such a way that agent call needs to be executed await self.coordinator.wait_for_updates() - # we build a full snapshot of currently running skills - # we also remove finished/errored out skills from subsequent snapshots (clear=True) + # we request a full snapshot of currently running, finished or errored out skills + # we ask for removal of finished skills from subsequent snapshots (clear=True) update = self.coordinator.generate_snapshot(clear=True) # generate tool_msgs and general state update message, - # depending on a skill is a tool call from previous interaction or not + # depending on a skill having associated tool call from previous interaction + # we will return a tool message, and not a general state message tool_msgs, state_msg = snapshot_to_messages(update, msg.tool_calls) self.state_message = state_msg diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 27ec420a22..7419408521 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -18,10 +18,9 @@ import pytest -from dimos.core import Module from dimos.protocol.skill.coordinator import SkillCoordinator from dimos.protocol.skill.skill import SkillContainer, skill -from dimos.protocol.skill.type import Reducer, Return, Stream +from dimos.protocol.skill.type import Reducer, Stream class TestContainer(SkillContainer): From 7c84d5ad1a8bb45b80c19b7a804fa051fd0c60fe Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 20 Aug 2025 14:23:55 +0300 Subject: [PATCH 34/59] ci tests fix --- dimos/protocol/skill/type.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index c10b2459ad..8334453b18 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -146,7 +146,7 @@ def __str__(self): C = TypeVar("C") # content type A = TypeVar("A") # accumulator type # define a naive reducer function type that's generic in terms of the accumulator type -type SimpleReducerF[A, C] = Callable[[Optional[A], C], A] +SimpleReducerF = Callable[[Optional[A], C], A] def make_reducer(simple_reducer: SimpleReducerF) -> ReducerF: From e98bf09e53789cece2ade0e6c8cc9bc8d6e2fc13 Mon Sep 17 00:00:00 2001 From: lesh Date: Tue, 26 Aug 2025 17:19:24 +0300 Subject: [PATCH 35/59] initial image implementation --- dimos/agents2/agent.py | 64 ++++++++++++++-------- dimos/agents2/spec.py | 6 ++- dimos/agents2/test_agent.py | 2 +- dimos/msgs/sensor_msgs/Image.py | 67 +++++++++++++++--------- dimos/protocol/skill/coordinator.py | 9 +++- dimos/protocol/skill/skill.py | 9 ++-- dimos/protocol/skill/test_coordinator.py | 14 +++-- dimos/protocol/skill/type.py | 7 +++ 8 files changed, 123 insertions(+), 55 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index 29afdea1cb..f2efe37dda 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -14,7 +14,7 @@ import asyncio import json from operator import itemgetter -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, TypedDict, Union from langchain.chat_models import init_chat_model from langchain_core.messages import ( @@ -27,7 +27,9 @@ from dimos.agents2.spec import AgentSpec from dimos.core import rpc +from dimos.msgs.sensor_msgs import Image from dimos.protocol.skill.coordinator import SkillCoordinator, SkillState, SkillStateDict +from dimos.protocol.skill.type import Output from dimos.utils.logging_config import setup_logger logger = setup_logger("dimos.protocol.agents2") @@ -41,19 +43,33 @@ def toolmsg_from_state(state: SkillState) -> ToolMessage: # if agent call has been triggered by another skill, # and this specific skill didn't finish yet but we need a tool call response # we return a message explaining that execution is still ongoing - state.content() + content=state.content() or "Running, you will be called with an update, no need for subsequent tool calls", name=state.name, tool_call_id=state.call_id, ) -def summary_from_state(state: SkillState) -> Dict[str, Any]: +class SkillStateSummary(TypedDict): + name: str + call_id: str + state: str + data: Any + + +def summary_from_state(state: SkillState, special_data: bool = False) -> SkillStateSummary: + content = state.content() + if isinstance(content, dict): + content = json.dumps(content) + + if not isinstance(content, str): + content = str(content) + return { "name": state.name, "call_id": state.call_id, "state": state.state.name, - "data": state.content(), + "data": state.content() if not special_data else "data will be in a separate message", } @@ -72,7 +88,11 @@ def snapshot_to_messages( tool_msgs: list[ToolMessage] = [] # build a general skill state overview (for longer running skills) - state_overview: list[Dict[str, Any]] = [] + state_overview: list[Dict[str, SkillStateSummary]] = [] + + # for special skills that want to return a separate message + # (images for example, requires to be a HumanMessage) + special_msgs: List[HumanMessage] = [] for skill_state in sorted( state.values(), @@ -82,23 +102,28 @@ def snapshot_to_messages( tool_msgs.append(toolmsg_from_state(skill_state)) continue - state_overview.append(summary_from_state(skill_state)) + special_data = skill_state.skill_config.output != Output.standard + if special_data: + print("special data from skill", skill_state.name, skill_state.content()) + special_msgs.append(HumanMessage(content=[skill_state.content()])) + + state_overview.append(summary_from_state(skill_state, special_data)) if state_overview: state_msg = AIMessage( "State Overview:\n" + "\n".join(map(json.dumps, state_overview)), - metadata={"state": True}, ) - return tool_msgs, state_msg - - return tool_msgs, None + return { + "tool_msgs": tool_msgs if tool_msgs else [], + "state_msgs": ([state_msg] if state_msg else []) + special_msgs, + } # Agent class job is to glue skill coordinator state to an agent, builds langchain messages class Agent(AgentSpec): system_message: SystemMessage - state_message: Optional[AIMessage] = None + state_messages: List[Union[AIMessage, HumanMessage]] def __init__( self, @@ -107,6 +132,7 @@ def __init__( ): AgentSpec.__init__(self, *args, **kwargs) + self.state_messages = [] self.coordinator = SkillCoordinator() self._history = [] @@ -139,11 +165,7 @@ def append_history(self, *msgs: List[Union[AIMessage, HumanMessage]]): self._history.extend(msgs) def history(self): - return ( - [self.system_message] - + self._history - + ([self.state_message] if self.state_message else []) - ) + return [self.system_message] + self._history + self.state_messages # Used by agent to execute tool calls def execute_tool_calls(self, tool_calls: List[ToolCall]) -> None: @@ -171,8 +193,8 @@ async def agent_loop(self, seed_query: str = ""): self._llm = self._llm.bind_tools(tools) # publish to /agent topic for observability - if self.state_message: - self.publish(self.state_message) + for state_msg in self.state_messages: + self.publish(state_msg) # history() builds our message history dynamically # ensures we include latest system state, but not old ones. @@ -202,10 +224,10 @@ async def agent_loop(self, seed_query: str = ""): # generate tool_msgs and general state update message, # depending on a skill having associated tool call from previous interaction # we will return a tool message, and not a general state message - tool_msgs, state_msg = snapshot_to_messages(update, msg.tool_calls) + snapshot_msgs = snapshot_to_messages(update, msg.tool_calls) - self.state_message = state_msg - self.append_history(*tool_msgs) + self.state_messages = snapshot_msgs.get("state_msgs", []) + self.append_history(*snapshot_msgs.get("tool_msgs", [])) except Exception as e: logger.error(f"Error in agent loop: {e}") diff --git a/dimos/agents2/spec.py b/dimos/agents2/spec.py index 92e771b380..1e5e9eaecd 100644 --- a/dimos/agents2/spec.py +++ b/dimos/agents2/spec.py @@ -187,7 +187,11 @@ def __str__(self) -> str: for message in self.history(): if isinstance(message, HumanMessage): - table.add_row(Text("Human", style="green"), Text(message.content, style="green")) + content = message.content + if not isinstance(content, str): + content = "" + + table.add_row(Text("Human", style="green"), Text(content, style="green")) elif isinstance(message, AIMessage): if hasattr(message, "metadata") and message.metadata.get("state"): table.add_row( diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index 6662589d96..0df5e7b634 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -39,7 +39,7 @@ async def test_agent_init(): agent.start() agent.run_implicit_skill("uptime_seconds") agent.query_async( - "hi there, please tell me what's your name and current date, and how much is 124181112 + 124124?" + "hi there, I have 4 questions for you: Please tell me what's your name and current date, and how much is 124181112 + 124124, and what do you see on the camera?" ) await asyncio.sleep(20) diff --git a/dimos/msgs/sensor_msgs/Image.py b/dimos/msgs/sensor_msgs/Image.py index fb57cfcd3e..d1aff49a42 100644 --- a/dimos/msgs/sensor_msgs/Image.py +++ b/dimos/msgs/sensor_msgs/Image.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import base64 import time from dataclasses import dataclass, field from enum import Enum -from typing import Optional, Tuple +from typing import Literal, Optional, Tuple, TypedDict import cv2 import numpy as np @@ -40,6 +41,15 @@ class ImageFormat(Enum): DEPTH16 = "DEPTH16" # 16-bit Integer Depth (millimeters) +class AgentImageMessage(TypedDict): + """Type definition for agent-compatible image representation.""" + + type: Literal["image"] + source_type: Literal["base64"] + mime_type: Literal["image/jpeg", "image/png"] + data: str # Base64 encoded image data + + @dataclass class Image(Timestamped): """Standardized image type with LCM integration.""" @@ -285,6 +295,38 @@ def save(self, filepath: str) -> bool: cv_image = self.to_opencv() return cv2.imwrite(filepath, cv_image) + def to_base64(self, max_width: int = 640, max_height: int = 480) -> str: + """Encode image to base64 JPEG format for agent processing. + + Args: + max_width: Maximum width for resizing (default 640) + max_height: Maximum height for resizing (default 480) + + Returns: + Base64 encoded JPEG string suitable for LLM/agent consumption. + """ + bgr_image = self.to_bgr() + + # Encode as JPEG + encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 80] # 80% quality + success, buffer = cv2.imencode(".jpg", bgr_image.data, encode_param) + + if not success: + raise ValueError("Failed to encode image as JPEG") + + # Convert to base64 + + jpeg_bytes = buffer.tobytes() + base64_str = base64.b64encode(jpeg_bytes).decode("utf-8") + + return base64_str + + def agent_encode(self) -> AgentImageMessage: + return { + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{self.to_base64()}"}, + } + def lcm_encode(self, frame_id: Optional[str] = None) -> LCMImage: """Convert to LCM Image message.""" msg = LCMImage() @@ -433,26 +475,3 @@ def __eq__(self, other) -> bool: def __len__(self) -> int: """Return total number of pixels.""" return self.height * self.width - - def agent_encode(self) -> str: - """Encode image to base64 JPEG format for agent processing. - - Returns: - Base64 encoded JPEG string suitable for LLM/agent consumption. - """ - bgr_image = self.to_bgr() - - # Encode as JPEG - encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 95] # 95% quality - success, buffer = cv2.imencode(".jpg", bgr_image.data, encode_param) - - if not success: - raise ValueError("Failed to encode image as JPEG") - - # Convert to base64 - import base64 - - jpeg_bytes = buffer.tobytes() - base64_str = base64.b64encode(jpeg_bytes).decode("utf-8") - - return base64_str diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 6fad1f7be7..c41ed15e9e 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -106,7 +106,14 @@ def content(self) -> dict[str, Any] | str | int | float | None: # like a costmap, path, transform etc could be translatable into strings def maybe_encode(something: Any) -> str: if getattr(something, "agent_encode", None): - something = something.agent_encode() + return something.agent_encode() + + # if isinstance(something, dict): + # something = json.dumps(something) + + # if not isinstance(something, str): + # something = str(something) + return something if self.state == SkillStateEnum.running: diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index b130734e99..7a01c9546d 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -23,6 +23,7 @@ from dimos.protocol.skill.schema import function_to_schema from dimos.protocol.skill.type import ( MsgType, + Output, Reducer, Return, SkillConfig, @@ -57,9 +58,10 @@ def skill( - reducer=Reducer.latest, - stream=Stream.none, - ret=Return.call_agent, + reducer: Reducer = Reducer.latest, + stream: Stream = Stream.none, + ret: Return = Return.call_agent, + output: Output = Output.standard, ) -> Callable: def decorator(f: Callable[..., Any]) -> Any: def wrapper(self, *args, **kwargs): @@ -90,6 +92,7 @@ def wrapper(self, *args, **kwargs): reducer=reducer, stream=stream, ret=ret, + output=output, schema=function_to_schema(f), ) diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 7419408521..244380e1d7 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -18,9 +18,11 @@ import pytest +from dimos.msgs.sensor_msgs import Image from dimos.protocol.skill.coordinator import SkillCoordinator from dimos.protocol.skill.skill import SkillContainer, skill -from dimos.protocol.skill.type import Reducer, Stream +from dimos.protocol.skill.type import Output, Reducer, Stream +from dimos.utils.data import get_data class TestContainer(SkillContainer): @@ -57,7 +59,7 @@ def counter_passive_sum( def current_time(self, frequency: Optional[float] = 10) -> Generator[str, None, None]: """Provides current time.""" while True: - yield str(datetime.datetime.now()) + yield datetime.datetime.now() time.sleep(1 / frequency) @skill(stream=Stream.passive, reducer=Reducer.latest) @@ -71,8 +73,12 @@ def uptime_seconds(self, frequency: Optional[float] = 10) -> Generator[float, No @skill() def current_date(self, frequency: Optional[float] = 10) -> str: """Provides current date.""" - time.sleep(3) - return str(datetime.datetime.now()) + return datetime.datetime.now() + + @skill(output=Output.image) + def take_photo(self) -> str: + """Takes a camera photo""" + return Image.from_file(get_data("cafe.jpg")) @pytest.mark.asyncio diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index 8334453b18..84b912f303 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -23,6 +23,12 @@ # This file defines protocol messages used for communication between skills and agents +class Output(Enum): + standard = 0 + separate_message = 1 # e.g., for images, videos, files, etc. + image = 2 # this is same as separate_message, but maybe clearer for users + + class Stream(Enum): # no streaming none = 0 @@ -49,6 +55,7 @@ class SkillConfig: reducer: "ReducerF" stream: Stream ret: Return + output: Output schema: dict[str, Any] f: Callable | None = None autostart: bool = False From 709056428bafc2c49960b595c70e85cc47890410 Mon Sep 17 00:00:00 2001 From: dimensional5 Date: Wed, 27 Aug 2025 01:41:29 -0700 Subject: [PATCH 36/59] Remove type alias not supported in python 3.10 --- dimos/agents2/spec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dimos/agents2/spec.py b/dimos/agents2/spec.py index 1e5e9eaecd..1ed5e00327 100644 --- a/dimos/agents2/spec.py +++ b/dimos/agents2/spec.py @@ -139,7 +139,7 @@ class AgentConfig(ModuleConfig): agent_topic: Any = field(default_factory=lambda: lcm.Topic("/agent")) -type AnyMessage = Union[SystemMessage, ToolMessage, AIMessage, HumanMessage] +AnyMessage = Union[SystemMessage, ToolMessage, AIMessage, HumanMessage] class AgentSpec(Service[AgentConfig], Module, ABC): From 5094483dd0a8320765315f0d3694fc35cee00a28 Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 27 Aug 2025 19:00:15 +0300 Subject: [PATCH 37/59] mock agent implementation --- dimos/agents2/agent.py | 12 +++- dimos/agents2/spec.py | 9 ++- dimos/agents2/test_fake_agent.py | 70 ++++++++++++++++++++++++ dimos/protocol/skill/coordinator.py | 9 ++- dimos/protocol/skill/test_coordinator.py | 4 +- 5 files changed, 97 insertions(+), 7 deletions(-) create mode 100644 dimos/agents2/test_fake_agent.py diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index f2efe37dda..6dc8a6a3d5 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -94,6 +94,9 @@ def snapshot_to_messages( # (images for example, requires to be a HumanMessage) special_msgs: List[HumanMessage] = [] + # Initialize state_msg + state_msg = None + for skill_state in sorted( state.values(), key=lambda skill_state: skill_state.duration(), @@ -144,7 +147,14 @@ def __init__( self.system_message = self.config.system_prompt self.publish(self.system_message) - self._llm = init_chat_model(model_provider=self.config.provider, model=self.config.model) + + # Use provided model instance if available, otherwise initialize from config + if self.config.model_instance: + self._llm = self.config.model_instance + else: + self._llm = init_chat_model( + model_provider=self.config.provider, model=self.config.model + ) @rpc def start(self): diff --git a/dimos/agents2/spec.py b/dimos/agents2/spec.py index 1ed5e00327..894d1812b2 100644 --- a/dimos/agents2/spec.py +++ b/dimos/agents2/spec.py @@ -20,6 +20,7 @@ from typing import Any, List, Optional, Tuple, Union from langchain.chat_models.base import _SUPPORTED_PROVIDERS +from langchain_core.language_models.chat_models import BaseChatModel from langchain_core.messages import ( AIMessage, HumanMessage, @@ -43,9 +44,8 @@ # Dynamically create ModelProvider enum from LangChain's supported providers -Provider = Enum( - "Provider", {provider.upper(): provider for provider in _SUPPORTED_PROVIDERS}, type=str -) +_providers = {provider.upper(): provider for provider in _SUPPORTED_PROVIDERS} +Provider = Enum("Provider", _providers, type=str) class Model(str, Enum): @@ -132,8 +132,11 @@ class Model(str, Enum): class AgentConfig(ModuleConfig): system_prompt: Optional[str | SystemMessage] = None skills: Optional[SkillContainer | list[SkillContainer]] = None + + # we can provide model/provvider enums or instantiated model_instance model: Model = Model.GPT_4O provider: Provider = Provider.OPENAI + model_instance: Optional[BaseChatModel] = None agent_transport: type[PubSub] = lcm.PickleLCM agent_topic: Any = field(default_factory=lambda: lcm.Topic("/agent")) diff --git a/dimos/agents2/test_fake_agent.py b/dimos/agents2/test_fake_agent.py new file mode 100644 index 0000000000..feacd68339 --- /dev/null +++ b/dimos/agents2/test_fake_agent.py @@ -0,0 +1,70 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Test agent with FakeChatModel for unit testing.""" + +import os + +import pytest +from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolCall + +from dimos.agents2.agent import Agent +from dimos.agents2.spec import AgentConfig, Model, Provider +from dimos.agents2.testing import ToolCallFakeChatModel +from dimos.protocol.skill import skill +from dimos.protocol.skill.skill import SkillContainer +from dimos.protocol.skill.test_coordinator import TestContainer + + +class TestFakeAgent: + """Test suite for Agent with FakeChatModel.""" + + async def test_fake_agent_with_tool_call(self): + """Test agent initialization and tool call execution.""" + # Create a fake model that will respond with tool calls + fake_model = ToolCallFakeChatModel( + responses=[ + AIMessage( + content="I'll add those numbers for you.", + tool_calls=[ + { + "name": "add", + "args": {"args": [], "kwargs": {"x": 5, "y": 3}}, + "id": "tool_call_1", + } + ], + ), + AIMessage(content="The result of adding 5 and 3 is 8."), + ] + ) + + # Create agent with the fake model + agent = Agent( + model_instance=fake_model, + system_prompt="You are a helpful robot assistant with math skills.", + ) + + # Register skills with coordinator + skills = TestContainer() + agent.coordinator.register_skills(skills) + agent.start() + # Query the agent + await agent.query_async("Please add 5 and 3") + + # Check that tools were bound + assert fake_model.tools is not None + assert len(fake_model.tools) > 0 + + # Verify the model was called and history updated + assert len(agent._history) > 0 diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index c41ed15e9e..0561aa7d9a 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -37,7 +37,7 @@ from dimos.core.module import get_loop from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec from dimos.protocol.skill.skill import SkillConfig, SkillContainer -from dimos.protocol.skill.type import MsgType, Reducer, Return, SkillMsg, Stream +from dimos.protocol.skill.type import MsgType, Output, Reducer, Return, SkillMsg, Stream from dimos.utils.logging_config import setup_logger logger = setup_logger("dimos.protocol.skill.coordinator") @@ -85,7 +85,12 @@ def __init__(self, call_id: str, name: str, skill_config: Optional[SkillConfig] super().__init__() self.skill_config = skill_config or SkillConfig( - name=name, stream=Stream.none, ret=Return.none, reducer=Reducer.all, schema={} + name=name, + stream=Stream.none, + ret=Return.none, + reducer=Reducer.all, + output=Output.standard, + schema={}, ) self.state = SkillStateEnum.pending diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 244380e1d7..842dc252eb 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -125,6 +125,7 @@ async def test_coordinator_generator(): # here we call a skill that generates a sequence of messages skillCoordinator.call_skill("test-gen-0", "counter", {"args": [10]}) skillCoordinator.call_skill("test-gen-1", "counter_passive_sum", {"args": [5]}) + skillCoordinator.call_skill("test-gen-2", "take_photo", {"args": []}) # periodically agent is stopping it's thinking cycle and asks for updates while await skillCoordinator.wait_for_updates(2): @@ -133,4 +134,5 @@ async def test_coordinator_generator(): print(agent_update) await asyncio.sleep(0.125) - print("Skill lifecycle finished") + print("coordinator loop finished") + print(skillCoordinator) From 5aea816de588ca2edac79097c0d6f81881239d6f Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 27 Aug 2025 19:19:12 +0300 Subject: [PATCH 38/59] mock agent testing, image calls --- dimos/agents2/test_fake_agent.py | 70 ------------------- dimos/agents2/test_mock_agent.py | 114 +++++++++++++++++++++++++++++++ dimos/agents2/testing.py | 105 ++++++++++++++++++++++++++++ 3 files changed, 219 insertions(+), 70 deletions(-) delete mode 100644 dimos/agents2/test_fake_agent.py create mode 100644 dimos/agents2/test_mock_agent.py create mode 100644 dimos/agents2/testing.py diff --git a/dimos/agents2/test_fake_agent.py b/dimos/agents2/test_fake_agent.py deleted file mode 100644 index feacd68339..0000000000 --- a/dimos/agents2/test_fake_agent.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Test agent with FakeChatModel for unit testing.""" - -import os - -import pytest -from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolCall - -from dimos.agents2.agent import Agent -from dimos.agents2.spec import AgentConfig, Model, Provider -from dimos.agents2.testing import ToolCallFakeChatModel -from dimos.protocol.skill import skill -from dimos.protocol.skill.skill import SkillContainer -from dimos.protocol.skill.test_coordinator import TestContainer - - -class TestFakeAgent: - """Test suite for Agent with FakeChatModel.""" - - async def test_fake_agent_with_tool_call(self): - """Test agent initialization and tool call execution.""" - # Create a fake model that will respond with tool calls - fake_model = ToolCallFakeChatModel( - responses=[ - AIMessage( - content="I'll add those numbers for you.", - tool_calls=[ - { - "name": "add", - "args": {"args": [], "kwargs": {"x": 5, "y": 3}}, - "id": "tool_call_1", - } - ], - ), - AIMessage(content="The result of adding 5 and 3 is 8."), - ] - ) - - # Create agent with the fake model - agent = Agent( - model_instance=fake_model, - system_prompt="You are a helpful robot assistant with math skills.", - ) - - # Register skills with coordinator - skills = TestContainer() - agent.coordinator.register_skills(skills) - agent.start() - # Query the agent - await agent.query_async("Please add 5 and 3") - - # Check that tools were bound - assert fake_model.tools is not None - assert len(fake_model.tools) > 0 - - # Verify the model was called and history updated - assert len(agent._history) > 0 diff --git a/dimos/agents2/test_mock_agent.py b/dimos/agents2/test_mock_agent.py new file mode 100644 index 0000000000..5ba1902923 --- /dev/null +++ b/dimos/agents2/test_mock_agent.py @@ -0,0 +1,114 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Test agent with FakeChatModel for unit testing.""" + +import os + +import pytest +from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolCall + +from dimos.agents2.agent import Agent +from dimos.agents2.testing import MockModel +from dimos.protocol.skill.test_coordinator import TestContainer + + +async def test_tool_call(): + """Test agent initialization and tool call execution.""" + # Create a fake model that will respond with tool calls + fake_model = MockModel( + responses=[ + AIMessage( + content="I'll add those numbers for you.", + tool_calls=[ + { + "name": "add", + "args": {"args": [], "kwargs": {"x": 5, "y": 3}}, + "id": "tool_call_1", + } + ], + ), + AIMessage(content="The result of adding 5 and 3 is 8."), + ] + ) + + # Create agent with the fake model + agent = Agent( + model_instance=fake_model, + system_prompt="You are a helpful robot assistant with math skills.", + ) + + # Register skills with coordinator + skills = TestContainer() + agent.coordinator.register_skills(skills) + agent.start() + # Query the agent + await agent.query_async("Please add 5 and 3") + + # Check that tools were bound + assert fake_model.tools is not None + assert len(fake_model.tools) > 0 + + # Verify the model was called and history updated + assert len(agent._history) > 0 + + +async def test_image_tool_call(): + """Test agent with image tool call execution.""" + # Create a fake model that will respond with image tool calls + fake_model = MockModel( + responses=[ + AIMessage( + content="I'll take a photo for you.", + tool_calls=[ + { + "name": "take_photo", + "args": {"args": [], "kwargs": {}}, + "id": "tool_call_image_1", + } + ], + ), + AIMessage(content="I've taken the photo. The image shows a cafe scene."), + ] + ) + + # Create agent with the fake model + agent = Agent( + model_instance=fake_model, + system_prompt="You are a helpful robot assistant with camera capabilities.", + ) + + # Register skills with coordinator + skills = TestContainer() + agent.coordinator.register_skills(skills) + agent.start() + + # Query the agent + await agent.query_async("Please take a photo") + + # Check that tools were bound + assert fake_model.tools is not None + assert len(fake_model.tools) > 0 + + # Verify the model was called and history updated + assert len(agent._history) > 0 + + # Check that image was handled specially + # Look for HumanMessage with image content in history + human_messages_with_images = [ + msg + for msg in agent._history + if isinstance(msg, HumanMessage) and msg.content and isinstance(msg.content, list) + ] + assert len(human_messages_with_images) >= 0 # May have image messages diff --git a/dimos/agents2/testing.py b/dimos/agents2/testing.py new file mode 100644 index 0000000000..f7ea8d4d3d --- /dev/null +++ b/dimos/agents2/testing.py @@ -0,0 +1,105 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Testing utilities for agents.""" + +from typing import Any, Dict, Iterator, List, Optional, Sequence, Union + +from langchain_core.callbacks.manager import CallbackManagerForLLMRun +from langchain_core.language_models.chat_models import SimpleChatModel +from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage +from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult +from langchain_core.runnables import Runnable + + +class MockModel(SimpleChatModel): + """Custom fake chat model that supports tool calls for testing.""" + + responses: List[Union[str, AIMessage]] = [] + i: int = 0 + + def __init__(self, **kwargs): + # Extract responses before calling super().__init__ + responses = kwargs.pop("responses", []) + super().__init__(**kwargs) + self.responses = responses + self.i = 0 + self._bound_tools: Optional[Sequence[Any]] = None + + @property + def _llm_type(self) -> str: + return "tool-call-fake-chat-model" + + def _call( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + """Not used in _generate.""" + return "" + + def _generate( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> ChatResult: + """Generate a response using predefined responses.""" + if self.i >= len(self.responses): + self.i = 0 # Wrap around + + response = self.responses[self.i] + self.i += 1 + + # Handle different response types + if isinstance(response, AIMessage): + message = response + else: + # It's a string + message = AIMessage(content=str(response)) + + generation = ChatGeneration(message=message) + return ChatResult(generations=[generation]) + + def _stream( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[ChatGenerationChunk]: + """Stream not implemented for testing.""" + result = self._generate(messages, stop, run_manager, **kwargs) + message = result.generations[0].message + chunk = AIMessageChunk(content=message.content) + yield ChatGenerationChunk(message=chunk) + + def bind_tools( + self, + tools: Sequence[Union[dict[str, Any], type, Any]], + *, + tool_choice: Optional[str] = None, + **kwargs: Any, + ) -> Runnable: + """Store tools and return self.""" + self._bound_tools = tools + return self + + @property + def tools(self) -> Optional[Sequence[Any]]: + """Get bound tools for inspection.""" + return self._bound_tools From 63572e5abef98b57ae3645f982ca29f5565e5dc1 Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 27 Aug 2025 22:09:36 +0300 Subject: [PATCH 39/59] reducers are pickleable --- dimos/protocol/skill/type.py | 46 +++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index 84b912f303..0e7e902fb3 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -185,7 +185,47 @@ def reducer( # just a convinience class to hold reducer functions +def _make_skill_msg( + msg: SkillMsg[Literal[MsgType.stream]], content: Any +) -> SkillMsg[Literal[MsgType.reduced_stream]]: + """Helper to create a reduced stream message with new content.""" + return SkillMsg( + call_id=msg.call_id, + skill_name=msg.skill_name, + content=content, + type=MsgType.reduced_stream, + ) + + +def sum_reducer( + accumulator: Optional[SkillMsg[Literal[MsgType.reduced_stream]]], + msg: SkillMsg[Literal[MsgType.stream]], +) -> SkillMsg[Literal[MsgType.reduced_stream]]: + """Sum reducer that adds values together.""" + acc_value = accumulator.content if accumulator else None + new_value = acc_value + msg.content if acc_value else msg.content + return _make_skill_msg(msg, new_value) + + +def latest_reducer( + accumulator: Optional[SkillMsg[Literal[MsgType.reduced_stream]]], + msg: SkillMsg[Literal[MsgType.stream]], +) -> SkillMsg[Literal[MsgType.reduced_stream]]: + """Latest reducer that keeps only the most recent value.""" + return _make_skill_msg(msg, msg.content) + + +def all_reducer( + accumulator: Optional[SkillMsg[Literal[MsgType.reduced_stream]]], + msg: SkillMsg[Literal[MsgType.stream]], +) -> SkillMsg[Literal[MsgType.reduced_stream]]: + """All reducer that collects all values into a list.""" + acc_value = accumulator.content if accumulator else None + new_value = acc_value + [msg.content] if acc_value else [msg.content] + return _make_skill_msg(msg, new_value) + + class Reducer: - sum = make_reducer(lambda x, y: x + y if x else y) - latest = make_reducer(lambda x, y: y) - all = make_reducer(lambda x, y: x + [y] if x else [y]) + sum = sum_reducer + latest = latest_reducer + all = all_reducer From 3400b24cf19da5b4c034ab2bfeedbb5836a8ea74 Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 27 Aug 2025 22:10:32 +0300 Subject: [PATCH 40/59] mock agent, rpc client inherits docstrings, all modules are skillcontainers --- dimos/agents2/agent.py | 3 - dimos/agents2/test_agent.py | 16 ++--- dimos/agents2/test_mock_agent.py | 92 +++++++++++++----------- dimos/core/__init__.py | 16 ++++- dimos/core/module.py | 4 +- dimos/protocol/pubsub/spec.py | 9 ++- dimos/protocol/skill/coordinator.py | 9 ++- dimos/protocol/skill/skill.py | 21 ++++-- dimos/protocol/skill/test_coordinator.py | 14 ++-- 9 files changed, 110 insertions(+), 74 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index 6dc8a6a3d5..c0b9eafd2e 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -164,7 +164,6 @@ def start(self): def stop(self): self.coordinator.stop() - @rpc def clear_history(self): self._history.clear() @@ -245,14 +244,12 @@ async def agent_loop(self, seed_query: str = ""): traceback.print_exc() - @rpc def query_async(self, query: str): return asyncio.ensure_future(self.agent_loop(query), loop=self._loop) def query(self, query: str): return asyncio.run_coroutine_threadsafe(self.agent_loop(query), self._loop).result() - @rpc def register_skills(self, container): return self.coordinator.register_skills(container) diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index 0df5e7b634..16a3819111 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -17,7 +17,8 @@ import pytest from dimos.agents2.agent import Agent -from dimos.protocol.skill.test_coordinator import TestContainer +from dimos.core import start +from dimos.protocol.skill.test_coordinator import SkillContainerTest @pytest.mark.tool @@ -27,14 +28,13 @@ async def test_agent_init(): "Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate" ) - ## Uncomment the following lines to use a real module system - # from dimos.core import start - # dimos = start(2) - # testcontainer = dimos.deploy(TestContainer) - # agent = dimos.deploy(Agent, system_prompt=system_prompt) - - testcontainer = TestContainer() + # # Uncomment the following lines to use a real module system + dimos = start(2) + testcontainer = dimos.deploy(SkillContainerTest) agent = Agent(system_prompt=system_prompt) + + # testcontainer = TestContainer() + # agent = Agent(system_prompt=system_prompt) agent.register_skills(testcontainer) agent.start() agent.run_implicit_skill("uptime_seconds") diff --git a/dimos/agents2/test_mock_agent.py b/dimos/agents2/test_mock_agent.py index 5ba1902923..8e17b737e1 100644 --- a/dimos/agents2/test_mock_agent.py +++ b/dimos/agents2/test_mock_agent.py @@ -21,51 +21,54 @@ from dimos.agents2.agent import Agent from dimos.agents2.testing import MockModel -from dimos.protocol.skill.test_coordinator import TestContainer - - -async def test_tool_call(): - """Test agent initialization and tool call execution.""" - # Create a fake model that will respond with tool calls - fake_model = MockModel( - responses=[ - AIMessage( - content="I'll add those numbers for you.", - tool_calls=[ - { - "name": "add", - "args": {"args": [], "kwargs": {"x": 5, "y": 3}}, - "id": "tool_call_1", - } - ], - ), - AIMessage(content="The result of adding 5 and 3 is 8."), - ] - ) - - # Create agent with the fake model - agent = Agent( - model_instance=fake_model, - system_prompt="You are a helpful robot assistant with math skills.", - ) - - # Register skills with coordinator - skills = TestContainer() - agent.coordinator.register_skills(skills) - agent.start() - # Query the agent - await agent.query_async("Please add 5 and 3") - - # Check that tools were bound - assert fake_model.tools is not None - assert len(fake_model.tools) > 0 - - # Verify the model was called and history updated - assert len(agent._history) > 0 +from dimos.core import start +from dimos.protocol.skill.test_coordinator import SkillContainerTest + +# async def test_tool_call(): +# """Test agent initialization and tool call execution.""" +# # Create a fake model that will respond with tool calls +# fake_model = MockModel( +# responses=[ +# AIMessage( +# content="I'll add those numbers for you.", +# tool_calls=[ +# { +# "name": "add", +# "args": {"args": [], "kwargs": {"x": 5, "y": 3}}, +# "id": "tool_call_1", +# } +# ], +# ), +# AIMessage(content="The result of adding 5 and 3 is 8."), +# ] +# ) + +# # Create agent with the fake model +# agent = Agent( +# model_instance=fake_model, +# system_prompt="You are a helpful robot assistant with math skills.", +# ) + +# # Register skills with coordinator +# skills = SkillContainerTest() +# agent.coordinator.register_skills(skills) +# agent.start() +# # Query the agent +# await agent.query_async("Please add 5 and 3") + +# # Check that tools were bound +# assert fake_model.tools is not None +# assert len(fake_model.tools) > 0 + +# # Verify the model was called and history updated +# assert len(agent._history) > 0 + +# agent.stop() async def test_image_tool_call(): """Test agent with image tool call execution.""" + dimos = start(2) # Create a fake model that will respond with image tool calls fake_model = MockModel( responses=[ @@ -73,6 +76,8 @@ async def test_image_tool_call(): content="I'll take a photo for you.", tool_calls=[ { + # "name": "add", + # "args": {"args": [], "kwargs": {"x": 5, "y": 3}}, "name": "take_photo", "args": {"args": [], "kwargs": {}}, "id": "tool_call_image_1", @@ -90,8 +95,8 @@ async def test_image_tool_call(): ) # Register skills with coordinator - skills = TestContainer() - agent.coordinator.register_skills(skills) + skills = dimos.deploy(SkillContainerTest) + agent.register_skills(skills) agent.start() # Query the agent @@ -112,3 +117,4 @@ async def test_image_tool_call(): if isinstance(msg, HumanMessage) and msg.content and isinstance(msg.content, list) ] assert len(human_messages_with_images) >= 0 # May have image messages + agent.stop() diff --git a/dimos/core/__init__.py b/dimos/core/__init__.py index 0b7755e2e3..1e6eccaaed 100644 --- a/dimos/core/__init__.py +++ b/dimos/core/__init__.py @@ -53,9 +53,19 @@ def __getattr__(self, name: str): raise AttributeError(f"{name} is not found.") if name in self.rpcs: - return lambda *args, **kwargs: self.rpc.call_sync( - f"{self.remote_name}/{name}", (args, kwargs) - ) + # Get the original method to preserve its docstring + original_method = getattr(self.actor_class, name, None) + + def rpc_call(*args, **kwargs): + return self.rpc.call_sync(f"{self.remote_name}/{name}", (args, kwargs)) + + # Copy docstring and other attributes from original method + if original_method: + rpc_call.__doc__ = original_method.__doc__ + rpc_call.__name__ = original_method.__name__ + rpc_call.__qualname__ = f"{self.__class__.__name__}.{original_method.__name__}" + + return rpc_call # return super().__getattr__(name) # Try to avoid recursion by directly accessing attributes that are known diff --git a/dimos/core/module.py b/dimos/core/module.py index 01abfcdb8a..15abbe52bd 100644 --- a/dimos/core/module.py +++ b/dimos/core/module.py @@ -30,6 +30,8 @@ from dimos.core.stream import In, Out, RemoteIn, RemoteOut, Transport from dimos.protocol.rpc import LCMRPC, RPCSpec from dimos.protocol.service import Configurable +from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec +from dimos.protocol.skill.skill import SkillContainer from dimos.protocol.tf import LCMTF, TFSpec @@ -59,7 +61,7 @@ class ModuleConfig: tf_transport: type[TFSpec] = LCMTF -class ModuleBase(Configurable[ModuleConfig]): +class ModuleBase(Configurable[ModuleConfig], SkillContainer): _rpc: Optional[RPCSpec] = None _tf: Optional[TFSpec] = None _loop: asyncio.AbstractEventLoop = None diff --git a/dimos/protocol/pubsub/spec.py b/dimos/protocol/pubsub/spec.py index 81db8a0669..1d38cc74bd 100644 --- a/dimos/protocol/pubsub/spec.py +++ b/dimos/protocol/pubsub/spec.py @@ -132,7 +132,14 @@ def wrapper_cb(encoded_data: bytes, topic: TopicT): class PickleEncoderMixin(PubSubEncoderMixin[TopicT, MsgT]): def encode(self, msg: MsgT, *_: TopicT) -> bytes: - return pickle.dumps(msg) + try: + return pickle.dumps(msg) + except Exception as e: + print("Pickle encoding error:", e) + import traceback + + traceback.print_exc() + print("Tried to pickle:", msg) def decode(self, msg: bytes, _: TopicT) -> MsgT: return pickle.loads(msg) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 0561aa7d9a..c0d6a3eb60 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -275,12 +275,15 @@ def __str__(self): return capture.get().strip() +from dimos.core.module import Module + + # This class is responsible for managing the lifecycle of skills, # handling skill calls, and coordinating communication between the agent and skills. # # It aggregates skills from static and dynamic containers, manages skill states, # and decides when to notify the agent about updates. -class SkillCoordinator(SkillContainer): +class SkillCoordinator(Module): default_config = SkillCoordinatorConfig empty: bool = True @@ -323,7 +326,7 @@ def get_tools(self) -> list[dict]: ret = [] for name, skill_config in self.skills().items(): - # print(f"Tool {name} config: {skill_config}, {skill_config.f}") + print(f"Tool {name} config: {skill_config}, {skill_config.f}") ret.append(langchain_tool(skill_config.f)) return ret @@ -475,7 +478,7 @@ def __str__(self): # .skills() method def register_skills(self, container: SkillContainer): self.empty = False - if not container.dynamic_skills: + if not container.dynamic_skills(): logger.info(f"Registering static skill container, {container}") self._static_containers.append(container) for name, skill_config in container.skills().items(): diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index 7a01c9546d..151029447a 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -17,8 +17,7 @@ from dataclasses import dataclass from typing import Any, Callable, Optional -from dimos.core import rpc -from dimos.protocol.service import Configurable +# from dimos.core.core import rpc from dimos.protocol.skill.comms import LCMSkillComms, SkillCommsSpec from dimos.protocol.skill.schema import function_to_schema from dimos.protocol.skill.type import ( @@ -57,6 +56,11 @@ # the average of all values is returned to the agent +def rpc(fn: Callable[..., Any]) -> Callable[..., Any]: + fn.__rpc__ = True # type: ignore[attr-defined] + return fn + + def skill( reducer: Reducer = Reducer.latest, stream: Stream = Stream.none, @@ -133,12 +137,15 @@ def wrapper(self, *args, **kwargs): # for this you'll need to override the `skills` method to return a dynamic set of skills # SkillCoordinator will call this method to get the skills available upon every request to # the agent -# -class SkillContainer(Configurable[SkillContainerConfig]): - default_config = SkillContainerConfig + + +class SkillContainer: + skill_transport_class: type[SkillCommsSpec] = LCMSkillComms _skill_transport: Optional[SkillCommsSpec] = None - dynamic_skills = False + @rpc + def dynamic_skills(self): + return False def __str__(self) -> str: return f"SkillContainer({self.__class__.__name__})" @@ -211,5 +218,5 @@ def skills(self) -> dict[str, SkillConfig]: @property def skill_transport(self) -> SkillCommsSpec: if self._skill_transport is None: - self._skill_transport = self.config.skill_transport() + self._skill_transport = self.skill_transport_class() return self._skill_transport diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 842dc252eb..a4e4d813df 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -18,14 +18,15 @@ import pytest +from dimos.core import Module from dimos.msgs.sensor_msgs import Image from dimos.protocol.skill.coordinator import SkillCoordinator -from dimos.protocol.skill.skill import SkillContainer, skill +from dimos.protocol.skill.skill import skill from dimos.protocol.skill.type import Output, Reducer, Stream from dimos.utils.data import get_data -class TestContainer(SkillContainer): +class SkillContainerTest(Module): @skill() def add(self, x: int, y: int) -> int: """adds x and y.""" @@ -78,13 +79,16 @@ def current_date(self, frequency: Optional[float] = 10) -> str: @skill(output=Output.image) def take_photo(self) -> str: """Takes a camera photo""" - return Image.from_file(get_data("cafe.jpg")) + print("Taking photo...") + img = Image.from_file(get_data("cafe.jpg")) + print("Photo taken.") + return img @pytest.mark.asyncio async def test_coordinator_parallel_calls(): skillCoordinator = SkillCoordinator() - skillCoordinator.register_skills(TestContainer()) + skillCoordinator.register_skills(SkillContainerTest()) skillCoordinator.start() skillCoordinator.call_skill("test-call-0", "delayadd", {"args": [1, 2]}) @@ -119,7 +123,7 @@ async def test_coordinator_parallel_calls(): @pytest.mark.asyncio async def test_coordinator_generator(): skillCoordinator = SkillCoordinator() - skillCoordinator.register_skills(TestContainer()) + skillCoordinator.register_skills(SkillContainerTest()) skillCoordinator.start() # here we call a skill that generates a sequence of messages From 1af4bf4a79e99bcd4f0a5dcf919d407fdda81cc3 Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 27 Aug 2025 22:14:48 +0300 Subject: [PATCH 41/59] enabled single-process mock agent test --- dimos/agents2/test_mock_agent.py | 83 ++++++++++++++++---------------- 1 file changed, 41 insertions(+), 42 deletions(-) diff --git a/dimos/agents2/test_mock_agent.py b/dimos/agents2/test_mock_agent.py index 8e17b737e1..7f03e964a0 100644 --- a/dimos/agents2/test_mock_agent.py +++ b/dimos/agents2/test_mock_agent.py @@ -24,46 +24,47 @@ from dimos.core import start from dimos.protocol.skill.test_coordinator import SkillContainerTest -# async def test_tool_call(): -# """Test agent initialization and tool call execution.""" -# # Create a fake model that will respond with tool calls -# fake_model = MockModel( -# responses=[ -# AIMessage( -# content="I'll add those numbers for you.", -# tool_calls=[ -# { -# "name": "add", -# "args": {"args": [], "kwargs": {"x": 5, "y": 3}}, -# "id": "tool_call_1", -# } -# ], -# ), -# AIMessage(content="The result of adding 5 and 3 is 8."), -# ] -# ) - -# # Create agent with the fake model -# agent = Agent( -# model_instance=fake_model, -# system_prompt="You are a helpful robot assistant with math skills.", -# ) - -# # Register skills with coordinator -# skills = SkillContainerTest() -# agent.coordinator.register_skills(skills) -# agent.start() -# # Query the agent -# await agent.query_async("Please add 5 and 3") - -# # Check that tools were bound -# assert fake_model.tools is not None -# assert len(fake_model.tools) > 0 - -# # Verify the model was called and history updated -# assert len(agent._history) > 0 - -# agent.stop() + +async def test_tool_call(): + """Test agent initialization and tool call execution.""" + # Create a fake model that will respond with tool calls + fake_model = MockModel( + responses=[ + AIMessage( + content="I'll add those numbers for you.", + tool_calls=[ + { + "name": "add", + "args": {"args": [], "kwargs": {"x": 5, "y": 3}}, + "id": "tool_call_1", + } + ], + ), + AIMessage(content="The result of adding 5 and 3 is 8."), + ] + ) + + # Create agent with the fake model + agent = Agent( + model_instance=fake_model, + system_prompt="You are a helpful robot assistant with math skills.", + ) + + # Register skills with coordinator + skills = SkillContainerTest() + agent.coordinator.register_skills(skills) + agent.start() + # Query the agent + await agent.query_async("Please add 5 and 3") + + # Check that tools were bound + assert fake_model.tools is not None + assert len(fake_model.tools) > 0 + + # Verify the model was called and history updated + assert len(agent._history) > 0 + + agent.stop() async def test_image_tool_call(): @@ -76,8 +77,6 @@ async def test_image_tool_call(): content="I'll take a photo for you.", tool_calls=[ { - # "name": "add", - # "args": {"args": [], "kwargs": {"x": 5, "y": 3}}, "name": "take_photo", "args": {"args": [], "kwargs": {}}, "id": "tool_call_image_1", From 60b492c2aea3ffcbf6b586ccd3b5f7f9e13a5aa6 Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 27 Aug 2025 22:59:59 +0300 Subject: [PATCH 42/59] agent encoding happens before message is sent from a skillcontainer --- dimos/protocol/skill/coordinator.py | 22 ++++------------------ dimos/protocol/skill/type.py | 10 +++++++++- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index c0d6a3eb60..4ba62fa5a8 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -107,32 +107,18 @@ def duration(self) -> float: return 0.0 def content(self) -> dict[str, Any] | str | int | float | None: - # any tool output can be a custom type that knows how to encode itself - # like a costmap, path, transform etc could be translatable into strings - def maybe_encode(something: Any) -> str: - if getattr(something, "agent_encode", None): - return something.agent_encode() - - # if isinstance(something, dict): - # something = json.dumps(something) - - # if not isinstance(something, str): - # something = str(something) - - return something - if self.state == SkillStateEnum.running: if self.reduced_stream_msg: - return maybe_encode(self.reduced_stream_msg.content) + return self.reduced_stream_msg.content if self.state == SkillStateEnum.completed: if self.reduced_stream_msg: # are we a streaming skill? - return maybe_encode(self.reduced_stream_msg.content) - return maybe_encode(self.ret_msg.content) + return self.reduced_stream_msg.content + return self.ret_msg.content if self.state == SkillStateEnum.error: if self.reduced_stream_msg: - (maybe_encode(self.reduced_stream_msg.content) + "\n" + self.error_msg.content) + (self.reduced_stream_msg.content + "\n" + self.error_msg.content) else: return self.error_msg.content diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index 0e7e902fb3..ec82e4a576 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -96,6 +96,11 @@ class MsgType(Enum): M = TypeVar("M", bound="MsgType") +def maybe_encode(something: Any) -> str: + if getattr(something, "agent_encode", None): + return something.agent_encode() + + class SkillMsg(Timestamped, Generic[M]): ts: float type: M @@ -113,7 +118,10 @@ def __init__( self.ts = time.time() self.call_id = call_id self.skill_name = skill_name - self.content = content + # any tool output can be a custom type that knows how to encode itself + # like a costmap, path, transform etc could be translatable into strings + + self.content = maybe_encode(content) self.type = type def __repr__(self): From 73a50dfc5d1d6481ae087175f18ee1599fc53652 Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 28 Aug 2025 02:08:07 +0300 Subject: [PATCH 43/59] detector + agent --- dimos/agents2/agent.py | 16 +++- dimos/agents2/spec.py | 2 +- dimos/agents2/test_mock_agent.py | 91 ++++++++++++++++++- dimos/perception/detection2d/module.py | 48 ++++++++-- dimos/protocol/skill/coordinator.py | 13 ++- dimos/protocol/skill/test_coordinator.py | 1 + dimos/protocol/skill/type.py | 32 +++++-- .../unitree_webrtc/modular/ivan_unitree.py | 2 +- 8 files changed, 178 insertions(+), 27 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index c0b9eafd2e..399123d1cf 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -39,11 +39,16 @@ def toolmsg_from_state(state: SkillState) -> ToolMessage: + if state.skill_config.output != Output.standard: + content = "Special output, see separate message" + else: + content = state.content() + return ToolMessage( # if agent call has been triggered by another skill, # and this specific skill didn't finish yet but we need a tool call response # we return a message explaining that execution is still ongoing - content=state.content() + content=content or "Running, you will be called with an update, no need for subsequent tool calls", name=state.name, tool_call_id=state.call_id, @@ -103,12 +108,14 @@ def snapshot_to_messages( ): if skill_state.call_id in tool_call_ids: tool_msgs.append(toolmsg_from_state(skill_state)) - continue special_data = skill_state.skill_config.output != Output.standard if special_data: - print("special data from skill", skill_state.name, skill_state.content()) - special_msgs.append(HumanMessage(content=[skill_state.content()])) + content = skill_state.content() + special_msgs.append(HumanMessage(content=[content])) + + if skill_state.call_id in tool_call_ids: + continue state_overview.append(summary_from_state(skill_state, special_data)) @@ -235,6 +242,7 @@ async def agent_loop(self, seed_query: str = ""): # we will return a tool message, and not a general state message snapshot_msgs = snapshot_to_messages(update, msg.tool_calls) + print("SNAPSHOT", snapshot_msgs) self.state_messages = snapshot_msgs.get("state_msgs", []) self.append_history(*snapshot_msgs.get("tool_msgs", [])) diff --git a/dimos/agents2/spec.py b/dimos/agents2/spec.py index 894d1812b2..d8d5ca8eda 100644 --- a/dimos/agents2/spec.py +++ b/dimos/agents2/spec.py @@ -192,7 +192,7 @@ def __str__(self) -> str: if isinstance(message, HumanMessage): content = message.content if not isinstance(content, str): - content = "" + content = "" table.add_row(Text("Human", style="green"), Text(content, style="green")) elif isinstance(message, AIMessage): diff --git a/dimos/agents2/test_mock_agent.py b/dimos/agents2/test_mock_agent.py index 7f03e964a0..2846f4d3e9 100644 --- a/dimos/agents2/test_mock_agent.py +++ b/dimos/agents2/test_mock_agent.py @@ -15,14 +15,22 @@ """Test agent with FakeChatModel for unit testing.""" import os +import time import pytest +from dimos_lcm.sensor_msgs import CameraInfo from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolCall from dimos.agents2.agent import Agent from dimos.agents2.testing import MockModel -from dimos.core import start +from dimos.core import LCMTransport, start +from dimos.msgs.foxglove_msgs import ImageAnnotations +from dimos.msgs.geometry_msgs import PoseStamped, Quaternion, Transform, Vector3 +from dimos.msgs.sensor_msgs import Image +from dimos.perception.detection2d import Detect2DModule, Detection2DArrayFix from dimos.protocol.skill.test_coordinator import SkillContainerTest +from dimos.robot.unitree_webrtc.modular.connection_module import ConnectionModule +from dimos.robot.unitree_webrtc.type.lidar import LidarMessage async def test_tool_call(): @@ -93,11 +101,86 @@ async def test_image_tool_call(): system_prompt="You are a helpful robot assistant with camera capabilities.", ) - # Register skills with coordinator - skills = dimos.deploy(SkillContainerTest) - agent.register_skills(skills) + test_skill_module = dimos.deploy(SkillContainerTest) + + agent.register_skills(test_skill_module) + agent.start() + + agent.run_implicit_skill("get_detections") + + # Query the agent + await agent.query_async("Please take a photo") + + # Check that tools were bound + assert fake_model.tools is not None + assert len(fake_model.tools) > 0 + + # Verify the model was called and history updated + assert len(agent._history) > 0 + + # Check that image was handled specially + # Look for HumanMessage with image content in history + human_messages_with_images = [ + msg + for msg in agent._history + if isinstance(msg, HumanMessage) and msg.content and isinstance(msg.content, list) + ] + assert len(human_messages_with_images) >= 0 # May have image messages + agent.stop() + + +@pytest.mark.tool +async def test_tool_call_implicit_detections(): + """Test agent with image tool call execution.""" + dimos = start(2) + # Create a fake model that will respond with image tool calls + fake_model = MockModel( + responses=[ + AIMessage( + content="I'll take a photo for you.", + tool_calls=[ + { + "name": "take_photo", + "args": {"args": [], "kwargs": {}}, + "id": "tool_call_image_1", + } + ], + ), + AIMessage(content="I've taken the photo. The image shows a cafe scene."), + ] + ) + + # Create agent with the fake model + agent = Agent( + model_instance=fake_model, + system_prompt="You are a helpful robot assistant with camera capabilities.", + ) + + robot_connection = dimos.deploy(ConnectionModule, connection_type="fake") + robot_connection.lidar.transport = LCMTransport("/lidar", LidarMessage) + robot_connection.odom.transport = LCMTransport("/odom", PoseStamped) + robot_connection.video.transport = LCMTransport("/image", Image) + robot_connection.movecmd.transport = LCMTransport("/cmd_vel", Vector3) + robot_connection.camera_info.transport = LCMTransport("/camera_info", CameraInfo) + robot_connection.start() + + detect2d = dimos.deploy(Detect2DModule) + detect2d.detections.transport = LCMTransport("/detections", Detection2DArrayFix) + detect2d.annotations.transport = LCMTransport("/annotations", ImageAnnotations) + detect2d.image.connect(robot_connection.video) + detect2d.start() + + test_skill_module = dimos.deploy(SkillContainerTest) + + agent.register_skills(detect2d) + agent.register_skills(test_skill_module) agent.start() + agent.run_implicit_skill("get_detections") + + print("waiting 8.5 seconds for some detections before quering agent") + time.sleep(8.5) + # Query the agent await agent.query_async("Please take a photo") diff --git a/dimos/perception/detection2d/module.py b/dimos/perception/detection2d/module.py index 11ebeab86c..df17857233 100644 --- a/dimos/perception/detection2d/module.py +++ b/dimos/perception/detection2d/module.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. import functools -from typing import Any, Callable, List, Optional, Tuple +import queue +from typing import Any, Callable, Generator, List, Optional, Tuple -from dimos_lcm.foxglove_msgs.Color import Color -from dimos_lcm.foxglove_msgs.ImageAnnotations import ( - ImageAnnotations, +from dimos_lcm.foxglove_msgs import ( PointsAnnotation, TextAnnotation, ) +from dimos_lcm.foxglove_msgs.Color import Color from dimos_lcm.foxglove_msgs.Point2 import Point2 from dimos_lcm.vision_msgs import ( BoundingBox2D, @@ -31,11 +31,15 @@ Pose2D, ) from reactivex import operators as ops +from reactivex.observable import Observable from dimos.core import In, Module, Out, rpc +from dimos.msgs.foxglove_msgs import ImageAnnotations from dimos.msgs.sensor_msgs import Image from dimos.msgs.std_msgs import Header from dimos.perception.detection2d.yolo_2d_det import Yolo2DDetector +from dimos.protocol.skill.skill import skill +from dimos.protocol.skill.type import Output, Reducer, Stream from dimos.types.timestamped import to_ros_stamp @@ -203,14 +207,38 @@ def detect(self, image: Image) -> Detections: @rpc def start(self): - # from dimos.activate_cuda import _init_cuda self.detector = self._initDetector() - detection_stream = self.image.observable().pipe(ops.map(self.detect)) + self.detection2d_stream().subscribe(self.detections.publish) + self.annotation_stream().subscribe(self.annotations.publish) + + @functools.cache + def detection2d_stream(self) -> Observable[Detection2DArrayFix]: + return self.image.observable().pipe(ops.map(self.detect), ops.map(build_detection2d_array)) + + @functools.cache + def annotation_stream(self) -> Observable[ImageAnnotations]: + return self.image.observable().pipe(ops.map(self.detect), ops.map(build_imageannotations)) + + @functools.cache + def detection_stream(self) -> Observable[ImageDetections]: + return self.image.observable().pipe(ops.map(self.detect)) + + @skill(stream=Stream.passive, reducer=Reducer.accumulate_dict) + def get_detections(self) -> Generator[ImageAnnotations, None, None]: + """Provides latest image detections""" + + blocking_queue = queue.Queue() + self.detection_stream().subscribe(blocking_queue.put) + + while True: + [image, detections] = blocking_queue.get() + + detection_dict = {} + for detection in detections: + [bbox, track_id, class_id, confidence, name] = detection + detection_dict[name] = f"{confidence:.3f}" - detection_stream.pipe(ops.map(build_imageannotations)).subscribe(self.annotations.publish) - detection_stream.pipe( - ops.filter(lambda x: len(x) != 0), ops.map(build_detection2d_array) - ).subscribe(self.detections.publish) + yield detection_dict @rpc def stop(self): ... diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 4ba62fa5a8..7261d1fa23 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -312,7 +312,7 @@ def get_tools(self) -> list[dict]: ret = [] for name, skill_config in self.skills().items(): - print(f"Tool {name} config: {skill_config}, {skill_config.f}") + # print(f"Tool {name} config: {skill_config}, {skill_config.f}") ret.append(langchain_tool(skill_config.f)) return ret @@ -412,6 +412,17 @@ def generate_snapshot(self, clear: bool = True) -> SkillStateDict: print(error_traceback) to_delete.append(call_id) + elif ( + skill_run.state == SkillStateEnum.running + and skill_run.reduced_stream_msg is not None + ): + # preserve ret as a copy + ret[call_id] = copy(skill_run) + logger.debug( + f"Resetting accumulator for skill {skill_run.name} (call_id={call_id})" + ) + skill_run.reduced_stream_msg = None + for call_id in to_delete: logger.debug(f"Call {call_id} finished, removing from state") del self._skill_state[call_id] diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index a4e4d813df..25df777b67 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -30,6 +30,7 @@ class SkillContainerTest(Module): @skill() def add(self, x: int, y: int) -> int: """adds x and y.""" + time.sleep(2) return x + y @skill() diff --git a/dimos/protocol/skill/type.py b/dimos/protocol/skill/type.py index ec82e4a576..a6527f0d42 100644 --- a/dimos/protocol/skill/type.py +++ b/dimos/protocol/skill/type.py @@ -16,7 +16,7 @@ import time from dataclasses import dataclass from enum import Enum -from typing import TYPE_CHECKING, Any, Callable, Generic, Literal, Optional, TypeVar +from typing import Any, Callable, Generic, Literal, Optional, TypeVar from dimos.types.timestamped import Timestamped @@ -97,8 +97,9 @@ class MsgType(Enum): def maybe_encode(something: Any) -> str: - if getattr(something, "agent_encode", None): + if hasattr(something, "agent_encode"): return something.agent_encode() + return something class SkillMsg(Timestamped, Generic[M]): @@ -112,7 +113,7 @@ def __init__( self, call_id: str, skill_name: str, - content: str | int | float | dict | list, + content: Any, type: M, ) -> None: self.ts = time.time() @@ -124,9 +125,6 @@ def __init__( self.content = maybe_encode(content) self.type = type - def __repr__(self): - return self.__str__() - @property def end(self) -> bool: return self.type == MsgType.ret or self.type == MsgType.error @@ -148,6 +146,8 @@ def __str__(self): return f"Pending({time_ago:.1f}s ago)" if self.type == MsgType.stream: return f"Stream({time_ago:.1f}s ago, val={self.content})" + if self.type == MsgType.reduced_stream: + return f"Stream({time_ago:.1f}s ago, val={self.content})" # typing looks complex but it's a standard reducer function signature, using SkillMsgs @@ -233,7 +233,27 @@ def all_reducer( return _make_skill_msg(msg, new_value) +def accumulate_list( + accumulator: Optional[SkillMsg[Literal[MsgType.reduced_stream]]], + msg: SkillMsg[Literal[MsgType.stream]], +) -> SkillMsg[Literal[MsgType.reduced_stream]]: + """All reducer that collects all values into a list.""" + acc_value = accumulator.content if accumulator else [] + return _make_skill_msg(msg, acc_value + msg.content) + + +def accumulate_dict( + accumulator: Optional[SkillMsg[Literal[MsgType.reduced_stream]]], + msg: SkillMsg[Literal[MsgType.stream]], +) -> SkillMsg[Literal[MsgType.reduced_stream]]: + """All reducer that collects all values into a list.""" + acc_value = accumulator.content if accumulator else {} + return _make_skill_msg(msg, {**acc_value, **msg.content}) + + class Reducer: sum = sum_reducer latest = latest_reducer all = all_reducer + accumulate_list = accumulate_list + accumulate_dict = accumulate_dict diff --git a/dimos/robot/unitree_webrtc/modular/ivan_unitree.py b/dimos/robot/unitree_webrtc/modular/ivan_unitree.py index df0d1ed42a..5c62fbb2e4 100644 --- a/dimos/robot/unitree_webrtc/modular/ivan_unitree.py +++ b/dimos/robot/unitree_webrtc/modular/ivan_unitree.py @@ -17,11 +17,11 @@ import time from typing import Optional -from dimos_lcm.foxglove_msgs.ImageAnnotations import ImageAnnotations from dimos_lcm.sensor_msgs import CameraInfo from dimos_lcm.std_msgs import Bool, String from dimos.core import LCMTransport, start +from dimos.msgs.foxglove_msgs import ImageAnnotations from dimos.msgs.geometry_msgs import PoseStamped, Quaternion, Transform, Vector3 from dimos.msgs.nav_msgs import OccupancyGrid, Path from dimos.msgs.sensor_msgs import Image From 0eff40ede15471f0e558d6f921ba5c5775b2d793 Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 28 Aug 2025 02:09:15 +0300 Subject: [PATCH 44/59] bugfix --- dimos/agents2/agent.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index 399123d1cf..914e1c2600 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -112,6 +112,8 @@ def snapshot_to_messages( special_data = skill_state.skill_config.output != Output.standard if special_data: content = skill_state.content() + if not content: + continue special_msgs.append(HumanMessage(content=[content])) if skill_state.call_id in tool_call_ids: From 450167bf6fd1196ad15363e510c5b8471a03c05d Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 28 Aug 2025 02:16:15 +0300 Subject: [PATCH 45/59] format comment --- dimos/perception/detection2d/module.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dimos/perception/detection2d/module.py b/dimos/perception/detection2d/module.py index df17857233..3cd4b1e831 100644 --- a/dimos/perception/detection2d/module.py +++ b/dimos/perception/detection2d/module.py @@ -231,6 +231,8 @@ def get_detections(self) -> Generator[ImageAnnotations, None, None]: self.detection_stream().subscribe(blocking_queue.put) while True: + # dealing with a dumb format from detic and yolo + # probably needs to be abstracted earlier in the pipeline so it's more convinient to use [image, detections] = blocking_queue.get() detection_dict = {} From dc7341c81f2c62c64e968725c8094fd16b72b77b Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 28 Aug 2025 02:39:34 +0300 Subject: [PATCH 46/59] removed broken test --- dimos/perception/detection2d/test_module.py | 53 --------------------- 1 file changed, 53 deletions(-) delete mode 100644 dimos/perception/detection2d/test_module.py diff --git a/dimos/perception/detection2d/test_module.py b/dimos/perception/detection2d/test_module.py deleted file mode 100644 index 81305bfe92..0000000000 --- a/dimos/perception/detection2d/test_module.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from dimos.perception.detection2d.module import ( - better_detection_format, - build_bbox, - build_detection2d, - build_detection2d_array, - build_imageannotations, -) - -array_sample = better_detection_format( - [ - [[246.2418670654297, 315.33331298828125, 371.5143127441406, 387.5533752441406]], - [10], - [28], - [0.6393297910690308], - ["suitcase"], - ] -) - -import time - - -class FakeImage: - ts: float - - def __init__(self): - self.ts = time.time() - - -detections = (FakeImage(), array_sample) - - -def test_build_detectionarray(): - print(build_detection2d_array(detections).lcm_encode()) - - -def test_build_imageannotations(): - annotations = build_imageannotations(detections) - print(annotations, annotations.texts) - print(annotations.lcm_encode()) From 013f44b5014cfad627657519c99cc75cdb277008 Mon Sep 17 00:00:00 2001 From: lesh Date: Fri, 29 Aug 2025 16:11:41 +0300 Subject: [PATCH 47/59] test fix --- dimos/core/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dimos/core/test_core.py b/dimos/core/test_core.py index a67d164b00..32433987d7 100644 --- a/dimos/core/test_core.py +++ b/dimos/core/test_core.py @@ -90,7 +90,7 @@ def test_classmethods(): # Check that we have the expected RPC methods assert "navigate_to" in class_rpcs, "navigate_to should be in rpcs" assert "start" in class_rpcs, "start should be in rpcs" - assert len(class_rpcs) == 3 + assert len(class_rpcs) == 5 # Check that the values are callable assert callable(class_rpcs["navigate_to"]), "navigate_to should be callable" From 5bafdc19b5588f9d5992177002872b5294c0ec7f Mon Sep 17 00:00:00 2001 From: lesh Date: Sat, 30 Aug 2025 23:50:10 +0300 Subject: [PATCH 48/59] agent bugfix --- dimos/protocol/skill/test_coordinator.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index 25df777b67..f6860c3747 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -92,7 +92,7 @@ async def test_coordinator_parallel_calls(): skillCoordinator.register_skills(SkillContainerTest()) skillCoordinator.start() - skillCoordinator.call_skill("test-call-0", "delayadd", {"args": [1, 2]}) + skillCoordinator.call_skill("test-call-0", "add", {"args": [0, 2]}) time.sleep(0.1) @@ -102,8 +102,9 @@ async def test_coordinator_parallel_calls(): skillstates = skillCoordinator.generate_snapshot() - tool_msg = skillstates[f"test-call-{cnt}"].agent_encode() - tool_msg.content == cnt + 1 + skill_id = f"test-call-{cnt}" + tool_msg = skillstates[skill_id].agent_encode() + assert tool_msg.content == cnt + 2 cnt += 1 if cnt < 5: @@ -120,6 +121,8 @@ async def test_coordinator_parallel_calls(): time.sleep(0.1 * cnt) + skillCoordinator.stop() + @pytest.mark.asyncio async def test_coordinator_generator(): @@ -141,3 +144,4 @@ async def test_coordinator_generator(): print("coordinator loop finished") print(skillCoordinator) + skillCoordinator.stop() From 5ea123a2d1918c143557fc44c72fab4a45a3e1ff Mon Sep 17 00:00:00 2001 From: lesh Date: Sat, 30 Aug 2025 23:55:21 +0300 Subject: [PATCH 49/59] skills use threadpool now --- dimos/protocol/skill/skill.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index 151029447a..8fa774e3b0 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -14,6 +14,7 @@ import asyncio import threading +from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass from typing import Any, Callable, Optional @@ -114,12 +115,14 @@ class SkillContainerConfig: skill_transport: type[SkillCommsSpec] = LCMSkillComms +_skill_thread_pool = ThreadPoolExecutor(max_workers=50, thread_name_prefix="skill_worker") + + def threaded(f: Callable[..., Any]) -> Callable[..., None]: - """Decorator to run a function in a separate thread.""" + """Decorator to run a function in a thread pool.""" def wrapper(self, *args, **kwargs): - thread = threading.Thread(target=f, args=(self, *args), kwargs=kwargs) - thread.start() + _skill_thread_pool.submit(f, self, *args, **kwargs) return None return wrapper From 7a6ad6d053de8a7f9512af1075881214b2740585 Mon Sep 17 00:00:00 2001 From: lesh Date: Sun, 31 Aug 2025 10:22:51 +0300 Subject: [PATCH 50/59] moved ivan_unitree to twist --- .../modular/connection_module.py | 6 +-- .../unitree_webrtc/modular/ivan_unitree.py | 42 ++++++++++++------- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/dimos/robot/unitree_webrtc/modular/connection_module.py b/dimos/robot/unitree_webrtc/modular/connection_module.py index 70110ef31f..289cc622e0 100644 --- a/dimos/robot/unitree_webrtc/modular/connection_module.py +++ b/dimos/robot/unitree_webrtc/modular/connection_module.py @@ -27,7 +27,7 @@ from reactivex.observable import Observable from dimos.core import In, Module, Out, rpc -from dimos.msgs.geometry_msgs import PoseStamped, Quaternion, Transform, Vector3 +from dimos.msgs.geometry_msgs import PoseStamped, Quaternion, Transform, Twist, Vector3 from dimos.msgs.sensor_msgs.Image import Image, sharpness_window from dimos.msgs.std_msgs import Header from dimos.robot.unitree_webrtc.connection import UnitreeWebRTCConnection @@ -99,7 +99,7 @@ def raw_video_stream(self): def video_stream(self): return self.raw_video_stream() - def move(self, vector: Vector3, duration: float = 0.0): + def move(self, vector: Twist, duration: float = 0.0): pass def publish_request(self, topic: str, data: dict): @@ -114,7 +114,7 @@ class ConnectionModule(Module): odom: Out[PoseStamped] = None lidar: Out[LidarMessage] = None video: Out[Image] = None - movecmd: In[Vector3] = None + movecmd: In[Twist] = None def __init__(self, ip: str = None, connection_type: str = "webrtc", *args, **kwargs): self.ip = ip diff --git a/dimos/robot/unitree_webrtc/modular/ivan_unitree.py b/dimos/robot/unitree_webrtc/modular/ivan_unitree.py index c8889d7352..c69f488d50 100644 --- a/dimos/robot/unitree_webrtc/modular/ivan_unitree.py +++ b/dimos/robot/unitree_webrtc/modular/ivan_unitree.py @@ -22,7 +22,7 @@ from dimos.core import LCMTransport, start from dimos.msgs.foxglove_msgs import ImageAnnotations -from dimos.msgs.geometry_msgs import PoseStamped, Quaternion, Transform, Vector3 +from dimos.msgs.geometry_msgs import PoseStamped, Quaternion, Transform, Twist, Vector3 from dimos.msgs.nav_msgs import OccupancyGrid, Path from dimos.msgs.sensor_msgs import Image from dimos.navigation.bt_navigator.navigator import BehaviorTreeNavigator, NavigatorState @@ -41,6 +41,26 @@ logger = setup_logger("dimos.robot.unitree_webrtc.unitree_go2", level=logging.INFO) +def deploy_foxglove(dimos, connection, mapper, global_planner): + """Deploy and configure visualization modules.""" + websocket_vis = dimos.deploy(WebsocketVisModule, port=7779) + websocket_vis.click_goal.transport = LCMTransport("/goal_request", PoseStamped) + websocket_vis.explore_cmd.transport = LCMTransport("/explore_cmd", Bool) + websocket_vis.stop_explore_cmd.transport = LCMTransport("/stop_explore_cmd", Bool) + websocket_vis.movecmd.transport = LCMTransport("/cmd_vel", Twist) + + websocket_vis.robot_pose.connect(connection.odom) + websocket_vis.path.connect(global_planner.path) + websocket_vis.global_costmap.connect(mapper.global_costmap) + + connection.movecmd.connect(websocket_vis.movecmd) + foxglove_bridge = FoxgloveBridge() + + websocket_vis.start() + foxglove_bridge.start() + return websocket_vis, foxglove_bridge + + def deploy_navigation(dimos, connection): mapper = dimos.deploy(Map, voxel_size=0.5, cost_resolution=0.05, global_publish_interval=1.0) mapper.lidar.connect(connection.lidar) @@ -64,7 +84,7 @@ def deploy_navigation(dimos, connection): navigator.navigation_state.transport = LCMTransport("/navigation_state", String) navigator.global_costmap.transport = LCMTransport("/global_costmap", OccupancyGrid) global_planner.path.transport = LCMTransport("/global_path", Path) - local_planner.cmd_vel.transport = LCMTransport("/cmd_vel", Vector3) + local_planner.cmd_vel.transport = LCMTransport("/cmd_vel", Twist) frontier_explorer.goal_request.transport = LCMTransport("/goal_request", PoseStamped) frontier_explorer.goal_reached.transport = LCMTransport("/goal_reached", Bool) frontier_explorer.explore_cmd.transport = LCMTransport("/explore_cmd", Bool) @@ -85,18 +105,12 @@ def deploy_navigation(dimos, connection): frontier_explorer.costmap.connect(mapper.global_costmap) frontier_explorer.odometry.connect(connection.odom) - websocket_vis = dimos.deploy(WebsocketVisModule, port=7779) - websocket_vis.click_goal.transport = LCMTransport("/goal_request", PoseStamped) - - websocket_vis.robot_pose.connect(connection.odom) - websocket_vis.path.connect(global_planner.path) - websocket_vis.global_costmap.connect(mapper.global_costmap) - mapper.start() global_planner.start() local_planner.start() navigator.start() - websocket_vis.start() + + return mapper, global_planner class UnitreeGo2: @@ -107,14 +121,11 @@ def __init__( ): dimos = start(3) - foxglove_bridge = dimos.deploy(FoxgloveBridge) - foxglove_bridge.start() - connection = dimos.deploy(ConnectionModule, ip, connection_type) connection.lidar.transport = LCMTransport("/lidar", LidarMessage) connection.odom.transport = LCMTransport("/odom", PoseStamped) connection.video.transport = LCMTransport("/image", Image) - connection.movecmd.transport = LCMTransport("/cmd_vel", Vector3) + connection.movecmd.transport = LCMTransport("/cmd_vel", Twist) connection.camera_info.transport = LCMTransport("/camera_info", CameraInfo) connection.start() @@ -126,7 +137,8 @@ def __init__( detection.annotations.transport = LCMTransport("/annotations", ImageAnnotations) detection.start() - # deploy_navigation(dimos, connection) + mapper, global_planner = deploy_navigation(dimos, connection) + deploy_foxglove(dimos, connection, mapper, global_planner) def stop(): ... From de6a4f1c8fdb3e21e447311378227cac88c9d8b2 Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 3 Sep 2025 16:34:21 +0300 Subject: [PATCH 51/59] proper skill threadpool hosting and shutdown, proper agents shutdown --- dimos/agents2/agent.py | 7 +++---- dimos/agents2/test_agent.py | 24 +++++++++++++++--------- dimos/agents2/test_mock_agent.py | 5 ++++- dimos/core/__init__.py | 1 + dimos/protocol/skill/coordinator.py | 11 +++++++++++ dimos/protocol/skill/skill.py | 22 +++++++++++++++++----- dimos/protocol/skill/test_coordinator.py | 2 +- 7 files changed, 52 insertions(+), 20 deletions(-) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index 914e1c2600..0a55a77d44 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -244,7 +244,6 @@ async def agent_loop(self, seed_query: str = ""): # we will return a tool message, and not a general state message snapshot_msgs = snapshot_to_messages(update, msg.tool_calls) - print("SNAPSHOT", snapshot_msgs) self.state_messages = snapshot_msgs.get("state_msgs", []) self.append_history(*snapshot_msgs.get("tool_msgs", [])) @@ -254,11 +253,11 @@ async def agent_loop(self, seed_query: str = ""): traceback.print_exc() - def query_async(self, query: str): + def query(self, query: str): return asyncio.ensure_future(self.agent_loop(query), loop=self._loop) - def query(self, query: str): - return asyncio.run_coroutine_threadsafe(self.agent_loop(query), self._loop).result() + def query_async(self, query: str): + return self.agent_loop(query) def register_skills(self, container): return self.coordinator.register_skills(container) diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index d0c929348e..e4453cc2a3 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import asyncio - import pytest from dimos.agents2.agent import Agent @@ -28,18 +26,26 @@ async def test_agent_init(): "Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate" ) - # # Uncomment the following lines to use a real module system - # dimos = start(2) - # testcontainer = dimos.deploy(SkillContainerTest) + # # Uncomment the following lines to use a dimos module system + dimos = start(2) + testcontainer = dimos.deploy(SkillContainerTest) + agent = Agent(system_prompt=system_prompt) + + ## uncomment the following lines to run agents in a main loop without a module system + # testcontainer = SkillContainerTest() # agent = Agent(system_prompt=system_prompt) - testcontainer = SkillContainerTest() - agent = Agent(system_prompt=system_prompt) agent.register_skills(testcontainer) agent.start() + agent.run_implicit_skill("uptime_seconds") - agent.query_async( + + await agent.query_async( "hi there, I have 4 questions for you: Please tell me what's your name and current date, and how much is 124181112 + 124124, and what do you see on the camera?" ) - await asyncio.sleep(20) + print("Agent loop finished") + + agent.stop() + testcontainer.stop() + dimos.stop() diff --git a/dimos/agents2/test_mock_agent.py b/dimos/agents2/test_mock_agent.py index 2846f4d3e9..576fbf1e3f 100644 --- a/dimos/agents2/test_mock_agent.py +++ b/dimos/agents2/test_mock_agent.py @@ -62,6 +62,7 @@ async def test_tool_call(): skills = SkillContainerTest() agent.coordinator.register_skills(skills) agent.start() + # Query the agent await agent.query_async("Please add 5 and 3") @@ -178,7 +179,9 @@ async def test_tool_call_implicit_detections(): agent.run_implicit_skill("get_detections") - print("waiting 8.5 seconds for some detections before quering agent") + print( + "Robot replay pipeline is running in the background.\nWaiting 8.5 seconds for some detections before quering agent" + ) time.sleep(8.5) # Query the agent diff --git a/dimos/core/__init__.py b/dimos/core/__init__.py index 1e6eccaaed..ab2dcbda0a 100644 --- a/dimos/core/__init__.py +++ b/dimos/core/__init__.py @@ -147,6 +147,7 @@ def check_worker_memory(): dask_client.deploy = deploy dask_client.check_worker_memory = check_worker_memory + dask_client.stop = lambda: dask_client.shutdown() return dask_client diff --git a/dimos/protocol/skill/coordinator.py b/dimos/protocol/skill/coordinator.py index 7261d1fa23..4b15e171a5 100644 --- a/dimos/protocol/skill/coordinator.py +++ b/dimos/protocol/skill/coordinator.py @@ -362,6 +362,17 @@ def handle_message(self, msg: SkillMsg) -> None: self._loop.call_soon_threadsafe(self._updates_available.set) def has_active_skills(self) -> bool: + if not self.has_passive_skills(): + return False + for skill_run in self._skill_state.values(): + # check if this skill will notify agent + if skill_run.skill_config.ret == Return.call_agent: + return True + if skill_run.skill_config.stream == Stream.call_agent: + return True + return False + + def has_passive_skills(self) -> bool: # check if dict is empty if self._skill_state == {}: return False diff --git a/dimos/protocol/skill/skill.py b/dimos/protocol/skill/skill.py index 8fa774e3b0..0c344b4af4 100644 --- a/dimos/protocol/skill/skill.py +++ b/dimos/protocol/skill/skill.py @@ -96,7 +96,8 @@ def wrapper(self, *args, **kwargs): name=f.__name__, reducer=reducer, stream=stream, - ret=ret, + # if stream is passive, ret must be passive too + ret=ret.passive if stream == Stream.passive else ret, output=output, schema=function_to_schema(f), ) @@ -115,14 +116,15 @@ class SkillContainerConfig: skill_transport: type[SkillCommsSpec] = LCMSkillComms -_skill_thread_pool = ThreadPoolExecutor(max_workers=50, thread_name_prefix="skill_worker") - - def threaded(f: Callable[..., Any]) -> Callable[..., None]: """Decorator to run a function in a thread pool.""" def wrapper(self, *args, **kwargs): - _skill_thread_pool.submit(f, self, *args, **kwargs) + if self._skill_thread_pool is None: + self._skill_thread_pool = ThreadPoolExecutor( + max_workers=50, thread_name_prefix="skill_worker" + ) + self._skill_thread_pool.submit(f, self, *args, **kwargs) return None return wrapper @@ -144,6 +146,7 @@ def wrapper(self, *args, **kwargs): class SkillContainer: skill_transport_class: type[SkillCommsSpec] = LCMSkillComms + _skill_thread_pool: Optional[ThreadPoolExecutor] = None _skill_transport: Optional[SkillCommsSpec] = None @rpc @@ -153,6 +156,15 @@ def dynamic_skills(self): def __str__(self) -> str: return f"SkillContainer({self.__class__.__name__})" + def stop(self): + if self._skill_transport: + self._skill_transport.stop() + self._skill_transport = None + + if self._skill_thread_pool: + self._skill_thread_pool.shutdown(wait=True) + self._skill_thread_pool = None + # TODO: figure out standard args/kwargs passing format, # use same interface as skill coordinator call_skill method @threaded diff --git a/dimos/protocol/skill/test_coordinator.py b/dimos/protocol/skill/test_coordinator.py index f6860c3747..849d01d492 100644 --- a/dimos/protocol/skill/test_coordinator.py +++ b/dimos/protocol/skill/test_coordinator.py @@ -119,7 +119,7 @@ async def test_coordinator_parallel_calls(): {"args": [cnt, 2]}, ) - time.sleep(0.1 * cnt) + await asyncio.sleep(0.1 * cnt) skillCoordinator.stop() From ebadffcadce97088ed030e4dacd45de841f38f85 Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 3 Sep 2025 16:39:57 +0300 Subject: [PATCH 52/59] clean mock agent exit --- dimos/agents2/test_mock_agent.py | 7 ++++++- dimos/perception/detection2d/module.py | 3 --- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/dimos/agents2/test_mock_agent.py b/dimos/agents2/test_mock_agent.py index 576fbf1e3f..1a6adaf075 100644 --- a/dimos/agents2/test_mock_agent.py +++ b/dimos/agents2/test_mock_agent.py @@ -201,5 +201,10 @@ async def test_tool_call_implicit_detections(): for msg in agent._history if isinstance(msg, HumanMessage) and msg.content and isinstance(msg.content, list) ] - assert len(human_messages_with_images) >= 0 # May have image messages + assert len(human_messages_with_images) >= 0 + agent.stop() + test_skill_module.stop() + robot_connection.stop() + detect2d.stop() + dimos.stop() diff --git a/dimos/perception/detection2d/module.py b/dimos/perception/detection2d/module.py index 3cd4b1e831..2428891dff 100644 --- a/dimos/perception/detection2d/module.py +++ b/dimos/perception/detection2d/module.py @@ -241,6 +241,3 @@ def get_detections(self) -> Generator[ImageAnnotations, None, None]: detection_dict[name] = f"{confidence:.3f}" yield detection_dict - - @rpc - def stop(self): ... From 66aa8dc99083c0379032988259a11e7e0afbfe1e Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 3 Sep 2025 16:42:14 +0300 Subject: [PATCH 53/59] showing double agent query --- dimos/agents2/test_agent.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index e4453cc2a3..89dad0df82 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -41,9 +41,13 @@ async def test_agent_init(): agent.run_implicit_skill("uptime_seconds") await agent.query_async( - "hi there, I have 4 questions for you: Please tell me what's your name and current date, and how much is 124181112 + 124124, and what do you see on the camera?" + "hi there, please tell me what's your name and current date, and how much is 124181112 + 124124?" ) + print("Agent loop finished, asking about camera") + + await agent.query_async("tell me what you see on the camera?") + print("Agent loop finished") agent.stop() From 6150ad4e87ee60b2c93a73f278f920f85c3dbecd Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 3 Sep 2025 16:45:28 +0300 Subject: [PATCH 54/59] agent loop comments --- dimos/agents2/test_agent.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index 89dad0df82..edfa78cfc2 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -44,8 +44,12 @@ async def test_agent_init(): "hi there, please tell me what's your name and current date, and how much is 124181112 + 124124?" ) + # agent loop is considered finished once no active skills remain, + # agent will stop it's loop if passive streams are active print("Agent loop finished, asking about camera") + # we query again (this shows subsequent querying, but we could have asked for camera image in the original query, + # it all runs in parallel, and agent might get called once or twice depending on timing of skill responses) await agent.query_async("tell me what you see on the camera?") print("Agent loop finished") From a99522c531946cbf7b52c241be17ea93d30cbc2a Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 3 Sep 2025 16:47:20 +0300 Subject: [PATCH 55/59] skillspy/agentspy suggestion comment --- dimos/agents2/test_agent.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dimos/agents2/test_agent.py b/dimos/agents2/test_agent.py index edfa78cfc2..85f1f556c4 100644 --- a/dimos/agents2/test_agent.py +++ b/dimos/agents2/test_agent.py @@ -52,6 +52,8 @@ async def test_agent_init(): # it all runs in parallel, and agent might get called once or twice depending on timing of skill responses) await agent.query_async("tell me what you see on the camera?") + # you can run skillspy and agentspy in parallel with this test for a better observation of what's happening + print("Agent loop finished") agent.stop() From d8c9eb116ced48827bd56760f47d2ead72dfc605 Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 4 Sep 2025 13:35:21 +0300 Subject: [PATCH 56/59] langchain exact versions --- pyproject.toml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3418c79fd1..6064ee6680 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,9 +52,11 @@ dependencies = [ "uvicorn>=0.34.0", # Agents - "langchain-chroma>=0.1.4", - "langchain-openai>=0.2.14", "langchain==0.3.27", + "langchain-chroma==0.2.5", + "langchain-core==0.3.72", + "langchain-openai==0.3.28", + "langchain-text-splitters==0.3.9", # Class Extraction "pydantic", From 79028e396ddb8b8f1c64e911cc60ecf369857110 Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 4 Sep 2025 13:37:28 +0300 Subject: [PATCH 57/59] stash agent test --- dimos/agents2/test_stash_agent.py | 62 +++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 dimos/agents2/test_stash_agent.py diff --git a/dimos/agents2/test_stash_agent.py b/dimos/agents2/test_stash_agent.py new file mode 100644 index 0000000000..715e24b513 --- /dev/null +++ b/dimos/agents2/test_stash_agent.py @@ -0,0 +1,62 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from dimos.agents2.agent import Agent +from dimos.core import start +from dimos.protocol.skill.test_coordinator import SkillContainerTest + + +@pytest.mark.tool +@pytest.mark.asyncio +async def test_agent_init(): + system_prompt = ( + "Your name is Mr. Potato, potatoes are bad at math. Use a tools if asked to calculate" + ) + + # # Uncomment the following lines to use a dimos module system + # dimos = start(2) + # testcontainer = dimos.deploy(SkillContainerTest) + # agent = Agent(system_prompt=system_prompt) + + ## uncomment the following lines to run agents in a main loop without a module system + testcontainer = SkillContainerTest() + agent = Agent(system_prompt=system_prompt) + + agent.register_skills(testcontainer) + agent.start() + + agent.run_implicit_skill("uptime_seconds") + + await agent.query_async( + "hi there, please tell me what's your name and current date, and how much is 124181112 + 124124?" + ) + + # agent loop is considered finished once no active skills remain, + # agent will stop it's loop if passive streams are active + print("Agent loop finished, asking about camera") + + # we query again (this shows subsequent querying, but we could have asked for camera image in the original query, + # it all runs in parallel, and agent might get called once or twice depending on timing of skill responses) + # await agent.query_async("tell me what you see on the camera?") + + # you can run skillspy and agentspy in parallel with this test for a better observation of what's happening + await agent.query_async("tell me exactly everything we've talked about until now") + + print("Agent loop finished") + + agent.stop() + testcontainer.stop() + dimos.stop() From 48e2d2e891448aec364e919618cbfc6614a10b6a Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 4 Sep 2025 14:31:26 +0300 Subject: [PATCH 58/59] state reset between subsequent calls --- dimos/agents2/agent.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index 0a55a77d44..d5d3ce53e4 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -201,6 +201,7 @@ def run_implicit_skill(self, skill_name: str, *args, **kwargs) -> None: self.coordinator.call_skill(False, skill_name, {"args": args, "kwargs": kwargs}) async def agent_loop(self, seed_query: str = ""): + self.state_messages = [] self.append_history(HumanMessage(seed_query)) try: From 4fc8f8efab133412dc24e6719a94c0a69ecd8d61 Mon Sep 17 00:00:00 2001 From: lesh Date: Thu, 4 Sep 2025 22:37:40 +0300 Subject: [PATCH 59/59] loose langchain versions --- pyproject.toml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6064ee6680..390cd94de4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,11 +52,11 @@ dependencies = [ "uvicorn>=0.34.0", # Agents - "langchain==0.3.27", - "langchain-chroma==0.2.5", - "langchain-core==0.3.72", - "langchain-openai==0.3.28", - "langchain-text-splitters==0.3.9", + "langchain>=0.3.27", + "langchain-chroma>=0.2.5", + "langchain-core>=0.3.72", + "langchain-openai>=0.3.28", + "langchain-text-splitters>=0.3.9", # Class Extraction "pydantic",