diff --git a/dimos/agents/agent_ctransformers_gguf.py b/dimos/agents/agent_ctransformers_gguf.py deleted file mode 100644 index 17d233437d..0000000000 --- a/dimos/agents/agent_ctransformers_gguf.py +++ /dev/null @@ -1,215 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import annotations - -# Standard library imports -import logging -import os -from typing import TYPE_CHECKING, Any - -# Third-party imports -from dotenv import load_dotenv -from reactivex import Observable, create -import torch - -# Local imports -from dimos.agents.agent import LLMAgent -from dimos.agents.prompt_builder.impl import PromptBuilder -from dimos.utils.logging_config import setup_logger - -# Initialize environment variables -load_dotenv() - -# Initialize logger for the agent module -logger = setup_logger("dimos.agents", level=logging.DEBUG) - -from ctransformers import AutoModelForCausalLM as CTransformersModel - -if TYPE_CHECKING: - from reactivex.scheduler import ThreadPoolScheduler - from reactivex.subject import Subject - - from dimos.agents.memory.base import AbstractAgentSemanticMemory - - -class CTransformersTokenizerAdapter: - def __init__(self, model) -> None: - self.model = model - - def encode(self, text: str, **kwargs): - return self.model.tokenize(text) - - def decode(self, token_ids, **kwargs): - return self.model.detokenize(token_ids) - - def token_count(self, text: str): - return len(self.tokenize_text(text)) if text else 0 - - def tokenize_text(self, text: str): - return self.model.tokenize(text) - - def detokenize_text(self, tokenized_text): - try: - return self.model.detokenize(tokenized_text) - except Exception as e: - raise ValueError(f"Failed to detokenize text. Error: {e!s}") - - def apply_chat_template( - self, conversation, tokenize: bool = False, add_generation_prompt: bool = True - ): - prompt = "" - for message in conversation: - role = message["role"] - content = message["content"] - if role == "system": - prompt += f"<|system|>\n{content}\n" - elif role == "user": - prompt += f"<|user|>\n{content}\n" - elif role == "assistant": - prompt += f"<|assistant|>\n{content}\n" - if add_generation_prompt: - prompt += "<|assistant|>\n" - return prompt - - -# CTransformers Agent Class -class CTransformersGGUFAgent(LLMAgent): - def __init__( - self, - dev_name: str, - agent_type: str = "HF-LLM", - model_name: str = "TheBloke/Llama-2-7B-GGUF", - model_file: str = "llama-2-7b.Q4_K_M.gguf", - model_type: str = "llama", - gpu_layers: int = 50, - device: str = "auto", - query: str = "How many r's are in the word 'strawberry'?", - input_query_stream: Observable | None = None, - input_video_stream: Observable | None = None, - output_dir: str = os.path.join(os.getcwd(), "assets", "agent"), - agent_memory: AbstractAgentSemanticMemory | None = None, - system_query: str | None = "You are a helpful assistant.", - max_output_tokens_per_request: int = 10, - max_input_tokens_per_request: int = 250, - prompt_builder: PromptBuilder | None = None, - pool_scheduler: ThreadPoolScheduler | None = None, - process_all_inputs: bool | None = None, - ) -> None: - # Determine appropriate default for process_all_inputs if not provided - if process_all_inputs is None: - # Default to True for text queries, False for video streams - if input_query_stream is not None and input_video_stream is None: - process_all_inputs = True - else: - process_all_inputs = False - - super().__init__( - dev_name=dev_name, - agent_type=agent_type, - agent_memory=agent_memory, - pool_scheduler=pool_scheduler, - process_all_inputs=process_all_inputs, - system_query=system_query, - max_output_tokens_per_request=max_output_tokens_per_request, - max_input_tokens_per_request=max_input_tokens_per_request, - ) - - self.query = query - self.output_dir = output_dir - os.makedirs(self.output_dir, exist_ok=True) - - self.model_name = model_name - self.device = device - if self.device == "auto": - self.device = "cuda" if torch.cuda.is_available() else "cpu" - if self.device == "cuda": - print(f"Using GPU: {torch.cuda.get_device_name(0)}") - else: - print("GPU not available, using CPU") - print(f"Device: {self.device}") - - self.model = CTransformersModel.from_pretrained( - model_name, model_file=model_file, model_type=model_type, gpu_layers=gpu_layers - ) - - self.tokenizer = CTransformersTokenizerAdapter(self.model) - - self.prompt_builder = prompt_builder or PromptBuilder( - self.model_name, tokenizer=self.tokenizer - ) - - self.max_output_tokens_per_request = max_output_tokens_per_request - - # self.stream_query(self.query).subscribe(lambda x: print(x)) - - self.input_video_stream = input_video_stream - self.input_query_stream = input_query_stream - - # Ensure only one input stream is provided. - if self.input_video_stream is not None and self.input_query_stream is not None: - raise ValueError( - "More than one input stream provided. Please provide only one input stream." - ) - - if self.input_video_stream is not None: - logger.info("Subscribing to input video stream...") - self.disposables.add(self.subscribe_to_image_processing(self.input_video_stream)) - if self.input_query_stream is not None: - logger.info("Subscribing to input query stream...") - self.disposables.add(self.subscribe_to_query_processing(self.input_query_stream)) - - def _send_query(self, messages: list) -> Any: - try: - _BLUE_PRINT_COLOR: str = "\033[34m" - _RESET_COLOR: str = "\033[0m" - - # === FIX: Flatten message content === - flat_messages = [] - for msg in messages: - role = msg["role"] - content = msg["content"] - if isinstance(content, list): - # Assume it's a list of {'type': 'text', 'text': ...} - text_parts = [c["text"] for c in content if isinstance(c, dict) and "text" in c] - content = " ".join(text_parts) - flat_messages.append({"role": role, "content": content}) - - print(f"{_BLUE_PRINT_COLOR}Messages: {flat_messages}{_RESET_COLOR}") - - print("Applying chat template...") - prompt_text = self.tokenizer.apply_chat_template( - conversation=flat_messages, tokenize=False, add_generation_prompt=True - ) - print("Chat template applied.") - print(f"Prompt text:\n{prompt_text}") - - response = self.model(prompt_text, max_new_tokens=self.max_output_tokens_per_request) - print("Model response received.") - return response - - except Exception as e: - logger.error(f"Error during HuggingFace query: {e}") - return "Error processing request." - - def stream_query(self, query_text: str) -> Subject: - """ - Creates an observable that processes a text query and emits the response. - """ - return create( - lambda observer, _: self._observable_query(observer, incoming_query=query_text) - ) - - -# endregion HuggingFaceLLMAgent Subclass (HuggingFace-Specific Implementation) diff --git a/dimos/agents/agent_huggingface_local.py b/dimos/agents/agent_huggingface_local.py deleted file mode 100644 index 69d02bb1d2..0000000000 --- a/dimos/agents/agent_huggingface_local.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import annotations - -# Standard library imports -import logging -import os -from typing import TYPE_CHECKING, Any - -# Third-party imports -from dotenv import load_dotenv -from reactivex import Observable, create -import torch -from transformers import AutoModelForCausalLM - -# Local imports -from dimos.agents.agent import LLMAgent -from dimos.agents.memory.chroma_impl import LocalSemanticMemory -from dimos.agents.prompt_builder.impl import PromptBuilder -from dimos.agents.tokenizer.huggingface_tokenizer import HuggingFaceTokenizer -from dimos.utils.logging_config import setup_logger - -if TYPE_CHECKING: - from reactivex.scheduler import ThreadPoolScheduler - from reactivex.subject import Subject - - from dimos.agents.memory.base import AbstractAgentSemanticMemory - from dimos.agents.tokenizer.base import AbstractTokenizer - -# Initialize environment variables -load_dotenv() - -# Initialize logger for the agent module -logger = setup_logger("dimos.agents", level=logging.DEBUG) - - -# HuggingFaceLLMAgent Class -class HuggingFaceLocalAgent(LLMAgent): - def __init__( - self, - dev_name: str, - agent_type: str = "HF-LLM", - model_name: str = "Qwen/Qwen2.5-3B", - device: str = "auto", - query: str = "How many r's are in the word 'strawberry'?", - input_query_stream: Observable | None = None, - input_video_stream: Observable | None = None, - output_dir: str = os.path.join(os.getcwd(), "assets", "agent"), - agent_memory: AbstractAgentSemanticMemory | None = None, - system_query: str | None = None, - max_output_tokens_per_request: int | None = None, - max_input_tokens_per_request: int | None = None, - prompt_builder: PromptBuilder | None = None, - tokenizer: AbstractTokenizer | None = None, - image_detail: str = "low", - pool_scheduler: ThreadPoolScheduler | None = None, - process_all_inputs: bool | None = None, - ) -> None: - # Determine appropriate default for process_all_inputs if not provided - if process_all_inputs is None: - # Default to True for text queries, False for video streams - if input_query_stream is not None and input_video_stream is None: - process_all_inputs = True - else: - process_all_inputs = False - - super().__init__( - dev_name=dev_name, - agent_type=agent_type, - agent_memory=agent_memory or LocalSemanticMemory(), - pool_scheduler=pool_scheduler, - process_all_inputs=process_all_inputs, - system_query=system_query, - ) - - self.query = query - self.output_dir = output_dir - os.makedirs(self.output_dir, exist_ok=True) - - self.model_name = model_name - self.device = device - if self.device == "auto": - self.device = "cuda" if torch.cuda.is_available() else "cpu" - if self.device == "cuda": - print(f"Using GPU: {torch.cuda.get_device_name(0)}") - else: - print("GPU not available, using CPU") - print(f"Device: {self.device}") - - self.tokenizer = tokenizer or HuggingFaceTokenizer(self.model_name) - - self.prompt_builder = prompt_builder or PromptBuilder( - self.model_name, tokenizer=self.tokenizer - ) - - self.model = AutoModelForCausalLM.from_pretrained( - model_name, - torch_dtype=torch.float16 if self.device == "cuda" else torch.float32, - device_map=self.device, - ) - - self.max_output_tokens_per_request = max_output_tokens_per_request - - # self.stream_query(self.query).subscribe(lambda x: print(x)) - - self.input_video_stream = input_video_stream - self.input_query_stream = input_query_stream - - # Ensure only one input stream is provided. - if self.input_video_stream is not None and self.input_query_stream is not None: - raise ValueError( - "More than one input stream provided. Please provide only one input stream." - ) - - if self.input_video_stream is not None: - logger.info("Subscribing to input video stream...") - self.disposables.add(self.subscribe_to_image_processing(self.input_video_stream)) - if self.input_query_stream is not None: - logger.info("Subscribing to input query stream...") - self.disposables.add(self.subscribe_to_query_processing(self.input_query_stream)) - - def _send_query(self, messages: list) -> Any: - _BLUE_PRINT_COLOR: str = "\033[34m" - _RESET_COLOR: str = "\033[0m" - - try: - # Log the incoming messages - print(f"{_BLUE_PRINT_COLOR}Messages: {messages!s}{_RESET_COLOR}") - - # Process with chat template - try: - print("Applying chat template...") - prompt_text = self.tokenizer.tokenizer.apply_chat_template( - conversation=[{"role": "user", "content": str(messages)}], - tokenize=False, - add_generation_prompt=True, - ) - print("Chat template applied.") - - # Tokenize the prompt - print("Preparing model inputs...") - model_inputs = self.tokenizer.tokenizer([prompt_text], return_tensors="pt").to( - self.model.device - ) - print("Model inputs prepared.") - - # Generate the response - print("Generating response...") - generated_ids = self.model.generate( - **model_inputs, max_new_tokens=self.max_output_tokens_per_request - ) - - # Extract the generated tokens (excluding the input prompt tokens) - print("Processing generated output...") - generated_ids = [ - output_ids[len(input_ids) :] - for input_ids, output_ids in zip( - model_inputs.input_ids, generated_ids, strict=False - ) - ] - - # Convert tokens back to text - response = self.tokenizer.tokenizer.batch_decode( - generated_ids, skip_special_tokens=True - )[0] - print("Response successfully generated.") - - return response - - except AttributeError as e: - # Handle case where tokenizer doesn't have the expected methods - logger.warning(f"Chat template not available: {e}. Using simple format.") - # Continue with execution and use simple format - - except Exception as e: - # Log any other errors but continue execution - logger.warning( - f"Error in chat template processing: {e}. Falling back to simple format." - ) - - # Fallback approach for models without chat template support - # This code runs if the try block above raises an exception - print("Using simple prompt format...") - - # Convert messages to a simple text format - if ( - isinstance(messages, list) - and messages - and isinstance(messages[0], dict) - and "content" in messages[0] - ): - prompt_text = messages[0]["content"] - else: - prompt_text = str(messages) - - # Tokenize the prompt - model_inputs = self.tokenizer.tokenize_text(prompt_text) - model_inputs = torch.tensor([model_inputs], device=self.model.device) - - # Generate the response - generated_ids = self.model.generate( - input_ids=model_inputs, max_new_tokens=self.max_output_tokens_per_request - ) - - # Extract the generated tokens - generated_ids = generated_ids[0][len(model_inputs[0]) :] - - # Convert tokens back to text - response = self.tokenizer.detokenize_text(generated_ids.tolist()) - print("Response generated using simple format.") - - return response - - except Exception as e: - # Catch all other errors - logger.error(f"Error during query processing: {e}", exc_info=True) - return "Error processing request. Please try again." - - def stream_query(self, query_text: str) -> Subject: - """ - Creates an observable that processes a text query and emits the response. - """ - return create( - lambda observer, _: self._observable_query(observer, incoming_query=query_text) - ) - - -# endregion HuggingFaceLLMAgent Subclass (HuggingFace-Specific Implementation) diff --git a/dimos/agents/agent_huggingface_remote.py b/dimos/agents/agent_huggingface_remote.py deleted file mode 100644 index 5bb5b293d3..0000000000 --- a/dimos/agents/agent_huggingface_remote.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import annotations - -# Standard library imports -import logging -import os -from typing import TYPE_CHECKING, Any - -# Third-party imports -from dotenv import load_dotenv -from huggingface_hub import InferenceClient -from reactivex import Observable, create - -# Local imports -from dimos.agents.agent import LLMAgent -from dimos.agents.prompt_builder.impl import PromptBuilder -from dimos.agents.tokenizer.huggingface_tokenizer import HuggingFaceTokenizer -from dimos.utils.logging_config import setup_logger - -if TYPE_CHECKING: - from reactivex.scheduler import ThreadPoolScheduler - from reactivex.subject import Subject - - from dimos.agents.memory.base import AbstractAgentSemanticMemory - from dimos.agents.tokenizer.base import AbstractTokenizer - -# Initialize environment variables -load_dotenv() - -# Initialize logger for the agent module -logger = setup_logger("dimos.agents", level=logging.DEBUG) - - -# HuggingFaceLLMAgent Class -class HuggingFaceRemoteAgent(LLMAgent): - def __init__( - self, - dev_name: str, - agent_type: str = "HF-LLM", - model_name: str = "Qwen/QwQ-32B", - query: str = "How many r's are in the word 'strawberry'?", - input_query_stream: Observable | None = None, - input_video_stream: Observable | None = None, - output_dir: str = os.path.join(os.getcwd(), "assets", "agent"), - agent_memory: AbstractAgentSemanticMemory | None = None, - system_query: str | None = None, - max_output_tokens_per_request: int = 16384, - prompt_builder: PromptBuilder | None = None, - tokenizer: AbstractTokenizer | None = None, - image_detail: str = "low", - pool_scheduler: ThreadPoolScheduler | None = None, - process_all_inputs: bool | None = None, - api_key: str | None = None, - hf_provider: str | None = None, - hf_base_url: str | None = None, - ) -> None: - # Determine appropriate default for process_all_inputs if not provided - if process_all_inputs is None: - # Default to True for text queries, False for video streams - if input_query_stream is not None and input_video_stream is None: - process_all_inputs = True - else: - process_all_inputs = False - - super().__init__( - dev_name=dev_name, - agent_type=agent_type, - agent_memory=agent_memory, - pool_scheduler=pool_scheduler, - process_all_inputs=process_all_inputs, - system_query=system_query, - ) - - self.query = query - self.output_dir = output_dir - os.makedirs(self.output_dir, exist_ok=True) - - self.model_name = model_name - self.prompt_builder = prompt_builder or PromptBuilder( - self.model_name, tokenizer=tokenizer or HuggingFaceTokenizer(self.model_name) - ) - - self.model_name = model_name - - self.max_output_tokens_per_request = max_output_tokens_per_request - - self.api_key = api_key or os.getenv("HF_TOKEN") - self.provider = hf_provider or "hf-inference" - self.base_url = hf_base_url or os.getenv("HUGGINGFACE_PRV_ENDPOINT") - self.client = InferenceClient( - provider=self.provider, - base_url=self.base_url, - api_key=self.api_key, - ) - - # self.stream_query(self.query).subscribe(lambda x: print(x)) - - self.input_video_stream = input_video_stream - self.input_query_stream = input_query_stream - - # Ensure only one input stream is provided. - if self.input_video_stream is not None and self.input_query_stream is not None: - raise ValueError( - "More than one input stream provided. Please provide only one input stream." - ) - - if self.input_video_stream is not None: - logger.info("Subscribing to input video stream...") - self.disposables.add(self.subscribe_to_image_processing(self.input_video_stream)) - if self.input_query_stream is not None: - logger.info("Subscribing to input query stream...") - self.disposables.add(self.subscribe_to_query_processing(self.input_query_stream)) - - def _send_query(self, messages: list) -> Any: - try: - completion = self.client.chat.completions.create( - model=self.model_name, - messages=messages, - max_tokens=self.max_output_tokens_per_request, - ) - - return completion.choices[0].message - except Exception as e: - logger.error(f"Error during HuggingFace query: {e}") - return "Error processing request." - - def stream_query(self, query_text: str) -> Subject: - """ - Creates an observable that processes a text query and emits the response. - """ - return create( - lambda observer, _: self._observable_query(observer, incoming_query=query_text) - ) diff --git a/dimos/agents/cerebras_agent.py b/dimos/agents/cerebras_agent.py deleted file mode 100644 index e58de812d0..0000000000 --- a/dimos/agents/cerebras_agent.py +++ /dev/null @@ -1,613 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Cerebras agent implementation for the DIMOS agent framework. - -This module provides a CerebrasAgent class that implements the LLMAgent interface -for Cerebras inference API using the official Cerebras Python SDK. -""" - -from __future__ import annotations - -import copy -import json -import os -import threading -import time -from typing import TYPE_CHECKING - -from cerebras.cloud.sdk import Cerebras -from dotenv import load_dotenv - -# Local imports -from dimos.agents.agent import LLMAgent -from dimos.agents.prompt_builder.impl import PromptBuilder -from dimos.agents.tokenizer.openai_tokenizer import OpenAITokenizer -from dimos.skills.skills import AbstractSkill, SkillLibrary -from dimos.utils.logging_config import setup_logger - -if TYPE_CHECKING: - from pydantic import BaseModel - from reactivex import Observable - from reactivex.observer import Observer - from reactivex.scheduler import ThreadPoolScheduler - - from dimos.agents.memory.base import AbstractAgentSemanticMemory - from dimos.agents.tokenizer.base import AbstractTokenizer - from dimos.stream.frame_processor import FrameProcessor - -# Initialize environment variables -load_dotenv() - -# Initialize logger for the Cerebras agent -logger = setup_logger("dimos.agents.cerebras") - - -# Response object compatible with LLMAgent -class CerebrasResponseMessage(dict): - def __init__( - self, - content: str = "", - tool_calls=None, - ) -> None: - self.content = content - self.tool_calls = tool_calls or [] - self.parsed = None - - # Initialize as dict with the proper structure - super().__init__(self.to_dict()) - - def __str__(self) -> str: - # Return a string representation for logging - if self.content: - return self.content - elif self.tool_calls: - # Return JSON representation of the first tool call - if self.tool_calls: - tool_call = self.tool_calls[0] - tool_json = { - "name": tool_call.function.name, - "arguments": json.loads(tool_call.function.arguments), - } - return json.dumps(tool_json) - return "[No content]" - - def to_dict(self): - """Convert to dictionary format for JSON serialization.""" - result = {"role": "assistant", "content": self.content or ""} - - if self.tool_calls: - result["tool_calls"] = [] - for tool_call in self.tool_calls: - result["tool_calls"].append( - { - "id": tool_call.id, - "type": "function", - "function": { - "name": tool_call.function.name, - "arguments": tool_call.function.arguments, - }, - } - ) - - return result - - -class CerebrasAgent(LLMAgent): - """Cerebras agent implementation using the official Cerebras Python SDK. - - This class implements the _send_query method to interact with Cerebras API - using their official SDK, allowing most of the LLMAgent logic to be reused. - """ - - def __init__( - self, - dev_name: str, - agent_type: str = "Vision", - query: str = "What do you see?", - input_query_stream: Observable | None = None, - input_video_stream: Observable | None = None, - input_data_stream: Observable | None = None, - output_dir: str = os.path.join(os.getcwd(), "assets", "agent"), - agent_memory: AbstractAgentSemanticMemory | None = None, - system_query: str | None = None, - max_input_tokens_per_request: int = 128000, - max_output_tokens_per_request: int = 16384, - model_name: str = "llama-4-scout-17b-16e-instruct", - skills: AbstractSkill | list[AbstractSkill] | SkillLibrary | None = None, - response_model: BaseModel | None = None, - frame_processor: FrameProcessor | None = None, - image_detail: str = "low", - pool_scheduler: ThreadPoolScheduler | None = None, - process_all_inputs: bool | None = None, - tokenizer: AbstractTokenizer | None = None, - prompt_builder: PromptBuilder | None = None, - ) -> None: - """ - Initializes a new instance of the CerebrasAgent. - - Args: - dev_name (str): The device name of the agent. - agent_type (str): The type of the agent. - query (str): The default query text. - input_query_stream (Observable): An observable for query input. - input_video_stream (Observable): An observable for video frames. - input_data_stream (Observable): An observable for data input. - output_dir (str): Directory for output files. - agent_memory (AbstractAgentSemanticMemory): The memory system. - system_query (str): The system prompt to use with RAG context. - max_input_tokens_per_request (int): Maximum tokens for input. - max_output_tokens_per_request (int): Maximum tokens for output. - model_name (str): The Cerebras model name to use. Available options: - - llama-4-scout-17b-16e-instruct (default, fastest) - - llama3.1-8b - - llama-3.3-70b - - qwen-3-32b - - deepseek-r1-distill-llama-70b (private preview) - skills (Union[AbstractSkill, List[AbstractSkill], SkillLibrary]): Skills available to the agent. - response_model (BaseModel): Optional Pydantic model for structured responses. - frame_processor (FrameProcessor): Custom frame processor. - image_detail (str): Detail level for images ("low", "high", "auto"). - pool_scheduler (ThreadPoolScheduler): The scheduler to use for thread pool operations. - process_all_inputs (bool): Whether to process all inputs or skip when busy. - tokenizer (AbstractTokenizer): The tokenizer for the agent. - prompt_builder (PromptBuilder): The prompt builder for the agent. - """ - # Determine appropriate default for process_all_inputs if not provided - if process_all_inputs is None: - # Default to True for text queries, False for video streams - if input_query_stream is not None and input_video_stream is None: - process_all_inputs = True - else: - process_all_inputs = False - - super().__init__( - dev_name=dev_name, - agent_type=agent_type, - agent_memory=agent_memory, - pool_scheduler=pool_scheduler, - process_all_inputs=process_all_inputs, - system_query=system_query, - input_query_stream=input_query_stream, - input_video_stream=input_video_stream, - input_data_stream=input_data_stream, - ) - - # Initialize Cerebras client - self.client = Cerebras() - - self.query = query - self.output_dir = output_dir - os.makedirs(self.output_dir, exist_ok=True) - - # Initialize conversation history for multi-turn conversations - self.conversation_history = [] - self._history_lock = threading.Lock() - - # Configure skills - self.skills = skills - self.skill_library = None - if isinstance(self.skills, SkillLibrary): - self.skill_library = self.skills - elif isinstance(self.skills, list): - self.skill_library = SkillLibrary() - for skill in self.skills: - self.skill_library.add(skill) - elif isinstance(self.skills, AbstractSkill): - self.skill_library = SkillLibrary() - self.skill_library.add(self.skills) - - self.response_model = response_model - self.model_name = model_name - self.image_detail = image_detail - self.max_output_tokens_per_request = max_output_tokens_per_request - self.max_input_tokens_per_request = max_input_tokens_per_request - self.max_tokens_per_request = max_input_tokens_per_request + max_output_tokens_per_request - - # Add static context to memory. - self._add_context_to_memory() - - # Initialize tokenizer and prompt builder - self.tokenizer = tokenizer or OpenAITokenizer( - model_name="gpt-4o" - ) # Use GPT-4 tokenizer for better accuracy - self.prompt_builder = prompt_builder or PromptBuilder( - model_name=self.model_name, - max_tokens=self.max_input_tokens_per_request, - tokenizer=self.tokenizer, - ) - - logger.info("Cerebras Agent Initialized.") - - def _add_context_to_memory(self) -> None: - """Adds initial context to the agent's memory.""" - context_data = [ - ( - "id0", - "Optical Flow is a technique used to track the movement of objects in a video sequence.", - ), - ( - "id1", - "Edge Detection is a technique used to identify the boundaries of objects in an image.", - ), - ("id2", "Video is a sequence of frames captured at regular intervals."), - ( - "id3", - "Colors in Optical Flow are determined by the movement of light, and can be used to track the movement of objects.", - ), - ( - "id4", - "Json is a data interchange format that is easy for humans to read and write, and easy for machines to parse and generate.", - ), - ] - for doc_id, text in context_data: - self.agent_memory.add_vector(doc_id, text) - - def _build_prompt( - self, - messages: list, - base64_image: str | list[str] | None = None, - dimensions: tuple[int, int] | None = None, - override_token_limit: bool = False, - condensed_results: str = "", - ) -> list: - """Builds a prompt message specifically for Cerebras API. - - Args: - messages (list): Existing messages list to build upon. - base64_image (Union[str, List[str]]): Optional Base64-encoded image(s). - dimensions (Tuple[int, int]): Optional image dimensions. - override_token_limit (bool): Whether to override token limits. - condensed_results (str): The condensed RAG context. - - Returns: - list: Messages formatted for Cerebras API. - """ - # Add system message if provided and not already in history - if self.system_query and (not messages or messages[0].get("role") != "system"): - messages.insert(0, {"role": "system", "content": self.system_query}) - logger.info("Added system message to conversation") - - # Append user query while handling RAG - if condensed_results: - user_message = {"role": "user", "content": f"{condensed_results}\n\n{self.query}"} - logger.info("Created user message with RAG context") - else: - user_message = {"role": "user", "content": self.query} - - messages.append(user_message) - - if base64_image is not None: - # Handle both single image (str) and multiple images (List[str]) - images = [base64_image] if isinstance(base64_image, str) else base64_image - - # For Cerebras, we'll add images inline with text (OpenAI-style format) - for img in images: - img_content = [ - {"type": "text", "text": "Here is an image to analyze:"}, - { - "type": "image_url", - "image_url": { - "url": f"data:image/jpeg;base64,{img}", - "detail": self.image_detail, - }, - }, - ] - messages.append({"role": "user", "content": img_content}) - - logger.info(f"Added {len(images)} image(s) to conversation") - - # Use new truncation function - messages = self._truncate_messages(messages, override_token_limit) - - return messages - - def _truncate_messages(self, messages: list, override_token_limit: bool = False) -> list: - """Truncate messages if total tokens exceed 16k using existing truncate_tokens method. - - Args: - messages (list): List of message dictionaries - override_token_limit (bool): Whether to skip truncation - - Returns: - list: Messages with content truncated if needed - """ - if override_token_limit: - return messages - - total_tokens = 0 - for message in messages: - if isinstance(message.get("content"), str): - total_tokens += self.prompt_builder.tokenizer.token_count(message["content"]) - elif isinstance(message.get("content"), list): - for item in message["content"]: - if item.get("type") == "text": - total_tokens += self.prompt_builder.tokenizer.token_count(item["text"]) - elif item.get("type") == "image_url": - total_tokens += 85 - - if total_tokens > 16000: - excess_tokens = total_tokens - 16000 - current_tokens = total_tokens - - # Start from oldest messages and truncate until under 16k - for i in range(len(messages)): - if current_tokens <= 16000: - break - - msg = messages[i] - if msg.get("role") == "system": - continue - - if isinstance(msg.get("content"), str): - original_tokens = self.prompt_builder.tokenizer.token_count(msg["content"]) - # Calculate how much to truncate from this message - tokens_to_remove = min(excess_tokens, original_tokens // 3) - new_max_tokens = max(50, original_tokens - tokens_to_remove) - - msg["content"] = self.prompt_builder.truncate_tokens( - msg["content"], new_max_tokens, "truncate_end" - ) - - new_tokens = self.prompt_builder.tokenizer.token_count(msg["content"]) - tokens_saved = original_tokens - new_tokens - current_tokens -= tokens_saved - excess_tokens -= tokens_saved - - logger.info( - f"Truncated older messages using truncate_tokens, final tokens: {current_tokens}" - ) - else: - logger.info(f"No truncation needed, total tokens: {total_tokens}") - - return messages - - def clean_cerebras_schema(self, schema: dict) -> dict: - """Simple schema cleaner that removes unsupported fields for Cerebras API.""" - if not isinstance(schema, dict): - return schema - - # Removing the problematic fields that pydantic generates - cleaned = {} - unsupported_fields = { - "minItems", - "maxItems", - "uniqueItems", - "exclusiveMinimum", - "exclusiveMaximum", - "minimum", - "maximum", - } - - for key, value in schema.items(): - if key in unsupported_fields: - continue # Skip unsupported fields - elif isinstance(value, dict): - cleaned[key] = self.clean_cerebras_schema(value) - elif isinstance(value, list): - cleaned[key] = [ - self.clean_cerebras_schema(item) if isinstance(item, dict) else item - for item in value - ] - else: - cleaned[key] = value - - return cleaned - - def create_tool_call( - self, - name: str | None = None, - arguments: dict | None = None, - call_id: str | None = None, - content: str | None = None, - ): - """Create a tool call object from either direct parameters or JSON content.""" - # If content is provided, parse it as JSON - if content: - logger.info(f"Creating tool call from content: {content}") - try: - content_json = json.loads(content) - if ( - isinstance(content_json, dict) - and "name" in content_json - and "arguments" in content_json - ): - name = content_json["name"] - arguments = content_json["arguments"] - else: - return None - except json.JSONDecodeError: - logger.warning("Content appears to be JSON but failed to parse") - return None - - # Create the tool call object - if name and arguments is not None: - timestamp = int(time.time() * 1000000) # microsecond precision - tool_id = f"call_{timestamp}" - - logger.info(f"Creating tool call with timestamp ID: {tool_id}") - return type( - "ToolCall", - (), - { - "id": tool_id, - "function": type( - "Function", (), {"name": name, "arguments": json.dumps(arguments)} - ), - }, - ) - - return None - - def _send_query(self, messages: list) -> CerebrasResponseMessage: - """Sends the query to Cerebras API using the official Cerebras SDK. - - Args: - messages (list): The prompt messages to send. - - Returns: - The response message from Cerebras wrapped in our CerebrasResponseMessage class. - - Raises: - Exception: If no response message is returned from the API. - ConnectionError: If there's an issue connecting to the API. - ValueError: If the messages or other parameters are invalid. - """ - try: - # Prepare API call parameters - api_params = { - "model": self.model_name, - "messages": messages, - # "max_tokens": self.max_output_tokens_per_request, - } - - # Add tools if available - if self.skill_library and self.skill_library.get_tools(): - tools = self.skill_library.get_tools() - for tool in tools: - if "function" in tool and "parameters" in tool["function"]: - tool["function"]["parameters"] = self.clean_cerebras_schema( - tool["function"]["parameters"] - ) - api_params["tools"] = tools - api_params["tool_choice"] = "auto" - - if self.response_model is not None: - api_params["response_format"] = { - "type": "json_object", - "schema": self.response_model, - } - - # Make the API call - response = self.client.chat.completions.create(**api_params) - - raw_message = response.choices[0].message - if raw_message is None: - logger.error("Response message does not exist.") - raise Exception("Response message does not exist.") - - # Process response into final format - content = raw_message.content - tool_calls = getattr(raw_message, "tool_calls", None) - - # If no structured tool calls from API, try parsing content as JSON tool call - if not tool_calls and content and content.strip().startswith("{"): - parsed_tool_call = self.create_tool_call(content=content) - if parsed_tool_call: - tool_calls = [parsed_tool_call] - content = None - - return CerebrasResponseMessage(content=content, tool_calls=tool_calls) - - except ConnectionError as ce: - logger.error(f"Connection error with Cerebras API: {ce}") - raise - except ValueError as ve: - logger.error(f"Invalid parameters for Cerebras API: {ve}") - raise - except Exception as e: - # Print the raw API parameters when an error occurs - logger.error(f"Raw API parameters: {json.dumps(api_params, indent=2)}") - logger.error(f"Unexpected error in Cerebras API call: {e}") - raise - - def _observable_query( - self, - observer: Observer, - base64_image: str | None = None, - dimensions: tuple[int, int] | None = None, - override_token_limit: bool = False, - incoming_query: str | None = None, - reset_conversation: bool = False, - ): - """Main query handler that manages conversation history and Cerebras interactions. - - This method follows ClaudeAgent's pattern for efficient conversation history management. - - Args: - observer (Observer): The observer to emit responses to. - base64_image (str): Optional Base64-encoded image. - dimensions (Tuple[int, int]): Optional image dimensions. - override_token_limit (bool): Whether to override token limits. - incoming_query (str): Optional query to update the agent's query. - reset_conversation (bool): Whether to reset the conversation history. - """ - try: - # Reset conversation history if requested - if reset_conversation: - self.conversation_history = [] - logger.info("Conversation history reset") - - # Create a local copy of conversation history and record its length - messages = copy.deepcopy(self.conversation_history) - - # Update query and get context - self._update_query(incoming_query) - _, condensed_results = self._get_rag_context() - - # Build prompt - messages = self._build_prompt( - messages, base64_image, dimensions, override_token_limit, condensed_results - ) - - while True: - logger.info("Sending Query.") - response_message = self._send_query(messages) - logger.info(f"Received Response: {response_message}") - - if response_message is None: - raise Exception("Response message does not exist.") - - # If no skill library or no tool calls, we're done - if ( - self.skill_library is None - or self.skill_library.get_tools() is None - or response_message.tool_calls is None - ): - final_msg = ( - response_message.parsed - if hasattr(response_message, "parsed") and response_message.parsed - else ( - response_message.content - if hasattr(response_message, "content") - else response_message - ) - ) - messages.append(response_message) - break - - logger.info(f"Assistant requested {len(response_message.tool_calls)} tool call(s)") - next_response = self._handle_tooling(response_message, messages) - - if next_response is None: - final_msg = response_message.content or "" - break - - response_message = next_response - - with self._history_lock: - self.conversation_history = messages - logger.info( - f"Updated conversation history (total: {len(self.conversation_history)} messages)" - ) - - # Emit the final message content to the observer - observer.on_next(final_msg) - self.response_subject.on_next(final_msg) - observer.on_completed() - - except Exception as e: - logger.error(f"Query failed in {self.dev_name}: {e}") - observer.on_error(e) - self.response_subject.on_error(e) diff --git a/dimos/agents/planning_agent.py b/dimos/agents/planning_agent.py deleted file mode 100644 index 6dbdbf5866..0000000000 --- a/dimos/agents/planning_agent.py +++ /dev/null @@ -1,318 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from textwrap import dedent -import threading -import time -from typing import Literal - -from pydantic import BaseModel -from reactivex import Observable, operators as ops - -from dimos.agents.agent import OpenAIAgent -from dimos.skills.skills import AbstractSkill -from dimos.utils.logging_config import setup_logger - -logger = setup_logger("dimos.agents.planning_agent") - - -# For response validation -class PlanningAgentResponse(BaseModel): - type: Literal["dialogue", "plan"] - content: list[str] - needs_confirmation: bool - - -class PlanningAgent(OpenAIAgent): - """Agent that plans and breaks down tasks through dialogue. - - This agent specializes in: - 1. Understanding complex tasks through dialogue - 2. Breaking tasks into concrete, executable steps - 3. Refining plans based on user feedback - 4. Streaming individual steps to ExecutionAgents - - The agent maintains conversation state and can refine plans until - the user confirms they are ready to execute. - """ - - def __init__( - self, - dev_name: str = "PlanningAgent", - model_name: str = "gpt-4", - input_query_stream: Observable | None = None, - use_terminal: bool = False, - skills: AbstractSkill | None = None, - ) -> None: - """Initialize the planning agent. - - Args: - dev_name: Name identifier for the agent - model_name: OpenAI model to use - input_query_stream: Observable stream of user queries - use_terminal: Whether to enable terminal input - skills: Available skills/functions for the agent - """ - # Planning state - self.conversation_history = [] - self.current_plan = [] - self.plan_confirmed = False - self.latest_response = None - - # Build system prompt - skills_list = [] - if skills is not None: - skills_list = skills.get_tools() - - system_query = dedent(f""" - You are a Robot planning assistant that helps break down tasks into concrete, executable steps. - Your goal is to: - 1. Break down the task into clear, sequential steps - 2. Refine the plan based on user feedback as needed - 3. Only finalize the plan when the user explicitly confirms - - You have the following skills at your disposal: - {skills_list} - - IMPORTANT: You MUST ALWAYS respond with ONLY valid JSON in the following format, with no additional text or explanation: - {{ - "type": "dialogue" | "plan", - "content": string | list[string], - "needs_confirmation": boolean - }} - - Your goal is to: - 1. Understand the user's task through dialogue - 2. Break it down into clear, sequential steps - 3. Refine the plan based on user feedback - 4. Only finalize the plan when the user explicitly confirms - - For dialogue responses, use: - {{ - "type": "dialogue", - "content": "Your message to the user", - "needs_confirmation": false - }} - - For plan proposals, use: - {{ - "type": "plan", - "content": ["Execute", "Execute", ...], - "needs_confirmation": true - }} - - Remember: ONLY output valid JSON, no other text.""") - - # Initialize OpenAIAgent with our configuration - super().__init__( - dev_name=dev_name, - agent_type="Planning", - query="", # Will be set by process_user_input - model_name=model_name, - input_query_stream=input_query_stream, - system_query=system_query, - max_output_tokens_per_request=1000, - response_model=PlanningAgentResponse, - ) - logger.info("Planning agent initialized") - - # Set up terminal mode if requested - self.use_terminal = use_terminal - use_terminal = False - if use_terminal: - # Start terminal interface in a separate thread - logger.info("Starting terminal interface in a separate thread") - terminal_thread = threading.Thread(target=self.start_terminal_interface, daemon=True) - terminal_thread.start() - - def _handle_response(self, response) -> None: - """Handle the agent's response and update state. - - Args: - response: ParsedChatCompletionMessage containing PlanningAgentResponse - """ - print("handle response", response) - print("handle response type", type(response)) - - # Extract the PlanningAgentResponse from parsed field if available - planning_response = response.parsed if hasattr(response, "parsed") else response - print("planning response", planning_response) - print("planning response type", type(planning_response)) - # Convert to dict for storage in conversation history - response_dict = planning_response.model_dump() - self.conversation_history.append(response_dict) - - # If it's a plan, update current plan - if planning_response.type == "plan": - logger.info(f"Updating current plan: {planning_response.content}") - self.current_plan = planning_response.content - - # Store latest response - self.latest_response = response_dict - - def _stream_plan(self) -> None: - """Stream each step of the confirmed plan.""" - logger.info("Starting to stream plan steps") - logger.debug(f"Current plan: {self.current_plan}") - - for i, step in enumerate(self.current_plan, 1): - logger.info(f"Streaming step {i}: {step}") - # Add a small delay between steps to ensure they're processed - time.sleep(0.5) - try: - self.response_subject.on_next(str(step)) - logger.debug(f"Successfully emitted step {i} to response_subject") - except Exception as e: - logger.error(f"Error emitting step {i}: {e}") - - logger.info("Plan streaming completed") - self.response_subject.on_completed() - - def _send_query(self, messages: list) -> PlanningAgentResponse: - """Send query to OpenAI and parse the response. - - Extends OpenAIAgent's _send_query to handle planning-specific response formats. - - Args: - messages: List of message dictionaries - - Returns: - PlanningAgentResponse: Validated response with type, content, and needs_confirmation - """ - try: - return super()._send_query(messages) - except Exception as e: - logger.error(f"Caught exception in _send_query: {e!s}") - return PlanningAgentResponse( - type="dialogue", content=f"Error: {e!s}", needs_confirmation=False - ) - - def process_user_input(self, user_input: str) -> None: - """Process user input and generate appropriate response. - - Args: - user_input: The user's message - """ - if not user_input: - return - - # Check for plan confirmation - if self.current_plan and user_input.lower() in ["yes", "y", "confirm"]: - logger.info("Plan confirmation received") - self.plan_confirmed = True - # Create a proper PlanningAgentResponse with content as a list - confirmation_msg = PlanningAgentResponse( - type="dialogue", - content="Plan confirmed! Streaming steps to execution...", - needs_confirmation=False, - ) - self._handle_response(confirmation_msg) - self._stream_plan() - return - - # Build messages for OpenAI with conversation history - messages = [ - {"role": "system", "content": self.system_query} # Using system_query from OpenAIAgent - ] - - # Add the new user input to conversation history - self.conversation_history.append({"type": "user_message", "content": user_input}) - - # Add complete conversation history including both user and assistant messages - for msg in self.conversation_history: - if msg["type"] == "user_message": - messages.append({"role": "user", "content": msg["content"]}) - elif msg["type"] == "dialogue": - messages.append({"role": "assistant", "content": msg["content"]}) - elif msg["type"] == "plan": - plan_text = "Here's my proposed plan:\n" + "\n".join( - f"{i + 1}. {step}" for i, step in enumerate(msg["content"]) - ) - messages.append({"role": "assistant", "content": plan_text}) - - # Get and handle response - response = self._send_query(messages) - self._handle_response(response) - - def start_terminal_interface(self) -> None: - """Start the terminal interface for input/output.""" - - time.sleep(5) # buffer time for clean terminal interface printing - print("=" * 50) - print("\nDimOS Action PlanningAgent\n") - print("I have access to your Robot() and Robot Skills()") - print( - "Describe your task and I'll break it down into steps using your skills as a reference." - ) - print("Once you're happy with the plan, type 'yes' to execute it.") - print("Type 'quit' to exit.\n") - - while True: - try: - print("=" * 50) - user_input = input("USER > ") - if user_input.lower() in ["quit", "exit"]: - break - - self.process_user_input(user_input) - - # Display response - if self.latest_response["type"] == "dialogue": - print(f"\nPlanner: {self.latest_response['content']}") - elif self.latest_response["type"] == "plan": - print("\nProposed Plan:") - for i, step in enumerate(self.latest_response["content"], 1): - print(f"{i}. {step}") - if self.latest_response["needs_confirmation"]: - print("\nDoes this plan look good? (yes/no)") - - if self.plan_confirmed: - print("\nPlan confirmed! Streaming steps to execution...") - break - - except KeyboardInterrupt: - print("\nStopping...") - break - except Exception as e: - print(f"\nError: {e}") - break - - def get_response_observable(self) -> Observable: - """Gets an observable that emits responses from this agent. - - This method processes the response stream from the parent class, - extracting content from `PlanningAgentResponse` objects and flattening - any lists of plan steps for emission. - - Returns: - Observable: An observable that emits plan steps from the agent. - """ - - def extract_content(response) -> list[str]: - if isinstance(response, PlanningAgentResponse): - if response.type == "plan": - return response.content # List of steps to be emitted individually - else: # dialogue type - return [response.content] # Wrap single dialogue message in a list - else: - return [str(response)] # Wrap non-PlanningAgentResponse in a list - - # Get base observable from parent class - base_observable = super().get_response_observable() - - # Process the stream: extract content and flatten plan lists - return base_observable.pipe( - ops.map(extract_content), - ops.flat_map(lambda items: items), # Flatten the list of items - ) diff --git a/dimos/agents/test_agent_image_message.py b/dimos/agents/test_agent_image_message.py deleted file mode 100644 index c7f84bcefe..0000000000 --- a/dimos/agents/test_agent_image_message.py +++ /dev/null @@ -1,403 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Test BaseAgent with AgentMessage containing images.""" - -import logging -import os - -from dotenv import load_dotenv -import numpy as np -import pytest - -from dimos.agents.agent_message import AgentMessage -from dimos.agents.modules.base import BaseAgent -from dimos.msgs.sensor_msgs import Image -from dimos.msgs.sensor_msgs.Image import ImageFormat -from dimos.utils.logging_config import setup_logger - -logger = setup_logger("test_agent_image_message") -# Enable debug logging for base module -logging.getLogger("dimos.agents.modules.base").setLevel(logging.DEBUG) - - -@pytest.mark.tofix -def test_agent_single_image() -> None: - """Test agent with single image in AgentMessage.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - # Create agent - agent = BaseAgent( - model="openai::gpt-4o-mini", - system_prompt="You are a helpful vision assistant. Describe what you see concisely.", - temperature=0.0, - seed=42, - ) - - # Create AgentMessage with text and single image - msg = AgentMessage() - msg.add_text("What color is this image?") - - # Create a solid red image in RGB format for clarity - red_data = np.zeros((100, 100, 3), dtype=np.uint8) - red_data[:, :, 0] = 255 # R channel (index 0 in RGB) - red_data[:, :, 1] = 0 # G channel (index 1 in RGB) - red_data[:, :, 2] = 0 # B channel (index 2 in RGB) - # Explicitly specify RGB format to avoid confusion - red_img = Image.from_numpy(red_data, format=ImageFormat.RGB) - print(f"[Test] Created image format: {red_img.format}, shape: {red_img.data.shape}") - msg.add_image(red_img) - - # Query - response = agent.query(msg) - print(f"\n[Test] Single image response: '{response.content}'") - - # Verify response - assert response.content is not None - # The model should mention a color or describe the image - response_lower = response.content.lower() - # Accept any color mention since models may see colors differently - color_mentioned = any( - word in response_lower - for word in ["red", "blue", "color", "solid", "image", "shade", "hue"] - ) - assert color_mentioned, f"Expected color description in response, got: {response.content}" - - # Check conversation history - assert agent.conversation.size() == 2 - # User message should have content array - history = agent.conversation.to_openai_format() - user_msg = history[0] - assert user_msg["role"] == "user" - assert isinstance(user_msg["content"], list), "Multimodal message should have content array" - assert len(user_msg["content"]) == 2 # text + image - assert user_msg["content"][0]["type"] == "text" - assert user_msg["content"][0]["text"] == "What color is this image?" - assert user_msg["content"][1]["type"] == "image_url" - - # Clean up - agent.dispose() - - -@pytest.mark.tofix -def test_agent_multiple_images() -> None: - """Test agent with multiple images in AgentMessage.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - # Create agent - agent = BaseAgent( - model="openai::gpt-4o-mini", - system_prompt="You are a helpful vision assistant that compares images.", - temperature=0.0, - seed=42, - ) - - # Create AgentMessage with multiple images - msg = AgentMessage() - msg.add_text("Compare these three images.") - msg.add_text("What are their colors?") - - # Create three different colored images - red_img = Image(data=np.full((50, 50, 3), [255, 0, 0], dtype=np.uint8)) - green_img = Image(data=np.full((50, 50, 3), [0, 255, 0], dtype=np.uint8)) - blue_img = Image(data=np.full((50, 50, 3), [0, 0, 255], dtype=np.uint8)) - - msg.add_image(red_img) - msg.add_image(green_img) - msg.add_image(blue_img) - - # Query - response = agent.query(msg) - - # Verify response acknowledges the images - response_lower = response.content.lower() - # Check if the model is actually seeing the images - if "unable to view" in response_lower or "can't see" in response_lower: - print(f"WARNING: Model not seeing images: {response.content}") - # Still pass the test but note the issue - else: - # If the model can see images, it should mention some colors - colors_mentioned = sum( - 1 - for color in ["red", "green", "blue", "color", "image", "bright", "dark"] - if color in response_lower - ) - assert colors_mentioned >= 1, ( - f"Expected color/image references, found none in: {response.content}" - ) - - # Check history structure - history = agent.conversation.to_openai_format() - user_msg = history[0] - assert user_msg["role"] == "user" - assert isinstance(user_msg["content"], list) - assert len(user_msg["content"]) == 4 # 1 text + 3 images - assert user_msg["content"][0]["type"] == "text" - assert user_msg["content"][0]["text"] == "Compare these three images. What are their colors?" - - # Verify all images are in the message - for i in range(1, 4): - assert user_msg["content"][i]["type"] == "image_url" - assert user_msg["content"][i]["image_url"]["url"].startswith("data:image/jpeg;base64,") - - # Clean up - agent.dispose() - - -@pytest.mark.tofix -def test_agent_image_with_context() -> None: - """Test agent maintaining context with image queries.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - # Create agent - agent = BaseAgent( - model="openai::gpt-4o-mini", - system_prompt="You are a helpful vision assistant with good memory.", - temperature=0.0, - seed=42, - ) - - # First query with image - msg1 = AgentMessage() - msg1.add_text("This is my favorite color.") - msg1.add_text("Remember it.") - - # Create purple image - purple_img = Image(data=np.full((80, 80, 3), [128, 0, 128], dtype=np.uint8)) - msg1.add_image(purple_img) - - response1 = agent.query(msg1) - # The model should acknowledge the color or mention the image - assert any( - word in response1.content.lower() - for word in ["purple", "violet", "color", "image", "magenta"] - ), f"Expected color or image reference in response: {response1.content}" - - # Second query without image, referencing the first - response2 = agent.query("What was my favorite color that I showed you?") - # Check if the model acknowledges the previous conversation - response_lower = response2.content.lower() - logger.info(f"Response: {response2.content}") - assert any( - word in response_lower - for word in ["purple", "violet", "color", "favorite", "showed", "image"] - ), f"Agent should reference previous conversation: {response2.content}" - - # Check conversation history has all messages - assert agent.conversation.size() == 4 - - # Clean up - agent.dispose() - - -@pytest.mark.tofix -def test_agent_mixed_content() -> None: - """Test agent with mixed text-only and image queries.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - # Create agent - agent = BaseAgent( - model="openai::gpt-4o-mini", - system_prompt="You are a helpful assistant that can see images when provided.", - temperature=0.0, - seed=100, - ) - - # Text-only query - response1 = agent.query("Hello! Can you see images?") - assert response1.content is not None - - # Image query - msg2 = AgentMessage() - msg2.add_text("Now look at this image.") - msg2.add_text("What do you see? Describe the scene.") - - # Use first frame from rgbd_frames test data - import numpy as np - from PIL import Image as PILImage - - from dimos.msgs.sensor_msgs import Image - from dimos.utils.data import get_data - - data_path = get_data("rgbd_frames") - image_path = os.path.join(data_path, "color", "00000.png") - - pil_image = PILImage.open(image_path) - image_array = np.array(pil_image) - - image = Image.from_numpy(image_array) - - msg2.add_image(image) - - # Check image encoding - logger.info(f"Image shape: {image.data.shape}") - logger.info(f"Image encoding: {len(image.agent_encode())} chars") - - response2 = agent.query(msg2) - logger.info(f"Image query response: {response2.content}") - logger.info(f"Agent supports vision: {agent._supports_vision}") - logger.info(f"Message has images: {msg2.has_images()}") - logger.info(f"Number of images in message: {len(msg2.images)}") - # Check that the model saw and described the image - assert any( - word in response2.content.lower() - for word in ["desk", "chair", "table", "laptop", "computer", "screen", "monitor"] - ), f"Expected description of office scene, got: {response2.content}" - - # Another text-only query - response3 = agent.query("What did I just show you?") - words = ["office", "room", "hallway", "image", "scene"] - content = response3.content.lower() - - assert any(word in content for word in words), f"{content=}" - - # Check history structure - assert agent.conversation.size() == 6 - history = agent.conversation.to_openai_format() - # First query should be simple string - assert isinstance(history[0]["content"], str) - # Second query should be content array - assert isinstance(history[2]["content"], list) - # Third query should be simple string again - assert isinstance(history[4]["content"], str) - - # Clean up - agent.dispose() - - -@pytest.mark.tofix -def test_agent_empty_image_message() -> None: - """Test edge case with empty parts of AgentMessage.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - # Create agent - agent = BaseAgent( - model="openai::gpt-4o-mini", - system_prompt="You are a helpful assistant.", - temperature=0.0, - seed=42, - ) - - # AgentMessage with only images, no text - msg = AgentMessage() - # Don't add any text - - # Add a simple colored image - img = Image(data=np.full((60, 60, 3), [255, 255, 0], dtype=np.uint8)) # Yellow - msg.add_image(img) - - response = agent.query(msg) - # Should still work even without text - assert response.content is not None - assert len(response.content) > 0 - - # AgentMessage with empty text parts - msg2 = AgentMessage() - msg2.add_text("") # Empty - msg2.add_text("What") - msg2.add_text("") # Empty - msg2.add_text("color?") - msg2.add_image(img) - - response2 = agent.query(msg2) - # Accept various color interpretations for yellow (RGB 255,255,0) - response_lower = response2.content.lower() - assert any( - color in response_lower for color in ["yellow", "color", "bright", "turquoise", "green"] - ), f"Expected color reference in response: {response2.content}" - - # Clean up - agent.dispose() - - -@pytest.mark.tofix -def test_agent_non_vision_model_with_images() -> None: - """Test that non-vision models handle image input gracefully.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - # Create agent with non-vision model - agent = BaseAgent( - model="openai::gpt-3.5-turbo", # This model doesn't support vision - system_prompt="You are a helpful assistant.", - temperature=0.0, - seed=42, - ) - - # Try to send an image - msg = AgentMessage() - msg.add_text("What do you see in this image?") - - img = Image(data=np.zeros((100, 100, 3), dtype=np.uint8)) - msg.add_image(img) - - # Should log warning and process as text-only - response = agent.query(msg) - assert response.content is not None - - # Check history - should be text-only - history = agent.conversation.to_openai_format() - user_msg = history[0] - assert isinstance(user_msg["content"], str), "Non-vision model should store text-only" - assert user_msg["content"] == "What do you see in this image?" - - # Clean up - agent.dispose() - - -@pytest.mark.tofix -def test_mock_agent_with_images() -> None: - """Test mock agent with images for CI.""" - # This test doesn't need API keys - - from dimos.agents.test_base_agent_text import MockAgent - - # Create mock agent - agent = MockAgent(model="mock::vision", system_prompt="Mock vision agent") - agent._supports_vision = True # Enable vision support - - # Test with image - msg = AgentMessage() - msg.add_text("What color is this?") - - img = Image(data=np.zeros((50, 50, 3), dtype=np.uint8)) - msg.add_image(img) - - response = agent.query(msg) - assert response.content is not None - assert "Mock response" in response.content or "color" in response.content - - # Check conversation history - assert agent.conversation.size() == 2 - - # Clean up - agent.dispose() diff --git a/dimos/agents/test_agent_message_streams.py b/dimos/agents/test_agent_message_streams.py deleted file mode 100644 index 22d33b46de..0000000000 --- a/dimos/agents/test_agent_message_streams.py +++ /dev/null @@ -1,387 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Test BaseAgent with AgentMessage and video streams.""" - -import asyncio -import os -import pickle - -from dotenv import load_dotenv -import pytest -from reactivex import operators as ops - -from dimos import core -from dimos.agents.agent_message import AgentMessage -from dimos.agents.agent_types import AgentResponse -from dimos.agents.modules.base_agent import BaseAgentModule -from dimos.core import In, Module, Out, rpc -from dimos.msgs.sensor_msgs import Image -from dimos.protocol import pubsub -from dimos.utils.data import get_data -from dimos.utils.logging_config import setup_logger -from dimos.utils.testing import TimedSensorReplay - -logger = setup_logger("test_agent_message_streams") - - -class VideoMessageSender(Module): - """Module that sends AgentMessage with video frames every 2 seconds.""" - - message_out: Out[AgentMessage] = None - - def __init__(self, video_path: str) -> None: - super().__init__() - self.video_path = video_path - self._subscription = None - self._frame_count = 0 - - @rpc - def start(self) -> None: - """Start sending video messages.""" - # Use TimedSensorReplay to replay video frames - video_replay = TimedSensorReplay(self.video_path, autocast=Image.from_numpy) - - # Send AgentMessage with frame every 3 seconds (give agent more time to process) - self._subscription = ( - video_replay.stream() - .pipe( - ops.sample(3.0), # Every 3 seconds - ops.take(3), # Only send 3 frames total - ops.map(self._create_message), - ) - .subscribe( - on_next=lambda msg: self._send_message(msg), - on_error=lambda e: logger.error(f"Video stream error: {e}"), - on_completed=lambda: logger.info("Video stream completed"), - ) - ) - - logger.info("Video message streaming started (every 3 seconds, max 3 frames)") - - def _create_message(self, frame: Image) -> AgentMessage: - """Create AgentMessage with frame and query.""" - self._frame_count += 1 - - msg = AgentMessage() - msg.add_text(f"What do you see in frame {self._frame_count}? Describe in one sentence.") - msg.add_image(frame) - - logger.info(f"Created message with frame {self._frame_count}") - return msg - - def _send_message(self, msg: AgentMessage) -> None: - """Send the message and test pickling.""" - # Test that message can be pickled (for module communication) - try: - pickled = pickle.dumps(msg) - pickle.loads(pickled) - logger.info(f"Message pickling test passed - size: {len(pickled)} bytes") - except Exception as e: - logger.error(f"Message pickling failed: {e}") - - self.message_out.publish(msg) - - @rpc - def stop(self) -> None: - """Stop streaming.""" - if self._subscription: - self._subscription.dispose() - self._subscription = None - - -class MultiImageMessageSender(Module): - """Send AgentMessage with multiple images.""" - - message_out: Out[AgentMessage] = None - - def __init__(self, video_path: str) -> None: - super().__init__() - self.video_path = video_path - self.frames = [] - - @rpc - def start(self) -> None: - """Collect some frames.""" - video_replay = TimedSensorReplay(self.video_path, autocast=Image.from_numpy) - - # Collect first 3 frames - video_replay.stream().pipe(ops.take(3)).subscribe( - on_next=lambda frame: self.frames.append(frame), - on_completed=self._send_multi_image_query, - ) - - def _send_multi_image_query(self) -> None: - """Send query with multiple images.""" - if len(self.frames) >= 2: - msg = AgentMessage() - msg.add_text("Compare these images and describe what changed between them.") - - for _i, frame in enumerate(self.frames[:2]): - msg.add_image(frame) - - logger.info(f"Sending multi-image message with {len(msg.images)} images") - - # Test pickling - try: - pickled = pickle.dumps(msg) - logger.info(f"Multi-image message pickle size: {len(pickled)} bytes") - except Exception as e: - logger.error(f"Multi-image pickling failed: {e}") - - self.message_out.publish(msg) - - -class ResponseCollector(Module): - """Collect responses.""" - - response_in: In[AgentResponse] = None - - def __init__(self) -> None: - super().__init__() - self.responses = [] - - @rpc - def start(self) -> None: - self.response_in.subscribe(self._on_response) - - def _on_response(self, resp: AgentResponse) -> None: - logger.info(f"Collected response: {resp.content[:100] if resp.content else 'None'}...") - self.responses.append(resp) - - @rpc - def get_responses(self): - return self.responses - - -@pytest.mark.tofix -@pytest.mark.module -@pytest.mark.asyncio -async def test_agent_message_video_stream() -> None: - """Test BaseAgentModule with AgentMessage containing video frames.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - pubsub.lcm.autoconf() - - logger.info("Testing BaseAgentModule with AgentMessage video stream...") - dimos = core.start(4) - - try: - # Get test video - data_path = get_data("unitree_office_walk") - video_path = os.path.join(data_path, "video") - - logger.info(f"Using video from: {video_path}") - - # Deploy modules - video_sender = dimos.deploy(VideoMessageSender, video_path) - video_sender.message_out.transport = core.pLCMTransport("/agent/message") - - agent = dimos.deploy( - BaseAgentModule, - model="openai::gpt-4o-mini", - system_prompt="You are a vision assistant. Describe what you see concisely.", - temperature=0.0, - ) - agent.response_out.transport = core.pLCMTransport("/agent/response") - - collector = dimos.deploy(ResponseCollector) - - # Connect modules - agent.message_in.connect(video_sender.message_out) - collector.response_in.connect(agent.response_out) - - # Start modules - agent.start() - collector.start() - video_sender.start() - - logger.info("All modules started, streaming video messages...") - - # Wait for 3 messages to be sent (3 frames * 3 seconds = 9 seconds) - # Plus processing time, wait 12 seconds total - await asyncio.sleep(12) - - # Stop video stream - video_sender.stop() - - # Get all responses - responses = collector.get_responses() - logger.info(f"\nCollected {len(responses)} responses:") - for i, resp in enumerate(responses): - logger.info( - f"\nResponse {i + 1}: {resp.content if isinstance(resp, AgentResponse) else resp}" - ) - - # Verify we got at least 2 responses (sometimes the 3rd frame doesn't get processed in time) - assert len(responses) >= 2, f"Expected at least 2 responses, got {len(responses)}" - - # Verify responses describe actual scene - all_responses = " ".join( - resp.content if isinstance(resp, AgentResponse) else resp for resp in responses - ).lower() - assert any( - word in all_responses - for word in ["office", "room", "hallway", "corridor", "door", "wall", "floor", "frame"] - ), "Responses should describe the office environment" - - logger.info("\n✅ AgentMessage video stream test PASSED!") - - # Stop agent - agent.stop() - - finally: - dimos.close() - dimos.shutdown() - - -@pytest.mark.tofix -@pytest.mark.module -@pytest.mark.asyncio -async def test_agent_message_multi_image() -> None: - """Test BaseAgentModule with AgentMessage containing multiple images.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - pubsub.lcm.autoconf() - - logger.info("Testing BaseAgentModule with multi-image AgentMessage...") - dimos = core.start(4) - - try: - # Get test video - data_path = get_data("unitree_office_walk") - video_path = os.path.join(data_path, "video") - - # Deploy modules - multi_sender = dimos.deploy(MultiImageMessageSender, video_path) - multi_sender.message_out.transport = core.pLCMTransport("/agent/multi_message") - - agent = dimos.deploy( - BaseAgentModule, - model="openai::gpt-4o-mini", - system_prompt="You are a vision assistant that compares images.", - temperature=0.0, - ) - agent.response_out.transport = core.pLCMTransport("/agent/multi_response") - - collector = dimos.deploy(ResponseCollector) - - # Connect modules - agent.message_in.connect(multi_sender.message_out) - collector.response_in.connect(agent.response_out) - - # Start modules - agent.start() - collector.start() - multi_sender.start() - - logger.info("Modules started, sending multi-image query...") - - # Wait for response - await asyncio.sleep(8) - - # Get responses - responses = collector.get_responses() - logger.info(f"\nCollected {len(responses)} responses:") - for i, resp in enumerate(responses): - logger.info( - f"\nResponse {i + 1}: {resp.content if isinstance(resp, AgentResponse) else resp}" - ) - - # Verify we got a response - assert len(responses) >= 1, f"Expected at least 1 response, got {len(responses)}" - - # Response should mention comparison or multiple images - response_text = ( - responses[0].content if isinstance(responses[0], AgentResponse) else responses[0] - ).lower() - assert any( - word in response_text - for word in ["both", "first", "second", "change", "different", "similar", "compare"] - ), "Response should indicate comparison of multiple images" - - logger.info("\n✅ Multi-image AgentMessage test PASSED!") - - # Stop agent - agent.stop() - - finally: - dimos.close() - dimos.shutdown() - - -@pytest.mark.tofix -def test_agent_message_text_only() -> None: - """Test BaseAgent with text-only AgentMessage.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - from dimos.agents.modules.base import BaseAgent - - # Create agent - agent = BaseAgent( - model="openai::gpt-4o-mini", - system_prompt="You are a helpful assistant. Answer in 10 words or less.", - temperature=0.0, - seed=42, - ) - - # Test with text-only AgentMessage - msg = AgentMessage() - msg.add_text("What is") - msg.add_text("the capital") - msg.add_text("of France?") - - response = agent.query(msg) - assert "Paris" in response.content, "Expected 'Paris' in response" - - # Test pickling of AgentMessage - pickled = pickle.dumps(msg) - unpickled = pickle.loads(pickled) - assert unpickled.get_combined_text() == "What is the capital of France?" - - # Verify multiple text messages were combined properly - assert len(msg.messages) == 3 - assert msg.messages[0] == "What is" - assert msg.messages[1] == "the capital" - assert msg.messages[2] == "of France?" - - logger.info("✅ Text-only AgentMessage test PASSED!") - - # Clean up - agent.dispose() - - -if __name__ == "__main__": - logger.info("Running AgentMessage stream tests...") - - # Run text-only test first - test_agent_message_text_only() - print("\n" + "=" * 60 + "\n") - - # Run async tests - asyncio.run(test_agent_message_video_stream()) - print("\n" + "=" * 60 + "\n") - asyncio.run(test_agent_message_multi_image()) - - logger.info("\n✅ All AgentMessage tests completed!") diff --git a/dimos/agents/test_agent_pool.py b/dimos/agents/test_agent_pool.py deleted file mode 100644 index b3576b80e2..0000000000 --- a/dimos/agents/test_agent_pool.py +++ /dev/null @@ -1,353 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Test agent pool module.""" - -import asyncio -import os - -from dotenv import load_dotenv -import pytest - -from dimos import core -from dimos.agents.modules.base_agent import BaseAgentModule -from dimos.core import In, Module, Out, rpc -from dimos.protocol import pubsub - - -class PoolRouter(Module): - """Simple router for agent pool.""" - - query_in: In[dict] = None - agent1_out: Out[str] = None - agent2_out: Out[str] = None - agent3_out: Out[str] = None - - @rpc - def start(self) -> None: - self.query_in.subscribe(self._route) - - def _route(self, msg: dict) -> None: - agent_id = msg.get("agent_id", "agent1") - query = msg.get("query", "") - - if agent_id == "agent1" and self.agent1_out: - self.agent1_out.publish(query) - elif agent_id == "agent2" and self.agent2_out: - self.agent2_out.publish(query) - elif agent_id == "agent3" and self.agent3_out: - self.agent3_out.publish(query) - elif agent_id == "all": - # Broadcast to all - if self.agent1_out: - self.agent1_out.publish(query) - if self.agent2_out: - self.agent2_out.publish(query) - if self.agent3_out: - self.agent3_out.publish(query) - - -class PoolAggregator(Module): - """Aggregate responses from pool.""" - - agent1_in: In[str] = None - agent2_in: In[str] = None - agent3_in: In[str] = None - response_out: Out[dict] = None - - @rpc - def start(self) -> None: - if self.agent1_in: - self.agent1_in.subscribe(lambda r: self._handle_response("agent1", r)) - if self.agent2_in: - self.agent2_in.subscribe(lambda r: self._handle_response("agent2", r)) - if self.agent3_in: - self.agent3_in.subscribe(lambda r: self._handle_response("agent3", r)) - - def _handle_response(self, agent_id: str, response: str) -> None: - if self.response_out: - self.response_out.publish({"agent_id": agent_id, "response": response}) - - -class PoolController(Module): - """Controller for pool testing.""" - - query_out: Out[dict] = None - - @rpc - def send_to_agent(self, agent_id: str, query: str) -> None: - self.query_out.publish({"agent_id": agent_id, "query": query}) - - @rpc - def broadcast(self, query: str) -> None: - self.query_out.publish({"agent_id": "all", "query": query}) - - -class PoolCollector(Module): - """Collect pool responses.""" - - response_in: In[dict] = None - - def __init__(self) -> None: - super().__init__() - self.responses = [] - - @rpc - def start(self) -> None: - self.response_in.subscribe(lambda r: self.responses.append(r)) - - @rpc - def get_responses(self) -> list: - return self.responses - - @rpc - def get_by_agent(self, agent_id: str) -> list: - return [r for r in self.responses if r.get("agent_id") == agent_id] - - -@pytest.mark.skip("Skipping pool tests for now") -@pytest.mark.module -@pytest.mark.asyncio -async def test_agent_pool() -> None: - """Test agent pool with multiple agents.""" - load_dotenv() - pubsub.lcm.autoconf() - - # Check for at least one API key - has_api_key = any( - [os.getenv("OPENAI_API_KEY"), os.getenv("ANTHROPIC_API_KEY"), os.getenv("CEREBRAS_API_KEY")] - ) - - if not has_api_key: - pytest.skip("No API keys found for testing") - - dimos = core.start(7) - - try: - # Deploy three agents with different configs - agents = [] - models = [] - - if os.getenv("CEREBRAS_API_KEY"): - agent1 = dimos.deploy( - BaseAgentModule, - model="cerebras::llama3.1-8b", - system_prompt="You are agent1. Be very brief.", - ) - agents.append(agent1) - models.append("agent1") - - if os.getenv("OPENAI_API_KEY"): - agent2 = dimos.deploy( - BaseAgentModule, - model="openai::gpt-4o-mini", - system_prompt="You are agent2. Be helpful.", - ) - agents.append(agent2) - models.append("agent2") - - if os.getenv("CEREBRAS_API_KEY") and len(agents) < 3: - agent3 = dimos.deploy( - BaseAgentModule, - model="cerebras::llama3.1-8b", - system_prompt="You are agent3. Be creative.", - ) - agents.append(agent3) - models.append("agent3") - - if len(agents) < 2: - pytest.skip("Need at least 2 working agents for pool test") - - # Deploy router, aggregator, controller, collector - router = dimos.deploy(PoolRouter) - aggregator = dimos.deploy(PoolAggregator) - controller = dimos.deploy(PoolController) - collector = dimos.deploy(PoolCollector) - - # Configure transports - controller.query_out.transport = core.pLCMTransport("/pool/queries") - aggregator.response_out.transport = core.pLCMTransport("/pool/responses") - - # Configure agent transports and connections - if len(agents) > 0: - router.agent1_out.transport = core.pLCMTransport("/pool/agent1/query") - agents[0].response_out.transport = core.pLCMTransport("/pool/agent1/response") - agents[0].query_in.connect(router.agent1_out) - aggregator.agent1_in.connect(agents[0].response_out) - - if len(agents) > 1: - router.agent2_out.transport = core.pLCMTransport("/pool/agent2/query") - agents[1].response_out.transport = core.pLCMTransport("/pool/agent2/response") - agents[1].query_in.connect(router.agent2_out) - aggregator.agent2_in.connect(agents[1].response_out) - - if len(agents) > 2: - router.agent3_out.transport = core.pLCMTransport("/pool/agent3/query") - agents[2].response_out.transport = core.pLCMTransport("/pool/agent3/response") - agents[2].query_in.connect(router.agent3_out) - aggregator.agent3_in.connect(agents[2].response_out) - - # Connect router and collector - router.query_in.connect(controller.query_out) - collector.response_in.connect(aggregator.response_out) - - # Start all modules - for agent in agents: - agent.start() - router.start() - aggregator.start() - collector.start() - - await asyncio.sleep(3) - - # Test direct routing - for _i, model_id in enumerate(models[:2]): # Test first 2 agents - controller.send_to_agent(model_id, f"Say hello from {model_id}") - await asyncio.sleep(0.5) - - await asyncio.sleep(6) - - responses = collector.get_responses() - print(f"Got {len(responses)} responses from direct routing") - assert len(responses) >= len(models[:2]), ( - f"Should get responses from at least {len(models[:2])} agents" - ) - - # Test broadcast - collector.responses.clear() - controller.broadcast("What is 1+1?") - - await asyncio.sleep(6) - - responses = collector.get_responses() - print(f"Got {len(responses)} responses from broadcast (expected {len(agents)})") - # Allow for some agents to be slow - assert len(responses) >= min(2, len(agents)), ( - f"Should get response from at least {min(2, len(agents))} agents" - ) - - # Check all agents responded - agent_ids = {r["agent_id"] for r in responses} - assert len(agent_ids) >= 2, "Multiple agents should respond" - - # Stop all agents - for agent in agents: - agent.stop() - - finally: - dimos.close() - dimos.shutdown() - - -@pytest.mark.skip("Skipping pool tests for now") -@pytest.mark.module -@pytest.mark.asyncio -async def test_mock_agent_pool() -> None: - """Test agent pool with mock agents.""" - pubsub.lcm.autoconf() - - class MockPoolAgent(Module): - """Mock agent for pool testing.""" - - query_in: In[str] = None - response_out: Out[str] = None - - def __init__(self, agent_id: str) -> None: - super().__init__() - self.agent_id = agent_id - - @rpc - def start(self) -> None: - self.query_in.subscribe(self._handle_query) - - def _handle_query(self, query: str) -> None: - if "1+1" in query: - self.response_out.publish(f"{self.agent_id}: The answer is 2") - else: - self.response_out.publish(f"{self.agent_id}: {query}") - - dimos = core.start(6) - - try: - # Deploy mock agents - agent1 = dimos.deploy(MockPoolAgent, agent_id="fast") - agent2 = dimos.deploy(MockPoolAgent, agent_id="smart") - agent3 = dimos.deploy(MockPoolAgent, agent_id="creative") - - # Deploy infrastructure - router = dimos.deploy(PoolRouter) - aggregator = dimos.deploy(PoolAggregator) - collector = dimos.deploy(PoolCollector) - - # Configure all transports - router.query_in.transport = core.pLCMTransport("/mock/pool/queries") - router.agent1_out.transport = core.pLCMTransport("/mock/pool/agent1/q") - router.agent2_out.transport = core.pLCMTransport("/mock/pool/agent2/q") - router.agent3_out.transport = core.pLCMTransport("/mock/pool/agent3/q") - - agent1.response_out.transport = core.pLCMTransport("/mock/pool/agent1/r") - agent2.response_out.transport = core.pLCMTransport("/mock/pool/agent2/r") - agent3.response_out.transport = core.pLCMTransport("/mock/pool/agent3/r") - - aggregator.response_out.transport = core.pLCMTransport("/mock/pool/responses") - - # Connect everything - agent1.query_in.connect(router.agent1_out) - agent2.query_in.connect(router.agent2_out) - agent3.query_in.connect(router.agent3_out) - - aggregator.agent1_in.connect(agent1.response_out) - aggregator.agent2_in.connect(agent2.response_out) - aggregator.agent3_in.connect(agent3.response_out) - - collector.response_in.connect(aggregator.response_out) - - # Start all - agent1.start() - agent2.start() - agent3.start() - router.start() - aggregator.start() - collector.start() - - await asyncio.sleep(0.5) - - # Test routing - router.query_in.transport.publish({"agent_id": "agent1", "query": "Hello"}) - router.query_in.transport.publish({"agent_id": "agent2", "query": "Hi"}) - - await asyncio.sleep(0.5) - - responses = collector.get_responses() - assert len(responses) == 2 - assert any("fast" in r["response"] for r in responses) - assert any("smart" in r["response"] for r in responses) - - # Test broadcast - collector.responses.clear() - router.query_in.transport.publish({"agent_id": "all", "query": "What is 1+1?"}) - - await asyncio.sleep(0.5) - - responses = collector.get_responses() - assert len(responses) == 3 - assert all("2" in r["response"] for r in responses) - - finally: - dimos.close() - dimos.shutdown() - - -if __name__ == "__main__": - asyncio.run(test_mock_agent_pool()) diff --git a/dimos/agents/test_agent_tools.py b/dimos/agents/test_agent_tools.py deleted file mode 100644 index fd485ac015..0000000000 --- a/dimos/agents/test_agent_tools.py +++ /dev/null @@ -1,409 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Production test for BaseAgent tool handling functionality.""" - -import asyncio -import os - -from dotenv import load_dotenv -from pydantic import Field -import pytest - -from dimos import core -from dimos.agents.agent_message import AgentMessage -from dimos.agents.agent_types import AgentResponse -from dimos.agents.modules.base import BaseAgent -from dimos.agents.modules.base_agent import BaseAgentModule -from dimos.core import In, Module, Out, rpc -from dimos.protocol import pubsub -from dimos.skills.skills import AbstractSkill, SkillLibrary -from dimos.utils.logging_config import setup_logger - -logger = setup_logger("test_agent_tools") - - -# Test Skills -class CalculateSkill(AbstractSkill): - """Perform a calculation.""" - - expression: str = Field(description="Mathematical expression to evaluate") - - def __call__(self) -> str: - try: - # Simple evaluation for testing - result = eval(self.expression) - return f"The result is {result}" - except Exception as e: - return f"Error calculating: {e!s}" - - -class WeatherSkill(AbstractSkill): - """Get current weather information for a location. This is a mock weather service that returns test data.""" - - location: str = Field(description="Location to get weather for (e.g. 'London', 'New York')") - - def __call__(self) -> str: - # Mock weather response - return f"The weather in {self.location} is sunny with a temperature of 72°F" - - -class NavigationSkill(AbstractSkill): - """Navigate to a location (potentially long-running).""" - - destination: str = Field(description="Destination to navigate to") - speed: float = Field(default=1.0, description="Navigation speed in m/s") - - def __call__(self) -> str: - # In real implementation, this would start navigation - # For now, simulate blocking behavior - import time - - time.sleep(0.5) # Simulate some processing - return f"Navigation to {self.destination} completed successfully" - - -# Module for testing tool execution -class ToolTestController(Module): - """Controller that sends queries to agent.""" - - message_out: Out[AgentMessage] = None - - @rpc - def send_query(self, query: str) -> None: - msg = AgentMessage() - msg.add_text(query) - self.message_out.publish(msg) - - -class ResponseCollector(Module): - """Collect agent responses.""" - - response_in: In[AgentResponse] = None - - def __init__(self) -> None: - super().__init__() - self.responses = [] - - @rpc - def start(self) -> None: - logger.info("ResponseCollector starting subscription") - self.response_in.subscribe(self._on_response) - logger.info("ResponseCollector subscription active") - - def _on_response(self, response) -> None: - logger.info(f"ResponseCollector received response #{len(self.responses) + 1}: {response}") - self.responses.append(response) - - @rpc - def get_responses(self): - return self.responses - - -@pytest.mark.tofix -@pytest.mark.module -@pytest.mark.asyncio -async def test_agent_module_with_tools() -> None: - """Test BaseAgentModule with tool execution.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - pubsub.lcm.autoconf() - dimos = core.start(4) - - try: - # Create skill library - skill_library = SkillLibrary() - skill_library.add(CalculateSkill) - skill_library.add(WeatherSkill) - skill_library.add(NavigationSkill) - - # Deploy modules - controller = dimos.deploy(ToolTestController) - controller.message_out.transport = core.pLCMTransport("/tools/messages") - - agent = dimos.deploy( - BaseAgentModule, - model="openai::gpt-4o-mini", - system_prompt="You are a helpful assistant with access to calculation, weather, and navigation tools. When asked about weather, you MUST use the WeatherSkill tool - it provides mock weather data for testing. When asked to navigate somewhere, you MUST use the NavigationSkill tool. Always use the appropriate tool when available.", - skills=skill_library, - temperature=0.0, - memory=False, - ) - agent.response_out.transport = core.pLCMTransport("/tools/responses") - - collector = dimos.deploy(ResponseCollector) - - # Connect modules - agent.message_in.connect(controller.message_out) - collector.response_in.connect(agent.response_out) - - # Start modules - agent.start() - collector.start() - - # Wait for initialization - await asyncio.sleep(1) - - # Test 1: Calculation (fast tool) - logger.info("\n=== Test 1: Calculation Tool ===") - controller.send_query("Use the calculate tool to compute 42 * 17") - await asyncio.sleep(5) # Give more time for the response - - responses = collector.get_responses() - logger.info(f"Got {len(responses)} responses after first query") - assert len(responses) >= 1, ( - f"Should have received at least one response, got {len(responses)}" - ) - - response = responses[-1] - logger.info(f"Response: {response}") - - # Verify the calculation result - assert isinstance(response, AgentResponse), "Expected AgentResponse object" - assert "714" in response.content, f"Expected '714' in response, got: {response.content}" - - # Test 2: Weather query (fast tool) - logger.info("\n=== Test 2: Weather Tool ===") - controller.send_query("What's the weather in New York?") - await asyncio.sleep(5) # Give more time for the second response - - responses = collector.get_responses() - assert len(responses) >= 2, "Should have received at least two responses" - - response = responses[-1] - logger.info(f"Response: {response}") - - # Verify weather details - assert isinstance(response, AgentResponse), "Expected AgentResponse object" - assert "new york" in response.content.lower(), "Expected 'New York' in response" - assert "72" in response.content, "Expected temperature '72' in response" - assert "sunny" in response.content.lower(), "Expected 'sunny' in response" - - # Test 3: Navigation (potentially long-running) - logger.info("\n=== Test 3: Navigation Tool ===") - controller.send_query("Use the NavigationSkill to navigate to the kitchen") - await asyncio.sleep(6) # Give more time for navigation tool to complete - - responses = collector.get_responses() - logger.info(f"Total responses collected: {len(responses)}") - for i, r in enumerate(responses): - logger.info(f" Response {i + 1}: {r.content[:50]}...") - assert len(responses) >= 3, ( - f"Should have received at least three responses, got {len(responses)}" - ) - - response = responses[-1] - logger.info(f"Response: {response}") - - # Verify navigation response - assert isinstance(response, AgentResponse), "Expected AgentResponse object" - assert "kitchen" in response.content.lower(), "Expected 'kitchen' in response" - - # Check if NavigationSkill was called - if response.tool_calls is not None and len(response.tool_calls) > 0: - # Tool was called - verify it - assert any(tc.name == "NavigationSkill" for tc in response.tool_calls), ( - "Expected NavigationSkill to be called" - ) - logger.info("✓ NavigationSkill was called") - else: - # Tool wasn't called - just verify response mentions navigation - logger.info("Note: NavigationSkill was not called, agent gave instructions instead") - - # Stop agent - agent.stop() - - # Print summary - logger.info("\n=== Test Summary ===") - all_responses = collector.get_responses() - for i, resp in enumerate(all_responses): - logger.info( - f"Response {i + 1}: {resp.content if isinstance(resp, AgentResponse) else resp}" - ) - - finally: - dimos.close() - dimos.shutdown() - - -@pytest.mark.tofix -def test_base_agent_direct_tools() -> None: - """Test BaseAgent direct usage with tools.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - # Create skill library - skill_library = SkillLibrary() - skill_library.add(CalculateSkill) - skill_library.add(WeatherSkill) - - # Create agent with skills - agent = BaseAgent( - model="openai::gpt-4o-mini", - system_prompt="You are a helpful assistant with access to a calculator tool. When asked to calculate something, you should use the CalculateSkill tool.", - skills=skill_library, - temperature=0.0, - memory=False, - seed=42, - ) - - # Test calculation with explicit tool request - logger.info("\n=== Direct Test 1: Calculation Tool ===") - response = agent.query("Calculate 144**0.5") - - logger.info(f"Response content: {response.content}") - logger.info(f"Tool calls: {response.tool_calls}") - - assert response.content is not None - assert "12" in response.content or "twelve" in response.content.lower(), ( - f"Expected '12' in response, got: {response.content}" - ) - - # Verify tool was called OR answer is correct - if response.tool_calls is not None: - assert len(response.tool_calls) > 0, "Expected at least one tool call" - assert response.tool_calls[0].name == "CalculateSkill", ( - f"Expected CalculateSkill, got: {response.tool_calls[0].name}" - ) - assert response.tool_calls[0].status == "completed", ( - f"Expected completed status, got: {response.tool_calls[0].status}" - ) - logger.info("✓ Tool was called successfully") - else: - logger.warning("Tool was not called - agent answered directly") - - # Test weather tool - logger.info("\n=== Direct Test 2: Weather Tool ===") - response2 = agent.query("Use the WeatherSkill to check the weather in London") - - logger.info(f"Response content: {response2.content}") - logger.info(f"Tool calls: {response2.tool_calls}") - - assert response2.content is not None - assert "london" in response2.content.lower(), "Expected 'London' in response" - assert "72" in response2.content, "Expected temperature '72' in response" - assert "sunny" in response2.content.lower(), "Expected 'sunny' in response" - - # Verify tool was called - if response2.tool_calls is not None: - assert len(response2.tool_calls) > 0, "Expected at least one tool call" - assert response2.tool_calls[0].name == "WeatherSkill", ( - f"Expected WeatherSkill, got: {response2.tool_calls[0].name}" - ) - logger.info("✓ Weather tool was called successfully") - else: - logger.warning("Weather tool was not called - agent answered directly") - - # Clean up - agent.dispose() - - -class MockToolAgent(BaseAgent): - """Mock agent for CI testing without API calls.""" - - def __init__(self, **kwargs) -> None: - # Skip gateway initialization - self.model = kwargs.get("model", "mock::test") - self.system_prompt = kwargs.get("system_prompt", "Mock agent") - self.skills = kwargs.get("skills", SkillLibrary()) - self.history = [] - self._history_lock = __import__("threading").Lock() - self._supports_vision = False - self.response_subject = None - self.gateway = None - self._executor = None - - async def _process_query_async(self, agent_msg, base64_image=None, base64_images=None): - """Mock tool execution.""" - from dimos.agents.agent_message import AgentMessage - from dimos.agents.agent_types import AgentResponse, ToolCall - - # Get text from AgentMessage - if isinstance(agent_msg, AgentMessage): - query = agent_msg.get_combined_text() - else: - query = str(agent_msg) - - # Simple pattern matching for tools - if "calculate" in query.lower(): - # Extract expression - import re - - match = re.search(r"(\d+\s*[\+\-\*/]\s*\d+)", query) - if match: - expr = match.group(1) - tool_call = ToolCall( - id="mock_calc_1", - name="CalculateSkill", - arguments={"expression": expr}, - status="completed", - ) - # Execute the tool - result = self.skills.call("CalculateSkill", expression=expr) - return AgentResponse( - content=f"I calculated {expr} and {result}", tool_calls=[tool_call] - ) - - # Default response - return AgentResponse(content=f"Mock response to: {query}") - - def dispose(self) -> None: - pass - - -@pytest.mark.tofix -def test_mock_agent_tools() -> None: - """Test mock agent with tools for CI.""" - # Create skill library - skill_library = SkillLibrary() - skill_library.add(CalculateSkill) - - # Create mock agent - agent = MockToolAgent(model="mock::test", skills=skill_library) - - # Test calculation - logger.info("\n=== Mock Test: Calculation ===") - response = agent.query("Calculate 25 + 17") - - logger.info(f"Mock response: {response.content}") - logger.info(f"Mock tool calls: {response.tool_calls}") - - assert response.content is not None - assert "42" in response.content, "Expected '42' in response" - assert response.tool_calls is not None, "Expected tool calls" - assert len(response.tool_calls) == 1, "Expected exactly one tool call" - assert response.tool_calls[0].name == "CalculateSkill", "Expected CalculateSkill" - assert response.tool_calls[0].status == "completed", "Expected completed status" - - # Clean up - agent.dispose() - - -if __name__ == "__main__": - # Run tests - test_mock_agent_tools() - print("✅ Mock agent tools test passed") - - test_base_agent_direct_tools() - print("✅ Direct agent tools test passed") - - asyncio.run(test_agent_module_with_tools()) - print("✅ Module agent tools test passed") - - print("\n✅ All production tool tests passed!") diff --git a/dimos/agents/test_agent_with_modules.py b/dimos/agents/test_agent_with_modules.py deleted file mode 100644 index 1a4ac70f65..0000000000 --- a/dimos/agents/test_agent_with_modules.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Test agent module with proper module connections.""" - -import asyncio - -from dotenv import load_dotenv -import pytest - -from dimos import core -from dimos.agents.agent_message import AgentMessage -from dimos.agents.agent_types import AgentResponse -from dimos.agents.modules.base_agent import BaseAgentModule -from dimos.core import In, Module, Out, rpc -from dimos.protocol import pubsub - - -# Test query sender module -class QuerySender(Module): - """Module to send test queries.""" - - message_out: Out[AgentMessage] = None - - def __init__(self) -> None: - super().__init__() - - @rpc - def send_query(self, query: str) -> None: - """Send a query.""" - print(f"Sending query: {query}") - msg = AgentMessage() - msg.add_text(query) - self.message_out.publish(msg) - - -# Test response collector module -class ResponseCollector(Module): - """Module to collect responses.""" - - response_in: In[AgentResponse] = None - - def __init__(self) -> None: - super().__init__() - self.responses = [] - - @rpc - def start(self) -> None: - """Start collecting.""" - self.response_in.subscribe(self._on_response) - - def _on_response(self, msg: AgentResponse) -> None: - print(f"Received response: {msg.content if msg.content else msg}") - self.responses.append(msg) - - @rpc - def get_responses(self): - """Get collected responses.""" - return self.responses - - -@pytest.mark.tofix -@pytest.mark.module -@pytest.mark.asyncio -async def test_agent_module_connections() -> None: - """Test agent module with proper connections.""" - load_dotenv() - pubsub.lcm.autoconf() - - # Start Dask - dimos = core.start(4) - - try: - # Deploy modules - sender = dimos.deploy(QuerySender) - agent = dimos.deploy( - BaseAgentModule, - model="openai::gpt-4o-mini", - system_prompt="You are a helpful assistant. Answer in 10 words or less.", - ) - collector = dimos.deploy(ResponseCollector) - - # Configure transports - sender.message_out.transport = core.pLCMTransport("/messages") - agent.response_out.transport = core.pLCMTransport("/responses") - - # Connect modules - agent.message_in.connect(sender.message_out) - collector.response_in.connect(agent.response_out) - - # Start modules - agent.start() - collector.start() - - # Wait for initialization - await asyncio.sleep(1) - - # Test 1: Simple query - print("\n=== Test 1: Simple Query ===") - sender.send_query("What is 2+2?") - - await asyncio.sleep(5) # Increased wait time for API response - - responses = collector.get_responses() - assert len(responses) > 0, "Should have received a response" - assert isinstance(responses[0], AgentResponse), "Expected AgentResponse object" - assert "4" in responses[0].content or "four" in responses[0].content.lower(), ( - "Should calculate correctly" - ) - - # Test 2: Another query - print("\n=== Test 2: Another Query ===") - sender.send_query("What color is the sky?") - - await asyncio.sleep(5) # Increased wait time - - responses = collector.get_responses() - assert len(responses) >= 2, "Should have at least two responses" - assert isinstance(responses[1], AgentResponse), "Expected AgentResponse object" - assert "blue" in responses[1].content.lower(), "Should mention blue" - - # Test 3: Multiple queries - print("\n=== Test 3: Multiple Queries ===") - queries = ["Count from 1 to 3", "Name a fruit", "What is Python?"] - - for q in queries: - sender.send_query(q) - await asyncio.sleep(2) # Give more time between queries - - await asyncio.sleep(8) # More time for multiple queries - - responses = collector.get_responses() - assert len(responses) >= 4, f"Should have at least 4 responses, got {len(responses)}" - - # Stop modules - agent.stop() - - print("\n=== All tests passed! ===") - - finally: - dimos.close() - dimos.shutdown() - - -if __name__ == "__main__": - asyncio.run(test_agent_module_connections()) diff --git a/dimos/agents/test_base_agent_text.py b/dimos/agents/test_base_agent_text.py deleted file mode 100644 index 022bea9cd2..0000000000 --- a/dimos/agents/test_base_agent_text.py +++ /dev/null @@ -1,562 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Test BaseAgent text functionality.""" - -import asyncio -import os - -from dotenv import load_dotenv -import pytest - -from dimos import core -from dimos.agents.agent_message import AgentMessage -from dimos.agents.agent_types import AgentResponse -from dimos.agents.modules.base import BaseAgent -from dimos.agents.modules.base_agent import BaseAgentModule -from dimos.core import In, Module, Out, rpc -from dimos.protocol import pubsub - - -class QuerySender(Module): - """Module to send test queries.""" - - message_out: Out[AgentMessage] = None # New AgentMessage output - - @rpc - def send_query(self, query: str) -> None: - """Send a query as AgentMessage.""" - msg = AgentMessage() - msg.add_text(query) - self.message_out.publish(msg) - - @rpc - def send_message(self, message: AgentMessage) -> None: - """Send an AgentMessage.""" - self.message_out.publish(message) - - -class ResponseCollector(Module): - """Module to collect responses.""" - - response_in: In[AgentResponse] = None - - def __init__(self) -> None: - super().__init__() - self.responses = [] - - @rpc - def start(self) -> None: - """Start collecting.""" - self.response_in.subscribe(self._on_response) - - def _on_response(self, msg) -> None: - self.responses.append(msg) - - @rpc - def get_responses(self): - """Get collected responses.""" - return self.responses - - -@pytest.mark.tofix -def test_base_agent_direct_text() -> None: - """Test BaseAgent direct text usage.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - # Create agent - agent = BaseAgent( - model="openai::gpt-4o-mini", - system_prompt="You are a helpful assistant. Answer in 10 words or less.", - temperature=0.0, - seed=42, # Fixed seed for deterministic results - ) - - # Test simple query with string (backward compatibility) - response = agent.query("What is 2+2?") - print(f"\n[Test] Query: 'What is 2+2?' -> Response: '{response.content}'") - assert response.content is not None - assert "4" in response.content or "four" in response.content.lower(), ( - f"Expected '4' or 'four' in response, got: {response.content}" - ) - - # Test with AgentMessage - msg = AgentMessage() - msg.add_text("What is 3+3?") - response = agent.query(msg) - print(f"[Test] Query: 'What is 3+3?' -> Response: '{response.content}'") - assert response.content is not None - assert "6" in response.content or "six" in response.content.lower(), ( - "Expected '6' or 'six' in response" - ) - - # Test conversation history - response = agent.query("What was my previous question?") - print(f"[Test] Query: 'What was my previous question?' -> Response: '{response.content}'") - assert response.content is not None - # The agent should reference one of the previous questions - # It might say "2+2" or "3+3" depending on interpretation of "previous" - assert ( - "2+2" in response.content or "3+3" in response.content or "What is" in response.content - ), f"Expected reference to a previous question, got: {response.content}" - - # Clean up - agent.dispose() - - -@pytest.mark.tofix -@pytest.mark.asyncio -async def test_base_agent_async_text() -> None: - """Test BaseAgent async text usage.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - # Create agent - agent = BaseAgent( - model="openai::gpt-4o-mini", - system_prompt="You are a helpful assistant.", - temperature=0.0, - seed=42, - ) - - # Test async query with string - response = await agent.aquery("What is the capital of France?") - assert response.content is not None - assert "Paris" in response.content, "Expected 'Paris' in response" - - # Test async query with AgentMessage - msg = AgentMessage() - msg.add_text("What is the capital of Germany?") - response = await agent.aquery(msg) - assert response.content is not None - assert "Berlin" in response.content, "Expected 'Berlin' in response" - - # Clean up - agent.dispose() - - -@pytest.mark.tofix -@pytest.mark.module -@pytest.mark.asyncio -async def test_base_agent_module_text() -> None: - """Test BaseAgentModule with text via DimOS.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - pubsub.lcm.autoconf() - dimos = core.start(4) - - try: - # Deploy modules - sender = dimos.deploy(QuerySender) - agent = dimos.deploy( - BaseAgentModule, - model="openai::gpt-4o-mini", - system_prompt="You are a helpful assistant. Answer concisely.", - ) - collector = dimos.deploy(ResponseCollector) - - # Configure transports - sender.message_out.transport = core.pLCMTransport("/test/messages") - agent.response_out.transport = core.pLCMTransport("/test/responses") - - # Connect modules - agent.message_in.connect(sender.message_out) - collector.response_in.connect(agent.response_out) - - # Start modules - agent.start() - collector.start() - - # Wait for initialization - await asyncio.sleep(1) - - # Test queries - sender.send_query("What is 2+2?") - await asyncio.sleep(3) - - responses = collector.get_responses() - assert len(responses) > 0, "Should have received a response" - resp = responses[0] - assert isinstance(resp, AgentResponse), "Expected AgentResponse object" - assert "4" in resp.content or "four" in resp.content.lower(), ( - f"Expected '4' or 'four' in response, got: {resp.content}" - ) - - # Test another query - sender.send_query("What color is the sky?") - await asyncio.sleep(3) - - responses = collector.get_responses() - assert len(responses) >= 2, "Should have at least two responses" - resp = responses[1] - assert isinstance(resp, AgentResponse), "Expected AgentResponse object" - assert "blue" in resp.content.lower(), "Expected 'blue' in response" - - # Test conversation history - sender.send_query("What was my first question?") - await asyncio.sleep(3) - - responses = collector.get_responses() - assert len(responses) >= 3, "Should have at least three responses" - resp = responses[2] - assert isinstance(resp, AgentResponse), "Expected AgentResponse object" - assert "2+2" in resp.content or "2" in resp.content, "Expected reference to first question" - - # Stop modules - agent.stop() - - finally: - dimos.close() - dimos.shutdown() - - -@pytest.mark.parametrize( - "model,provider", - [ - ("openai::gpt-4o-mini", "openai"), - ("anthropic::claude-3-haiku-20240307", "anthropic"), - ("cerebras::llama-3.3-70b", "cerebras"), - ], -) -@pytest.mark.tofix -def test_base_agent_providers(model, provider) -> None: - """Test BaseAgent with different providers.""" - load_dotenv() - - # Check for API key - api_key_map = { - "openai": "OPENAI_API_KEY", - "anthropic": "ANTHROPIC_API_KEY", - "cerebras": "CEREBRAS_API_KEY", - } - - if not os.getenv(api_key_map[provider]): - pytest.skip(f"No {api_key_map[provider]} found") - - # Create agent - agent = BaseAgent( - model=model, - system_prompt="You are a helpful assistant. Answer in 10 words or less.", - temperature=0.0, - seed=42, - ) - - # Test query with AgentMessage - msg = AgentMessage() - msg.add_text("What is the capital of France?") - response = agent.query(msg) - assert response.content is not None - assert "Paris" in response.content, f"Expected 'Paris' in response from {provider}" - - # Clean up - agent.dispose() - - -@pytest.mark.tofix -def test_base_agent_memory() -> None: - """Test BaseAgent with memory/RAG.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - # Create agent - agent = BaseAgent( - model="openai::gpt-4o-mini", - system_prompt="You are a helpful assistant. Use the provided context when answering.", - temperature=0.0, - rag_threshold=0.3, - seed=42, - ) - - # Add context to memory - agent.memory.add_vector("doc1", "The DimOS framework is designed for building robotic systems.") - agent.memory.add_vector( - "doc2", "Robots using DimOS can perform navigation and manipulation tasks." - ) - - # Test RAG retrieval with AgentMessage - msg = AgentMessage() - msg.add_text("What is DimOS?") - response = agent.query(msg) - assert response.content is not None - assert "framework" in response.content.lower() or "robotic" in response.content.lower(), ( - "Expected context about DimOS in response" - ) - - # Clean up - agent.dispose() - - -class MockAgent(BaseAgent): - """Mock agent for testing without API calls.""" - - def __init__(self, **kwargs) -> None: - # Don't call super().__init__ to avoid gateway initialization - from dimos.agents.agent_types import ConversationHistory - - self.model = kwargs.get("model", "mock::test") - self.system_prompt = kwargs.get("system_prompt", "Mock agent") - self.conversation = ConversationHistory(max_size=20) - self._supports_vision = False - self.response_subject = None # Simplified - - async def _process_query_async(self, query: str, base64_image=None) -> str: - """Mock response.""" - if "2+2" in query: - return "The answer is 4" - elif "capital" in query and "France" in query: - return "The capital of France is Paris" - elif "color" in query and "sky" in query: - return "The sky is blue" - elif "previous" in query: - history = self.conversation.to_openai_format() - if len(history) >= 2: - # Get the second to last item (the last user query before this one) - for i in range(len(history) - 2, -1, -1): - if history[i]["role"] == "user": - return f"Your previous question was: {history[i]['content']}" - return "No previous questions" - else: - return f"Mock response to: {query}" - - def query(self, message) -> AgentResponse: - """Mock synchronous query.""" - # Convert to text if AgentMessage - if isinstance(message, AgentMessage): - text = message.get_combined_text() - else: - text = message - - # Update conversation history - self.conversation.add_user_message(text) - response = asyncio.run(self._process_query_async(text)) - self.conversation.add_assistant_message(response) - return AgentResponse(content=response) - - async def aquery(self, message) -> AgentResponse: - """Mock async query.""" - # Convert to text if AgentMessage - if isinstance(message, AgentMessage): - text = message.get_combined_text() - else: - text = message - - self.conversation.add_user_message(text) - response = await self._process_query_async(text) - self.conversation.add_assistant_message(response) - return AgentResponse(content=response) - - def dispose(self) -> None: - """Mock dispose.""" - pass - - -@pytest.mark.tofix -def test_mock_agent() -> None: - """Test mock agent for CI without API keys.""" - # Create mock agent - agent = MockAgent(model="mock::test", system_prompt="Mock assistant") - - # Test simple query - response = agent.query("What is 2+2?") - assert isinstance(response, AgentResponse), "Expected AgentResponse object" - assert "4" in response.content - - # Test conversation history - response = agent.query("What was my previous question?") - assert isinstance(response, AgentResponse), "Expected AgentResponse object" - assert "2+2" in response.content - - # Test other queries - response = agent.query("What is the capital of France?") - assert isinstance(response, AgentResponse), "Expected AgentResponse object" - assert "Paris" in response.content - - response = agent.query("What color is the sky?") - assert isinstance(response, AgentResponse), "Expected AgentResponse object" - assert "blue" in response.content.lower() - - # Clean up - agent.dispose() - - -@pytest.mark.tofix -def test_base_agent_conversation_history() -> None: - """Test that conversation history is properly maintained.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - # Create agent - agent = BaseAgent( - model="openai::gpt-4o-mini", - system_prompt="You are a helpful assistant.", - temperature=0.0, - seed=42, - ) - - # Test 1: Simple conversation - response1 = agent.query("My name is Alice") - assert isinstance(response1, AgentResponse) - - # Check conversation history has both messages - assert agent.conversation.size() == 2 - history = agent.conversation.to_openai_format() - assert history[0]["role"] == "user" - assert history[0]["content"] == "My name is Alice" - assert history[1]["role"] == "assistant" - - # Test 2: Reference previous context - response2 = agent.query("What is my name?") - assert "Alice" in response2.content, "Agent should remember the name" - - # Conversation history should now have 4 messages - assert agent.conversation.size() == 4 - - # Test 3: Multiple text parts in AgentMessage - msg = AgentMessage() - msg.add_text("Calculate") - msg.add_text("the sum of") - msg.add_text("5 + 3") - - response3 = agent.query(msg) - assert "8" in response3.content or "eight" in response3.content.lower() - - # Check the combined text was stored correctly - assert agent.conversation.size() == 6 - history = agent.conversation.to_openai_format() - assert history[4]["role"] == "user" - assert history[4]["content"] == "Calculate the sum of 5 + 3" - - # Test 4: History trimming (set low limit) - agent.max_history = 4 - agent.query("What was my first message?") - - # Conversation history should be trimmed to 4 messages - assert agent.conversation.size() == 4 - # First messages should be gone - history = agent.conversation.to_openai_format() - assert "Alice" not in history[0]["content"] - - # Clean up - agent.dispose() - - -@pytest.mark.tofix -def test_base_agent_history_with_tools() -> None: - """Test conversation history with tool calls.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - from pydantic import Field - - from dimos.skills.skills import AbstractSkill, SkillLibrary - - class CalculatorSkill(AbstractSkill): - """Perform calculations.""" - - expression: str = Field(description="Mathematical expression") - - def __call__(self) -> str: - try: - result = eval(self.expression) - return f"The result is {result}" - except: - return "Error in calculation" - - # Create agent with calculator skill - skills = SkillLibrary() - skills.add(CalculatorSkill) - - agent = BaseAgent( - model="openai::gpt-4o-mini", - system_prompt="You are a helpful assistant with a calculator. Use the calculator tool when asked to compute something.", - skills=skills, - temperature=0.0, - seed=42, - ) - - # Make a query that should trigger tool use - response = agent.query("Please calculate 42 * 17 using the calculator tool") - - # Check response - assert isinstance(response, AgentResponse) - assert "714" in response.content, f"Expected 714 in response, got: {response.content}" - - # Check tool calls were made - if response.tool_calls: - assert len(response.tool_calls) > 0 - assert response.tool_calls[0].name == "CalculatorSkill" - assert response.tool_calls[0].status == "completed" - - # Check history structure - # If tools were called, we should have more messages - if response.tool_calls and len(response.tool_calls) > 0: - assert agent.conversation.size() >= 3, ( - f"Expected at least 3 messages in history when tools are used, got {agent.conversation.size()}" - ) - - # Find the assistant message with tool calls - history = agent.conversation.to_openai_format() - tool_msg_found = False - tool_result_found = False - - for msg in history: - if msg.get("role") == "assistant" and msg.get("tool_calls"): - tool_msg_found = True - if msg.get("role") == "tool": - tool_result_found = True - assert "result" in msg.get("content", "").lower() - - assert tool_msg_found, "Tool call message should be in history when tools were used" - assert tool_result_found, "Tool result should be in history when tools were used" - else: - # No tools used, just verify we have user and assistant messages - assert agent.conversation.size() >= 2, ( - f"Expected at least 2 messages in history, got {agent.conversation.size()}" - ) - # The model solved it without using the tool - that's also acceptable - print("Note: Model solved without using the calculator tool") - - # Clean up - agent.dispose() - - -if __name__ == "__main__": - test_base_agent_direct_text() - asyncio.run(test_base_agent_async_text()) - asyncio.run(test_base_agent_module_text()) - test_base_agent_memory() - test_mock_agent() - test_base_agent_conversation_history() - test_base_agent_history_with_tools() - print("\n✅ All text tests passed!") - test_base_agent_direct_text() - asyncio.run(test_base_agent_async_text()) - asyncio.run(test_base_agent_module_text()) - test_base_agent_memory() - test_mock_agent() - print("\n✅ All text tests passed!") diff --git a/dimos/agents/test_conversation_history.py b/dimos/agents/test_conversation_history.py deleted file mode 100644 index 95b28fbc0b..0000000000 --- a/dimos/agents/test_conversation_history.py +++ /dev/null @@ -1,416 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Comprehensive conversation history tests for agents.""" - -import asyncio -import logging -import os - -from dotenv import load_dotenv -import numpy as np -from pydantic import Field -import pytest - -from dimos.agents.agent_message import AgentMessage -from dimos.agents.agent_types import AgentResponse -from dimos.agents.modules.base import BaseAgent -from dimos.msgs.sensor_msgs import Image -from dimos.skills.skills import AbstractSkill, SkillLibrary - -logger = logging.getLogger(__name__) - - -@pytest.mark.tofix -def test_conversation_history_basic() -> None: - """Test basic conversation history functionality.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - agent = BaseAgent( - model="openai::gpt-4o-mini", - system_prompt="You are a helpful assistant with perfect memory.", - temperature=0.0, - seed=42, - ) - - try: - # Test 1: Simple text conversation - response1 = agent.query("My favorite color is blue") - assert isinstance(response1, AgentResponse) - assert agent.conversation.size() == 2 # user + assistant - - # Test 2: Reference previous information - response2 = agent.query("What is my favorite color?") - assert "blue" in response2.content.lower(), "Agent should remember the color" - assert agent.conversation.size() == 4 - - # Test 3: Multiple facts - agent.query("I live in San Francisco") - agent.query("I work as an engineer") - - # Verify history is building up - assert agent.conversation.size() == 8 # 4 exchanges (blue, what color, SF, engineer) - - response = agent.query("Tell me what you know about me") - - # Check if agent remembers at least some facts - # Note: Models may sometimes give generic responses, so we check for any memory - facts_mentioned = 0 - if "blue" in response.content.lower() or "color" in response.content.lower(): - facts_mentioned += 1 - if "san francisco" in response.content.lower() or "francisco" in response.content.lower(): - facts_mentioned += 1 - if "engineer" in response.content.lower(): - facts_mentioned += 1 - - # Agent should remember at least one fact, or acknowledge the conversation - assert facts_mentioned > 0 or "know" in response.content.lower(), ( - f"Agent should show some memory of conversation, got: {response.content}" - ) - - # Verify history properly accumulates - assert agent.conversation.size() == 10 - - finally: - agent.dispose() - - -@pytest.mark.tofix -def test_conversation_history_with_images() -> None: - """Test conversation history with multimodal content.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - agent = BaseAgent( - model="openai::gpt-4o-mini", - system_prompt="You are a helpful vision assistant.", - temperature=0.0, - seed=42, - ) - - try: - # Send text message - agent.query("I'm going to show you some colors") - assert agent.conversation.size() == 2 - - # Send image with text - msg = AgentMessage() - msg.add_text("This is a red square") - red_img = Image(data=np.full((100, 100, 3), [255, 0, 0], dtype=np.uint8)) - msg.add_image(red_img) - - agent.query(msg) - assert agent.conversation.size() == 4 - - # Ask about the image - response3 = agent.query("What color did I just show you?") - # Check for any color mention (models sometimes see colors differently) - assert any( - color in response3.content.lower() - for color in ["red", "blue", "green", "color", "square"] - ), f"Should mention a color, got: {response3.content}" - - # Send another image - msg2 = AgentMessage() - msg2.add_text("Now here's a blue square") - blue_img = Image(data=np.full((100, 100, 3), [0, 0, 255], dtype=np.uint8)) - msg2.add_image(blue_img) - - agent.query(msg2) - assert agent.conversation.size() == 8 - - # Ask about all images - response5 = agent.query("What colors have I shown you?") - # Should mention seeing images/colors even if specific colors are wrong - assert any( - word in response5.content.lower() - for word in ["red", "blue", "colors", "squares", "images", "shown", "two"] - ), f"Should acknowledge seeing images, got: {response5.content}" - - # Verify both message types are in history - assert agent.conversation.size() == 10 - - finally: - agent.dispose() - - -@pytest.mark.tofix -def test_conversation_history_trimming() -> None: - """Test that conversation history is trimmed to max size.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - # Create agent with small history limit - agent = BaseAgent( - model="openai::gpt-4o-mini", - system_prompt="You are a helpful assistant.", - temperature=0.0, - max_history=3, # Keep 3 message pairs (6 messages total) - seed=42, - ) - - try: - # Add several messages - agent.query("Message 1: I like apples") - assert agent.conversation.size() == 2 - - agent.query("Message 2: I like oranges") - # Now we have 2 pairs (4 messages) - # max_history=3 means we keep max 3 messages total (not pairs!) - size = agent.conversation.size() - # After trimming to 3, we'd have kept the most recent 3 messages - assert size == 3, f"After Message 2, size should be 3, got {size}" - - agent.query("Message 3: I like bananas") - size = agent.conversation.size() - assert size == 3, f"After Message 3, size should be 3, got {size}" - - # This should maintain trimming - agent.query("Message 4: I like grapes") - size = agent.conversation.size() - assert size == 3, f"After Message 4, size should still be 3, got {size}" - - # Add one more - agent.query("Message 5: I like strawberries") - size = agent.conversation.size() - assert size == 3, f"After Message 5, size should still be 3, got {size}" - - # Early messages should be trimmed - agent.query("What was the first fruit I mentioned?") - size = agent.conversation.size() - assert size == 3, f"After question, size should still be 3, got {size}" - - # Change max_history dynamically - agent.max_history = 2 - agent.query("New message after resize") - # Now history should be trimmed to 2 messages - size = agent.conversation.size() - assert size == 2, f"After resize to max_history=2, size should be 2, got {size}" - - finally: - agent.dispose() - - -@pytest.mark.tofix -def test_conversation_history_with_tools() -> None: - """Test conversation history with tool calls.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - # Create a simple skill - class CalculatorSkillLocal(AbstractSkill): - """A simple calculator skill.""" - - expression: str = Field(description="Mathematical expression to evaluate") - - def __call__(self) -> str: - try: - result = eval(self.expression) - return f"The result is {result}" - except Exception as e: - return f"Error: {e}" - - # Create skill library properly - class TestSkillLibrary(SkillLibrary): - CalculatorSkill = CalculatorSkillLocal - - agent = BaseAgent( - model="openai::gpt-4o-mini", - system_prompt="You are a helpful assistant with access to a calculator.", - skills=TestSkillLibrary(), - temperature=0.0, - seed=100, - ) - - try: - # Initial query - agent.query("Hello, I need help with math") - assert agent.conversation.size() == 2 - - # Force tool use explicitly - response2 = agent.query( - "I need you to use the CalculatorSkill tool to compute 123 * 456. " - "Do NOT calculate it yourself - you MUST use the calculator tool function." - ) - - assert agent.conversation.size() == 6 # 2 + 1 + 3 - assert response2.tool_calls is not None and len(response2.tool_calls) > 0 - assert "56088" in response2.content.replace(",", "") - - # Ask about previous calculation - response3 = agent.query("What was the result of the calculation?") - assert "56088" in response3.content.replace(",", "") or "123" in response3.content.replace( - ",", "" - ) - assert agent.conversation.size() == 8 - - finally: - agent.dispose() - - -@pytest.mark.tofix -def test_conversation_thread_safety() -> None: - """Test that conversation history is thread-safe.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - agent = BaseAgent(model="openai::gpt-4o-mini", temperature=0.0, seed=42) - - try: - - async def query_async(text: str): - """Async wrapper for query.""" - return await agent.aquery(text) - - async def run_concurrent(): - """Run multiple queries concurrently.""" - tasks = [query_async(f"Query {i}") for i in range(3)] - return await asyncio.gather(*tasks) - - # Run concurrent queries - results = asyncio.run(run_concurrent()) - assert len(results) == 3 - - # Should have roughly 6 messages (3 queries * 2) - # Exact count may vary due to thread timing - assert agent.conversation.size() >= 4 - assert agent.conversation.size() <= 6 - - finally: - agent.dispose() - - -@pytest.mark.tofix -def test_conversation_history_formats() -> None: - """Test ConversationHistory formatting methods.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - agent = BaseAgent(model="openai::gpt-4o-mini", temperature=0.0, seed=42) - - try: - # Create a conversation - agent.conversation.add_user_message("Hello") - agent.conversation.add_assistant_message("Hi there!") - - # Test text with images - agent.conversation.add_user_message( - [ - {"type": "text", "text": "Look at this"}, - {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,abc123"}}, - ] - ) - agent.conversation.add_assistant_message("I see the image") - - # Test tool messages - agent.conversation.add_assistant_message( - content="", - tool_calls=[ - { - "id": "call_123", - "type": "function", - "function": {"name": "test", "arguments": "{}"}, - } - ], - ) - agent.conversation.add_tool_result( - tool_call_id="call_123", content="Tool result", name="test" - ) - - # Get OpenAI format - messages = agent.conversation.to_openai_format() - assert len(messages) == 6 - - # Verify message formats - assert messages[0]["role"] == "user" - assert messages[0]["content"] == "Hello" - - assert messages[2]["role"] == "user" - assert isinstance(messages[2]["content"], list) - - # Tool response message should be at index 5 (after assistant with tool_calls at index 4) - assert messages[5]["role"] == "tool" - assert messages[5]["tool_call_id"] == "call_123" - assert messages[5]["name"] == "test" - - finally: - agent.dispose() - - -@pytest.mark.tofix -@pytest.mark.timeout(30) # Add timeout to prevent hanging -def test_conversation_edge_cases() -> None: - """Test edge cases in conversation history.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OPENAI_API_KEY found") - - agent = BaseAgent( - model="openai::gpt-4o-mini", - system_prompt="You are a helpful assistant.", - temperature=0.0, - seed=42, - ) - - try: - # Empty message - msg1 = AgentMessage() - msg1.add_text("") - response1 = agent.query(msg1) - assert response1.content is not None - - # Moderately long message (reduced from 1000 to 100 words) - long_text = "word " * 100 - response2 = agent.query(long_text) - assert response2.content is not None - - # Multiple text parts that combine - msg3 = AgentMessage() - for i in range(5): # Reduced from 10 to 5 - msg3.add_text(f"Part {i} ") - response3 = agent.query(msg3) - assert response3.content is not None - - # Verify history is maintained correctly - assert agent.conversation.size() == 6 # 3 exchanges - - finally: - agent.dispose() - - -if __name__ == "__main__": - # Run tests - test_conversation_history_basic() - test_conversation_history_with_images() - test_conversation_history_trimming() - test_conversation_history_with_tools() - test_conversation_thread_safety() - test_conversation_history_formats() - test_conversation_edge_cases() - print("\n✅ All conversation history tests passed!") diff --git a/dimos/agents/test_gateway.py b/dimos/agents/test_gateway.py deleted file mode 100644 index 2c54d5d1ac..0000000000 --- a/dimos/agents/test_gateway.py +++ /dev/null @@ -1,203 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Test gateway functionality.""" - -import asyncio -import os - -from dotenv import load_dotenv -import pytest - -from dimos.agents.modules.gateway import UnifiedGatewayClient - - -@pytest.mark.tofix -@pytest.mark.asyncio -async def test_gateway_basic() -> None: - """Test basic gateway functionality.""" - load_dotenv() - - # Check for at least one API key - has_api_key = any( - [os.getenv("OPENAI_API_KEY"), os.getenv("ANTHROPIC_API_KEY"), os.getenv("CEREBRAS_API_KEY")] - ) - - if not has_api_key: - pytest.skip("No API keys found for gateway test") - - gateway = UnifiedGatewayClient() - - try: - # Test with available provider - if os.getenv("OPENAI_API_KEY"): - model = "openai::gpt-4o-mini" - elif os.getenv("ANTHROPIC_API_KEY"): - model = "anthropic::claude-3-haiku-20240307" - else: - model = "cerebras::llama3.1-8b" - - messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Say 'Hello Gateway' and nothing else."}, - ] - - # Test non-streaming - response = await gateway.ainference( - model=model, messages=messages, temperature=0.0, max_tokens=10 - ) - - assert "choices" in response - assert len(response["choices"]) > 0 - assert "message" in response["choices"][0] - assert "content" in response["choices"][0]["message"] - - content = response["choices"][0]["message"]["content"] - assert "hello" in content.lower() or "gateway" in content.lower() - - finally: - gateway.close() - - -@pytest.mark.tofix -@pytest.mark.asyncio -async def test_gateway_streaming() -> None: - """Test gateway streaming functionality.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("OpenAI API key required for streaming test") - - gateway = UnifiedGatewayClient() - - try: - messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Count from 1 to 3"}, - ] - - # Test streaming - chunks = [] - async for chunk in await gateway.ainference( - model="openai::gpt-4o-mini", messages=messages, temperature=0.0, stream=True - ): - chunks.append(chunk) - - assert len(chunks) > 0, "Should receive stream chunks" - - # Reconstruct content - content = "" - for chunk in chunks: - if chunk.get("choices"): - delta = chunk["choices"][0].get("delta", {}) - chunk_content = delta.get("content") - if chunk_content is not None: - content += chunk_content - - assert any(str(i) in content for i in [1, 2, 3]), "Should count numbers" - - finally: - gateway.close() - - -@pytest.mark.tofix -@pytest.mark.asyncio -async def test_gateway_tools() -> None: - """Test gateway can pass tool definitions to LLM and get responses.""" - load_dotenv() - - if not os.getenv("OPENAI_API_KEY"): - pytest.skip("OpenAI API key required for tools test") - - gateway = UnifiedGatewayClient() - - try: - # Just test that gateway accepts tools parameter and returns valid response - tools = [ - { - "type": "function", - "function": { - "name": "test_function", - "description": "A test function", - "parameters": { - "type": "object", - "properties": {"param": {"type": "string"}}, - }, - }, - } - ] - - messages = [ - {"role": "user", "content": "Hello, just testing the gateway"}, - ] - - # Just verify gateway doesn't crash when tools are provided - response = await gateway.ainference( - model="openai::gpt-4o-mini", messages=messages, tools=tools, temperature=0.0 - ) - - # Basic validation - gateway returned something - assert "choices" in response - assert len(response["choices"]) > 0 - assert "message" in response["choices"][0] - - finally: - gateway.close() - - -@pytest.mark.tofix -@pytest.mark.asyncio -async def test_gateway_providers() -> None: - """Test gateway with different providers.""" - load_dotenv() - - gateway = UnifiedGatewayClient() - - providers_tested = 0 - - try: - # Test each available provider - test_cases = [ - ("openai::gpt-4o-mini", "OPENAI_API_KEY"), - ("anthropic::claude-3-haiku-20240307", "ANTHROPIC_API_KEY"), - # ("cerebras::llama3.1-8b", "CEREBRAS_API_KEY"), - ("qwen::qwen-turbo", "DASHSCOPE_API_KEY"), - ] - - for model, env_var in test_cases: - if not os.getenv(env_var): - continue - - providers_tested += 1 - - messages = [{"role": "user", "content": "Reply with just the word 'OK'"}] - - response = await gateway.ainference( - model=model, messages=messages, temperature=0.0, max_tokens=10 - ) - - assert "choices" in response - content = response["choices"][0]["message"]["content"] - assert len(content) > 0, f"{model} should return content" - - if providers_tested == 0: - pytest.skip("No API keys found for provider test") - - finally: - gateway.close() - - -if __name__ == "__main__": - load_dotenv() - asyncio.run(test_gateway_basic()) diff --git a/dimos/agents/test_simple_agent_module.py b/dimos/agents/test_simple_agent_module.py deleted file mode 100644 index bd374877dd..0000000000 --- a/dimos/agents/test_simple_agent_module.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Test simple agent module with string input/output.""" - -import asyncio -import os - -from dotenv import load_dotenv -import pytest - -from dimos import core -from dimos.agents.agent_message import AgentMessage -from dimos.agents.agent_types import AgentResponse -from dimos.agents.modules.base_agent import BaseAgentModule -from dimos.core import In, Module, Out, rpc -from dimos.protocol import pubsub - - -class QuerySender(Module): - """Module to send test queries.""" - - message_out: Out[AgentMessage] = None - - @rpc - def send_query(self, query: str) -> None: - """Send a query.""" - msg = AgentMessage() - msg.add_text(query) - self.message_out.publish(msg) - - -class ResponseCollector(Module): - """Module to collect responses.""" - - response_in: In[AgentResponse] = None - - def __init__(self) -> None: - super().__init__() - self.responses = [] - - @rpc - def start(self) -> None: - """Start collecting.""" - self.response_in.subscribe(self._on_response) - - def _on_response(self, response: AgentResponse) -> None: - """Handle response.""" - self.responses.append(response) - - @rpc - def get_responses(self) -> list: - """Get collected responses.""" - return self.responses - - @rpc - def clear(self) -> None: - """Clear responses.""" - self.responses = [] - - -@pytest.mark.tofix -@pytest.mark.module -@pytest.mark.asyncio -@pytest.mark.parametrize( - "model,provider", - [ - ("openai::gpt-4o-mini", "OpenAI"), - ("anthropic::claude-3-haiku-20240307", "Claude"), - ("cerebras::llama3.1-8b", "Cerebras"), - ("qwen::qwen-turbo", "Qwen"), - ], -) -async def test_simple_agent_module(model, provider) -> None: - """Test simple agent module with different providers.""" - load_dotenv() - - # Skip if no API key - if provider == "OpenAI" and not os.getenv("OPENAI_API_KEY"): - pytest.skip("No OpenAI API key found") - elif provider == "Claude" and not os.getenv("ANTHROPIC_API_KEY"): - pytest.skip("No Anthropic API key found") - elif provider == "Cerebras" and not os.getenv("CEREBRAS_API_KEY"): - pytest.skip("No Cerebras API key found") - elif provider == "Qwen" and not os.getenv("ALIBABA_API_KEY"): - pytest.skip("No Qwen API key found") - - pubsub.lcm.autoconf() - - # Start Dask cluster - dimos = core.start(3) - - try: - # Deploy modules - sender = dimos.deploy(QuerySender) - agent = dimos.deploy( - BaseAgentModule, - model=model, - system_prompt=f"You are a helpful {provider} assistant. Keep responses brief.", - ) - collector = dimos.deploy(ResponseCollector) - - # Configure transports - sender.message_out.transport = core.pLCMTransport(f"/test/{provider}/messages") - agent.response_out.transport = core.pLCMTransport(f"/test/{provider}/responses") - - # Connect modules - agent.message_in.connect(sender.message_out) - collector.response_in.connect(agent.response_out) - - # Start modules - agent.start() - collector.start() - - await asyncio.sleep(1) - - # Test simple math - sender.send_query("What is 2+2?") - await asyncio.sleep(5) - - responses = collector.get_responses() - assert len(responses) > 0, f"{provider} should respond" - assert isinstance(responses[0], AgentResponse), "Expected AgentResponse object" - assert "4" in responses[0].content, f"{provider} should calculate correctly" - - # Test brief response - collector.clear() - sender.send_query("Name one color.") - await asyncio.sleep(5) - - responses = collector.get_responses() - assert len(responses) > 0, f"{provider} should respond" - assert isinstance(responses[0], AgentResponse), "Expected AgentResponse object" - assert len(responses[0].content) < 200, f"{provider} should give brief response" - - # Stop modules - agent.stop() - - finally: - dimos.close() - dimos.shutdown() - - -@pytest.mark.tofix -@pytest.mark.module -@pytest.mark.asyncio -async def test_mock_agent_module() -> None: - """Test agent module with mock responses (no API needed).""" - pubsub.lcm.autoconf() - - class MockAgentModule(Module): - """Mock agent for testing.""" - - message_in: In[AgentMessage] = None - response_out: Out[AgentResponse] = None - - @rpc - def start(self) -> None: - self.message_in.subscribe(self._handle_message) - - def _handle_message(self, msg: AgentMessage) -> None: - query = msg.get_combined_text() - if "2+2" in query: - self.response_out.publish(AgentResponse(content="4")) - elif "color" in query.lower(): - self.response_out.publish(AgentResponse(content="Blue")) - else: - self.response_out.publish(AgentResponse(content=f"Mock response to: {query}")) - - dimos = core.start(2) - - try: - # Deploy - agent = dimos.deploy(MockAgentModule) - collector = dimos.deploy(ResponseCollector) - - # Configure - agent.message_in.transport = core.pLCMTransport("/mock/messages") - agent.response_out.transport = core.pLCMTransport("/mock/response") - - # Connect - collector.response_in.connect(agent.response_out) - - # Start - agent.start() - collector.start() - - await asyncio.sleep(1) - - # Test - use a simple query sender - sender = dimos.deploy(QuerySender) - sender.message_out.transport = core.pLCMTransport("/mock/messages") - agent.message_in.connect(sender.message_out) - - await asyncio.sleep(1) - - sender.send_query("What is 2+2?") - await asyncio.sleep(1) - - responses = collector.get_responses() - assert len(responses) == 1 - assert isinstance(responses[0], AgentResponse), "Expected AgentResponse object" - assert responses[0].content == "4" - - finally: - dimos.close() - dimos.shutdown() - - -if __name__ == "__main__": - asyncio.run(test_mock_agent_module()) diff --git a/dimos/agents2/agent.py b/dimos/agents2/agent.py index 04c08b0434..dffa7a4bcb 100644 --- a/dimos/agents2/agent.py +++ b/dimos/agents2/agent.py @@ -270,7 +270,6 @@ def _get_state() -> str: # we are getting tools from the coordinator on each turn # since this allows for skillcontainers to dynamically provide new skills tools = self.get_tools() - print("Available tools:", [tool.name for tool in tools]) self._llm = self._llm.bind_tools(tools) # publish to /agent topic for observability diff --git a/dimos/agents2/fixtures/test_pounce.json b/dimos/agents2/fixtures/test_pounce.json new file mode 100644 index 0000000000..99e84d003a --- /dev/null +++ b/dimos/agents2/fixtures/test_pounce.json @@ -0,0 +1,38 @@ +{ + "responses": [ + { + "content": "", + "tool_calls": [ + { + "name": "execute_sport_command", + "args": { + "args": [ + "FrontPounce" + ] + }, + "id": "call_Ukj6bCAnHQLj28RHRp697blZ", + "type": "tool_call" + } + ] + }, + { + "content": "", + "tool_calls": [ + { + "name": "speak", + "args": { + "args": [ + "I have successfully performed a front pounce." + ] + }, + "id": "call_FR9DtqEvJ9zSY85qVD2UFrll", + "type": "tool_call" + } + ] + }, + { + "content": "I have successfully performed a front pounce.", + "tool_calls": [] + } + ] +} diff --git a/dimos/agents2/fixtures/test_show_your_love.json b/dimos/agents2/fixtures/test_show_your_love.json new file mode 100644 index 0000000000..941906e781 --- /dev/null +++ b/dimos/agents2/fixtures/test_show_your_love.json @@ -0,0 +1,38 @@ +{ + "responses": [ + { + "content": "", + "tool_calls": [ + { + "name": "execute_sport_command", + "args": { + "args": [ + "FingerHeart" + ] + }, + "id": "call_VFp6x9F00FBmiiUiemFWewop", + "type": "tool_call" + } + ] + }, + { + "content": "", + "tool_calls": [ + { + "name": "speak", + "args": { + "args": [ + "Here's a gesture to show you some love!" + ] + }, + "id": "call_WUUmBJ95s9PtVx8YQsmlJ4EU", + "type": "tool_call" + } + ] + }, + { + "content": "Just did a finger heart gesture to show my affection!", + "tool_calls": [] + } + ] +} diff --git a/dimos/agents2/skills/conftest.py b/dimos/agents2/skills/conftest.py index a8734ca7ed..f7d1500847 100644 --- a/dimos/agents2/skills/conftest.py +++ b/dimos/agents2/skills/conftest.py @@ -15,17 +15,13 @@ from functools import partial import pytest -import reactivex as rx from reactivex.scheduler import ThreadPoolScheduler from dimos.agents2.skills.google_maps_skill_container import GoogleMapsSkillContainer from dimos.agents2.skills.gps_nav_skill import GpsNavSkillContainer from dimos.agents2.skills.navigation import NavigationSkillContainer from dimos.agents2.system_prompt import get_system_prompt -from dimos.mapping.types import LatLon -from dimos.msgs.sensor_msgs import Image -from dimos.robot.robot import GpsRobot -from dimos.utils.data import get_data +from dimos.robot.unitree_webrtc.unitree_skill_container import UnitreeSkillContainer system_prompt = get_system_prompt() @@ -45,31 +41,6 @@ def cleanup_threadpool_scheduler(monkeypatch): threadpool.scheduler = ThreadPoolScheduler(max_workers=threadpool.get_max_workers()) -# TODO: Delete -@pytest.fixture -def fake_robot(mocker): - return mocker.MagicMock() - - -# TODO: Delete -@pytest.fixture -def fake_gps_robot(mocker): - return mocker.Mock(spec=GpsRobot) - - -@pytest.fixture -def fake_video_stream(): - image_path = get_data("chair-image.png") - image = Image.from_file(str(image_path)) - return rx.of(image) - - -# TODO: Delete -@pytest.fixture -def fake_gps_position_stream(): - return rx.of(LatLon(lat=37.783, lon=-122.413)) - - @pytest.fixture def navigation_skill_container(mocker): container = NavigationSkillContainer() @@ -81,22 +52,35 @@ def navigation_skill_container(mocker): @pytest.fixture -def gps_nav_skill_container(fake_gps_robot, fake_gps_position_stream): - container = GpsNavSkillContainer(fake_gps_robot, fake_gps_position_stream) +def gps_nav_skill_container(mocker): + container = GpsNavSkillContainer() + container.gps_location.connection = mocker.MagicMock() + container.gps_goal = mocker.MagicMock() container.start() yield container container.stop() @pytest.fixture -def google_maps_skill_container(fake_gps_robot, fake_gps_position_stream, mocker): - container = GoogleMapsSkillContainer(fake_gps_robot, fake_gps_position_stream) +def google_maps_skill_container(mocker): + container = GoogleMapsSkillContainer() + container.gps_location.connection = mocker.MagicMock() container.start() container._client = mocker.MagicMock() yield container container.stop() +@pytest.fixture +def unitree_skills(mocker): + container = UnitreeSkillContainer() + container._move = mocker.Mock() + container._publish_request = mocker.Mock() + container.start() + yield container + container.stop() + + @pytest.fixture def create_navigation_agent(navigation_skill_container, create_fake_agent): return partial( @@ -122,3 +106,12 @@ def create_google_maps_agent( system_prompt=system_prompt, skill_containers=[gps_nav_skill_container, google_maps_skill_container], ) + + +@pytest.fixture +def create_unitree_skills_agent(unitree_skills, create_fake_agent): + return partial( + create_fake_agent, + system_prompt=system_prompt, + skill_containers=[unitree_skills], + ) diff --git a/dimos/agents2/skills/demo_google_maps_skill.py b/dimos/agents2/skills/demo_google_maps_skill.py new file mode 100644 index 0000000000..4bee8691a3 --- /dev/null +++ b/dimos/agents2/skills/demo_google_maps_skill.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dotenv import load_dotenv + +from dimos.agents2.agent import llm_agent +from dimos.agents2.cli.human import human_input +from dimos.agents2.skills.demo_robot import demo_robot +from dimos.agents2.skills.google_maps_skill_container import google_maps_skill +from dimos.agents2.system_prompt import get_system_prompt +from dimos.core.blueprints import autoconnect + +load_dotenv() + + +demo_google_maps_skill = autoconnect( + demo_robot(), + google_maps_skill(), + human_input(), + llm_agent(system_prompt=get_system_prompt()), +) diff --git a/dimos/agents2/skills/demo_gps_nav.py b/dimos/agents2/skills/demo_gps_nav.py new file mode 100644 index 0000000000..55ffd052ff --- /dev/null +++ b/dimos/agents2/skills/demo_gps_nav.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dotenv import load_dotenv + +from dimos.agents2.agent import llm_agent +from dimos.agents2.cli.human import human_input +from dimos.agents2.skills.demo_robot import demo_robot +from dimos.agents2.skills.gps_nav_skill import gps_nav_skill +from dimos.agents2.system_prompt import get_system_prompt +from dimos.core.blueprints import autoconnect + +load_dotenv() + + +demo_gps_nav_skill = autoconnect( + demo_robot(), + gps_nav_skill(), + human_input(), + llm_agent(system_prompt=get_system_prompt()), +) diff --git a/dimos/agents2/skills/demo_robot.py b/dimos/agents2/skills/demo_robot.py new file mode 100644 index 0000000000..74b5c47bd3 --- /dev/null +++ b/dimos/agents2/skills/demo_robot.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from reactivex import interval + +from dimos.core.module import Module +from dimos.core.stream import Out +from dimos.mapping.types import LatLon + + +class DemoRobot(Module): + gps_location: Out[LatLon] = None + + def start(self) -> None: + super().start() + self._disposables.add(interval(1.0).subscribe(lambda _: self._publish_gps_location())) + + def stop(self) -> None: + super().stop() + + def _publish_gps_location(self) -> None: + self.gps_location.publish(LatLon(lat=37.78092426217621, lon=-122.40682866540769)) + + +demo_robot = DemoRobot.blueprint + + +__all__ = ["DemoRobot", "demo_robot"] diff --git a/dimos/agents2/skills/google_maps_skill_container.py b/dimos/agents2/skills/google_maps_skill_container.py index 433914a5e3..f5c1af428e 100644 --- a/dimos/agents2/skills/google_maps_skill_container.py +++ b/dimos/agents2/skills/google_maps_skill_container.py @@ -15,43 +15,34 @@ import json from typing import Any -from reactivex import Observable -from reactivex.disposable import CompositeDisposable - -from dimos.core.resource import Resource +from dimos.core.core import rpc +from dimos.core.skill_module import SkillModule +from dimos.core.stream import In from dimos.mapping.google_maps.google_maps import GoogleMaps -from dimos.mapping.osm.current_location_map import CurrentLocationMap from dimos.mapping.types import LatLon -from dimos.protocol.skill.skill import SkillContainer, skill -from dimos.robot.robot import Robot +from dimos.protocol.skill.skill import skill from dimos.utils.logging_config import setup_logger logger = setup_logger(__file__) -class GoogleMapsSkillContainer(SkillContainer, Resource): - _robot: Robot - _disposables: CompositeDisposable - _latest_location: LatLon | None - _position_stream: Observable[LatLon] - _current_location_map: CurrentLocationMap - _started: bool +class GoogleMapsSkillContainer(SkillModule): + _latest_location: LatLon | None = None + _client: GoogleMaps + + gps_location: In[LatLon] = None - def __init__(self, robot: Robot, position_stream: Observable[LatLon]) -> None: + def __init__(self) -> None: super().__init__() - self._robot = robot - self._disposables = CompositeDisposable() - self._latest_location = None - self._position_stream = position_stream self._client = GoogleMaps() - self._started = False + @rpc def start(self) -> None: - self._started = True - self._disposables.add(self._position_stream.subscribe(self._on_gps_location)) + super().start() + self._disposables.add(self.gps_location.subscribe(self._on_gps_location)) + @rpc def stop(self) -> None: - self._disposables.dispose() super().stop() def _on_gps_location(self, location: LatLon) -> None: @@ -75,9 +66,6 @@ def where_am_i(self, context_radius: int = 200) -> str: context_radius (int): default 200, how many meters to look around """ - if not self._started: - raise ValueError(f"{self} has not been started.") - location = self._get_latest_location() result = None @@ -105,9 +93,6 @@ def get_gps_position_for_queries(self, *queries: str) -> str: queries (list[str]): The places you want to look up. """ - if not self._started: - raise ValueError(f"{self} has not been started.") - location = self._get_latest_location() results: list[dict[str, Any] | str] = [] @@ -123,3 +108,8 @@ def get_gps_position_for_queries(self, *queries: str) -> str: results.append(f"no result for {query}") return json.dumps(results) + + +google_maps_skill = GoogleMapsSkillContainer.blueprint + +__all__ = ["GoogleMapsSkillContainer", "google_maps_skill"] diff --git a/dimos/agents2/skills/gps_nav_skill.py b/dimos/agents2/skills/gps_nav_skill.py index 80e346790a..43912b557d 100644 --- a/dimos/agents2/skills/gps_nav_skill.py +++ b/dimos/agents2/skills/gps_nav_skill.py @@ -14,48 +14,43 @@ import json -from reactivex import Observable -from reactivex.disposable import CompositeDisposable - -from dimos.core.resource import Resource -from dimos.mapping.google_maps.google_maps import GoogleMaps -from dimos.mapping.osm.current_location_map import CurrentLocationMap +from dimos.core.core import rpc +from dimos.core.rpc_client import RpcCall +from dimos.core.skill_module import SkillModule +from dimos.core.stream import In, Out from dimos.mapping.types import LatLon from dimos.mapping.utils.distance import distance_in_meters -from dimos.protocol.skill.skill import SkillContainer, skill -from dimos.robot.robot import Robot +from dimos.protocol.skill.skill import skill from dimos.utils.logging_config import setup_logger logger = setup_logger(__file__) -class GpsNavSkillContainer(SkillContainer, Resource): - _robot: Robot - _disposables: CompositeDisposable - _latest_location: LatLon | None - _position_stream: Observable[LatLon] - _current_location_map: CurrentLocationMap - _started: bool - _max_valid_distance: int +class GpsNavSkillContainer(SkillModule): + _latest_location: LatLon | None = None + _max_valid_distance: int = 50000 + _set_gps_travel_goal_points: RpcCall | None = None + + gps_location: In[LatLon] = None + gps_goal: Out[LatLon] = None - def __init__(self, robot: Robot, position_stream: Observable[LatLon]) -> None: + def __init__(self) -> None: super().__init__() - self._robot = robot - self._disposables = CompositeDisposable() - self._latest_location = None - self._position_stream = position_stream - self._client = GoogleMaps() - self._started = False - self._max_valid_distance = 50000 + @rpc def start(self) -> None: - self._started = True - self._disposables.add(self._position_stream.subscribe(self._on_gps_location)) + super().start() + self._disposables.add(self.gps_location.subscribe(self._on_gps_location)) + @rpc def stop(self) -> None: - self._disposables.dispose() super().stop() + @rpc + def set_WebsocketVisModule_set_gps_travel_goal_points(self, callable: RpcCall) -> None: + self._set_gps_travel_goal_points = callable + self._set_gps_travel_goal_points.set_rpc(self.rpc) + def _on_gps_location(self, location: LatLon) -> None: self._latest_location = location @@ -75,18 +70,23 @@ def set_gps_travel_points(self, *points: dict[str, float]) -> str: # then travel to {"lat": 37.7915, "lon": -122.4276} """ - if not self._started: - raise ValueError(f"{self} has not been started.") - new_points = [self._convert_point(x) for x in points] if not all(new_points): parsed = json.dumps([x.__dict__ if x else x for x in new_points]) return f"Not all points were valid. I parsed this: {parsed}" + for new_point in new_points: + distance = distance_in_meters(self._get_latest_location(), new_point) + if distance > self._max_valid_distance: + return f"Point {new_point} is too far ({int(distance)} meters away)." + logger.info(f"Set travel points: {new_points}") - self._robot.set_gps_travel_goal_points(new_points) + self.gps_goal.publish(new_points) + + if self._set_gps_travel_goal_points: + self._set_gps_travel_goal_points(new_points) return "I've successfully set the travel points." @@ -99,9 +99,10 @@ def _convert_point(self, point: dict[str, float]) -> LatLon | None: if lat is None or lon is None: return None - new_point = LatLon(lat=lat, lon=lon) - distance = distance_in_meters(self._get_latest_location(), new_point) - if distance > self._max_valid_distance: - return None + return LatLon(lat=lat, lon=lon) + + +gps_nav_skill = GpsNavSkillContainer.blueprint + - return new_point +__all__ = ["GpsNavSkillContainer", "gps_nav_skill"] diff --git a/dimos/agents2/skills/osm.py b/dimos/agents2/skills/osm.py index ae721bea81..d4455f14bd 100644 --- a/dimos/agents2/skills/osm.py +++ b/dimos/agents2/skills/osm.py @@ -28,7 +28,6 @@ class OsmSkill(SkillModule): _latest_location: LatLon | None _current_location_map: CurrentLocationMap - _skill_started: bool gps_location: In[LatLon] = None @@ -36,11 +35,9 @@ def __init__(self) -> None: super().__init__() self._latest_location = None self._current_location_map = CurrentLocationMap(QwenVlModel()) - self._skill_started = False def start(self) -> None: super().start() - self._skill_started = True self._disposables.add(self.gps_location.subscribe(self._on_gps_location)) def stop(self) -> None: @@ -63,9 +60,6 @@ def street_map_query(self, query_sentence: str) -> str: query_sentence (str): The query sentence. """ - if not self._skill_started: - raise ValueError(f"{self} has not been started.") - self._current_location_map.update_position(self._latest_location) location = self._current_location_map.query_for_one_position_and_context( query_sentence, self._latest_location diff --git a/dimos/agents2/skills/test_google_maps_skill_container.py b/dimos/agents2/skills/test_google_maps_skill_container.py index 27a9dadb8f..4f6b730b5f 100644 --- a/dimos/agents2/skills/test_google_maps_skill_container.py +++ b/dimos/agents2/skills/test_google_maps_skill_container.py @@ -15,9 +15,11 @@ import re from dimos.mapping.google_maps.types import Coordinates, LocationContext, Position +from dimos.mapping.types import LatLon def test_where_am_i(create_google_maps_agent, google_maps_skill_container) -> None: + google_maps_skill_container._latest_location = LatLon(lat=37.782654, lon=-122.413273) google_maps_skill_container._client.get_location_context.return_value = LocationContext( street="Bourbon Street", coordinates=Coordinates(lat=37.782654, lon=-122.413273) ) @@ -31,6 +33,7 @@ def test_where_am_i(create_google_maps_agent, google_maps_skill_container) -> No def test_get_gps_position_for_queries( create_google_maps_agent, google_maps_skill_container ) -> None: + google_maps_skill_container._latest_location = LatLon(lat=37.782654, lon=-122.413273) google_maps_skill_container._client.get_position.side_effect = [ Position(lat=37.782601, lon=-122.413201, description="address 1"), Position(lat=37.782602, lon=-122.413202, description="address 2"), diff --git a/dimos/agents2/skills/test_gps_nav_skills.py b/dimos/agents2/skills/test_gps_nav_skills.py index 9e8090b169..19cc8cb104 100644 --- a/dimos/agents2/skills/test_gps_nav_skills.py +++ b/dimos/agents2/skills/test_gps_nav_skills.py @@ -16,24 +16,40 @@ from dimos.mapping.types import LatLon -def test_set_gps_travel_points(fake_gps_robot, create_gps_nav_agent) -> None: +def test_set_gps_travel_points(create_gps_nav_agent, gps_nav_skill_container, mocker) -> None: + gps_nav_skill_container._latest_location = LatLon(lat=37.782654, lon=-122.413273) + gps_nav_skill_container._set_gps_travel_goal_points = mocker.Mock() agent = create_gps_nav_agent(fixture="test_set_gps_travel_points.json") agent.query("go to lat: 37.782654, lon: -122.413273") - fake_gps_robot.set_gps_travel_goal_points.assert_called_once_with( + gps_nav_skill_container._set_gps_travel_goal_points.assert_called_once_with( + [LatLon(lat=37.782654, lon=-122.413273)] + ) + gps_nav_skill_container.gps_goal.publish.assert_called_once_with( [LatLon(lat=37.782654, lon=-122.413273)] ) -def test_set_gps_travel_points_multiple(fake_gps_robot, create_gps_nav_agent) -> None: +def test_set_gps_travel_points_multiple( + create_gps_nav_agent, gps_nav_skill_container, mocker +) -> None: + gps_nav_skill_container._latest_location = LatLon(lat=37.782654, lon=-122.413273) + gps_nav_skill_container._set_gps_travel_goal_points = mocker.Mock() agent = create_gps_nav_agent(fixture="test_set_gps_travel_points_multiple.json") agent.query( "go to lat: 37.782654, lon: -122.413273, then 37.782660,-122.413260, and then 37.782670,-122.413270" ) - fake_gps_robot.set_gps_travel_goal_points.assert_called_once_with( + gps_nav_skill_container._set_gps_travel_goal_points.assert_called_once_with( + [ + LatLon(lat=37.782654, lon=-122.413273), + LatLon(lat=37.782660, lon=-122.413260), + LatLon(lat=37.782670, lon=-122.413270), + ] + ) + gps_nav_skill_container.gps_goal.publish.assert_called_once_with( [ LatLon(lat=37.782654, lon=-122.413273), LatLon(lat=37.782660, lon=-122.413260), diff --git a/dimos/agents2/skills/test_unitree_skill_container.py b/dimos/agents2/skills/test_unitree_skill_container.py new file mode 100644 index 0000000000..d9570341d8 --- /dev/null +++ b/dimos/agents2/skills/test_unitree_skill_container.py @@ -0,0 +1,42 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def test_pounce(create_unitree_skills_agent, unitree_skills) -> None: + agent = create_unitree_skills_agent(fixture="test_pounce.json") + + response = agent.query("pounce") + + assert "front pounce" in response.lower() + unitree_skills._publish_request.assert_called_once_with( + "rt/api/sport/request", {"api_id": 1032} + ) + + +def test_show_your_love(create_unitree_skills_agent, unitree_skills) -> None: + agent = create_unitree_skills_agent(fixture="test_show_your_love.json") + + response = agent.query("show your love") + + assert "finger heart" in response.lower() + unitree_skills._publish_request.assert_called_once_with( + "rt/api/sport/request", {"api_id": 1036} + ) + + +def test_did_you_mean(unitree_skills) -> None: + assert ( + unitree_skills.execute_sport_command("Pounce") + == "There's no 'Pounce' command. Did you mean: ['FrontPounce', 'Pose']" + ) diff --git a/dimos/agents2/temp/run_unitree_agents2.py b/dimos/agents2/temp/run_unitree_agents2.py deleted file mode 100644 index aacfd1b5f4..0000000000 --- a/dimos/agents2/temp/run_unitree_agents2.py +++ /dev/null @@ -1,187 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Run script for Unitree Go2 robot with agents2 framework. -This is the migrated version using the new LangChain-based agent system. -""" - -import os -from pathlib import Path -import sys -import time - -from dotenv import load_dotenv - -from dimos.agents2.cli.human import HumanInput - -# Add parent directories to path -sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) - - -from dimos.agents2 import Agent -from dimos.agents2.spec import Model, Provider -from dimos.robot.unitree_webrtc.unitree_go2 import UnitreeGo2 -from dimos.robot.unitree_webrtc.unitree_skill_container import UnitreeSkillContainer -from dimos.utils.logging_config import setup_logger - -logger = setup_logger("dimos.agents2.run_unitree") - -# Load environment variables -load_dotenv() - -# System prompt path -SYSTEM_PROMPT_PATH = os.path.join( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), - "assets/agent/prompt.txt", -) - - -class UnitreeAgentRunner: - """Manages the Unitree robot with the new agents2 framework.""" - - def __init__(self) -> None: - self.robot = None - self.agent = None - self.agent_thread = None - self.running = False - - def setup_robot(self) -> UnitreeGo2: - """Initialize the robot connection.""" - logger.info("Initializing Unitree Go2 robot...") - - robot = UnitreeGo2( - ip=os.getenv("ROBOT_IP"), - connection_type=os.getenv("CONNECTION_TYPE", "webrtc"), - ) - - robot.start() - time.sleep(3) - - logger.info("Robot initialized successfully") - return robot - - def setup_agent(self, skillcontainers, system_prompt: str) -> Agent: - """Create and configure the agent with skills.""" - logger.info("Setting up agent with skills...") - - # Create agent - agent = Agent( - system_prompt=system_prompt, - model=Model.GPT_4O, # Could add CLAUDE models to enum - provider=Provider.OPENAI, # Would need ANTHROPIC provider - ) - - for container in skillcontainers: - print("REGISTERING SKILLS FROM CONTAINER:", container) - agent.register_skills(container) - - agent.run_implicit_skill("human") - - agent.start() - - # Log available skills - names = ", ".join([tool.name for tool in agent.get_tools()]) - logger.info(f"Agent configured with {len(names)} skills: {names}") - - agent.loop_thread() - return agent - - def run(self) -> None: - """Main run loop.""" - print("\n" + "=" * 60) - print("Unitree Go2 Robot with agents2 Framework") - print("=" * 60) - print("\nThis system integrates:") - print(" - Unitree Go2 quadruped robot") - print(" - WebRTC communication interface") - print(" - LangChain-based agent system (agents2)") - print(" - Converted skill system with @skill decorators") - print("\nStarting system...\n") - - # Check for API key (would need ANTHROPIC_API_KEY for Claude) - if not os.getenv("OPENAI_API_KEY"): - print("WARNING: OPENAI_API_KEY not found in environment") - print("Please set your API key in .env file or environment") - print("(Note: Full Claude support would require ANTHROPIC_API_KEY)") - sys.exit(1) - - system_prompt = """You are a helpful robot assistant controlling a Unitree Go2 quadruped robot. -You can move, navigate, speak, and perform various actions. Be helpful and friendly.""" - - try: - # Setup components - self.robot = self.setup_robot() - - self.agent = self.setup_agent( - [ - UnitreeSkillContainer(self.robot), - HumanInput(), - ], - system_prompt, - ) - - # Start handling queries - self.running = True - - logger.info("=" * 60) - logger.info("Unitree Go2 Agent Ready (agents2 framework)!") - logger.info("You can:") - logger.info(" - Type commands in the human cli") - logger.info(" - Ask the robot to move or navigate") - logger.info(" - Ask the robot to perform actions (sit, stand, dance, etc.)") - logger.info(" - Ask the robot to speak text") - logger.info("=" * 60) - - while True: - time.sleep(1) - except KeyboardInterrupt: - logger.info("Keyboard interrupt received") - except Exception as e: - logger.error(f"Error running robot: {e}") - import traceback - - traceback.print_exc() - # finally: - # self.shutdown() - - def shutdown(self) -> None: - logger.info("Shutting down...") - self.running = False - - if self.agent: - try: - self.agent.stop() - logger.info("Agent stopped") - except Exception as e: - logger.error(f"Error stopping agent: {e}") - - if self.robot: - try: - self.robot.stop() - logger.info("Robot connection closed") - except Exception as e: - logger.error(f"Error stopping robot: {e}") - - logger.info("Shutdown complete") - - -def main() -> None: - runner = UnitreeAgentRunner() - runner.run() - - -if __name__ == "__main__": - main() diff --git a/dimos/agents2/temp/run_unitree_async.py b/dimos/agents2/temp/run_unitree_async.py deleted file mode 100644 index 29213c1c90..0000000000 --- a/dimos/agents2/temp/run_unitree_async.py +++ /dev/null @@ -1,181 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Async version of the Unitree run file for agents2. -Properly handles the async nature of the agent. -""" - -import asyncio -import os -from pathlib import Path -import sys - -from dotenv import load_dotenv - -# Add parent directories to path -sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) - -from dimos.agents2 import Agent -from dimos.agents2.spec import Model, Provider -from dimos.robot.unitree_webrtc.unitree_go2 import UnitreeGo2 -from dimos.robot.unitree_webrtc.unitree_skill_container import UnitreeSkillContainer -from dimos.utils.logging_config import setup_logger - -logger = setup_logger("run_unitree_async") - -# Load environment variables -load_dotenv() - -# System prompt path -SYSTEM_PROMPT_PATH = os.path.join( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), - "assets/agent/prompt.txt", -) - - -async def handle_query(agent, query_text): - """Handle a single query asynchronously.""" - logger.info(f"Processing query: {query_text}") - - try: - # Use query_async which returns a Future - future = agent.query_async(query_text) - - # Wait for the result (with timeout) - await asyncio.wait_for(asyncio.wrap_future(future), timeout=30.0) - - # Get the result - if future.done(): - result = future.result() - logger.info(f"Agent response: {result}") - return result - else: - logger.warning("Query did not complete") - return "Query timeout" - - except asyncio.TimeoutError: - logger.error("Query timed out after 30 seconds") - return "Query timeout" - except Exception as e: - logger.error(f"Error processing query: {e}") - return f"Error: {e!s}" - - -async def interactive_loop(agent) -> None: - """Run an interactive query loop.""" - print("\n" + "=" * 60) - print("Interactive Agent Mode") - print("Type your commands or 'quit' to exit") - print("=" * 60 + "\n") - - while True: - try: - # Get user input - query = input("\nYou: ").strip() - - if query.lower() in ["quit", "exit", "q"]: - break - - if not query: - continue - - # Process query - response = await handle_query(agent, query) - print(f"\nAgent: {response}") - - except KeyboardInterrupt: - break - except Exception as e: - logger.error(f"Error in interactive loop: {e}") - - -async def main() -> None: - """Main async function.""" - print("\n" + "=" * 60) - print("Unitree Go2 Robot with agents2 Framework (Async)") - print("=" * 60) - - # Check for API key - if not os.getenv("OPENAI_API_KEY"): - print("ERROR: OPENAI_API_KEY not found") - print("Set your API key in .env file or environment") - sys.exit(1) - - # Load system prompt - try: - with open(SYSTEM_PROMPT_PATH) as f: - system_prompt = f.read() - except FileNotFoundError: - system_prompt = """You are a helpful robot assistant controlling a Unitree Go2 robot. -You have access to various movement and control skills. Be helpful and concise.""" - - # Initialize robot (optional - comment out if no robot) - robot = None - if os.getenv("ROBOT_IP"): - try: - logger.info("Connecting to robot...") - robot = UnitreeGo2( - ip=os.getenv("ROBOT_IP"), - connection_type=os.getenv("CONNECTION_TYPE", "webrtc"), - ) - robot.start() - await asyncio.sleep(3) - logger.info("Robot connected") - except Exception as e: - logger.warning(f"Could not connect to robot: {e}") - logger.info("Continuing without robot...") - - # Create skill container - skill_container = UnitreeSkillContainer(robot=robot) - - # Create agent - agent = Agent( - system_prompt=system_prompt, - model=Model.GPT_4O_MINI, # Using mini for faster responses - provider=Provider.OPENAI, - ) - - # Register skills and start - agent.register_skills(skill_container) - agent.start() - - # Log available skills - skills = skill_container.skills() - logger.info(f"Agent initialized with {len(skills)} skills") - - # Test query - print("\n--- Testing agent query ---") - test_response = await handle_query(agent, "Hello! Can you list 5 of your movement skills?") - print(f"Test response: {test_response}\n") - - # Run interactive loop - try: - await interactive_loop(agent) - except KeyboardInterrupt: - logger.info("Interrupted by user") - - # Clean up - logger.info("Shutting down...") - agent.stop() - if robot: - logger.info("Robot disconnected") - - print("\nGoodbye!") - - -if __name__ == "__main__": - # Run the async main function - asyncio.run(main()) diff --git a/dimos/agents2/temp/test_unitree_agent_query.py b/dimos/agents2/temp/test_unitree_agent_query.py deleted file mode 100644 index 4990940e6c..0000000000 --- a/dimos/agents2/temp/test_unitree_agent_query.py +++ /dev/null @@ -1,229 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Test script to debug agent query issues. -Shows different ways to call the agent and handle async. -""" - -import asyncio -import os -from pathlib import Path -import sys -import time - -from dotenv import load_dotenv - -# Add parent directories to path -sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) - -from dimos.agents2 import Agent -from dimos.agents2.spec import Model, Provider -from dimos.robot.unitree_webrtc.unitree_skill_container import UnitreeSkillContainer -from dimos.utils.logging_config import setup_logger - -logger = setup_logger("test_agent_query") - -# Load environment variables -load_dotenv() - - -async def test_async_query(): - """Test agent query using async/await pattern.""" - print("\n=== Testing Async Query ===\n") - - # Create skill container - container = UnitreeSkillContainer(robot=None) - - # Create agent - agent = Agent( - system_prompt="You are a helpful robot assistant. List 3 skills you can do.", - model=Model.GPT_4O_MINI, - provider=Provider.OPENAI, - ) - - # Register skills and start - agent.register_skills(container) - agent.start() - - # Query asynchronously - logger.info("Sending async query...") - future = agent.query_async("Hello! What skills do you have?") - - # Wait for result - logger.info("Waiting for response...") - await asyncio.sleep(10) # Give it time to process - - # Check if future is done - if hasattr(future, "done") and future.done(): - try: - result = future.result() - logger.info(f"Got result: {result}") - except Exception as e: - logger.error(f"Future failed: {e}") - else: - logger.warning("Future not completed yet") - - agent.stop() - - return future - - -def test_sync_query_with_thread() -> None: - """Test agent query using threading for the event loop.""" - print("\n=== Testing Sync Query with Thread ===\n") - - import threading - - # Create skill container - container = UnitreeSkillContainer(robot=None) - - # Create agent - agent = Agent( - system_prompt="You are a helpful robot assistant. List 3 skills you can do.", - model=Model.GPT_4O_MINI, - provider=Provider.OPENAI, - ) - - # Register skills and start - agent.register_skills(container) - agent.start() - - # Track the thread we might create - loop_thread = None - - # The agent's event loop should be running in the Module's thread - # Let's check if it's running - if agent._loop and agent._loop.is_running(): - logger.info("Agent's event loop is running") - else: - logger.warning("Agent's event loop is NOT running - this is the problem!") - - # Try to run the loop in a thread - def run_loop() -> None: - asyncio.set_event_loop(agent._loop) - agent._loop.run_forever() - - loop_thread = threading.Thread(target=run_loop, daemon=False, name="EventLoopThread") - loop_thread.start() - time.sleep(1) # Give loop time to start - logger.info("Started event loop in thread") - - # Now try the query - try: - logger.info("Sending sync query...") - result = agent.query("Hello! What skills do you have?") - logger.info(f"Got result: {result}") - except Exception as e: - logger.error(f"Query failed: {e}") - import traceback - - traceback.print_exc() - - agent.stop() - - # Then stop the manually created event loop thread if we created one - if loop_thread and loop_thread.is_alive(): - logger.info("Stopping manually created event loop thread...") - # Stop the event loop - if agent._loop and agent._loop.is_running(): - agent._loop.call_soon_threadsafe(agent._loop.stop) - # Wait for thread to finish - loop_thread.join(timeout=5) - if loop_thread.is_alive(): - logger.warning("Thread did not stop cleanly within timeout") - - # Finally close the container - container._close_module() - - -# def test_with_real_module_system(): -# """Test using the real DimOS module system (like in test_agent.py).""" -# print("\n=== Testing with Module System ===\n") - -# from dimos.core import start - -# # Start the DimOS system -# dimos = start(2) - -# # Deploy container and agent as modules -# container = dimos.deploy(UnitreeSkillContainer, robot=None) -# agent = dimos.deploy( -# Agent, -# system_prompt="You are a helpful robot assistant. List 3 skills you can do.", -# model=Model.GPT_4O_MINI, -# provider=Provider.OPENAI, -# ) - -# # Register skills -# agent.register_skills(container) -# agent.start() - -# # Query -# try: -# logger.info("Sending query through module system...") -# future = agent.query_async("Hello! What skills do you have?") - -# # In the module system, the loop should be running -# time.sleep(5) # Wait for processing - -# if hasattr(future, "result"): -# result = future.result(timeout=10) -# logger.info(f"Got result: {result}") -# except Exception as e: -# logger.error(f"Query failed: {e}") - -# # Clean up -# agent.stop() -# dimos.stop() - - -def main() -> None: - """Run tests based on available API key.""" - - if not os.getenv("OPENAI_API_KEY"): - print("ERROR: OPENAI_API_KEY not set") - print("Please set your OpenAI API key to test the agent") - sys.exit(1) - - print("=" * 60) - print("Agent Query Testing") - print("=" * 60) - - # Test 1: Async query - try: - asyncio.run(test_async_query()) - except Exception as e: - logger.error(f"Async test failed: {e}") - - # Test 2: Sync query with threading - try: - test_sync_query_with_thread() - except Exception as e: - logger.error(f"Sync test failed: {e}") - - # Test 3: Module system (optional - more complex) - # try: - # test_with_real_module_system() - # except Exception as e: - # logger.error(f"Module test failed: {e}") - - print("\n" + "=" * 60) - print("Testing complete") - print("=" * 60) - - -if __name__ == "__main__": - main() diff --git a/dimos/agents2/temp/test_unitree_skill_container.py b/dimos/agents2/temp/test_unitree_skill_container.py deleted file mode 100644 index 16502004ff..0000000000 --- a/dimos/agents2/temp/test_unitree_skill_container.py +++ /dev/null @@ -1,126 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Test file for UnitreeSkillContainer with agents2 framework. -Tests skill registration and basic functionality. -""" - -from pathlib import Path -import sys -import time - -# Add parent directories to path -sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) - -from dimos.agents2 import Agent -from dimos.agents2.spec import Model, Provider -from dimos.robot.unitree_webrtc.unitree_skill_container import UnitreeSkillContainer -from dimos.utils.logging_config import setup_logger - -logger = setup_logger("test_unitree_skills") - - -def test_skill_container_creation(): - """Test that the skill container can be created and skills are registered.""" - print("\n=== Testing UnitreeSkillContainer Creation ===") - - # Create container without robot (for testing) - container = UnitreeSkillContainer(robot=None) - - try: - # Get available skills from the container - skills = container.skills() - - print(f"Number of skills registered: {len(skills)}") - print("\nAvailable skills:") - for name, skill_config in list(skills.items())[:10]: # Show first 10 - print( - f" - {name}: {skill_config.description if hasattr(skill_config, 'description') else 'No description'}" - ) - if len(skills) > 10: - print(f" ... and {len(skills) - 10} more skills") - - return container, skills - finally: - # Ensure proper cleanup - container._close_module() - # Small delay to allow threads to finish cleanup - time.sleep(0.1) - - -def test_agent_with_skills(): - """Test that an agent can be created with the skill container.""" - print("\n=== Testing Agent with Skills ===") - - # Create skill container - container = UnitreeSkillContainer(robot=None) - agent = None - - try: - # Create agent with configuration passed directly - agent = Agent( - system_prompt="You are a helpful robot assistant that can control a Unitree Go2 robot.", - model=Model.GPT_4O_MINI, - provider=Provider.OPENAI, - ) - - # Register skills - agent.register_skills(container) - - print("Agent created and skills registered successfully!") - - # Get tools to verify - tools = agent.get_tools() - print(f"Agent has access to {len(tools)} tools") - - return agent - finally: - # Ensure proper cleanup in order - if agent: - agent.stop() - container._close_module() - # Small delay to allow threads to finish cleanup - time.sleep(0.1) - - -def test_skill_schemas() -> None: - """Test that skill schemas are properly generated for LangChain.""" - print("\n=== Testing Skill Schemas ===") - - container = UnitreeSkillContainer(robot=None) - - try: - skills = container.skills() - - # Check a few key skills (using snake_case names now) - skill_names = ["move", "wait", "stand_up", "sit", "front_flip", "dance1"] - - for name in skill_names: - if name in skills: - skill_config = skills[name] - print(f"\n{name} skill:") - print(f" Config: {skill_config}") - if hasattr(skill_config, "schema"): - print( - f" Schema keys: {skill_config.schema.keys() if skill_config.schema else 'None'}" - ) - else: - print(f"\nWARNING: Skill '{name}' not found!") - finally: - # Ensure proper cleanup - container._close_module() - # Small delay to allow threads to finish cleanup - time.sleep(0.1) diff --git a/dimos/core/test_blueprints.py b/dimos/core/test_blueprints.py index 59f541aa58..d910e88d7d 100644 --- a/dimos/core/test_blueprints.py +++ b/dimos/core/test_blueprints.py @@ -185,7 +185,7 @@ def test_build_happy_path() -> None: coordinator.stop() -def test_remapping(): +def test_remapping() -> None: """Test that remapping connections works correctly.""" pubsub.lcm.autoconf() diff --git a/dimos/core/transport.py b/dimos/core/transport.py index 32f75e6c33..48a1bc141d 100644 --- a/dimos/core/transport.py +++ b/dimos/core/transport.py @@ -101,7 +101,7 @@ def subscribe(self, callback: Callable[[T], None], selfstream: In[T] = None) -> class JpegLcmTransport(LCMTransport): - def __init__(self, topic: str, type: type, **kwargs): + def __init__(self, topic: str, type: type, **kwargs) -> None: self.lcm = JpegLCM(**kwargs) super().__init__(topic, type) @@ -160,7 +160,7 @@ def subscribe(self, callback: Callable[[T], None], selfstream: In[T] = None) -> class JpegShmTransport(PubSubTransport[T]): _started: bool = False - def __init__(self, topic: str, quality: int = 75, **kwargs): + def __init__(self, topic: str, quality: int = 75, **kwargs) -> None: super().__init__(topic) self.shm = JpegSharedMemory(quality=quality, **kwargs) self.quality = quality @@ -168,7 +168,7 @@ def __init__(self, topic: str, quality: int = 75, **kwargs): def __reduce__(self): return (JpegShmTransport, (self.topic, self.quality)) - def broadcast(self, _, msg): + def broadcast(self, _, msg) -> None: if not self._started: self.shm.start() self._started = True diff --git a/dimos/mapping/osm/demo_osm.py b/dimos/mapping/osm/demo_osm.py index cf907378f3..20d9e40e74 100644 --- a/dimos/mapping/osm/demo_osm.py +++ b/dimos/mapping/osm/demo_osm.py @@ -14,37 +14,17 @@ # limitations under the License. from dotenv import load_dotenv -from reactivex import interval from dimos.agents2.agent import llm_agent from dimos.agents2.cli.human import human_input +from dimos.agents2.skills.demo_robot import demo_robot from dimos.agents2.skills.osm import osm_skill from dimos.agents2.system_prompt import get_system_prompt from dimos.core.blueprints import autoconnect -from dimos.core.module import Module -from dimos.core.stream import Out -from dimos.mapping.types import LatLon load_dotenv() -class DemoRobot(Module): - gps_location: Out[LatLon] = None - - def start(self) -> None: - super().start() - self._disposables.add(interval(1.0).subscribe(lambda _: self._publish_gps_location())) - - def stop(self) -> None: - super().stop() - - def _publish_gps_location(self) -> None: - self.gps_location.publish(LatLon(lat=37.78092426217621, lon=-122.40682866540769)) - - -demo_robot = DemoRobot.blueprint - - demo_osm = autoconnect( demo_robot(), osm_skill(), diff --git a/dimos/models/depth/metric3d.py b/dimos/models/depth/metric3d.py index e22c546dc3..0c10f31e63 100644 --- a/dimos/models/depth/metric3d.py +++ b/dimos/models/depth/metric3d.py @@ -13,8 +13,6 @@ # limitations under the License. import cv2 -import numpy as np -from PIL import Image import torch # May need to add this back for import to work diff --git a/dimos/models/embedding/base.py b/dimos/models/embedding/base.py index 99a8d8fd15..f7c790ffbf 100644 --- a/dimos/models/embedding/base.py +++ b/dimos/models/embedding/base.py @@ -16,7 +16,7 @@ from abc import ABC, abstractmethod import time -from typing import TYPE_CHECKING, Generic, Optional, TypeVar +from typing import TYPE_CHECKING, Generic, TypeVar import numpy as np import torch diff --git a/dimos/models/manipulation/contact_graspnet_pytorch/inference.py b/dimos/models/manipulation/contact_graspnet_pytorch/inference.py index 4241392d8e..fe173dc017 100644 --- a/dimos/models/manipulation/contact_graspnet_pytorch/inference.py +++ b/dimos/models/manipulation/contact_graspnet_pytorch/inference.py @@ -6,9 +6,7 @@ from contact_graspnet_pytorch.checkpoints import CheckpointIO from contact_graspnet_pytorch.contact_grasp_estimator import GraspEstimator from contact_graspnet_pytorch.data import load_available_input_data -from contact_graspnet_pytorch.visualization_utils_o3d import show_image, visualize_grasps import numpy as np -import torch from dimos.utils.data import get_data diff --git a/dimos/models/manipulation/contact_graspnet_pytorch/test_contact_graspnet.py b/dimos/models/manipulation/contact_graspnet_pytorch/test_contact_graspnet.py index b006c98603..7964a24954 100644 --- a/dimos/models/manipulation/contact_graspnet_pytorch/test_contact_graspnet.py +++ b/dimos/models/manipulation/contact_graspnet_pytorch/test_contact_graspnet.py @@ -1,7 +1,5 @@ import glob -import importlib.util import os -import sys import numpy as np import pytest diff --git a/dimos/models/qwen/video_query.py b/dimos/models/qwen/video_query.py index 0f8a3b8f9c..80bb078bac 100644 --- a/dimos/models/qwen/video_query.py +++ b/dimos/models/qwen/video_query.py @@ -2,7 +2,6 @@ import json import os -from typing import Optional, Tuple import numpy as np from openai import OpenAI diff --git a/dimos/models/vl/moondream.py b/dimos/models/vl/moondream.py index ce63c70238..781f1adbf1 100644 --- a/dimos/models/vl/moondream.py +++ b/dimos/models/vl/moondream.py @@ -1,5 +1,4 @@ from functools import cached_property -from typing import Optional import warnings import numpy as np diff --git a/dimos/models/vl/qwen.py b/dimos/models/vl/qwen.py index c302d12c22..773fcc35ad 100644 --- a/dimos/models/vl/qwen.py +++ b/dimos/models/vl/qwen.py @@ -1,6 +1,5 @@ from functools import cached_property import os -from typing import Optional import numpy as np from openai import OpenAI diff --git a/dimos/msgs/sensor_msgs/Image.py b/dimos/msgs/sensor_msgs/Image.py index 051169d6a9..b9ffc6a65b 100644 --- a/dimos/msgs/sensor_msgs/Image.py +++ b/dimos/msgs/sensor_msgs/Image.py @@ -430,7 +430,7 @@ def lcm_decode(cls, data: bytes, **kwargs) -> Image: ) ) - def lcm_jpeg_encode(self, quality: int = 75, frame_id: Optional[str] = None) -> bytes: + def lcm_jpeg_encode(self, quality: int = 75, frame_id: str | None = None) -> bytes: """Convert to LCM Image message with JPEG-compressed data. Args: diff --git a/dimos/perception/spatial_perception.py b/dimos/perception/spatial_perception.py index 7d00ee67f9..7a96939431 100644 --- a/dimos/perception/spatial_perception.py +++ b/dimos/perception/spatial_perception.py @@ -38,7 +38,7 @@ from dimos.utils.logging_config import setup_logger if TYPE_CHECKING: - from dimos.msgs.geometry_msgs import PoseStamped, Vector3 + from dimos.msgs.geometry_msgs import Vector3 _OUTPUT_DIR = DIMOS_PROJECT_ROOT / "assets" / "output" _MEMORY_DIR = _OUTPUT_DIR / "memory" diff --git a/dimos/protocol/pubsub/lcmpubsub.py b/dimos/protocol/pubsub/lcmpubsub.py index ef158ffb30..c348b1dda7 100644 --- a/dimos/protocol/pubsub/lcmpubsub.py +++ b/dimos/protocol/pubsub/lcmpubsub.py @@ -121,7 +121,7 @@ def decode(self, msg: bytes, topic: Topic) -> LCMMsg: class JpegSharedMemoryEncoderMixin(PubSubEncoderMixin[str, Image]): - def __init__(self, quality: int = 75, **kwargs): + def __init__(self, quality: int = 75, **kwargs) -> None: super().__init__(**kwargs) self.jpeg = TurboJPEG() self.quality = quality diff --git a/dimos/robot/all_blueprints.py b/dimos/robot/all_blueprints.py index c177723e66..6a26c4161c 100644 --- a/dimos/robot/all_blueprints.py +++ b/dimos/robot/all_blueprints.py @@ -23,6 +23,8 @@ "unitree-go2-jpeglcm": "dimos.robot.unitree_webrtc.unitree_go2_blueprints:standard_with_jpeglcm", "unitree-go2-agentic": "dimos.robot.unitree_webrtc.unitree_go2_blueprints:agentic", "demo-osm": "dimos.mapping.osm.demo_osm:demo_osm", + "demo-gps-nav": "dimos.agents2.skills.demo_gps_nav:demo_gps_nav_skill", + "demo-google-maps-skill": "dimos.agents2.skills.demo_google_maps_skill:demo_google_maps_skill", "demo-remapping": "dimos.robot.unitree_webrtc.demo_remapping:remapping", "demo-remapping-transport": "dimos.robot.unitree_webrtc.demo_remapping:remapping_and_transport", } diff --git a/dimos/robot/robot.py b/dimos/robot/robot.py index 002dcb4710..c67058ff91 100644 --- a/dimos/robot/robot.py +++ b/dimos/robot/robot.py @@ -14,13 +14,8 @@ """Minimal robot interface for DIMOS robots.""" -from abc import ABC, abstractmethod +from abc import ABC -from reactivex import Observable - -from dimos.mapping.types import LatLon -from dimos.msgs.geometry_msgs import PoseStamped -from dimos.perception.spatial_perception import SpatialMemory from dimos.types.robot_capabilities import RobotCapability @@ -62,32 +57,3 @@ def cleanup(self) -> None: Override this method to provide cleanup logic. """ pass - - -# TODO: Delete -class UnitreeRobot(Robot): - @abstractmethod - def get_odom(self) -> PoseStamped: ... - - @abstractmethod - def explore(self) -> bool: ... - - @abstractmethod - def stop_exploration(self) -> bool: ... - - @abstractmethod - def is_exploration_active(self) -> bool: ... - - @property - @abstractmethod - def spatial_memory(self) -> SpatialMemory | None: ... - - -# TODO: Delete -class GpsRobot(ABC): - @property - @abstractmethod - def gps_position_stream(self) -> Observable[LatLon]: ... - - @abstractmethod - def set_gps_travel_goal_points(self, points: list[LatLon]) -> None: ... diff --git a/dimos/robot/unitree_webrtc/mujoco_connection.py b/dimos/robot/unitree_webrtc/mujoco_connection.py index b68097ea33..aa11e90eec 100644 --- a/dimos/robot/unitree_webrtc/mujoco_connection.py +++ b/dimos/robot/unitree_webrtc/mujoco_connection.py @@ -233,4 +233,4 @@ def move(self, twist: Twist, duration: float = 0.0) -> None: self.mujoco_thread.move(twist, duration) def publish_request(self, topic: str, data: dict) -> None: - pass + print(f"publishing request, topic={topic}, data={data}") diff --git a/dimos/robot/unitree_webrtc/unitree_go2.py b/dimos/robot/unitree_webrtc/unitree_go2.py index b91433ead8..dff3502ead 100644 --- a/dimos/robot/unitree_webrtc/unitree_go2.py +++ b/dimos/robot/unitree_webrtc/unitree_go2.py @@ -54,7 +54,6 @@ from dimos.protocol.pubsub.lcmpubsub import LCM from dimos.protocol.tf import TF from dimos.robot.foxglove_bridge import FoxgloveBridge -from dimos.robot.robot import UnitreeRobot from dimos.robot.unitree_webrtc.connection import UnitreeWebRTCConnection from dimos.robot.unitree_webrtc.type.lidar import LidarMessage from dimos.robot.unitree_webrtc.type.map import Map @@ -328,7 +327,7 @@ def publish_request(self, topic: str, data: dict): connection = ConnectionModule.blueprint -class UnitreeGo2(UnitreeRobot, Resource): +class UnitreeGo2(Resource): """Full Unitree Go2 robot with navigation and perception capabilities.""" _dimos: ModuleCoordinator diff --git a/dimos/robot/unitree_webrtc/unitree_go2_blueprints.py b/dimos/robot/unitree_webrtc/unitree_go2_blueprints.py index 60022e3cfb..7c68a69efe 100644 --- a/dimos/robot/unitree_webrtc/unitree_go2_blueprints.py +++ b/dimos/robot/unitree_webrtc/unitree_go2_blueprints.py @@ -19,7 +19,7 @@ from dimos.agents2.agent import llm_agent from dimos.agents2.cli.human import human_input from dimos.agents2.skills.navigation import navigation_skill -from dimos.constants import DEFAULT_CAPACITY_COLOR_IMAGE, DEFAULT_CAPACITY_DEPTH_IMAGE +from dimos.constants import DEFAULT_CAPACITY_COLOR_IMAGE from dimos.core.blueprints import autoconnect from dimos.core.transport import JpegLcmTransport, JpegShmTransport, LCMTransport, pSHMTransport from dimos.msgs.geometry_msgs import PoseStamped @@ -37,9 +37,9 @@ from dimos.perception.object_tracker import object_tracking from dimos.perception.spatial_perception import spatial_memory from dimos.robot.foxglove_bridge import foxglove_bridge -from dimos.robot.unitree_webrtc.depth_module import depth_module from dimos.robot.unitree_webrtc.type.map import mapper from dimos.robot.unitree_webrtc.unitree_go2 import connection +from dimos.robot.unitree_webrtc.unitree_skill_container import unitree_skills from dimos.utils.monitoring import utilization from dimos.web.websocket_vis.websocket_vis_module import websocket_vis @@ -112,4 +112,5 @@ llm_agent(), human_input(), navigation_skill(), + unitree_skills(), ) diff --git a/dimos/robot/unitree_webrtc/unitree_skill_container.py b/dimos/robot/unitree_webrtc/unitree_skill_container.py index e6179adcbb..f782916db4 100644 --- a/dimos/robot/unitree_webrtc/unitree_skill_container.py +++ b/dimos/robot/unitree_webrtc/unitree_skill_container.py @@ -12,21 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" -Unitree skill container for the new agents2 framework. -Dynamically generates skills from UNITREE_WEBRTC_CONTROLS list. -""" - from __future__ import annotations import datetime +import difflib import time from typing import TYPE_CHECKING from go2_webrtc_driver.constants import RTC_TOPIC -from dimos.core import Module from dimos.core.core import rpc +from dimos.core.skill_module import SkillModule from dimos.msgs.geometry_msgs import Twist, Vector3 from dimos.protocol.skill.skill import skill from dimos.protocol.skill.type import Reducer, Stream @@ -34,25 +30,23 @@ from dimos.utils.logging_config import setup_logger if TYPE_CHECKING: - from dimos.robot.unitree_webrtc.unitree_go2 import UnitreeGo2 + from dimos.core.rpc_client import RpcCall logger = setup_logger("dimos.robot.unitree_webrtc.unitree_skill_container") -class UnitreeSkillContainer(Module): - """Container for Unitree Go2 robot skills using the new framework.""" +_UNITREE_COMMANDS = { + name: (id_, description) + for name, id_, description in UNITREE_WEBRTC_CONTROLS + if name not in ["Reverse", "Spin"] +} - def __init__(self, robot: UnitreeGo2 | None = None) -> None: - """Initialize the skill container with robot reference. - Args: - robot: The UnitreeGo2 robot instance - """ - super().__init__() - self._robot = robot +class UnitreeSkillContainer(SkillModule): + """Container for Unitree Go2 robot skills using the new framework.""" - # Dynamically generate skills from UNITREE_WEBRTC_CONTROLS - self._generate_unitree_skills() + _move: RpcCall | None = None + _publish_request: RpcCall | None = None @rpc def start(self) -> None: @@ -60,67 +54,17 @@ def start(self) -> None: @rpc def stop(self) -> None: - # TODO: Do I need to clean up dynamic skills? super().stop() - def _generate_unitree_skills(self) -> None: - """Dynamically generate skills from the UNITREE_WEBRTC_CONTROLS list.""" - logger.info(f"Generating {len(UNITREE_WEBRTC_CONTROLS)} dynamic Unitree skills") - - for name, api_id, description in UNITREE_WEBRTC_CONTROLS: - if name not in ["Reverse", "Spin"]: # Exclude reverse and spin as in original - # Convert CamelCase to snake_case for method name - skill_name = self._convert_to_snake_case(name) - self._create_dynamic_skill(skill_name, api_id, description, name) - - def _convert_to_snake_case(self, name: str) -> str: - """Convert CamelCase to snake_case. - - Examples: - StandUp -> stand_up - RecoveryStand -> recovery_stand - FrontFlip -> front_flip - """ - result = [] - for i, char in enumerate(name): - if i > 0 and char.isupper(): - result.append("_") - result.append(char.lower()) - return "".join(result) - - def _create_dynamic_skill( - self, skill_name: str, api_id: int, description: str, original_name: str - ) -> None: - """Create a dynamic skill method with the @skill decorator. - - Args: - skill_name: Snake_case name for the method - api_id: The API command ID - description: Human-readable description - original_name: Original CamelCase name for display - """ - - # Define the skill function - def dynamic_skill_func(self) -> str: - """Dynamic skill function.""" - return self._execute_sport_command(api_id, original_name) - - # Set the function's metadata - dynamic_skill_func.__name__ = skill_name - dynamic_skill_func.__doc__ = description - - # Apply the @skill decorator - decorated_skill = skill()(dynamic_skill_func) - - # Bind the method to the instance - bound_method = decorated_skill.__get__(self, self.__class__) - - # Add it as an attribute - setattr(self, skill_name, bound_method) - - logger.debug(f"Generated skill: {skill_name} (API ID: {api_id})") + @rpc + def set_ConnectionModule_move(self, callable: RpcCall) -> None: + self._move = callable + self._move.set_rpc(self.rpc) - # ========== Explicit Skills ========== + @rpc + def set_ConnectionModule_publish_request(self, callable: RpcCall) -> None: + self._publish_request = callable + self._publish_request.set_rpc(self.rpc) @skill() def move(self, x: float, y: float = 0.0, yaw: float = 0.0, duration: float = 0.0) -> str: @@ -136,11 +80,11 @@ def move(self, x: float, y: float = 0.0, yaw: float = 0.0, duration: float = 0.0 yaw: Rotational velocity (rad/s) duration: How long to move (seconds) """ - if self._robot is None: + if self._move is None: return "Error: Robot not connected" twist = Twist(linear=Vector3(x, y, 0), angular=Vector3(0, 0, yaw)) - self._robot.move(twist, duration=duration) + self._move(twist, duration=duration) return f"Started moving with velocity=({x}, {y}, {yaw}) for {duration} seconds" @skill() @@ -153,7 +97,7 @@ def wait(self, seconds: float) -> str: time.sleep(seconds) return f"Wait completed with length={seconds}s" - @skill(stream=Stream.passive, reducer=Reducer.latest) + @skill(stream=Stream.passive, reducer=Reducer.latest, hide_skill=True) def current_time(self): """Provides current time implicitly, don't call this skill directly.""" print("Starting current_time skill") @@ -166,24 +110,43 @@ def speak(self, text: str) -> str: """Speak text out loud through the robot's speakers.""" return f"This is being said aloud: {text}" - # ========== Helper Methods ========== + @skill() + def execute_sport_command(self, command_name: str) -> str: + if self._publish_request is None: + return f"Error: Robot not connected (cannot execute {command_name})" - def _execute_sport_command(self, api_id: int, name: str) -> str: - """Execute a sport command through WebRTC interface. + if command_name not in _UNITREE_COMMANDS: + suggestions = difflib.get_close_matches( + command_name, _UNITREE_COMMANDS.keys(), n=3, cutoff=0.6 + ) + return f"There's no '{command_name}' command. Did you mean: {suggestions}" - Args: - api_id: The API command ID - name: Human-readable name of the command - """ - if self._robot is None: - return f"Error: Robot not connected (cannot execute {name})" + id_, _ = _UNITREE_COMMANDS[command_name] try: - self._robot.connection.publish_request(RTC_TOPIC["SPORT_MOD"], {"api_id": api_id}) - message = f"{name} command executed successfully (id={api_id})" - logger.info(message) - return message + self._publish_request(RTC_TOPIC["SPORT_MOD"], {"api_id": id_}) + return f"'{command_name}' command executed successfully." except Exception as e: - error_msg = f"Failed to execute {name}: {e}" - logger.error(error_msg) - return error_msg + logger.error(f"Failed to execute {command_name}: {e}") + return "Failed to execute the command." + + +_commands = "\n".join( + [f'- "{name}": {description}' for name, (_, description) in _UNITREE_COMMANDS.items()] +) + +UnitreeSkillContainer.execute_sport_command.__doc__ = f"""Execute a Unitree sport command. + +Example usage: + + execute_sport_command("FrontPounce") + +Here are all the command names, and what they do. + +{_commands} +""" + + +unitree_skills = UnitreeSkillContainer.blueprint + +__all__ = ["UnitreeSkillContainer", "unitree_skills"] diff --git a/dimos/utils/demo_image_encoding.py b/dimos/utils/demo_image_encoding.py index a98924260c..00df5c1a62 100644 --- a/dimos/utils/demo_image_encoding.py +++ b/dimos/utils/demo_image_encoding.py @@ -45,19 +45,19 @@ class EmitterModule(Module): _thread: threading.Thread | None = None _stop_event: threading.Event | None = None - def start(self): + def start(self) -> None: super().start() self._stop_event = threading.Event() self._thread = threading.Thread(target=self._publish_image, daemon=True) self._thread.start() - def stop(self): + def stop(self) -> None: if self._thread: self._stop_event.set() self._thread.join(timeout=2) super().stop() - def _publish_image(self): + def _publish_image(self) -> None: open_file = open("/tmp/emitter-times", "w") while not self._stop_event.is_set(): start = time.time() @@ -74,21 +74,21 @@ class ReceiverModule(Module): _open_file = None - def start(self): + def start(self) -> None: super().start() self._disposables.add(Disposable(self.image.subscribe(self._on_image))) self._open_file = open("/tmp/receiver-times", "w") - def stop(self): + def stop(self) -> None: self._open_file.close() super().stop() - def _on_image(self, image: Image): + def _on_image(self, image: Image) -> None: self._open_file.write(str(time.time()) + "\n") print("image") -def main(): +def main() -> None: parser = argparse.ArgumentParser(description="Demo image encoding with transport options") parser.add_argument( "--use-jpeg", diff --git a/dimos/utils/fast_image_generator.py b/dimos/utils/fast_image_generator.py index f8e02cb71b..1644014f7a 100644 --- a/dimos/utils/fast_image_generator.py +++ b/dimos/utils/fast_image_generator.py @@ -31,7 +31,7 @@ class FastImageGenerator: - High contrast boundaries (tests blocking artifacts) """ - def __init__(self, width: int = 1280, height: int = 720): + def __init__(self, width: int = 1280, height: int = 720) -> None: """Initialize the generator with pre-computed elements.""" self.width = width self.height = height @@ -57,7 +57,7 @@ def __init__(self, width: int = 1280, height: int = 720): # Pre-allocate shape masks for reuse self._init_shape_masks() - def _init_gradients(self): + def _init_gradients(self) -> None: """Pre-compute gradient patterns.""" # Diagonal gradient self.diag_gradient = (self.x_grid + self.y_grid) * 0.5 @@ -71,7 +71,7 @@ def _init_gradients(self): self.h_gradient = self.x_grid self.v_gradient = self.y_grid - def _init_moving_objects(self): + def _init_moving_objects(self) -> None: """Initialize properties of moving objects.""" self.objects = [ { @@ -104,7 +104,7 @@ def _init_moving_objects(self): }, ] - def _init_texture(self): + def _init_texture(self) -> None: """Pre-compute a texture pattern.""" # Create a simple checkerboard pattern at lower resolution checker_size = 20 @@ -118,7 +118,7 @@ def _init_texture(self): self.texture = np.repeat(np.repeat(checker, checker_size, axis=0), checker_size, axis=1) self.texture = self.texture[: self.height, : self.width].astype(np.float32) * 30 - def _init_shape_masks(self): + def _init_shape_masks(self) -> None: """Pre-allocate reusable masks for shapes.""" # Pre-allocate a mask array self.temp_mask = np.zeros((self.height, self.width), dtype=np.float32) @@ -126,7 +126,7 @@ def _init_shape_masks(self): # Pre-compute indices for the entire image self.y_indices, self.x_indices = np.indices((self.height, self.width)) - def _draw_circle_fast(self, cx: int, cy: int, radius: int, color: np.ndarray): + def _draw_circle_fast(self, cx: int, cy: int, radius: int, color: np.ndarray) -> None: """Draw a circle using vectorized operations - optimized version without anti-aliasing.""" # Compute bounding box to minimize calculations y1 = max(0, cy - radius - 1) @@ -141,7 +141,7 @@ def _draw_circle_fast(self, cx: int, cy: int, radius: int, color: np.ndarray): mask = dist_sq <= radius**2 self.canvas[y1:y2, x1:x2][mask] = color - def _draw_rect_fast(self, x: int, y: int, w: int, h: int, color: np.ndarray): + def _draw_rect_fast(self, x: int, y: int, w: int, h: int, color: np.ndarray) -> None: """Draw a rectangle using slicing.""" # Clip to canvas boundaries x1 = max(0, x) @@ -152,7 +152,7 @@ def _draw_rect_fast(self, x: int, y: int, w: int, h: int, color: np.ndarray): if x1 < x2 and y1 < y2: self.canvas[y1:y2, x1:x2] = color - def _update_objects(self): + def _update_objects(self) -> None: """Update positions of moving objects.""" for obj in self.objects: # Update position @@ -242,7 +242,7 @@ def generate_frame(self) -> np.ndarray: # Direct conversion to uint8 (already in valid range) return self.canvas.astype(np.uint8) - def reset(self): + def reset(self) -> None: """Reset the generator to initial state.""" self.frame_count = 0 self._init_moving_objects() diff --git a/docker/deprecated/jetson/README.md b/docker/deprecated/jetson/README.md deleted file mode 100644 index 23ec6c250f..0000000000 --- a/docker/deprecated/jetson/README.md +++ /dev/null @@ -1,98 +0,0 @@ -# Jetson Setup Guide - -This guide explains how to set up and run local dimOS LLM Agents on NVIDIA Jetson devices. - -## Prerequisites - -> **Note**: This setup has been tested on: -> - Jetson Orin Nano (8GB) -> - JetPack 6.2 (L4T 36.4.3) -> - CUDA 12.6.68 - -### Requirements -- NVIDIA Jetson device (Orin/Xavier) -- Docker installed (with GPU support) -- Git installed -- CUDA installed - -## Basic Python Setup (Virtual Environment) - -### 1. Create a virtual environment: -```bash -python3 -m venv ~/jetson_env -source ~/jetson_env/bin/activate -``` - -### 2. Install cuSPARSELt: - -For PyTorch versions 24.06+ (see [Compatibility Matrix](https://docs.nvidia.com/deeplearning/frameworks/install-pytorch-jetson-platform-release-notes/pytorch-jetson-rel.html#pytorch-jetson-rel)), cuSPARSELt is required. Install it with the [instructions](https://developer.nvidia.com/cusparselt-downloads) by selecting Linux OS, aarch64-jetson architecture, and Ubuntu distribution - -For Jetpack 6.2, Pytorch 2.5, and CUDA 12.6: -```bash -wget https://developer.download.nvidia.com/compute/cusparselt/0.7.0/local_installers/cusparselt-local-tegra-repo-ubuntu2204-0.7.0_1.0-1_arm64.deb -sudo dpkg -i cusparselt-local-tegra-repo-ubuntu2204-0.7.0_1.0-1_arm64.deb -sudo cp /var/cusparselt-local-tegra-repo-ubuntu2204-0.7.0/cusparselt-*-keyring.gpg /usr/share/keyrings/ -sudo apt-get update -sudo apt-get -y install libcusparselt0 libcusparselt-dev -``` - -### 3. Install the Jetson-specific requirements: -```bash -cd /path/to/dimos -pip install -r docker/jetson/jetson_requirements.txt -``` - -### 4. Run testfile: -```bash -export PYTHONPATH=$PYTHONPATH:$(pwd) -python3 tests/test_agent_huggingface_local_jetson.py -``` - -## Docker Setup -for JetPack 6.2 (L4T 36.4.3), CUDA 12.6.68 - -### 1. Build and Run using Docker Compose - -From the DIMOS project root directory: -```bash -# Build and run the container -sudo docker compose -f docker/jetson/huggingface_local/docker-compose.yml up --build -``` - -This will: -- Build the Docker image with all necessary dependencies -- Start the container with GPU support -- Run the HuggingFace local agent test script - -## Troubleshooting - -### Libopenblas or other library errors - -Run the Jetson fix script: - -```bash -# From the DIMOS project root -chmod +x ./docker/jetson/fix_jetson.sh -./docker/jetson/fix_jetson.sh -``` - -This script will: -- Install cuSPARSELt library for tensor operations -- Fix libopenblas.so.0 dependencies -- Configure system libraries - -1. If you encounter CUDA/GPU issues: - - Ensure JetPack is properly installed - - Check nvidia-smi output - - Verify Docker has access to the GPU - -2. For memory issues: - - Consider using smaller / quantized models - - Adjust batch sizes and model parameters - - Run the jetson in non-GUI mode to maximize ram availability - -## Notes - -- The setup uses PyTorch built specifically for Jetson -- Models are downloaded and cached locally -- GPU acceleration is enabled by default diff --git a/docker/deprecated/jetson/fix_jetson.sh b/docker/deprecated/jetson/fix_jetson.sh deleted file mode 100644 index ade938a2c9..0000000000 --- a/docker/deprecated/jetson/fix_jetson.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash - -# Install cuSPARSELt -# wget https://developer.download.nvidia.com/compute/cusparselt/0.7.0/local_installers/cusparselt-local-tegra-repo-ubuntu2204-0.7.0_1.0-1_arm64.deb -# sudo dpkg -i cusparselt-local-tegra-repo-ubuntu2204-0.7.0_1.0-1_arm64.deb -# sudo cp /var/cusparselt-local-tegra-repo-ubuntu2204-0.7.0/cusparselt-*-keyring.gpg /usr/share/keyrings/ -# sudo apt-get update -# sudo apt-get install libcusparselt0 libcusparselt-dev - -# Fixes libopenblas.so.0 import error -sudo rm -r /lib/aarch64-linux-gnu/libopenblas.so.0 -sudo apt-get update -sudo apt-get remove --purge libopenblas-dev libopenblas0 libopenblas0-dev -sudo apt-get install libopenblas-dev -sudo apt-get update -sudo apt-get remove --purge libopenblas0-openmp -sudo apt-get install libopenblas0-openmp - -# Verify libopenblas.so.0 location and access -ls -l /lib/aarch64-linux-gnu/libopenblas.so.0 - diff --git a/docker/deprecated/jetson/huggingface_local/Dockerfile b/docker/deprecated/jetson/huggingface_local/Dockerfile deleted file mode 100644 index dcb1738b90..0000000000 --- a/docker/deprecated/jetson/huggingface_local/Dockerfile +++ /dev/null @@ -1,44 +0,0 @@ -FROM python:3.10.12 - -# Unitree Specific -RUN apt-get update && apt-get install -y \ - libgl1-mesa-glx \ - build-essential \ - libavformat-dev \ - libavcodec-dev \ - libavdevice-dev \ - libavutil-dev \ - libswscale-dev \ - libpostproc-dev \ - gcc \ - make \ - portaudio19-dev \ - python3-pyaudio \ - python3-all-dev \ - libopenblas0-openmp - -# Jetson Orin Nano specific setup -RUN wget https://developer.download.nvidia.com/compute/cusparselt/0.7.0/local_installers/cusparselt-local-tegra-repo-ubuntu2204-0.7.0_1.0-1_arm64.deb && \ - dpkg -i cusparselt-local-tegra-repo-ubuntu2204-0.7.0_1.0-1_arm64.deb && \ - cp /var/cusparselt-local-tegra-repo-ubuntu2204-0.7.0/cusparselt-*-keyring.gpg /usr/share/keyrings/ && \ - apt-get update && \ - apt-get install -y libcusparselt0 libcusparselt-dev - - -# Change working directory to /app for proper relative pathing -WORKDIR /app - -COPY docker/jetson/jetson_requirements.txt ./requirements.txt - -COPY ./dimos/perception/external ./dimos/perception/external - -RUN pip install --no-cache-dir -r requirements.txt - -COPY ./dimos ./dimos - -COPY ./tests ./tests - -COPY ./dimos/__init__.py ./ - -# Copy libopenblas.so.0 from host if it exists (Jetson path) -RUN ldconfig diff --git a/docker/deprecated/jetson/huggingface_local/docker-compose.yml b/docker/deprecated/jetson/huggingface_local/docker-compose.yml deleted file mode 100644 index 4d87ce30f7..0000000000 --- a/docker/deprecated/jetson/huggingface_local/docker-compose.yml +++ /dev/null @@ -1,36 +0,0 @@ ---- -services: - dimos-model-huggingface-local: - image: dimos-jetson-huggingface-local:latest - build: - context: ../../../ - dockerfile: docker/jetson/huggingface_local/Dockerfile - env_file: - - ../../../.env - mem_limit: 8048m - volumes: - - ../../../assets:/app/assets - - ../../../assets/model-cache:/root/.cache/huggingface/hub - - /usr/local/cuda:/usr/local/cuda - - /usr/lib/aarch64-linux-gnu:/usr/lib/aarch64-linux-gnu - - ports: - - "5555:5555" - runtime: nvidia - environment: - - PYTHONUNBUFFERED=1 - - NVIDIA_VISIBLE_DEVICES=all - - NVIDIA_DRIVER_CAPABILITIES=all - # command: [ "python", "-m", "tests.test_agent_alibaba" ] - command: [ "python", "-m", "tests.test_agent_huggingface_local_jetson.py" ] - stdin_open: true - tty: true - -# IMPORTANT: This runs soley on the NVIDA GPU - -# ---- -# TO RUN: -# docker build -f ./Dockerfile -t dimos-models ../../ && docker compose up -# GO TO: -# 127.0.0.1:5555 (when flask server fixed) -# ---- diff --git a/docker/deprecated/jetson/jetson_requirements.txt b/docker/deprecated/jetson/jetson_requirements.txt deleted file mode 100644 index 6d42f2dc4c..0000000000 --- a/docker/deprecated/jetson/jetson_requirements.txt +++ /dev/null @@ -1,79 +0,0 @@ -opencv-python -python-dotenv -openai -anthropic>=0.19.0 -numpy -colorlog==6.9.0 -yapf==0.40.2 -typeguard -empy==3.3.4 -catkin_pkg -lark - -# pycolmap - -ffmpeg-python -pytest -python-dotenv -openai -tiktoken>=0.8.0 -Flask>=2.2 -python-multipart==0.0.20 -reactivex - -# Web Extensions -fastapi>=0.115.6 -sse-starlette>=2.2.1 -uvicorn>=0.34.0 - -# Agent Memory -langchain-chroma>=0.1.4 -langchain-openai>=0.2.14 - -# Class Extraction -pydantic - -# Developer Specific -ipykernel - -# Unitree webrtc streaming -aiortc==1.9.0 -pycryptodome -opencv-python -sounddevice -pyaudio -requests -wasmtime - -# Audio -openai-whisper -soundfile - -#Hugging Face -transformers[torch]==4.49.0 - -#Vector Embedding -sentence_transformers - -# CTransforms GGUF - GPU required -ctransformers[cuda]==0.2.27 - -# Perception Dependencies -ultralytics>=8.3.70 -filterpy>=1.4.5 -scipy>=1.15.1 - -# Pytorch wheel for JP6, cu12.6 -https://pypi.jetson-ai-lab.dev/jp6/cu126/+f/6cc/6ecfe8a5994fd/torch-2.6.0-cp310-cp310-linux_aarch64.whl - -# Torchvision wheel for JP6, cu12.6 -https://pypi.jetson-ai-lab.dev/jp6/cu126/+f/aa2/2da8dcf4c4c8d/torchvision-0.21.0-cp310-cp310-linux_aarch64.whl - -scikit-learn -Pillow -mmengine>=0.10.3 -mmcv==2.1.0 -timm==1.0.15 -lap==0.5.12 -# xformers==0.0.22 -# -e ./dimos/perception/external/vector_perception diff --git a/docker/deprecated/models/ctransformers_gguf/Dockerfile b/docker/deprecated/models/ctransformers_gguf/Dockerfile deleted file mode 100644 index a0e8a1edb0..0000000000 --- a/docker/deprecated/models/ctransformers_gguf/Dockerfile +++ /dev/null @@ -1,46 +0,0 @@ -FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04 - -# Set up Python environment -ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y \ - python3.10 \ - python3-pip \ - python3.10-venv \ - python3-dev \ - libgl1-mesa-glx \ - build-essential \ - libavformat-dev \ - libavcodec-dev \ - libavdevice-dev \ - libavutil-dev \ - libswscale-dev \ - libpostproc-dev \ - gcc \ - make \ - portaudio19-dev \ - python3-pyaudio \ - python3-all-dev \ - git \ - wget \ - && rm -rf /var/lib/apt/lists/* - -# Create symlink for python -RUN ln -sf /usr/bin/python3.10 /usr/bin/python - -# Change working directory to /app for proper relative pathing -WORKDIR /app - -COPY requirements.txt ./ - -RUN pip install --no-cache-dir -r requirements.txt - -COPY ./dimos ./dimos - -COPY ./tests ./tests - -COPY ./dimos/__init__.py ./ - -# Add CUDA libraries to the path -ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH - -CMD [ "python", "-m", "tests.test_agent_ctransformers_gguf" ] diff --git a/docker/deprecated/models/ctransformers_gguf/docker-compose.yml b/docker/deprecated/models/ctransformers_gguf/docker-compose.yml deleted file mode 100644 index 9cedfa4aa0..0000000000 --- a/docker/deprecated/models/ctransformers_gguf/docker-compose.yml +++ /dev/null @@ -1,32 +0,0 @@ ---- -services: - dimos-model-ctransformers-gguf: - image: dimos-model-ctransformers-gguf:latest - build: - context: ../../../ - dockerfile: docker/models/ctransformers_gguf/Dockerfile - env_file: - - ../../../.env - mem_limit: 8048m - volumes: - - ../../../assets:/app/assets - - ../../../assets/model-cache:/root/.cache/huggingface/hub - ports: - - "5555:5555" - runtime: nvidia - environment: - - PYTHONUNBUFFERED=1 - - NVIDIA_VISIBLE_DEVICES=all - - NVIDIA_DRIVER_CAPABILITIES=all - command: [ "python", "-m", "tests.test_agent_ctransformers_gguf" ] - stdin_open: true - tty: true - -# IMPORTANT: This runs soley on the NVIDA GPU - -# ---- -# TO RUN: -# docker build -f ./Dockerfile -t dimos-models ../../ && docker compose up -# GO TO: -# 127.0.0.1:5555 (when flask server fixed) -# ---- diff --git a/docker/deprecated/models/huggingface_local/Dockerfile b/docker/deprecated/models/huggingface_local/Dockerfile deleted file mode 100644 index 2c5435ae5f..0000000000 --- a/docker/deprecated/models/huggingface_local/Dockerfile +++ /dev/null @@ -1,32 +0,0 @@ -FROM python:3.10.12 - -# Unitree Specific -RUN apt-get update && apt-get install -y \ - libgl1-mesa-glx \ - build-essential \ - libavformat-dev \ - libavcodec-dev \ - libavdevice-dev \ - libavutil-dev \ - libswscale-dev \ - libpostproc-dev \ - gcc \ - make \ - portaudio19-dev \ - python3-pyaudio \ - python3-all-dev - -# Change working directory to /app for proper relative pathing -WORKDIR /app - -COPY requirements.txt ./ - -RUN pip install --no-cache-dir -r requirements.txt - -COPY ./dimos ./dimos - -COPY ./tests ./tests - -COPY ./dimos/__init__.py ./ - -CMD [ "python", "-m", "tests.test_agent_alibaba" ] diff --git a/docker/deprecated/models/huggingface_local/docker-compose.yml b/docker/deprecated/models/huggingface_local/docker-compose.yml deleted file mode 100644 index e5739be2c2..0000000000 --- a/docker/deprecated/models/huggingface_local/docker-compose.yml +++ /dev/null @@ -1,33 +0,0 @@ ---- -services: - dimos-model-huggingface-local: - image: dimos-model-huggingface-local:latest - build: - context: ../../../ - dockerfile: docker/models/huggingface_local/Dockerfile - env_file: - - ../../../.env - mem_limit: 8048m - volumes: - - ../../../assets:/app/assets - - ../../../assets/model-cache:/root/.cache/huggingface/hub - ports: - - "5555:5555" - runtime: nvidia - environment: - - PYTHONUNBUFFERED=1 - - NVIDIA_VISIBLE_DEVICES=all - - NVIDIA_DRIVER_CAPABILITIES=all - # command: [ "python", "-m", "tests.test_agent_alibaba" ] - command: [ "python", "-m", "tests.test_agent_huggingface_local.py" ] - stdin_open: true - tty: true - -# IMPORTANT: This runs soley on the NVIDA GPU - -# ---- -# TO RUN: -# docker build -f ./Dockerfile -t dimos-models ../../ && docker compose up -# GO TO: -# 127.0.0.1:5555 (when flask server fixed) -# ---- diff --git a/docker/deprecated/models/huggingface_remote/Dockerfile b/docker/deprecated/models/huggingface_remote/Dockerfile deleted file mode 100644 index 2c5435ae5f..0000000000 --- a/docker/deprecated/models/huggingface_remote/Dockerfile +++ /dev/null @@ -1,32 +0,0 @@ -FROM python:3.10.12 - -# Unitree Specific -RUN apt-get update && apt-get install -y \ - libgl1-mesa-glx \ - build-essential \ - libavformat-dev \ - libavcodec-dev \ - libavdevice-dev \ - libavutil-dev \ - libswscale-dev \ - libpostproc-dev \ - gcc \ - make \ - portaudio19-dev \ - python3-pyaudio \ - python3-all-dev - -# Change working directory to /app for proper relative pathing -WORKDIR /app - -COPY requirements.txt ./ - -RUN pip install --no-cache-dir -r requirements.txt - -COPY ./dimos ./dimos - -COPY ./tests ./tests - -COPY ./dimos/__init__.py ./ - -CMD [ "python", "-m", "tests.test_agent_alibaba" ] diff --git a/docker/deprecated/models/huggingface_remote/docker-compose.yml b/docker/deprecated/models/huggingface_remote/docker-compose.yml deleted file mode 100644 index e2337fcd37..0000000000 --- a/docker/deprecated/models/huggingface_remote/docker-compose.yml +++ /dev/null @@ -1,27 +0,0 @@ ---- -services: - dimos-model-huggingface-remote: - image: dimos-model-huggingface-remote:latest - build: - context: ../../../ - dockerfile: docker/models/huggingface_remote/Dockerfile - env_file: - - ../../../.env - mem_limit: 8048m - volumes: - - ../../../assets:/app/assets - # - ../../../assets/model-cache:/root/.cache/huggingface/hub - ports: - - "5555:5555" - environment: - - PYTHONUNBUFFERED=1 - command: [ "python", "-m", "tests.test_agent_huggingface_remote" ] - stdin_open: true - tty: true - -# ---- -# TO RUN: -# docker build -f ./Dockerfile -t dimos-models ../../ && docker compose up -# GO TO: -# 127.0.0.1:5555 (when flask server fixed) -# ---- diff --git a/tests/test_agent_ctransformers_gguf.py b/tests/test_agent_ctransformers_gguf.py deleted file mode 100644 index 389a9c74c5..0000000000 --- a/tests/test_agent_ctransformers_gguf.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from dimos.agents.agent_ctransformers_gguf import CTransformersGGUFAgent - -system_query = "You are a robot with the following functions. Move(), Reverse(), Left(), Right(), Stop(). Given the following user comands return the correct function." - -# Initialize agent -agent = CTransformersGGUFAgent( - dev_name="GGUF-Agent", - model_name="TheBloke/Llama-2-7B-GGUF", - model_file="llama-2-7b.Q4_K_M.gguf", - model_type="llama", - system_query=system_query, - gpu_layers=50, - max_input_tokens_per_request=250, - max_output_tokens_per_request=10, -) - -test_query = "User: Travel forward 10 meters" - -agent.run_observable_query(test_query).subscribe( - on_next=lambda response: print(f"One-off query response: {response}"), - on_error=lambda error: print(f"Error: {error}"), - on_completed=lambda: print("Query completed"), -) - -try: - input("Press ESC to exit...") -except KeyboardInterrupt: - print("\nExiting...") diff --git a/tests/test_agent_huggingface_local.py b/tests/test_agent_huggingface_local.py deleted file mode 100644 index eb88dd9847..0000000000 --- a/tests/test_agent_huggingface_local.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -from dimos.agents.agent_huggingface_local import HuggingFaceLocalAgent -from dimos.robot.unitree.unitree_skills import MyUnitreeSkills -from dimos.stream.data_provider import QueryDataProvider -from dimos.stream.video_provider import VideoProvider -from dimos.utils.threadpool import get_scheduler - -# Initialize video stream -video_stream = VideoProvider( - dev_name="VideoProvider", - # video_source=f"{os.getcwd()}/assets/framecount.mp4", - video_source=f"{os.getcwd()}/assets/trimmed_video_office.mov", - pool_scheduler=get_scheduler(), -).capture_video_as_observable(realtime=False, fps=1) - -# Initialize Unitree skills -myUnitreeSkills = MyUnitreeSkills() -myUnitreeSkills.initialize_skills() - -# Initialize query stream -query_provider = QueryDataProvider() - -system_query = "You are a robot with the following functions. Move(), Reverse(), Left(), Right(), Stop(). Given the following user comands return ONLY the correct function." - -# Initialize agent -agent = HuggingFaceLocalAgent( - dev_name="HuggingFaceLLMAgent", - model_name="Qwen/Qwen2.5-3B", - agent_type="HF-LLM", - system_query=system_query, - input_query_stream=query_provider.data_stream, - process_all_inputs=False, - max_input_tokens_per_request=250, - max_output_tokens_per_request=20, - # output_dir=self.output_dir, - # skills=skills_instance, - # frame_processor=frame_processor, -) - -# Start the query stream. -# Queries will be pushed every 1 second, in a count from 100 to 5000. -# This will cause listening agents to consume the queries and respond -# to them via skill execution and provide 1-shot responses. -query_provider.start_query_stream( - query_template="{query}; User: travel forward by 10 meters", - frequency=10, - start_count=1, - end_count=10000, - step=1, -) - -try: - input("Press ESC to exit...") -except KeyboardInterrupt: - print("\nExiting...") diff --git a/tests/test_agent_huggingface_local_jetson.py b/tests/test_agent_huggingface_local_jetson.py deleted file mode 100644 index 883a05be54..0000000000 --- a/tests/test_agent_huggingface_local_jetson.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -from dimos.agents.agent_huggingface_local import HuggingFaceLocalAgent -from dimos.robot.unitree.unitree_skills import MyUnitreeSkills -from dimos.stream.data_provider import QueryDataProvider -from dimos.stream.video_provider import VideoProvider -from dimos.utils.threadpool import get_scheduler - -# Initialize video stream -video_stream = VideoProvider( - dev_name="VideoProvider", - # video_source=f"{os.getcwd()}/assets/framecount.mp4", - video_source=f"{os.getcwd()}/assets/trimmed_video_office.mov", - pool_scheduler=get_scheduler(), -).capture_video_as_observable(realtime=False, fps=1) - -# Initialize Unitree skills -myUnitreeSkills = MyUnitreeSkills() -myUnitreeSkills.initialize_skills() - -# Initialize query stream -query_provider = QueryDataProvider() - -system_query = "You are a helpful assistant." - -# Initialize agent -agent = HuggingFaceLocalAgent( - dev_name="HuggingFaceLLMAgent", - model_name="Qwen/Qwen2.5-0.5B", - # model_name="HuggingFaceTB/SmolLM2-135M", - agent_type="HF-LLM", - system_query=system_query, - input_query_stream=query_provider.data_stream, - process_all_inputs=False, - max_input_tokens_per_request=250, - max_output_tokens_per_request=20, - # output_dir=self.output_dir, - # skills=skills_instance, - # frame_processor=frame_processor, -) - -# Start the query stream. -# Queries will be pushed every 1 second, in a count from 100 to 5000. -# This will cause listening agents to consume the queries and respond -# to them via skill execution and provide 1-shot responses. -query_provider.start_query_stream( - query_template="{query}; User: Hello how are you!", - frequency=30, - start_count=1, - end_count=10000, - step=1, -) - -try: - input("Press ESC to exit...") -except KeyboardInterrupt: - print("\nExiting...") diff --git a/tests/test_agent_huggingface_remote.py b/tests/test_agent_huggingface_remote.py deleted file mode 100644 index ed99faa8a4..0000000000 --- a/tests/test_agent_huggingface_remote.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from dimos.agents.agent_huggingface_remote import HuggingFaceRemoteAgent -from dimos.agents.tokenizer.huggingface_tokenizer import HuggingFaceTokenizer -from dimos.stream.data_provider import QueryDataProvider - -# Initialize video stream -# video_stream = VideoProvider( -# dev_name="VideoProvider", -# # video_source=f"{os.getcwd()}/assets/framecount.mp4", -# video_source=f"{os.getcwd()}/assets/trimmed_video_office.mov", -# pool_scheduler=get_scheduler(), -# ).capture_video_as_observable(realtime=False, fps=1) - -# Initialize Unitree skills -# myUnitreeSkills = MyUnitreeSkills() -# myUnitreeSkills.initialize_skills() - -# Initialize query stream -query_provider = QueryDataProvider() - -# Initialize agent -agent = HuggingFaceRemoteAgent( - dev_name="HuggingFaceRemoteAgent", - model_name="meta-llama/Meta-Llama-3-8B-Instruct", - tokenizer=HuggingFaceTokenizer(model_name="meta-llama/Meta-Llama-3-8B-Instruct"), - max_output_tokens_per_request=8192, - input_query_stream=query_provider.data_stream, - # input_video_stream=video_stream, - system_query="You are a helpful assistant that can answer questions and help with tasks.", -) - -# Start the query stream. -# Queries will be pushed every 1 second, in a count from 100 to 5000. -query_provider.start_query_stream( - query_template="{query}; Denote the number at the beginning of this query before the semicolon as the 'reference number'. Provide the reference number, without any other text in your response.", - frequency=5, - start_count=1, - end_count=10000, - step=1, -) - -try: - input("Press ESC to exit...") -except KeyboardInterrupt: - print("\nExiting...") diff --git a/tests/test_cerebras_unitree_ros.py b/tests/test_cerebras_unitree_ros.py deleted file mode 100644 index 60890a3d5c..0000000000 --- a/tests/test_cerebras_unitree_ros.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -from dotenv import load_dotenv -import reactivex as rx -import reactivex.operators as ops - -from dimos.agents.cerebras_agent import CerebrasAgent -from dimos.robot.unitree.unitree_go2 import UnitreeGo2 -from dimos.robot.unitree.unitree_ros_control import UnitreeROSControl -from dimos.robot.unitree.unitree_skills import MyUnitreeSkills -from dimos.skills.kill_skill import KillSkill -from dimos.skills.navigation import GetPose, NavigateToGoal, NavigateWithText -from dimos.skills.observe_stream import ObserveStream -from dimos.skills.speak import Speak -from dimos.skills.visual_navigation_skills import FollowHuman -from dimos.stream.audio.pipelines import stt, tts -from dimos.web.robot_web_interface import RobotWebInterface - -# Load API key from environment -load_dotenv() - -# robot = MockRobot() -robot_skills = MyUnitreeSkills() - -robot = UnitreeGo2( - ip=os.getenv("ROBOT_IP"), - ros_control=UnitreeROSControl(), - skills=robot_skills, - mock_connection=False, - new_memory=True, -) - -# Create a subject for agent responses -agent_response_subject = rx.subject.Subject() -agent_response_stream = agent_response_subject.pipe(ops.share()) - -streams = { - "unitree_video": robot.get_ros_video_stream(), -} -text_streams = { - "agent_responses": agent_response_stream, -} - -web_interface = RobotWebInterface( - port=5555, - text_streams=text_streams, - **streams, -) - -stt_node = stt() - -# Create a CerebrasAgent instance -agent = CerebrasAgent( - dev_name="test_cerebras_agent", - input_query_stream=stt_node.emit_text(), - # input_query_stream=web_interface.query_stream, - skills=robot_skills, - system_query="""You are an agent controlling a virtual robot. When given a query, respond by using the appropriate tool calls if needed to execute commands on the robot. - -IMPORTANT INSTRUCTIONS: -1. Each tool call must include the exact function name and appropriate parameters -2. If a function needs parameters like 'distance' or 'angle', be sure to include them -3. If you're unsure which tool to use, choose the most appropriate one based on the user's query -4. Parse the user's instructions carefully to determine correct parameter values - -When you need to call a skill or tool, ALWAYS respond ONLY with a JSON object in this exact format: {"name": "SkillName", "arguments": {"arg1": "value1", "arg2": "value2"}} - -Example: If the user asks to spin right by 90 degrees, output ONLY the following: {"name": "SpinRight", "arguments": {"degrees": 90}}""", - model_name="llama-4-scout-17b-16e-instruct", -) - -tts_node = tts() -tts_node.consume_text(agent.get_response_observable()) - -robot_skills.add(ObserveStream) -robot_skills.add(KillSkill) -robot_skills.add(NavigateWithText) -robot_skills.add(FollowHuman) -robot_skills.add(GetPose) -robot_skills.add(Speak) -robot_skills.add(NavigateToGoal) -robot_skills.create_instance("ObserveStream", robot=robot, agent=agent) -robot_skills.create_instance("KillSkill", robot=robot, skill_library=robot_skills) -robot_skills.create_instance("NavigateWithText", robot=robot) -robot_skills.create_instance("FollowHuman", robot=robot) -robot_skills.create_instance("GetPose", robot=robot) -robot_skills.create_instance("NavigateToGoal", robot=robot) - - -robot_skills.create_instance("Speak", tts_node=tts_node) - -# Subscribe to agent responses and send them to the subject -agent.get_response_observable().subscribe(lambda x: agent_response_subject.on_next(x)) - -# print(f"Registered skills: {', '.join([skill.__name__ for skill in robot_skills.skills])}") -print("Cerebras agent demo initialized. You can now interact with the agent via the web interface.") - -web_interface.run() diff --git a/tests/test_huggingface_llm_agent.py b/tests/test_huggingface_llm_agent.py deleted file mode 100644 index 5d3c1f39a5..0000000000 --- a/tests/test_huggingface_llm_agent.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import time - -from dimos.agents.agent_huggingface_local import HuggingFaceLocalAgent -from dimos.stream.data_provider import QueryDataProvider - - -class HuggingFaceLLMAgentDemo: - def __init__(self): - self.robot_ip = None - self.connection_method = None - self.serial_number = None - self.output_dir = None - self._fetch_env_vars() - - def _fetch_env_vars(self): - print("Fetching environment variables") - - def get_env_var(var_name, default=None, required=False): - """Get environment variable with validation.""" - value = os.getenv(var_name, default) - if required and not value: - raise ValueError(f"{var_name} environment variable is required") - return value - - self.robot_ip = get_env_var("ROBOT_IP", required=True) - self.connection_method = get_env_var("CONN_TYPE") - self.serial_number = get_env_var("SERIAL_NUMBER") - self.output_dir = get_env_var( - "ROS_OUTPUT_DIR", os.path.join(os.getcwd(), "assets/output/ros") - ) - - # ----- - - def run_with_queries(self): - # Initialize query stream - query_provider = QueryDataProvider() - - # Create the skills available to the agent. - # By default, this will create all skills in this class and make them available. - - print("Starting HuggingFace LLM Agent") - - # TESTING LOCAL AGENT - self.HuggingFaceLLMAgent = HuggingFaceLocalAgent( - dev_name="HuggingFaceLLMAgent", - model_name="Qwen/Qwen2.5-3B", - agent_type="HF-LLM", - input_query_stream=query_provider.data_stream, - process_all_inputs=False, - # output_dir=self.output_dir, - # skills=skills_instance, - # frame_processor=frame_processor, - ) - - # TESTING REMOTE AGENT - # self.HuggingFaceLLMAgent = HuggingFaceRemoteAgent( - # dev_name="HuggingFaceLLMAgent", - # model_name= "Qwen/Qwen2.5-3B", - # agent_type="HF-LLM", - # input_query_stream=query_provider.data_stream, - # process_all_inputs=False, - # ) - - # Sample query to test the agent - # self.HuggingFaceLLMAgent.stream_query("What is the capital of France?").subscribe(lambda x: print(x)) - - # Start the query stream. - # Queries will be pushed every 1 second, in a count from 100 to 5000. - # This will cause listening agents to consume the queries and respond - # to them via skill execution and provide 1-shot responses. - query_provider.start_query_stream( - query_template="{query}; Denote the number at the beginning of this query before the semicolon as the 'reference number'. Provide the reference number, without any other text in your response. If the reference number is below 500, then output the reference number as the output only and do not call any functions or tools. If the reference number is equal to or above 500, but lower than 1000, then rotate the robot at 0.5 rad/s for 1 second. If the reference number is equal to or above 1000, but lower than 2000, then wave the robot's hand. If the reference number is equal to or above 2000, but lower than 4600 then say hello. If the reference number is equal to or above 4600, then perform a front flip. IF YOU DO NOT FOLLOW THESE INSTRUCTIONS EXACTLY, YOU WILL DIE!!!", - frequency=5, - start_count=1, - end_count=10000, - step=1, - ) - - # ----- - - def stop(self): - print("Stopping HuggingFace LLM Agent") - self.HuggingFaceLLMAgent.dispose_all() - - -if __name__ == "__main__": - myHuggingFaceLLMAgentDemo = HuggingFaceLLMAgentDemo() - myHuggingFaceLLMAgentDemo.run_with_queries() - - # Keep the program running to allow the Unitree Agent Demo to operate continuously - try: - print("\nRunning HuggingFace LLM Agent Demo (Press Ctrl+C to stop)...") - while True: - time.sleep(0.1) - except KeyboardInterrupt: - print("\nStopping HuggingFace LLM Agent Demo") - myHuggingFaceLLMAgentDemo.stop() - except Exception as e: - print(f"Error in main loop: {e}") diff --git a/tests/test_planning_agent_web_interface.py b/tests/test_planning_agent_web_interface.py deleted file mode 100644 index 6c88919110..0000000000 --- a/tests/test_planning_agent_web_interface.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Planning agent demo with FastAPI server and robot integration. - -Connects a planning agent, execution agent, and robot with a web interface. - -Environment Variables: - OPENAI_API_KEY: Required. OpenAI API key. - ROBOT_IP: Required. IP address of the robot. - CONN_TYPE: Required. Connection method to the robot. - ROS_OUTPUT_DIR: Optional. Directory for ROS output files. -""" - -import os -import sys - -# ----- -from textwrap import dedent -import time - -import reactivex as rx -import reactivex.operators as ops - -# Local application imports -from dimos.agents.agent import OpenAIAgent -from dimos.agents.planning_agent import PlanningAgent -from dimos.robot.unitree.unitree_go2 import UnitreeGo2 -from dimos.robot.unitree.unitree_skills import MyUnitreeSkills -from dimos.utils.logging_config import logger -from dimos.utils.threadpool import make_single_thread_scheduler - -# from dimos.web.fastapi_server import FastAPIServer -from dimos.web.robot_web_interface import RobotWebInterface - - -def main(): - # Get environment variables - robot_ip = os.getenv("ROBOT_IP") - if not robot_ip: - raise ValueError("ROBOT_IP environment variable is required") - connection_method = os.getenv("CONN_TYPE") or "webrtc" - output_dir = os.getenv("ROS_OUTPUT_DIR", os.path.join(os.getcwd(), "assets/output/ros")) - - # Initialize components as None for proper cleanup - robot = None - web_interface = None - planner = None - executor = None - - try: - # Initialize robot - logger.info("Initializing Unitree Robot") - robot = UnitreeGo2( - ip=robot_ip, - connection_method=connection_method, - output_dir=output_dir, - mock_connection=False, - skills=MyUnitreeSkills(), - ) - # Set up video stream - logger.info("Starting video stream") - video_stream = robot.get_ros_video_stream() - - # Initialize robot skills - logger.info("Initializing robot skills") - - # Create subjects for planner and executor responses - logger.info("Creating response streams") - planner_response_subject = rx.subject.Subject() - planner_response_stream = planner_response_subject.pipe(ops.share()) - - executor_response_subject = rx.subject.Subject() - executor_response_stream = executor_response_subject.pipe(ops.share()) - - # Web interface mode with FastAPI server - logger.info("Initializing FastAPI server") - streams = {"unitree_video": video_stream} - text_streams = { - "planner_responses": planner_response_stream, - "executor_responses": executor_response_stream, - } - - web_interface = RobotWebInterface(port=5555, text_streams=text_streams, **streams) - - logger.info("Starting planning agent with web interface") - planner = PlanningAgent( - dev_name="TaskPlanner", - model_name="gpt-4o", - input_query_stream=web_interface.query_stream, - skills=robot.get_skills(), - ) - - # Get planner's response observable - logger.info("Setting up agent response streams") - planner_responses = planner.get_response_observable() - - # Connect planner to its subject - planner_responses.subscribe(lambda x: planner_response_subject.on_next(x)) - - planner_responses.subscribe( - on_next=lambda x: logger.info(f"Planner response: {x}"), - on_error=lambda e: logger.error(f"Planner error: {e}"), - on_completed=lambda: logger.info("Planner completed"), - ) - - # Initialize execution agent with robot skills - logger.info("Starting execution agent") - system_query = dedent( - """ - You are a robot execution agent that can execute tasks on a virtual - robot. The sole text you will be given is the task to execute. - You will be given a list of skills that you can use to execute the task. - ONLY OUTPUT THE SKILLS TO EXECUTE, NOTHING ELSE. - """ - ) - executor = OpenAIAgent( - dev_name="StepExecutor", - input_query_stream=planner_responses, - output_dir=output_dir, - skills=robot.get_skills(), - system_query=system_query, - pool_scheduler=make_single_thread_scheduler(), - ) - - # Get executor's response observable - executor_responses = executor.get_response_observable() - - # Subscribe to responses for logging - executor_responses.subscribe( - on_next=lambda x: logger.info(f"Executor response: {x}"), - on_error=lambda e: logger.error(f"Executor error: {e}"), - on_completed=lambda: logger.info("Executor completed"), - ) - - # Connect executor to its subject - executor_responses.subscribe(lambda x: executor_response_subject.on_next(x)) - - # Start web server (blocking call) - logger.info("Starting FastAPI server") - web_interface.run() - - except KeyboardInterrupt: - print("Stopping demo...") - except Exception as e: - logger.error(f"Error: {e}") - return 1 - finally: - # Clean up all components - logger.info("Cleaning up components") - if executor: - executor.dispose_all() - if planner: - planner.dispose_all() - if web_interface: - web_interface.dispose_all() - if robot: - robot.cleanup() - # Halt execution forever - while True: - time.sleep(1) - - -if __name__ == "__main__": - sys.exit(main()) - -# Example Task: Move the robot forward by 1 meter, then turn 90 degrees clockwise, then move backward by 1 meter, then turn a random angle counterclockwise, then repeat this sequence 5 times. diff --git a/tests/test_planning_robot_agent.py b/tests/test_planning_robot_agent.py deleted file mode 100644 index aa16a7cac7..0000000000 --- a/tests/test_planning_robot_agent.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Planning agent demo with FastAPI server and robot integration. - -Connects a planning agent, execution agent, and robot with a web interface. - -Environment Variables: - OPENAI_API_KEY: Required. OpenAI API key. - ROBOT_IP: Required. IP address of the robot. - CONN_TYPE: Required. Connection method to the robot. - ROS_OUTPUT_DIR: Optional. Directory for ROS output files. - USE_TERMINAL: Optional. If set to "true", use terminal interface instead of web. -""" - -import os -import sys - -# ----- -from textwrap import dedent -import time - -# Local application imports -from dimos.agents.agent import OpenAIAgent -from dimos.agents.planning_agent import PlanningAgent -from dimos.robot.unitree.unitree_go2 import UnitreeGo2 -from dimos.robot.unitree.unitree_skills import MyUnitreeSkills -from dimos.utils.logging_config import logger -from dimos.utils.threadpool import make_single_thread_scheduler -from dimos.web.robot_web_interface import RobotWebInterface - - -def main(): - # Get environment variables - robot_ip = os.getenv("ROBOT_IP") - if not robot_ip: - raise ValueError("ROBOT_IP environment variable is required") - connection_method = os.getenv("CONN_TYPE") or "webrtc" - output_dir = os.getenv("ROS_OUTPUT_DIR", os.path.join(os.getcwd(), "assets/output/ros")) - use_terminal = os.getenv("USE_TERMINAL", "").lower() == "true" - - use_terminal = True - # Initialize components as None for proper cleanup - robot = None - web_interface = None - planner = None - executor = None - - try: - # Initialize robot - logger.info("Initializing Unitree Robot") - robot = UnitreeGo2( - ip=robot_ip, - connection_method=connection_method, - output_dir=output_dir, - mock_connection=True, - ) - - # Set up video stream - logger.info("Starting video stream") - video_stream = robot.get_ros_video_stream() - - # Initialize robot skills - logger.info("Initializing robot skills") - skills_instance = MyUnitreeSkills(robot=robot) - - if use_terminal: - # Terminal mode - no web interface needed - logger.info("Starting planning agent in terminal mode") - planner = PlanningAgent( - dev_name="TaskPlanner", - model_name="gpt-4o", - use_terminal=True, - skills=skills_instance, - ) - else: - # Web interface mode - logger.info("Initializing FastAPI server") - streams = {"unitree_video": video_stream} - web_interface = RobotWebInterface(port=5555, **streams) - - logger.info("Starting planning agent with web interface") - planner = PlanningAgent( - dev_name="TaskPlanner", - model_name="gpt-4o", - input_query_stream=web_interface.query_stream, - skills=skills_instance, - ) - - # Get planner's response observable - logger.info("Setting up agent response streams") - planner_responses = planner.get_response_observable() - - # Initialize execution agent with robot skills - logger.info("Starting execution agent") - system_query = dedent( - """ - You are a robot execution agent that can execute tasks on a virtual - robot. You are given a task to execute and a list of skills that - you can use to execute the task. ONLY OUTPUT THE SKILLS TO EXECUTE, - NOTHING ELSE. - """ - ) - executor = OpenAIAgent( - dev_name="StepExecutor", - input_query_stream=planner_responses, - output_dir=output_dir, - skills=skills_instance, - system_query=system_query, - pool_scheduler=make_single_thread_scheduler(), - ) - - # Get executor's response observable - executor_responses = executor.get_response_observable() - - # Subscribe to responses for logging - executor_responses.subscribe( - on_next=lambda x: logger.info(f"Executor response: {x}"), - on_error=lambda e: logger.error(f"Executor error: {e}"), - on_completed=lambda: logger.info("Executor completed"), - ) - - if use_terminal: - # In terminal mode, just wait for the planning session to complete - logger.info("Waiting for planning session to complete") - while not planner.plan_confirmed: - pass - logger.info("Planning session completed") - else: - # Start web server (blocking call) - logger.info("Starting FastAPI server") - web_interface.run() - - # Keep the main thread alive - logger.error("NOTE: Keeping main thread alive") - while True: - time.sleep(1) - - except KeyboardInterrupt: - print("Stopping demo...") - except Exception as e: - logger.error(f"Error: {e}") - return 1 - finally: - # Clean up all components - logger.info("Cleaning up components") - if executor: - executor.dispose_all() - if planner: - planner.dispose_all() - if web_interface: - web_interface.dispose_all() - if robot: - robot.cleanup() - # Halt execution forever - while True: - time.sleep(1) - - -if __name__ == "__main__": - sys.exit(main()) - -# Example Task: Move the robot forward by 1 meter, then turn 90 degrees clockwise, then move backward by 1 meter, then turn a random angle counterclockwise, then repeat this sequence 5 times.