Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
657c9a4
WIP agent tensorzero refactor
spomichter Aug 4, 2025
94bd48a
Added new agent and agent modules tests
spomichter Aug 5, 2025
a995fd1
New agent tensorzero implementation and agent modules
spomichter Aug 5, 2025
7206673
Added agent encode to Image type
spomichter Aug 5, 2025
ac3a824
Added agent message types
spomichter Aug 5, 2025
8de71e2
initial sketch of module-agent interface
leshy Aug 5, 2025
95e6185
message passing established
leshy Aug 5, 2025
3cd52a5
tool config propagation
leshy Aug 5, 2025
ac21dca
types extracted, tool config
leshy Aug 5, 2025
d80b1ce
__init__ files
leshy Aug 5, 2025
6468d96
agent interface work
leshy Aug 5, 2025
cceafb4
test fix
leshy Aug 5, 2025
debc902
working system
leshy Aug 5, 2025
a6e9443
agent callback, tool test
leshy Aug 5, 2025
33f86cc
tool decorator implies RPC decorator
leshy Aug 5, 2025
7ab913c
small cleanup
leshy Aug 6, 2025
6a47966
tool -> skill rename
leshy Aug 6, 2025
9b79096
type fixes
leshy Aug 6, 2025
1e33b96
module test
leshy Aug 6, 2025
afa7bcf
modules provide tf by default
leshy Aug 6, 2025
76eedee
agentspy cli, other cli tools installing correctly via pyproject
leshy Aug 6, 2025
bf35f92
small fixes
leshy Aug 6, 2025
21acf76
cleanup
leshy Aug 6, 2025
5f6c5db
small changes
leshy Aug 6, 2025
0f0beaa
disabled test_gateway
leshy Aug 6, 2025
be15482
Merge branch 'skillsketch' into great_agent_unification
leshy Aug 6, 2025
d6b8183
working on merging agent and skill implementations
leshy Aug 7, 2025
bd3888c
compatibility fixes for coordinator
leshy Aug 7, 2025
50e0fbe
work on agent
leshy Aug 7, 2025
e46ed9d
converting base.py
leshy Aug 7, 2025
8f5c052
parallel calls, toolids supported by skill coordinator
leshy Aug 8, 2025
572eb0d
looped and parallel tool calling, skill coordinator has the wheel
leshy Aug 8, 2025
10c8bd6
typo
leshy Aug 8, 2025
2463b94
coordinator image encoding
leshy Aug 8, 2025
8ec430a
working agent loop
leshy Aug 9, 2025
89b15c6
dev merge
leshy Aug 9, 2025
5f3f7b2
agents2
leshy Aug 9, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions bin/foxglove-bridge

This file was deleted.

7 changes: 0 additions & 7 deletions bin/lcmspy

This file was deleted.

101 changes: 101 additions & 0 deletions dimos/agents/agent_message.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Copyright 2025 Dimensional Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""AgentMessage type for multimodal agent communication."""

from dataclasses import dataclass, field
from typing import List, Optional, Union
import time

from dimos.msgs.sensor_msgs.Image import Image
from dimos.agents.agent_types import AgentImage


@dataclass
class AgentMessage:
    """Multimodal message exchanged with an agent.

    A message bundles any number of text fragments with any number of
    ``AgentImage`` objects (base64 encoded), so vision-enabled agents can
    receive both kinds of content together.  The fragments held in
    ``messages`` are meant to be merged into one string when the message
    is sent to the LLM (see ``get_combined_text``).
    """

    # Text fragments, in insertion order.
    messages: List[str] = field(default_factory=list)
    # Attached images, in insertion order.
    images: List[AgentImage] = field(default_factory=list)
    # Optional identifier of whoever produced the message.
    sender_id: Optional[str] = None
    # Defaults to ``time.time()`` at construction.
    timestamp: float = field(default_factory=time.time)

    def add_text(self, text: str) -> None:
        """Append ``text`` as a new fragment; empty text is ignored."""
        if not text:
            return
        self.messages.append(text)

    def add_image(self, image: Union[Image, AgentImage]) -> None:
        """Attach an image, wrapping a raw ``Image`` in an ``AgentImage``.

        Raises:
            TypeError: If ``image`` is neither an ``Image`` nor an ``AgentImage``.
        """
        if isinstance(image, Image):
            # Raw sensor image: encode it and keep its size/format/frame info.
            image = AgentImage(
                base64_jpeg=image.agent_encode(),
                width=image.width,
                height=image.height,
                metadata={"format": image.format.value, "frame_id": image.frame_id},
            )
        elif not isinstance(image, AgentImage):
            raise TypeError(f"Expected Image or AgentImage, got {type(image)}")
        self.images.append(image)

    def has_text(self) -> bool:
        """Return True when at least one non-empty text fragment is present."""
        return any(self.messages)

    def has_images(self) -> bool:
        """Return True when at least one image is attached."""
        return bool(self.images)

    def is_multimodal(self) -> bool:
        """Return True when the message carries both text and images."""
        if not self.has_text():
            return False
        return self.has_images()

    def get_primary_text(self) -> Optional[str]:
        """Return the first text fragment, or None when there is none."""
        if not self.messages:
            return None
        return self.messages[0]

    def get_primary_image(self) -> Optional[AgentImage]:
        """Return the first attached image, or None when there is none."""
        if not self.images:
            return None
        return self.images[0]

    def get_combined_text(self) -> str:
        """Return all non-empty fragments joined by single spaces."""
        return " ".join(filter(None, self.messages))

    def clear(self) -> None:
        """Remove every text fragment and image (the lists are emptied in place)."""
        for bucket in (self.messages, self.images):
            bucket.clear()

    def __repr__(self) -> str:
        """Return a compact summary with counts instead of full content."""
        summary = ", ".join(
            (
                f"texts={len(self.messages)}",
                f"images={len(self.images)}",
                f"sender='{self.sender_id}'",
                f"timestamp={self.timestamp}",
            )
        )
        return f"AgentMessage({summary})"
76 changes: 76 additions & 0 deletions dimos/agents/agent_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Copyright 2025 Dimensional Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Agent-specific types for message passing."""

import time
from dataclasses import dataclass, field
from typing import Any, Dict, List, Literal, Optional, TypedDict


@dataclass
class AgentImage:
    """An image packaged for agent consumption.

    The picture is held as a base64-encoded JPEG string, ready to be handed
    directly to an LLM/vision model.
    """

    # Base64-encoded JPEG payload.
    base64_jpeg: str
    # Optional pixel dimensions.
    width: Optional[int] = None
    height: Optional[int] = None
    # Free-form extra information; each instance gets its own dict.
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __repr__(self) -> str:
        """Summarise size and metadata keys without dumping the payload."""
        meta_keys = list(self.metadata)
        return f"AgentImage(size={self.width}x{self.height}, metadata={meta_keys})"


@dataclass
class ToolCall:
    """A tool/function invocation requested by the LLM."""

    # Identifier of this call.
    id: str
    # Name of the tool/function to invoke.
    name: str
    # Arguments for the invocation, keyed by name.
    arguments: Dict[str, Any]
    # One of: pending, executing, completed, failed.
    status: str = "pending"

    def __repr__(self) -> str:
        """Show id, name and status; the arguments are left out."""
        summary = f"id='{self.id}', name='{self.name}', status='{self.status}'"
        return f"ToolCall({summary})"


@dataclass
class AgentResponse:
    """Result of an agent query, including optional tool-call requests.

    Follows common LLM response patterns: a content string plus metadata.
    """

    content: str
    role: str = "assistant"
    tool_calls: Optional[List[ToolCall]] = None
    # Indicates if tool execution is needed.
    requires_follow_up: bool = False
    metadata: Dict[str, Any] = field(default_factory=dict)
    # Defaults to ``time.time()`` at construction.
    timestamp: float = field(default_factory=time.time)

    def __repr__(self) -> str:
        """Show role, a content preview capped at 50 characters, and tool count."""
        preview = self.content
        if len(preview) > 50:
            # Long content is truncated and marked with an ellipsis.
            preview = preview[:50] + "..."

        # The tool count is only shown when there is at least one call.
        tool_info = ""
        if self.tool_calls:
            tool_info = f", tools={len(self.tool_calls)}"

        return f"AgentResponse(role='{self.role}', content='{preview}'{tool_info})"


class ToolMessage(TypedDict):
    """Typed dictionary describing a chat message whose role is ``"tool"``.

    ``role`` is declared as an annotated key.  A plain ``role = "tool"``
    assignment inside a TypedDict body does not declare a key: it is absent
    from ``__annotations__`` and type checkers reject the statement.
    """

    # Always the literal string "tool".
    role: Literal["tool"]
    # Presumably matches the ``id`` of the originating tool call -- confirm with callers.
    tool_call_id: str
    # Message content as a string.
    content: str
    # Presumably the name of the tool that was called -- confirm with callers.
    name: str
11 changes: 9 additions & 2 deletions dimos/agents/memory/image_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def __init__(self, model_name: str = "clip", dimensions: int = 512):
self.dimensions = dimensions
self.model = None
self.processor = None
self.model_path = None

self._initialize_model()

Expand All @@ -68,10 +69,16 @@ def _initialize_model(self):

if self.model_name == "clip":
model_id = get_data("models_clip") / "model.onnx"
self.model_path = str(model_id) # Store for pickling
processor_id = "openai/clip-vit-base-patch32"
self.model = ort.InferenceSession(model_id)

providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]

self.model = ort.InferenceSession(str(model_id), providers=providers)

actual_providers = self.model.get_providers()
self.processor = CLIPProcessor.from_pretrained(processor_id)
logger.info(f"Loaded CLIP model: {model_id}")
logger.info(f"Loaded CLIP model: {model_id} with providers: {actual_providers}")
elif self.model_name == "resnet":
model_id = "microsoft/resnet-50"
self.model = AutoModel.from_pretrained(model_id)
Expand Down
17 changes: 13 additions & 4 deletions dimos/agents/memory/spatial_vector_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,11 @@ class SpatialVectorDB:
"""

def __init__(
self, collection_name: str = "spatial_memory", chroma_client=None, visual_memory=None
self,
collection_name: str = "spatial_memory",
chroma_client=None,
visual_memory=None,
embedding_provider=None,
):
"""
Initialize the spatial vector database.
Expand All @@ -47,6 +51,7 @@ def __init__(
collection_name: Name of the vector database collection
chroma_client: Optional ChromaDB client for persistence. If None, an in-memory client is used.
visual_memory: Optional VisualMemory instance for storing images. If None, a new one is created.
embedding_provider: Optional ImageEmbeddingProvider instance for computing embeddings. If None, one will be created.
"""
self.collection_name = collection_name

Expand Down Expand Up @@ -77,6 +82,9 @@ def __init__(
# Use provided visual memory or create a new one
self.visual_memory = visual_memory if visual_memory is not None else VisualMemory()

# Store the embedding provider to reuse for all operations
self.embedding_provider = embedding_provider

# Log initialization info with details about whether using existing collection
client_type = "persistent" if chroma_client is not None else "in-memory"
try:
Expand Down Expand Up @@ -223,11 +231,12 @@ def query_by_text(self, text: str, limit: int = 5) -> List[Dict]:
Returns:
List of results, each containing the image, its metadata, and similarity score
"""
from dimos.agents.memory.image_embedding import ImageEmbeddingProvider
if self.embedding_provider is None:
from dimos.agents.memory.image_embedding import ImageEmbeddingProvider

embedding_provider = ImageEmbeddingProvider(model_name="clip")
self.embedding_provider = ImageEmbeddingProvider(model_name="clip")

text_embedding = embedding_provider.get_text_embedding(text)
text_embedding = self.embedding_provider.get_text_embedding(text)

results = self.image_collection.query(
query_embeddings=[text_embedding.tolist()],
Expand Down
15 changes: 15 additions & 0 deletions dimos/agents/modules/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright 2025 Dimensional Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Agent modules for DimOS."""
Loading
Loading