17 changes: 15 additions & 2 deletions .devcontainer/devcontainer.json
@@ -1,13 +1,26 @@
 {
     "name": "dimos-dev",
-    "image": "ghcr.io/dimensionalos/dimos-dev:latest",
-    "postCreateCommand": "pre-commit install",
+    "image": "ghcr.io/dimensionalos/dev:dev",
     "customizations": {
         "vscode": {
             "extensions": [
                 "charliermarsh.ruff",
                 "ms-python.vscode-pylance"
             ]
         }
     },
+
+    "settings": {
+        "notebook.formatOnSave.enabled": true,
+        "notebook.codeActionsOnSave": {
+            "notebook.source.fixAll": "explicit",
+            "notebook.source.organizeImports": "explicit"
+        },
+        "editor.codeActionsOnSave": {
+            "source.fixAll": "explicit",
+            "source.organizeImports": "explicit"
+        },
+        "editor.defaultFormatter": "charliermarsh.ruff",
+        "editor.formatOnSave": true
+    }
 }
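
The new `settings` block hands linting and formatting to the Ruff extension on every save: `source.fixAll` applies Ruff's autofixes, `source.organizeImports` sorts imports, and `editor.defaultFormatter` plus `editor.formatOnSave` route formatting through `charliermarsh.ruff`, with the same actions enabled for notebooks. Roughly the same pipeline can be reproduced outside the editor with the Ruff CLI; a minimal sketch, assuming `ruff` is installed and using an illustrative `dimos/` target:

```python
# Sketch: approximate the on-save actions with the Ruff CLI.
# Assumes `ruff` is on PATH; "dimos/" is an illustrative target.
import subprocess

def ruff_on_save(path: str) -> None:
    # source.fixAll -> apply all safe autofixes
    subprocess.run(["ruff", "check", "--fix", path], check=False)
    # source.organizeImports -> apply only import-sorting (I) fixes
    subprocess.run(["ruff", "check", "--select", "I", "--fix", path], check=False)
    # editor.formatOnSave -> run the formatter
    subprocess.run(["ruff", "format", path], check=False)

if __name__ == "__main__":
    ruff_on_save("dimos/")
```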
520 changes: 296 additions & 224 deletions dimos/agents/agent.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions dimos/agents/agent_config.py
@@ -15,6 +15,7 @@
 from typing import List
 from dimos.agents.agent import Agent
 
+
 class AgentConfig:
     def __init__(self, agents: List[Agent] = None):
         """
86 changes: 47 additions & 39 deletions dimos/agents/agent_ctransformers_gguf.py
@@ -15,7 +15,6 @@
 from __future__ import annotations
 
 # Standard library imports
-import json
 import logging
 import os
 from typing import Any, Optional
@@ -26,14 +25,11 @@
 from reactivex.scheduler import ThreadPoolScheduler
 from reactivex.subject import Subject
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
 
 # Local imports
 from dimos.agents.agent import LLMAgent
 from dimos.agents.memory.base import AbstractAgentSemanticMemory
 from dimos.agents.prompt_builder.impl import PromptBuilder
-from dimos.agents.tokenizer.base import AbstractTokenizer
-from dimos.agents.tokenizer.huggingface_tokenizer import HuggingFaceTokenizer
 from dimos.utils.logging_config import setup_logger
 
 # Initialize environment variables
@@ -44,6 +40,7 @@
 
 from ctransformers import AutoModelForCausalLM as CTransformersModel
 
+
 class CTransformersTokenizerAdapter:
     def __init__(self, model):
         self.model = model
@@ -66,7 +63,9 @@ def detokenize_text(self, tokenized_text):
         except Exception as e:
             raise ValueError(f"Failed to detokenize text. Error: {str(e)}")
 
-    def apply_chat_template(self, conversation, tokenize=False, add_generation_prompt=True):
+    def apply_chat_template(
+        self, conversation, tokenize=False, add_generation_prompt=True
+    ):
         prompt = ""
         for message in conversation:
             role = message["role"]
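
The reformatted `apply_chat_template` exists because GGUF models loaded through ctransformers do not carry a Hugging Face chat template, so the adapter assembles the prompt string by hand from each message's `role` and `content`. A standalone sketch of that pattern, with an assumed `Role: content` layout that may differ from the PR's actual template:

```python
# Sketch of a hand-rolled chat template like the adapter's apply_chat_template.
# The "Role: content" layout is an assumed format, not necessarily the PR's.
def apply_chat_template(conversation, add_generation_prompt=True):
    prompt = ""
    for message in conversation:
        role = message["role"]        # e.g. "system", "user", "assistant"
        content = message["content"]
        prompt += f"{role.capitalize()}: {content}\n"
    if add_generation_prompt:
        prompt += "Assistant:"        # cue the model to answer
    return prompt

print(apply_chat_template([
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "How many r's are in 'strawberry'?"},
]))
```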
@@ -84,26 +83,27 @@ def apply_chat_template(self, conversation, tokenize=False, add_generation_promp
 
 # CTransformers Agent Class
 class CTransformersGGUFAgent(LLMAgent):
-    def __init__(self,
-                 dev_name: str,
-                 agent_type: str = "HF-LLM",
-                 model_name: str = "TheBloke/Llama-2-7B-GGUF",
-                 model_file: str = "llama-2-7b.Q4_K_M.gguf",
-                 model_type: str = "llama",
-                 gpu_layers: int = 50,
-                 device: str = "auto",
-                 query: str = "How many r's are in the word 'strawberry'?",
-                 input_query_stream: Optional[Observable] = None,
-                 input_video_stream: Optional[Observable] = None,
-                 output_dir: str = os.path.join(os.getcwd(), "assets", "agent"),
-                 agent_memory: Optional[AbstractAgentSemanticMemory] = None,
-                 system_query: Optional[str] = "You are a helpful assistant.",
-                 max_output_tokens_per_request: int = 10,
-                 max_input_tokens_per_request: int = 250,
-                 prompt_builder: Optional[PromptBuilder] = None,
-                 pool_scheduler: Optional[ThreadPoolScheduler] = None,
-                 process_all_inputs: Optional[bool] = None,):
-
+    def __init__(
+        self,
+        dev_name: str,
+        agent_type: str = "HF-LLM",
+        model_name: str = "TheBloke/Llama-2-7B-GGUF",
+        model_file: str = "llama-2-7b.Q4_K_M.gguf",
+        model_type: str = "llama",
+        gpu_layers: int = 50,
+        device: str = "auto",
+        query: str = "How many r's are in the word 'strawberry'?",
+        input_query_stream: Optional[Observable] = None,
+        input_video_stream: Optional[Observable] = None,
+        output_dir: str = os.path.join(os.getcwd(), "assets", "agent"),
+        agent_memory: Optional[AbstractAgentSemanticMemory] = None,
+        system_query: Optional[str] = "You are a helpful assistant.",
+        max_output_tokens_per_request: int = 10,
+        max_input_tokens_per_request: int = 250,
+        prompt_builder: Optional[PromptBuilder] = None,
+        pool_scheduler: Optional[ThreadPoolScheduler] = None,
+        process_all_inputs: Optional[bool] = None,
+    ):
        # Determine appropriate default for process_all_inputs if not provided
        if process_all_inputs is None:
            # Default to True for text queries, False for video streams
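
With the defaults above, only `dev_name` is required; model repo, GGUF file, GPU offload, and token budgets all have fallbacks. A minimal instantiation sketch (the import path is inferred from the file's location in the repo):

```python
# Sketch: instantiate the GGUF agent with its defaults.
# Import path assumed from the file's location; construction loads the weights.
from dimos.agents.agent_ctransformers_gguf import CTransformersGGUFAgent

agent = CTransformersGGUFAgent(
    dev_name="bench-agent",            # the only required argument
    gpu_layers=0,                      # CPU-only; the default offloads 50 layers
    max_output_tokens_per_request=64,  # default is a very short 10 tokens
)
```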
@@ -120,7 +120,7 @@ def __init__(self,
             process_all_inputs=process_all_inputs,
             system_query=system_query,
             max_output_tokens_per_request=max_output_tokens_per_request,
-            max_input_tokens_per_request=max_input_tokens_per_request
+            max_input_tokens_per_request=max_input_tokens_per_request,
         )
 
         self.query = query
@@ -141,14 +141,13 @@ def __init__(self,
             model_name,
             model_file=model_file,
             model_type=model_type,
-            gpu_layers=gpu_layers
+            gpu_layers=gpu_layers,
         )
 
         self.tokenizer = CTransformersTokenizerAdapter(self.model)
 
         self.prompt_builder = prompt_builder or PromptBuilder(
-            self.model_name,
-            tokenizer=self.tokenizer
+            self.model_name, tokenizer=self.tokenizer
         )
 
         self.max_output_tokens_per_request = max_output_tokens_per_request
@@ -167,12 +166,13 @@ def __init__(self,
         if self.input_video_stream is not None:
             logger.info("Subscribing to input video stream...")
             self.disposables.add(
-                self.subscribe_to_image_processing(self.input_video_stream))
+                self.subscribe_to_image_processing(self.input_video_stream)
+            )
         if self.input_query_stream is not None:
             logger.info("Subscribing to input query stream...")
             self.disposables.add(
-                self.subscribe_to_query_processing(self.input_query_stream))
-
+                self.subscribe_to_query_processing(self.input_query_stream)
+            )
 
     def _send_query(self, messages: list) -> Any:
         try:
@@ -186,22 +186,26 @@ def _send_query(self, messages: list) -> Any:
                 content = msg["content"]
                 if isinstance(content, list):
                     # Assume it's a list of {'type': 'text', 'text': ...}
-                    text_parts = [c["text"] for c in content if isinstance(c, dict) and "text" in c]
+                    text_parts = [
+                        c["text"]
+                        for c in content
+                        if isinstance(c, dict) and "text" in c
+                    ]
                     content = " ".join(text_parts)
                 flat_messages.append({"role": role, "content": content})
 
             print(f"{_BLUE_PRINT_COLOR}Messages: {flat_messages}{_RESET_COLOR}")
 
             print("Applying chat template...")
             prompt_text = self.tokenizer.apply_chat_template(
-                conversation=flat_messages,
-                tokenize=False,
-                add_generation_prompt=True
+                conversation=flat_messages, tokenize=False, add_generation_prompt=True
             )
             print("Chat template applied.")
             print(f"Prompt text:\n{prompt_text}")
 
-            response = self.model(prompt_text, max_new_tokens=self.max_output_tokens_per_request)
+            response = self.model(
+                prompt_text, max_new_tokens=self.max_output_tokens_per_request
+            )
             print("Model response received.")
             return response
 
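The widened list comprehension handles OpenAI-style content parts: upstream messages may carry a list of `{'type': 'text', 'text': ...}` dicts, while the ctransformers model needs one plain string per message. The same flattening, run standalone on a hypothetical mixed message:

```python
# Same flattening as _send_query: join the text parts of a content list.
msg = {
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe the scene."},
        {"type": "image_url", "image_url": {"url": "..."}},  # non-text part, skipped
        {"type": "text", "text": "Be brief."},
    ],
}
content = msg["content"]
if isinstance(content, list):
    text_parts = [c["text"] for c in content if isinstance(c, dict) and "text" in c]
    content = " ".join(text_parts)
print(content)  # -> "Describe the scene. Be brief."
```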
@@ -213,7 +217,11 @@ def stream_query(self, query_text: str) -> Subject:
         """
         Creates an observable that processes a text query and emits the response.
         """
-        return create(lambda observer, _: self._observable_query(
-            observer, incoming_query=query_text))
+        return create(
+            lambda observer, _: self._observable_query(
+                observer, incoming_query=query_text
+            )
+        )
 
+
 # endregion HuggingFaceLLMAgent Subclass (HuggingFace-Specific Implementation)
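
Since `stream_query` wraps `_observable_query` in `reactivex.create`, the returned observable is cold: each subscription runs one query pass. A usage sketch, assuming the instantiation shown earlier (`_observable_query` semantics are inferred from the call site):

```python
# Sketch: consume the observable returned by stream_query.
from dimos.agents.agent_ctransformers_gguf import CTransformersGGUFAgent

agent = CTransformersGGUFAgent(dev_name="demo")  # loads the GGUF weights

observable = agent.stream_query("How many r's are in the word 'strawberry'?")
observable.subscribe(
    on_next=lambda response: print(f"response: {response}"),
    on_error=lambda err: print(f"error: {err}"),
    on_completed=lambda: print("done"),
)
```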