Dembrane · spashii · Oct 30, 2025 · Oct 29, 2025 · Oct 29, 2025 · Oct 29, 2025
diff --git a/echo/.devcontainer/docker-compose.yml b/echo/.devcontainer/docker-compose.yml
@@ -8,7 +8,7 @@ services:
       - 9001:9001
     entrypoint: >
       /bin/sh -c "
-      /usr/bin/mc config host add myminio http://minio:9000 ${MINIO_ROOT_USER} ${MINIO_ROOT_PASSWORD};
+      /usr/bin/mc config host add myminio http://minio:9000 $$MINIO_ROOT_USER $$MINIO_ROOT_PASSWORD;
       /usr/bin/mc mb --ignore-existing myminio/dembrane;
       /usr/bin/mc policy set download myminio/dembrane;
       minio server /mnt/data --console-address ":9001"
@@ -22,11 +22,9 @@ services:
     restart: unless-stopped
 
   redis:
-    image: 'bitnami/redis:6.2.14'
-    environment:
-      - ALLOW_EMPTY_PASSWORD=yes
+    image: valkey/valkey:8.0
     volumes:
-      - ./redis_data:/bitnami/redis/data
+      - ./redis_data:/data
 
   postgres:
     image: pgvector/pgvector:0.6.2-pg16
@@ -141,5 +139,3 @@ services:
       - postgres
       - redis
       - neo4j
-
-
diff --git a/echo/server/dembrane/anthropic.py b/echo/server/dembrane/anthropic.py
@@ -25,8 +25,8 @@ def stream_anthropic_chat_response(
     system: List[Dict[str, Any]], messages: List[Dict[str, Any]], protocol: str = "data"
 ) -> Generator[str, None, None]:
     """
-    Generates response from Anthropic 
-    and returns openAI like stream response 
+    Generates response from Anthropic
+    and returns openAI like stream response
     """
     stream = anthropic_client.messages.create(
         model="claude-3-5-sonnet-20241022",

diff --git a/echo/server/dembrane/api/conversation.py b/echo/server/dembrane/api/conversation.py
@@ -5,13 +5,15 @@
 
 from fastapi import Request, APIRouter
 from pydantic import BaseModel
+from litellm.utils import token_counter
 from sqlalchemy.orm import noload, selectinload
 from fastapi.responses import RedirectResponse, StreamingResponse
 from fastapi.exceptions import HTTPException
 from litellm.exceptions import ContentPolicyViolationError
 
 from dembrane.s3 import get_signed_url
 from dembrane.utils import CacheWithExpiration, generate_uuid, get_utc_timestamp
+from dembrane.config import LIGHTRAG_LITELLM_INFERENCE_MODEL
 from dembrane.database import (
     ConversationModel,
     ConversationChunkModel,
@@ -23,7 +25,6 @@
     sanitize_filename_component,
     merge_multiple_audio_files_and_save_to_s3,
 )
-from dembrane.quote_utils import count_tokens
 from dembrane.reply_utils import generate_reply_for_conversation
 from dembrane.api.stateless import (
     DeleteConversationRequest,
@@ -230,10 +231,7 @@ async def get_conversation_counts(
 
     from dembrane.service import conversation_service
 
-    counts = await run_in_thread_pool(
-        conversation_service.get_chunk_counts,
-        conversation_id
-    )
+    counts = await run_in_thread_pool(conversation_service.get_chunk_counts, conversation_id)
 
     return counts
 
@@ -339,10 +337,7 @@ async def get_conversation_content(
 
         duration = -1.0
         try:
-            duration = await run_in_thread_pool(
-                get_duration_from_s3,
-                merged_path
-            )
+            duration = await run_in_thread_pool(get_duration_from_s3, merged_path)
         except Exception as e:
             logger.error(f"Error getting duration from s3: {str(e)}")
 
@@ -462,10 +457,11 @@ async def get_conversation_token_count(
 
     # If not in cache, calculate the token count
     transcript = await get_conversation_transcript(conversation_id, auth)
+
     token_count = await run_in_thread_pool(
-        count_tokens,
-        transcript,
-        provider="anthropic"
+        token_counter,
+        model=LIGHTRAG_LITELLM_INFERENCE_MODEL,
+        messages=[{"role": "user", "content": transcript}],
     )
 
     # Store the result in the cache
@@ -535,7 +531,9 @@ async def summarize_conversation(
 
     language = conversation_data["project_id"]["language"]
 
-    transcript_str = await get_conversation_transcript(conversation_id, auth, include_project_data=True)
+    transcript_str = await get_conversation_transcript(
+        conversation_id, auth, include_project_data=True
+    )
 
     if transcript_str == "":
         return {
@@ -544,9 +542,7 @@ async def summarize_conversation(
         }
     else:
         summary = await run_in_thread_pool(
-            generate_summary,
-            transcript_str,
-            language if language else "en"
+            generate_summary, transcript_str, language if language else "en"
         )
 
         await run_in_thread_pool(
@@ -634,10 +630,7 @@ async def retranscribe_conversation(
 
         duration = None
         try:
-            duration = await run_in_thread_pool(
-                get_duration_from_s3,
-                merged_audio_path
-            )
+            duration = await run_in_thread_pool(get_duration_from_s3, merged_audio_path)
         except Exception as e:
             logger.error(f"Error getting duration from s3: {str(e)}")
 
@@ -669,15 +662,17 @@ async def retranscribe_conversation(
 
         try:
             logger.info(f"Creating links from {conversation_id} to {new_conversation_id}")
-            link_id = (await run_in_thread_pool(
-                directus.create_item,
-                "conversation_link",
-                item_data={
-                    "source_conversation_id": conversation_id,
-                    "target_conversation_id": new_conversation_id,
-                    "link_type": "CLONE",
-                },
-            ))["data"]["id"]
+            link_id = (
+                await run_in_thread_pool(
+                    directus.create_item,
+                    "conversation_link",
+                    item_data={
+                        "source_conversation_id": conversation_id,
+                        "target_conversation_id": new_conversation_id,
+                        "link_type": "CLONE",
+                    },
+                )
+            )["data"]["id"]
             logger.info(f"Link created: {link_id}")
         except Exception as e:
             logger.error(f"Error creating links: {str(e)}")
@@ -687,17 +682,19 @@ async def retranscribe_conversation(
             chunk_id = generate_uuid()
             timestamp = get_utc_timestamp().isoformat()
 
-            (await run_in_thread_pool(
-                directus.create_item,
-                "conversation_chunk",
-                item_data={
-                    "id": chunk_id,
-                    "conversation_id": new_conversation_id,
-                    "timestamp": timestamp,
-                    "path": merged_audio_path,
-                    "source": "CLONE",
-                },
-            ))["data"]
+            (
+                await run_in_thread_pool(
+                    directus.create_item,
+                    "conversation_chunk",
+                    item_data={
+                        "id": chunk_id,
+                        "conversation_id": new_conversation_id,
+                        "timestamp": timestamp,
+                        "path": merged_audio_path,
+                        "source": "CLONE",
+                    },
+                )
+            )["data"]
 
             logger.debug(f"Queuing transcription for chunk {chunk_id}")
             # Import task locally to avoid circular imports
@@ -712,11 +709,7 @@ async def retranscribe_conversation(
             }
         except Exception as e:
             # Clean up the partially created conversation
-            await run_in_thread_pool(
-                directus.delete_item,
-                "conversation",
-                new_conversation_id
-            )
+            await run_in_thread_pool(directus.delete_item, "conversation", new_conversation_id)
             logger.error(f"Error during retranscription: {str(e)}")
             raise HTTPException(status_code=500, detail=f"Failed to process audio: {str(e)}") from e
 
@@ -763,11 +756,7 @@ async def delete_conversation(
             session=auth,
         )
         # Run Directus deletion
-        await run_in_thread_pool(
-            directus.delete_item,
-            "conversation",
-            conversation_id
-        )
+        await run_in_thread_pool(directus.delete_item, "conversation", conversation_id)
         return {"status": "success", "message": "Conversation deleted successfully"}
     except Exception as e:
         logger.exception(f"Error deleting conversation {conversation_id}: {e}")

diff --git a/echo/server/dembrane/api/project.py b/echo/server/dembrane/api/project.py
@@ -201,7 +201,7 @@ def iterfile() -> Generator[bytes, None, None]:
 
 async def get_latest_project_analysis_run(project_id: str) -> Optional[dict]:
     try:
-        def _get_analysis_run():
+        def _get_analysis_run() -> Optional[list[dict]]:
             with directus_client_context() as client:
                 return client.get_items(
                     "project_analysis_run",
@@ -215,7 +215,7 @@ def _get_analysis_run():
                     },
                 )
 
-        analysis_run = await run_in_thread_pool(_get_analysis_run)
+        analysis_run: Optional[list[dict]] = await run_in_thread_pool(_get_analysis_run)
 
         if analysis_run is None:
             return None
@@ -247,10 +247,7 @@ async def post_create_project_library(
     from dembrane.service.project import ProjectNotFoundException
 
     try:
-        project = await run_in_thread_pool(
-            project_service.get_by_id_or_raise,
-            project_id
-        )
+        project = await run_in_thread_pool(project_service.get_by_id_or_raise, project_id)
     except ProjectNotFoundException as e:
         raise HTTPException(status_code=404, detail="Project not found") from e
 
@@ -298,10 +295,7 @@ async def post_create_view(
     from dembrane.service.project import ProjectNotFoundException
 
     try:
-        project = await run_in_thread_pool(
-            project_service.get_by_id_or_raise,
-            project_id
-        )
+        project = await run_in_thread_pool(project_service.get_by_id_or_raise, project_id)
     except ProjectNotFoundException as e:
         raise HTTPException(status_code=404, detail="Project not found") from e
 
@@ -325,12 +319,13 @@ class CreateReportRequestBodySchema(BaseModel):
 
 
 @ProjectRouter.post("/{project_id}/create-report")
-async def create_report(project_id: str, body: CreateReportRequestBodySchema) -> None:
+async def create_report(project_id: str, body: CreateReportRequestBodySchema) -> dict:
     language = body.language or "en"
     try:
         report_content_response = await get_report_content_for_project(project_id, language)
     except ContextTooLongException:
-        def _create_error_report():
+
+        def _create_error_report() -> dict:
             with directus_client_context() as client:
                 return client.create_item(
                     "project_report",
@@ -342,13 +337,13 @@ def _create_error_report():
                         "error_code": "CONTEXT_TOO_LONG",
                     },
                 )["data"]
-        
+
         report = await run_in_thread_pool(_create_error_report)
         return report
     except Exception as e:
         raise e
 
-    except Exception as e:
-        raise e
+    except Exception:
+        logger.exception(f"create_report failed for project {project_id}")
+        raise
-    def _create_report():
+    def _create_report() -> dict:
         with directus_client_context() as client:
             return client.create_item(
                 "project_report",
@@ -359,7 +354,7 @@ def _create_report():
                     "status": "archived",
                 },
             )["data"]
-    
+
     report = await run_in_thread_pool(_create_report)
     return report
 

diff --git a/echo/server/dembrane/api/stateless.py b/echo/server/dembrane/api/stateless.py
@@ -7,6 +7,12 @@
 from lightrag.lightrag import QueryParam
 from lightrag.kg.shared_storage import initialize_pipeline_status
 
+from dembrane.config import (
+    SMALL_LITELLM_MODEL,
+    SMALL_LITELLM_API_KEY,
+    SMALL_LITELLM_API_BASE,
+    SMALL_LITELLM_API_VERSION,
+)
 from dembrane.prompts import render_prompt
 from dembrane.rag_manager import RAGManager, get_rag
 from dembrane.postgresdb_manager import PostgresDBManager
@@ -50,13 +56,16 @@ def generate_summary(transcript: str, language: str | None) -> str:
 
     # Call the model over the provided API endpoint
     response = completion(
-        model="anthropic/claude-3-5-sonnet-20240620",
+        model=SMALL_LITELLM_MODEL,
         messages=[
             {
                 "content": prompt,
                 "role": "user",
             }
         ],
+        api_key=SMALL_LITELLM_API_KEY,
+        api_base=SMALL_LITELLM_API_BASE,
+        api_version=SMALL_LITELLM_API_VERSION,
     )
 
     response_content = response["choices"][0]["message"]["content"]
@@ -262,7 +271,7 @@ async def get_lightrag_prompt(
                 ids=[str(id) for id in echo_segment_ids],
                 top_k=payload.top_k,
             )
-            
+
             try:
                 response = await rag.aquery(payload.query, param=param)
                 logger.debug(f"Response: {response}")