Merged
53 changes: 53 additions & 0 deletions echo/docs/litellm_config.md
@@ -0,0 +1,53 @@
# LiteLLM Configuration Documentation

This document describes all LiteLLM-related configuration variables used in the system and explains what each one does.

## Main LLM Model
**LIGHTRAG_LITELLM_MODEL**: Used by lightrag to perform Named Entity Recognition (NER) and create the knowledge graph
- Required Configurations:
- `LIGHTRAG_LITELLM_MODEL`: Model identifier (e.g., azure/gpt-4o-mini)
- `LIGHTRAG_LITELLM_API_KEY`: API key for authentication
- `LIGHTRAG_LITELLM_API_VERSION`: API version
- `LIGHTRAG_LITELLM_API_BASE`: Base URL for the API

## Audio Transcription Model
**LIGHTRAG_LITELLM_AUDIOMODEL_MODEL**: Used by audio-lightrag to convert audio input into a transcript and generate a contextual transcript
- Required Configurations:
- `LIGHTRAG_LITELLM_AUDIOMODEL_MODEL`: Model identifier (e.g., azure/whisper-large-v3)
- `LIGHTRAG_LITELLM_AUDIOMODEL_API_BASE`: Base URL for the audio model API
- `LIGHTRAG_LITELLM_AUDIOMODEL_API_KEY`: API key for authentication
- `LIGHTRAG_LITELLM_AUDIOMODEL_API_VERSION`: API version

## Text Structure Model
**LIGHTRAG_LITELLM_TEXTSTRUCTUREMODEL_MODEL**: Used to structure the output of the audio model into the desired format
- Required Configurations:
- `LIGHTRAG_LITELLM_TEXTSTRUCTUREMODEL_MODEL`: Model identifier (e.g., azure/gpt-4o-mini)
- `LIGHTRAG_LITELLM_TEXTSTRUCTUREMODEL_API_BASE`: Base URL for the text structure model API
- `LIGHTRAG_LITELLM_TEXTSTRUCTUREMODEL_API_KEY`: API key for authentication
- `LIGHTRAG_LITELLM_TEXTSTRUCTUREMODEL_API_VERSION`: API version

## Embedding Model
**LIGHTRAG_LITELLM_EMBEDDING_MODEL**: Used by lightrag to create embeddings for text
- Required Configurations:
- `LIGHTRAG_LITELLM_EMBEDDING_MODEL`: Model identifier (e.g., azure/text-embedding-ada-002)
- `LIGHTRAG_LITELLM_EMBEDDING_API_BASE`: Base URL for the embedding model API
- `LIGHTRAG_LITELLM_EMBEDDING_API_KEY`: API key for authentication
- `LIGHTRAG_LITELLM_EMBEDDING_API_VERSION`: API version

## Inference Model
**LIGHTRAG_LITELLM_INFERENCE_MODEL**: Used to respond to chat queries; supports the auto-select capability
- Required Configurations:
- `LIGHTRAG_LITELLM_INFERENCE_MODEL`: Model identifier (default: anthropic/claude-3-5-sonnet-20240620)
- `LIGHTRAG_LITELLM_INFERENCE_API_KEY`: API key for authentication
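Putting the pieces together, a typical environment configuration might look like the sketch below. All values are illustrative placeholders (the API version and endpoint URL are assumptions, not defaults shipped with the system); substitute your own deployment details.

```shell
# Illustrative values only -- not real credentials or endpoints.
export LIGHTRAG_LITELLM_MODEL="azure/gpt-4o-mini"
export LIGHTRAG_LITELLM_API_KEY="your-api-key"
export LIGHTRAG_LITELLM_API_VERSION="2024-02-01"
export LIGHTRAG_LITELLM_API_BASE="https://your-resource.openai.azure.com"

export LIGHTRAG_LITELLM_EMBEDDING_MODEL="azure/text-embedding-ada-002"
export LIGHTRAG_LITELLM_EMBEDDING_API_KEY="your-api-key"
export LIGHTRAG_LITELLM_EMBEDDING_API_VERSION="2024-02-01"
export LIGHTRAG_LITELLM_EMBEDDING_API_BASE="https://your-resource.openai.azure.com"

export LIGHTRAG_LITELLM_INFERENCE_MODEL="anthropic/claude-3-5-sonnet-20240620"
export LIGHTRAG_LITELLM_INFERENCE_API_KEY="your-api-key"
```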

## Additional Audio LightRAG Configurations

### Audio Processing Settings
- `AUDIO_LIGHTRAG_CONVERSATION_HISTORY_NUM`: Number of conversation history items to maintain (default: 10)
- `AUDIO_LIGHTRAG_TIME_THRESHOLD_SECONDS`: Time threshold for audio processing in seconds (default: 60)
- `AUDIO_LIGHTRAG_MAX_AUDIO_FILE_SIZE_MB`: Maximum allowed audio file size in MB (default: 15)
- `AUDIO_LIGHTRAG_TOP_K_PROMPT`: Top K value for prompt processing (default: 100)

### Feature Flags
- `ENABLE_AUDIO_LIGHTRAG_INPUT`: Enable/disable audio input processing (default: false)
- `AUTO_SELECT_ENABLED`: Enable/disable auto-select feature (default: false)
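The settings and flags above are plain environment variables with documented defaults. A minimal sketch of how they might be parsed (the helper names `env_bool`/`env_int` are illustrative, not the actual config module):

```python
import os

def env_bool(name: str, default: bool = False) -> bool:
    """Parse a boolean feature flag from the environment."""
    return os.getenv(name, str(default)).strip().lower() in ("1", "true", "yes")

def env_int(name: str, default: int) -> int:
    """Parse an integer setting, falling back to the documented default."""
    try:
        return int(os.getenv(name, str(default)))
    except ValueError:
        return default

# Defaults mirror the values documented above.
CONVERSATION_HISTORY_NUM = env_int("AUDIO_LIGHTRAG_CONVERSATION_HISTORY_NUM", 10)
TIME_THRESHOLD_SECONDS = env_int("AUDIO_LIGHTRAG_TIME_THRESHOLD_SECONDS", 60)
MAX_AUDIO_FILE_SIZE_MB = env_int("AUDIO_LIGHTRAG_MAX_AUDIO_FILE_SIZE_MB", 15)
TOP_K_PROMPT = env_int("AUDIO_LIGHTRAG_TOP_K_PROMPT", 100)
ENABLE_AUDIO_LIGHTRAG_INPUT = env_bool("ENABLE_AUDIO_LIGHTRAG_INPUT", False)
AUTO_SELECT_ENABLED = env_bool("AUTO_SELECT_ENABLED", False)
```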
@ussaama change your FF to ENABLE_AUTO_SELECT

117 changes: 16 additions & 101 deletions echo/server/dembrane/api/chat.py
@@ -8,10 +8,7 @@

import litellm
from fastapi import Query, APIRouter, HTTPException
from litellm import ( # type: ignore
# completion,
token_counter,
)
from litellm import token_counter # type: ignore
from pydantic import BaseModel
from fastapi.responses import StreamingResponse

@@ -22,11 +19,6 @@
LIGHTRAG_LITELLM_INFERENCE_MODEL,
LIGHTRAG_LITELLM_INFERENCE_API_KEY,
)

# LIGHTRAG_LITELLM_TEXTSTRUCTUREMODEL_MODEL,
# LIGHTRAG_LITELLM_TEXTSTRUCTUREMODEL_API_KEY,
# LIGHTRAG_LITELLM_TEXTSTRUCTUREMODEL_API_BASE,
# LIGHTRAG_LITELLM_TEXTSTRUCTUREMODEL_API_VERSION,
from dembrane.database import (
DatabaseSession,
ProjectChatModel,
@@ -39,6 +31,8 @@
from dembrane.chat_utils import (
MAX_CHAT_CONTEXT_LENGTH,
get_project_chat_history,
get_conversation_citations,
get_conversation_references,
get_lightrag_prompt_by_params,
create_system_messages_for_chat,
)
@@ -47,9 +41,6 @@
from dembrane.api.dependency_auth import DirectusSession, DependencyDirectusSession
from dembrane.audio_lightrag.utils.lightrag_utils import (
get_project_id,
# get_conversation_name_from_id,
# run_segment_id_to_conversation_id,
get_conversation_details_for_rag_query,
)

ChatRouter = APIRouter(tags=["chat"])
@@ -165,7 +156,7 @@ async def get_chat_context(
)

for conversation in used_conversations:
is_conversation_locked = conversation.id in locked_conversations
is_conversation_locked = conversation.id in locked_conversations # Verify with directus
chat_context_resource = ChatContextConversationSchema(
conversation_id=conversation.id,
conversation_participant_name=conversation.participant_name,
@@ -335,7 +326,7 @@ async def lock_conversations(
.all()
)

dembrane_message = ProjectChatMessageModel(
dembrane_search_complete_message = ProjectChatMessageModel(
id=generate_uuid(),
date_created=get_utc_timestamp(),
message_from="dembrane",
Expand All @@ -344,7 +335,7 @@ async def lock_conversations(
used_conversations=added_conversations,
added_conversations=added_conversations,
)
db.add(dembrane_message)
db.add(dembrane_search_complete_message)
db.commit()

# Fetch ConversationModel objects for used_conversations
@@ -365,12 +356,6 @@ class ChatBodyMessageSchema(BaseModel):
class ChatBodySchema(BaseModel):
messages: List[ChatBodyMessageSchema]

class CitationSingleSchema(BaseModel):
segment_id: int
verbatim_reference_text_chunk: str

class CitationsSchema(BaseModel):
citations: List[CitationSingleSchema]

@ChatRouter.post("/{chat_id}")
async def post_chat(
@@ -452,45 +437,22 @@ async def post_chat(
messages=formatted_messages)
if top_k <= 5:
raise HTTPException(status_code=400, detail="Auto select is not possible with the current context length")

dembrane_dummy_message = ProjectChatMessageModel(
id=generate_uuid(),
date_created=get_utc_timestamp(),
message_from="dembrane",
text="searched",
project_chat_id=chat_id,
)
db.add(dembrane_dummy_message)
db.commit()

try:
conversation_references = await get_conversation_details_for_rag_query(rag_prompt)
conversation_references = {'conversation_references': conversation_references}
except Exception as e:
logger.info(f"No references found. Error: {str(e)}")
conversation_references = {'conversation_references':{}}

## TODO: Enable when frontend can handle
# dembrane_prompt_conversations_message = ProjectChatMessageModel(
# id=generate_uuid(),
# date_created=get_utc_timestamp(),
# message_from="dembrane",
# text="prompt_conversations created",
# prompt_conversations=conversation_references,
# project_chat_id=chat_id,
# )
# db.add(dembrane_prompt_conversations_message)
# db.commit()
conversation_references = await get_conversation_references(rag_prompt, [project_id])
async def stream_response_async() -> AsyncGenerator[str, None]:
conversation_references_yield = f"h:{json.dumps(conversation_references)}\n"
yield conversation_references_yield

accumulated_response = ""
try:
response = await litellm.acompletion(
model=LIGHTRAG_LITELLM_INFERENCE_MODEL,
messages=formatted_messages,
stream=True,
api_key=LIGHTRAG_LITELLM_INFERENCE_API_KEY
api_key=LIGHTRAG_LITELLM_INFERENCE_API_KEY,
# mock_response="It's simple to use and easy to get started",
)
async for chunk in response:
async for chunk in response:  # type: ignore
if chunk.choices[0].delta.content:
content = chunk.choices[0].delta.content
accumulated_response += content
@@ -511,56 +473,9 @@ async def stream_response_async() -> AsyncGenerator[str, None]:
yield "Error: An error occurred while processing the chat response."
return # Stop generation on error

## TODO: Enable when frontend can handle
# # Move all this to utils
# text_structuring_model_message = f'''
# You are a helpful assistant that maps the correct references to the generated response.
# Your task is to map the references segment_id to the correct reference text.
# For every reference segment_id, you need to provide the most relevant reference text verbatim.
# Segment ID is always of the format: SEGMENT_ID_<number>.
# Here is the generated response:
# {accumulated_response}
# Here are the rag prompt:
# {rag_prompt}
# '''
# text_structuring_model_messages = [
# {"role": "system", "content": text_structuring_model_message},
# ]
# # Generate citations

# text_structuring_model_generation = completion(
# model=f"{LIGHTRAG_LITELLM_TEXTSTRUCTUREMODEL_MODEL}",
# messages=text_structuring_model_messages,
# api_base=LIGHTRAG_LITELLM_TEXTSTRUCTUREMODEL_API_BASE,
# api_version=LIGHTRAG_LITELLM_TEXTSTRUCTUREMODEL_API_VERSION,
# api_key=LIGHTRAG_LITELLM_TEXTSTRUCTUREMODEL_API_KEY,
# response_format=CitationsSchema)
# try:
# citations_dict = json.loads(text_structuring_model_generation.choices[0].message.content)
# citations_list = citations_dict["citations"]# List[Dict[str, str]]
# if len(citations_list) > 0:
# for idx, citation in enumerate(citations_list):
# conversation_id = await run_segment_id_to_conversation_id(citation['segment_id'])
# citations_list[idx]['conversation_id'] = conversation_id
# conversation_name = get_conversation_name_from_id(conversation_id)
# citations_list[idx]['conversation_name'] = conversation_name
# else:
# logger.warning("WARNING: No citations found")
# citations_list = json.dumps(citations_list)
# except Exception as e:
# logger.warning(f"WARNING: Error in citation extraction. Skipping citations: {str(e)}")
# citations_list = []
# citations_count = len(citations_list)
# dembrane_citations_message = ProjectChatMessageModel(
# id=generate_uuid(),
# date_created=get_utc_timestamp(),
# message_from="dembrane",
# text=f"{citations_count} citations found.",
# project_chat_id=chat_id,
# citations=citations_list,
# )
# db.add(dembrane_citations_message)
# db.commit()
citations_list = await get_conversation_citations(rag_prompt, accumulated_response, [project_id])
citations_yield = f"h:{json.dumps(citations_list)}\n"
yield citations_yield
headers = {"Content-Type": "text/event-stream"}
if protocol == "data":
headers["x-vercel-ai-data-stream"] = "v1"
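The reworked endpoint streams `h:`-prefixed JSON metadata lines (conversation references up front, citations at the end) interleaved with plain response content. A minimal client-side parser for that framing could look like the sketch below; the function name and the exact payload shapes are assumptions for illustration, not part of this PR.

```python
import json
from typing import Any, Iterable, List, Tuple

def parse_stream(lines: Iterable[str]) -> Tuple[str, List[Any]]:
    """Split a response stream into accumulated text and h:-prefixed JSON metadata."""
    text_parts: List[str] = []
    metadata: List[Any] = []
    for line in lines:
        if line.startswith("h:"):
            # Metadata frame: JSON payload follows the "h:" prefix.
            metadata.append(json.loads(line[2:]))
        else:
            text_parts.append(line)
    return "".join(text_parts), metadata

# Example with the two metadata frames the endpoint emits:
text, meta = parse_stream([
    'h:{"conversation_references": {}}',
    "Hello ",
    "world",
    'h:{"citations": []}',
])
```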
5 changes: 3 additions & 2 deletions echo/server/dembrane/api/stateless.py
@@ -7,9 +7,9 @@
from lightrag.lightrag import QueryParam
from lightrag.kg.shared_storage import initialize_pipeline_status

from dembrane.rag import RAGManager, get_rag
from dembrane.prompts import render_prompt
from dembrane.postgresdbmanager import PostgresDBManager
from dembrane.rag_manager import RAGManager, get_rag
from dembrane.postgresdb_manager import PostgresDBManager
from dembrane.api.dependency_auth import DependencyDirectusSession
from dembrane.audio_lightrag.utils.lightrag_utils import (
upsert_transcript,
@@ -222,6 +222,7 @@ async def get_lightrag_prompt(payload: GetLightragQueryRequest,
ids= [str(id) for id in echo_segment_ids],
top_k = payload.top_k)
response = await rag.aquery(payload.query, param=param)
logger.debug(f"***Response: {response}")
return response

else:
8 changes: 7 additions & 1 deletion echo/server/dembrane/audio_lightrag/main/run_etl.py
@@ -77,5 +77,11 @@ def run_etl_pipeline(conv_id_list: list[str]) -> Optional[bool]:
# Steps for manual run
# cd server
# python -m dembrane.audio_lightrag.main.run_etl
conv_id_list: list[str] = ['8f224582-5d1b-4d96-a450-a0bdb891dd28'] # Upload UUIDs
import os

from dotenv import load_dotenv
load_dotenv()

TEST_CONV_UUID = str(os.getenv("TEST_CONV_UUID"))
conv_id_list: list[str] = [TEST_CONV_UUID]
run_etl_pipeline(conv_id_list)
@@ -79,7 +79,7 @@ def transform(self) -> None:
"conversation_segment_id": segment_id,
"conversation_chunk_id": chunk_id
}
directus.create_item("conversation_segment_conversation_chunk_1", mapping_data)
directus.create_item("conversation_segment_conversation_chunk", mapping_data)

chunk_id_2_segment.extend(chunk_id_2_segment_temp)
except Exception as e:
@@ -51,9 +51,7 @@ async def load(self) -> None:
logger.exception(f"Error in getting contextual transcript : {e}")
continue
previous_contextual_transcript = '\n\n'.join(previous_contextual_transcript_li)
audio_model_prompt = Prompts.audio_model_system_prompt()
audio_model_prompt = audio_model_prompt.format(event_text = event_text,
previous_conversation_text = previous_contextual_transcript)
audio_model_prompt = Prompts.audio_model_system_prompt(event_text, previous_contextual_transcript)
@ArindamRoy23 change to use prompts.py

try:
response = directus.get_item('conversation_segment', int(segment_id))
except Exception as e:
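The diff above replaces manual `.format(...)` calls with a single `Prompts.audio_model_system_prompt(event_text, previous_contextual_transcript)` helper, per the review comment to centralize prompts in prompts.py. A hypothetical sketch of such a helper (the template text and class layout are assumptions; the real `Prompts` class in dembrane may differ):

```python
class Prompts:
    """Illustrative prompt registry; the template wording is invented for this sketch."""

    AUDIO_MODEL_SYSTEM_TEMPLATE = (
        "You are transcribing an audio segment.\n"
        "Event context:\n{event_text}\n\n"
        "Previous conversation:\n{previous_conversation_text}"
    )

    @staticmethod
    def audio_model_system_prompt(event_text: str, previous_conversation_text: str) -> str:
        # Substituting at call time keeps formatting logic out of the ETL loader.
        return Prompts.AUDIO_MODEL_SYSTEM_TEMPLATE.format(
            event_text=event_text,
            previous_conversation_text=previous_conversation_text,
        )
```

Centralizing templates this way means the loader only passes data, and prompt wording changes stay confined to prompts.py.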