diff --git a/echo/frontend/src/components/chat/Sources.tsx b/echo/frontend/src/components/chat/Sources.tsx index 327ddacc..55fe1811 100644 --- a/echo/frontend/src/components/chat/Sources.tsx +++ b/echo/frontend/src/components/chat/Sources.tsx @@ -18,7 +18,7 @@ export const Sources = ({ - Citing the following sources + The following conversations were automatically added to the context diff --git a/echo/frontend/src/components/conversation/AutoSelectConversations.tsx b/echo/frontend/src/components/conversation/AutoSelectConversations.tsx index edb9b6c2..251c457d 100644 --- a/echo/frontend/src/components/conversation/AutoSelectConversations.tsx +++ b/echo/frontend/src/components/conversation/AutoSelectConversations.tsx @@ -58,15 +58,6 @@ export const AutoSelectConversations = () => { (conversation) => conversation.is_audio_processing_finished, ); - console.log(hasProcessedConversations, conversations); - - // Show warning if feature is available but no conversations are processed - const showProcessingWarning = - !isDisabled && - conversations && - conversations.length > 0 && - !hasProcessedConversations; - const handleCheckboxChange = (checked: boolean) => { if (isDisabled) { return; @@ -177,22 +168,6 @@ export const AutoSelectConversations = () => { /> - {showProcessingWarning && ( - } - title={Audio Processing In Progress} - className="border-t border-yellow-200 bg-yellow-50 p-3" - > - - - Some conversations are still being processed. Auto-select will - work optimally once audio processing is complete. 
- - - - )} - {isDisabled && ( diff --git a/echo/frontend/src/components/conversation/ConversationAccordion.tsx b/echo/frontend/src/components/conversation/ConversationAccordion.tsx index 1af7faad..7236cc44 100644 --- a/echo/frontend/src/components/conversation/ConversationAccordion.tsx +++ b/echo/frontend/src/components/conversation/ConversationAccordion.tsx @@ -400,28 +400,6 @@ export const ConversationStatusIndicators = ({ )} - {!!project?.is_enhanced_audio_processing_enabled && - // if processing still - // don't show this if both is_finished and is_audio_processing_finished are true - // but if project.is_enhanced_audio_processing_enabled is true, just see the is_finished - !( - conversation.is_finished && conversation.is_audio_processing_finished - ) && ( - - - - Processing - - - - - )} - {hasOnlyTextContent && ( Text @@ -498,7 +476,6 @@ const ConversationAccordionItem = ({ "!bg-primary-50": isLocked, })} rightSection={ - (!ENABLE_CHAT_AUTO_SELECT || !isAutoSelectEnabled) && inChatMode && ( { onResponse: async (_response) => { setShowProgress(false); setProgressValue(0); + if (ENABLE_CHAT_AUTO_SELECT && contextToBeAdded?.auto_select_bool) { + chatContextQuery.refetch(); + } }, onFinish: async (message) => { // this uses the response stream from the backend and makes a chat message IN THE FRONTEND diff --git a/echo/server/dembrane/api/chat.py b/echo/server/dembrane/api/chat.py index bfb3cc9d..80fa434d 100644 --- a/echo/server/dembrane/api/chat.py +++ b/echo/server/dembrane/api/chat.py @@ -15,13 +15,16 @@ from dembrane.utils import generate_uuid, get_utc_timestamp from dembrane.config import ( + SMALL_LITELLM_MODEL, + SMALL_LITELLM_API_KEY, + SMALL_LITELLM_API_BASE, ENABLE_CHAT_AUTO_SELECT, - AUDIO_LIGHTRAG_TOP_K_PROMPT, LIGHTRAG_LITELLM_INFERENCE_MODEL, LIGHTRAG_LITELLM_INFERENCE_API_KEY, LIGHTRAG_LITELLM_INFERENCE_API_BASE, LIGHTRAG_LITELLM_INFERENCE_API_VERSION, ) +from dembrane.prompts import render_prompt from dembrane.database import ( DatabaseSession, 
ProjectChatModel, @@ -34,23 +37,72 @@ MAX_CHAT_CONTEXT_LENGTH, generate_title, get_project_chat_history, - get_conversation_citations, - get_conversation_references, - get_lightrag_prompt_by_params, + auto_select_conversations, create_system_messages_for_chat, ) from dembrane.quote_utils import count_tokens from dembrane.api.conversation import get_conversation_token_count from dembrane.api.dependency_auth import DirectusSession, DependencyDirectusSession -from dembrane.audio_lightrag.utils.lightrag_utils import ( - get_project_id, -) +from dembrane.audio_lightrag.utils.lightrag_utils import get_project_id ChatRouter = APIRouter(tags=["chat"]) logger = logging.getLogger("dembrane.chat") +async def is_followup_question( + conversation_history: List[Dict[str, str]], language: str = "en" +) -> bool: + """ + Determine if the current question is a follow-up to previous messages. + Uses a small LLM call to check semantic relationship. + + Returns: + True if it's a follow-up question, False if it's a new independent question + """ + if len(conversation_history) < 2: + # No previous context, can't be a follow-up + return False + + # Take last 4 messages for context (2 exchanges) + recent_messages = conversation_history[-4:] + + # Format messages for the prompt + previous_messages = [ + {"role": msg["role"], "content": msg["content"]} for msg in recent_messages[:-1] + ] + current_question = recent_messages[-1]["content"] + + prompt = render_prompt( + "is_followup_question", + language, + { + "previous_messages": previous_messages, + "current_question": current_question, + }, + ) + + try: + response = await litellm.acompletion( + model=SMALL_LITELLM_MODEL, + api_key=SMALL_LITELLM_API_KEY, + api_base=SMALL_LITELLM_API_BASE, + messages=[{"role": "user", "content": prompt}], + temperature=0, # Deterministic + timeout=60, # 1 minute timeout for quick decision + ) + + result_text = response.choices[0].message.content.strip() + result = json.loads(result_text) + is_followup = 
result.get("is_followup", False) + + logger.info(f"Follow-up detection: {is_followup} for query: {current_question[:50]}...") + return is_followup + except Exception as e: + logger.warning(f"Follow-up detection failed: {e}. Defaulting to False (run auto-select)") + return False + + class ChatContextConversationSchema(BaseModel): conversation_id: str conversation_participant_name: str @@ -385,25 +437,20 @@ async def post_chat( ) -> StreamingResponse: # ignore: type """ Handle a chat interaction: persist the user's message, optionally generate a title, and stream an LLM-generated response. - This endpoint records the incoming user message into the chat, may asynchronously generate and persist a chat title if missing, and then produces a streaming response from the configured LLM. Two generation modes are supported: - Auto-select (when enabled for the chat): builds a RAG prompt, retrieves conversation references and citations, and streams the model output. - Manual-select: builds system messages from locked conversations and streams the model output. - Side effects: - Persists a new ProjectChatMessageModel for the user message. - May update the chat name and the message's template key. - On generation failure the in-flight user message is deleted. - Parameters: - chat_id: ID of the target chat (used to validate access and load context). - body: ChatBodySchema containing the messages (the last user message is used as the prompt) and optional template_key. - protocol: Response protocol; "data" (default) yields structured data frames, "text" yields raw text chunks. - language: Language code used for title generation and system message creation. - Returns: - StreamingResponse that yields streamed model content and, in auto-select mode, header payloads containing conversation references and citations. 
- Raises: - HTTPException: 404 if the chat (or required conversation data) is not found; 400 when auto-select cannot satisfy context-length constraints or request validation fails. """ @@ -473,38 +520,141 @@ async def post_chat( and filtered_messages[-2]["content"] == filtered_messages[-1]["content"] ): filtered_messages = filtered_messages[:-1] - top_k = AUDIO_LIGHTRAG_TOP_K_PROMPT - prompt_len = float("inf") - while MAX_CHAT_CONTEXT_LENGTH < prompt_len: + + query = filtered_messages[-1]["content"] + conversation_history = filtered_messages + + # Track newly added conversations for displaying in the frontend + conversations_added: list[ConversationModel] = [] + + # Check if this is a follow-up question (only if we have locked conversations) + should_reuse_locked = False + if locked_conversation_id_list: + is_followup = await is_followup_question(conversation_history, language) + if is_followup: + logger.info("Detected follow-up question - reusing locked conversations") + should_reuse_locked = True + else: + logger.info("New independent question - running auto-select") + + if should_reuse_locked: + # Reuse existing locked conversations for follow-up questions + updated_conversation_id_list = locked_conversation_id_list + + system_messages = await create_system_messages_for_chat( + updated_conversation_id_list, db, language, project_id + ) + formatted_messages = [] - top_k = max(5, top_k - 10) - query = filtered_messages[-1]["content"] - conversation_history = filtered_messages - rag_prompt = await get_lightrag_prompt_by_params( - query=query, - conversation_history=conversation_history, - echo_conversation_ids=chat_context.conversation_id_list, - echo_project_ids=[project_id], - auto_select_bool=chat_context.auto_select_bool, - get_transcripts=True, - top_k=top_k, + if isinstance(system_messages, list): + for msg in system_messages: + formatted_messages.append({"role": "system", "content": msg["text"]}) + formatted_messages.extend(conversation_history) + else: + 
formatted_messages = [ + {"role": "system", "content": system_messages} + ] + conversation_history + + # Check context length + prompt_len = token_counter( + model=LIGHTRAG_LITELLM_INFERENCE_MODEL, messages=formatted_messages + ) + + if prompt_len > MAX_CHAT_CONTEXT_LENGTH: + raise HTTPException( + status_code=400, + detail="The conversation context with the new message exceeds the maximum context length.", + ) + else: + # Run auto-select for first query or new independent questions + user_query_inputs = [query] + + logger.info(f"Calling auto_select_conversations with query: {query}") + auto_select_result = await auto_select_conversations( + user_query_inputs=user_query_inputs, + project_id_list=[project_id], + db=db, + language=language, + ) + + logger.info(f"Auto-select result: {auto_select_result}") + + # Extract selected conversation IDs + selected_conversation_ids = [] + if "results" in auto_select_result: + for proj_result in auto_select_result["results"].values(): + if "conversation_id_list" in proj_result: + selected_conversation_ids.extend(proj_result["conversation_id_list"]) + + # Add selected conversations to chat context + conversations_added = [] + for conversation_id in selected_conversation_ids: + conversation = db.get(ConversationModel, conversation_id) + if conversation and conversation not in chat.used_conversations: + chat.used_conversations.append(conversation) + conversations_added.append(conversation) + + # Create a message to lock the auto-selected conversations + if conversations_added: + auto_select_message = ProjectChatMessageModel( + id=generate_uuid(), + date_created=get_utc_timestamp(), + message_from="dembrane", + text=f"Auto-selected and added {len(conversations_added)} conversations as context to the chat.", + project_chat_id=chat_id, + used_conversations=conversations_added, + ) + db.add(auto_select_message) + db.commit() + logger.info(f"Added {len(conversations_added)} conversations via auto-select") + + # Get updated chat context 
+ updated_chat_context = await get_chat_context(chat_id, db, auth) + updated_conversation_id_list = updated_chat_context.conversation_id_list + + # Build system messages from the selected conversations + system_messages = await create_system_messages_for_chat( + updated_conversation_id_list, db, language, project_id ) - logger.info(f"rag_prompt: {rag_prompt}") - formatted_messages.append({"role": "system", "content": rag_prompt}) - formatted_messages.append({"role": "user", "content": filtered_messages[-1]["content"]}) + + # Build messages to send + formatted_messages = [] + if isinstance(system_messages, list): + for msg in system_messages: + formatted_messages.append({"role": "system", "content": msg["text"]}) + formatted_messages.extend(conversation_history) + else: + formatted_messages = [ + {"role": "system", "content": system_messages} + ] + conversation_history + + # Check context length prompt_len = token_counter( model=LIGHTRAG_LITELLM_INFERENCE_MODEL, messages=formatted_messages ) - if top_k <= 5: + + if prompt_len > MAX_CHAT_CONTEXT_LENGTH: raise HTTPException( status_code=400, - detail="Auto select is not possible with the current context length", + detail="Auto select returned too many conversations. 
The selected conversations exceed the maximum context length.", ) - conversation_references = await get_conversation_references(rag_prompt, [project_id]) + # Build references list from ONLY newly added conversations (not all conversations) + conversation_references: dict[str, list[dict[str, str]]] = {"references": []} + # Only include conversations that were just added via auto-select + for conv in conversations_added: + conversation_references["references"].append( + { + "conversation": conv.id, + "conversation_title": conv.participant_name, + } + ) + + logger.info(f"Newly added conversations for frontend: {conversation_references}") async def stream_response_async_autoselect() -> AsyncGenerator[str, None]: - conversation_references_yeild = f"h:{json.dumps(conversation_references)}\n" + # Send conversation references (selected conversations) + conversation_references_yeild = f"h:{json.dumps([conversation_references])}\n" yield conversation_references_yeild accumulated_response = "" @@ -516,6 +666,8 @@ async def stream_response_async_autoselect() -> AsyncGenerator[str, None]: api_base=LIGHTRAG_LITELLM_INFERENCE_API_BASE, messages=formatted_messages, stream=True, + timeout=300, # 5 minute timeout for response + stream_timeout=180, # 3 minute timeout for streaming # mock_response="It's simple to use and easy to get started", ) async for chunk in response: @@ -539,12 +691,6 @@ async def stream_response_async_autoselect() -> AsyncGenerator[str, None]: yield "Error: An error occurred while processing the chat response." 
return # Stop generation on error - citations_list = await get_conversation_citations( - rag_prompt, accumulated_response, [project_id] - ) - citations_yeild = f"h:{json.dumps(citations_list)}\n" - yield citations_yeild - headers = {"Content-Type": "text/event-stream"} if protocol == "data": headers["x-vercel-ai-data-stream"] = "v1" @@ -558,17 +704,17 @@ async def stream_response_async_autoselect() -> AsyncGenerator[str, None]: async def stream_response_async_manualselect() -> AsyncGenerator[str, None]: """ Asynchronously stream a model-generated assistant response for the manual-selection chat path. - + Builds the outgoing message sequence by combining provided system messages (list or string) with recent user/assistant messages, removes a duplicated trailing user message if present, then calls the Litellm streaming completion API and yields text chunks as they arrive. - + Yields: - If protocol == "text": successive raw text fragments from the model. - If protocol == "data": framed data lines of the form `0:` for each fragment. - On generation error: a single error payload matching the active protocol (`"Error: ..." ` for text, or `3:"..."` for data). - + Side effects: - On an exception during generation, deletes the in-flight `user_message` from the database and commits the change. - + Notes: - Expects surrounding scope variables: `messages`, `system_messages`, `litellm`, model/API constants, `protocol`, `user_message`, and `logger`. - Returns when the stream completes. 
@@ -612,6 +758,8 @@ async def stream_response_async_manualselect() -> AsyncGenerator[str, None]: api_base=LIGHTRAG_LITELLM_INFERENCE_API_BASE, messages=messages_to_send, stream=True, + timeout=300, # 5 minute timeout for response + stream_timeout=180, # 3 minute timeout for streaming ) async for chunk in response: if chunk.choices[0].delta.content: diff --git a/echo/server/dembrane/chat_utils.py b/echo/server/dembrane/chat_utils.py index d7a1d18a..0c73bd8b 100644 --- a/echo/server/dembrane/chat_utils.py +++ b/echo/server/dembrane/chat_utils.py @@ -1,10 +1,21 @@ import json +import math +import asyncio import logging from typing import Any, Dict, List, Optional +import backoff from litellm import completion, acompletion from pydantic import BaseModel -from sqlalchemy.orm import Session +from litellm.utils import token_counter +from sqlalchemy.orm import Session, selectinload +from litellm.exceptions import ( + Timeout, + APIError, + RateLimitError, + BadRequestError, + ContextWindowExceededError, +) from dembrane.config import ( SMALL_LITELLM_MODEL, @@ -205,13 +216,13 @@ async def generate_title( ) -> str | None: """ Generate a short chat title from a user's query using a small LLM. - + If title generation is disabled via configuration or the trimmed query is shorter than 2 characters, the function returns None. The function builds a prompt (using the English prompt template) and asynchronously calls a configured small LLM; it returns the generated title string or None if the model returns no content. - + Parameters: user_query (str): The user's chat message or query to generate a title from. language (str): Target language for the generated title (affects prompt content; the prompt template used is English). - + Returns: str | None: The generated title, or None if generation is disabled, the query is too short, or the model produced no content. 
""" @@ -242,6 +253,324 @@ async def generate_title( return response.choices[0].message.content +async def auto_select_conversations( + user_query_inputs: List[str], + project_id_list: List[str], + db: Session, + language: str = "en", + batch_size: int = 20, +) -> Dict[str, Any]: + """ + Auto-select conversations based on user queries using LLM-based relevance assessment. + + This function fetches conversation summaries from the database and uses an LLM + to select the most relevant conversations for the given queries. It handles + batching to stay within LLM context limits and processes batches in parallel + for optimal performance. + + Args: + user_query_inputs: List of user query strings (currently up to 3) + project_id_list: List containing a single project ID + db: Database session + language: Language code for the prompt template (default: "en") + batch_size: Number of conversations to process in each LLM call (default: 20) + + Returns: + Dictionary with structure: + { + "results": { + "": { + "conversation_id_list": [] + } + } + } + """ + logger.info(f"Auto-select called with queries: {user_query_inputs}") + logger.info(f"Auto-select called for project(s): {project_id_list}") + + results: Dict[str, Any] = {} + # Batch size: number of conversations to process in each LLM call + # Can be adjusted per-chat via the auto_select_batch_size field + BATCH_SIZE = batch_size + + for project_id in project_id_list: + # Get all conversations for this project + conversations = ( + db.query(ConversationModel) + .filter(ConversationModel.project_id == project_id) + .options(selectinload(ConversationModel.tags)) + .all() + ) + + if not conversations: + logger.warning(f"No conversations found for project {project_id}") + results[project_id] = {"conversation_id_list": []} + continue + + logger.info(f"Found {len(conversations)} total conversations for project {project_id}") + + # Calculate expected number of LLM calls for observability + expected_llm_calls = 
math.ceil(len(conversations) / BATCH_SIZE) + logger.info( + f"Auto-select will make {expected_llm_calls} parallel LLM call(s) " + f"for {len(conversations)} conversations (batch size: {BATCH_SIZE})" + ) + + # Create batches and prepare parallel tasks + tasks = [] + for i in range(0, len(conversations), BATCH_SIZE): + batch = conversations[i : i + BATCH_SIZE] + batch_num = i // BATCH_SIZE + 1 + tasks.append( + _process_single_batch( + batch=batch, + batch_num=batch_num, + user_query_inputs=user_query_inputs, + language=language, + ) + ) + + # Execute all batches in parallel + logger.info(f"Executing {len(tasks)} batches in parallel...") + batch_results = await asyncio.gather(*tasks, return_exceptions=True) + + # Aggregate results from all batches + all_selected_ids = [] + successful_batches = 0 + failed_batches = 0 + + for i, batch_result in enumerate(batch_results): + # Handle exceptions from gather + if isinstance(batch_result, Exception): + logger.error(f"Batch {i + 1} failed with exception: {str(batch_result)}") + failed_batches += 1 + continue + + # Type check: ensure batch_result is a dict, not an exception + if not isinstance(batch_result, dict): + logger.error(f"Batch {i + 1} returned unexpected type: {type(batch_result)}") + failed_batches += 1 + continue + + # Handle batch results + if "error" in batch_result: + failed_batches += 1 + else: + successful_batches += 1 + + selected_ids = batch_result.get("selected_ids", []) + all_selected_ids.extend(selected_ids) + + # Remove duplicates while preserving order + unique_selected_ids = list(dict.fromkeys(all_selected_ids)) + + logger.info( + f"Auto-select completed: {successful_batches}/{len(tasks)} batches successful " + f"({failed_batches} failed), selected {len(unique_selected_ids)} unique conversations " + f"for project {project_id}: {unique_selected_ids}" + ) + + results[project_id] = {"conversation_id_list": unique_selected_ids} + + return {"results": results} + + +@backoff.on_exception( + backoff.expo, + 
(RateLimitError, Timeout, APIError), + max_tries=3, + max_time=5 * 60, # 5 minutes +) +async def _call_llm_with_backoff(prompt: str, batch_num: int) -> Any: + """Call LLM with automatic retry for transient errors.""" + logger.debug(f"Calling LLM for batch {batch_num}") + return await acompletion( + model=SMALL_LITELLM_MODEL, + messages=[{"role": "user", "content": prompt}], + api_base=SMALL_LITELLM_API_BASE, + api_key=SMALL_LITELLM_API_KEY, + response_format={"type": "json_object"}, + timeout=5 * 60, # 5 minutes + ) + + +async def _process_single_batch( + batch: List[ConversationModel], + batch_num: int, + user_query_inputs: List[str], + language: str, +) -> Dict[str, Any]: + """ + Process a single batch of conversations and return selected IDs. + + Args: + batch: List of ConversationModel instances to process + batch_num: Batch number for logging + user_query_inputs: User queries to match against + language: Language code for the prompt template + + Returns: + Dictionary with: + - "selected_ids": List of selected conversation IDs + - "batch_num": The batch number + - "error": Error message if processing failed (optional) + """ + logger.info(f"Processing batch {batch_num} ({len(batch)} conversations, parallel execution)") + + # Prepare conversation data for the prompt + conversation_data = [] + for conv in batch: + # Get summary or fallback to transcript excerpt + summary_text = None + if conv.summary and conv.summary.strip(): + summary_text = conv.summary + else: + # Use transcript as fallback + try: + transcript = get_conversation_transcript( + conv.id, + DirectusSession(user_id="none", is_admin=True), + ) + # Limit transcript to first 500 characters for context + if transcript and len(transcript) > 500: + summary_text = transcript[:500] + "..." 
+ elif transcript: + summary_text = transcript + except Exception as e: + logger.warning(f"Could not get transcript for conversation {conv.id}: {e}") + + # Skip conversations with no content at all + if not summary_text: + logger.debug(f"Skipping conversation {conv.id} - no summary or transcript") + continue + + conv_data = { + "id": conv.id, + "participant_name": conv.participant_name or "Unknown", + "summary": summary_text, + } + if conv.tags: + conv_data["tags"] = ", ".join([tag.text for tag in conv.tags]) + if conv.created_at: + conv_data["created_at"] = conv.created_at.isoformat() + conversation_data.append(conv_data) + + # Skip batch if no valid conversations + if not conversation_data: + logger.warning(f"Batch {batch_num} has no valid conversations with content. Skipping.") + return {"selected_ids": [], "batch_num": batch_num} + + # Render the prompt + prompt = render_prompt( + "auto_select_conversations", + language, + { + "user_queries": user_query_inputs, + "conversations": conversation_data, + }, + ) + + # Validate prompt size before sending + try: + prompt_tokens = token_counter(model=SMALL_LITELLM_MODEL, text=prompt) + MAX_BATCH_CONTEXT = 100000 # Leave headroom for response + + if prompt_tokens > MAX_BATCH_CONTEXT: + # If batch has only 1 conversation, we can't split further + if len(batch) == 1: + logger.error( + f"Batch {batch_num} single conversation exceeds context limit: " + f"{prompt_tokens} tokens. Skipping conversation {batch[0].id}." + ) + return { + "selected_ids": [], + "batch_num": batch_num, + "error": "single_conversation_too_large", + } + + # Split batch in half and process recursively + mid = len(batch) // 2 + batch_1 = batch[:mid] + batch_2 = batch[mid:] + + logger.warning( + f"Batch {batch_num} prompt too large ({prompt_tokens} tokens). " + f"Splitting into 2 sub-batches: {len(batch_1)} and {len(batch_2)} conversations." 
+ ) + + # Process both halves recursively + result_1 = await _process_single_batch(batch_1, batch_num, user_query_inputs, language) + result_2 = await _process_single_batch(batch_2, batch_num, user_query_inputs, language) + + # Combine results from both sub-batches + combined_ids = result_1.get("selected_ids", []) + result_2.get("selected_ids", []) + + logger.info( + f"Batch {batch_num} split processing complete: " + f"{len(combined_ids)} conversations selected from sub-batches." + ) + + return {"selected_ids": combined_ids, "batch_num": batch_num} + except Exception as e: + logger.warning(f"Could not count tokens for batch {batch_num}: {e}") + + # Call the LLM with retry logic for transient errors + try: + response = await _call_llm_with_backoff( + prompt=prompt, + batch_num=batch_num, + ) + + if response.choices[0].message.content: + result = json.loads(response.choices[0].message.content) + raw_selected_ids = result.get("selected_conversation_ids", []) + + # Validate LLM response: ensure all returned IDs are from this batch + valid_ids = {conv.id for conv in batch} + batch_selected_ids = [ + id for id in raw_selected_ids if isinstance(id, (int, str)) and id in valid_ids + ] + + # Log warning if LLM returned invalid IDs + if len(batch_selected_ids) != len(raw_selected_ids): + filtered_count = len(raw_selected_ids) - len(batch_selected_ids) + invalid_ids = [id for id in raw_selected_ids if id not in valid_ids] + logger.warning( + f"Batch {batch_num}: LLM returned {filtered_count} invalid ID(s), " + f"filtered from {len(raw_selected_ids)} to {len(batch_selected_ids)}. 
" + f"Invalid IDs: {invalid_ids}" + ) + + logger.info( + f"Batch {batch_num} selected {len(batch_selected_ids)} " + f"conversations: {batch_selected_ids}" + ) + return {"selected_ids": batch_selected_ids, "batch_num": batch_num} + else: + logger.warning(f"No response from LLM for batch {batch_num}") + return {"selected_ids": [], "batch_num": batch_num} + + except ContextWindowExceededError as e: + logger.error( + f"Batch {batch_num} exceeded context window ({len(batch)} conversations). " + f"Error: {str(e)}" + ) + return {"selected_ids": [], "batch_num": batch_num, "error": "context_exceeded"} + + except (RateLimitError, Timeout) as e: + # These are already retried by backoff, so if we get here, all retries failed + logger.error(f"Batch {batch_num} failed after retries: {type(e).__name__}") + return {"selected_ids": [], "batch_num": batch_num, "error": str(e)} + + except (APIError, BadRequestError) as e: + logger.error(f"Batch {batch_num} API error: {str(e)}") + return {"selected_ids": [], "batch_num": batch_num, "error": "api_error"} + + except Exception as e: + logger.error(f"Batch {batch_num} unexpected error: {str(e)}") + return {"selected_ids": [], "batch_num": batch_num, "error": "unknown"} + + async def get_conversation_citations( rag_prompt: str, accumulated_response: str, @@ -250,7 +579,7 @@ async def get_conversation_citations( ) -> List[Dict[str, Any]]: """ Extract structured conversation citations from an accumulated assistant response using a text-structuring model, map those citations to conversations, and return only citations that belong to the given project IDs. - + This function: - Renders a text-structuring prompt using `rag_prompt` and `accumulated_response` and sends it to the configured text-structure LLM. - Parses the model's JSON response (expected to follow `CitationsSchema`) to obtain citation entries that include `segment_id` and `verbatim_reference_text_chunk`. 
@@ -260,7 +589,7 @@ async def get_conversation_citations( - "conversation": conversation id (str) - "reference_text": verbatim reference text chunk (str) - "conversation_title": conversation name/title (str) - + If the model output cannot be parsed or a segment-to-conversation mapping fails for an individual citation, that citation is skipped; parsing errors do not raise but are logged and result in an empty citations list in the returned structure. """ text_structuring_model_message = render_prompt( diff --git a/echo/server/prompt_templates/auto_select_conversations.de.jinja b/echo/server/prompt_templates/auto_select_conversations.de.jinja new file mode 100644 index 00000000..e8e68d19 --- /dev/null +++ b/echo/server/prompt_templates/auto_select_conversations.de.jinja @@ -0,0 +1,34 @@ +Sie sind ein Assistent für die Auswahl von Gesprächen. Ihre Aufgabe ist es zu identifizieren, welche Gespräche für die Anfragen des Benutzers am relevantesten sind. + +Benutzeranfragen: +{% for query in user_queries %} +- {{ query }} +{% endfor %} + +Verfügbare Gespräche: +{% for conv in conversations %} +--- +Gesprächs-ID: {{ conv.id }} +Teilnehmer: {{ conv.participant_name }} +Zusammenfassung: {{ conv.summary or "Keine Zusammenfassung verfügbar" }} +{% if conv.tags %}Tags: {{ conv.tags }}{% endif %} +{% if conv.created_at %}Erstellt: {{ conv.created_at }}{% endif %} +{% endfor %} +--- + +Anweisungen: +- Analysieren Sie jede Gesprächszusammenfassung sorgfältig +- Wählen Sie Gespräche aus, die für die Anfragen des Benutzers relevant oder potenziell relevant sind +- Beziehen Sie Gespräche mit direkter Relevanz sowie solche mit teilweiser oder indirekter Relevanz ein +- Berücksichtigen Sie Synonyme, verwandte Themen und kontextuelle Verbindungen +- Wenn ein Gespräch Informationen enthalten könnte, die für die Beantwortung der Anfragen nützlich sind, nehmen Sie es auf +- Schließen Sie nur Gespräche aus, die eindeutig mit keiner der Anfragen zusammenhängen +- Wenn KEINE Gespräche 
relevant sind, geben Sie eine leere Liste zurück + +Antworten Sie mit einem JSON-Objekt, das nur die Liste der relevanten Gesprächs-IDs enthält: +{ + "selected_conversation_ids": ["id1", "id2", ...] +} + +Geben Sie NUR das JSON-Objekt zurück, keinen anderen Text. + diff --git a/echo/server/prompt_templates/auto_select_conversations.en.jinja b/echo/server/prompt_templates/auto_select_conversations.en.jinja new file mode 100644 index 00000000..38d6274d --- /dev/null +++ b/echo/server/prompt_templates/auto_select_conversations.en.jinja @@ -0,0 +1,33 @@ +You are a conversation selection assistant. Your task is to identify which conversations are most relevant to the user's queries. + +User Queries: +{% for query in user_queries %} +- {{ query }} +{% endfor %} + +Available Conversations: +{% for conv in conversations %} +--- +Conversation ID: {{ conv.id }} +Participant: {{ conv.participant_name }} +Summary: {{ conv.summary or "No summary available" }} +{% if conv.tags %}Tags: {{ conv.tags }}{% endif %} +{% if conv.created_at %}Created: {{ conv.created_at }}{% endif %} +{% endfor %} +--- + +Instructions: +- Analyze each conversation summary carefully +- Select conversations that are relevant or potentially relevant to the user's queries +- Include conversations with direct relevance as well as those with partial or indirect relevance +- Consider synonyms, related topics, and contextual connections +- If a conversation might contain information useful for answering the queries, include it +- Only exclude conversations that are clearly unrelated to all queries +- If NO conversations are relevant, return an empty list + +Respond with a JSON object containing only the list of relevant conversation IDs: +{ + "selected_conversation_ids": ["id1", "id2", ...] +} + +Return ONLY the JSON object, no other text. 
diff --git a/echo/server/prompt_templates/auto_select_conversations.es.jinja b/echo/server/prompt_templates/auto_select_conversations.es.jinja new file mode 100644 index 00000000..3a63c3e5 --- /dev/null +++ b/echo/server/prompt_templates/auto_select_conversations.es.jinja @@ -0,0 +1,34 @@ +Eres un asistente de selección de conversaciones. Tu tarea es identificar qué conversaciones son más relevantes para las consultas del usuario. + +Consultas del usuario: +{% for query in user_queries %} +- {{ query }} +{% endfor %} + +Conversaciones disponibles: +{% for conv in conversations %} +--- +ID de conversación: {{ conv.id }} +Participante: {{ conv.participant_name }} +Resumen: {{ conv.summary or "No hay resumen disponible" }} +{% if conv.tags %}Etiquetas: {{ conv.tags }}{% endif %} +{% if conv.created_at %}Creado: {{ conv.created_at }}{% endif %} +{% endfor %} +--- + +Instrucciones: +- Analiza cuidadosamente cada resumen de conversación +- Selecciona conversaciones que sean relevantes o potencialmente relevantes para las consultas del usuario +- Incluye conversaciones con relevancia directa así como aquellas con relevancia parcial o indirecta +- Considera sinónimos, temas relacionados y conexiones contextuales +- Si una conversación podría contener información útil para responder las consultas, inclúyela +- Solo excluye conversaciones que claramente no estén relacionadas con ninguna consulta +- Si NINGUNA conversación es relevante, devuelve una lista vacía + +Responde con un objeto JSON que contenga solo la lista de IDs de conversaciones relevantes: +{ + "selected_conversation_ids": ["id1", "id2", ...] +} + +Devuelve SOLO el objeto JSON, ningún otro texto. 
+ diff --git a/echo/server/prompt_templates/auto_select_conversations.fr.jinja b/echo/server/prompt_templates/auto_select_conversations.fr.jinja new file mode 100644 index 00000000..a76c5c83 --- /dev/null +++ b/echo/server/prompt_templates/auto_select_conversations.fr.jinja @@ -0,0 +1,34 @@ +Vous êtes un assistant de sélection de conversations. Votre tâche consiste à identifier quelles conversations sont les plus pertinentes pour les requêtes de l'utilisateur. + +Requêtes de l'utilisateur : +{% for query in user_queries %} +- {{ query }} +{% endfor %} + +Conversations disponibles : +{% for conv in conversations %} +--- +ID de conversation : {{ conv.id }} +Participant : {{ conv.participant_name }} +Résumé : {{ conv.summary or "Aucun résumé disponible" }} +{% if conv.tags %}Tags : {{ conv.tags }}{% endif %} +{% if conv.created_at %}Créé : {{ conv.created_at }}{% endif %} +{% endfor %} +--- + +Instructions : +- Analysez attentivement chaque résumé de conversation +- Sélectionnez les conversations qui sont pertinentes ou potentiellement pertinentes pour les requêtes de l'utilisateur +- Incluez les conversations avec une pertinence directe ainsi que celles avec une pertinence partielle ou indirecte +- Considérez les synonymes, les sujets connexes et les liens contextuels +- Si une conversation peut contenir des informations utiles pour répondre aux requêtes, incluez-la +- N'excluez que les conversations qui ne sont clairement liées à aucune des requêtes +- Si AUCUNE conversation n'est pertinente, renvoyez une liste vide + +Répondez avec un objet JSON contenant uniquement la liste des IDs de conversations pertinentes : +{ + "selected_conversation_ids": ["id1", "id2", ...] +} + +Renvoyez UNIQUEMENT l'objet JSON, aucun autre texte. 
+ diff --git a/echo/server/prompt_templates/auto_select_conversations.nl.jinja b/echo/server/prompt_templates/auto_select_conversations.nl.jinja new file mode 100644 index 00000000..a990f9e5 --- /dev/null +++ b/echo/server/prompt_templates/auto_select_conversations.nl.jinja @@ -0,0 +1,34 @@ +U bent een assistent voor het selecteren van gesprekken. Uw taak is om te identificeren welke gesprekken het meest relevant zijn voor de vragen van de gebruiker. + +Gebruikersvragen: +{% for query in user_queries %} +- {{ query }} +{% endfor %} + +Beschikbare gesprekken: +{% for conv in conversations %} +--- +Gesprek-ID: {{ conv.id }} +Deelnemer: {{ conv.participant_name }} +Samenvatting: {{ conv.summary or "Geen samenvatting beschikbaar" }} +{% if conv.tags %}Tags: {{ conv.tags }}{% endif %} +{% if conv.created_at %}Gemaakt: {{ conv.created_at }}{% endif %} +{% endfor %} +--- + +Instructies: +- Analyseer elke gespreksamenvatting zorgvuldig +- Selecteer gesprekken die relevant of potentieel relevant zijn voor de vragen van de gebruiker +- Neem gesprekken op met directe relevantie evenals die met gedeeltelijke of indirecte relevantie +- Overweeg synoniemen, gerelateerde onderwerpen en contextuele verbindingen +- Als een gesprek informatie kan bevatten die nuttig is voor het beantwoorden van de vragen, neem het dan op +- Sluit alleen gesprekken uit die duidelijk aan geen van de vragen gerelateerd zijn +- Als GEEN gesprekken relevant zijn, retourneer dan een lege lijst + +Reageer met een JSON-object dat alleen de lijst met relevante gesprek-ID's bevat: +{ + "selected_conversation_ids": ["id1", "id2", ...] +} + +Retourneer ALLEEN het JSON-object, geen andere tekst. 
+ diff --git a/echo/server/prompt_templates/is_followup_question.de.jinja b/echo/server/prompt_templates/is_followup_question.de.jinja new file mode 100644 index 00000000..375312db --- /dev/null +++ b/echo/server/prompt_templates/is_followup_question.de.jinja @@ -0,0 +1,20 @@ +Sie analysieren, ob die Frage eines Benutzers eine Folgefrage zu vorherigen Konversationsnachrichten ist. + +Vorherige Nachrichten: +{% for msg in previous_messages %} +{{ msg.role }}: {{ msg.content }} +{% endfor %} + +Aktuelle Frage: {{ current_question }} + +Ist die aktuelle Frage eine Folgefrage oder Klärung im Zusammenhang mit den vorherigen Nachrichten? +Antworten Sie NUR mit einem JSON-Objekt: +{"is_followup": true} wenn es eine Folgefrage/Klärung ist +{"is_followup": false} wenn es eine neue unabhängige Frage ist + +Beispiele für Folgefragen: "was noch?", "können Sie das näher erläutern?", "erzählen Sie mir mehr", "und was ist mit X in diesem Kontext?" +Beispiele für neue Fragen: "wie sieht es mit der Bereitstellung aus?", "erzählen Sie mir über das Testen", "welche Konversationen besprechen Y?" + +Geben Sie NUR das JSON-Objekt zurück, keinen anderen Text. + + diff --git a/echo/server/prompt_templates/is_followup_question.en.jinja b/echo/server/prompt_templates/is_followup_question.en.jinja new file mode 100644 index 00000000..4ce9e5c7 --- /dev/null +++ b/echo/server/prompt_templates/is_followup_question.en.jinja @@ -0,0 +1,19 @@ +You are analyzing if a user's question is a follow-up to previous conversation messages. + +Previous messages: +{% for msg in previous_messages %} +{{ msg.role }}: {{ msg.content }} +{% endfor %} + +Current question: {{ current_question }} + +Is the current question a follow-up or clarification related to the previous messages? 
+Answer ONLY with a JSON object: +{"is_followup": true} if it's a follow-up/clarification +{"is_followup": false} if it's a new independent question + +Examples of follow-ups: "what else?", "can you elaborate?", "tell me more", "and what about X in that context?" +Examples of new questions: "what about deployment?", "tell me about testing", "which conversations discuss Y?" + +Return ONLY the JSON object, no other text. + diff --git a/echo/server/prompt_templates/is_followup_question.es.jinja b/echo/server/prompt_templates/is_followup_question.es.jinja new file mode 100644 index 00000000..6a89c018 --- /dev/null +++ b/echo/server/prompt_templates/is_followup_question.es.jinja @@ -0,0 +1,20 @@ +Estás analizando si la pregunta de un usuario es un seguimiento de mensajes de conversación anteriores. + +Mensajes anteriores: +{% for msg in previous_messages %} +{{ msg.role }}: {{ msg.content }} +{% endfor %} + +Pregunta actual: {{ current_question }} + +¿Es la pregunta actual un seguimiento o aclaración relacionada con los mensajes anteriores? +Responde SOLO con un objeto JSON: +{"is_followup": true} si es un seguimiento/aclaración +{"is_followup": false} si es una pregunta nueva e independiente + +Ejemplos de seguimientos: "¿qué más?", "¿puedes elaborar?", "cuéntame más", "¿y qué hay de X en ese contexto?" +Ejemplos de preguntas nuevas: "¿qué hay del despliegue?", "cuéntame sobre las pruebas", "¿qué conversaciones discuten Y?" + +Devuelve SOLO el objeto JSON, ningún otro texto. + + diff --git a/echo/server/prompt_templates/is_followup_question.fr.jinja b/echo/server/prompt_templates/is_followup_question.fr.jinja new file mode 100644 index 00000000..cf96c81d --- /dev/null +++ b/echo/server/prompt_templates/is_followup_question.fr.jinja @@ -0,0 +1,20 @@ +Vous analysez si la question d'un utilisateur fait suite à des messages de conversation précédents. 
+ +Messages précédents : +{% for msg in previous_messages %} +{{ msg.role }}: {{ msg.content }} +{% endfor %} + +Question actuelle : {{ current_question }} + +La question actuelle est-elle une suite ou une clarification liée aux messages précédents ? +Répondez UNIQUEMENT avec un objet JSON : +{"is_followup": true} si c'est une suite/clarification +{"is_followup": false} si c'est une nouvelle question indépendante + +Exemples de suites : "quoi d'autre ?", "pouvez-vous développer ?", "dites-m'en plus", "et qu'en est-il de X dans ce contexte ?" +Exemples de nouvelles questions : "qu'en est-il du déploiement ?", "parlez-moi des tests", "quelles conversations discutent de Y ?" + +Retournez UNIQUEMENT l'objet JSON, aucun autre texte. + + diff --git a/echo/server/prompt_templates/is_followup_question.nl.jinja b/echo/server/prompt_templates/is_followup_question.nl.jinja new file mode 100644 index 00000000..4b8e6dc0 --- /dev/null +++ b/echo/server/prompt_templates/is_followup_question.nl.jinja @@ -0,0 +1,20 @@ +U analyseert of de vraag van een gebruiker een vervolgvraag is op eerdere gespreksberichten. + +Eerdere berichten: +{% for msg in previous_messages %} +{{ msg.role }}: {{ msg.content }} +{% endfor %} + +Huidige vraag: {{ current_question }} + +Is de huidige vraag een vervolgvraag of verduidelijking gerelateerd aan de eerdere berichten? +Antwoord ALLEEN met een JSON-object: +{"is_followup": true} als het een vervolgvraag/verduidelijking is +{"is_followup": false} als het een nieuwe onafhankelijke vraag is + +Voorbeelden van vervolgvragen: "wat nog meer?", "kun je dat uitwerken?", "vertel me meer", "en hoe zit het met X in die context?" +Voorbeelden van nieuwe vragen: "hoe zit het met deployment?", "vertel me over testen", "welke gesprekken bespreken Y?" + +Geef ALLEEN het JSON-object terug, geen andere tekst. + +