diff --git a/echo/frontend/src/components/chat/Sources.tsx b/echo/frontend/src/components/chat/Sources.tsx
index 327ddacc..55fe1811 100644
--- a/echo/frontend/src/components/chat/Sources.tsx
+++ b/echo/frontend/src/components/chat/Sources.tsx
@@ -18,7 +18,7 @@ export const Sources = ({
- Citing the following sources
+ The following conversations were automatically added to the context
diff --git a/echo/frontend/src/components/conversation/AutoSelectConversations.tsx b/echo/frontend/src/components/conversation/AutoSelectConversations.tsx
index edb9b6c2..251c457d 100644
--- a/echo/frontend/src/components/conversation/AutoSelectConversations.tsx
+++ b/echo/frontend/src/components/conversation/AutoSelectConversations.tsx
@@ -58,15 +58,6 @@ export const AutoSelectConversations = () => {
(conversation) => conversation.is_audio_processing_finished,
);
- console.log(hasProcessedConversations, conversations);
-
- // Show warning if feature is available but no conversations are processed
- const showProcessingWarning =
- !isDisabled &&
- conversations &&
- conversations.length > 0 &&
- !hasProcessedConversations;
-
const handleCheckboxChange = (checked: boolean) => {
if (isDisabled) {
return;
@@ -177,22 +168,6 @@ export const AutoSelectConversations = () => {
/>
- {showProcessingWarning && (
- }
- title={Audio Processing In Progress}
- className="border-t border-yellow-200 bg-yellow-50 p-3"
- >
-
-
- Some conversations are still being processed. Auto-select will
- work optimally once audio processing is complete.
-
-
-
- )}
-
{isDisabled && (
diff --git a/echo/frontend/src/components/conversation/ConversationAccordion.tsx b/echo/frontend/src/components/conversation/ConversationAccordion.tsx
index 1af7faad..7236cc44 100644
--- a/echo/frontend/src/components/conversation/ConversationAccordion.tsx
+++ b/echo/frontend/src/components/conversation/ConversationAccordion.tsx
@@ -400,28 +400,6 @@ export const ConversationStatusIndicators = ({
)}
- {!!project?.is_enhanced_audio_processing_enabled &&
- // if processing still
- // don't show this if both is_finished and is_audio_processing_finished are true
- // but if project.is_enhanced_audio_processing_enabled is true, just see the is_finished
- !(
- conversation.is_finished && conversation.is_audio_processing_finished
- ) && (
-
-
-
- Processing
-
-
-
-
- )}
-
{hasOnlyTextContent && (
Text
@@ -498,7 +476,6 @@ const ConversationAccordionItem = ({
"!bg-primary-50": isLocked,
})}
rightSection={
- (!ENABLE_CHAT_AUTO_SELECT || !isAutoSelectEnabled) &&
inChatMode && (
{
onResponse: async (_response) => {
setShowProgress(false);
setProgressValue(0);
+ if (ENABLE_CHAT_AUTO_SELECT && contextToBeAdded?.auto_select_bool) {
+ chatContextQuery.refetch();
+ }
},
onFinish: async (message) => {
// this uses the response stream from the backend and makes a chat message IN THE FRONTEND
diff --git a/echo/server/dembrane/api/chat.py b/echo/server/dembrane/api/chat.py
index bfb3cc9d..80fa434d 100644
--- a/echo/server/dembrane/api/chat.py
+++ b/echo/server/dembrane/api/chat.py
@@ -15,13 +15,16 @@
from dembrane.utils import generate_uuid, get_utc_timestamp
from dembrane.config import (
+ SMALL_LITELLM_MODEL,
+ SMALL_LITELLM_API_KEY,
+ SMALL_LITELLM_API_BASE,
ENABLE_CHAT_AUTO_SELECT,
- AUDIO_LIGHTRAG_TOP_K_PROMPT,
LIGHTRAG_LITELLM_INFERENCE_MODEL,
LIGHTRAG_LITELLM_INFERENCE_API_KEY,
LIGHTRAG_LITELLM_INFERENCE_API_BASE,
LIGHTRAG_LITELLM_INFERENCE_API_VERSION,
)
+from dembrane.prompts import render_prompt
from dembrane.database import (
DatabaseSession,
ProjectChatModel,
@@ -34,23 +37,72 @@
MAX_CHAT_CONTEXT_LENGTH,
generate_title,
get_project_chat_history,
- get_conversation_citations,
- get_conversation_references,
- get_lightrag_prompt_by_params,
+ auto_select_conversations,
create_system_messages_for_chat,
)
from dembrane.quote_utils import count_tokens
from dembrane.api.conversation import get_conversation_token_count
from dembrane.api.dependency_auth import DirectusSession, DependencyDirectusSession
-from dembrane.audio_lightrag.utils.lightrag_utils import (
- get_project_id,
-)
+from dembrane.audio_lightrag.utils.lightrag_utils import get_project_id
ChatRouter = APIRouter(tags=["chat"])
logger = logging.getLogger("dembrane.chat")
+async def is_followup_question(
+ conversation_history: List[Dict[str, str]], language: str = "en"
+) -> bool:
+ """
+ Determine if the current question is a follow-up to previous messages.
+ Uses a small LLM call to check semantic relationship.
+
+ Returns:
+ True if it's a follow-up question, False if it's a new independent question
+ """
+ if len(conversation_history) < 2:
+ # No previous context, can't be a follow-up
+ return False
+
+ # Take last 4 messages for context (2 exchanges)
+ recent_messages = conversation_history[-4:]
+
+ # Format messages for the prompt
+ previous_messages = [
+ {"role": msg["role"], "content": msg["content"]} for msg in recent_messages[:-1]
+ ]
+ current_question = recent_messages[-1]["content"]
+
+ prompt = render_prompt(
+ "is_followup_question",
+ language,
+ {
+ "previous_messages": previous_messages,
+ "current_question": current_question,
+ },
+ )
+
+ try:
+ response = await litellm.acompletion(
+ model=SMALL_LITELLM_MODEL,
+ api_key=SMALL_LITELLM_API_KEY,
+ api_base=SMALL_LITELLM_API_BASE,
+ messages=[{"role": "user", "content": prompt}],
+ temperature=0, # Deterministic
+ timeout=60, # 1 minute timeout for quick decision
+ )
+
+ result_text = response.choices[0].message.content.strip()
+ result = json.loads(result_text)
+ is_followup = result.get("is_followup", False)
+
+ logger.info(f"Follow-up detection: {is_followup} for query: {current_question[:50]}...")
+ return is_followup
+ except Exception as e:
+ logger.warning(f"Follow-up detection failed: {e}. Defaulting to False (run auto-select)")
+ return False
+
+
class ChatContextConversationSchema(BaseModel):
conversation_id: str
conversation_participant_name: str
@@ -385,25 +437,20 @@ async def post_chat(
) -> StreamingResponse: # ignore: type
"""
Handle a chat interaction: persist the user's message, optionally generate a title, and stream an LLM-generated response.
-
This endpoint records the incoming user message into the chat, may asynchronously generate and persist a chat title if missing, and then produces a streaming response from the configured LLM. Two generation modes are supported:
- Auto-select (when enabled for the chat): builds a RAG prompt, retrieves conversation references and citations, and streams the model output.
- Manual-select: builds system messages from locked conversations and streams the model output.
-
Side effects:
- Persists a new ProjectChatMessageModel for the user message.
- May update the chat name and the message's template key.
- On generation failure the in-flight user message is deleted.
-
Parameters:
- chat_id: ID of the target chat (used to validate access and load context).
- body: ChatBodySchema containing the messages (the last user message is used as the prompt) and optional template_key.
- protocol: Response protocol; "data" (default) yields structured data frames, "text" yields raw text chunks.
- language: Language code used for title generation and system message creation.
-
Returns:
- StreamingResponse that yields streamed model content and, in auto-select mode, header payloads containing conversation references and citations.
-
Raises:
- HTTPException: 404 if the chat (or required conversation data) is not found; 400 when auto-select cannot satisfy context-length constraints or request validation fails.
"""
@@ -473,38 +520,141 @@ async def post_chat(
and filtered_messages[-2]["content"] == filtered_messages[-1]["content"]
):
filtered_messages = filtered_messages[:-1]
- top_k = AUDIO_LIGHTRAG_TOP_K_PROMPT
- prompt_len = float("inf")
- while MAX_CHAT_CONTEXT_LENGTH < prompt_len:
+
+ query = filtered_messages[-1]["content"]
+ conversation_history = filtered_messages
+
+ # Track newly added conversations for displaying in the frontend
+ conversations_added: list[ConversationModel] = []
+
+ # Check if this is a follow-up question (only if we have locked conversations)
+ should_reuse_locked = False
+ if locked_conversation_id_list:
+ is_followup = await is_followup_question(conversation_history, language)
+ if is_followup:
+ logger.info("Detected follow-up question - reusing locked conversations")
+ should_reuse_locked = True
+ else:
+ logger.info("New independent question - running auto-select")
+
+ if should_reuse_locked:
+ # Reuse existing locked conversations for follow-up questions
+ updated_conversation_id_list = locked_conversation_id_list
+
+ system_messages = await create_system_messages_for_chat(
+ updated_conversation_id_list, db, language, project_id
+ )
+
formatted_messages = []
- top_k = max(5, top_k - 10)
- query = filtered_messages[-1]["content"]
- conversation_history = filtered_messages
- rag_prompt = await get_lightrag_prompt_by_params(
- query=query,
- conversation_history=conversation_history,
- echo_conversation_ids=chat_context.conversation_id_list,
- echo_project_ids=[project_id],
- auto_select_bool=chat_context.auto_select_bool,
- get_transcripts=True,
- top_k=top_k,
+ if isinstance(system_messages, list):
+ for msg in system_messages:
+ formatted_messages.append({"role": "system", "content": msg["text"]})
+ formatted_messages.extend(conversation_history)
+ else:
+ formatted_messages = [
+ {"role": "system", "content": system_messages}
+ ] + conversation_history
+
+ # Check context length
+ prompt_len = token_counter(
+ model=LIGHTRAG_LITELLM_INFERENCE_MODEL, messages=formatted_messages
+ )
+
+ if prompt_len > MAX_CHAT_CONTEXT_LENGTH:
+ raise HTTPException(
+ status_code=400,
+ detail="The conversation context with the new message exceeds the maximum context length.",
+ )
+ else:
+ # Run auto-select for first query or new independent questions
+ user_query_inputs = [query]
+
+ logger.info(f"Calling auto_select_conversations with query: {query}")
+ auto_select_result = await auto_select_conversations(
+ user_query_inputs=user_query_inputs,
+ project_id_list=[project_id],
+ db=db,
+ language=language,
+ )
+
+ logger.info(f"Auto-select result: {auto_select_result}")
+
+ # Extract selected conversation IDs
+ selected_conversation_ids = []
+ if "results" in auto_select_result:
+ for proj_result in auto_select_result["results"].values():
+ if "conversation_id_list" in proj_result:
+ selected_conversation_ids.extend(proj_result["conversation_id_list"])
+
+ # Add selected conversations to chat context
+ conversations_added = []
+ for conversation_id in selected_conversation_ids:
+ conversation = db.get(ConversationModel, conversation_id)
+ if conversation and conversation not in chat.used_conversations:
+ chat.used_conversations.append(conversation)
+ conversations_added.append(conversation)
+
+ # Create a message to lock the auto-selected conversations
+ if conversations_added:
+ auto_select_message = ProjectChatMessageModel(
+ id=generate_uuid(),
+ date_created=get_utc_timestamp(),
+ message_from="dembrane",
+ text=f"Auto-selected and added {len(conversations_added)} conversations as context to the chat.",
+ project_chat_id=chat_id,
+ used_conversations=conversations_added,
+ )
+ db.add(auto_select_message)
+ db.commit()
+ logger.info(f"Added {len(conversations_added)} conversations via auto-select")
+
+ # Get updated chat context
+ updated_chat_context = await get_chat_context(chat_id, db, auth)
+ updated_conversation_id_list = updated_chat_context.conversation_id_list
+
+ # Build system messages from the selected conversations
+ system_messages = await create_system_messages_for_chat(
+ updated_conversation_id_list, db, language, project_id
)
- logger.info(f"rag_prompt: {rag_prompt}")
- formatted_messages.append({"role": "system", "content": rag_prompt})
- formatted_messages.append({"role": "user", "content": filtered_messages[-1]["content"]})
+
+ # Build messages to send
+ formatted_messages = []
+ if isinstance(system_messages, list):
+ for msg in system_messages:
+ formatted_messages.append({"role": "system", "content": msg["text"]})
+ formatted_messages.extend(conversation_history)
+ else:
+ formatted_messages = [
+ {"role": "system", "content": system_messages}
+ ] + conversation_history
+
+ # Check context length
prompt_len = token_counter(
model=LIGHTRAG_LITELLM_INFERENCE_MODEL, messages=formatted_messages
)
- if top_k <= 5:
+
+ if prompt_len > MAX_CHAT_CONTEXT_LENGTH:
raise HTTPException(
status_code=400,
- detail="Auto select is not possible with the current context length",
+ detail="Auto select returned too many conversations. The selected conversations exceed the maximum context length.",
)
- conversation_references = await get_conversation_references(rag_prompt, [project_id])
+ # Build references list from ONLY newly added conversations (not all conversations)
+ conversation_references: dict[str, list[dict[str, str]]] = {"references": []}
+ # Only include conversations that were just added via auto-select
+ for conv in conversations_added:
+ conversation_references["references"].append(
+ {
+ "conversation": conv.id,
+ "conversation_title": conv.participant_name,
+ }
+ )
+
+ logger.info(f"Newly added conversations for frontend: {conversation_references}")
async def stream_response_async_autoselect() -> AsyncGenerator[str, None]:
- conversation_references_yeild = f"h:{json.dumps(conversation_references)}\n"
+ # Send conversation references (selected conversations)
+ conversation_references_yeild = f"h:{json.dumps([conversation_references])}\n"
yield conversation_references_yeild
accumulated_response = ""
@@ -516,6 +666,8 @@ async def stream_response_async_autoselect() -> AsyncGenerator[str, None]:
api_base=LIGHTRAG_LITELLM_INFERENCE_API_BASE,
messages=formatted_messages,
stream=True,
+ timeout=300, # 5 minute timeout for response
+ stream_timeout=180, # 3 minute timeout for streaming
# mock_response="It's simple to use and easy to get started",
)
async for chunk in response:
@@ -539,12 +691,6 @@ async def stream_response_async_autoselect() -> AsyncGenerator[str, None]:
yield "Error: An error occurred while processing the chat response."
return # Stop generation on error
- citations_list = await get_conversation_citations(
- rag_prompt, accumulated_response, [project_id]
- )
- citations_yeild = f"h:{json.dumps(citations_list)}\n"
- yield citations_yeild
-
headers = {"Content-Type": "text/event-stream"}
if protocol == "data":
headers["x-vercel-ai-data-stream"] = "v1"
@@ -558,17 +704,17 @@ async def stream_response_async_autoselect() -> AsyncGenerator[str, None]:
async def stream_response_async_manualselect() -> AsyncGenerator[str, None]:
"""
Asynchronously stream a model-generated assistant response for the manual-selection chat path.
-
+
Builds the outgoing message sequence by combining provided system messages (list or string) with recent user/assistant messages, removes a duplicated trailing user message if present, then calls the Litellm streaming completion API and yields text chunks as they arrive.
-
+
Yields:
- If protocol == "text": successive raw text fragments from the model.
- If protocol == "data": framed data lines of the form `0:` for each fragment.
- On generation error: a single error payload matching the active protocol (`"Error: ..." ` for text, or `3:"..."` for data).
-
+
Side effects:
- On an exception during generation, deletes the in-flight `user_message` from the database and commits the change.
-
+
Notes:
- Expects surrounding scope variables: `messages`, `system_messages`, `litellm`, model/API constants, `protocol`, `user_message`, and `logger`.
- Returns when the stream completes.
@@ -612,6 +758,8 @@ async def stream_response_async_manualselect() -> AsyncGenerator[str, None]:
api_base=LIGHTRAG_LITELLM_INFERENCE_API_BASE,
messages=messages_to_send,
stream=True,
+ timeout=300, # 5 minute timeout for response
+ stream_timeout=180, # 3 minute timeout for streaming
)
async for chunk in response:
if chunk.choices[0].delta.content:
diff --git a/echo/server/dembrane/chat_utils.py b/echo/server/dembrane/chat_utils.py
index d7a1d18a..0c73bd8b 100644
--- a/echo/server/dembrane/chat_utils.py
+++ b/echo/server/dembrane/chat_utils.py
@@ -1,10 +1,21 @@
import json
+import math
+import asyncio
import logging
from typing import Any, Dict, List, Optional
+import backoff
from litellm import completion, acompletion
from pydantic import BaseModel
-from sqlalchemy.orm import Session
+from litellm.utils import token_counter
+from sqlalchemy.orm import Session, selectinload
+from litellm.exceptions import (
+ Timeout,
+ APIError,
+ RateLimitError,
+ BadRequestError,
+ ContextWindowExceededError,
+)
from dembrane.config import (
SMALL_LITELLM_MODEL,
@@ -205,13 +216,13 @@ async def generate_title(
) -> str | None:
"""
Generate a short chat title from a user's query using a small LLM.
-
+
If title generation is disabled via configuration or the trimmed query is shorter than 2 characters, the function returns None. The function builds a prompt (using the English prompt template) and asynchronously calls a configured small LLM; it returns the generated title string or None if the model returns no content.
-
+
Parameters:
user_query (str): The user's chat message or query to generate a title from.
language (str): Target language for the generated title (affects prompt content; the prompt template used is English).
-
+
Returns:
str | None: The generated title, or None if generation is disabled, the query is too short, or the model produced no content.
"""
@@ -242,6 +253,324 @@ async def generate_title(
return response.choices[0].message.content
+async def auto_select_conversations(
+ user_query_inputs: List[str],
+ project_id_list: List[str],
+ db: Session,
+ language: str = "en",
+ batch_size: int = 20,
+) -> Dict[str, Any]:
+ """
+ Auto-select conversations based on user queries using LLM-based relevance assessment.
+
+ This function fetches conversation summaries from the database and uses an LLM
+ to select the most relevant conversations for the given queries. It handles
+ batching to stay within LLM context limits and processes batches in parallel
+ for optimal performance.
+
+ Args:
+ user_query_inputs: List of user query strings (currently up to 3)
+ project_id_list: List containing a single project ID
+ db: Database session
+ language: Language code for the prompt template (default: "en")
+ batch_size: Number of conversations to process in each LLM call (default: 20)
+
+ Returns:
+ Dictionary with structure:
+ {
+ "results": {
+            "<project_id>": {
+ "conversation_id_list": []
+ }
+ }
+ }
+ """
+ logger.info(f"Auto-select called with queries: {user_query_inputs}")
+ logger.info(f"Auto-select called for project(s): {project_id_list}")
+
+ results: Dict[str, Any] = {}
+ # Batch size: number of conversations to process in each LLM call
+ # Can be adjusted per-chat via the auto_select_batch_size field
+ BATCH_SIZE = batch_size
+
+ for project_id in project_id_list:
+ # Get all conversations for this project
+ conversations = (
+ db.query(ConversationModel)
+ .filter(ConversationModel.project_id == project_id)
+ .options(selectinload(ConversationModel.tags))
+ .all()
+ )
+
+ if not conversations:
+ logger.warning(f"No conversations found for project {project_id}")
+ results[project_id] = {"conversation_id_list": []}
+ continue
+
+ logger.info(f"Found {len(conversations)} total conversations for project {project_id}")
+
+ # Calculate expected number of LLM calls for observability
+ expected_llm_calls = math.ceil(len(conversations) / BATCH_SIZE)
+ logger.info(
+ f"Auto-select will make {expected_llm_calls} parallel LLM call(s) "
+ f"for {len(conversations)} conversations (batch size: {BATCH_SIZE})"
+ )
+
+ # Create batches and prepare parallel tasks
+ tasks = []
+ for i in range(0, len(conversations), BATCH_SIZE):
+ batch = conversations[i : i + BATCH_SIZE]
+ batch_num = i // BATCH_SIZE + 1
+ tasks.append(
+ _process_single_batch(
+ batch=batch,
+ batch_num=batch_num,
+ user_query_inputs=user_query_inputs,
+ language=language,
+ )
+ )
+
+ # Execute all batches in parallel
+ logger.info(f"Executing {len(tasks)} batches in parallel...")
+ batch_results = await asyncio.gather(*tasks, return_exceptions=True)
+
+ # Aggregate results from all batches
+ all_selected_ids = []
+ successful_batches = 0
+ failed_batches = 0
+
+ for i, batch_result in enumerate(batch_results):
+ # Handle exceptions from gather
+ if isinstance(batch_result, Exception):
+ logger.error(f"Batch {i + 1} failed with exception: {str(batch_result)}")
+ failed_batches += 1
+ continue
+
+ # Type check: ensure batch_result is a dict, not an exception
+ if not isinstance(batch_result, dict):
+ logger.error(f"Batch {i + 1} returned unexpected type: {type(batch_result)}")
+ failed_batches += 1
+ continue
+
+ # Handle batch results
+ if "error" in batch_result:
+ failed_batches += 1
+ else:
+ successful_batches += 1
+
+ selected_ids = batch_result.get("selected_ids", [])
+ all_selected_ids.extend(selected_ids)
+
+ # Remove duplicates while preserving order
+ unique_selected_ids = list(dict.fromkeys(all_selected_ids))
+
+ logger.info(
+ f"Auto-select completed: {successful_batches}/{len(tasks)} batches successful "
+ f"({failed_batches} failed), selected {len(unique_selected_ids)} unique conversations "
+ f"for project {project_id}: {unique_selected_ids}"
+ )
+
+ results[project_id] = {"conversation_id_list": unique_selected_ids}
+
+ return {"results": results}
+
+
+@backoff.on_exception(
+ backoff.expo,
+ (RateLimitError, Timeout, APIError),
+ max_tries=3,
+ max_time=5 * 60, # 5 minutes
+)
+async def _call_llm_with_backoff(prompt: str, batch_num: int) -> Any:
+ """Call LLM with automatic retry for transient errors."""
+ logger.debug(f"Calling LLM for batch {batch_num}")
+ return await acompletion(
+ model=SMALL_LITELLM_MODEL,
+ messages=[{"role": "user", "content": prompt}],
+ api_base=SMALL_LITELLM_API_BASE,
+ api_key=SMALL_LITELLM_API_KEY,
+ response_format={"type": "json_object"},
+ timeout=5 * 60, # 5 minutes
+ )
+
+
+async def _process_single_batch(
+ batch: List[ConversationModel],
+ batch_num: int,
+ user_query_inputs: List[str],
+ language: str,
+) -> Dict[str, Any]:
+ """
+ Process a single batch of conversations and return selected IDs.
+
+ Args:
+ batch: List of ConversationModel instances to process
+ batch_num: Batch number for logging
+ user_query_inputs: User queries to match against
+ language: Language code for the prompt template
+
+ Returns:
+ Dictionary with:
+ - "selected_ids": List of selected conversation IDs
+ - "batch_num": The batch number
+ - "error": Error message if processing failed (optional)
+ """
+ logger.info(f"Processing batch {batch_num} ({len(batch)} conversations, parallel execution)")
+
+ # Prepare conversation data for the prompt
+ conversation_data = []
+ for conv in batch:
+ # Get summary or fallback to transcript excerpt
+ summary_text = None
+ if conv.summary and conv.summary.strip():
+ summary_text = conv.summary
+ else:
+ # Use transcript as fallback
+ try:
+ transcript = get_conversation_transcript(
+ conv.id,
+ DirectusSession(user_id="none", is_admin=True),
+ )
+ # Limit transcript to first 500 characters for context
+ if transcript and len(transcript) > 500:
+ summary_text = transcript[:500] + "..."
+ elif transcript:
+ summary_text = transcript
+ except Exception as e:
+ logger.warning(f"Could not get transcript for conversation {conv.id}: {e}")
+
+ # Skip conversations with no content at all
+ if not summary_text:
+ logger.debug(f"Skipping conversation {conv.id} - no summary or transcript")
+ continue
+
+ conv_data = {
+ "id": conv.id,
+ "participant_name": conv.participant_name or "Unknown",
+ "summary": summary_text,
+ }
+ if conv.tags:
+ conv_data["tags"] = ", ".join([tag.text for tag in conv.tags])
+ if conv.created_at:
+ conv_data["created_at"] = conv.created_at.isoformat()
+ conversation_data.append(conv_data)
+
+ # Skip batch if no valid conversations
+ if not conversation_data:
+ logger.warning(f"Batch {batch_num} has no valid conversations with content. Skipping.")
+ return {"selected_ids": [], "batch_num": batch_num}
+
+ # Render the prompt
+ prompt = render_prompt(
+ "auto_select_conversations",
+ language,
+ {
+ "user_queries": user_query_inputs,
+ "conversations": conversation_data,
+ },
+ )
+
+ # Validate prompt size before sending
+ try:
+ prompt_tokens = token_counter(model=SMALL_LITELLM_MODEL, text=prompt)
+ MAX_BATCH_CONTEXT = 100000 # Leave headroom for response
+
+ if prompt_tokens > MAX_BATCH_CONTEXT:
+ # If batch has only 1 conversation, we can't split further
+ if len(batch) == 1:
+ logger.error(
+ f"Batch {batch_num} single conversation exceeds context limit: "
+ f"{prompt_tokens} tokens. Skipping conversation {batch[0].id}."
+ )
+ return {
+ "selected_ids": [],
+ "batch_num": batch_num,
+ "error": "single_conversation_too_large",
+ }
+
+ # Split batch in half and process recursively
+ mid = len(batch) // 2
+ batch_1 = batch[:mid]
+ batch_2 = batch[mid:]
+
+ logger.warning(
+ f"Batch {batch_num} prompt too large ({prompt_tokens} tokens). "
+ f"Splitting into 2 sub-batches: {len(batch_1)} and {len(batch_2)} conversations."
+ )
+
+ # Process both halves recursively
+ result_1 = await _process_single_batch(batch_1, batch_num, user_query_inputs, language)
+ result_2 = await _process_single_batch(batch_2, batch_num, user_query_inputs, language)
+
+ # Combine results from both sub-batches
+ combined_ids = result_1.get("selected_ids", []) + result_2.get("selected_ids", [])
+
+ logger.info(
+ f"Batch {batch_num} split processing complete: "
+ f"{len(combined_ids)} conversations selected from sub-batches."
+ )
+
+ return {"selected_ids": combined_ids, "batch_num": batch_num}
+ except Exception as e:
+ logger.warning(f"Could not count tokens for batch {batch_num}: {e}")
+
+ # Call the LLM with retry logic for transient errors
+ try:
+ response = await _call_llm_with_backoff(
+ prompt=prompt,
+ batch_num=batch_num,
+ )
+
+ if response.choices[0].message.content:
+ result = json.loads(response.choices[0].message.content)
+ raw_selected_ids = result.get("selected_conversation_ids", [])
+
+ # Validate LLM response: ensure all returned IDs are from this batch
+ valid_ids = {conv.id for conv in batch}
+ batch_selected_ids = [
+ id for id in raw_selected_ids if isinstance(id, (int, str)) and id in valid_ids
+ ]
+
+ # Log warning if LLM returned invalid IDs
+ if len(batch_selected_ids) != len(raw_selected_ids):
+ filtered_count = len(raw_selected_ids) - len(batch_selected_ids)
+ invalid_ids = [id for id in raw_selected_ids if id not in valid_ids]
+ logger.warning(
+ f"Batch {batch_num}: LLM returned {filtered_count} invalid ID(s), "
+ f"filtered from {len(raw_selected_ids)} to {len(batch_selected_ids)}. "
+ f"Invalid IDs: {invalid_ids}"
+ )
+
+ logger.info(
+ f"Batch {batch_num} selected {len(batch_selected_ids)} "
+ f"conversations: {batch_selected_ids}"
+ )
+ return {"selected_ids": batch_selected_ids, "batch_num": batch_num}
+ else:
+ logger.warning(f"No response from LLM for batch {batch_num}")
+ return {"selected_ids": [], "batch_num": batch_num}
+
+ except ContextWindowExceededError as e:
+ logger.error(
+ f"Batch {batch_num} exceeded context window ({len(batch)} conversations). "
+ f"Error: {str(e)}"
+ )
+ return {"selected_ids": [], "batch_num": batch_num, "error": "context_exceeded"}
+
+ except (RateLimitError, Timeout) as e:
+ # These are already retried by backoff, so if we get here, all retries failed
+ logger.error(f"Batch {batch_num} failed after retries: {type(e).__name__}")
+ return {"selected_ids": [], "batch_num": batch_num, "error": str(e)}
+
+ except (APIError, BadRequestError) as e:
+ logger.error(f"Batch {batch_num} API error: {str(e)}")
+ return {"selected_ids": [], "batch_num": batch_num, "error": "api_error"}
+
+ except Exception as e:
+ logger.error(f"Batch {batch_num} unexpected error: {str(e)}")
+ return {"selected_ids": [], "batch_num": batch_num, "error": "unknown"}
+
+
async def get_conversation_citations(
rag_prompt: str,
accumulated_response: str,
@@ -250,7 +579,7 @@ async def get_conversation_citations(
) -> List[Dict[str, Any]]:
"""
Extract structured conversation citations from an accumulated assistant response using a text-structuring model, map those citations to conversations, and return only citations that belong to the given project IDs.
-
+
This function:
- Renders a text-structuring prompt using `rag_prompt` and `accumulated_response` and sends it to the configured text-structure LLM.
- Parses the model's JSON response (expected to follow `CitationsSchema`) to obtain citation entries that include `segment_id` and `verbatim_reference_text_chunk`.
@@ -260,7 +589,7 @@ async def get_conversation_citations(
- "conversation": conversation id (str)
- "reference_text": verbatim reference text chunk (str)
- "conversation_title": conversation name/title (str)
-
+
If the model output cannot be parsed or a segment-to-conversation mapping fails for an individual citation, that citation is skipped; parsing errors do not raise but are logged and result in an empty citations list in the returned structure.
"""
text_structuring_model_message = render_prompt(
diff --git a/echo/server/prompt_templates/auto_select_conversations.de.jinja b/echo/server/prompt_templates/auto_select_conversations.de.jinja
new file mode 100644
index 00000000..e8e68d19
--- /dev/null
+++ b/echo/server/prompt_templates/auto_select_conversations.de.jinja
@@ -0,0 +1,34 @@
+Sie sind ein Assistent für die Auswahl von Gesprächen. Ihre Aufgabe ist es zu identifizieren, welche Gespräche für die Anfragen des Benutzers am relevantesten sind.
+
+Benutzeranfragen:
+{% for query in user_queries %}
+- {{ query }}
+{% endfor %}
+
+Verfügbare Gespräche:
+{% for conv in conversations %}
+---
+Gesprächs-ID: {{ conv.id }}
+Teilnehmer: {{ conv.participant_name }}
+Zusammenfassung: {{ conv.summary or "Keine Zusammenfassung verfügbar" }}
+{% if conv.tags %}Tags: {{ conv.tags }}{% endif %}
+{% if conv.created_at %}Erstellt: {{ conv.created_at }}{% endif %}
+{% endfor %}
+---
+
+Anweisungen:
+- Analysieren Sie jede Gesprächszusammenfassung sorgfältig
+- Wählen Sie Gespräche aus, die für die Anfragen des Benutzers relevant oder potenziell relevant sind
+- Beziehen Sie Gespräche mit direkter Relevanz sowie solche mit teilweiser oder indirekter Relevanz ein
+- Berücksichtigen Sie Synonyme, verwandte Themen und kontextuelle Verbindungen
+- Wenn ein Gespräch Informationen enthalten könnte, die für die Beantwortung der Anfragen nützlich sind, nehmen Sie es auf
+- Schließen Sie nur Gespräche aus, die eindeutig nicht mit allen Anfragen zusammenhängen
+- Wenn KEINE Gespräche relevant sind, geben Sie eine leere Liste zurück
+
+Antworten Sie mit einem JSON-Objekt, das nur die Liste der relevanten Gesprächs-IDs enthält:
+{
+ "selected_conversation_ids": ["id1", "id2", ...]
+}
+
+Geben Sie NUR das JSON-Objekt zurück, keinen anderen Text.
+
diff --git a/echo/server/prompt_templates/auto_select_conversations.en.jinja b/echo/server/prompt_templates/auto_select_conversations.en.jinja
new file mode 100644
index 00000000..38d6274d
--- /dev/null
+++ b/echo/server/prompt_templates/auto_select_conversations.en.jinja
@@ -0,0 +1,33 @@
+You are a conversation selection assistant. Your task is to identify which conversations are most relevant to the user's queries.
+
+User Queries:
+{% for query in user_queries %}
+- {{ query }}
+{% endfor %}
+
+Available Conversations:
+{% for conv in conversations %}
+---
+Conversation ID: {{ conv.id }}
+Participant: {{ conv.participant_name }}
+Summary: {{ conv.summary or "No summary available" }}
+{% if conv.tags %}Tags: {{ conv.tags }}{% endif %}
+{% if conv.created_at %}Created: {{ conv.created_at }}{% endif %}
+{% endfor %}
+---
+
+Instructions:
+- Analyze each conversation summary carefully
+- Select conversations that are relevant or potentially relevant to the user's queries
+- Include conversations with direct relevance as well as those with partial or indirect relevance
+- Consider synonyms, related topics, and contextual connections
+- If a conversation might contain information useful for answering the queries, include it
+- Only exclude conversations that are clearly unrelated to all queries
+- If NO conversations are relevant, return an empty list
+
+Respond with a JSON object containing only the list of relevant conversation IDs:
+{
+ "selected_conversation_ids": ["id1", "id2", ...]
+}
+
+Return ONLY the JSON object, no other text.
diff --git a/echo/server/prompt_templates/auto_select_conversations.es.jinja b/echo/server/prompt_templates/auto_select_conversations.es.jinja
new file mode 100644
index 00000000..3a63c3e5
--- /dev/null
+++ b/echo/server/prompt_templates/auto_select_conversations.es.jinja
@@ -0,0 +1,34 @@
+Eres un asistente de selección de conversaciones. Tu tarea es identificar qué conversaciones son más relevantes para las consultas del usuario.
+
+Consultas del usuario:
+{% for query in user_queries %}
+- {{ query }}
+{% endfor %}
+
+Conversaciones disponibles:
+{% for conv in conversations %}
+---
+ID de conversación: {{ conv.id }}
+Participante: {{ conv.participant_name }}
+Resumen: {{ conv.summary or "No hay resumen disponible" }}
+{% if conv.tags %}Etiquetas: {{ conv.tags }}{% endif %}
+{% if conv.created_at %}Creado: {{ conv.created_at }}{% endif %}
+{% endfor %}
+---
+
+Instrucciones:
+- Analiza cuidadosamente cada resumen de conversación
+- Selecciona conversaciones que sean relevantes o potencialmente relevantes para las consultas del usuario
+- Incluye conversaciones con relevancia directa así como aquellas con relevancia parcial o indirecta
+- Considera sinónimos, temas relacionados y conexiones contextuales
+- Si una conversación podría contener información útil para responder las consultas, inclúyela
+- Solo excluye conversaciones que claramente no estén relacionadas con ninguna consulta
+- Si NINGUNA conversación es relevante, devuelve una lista vacía
+
+Responde con un objeto JSON que contenga solo la lista de IDs de conversaciones relevantes:
+{
+ "selected_conversation_ids": ["id1", "id2", ...]
+}
+
+Devuelve SOLO el objeto JSON, ningún otro texto.
+
diff --git a/echo/server/prompt_templates/auto_select_conversations.fr.jinja b/echo/server/prompt_templates/auto_select_conversations.fr.jinja
new file mode 100644
index 00000000..a76c5c83
--- /dev/null
+++ b/echo/server/prompt_templates/auto_select_conversations.fr.jinja
@@ -0,0 +1,34 @@
+Vous êtes un assistant de sélection de conversations. Votre tâche consiste à identifier quelles conversations sont les plus pertinentes pour les requêtes de l'utilisateur.
+
+Requêtes de l'utilisateur :
+{% for query in user_queries %}
+- {{ query }}
+{% endfor %}
+
+Conversations disponibles :
+{% for conv in conversations %}
+---
+ID de conversation : {{ conv.id }}
+Participant : {{ conv.participant_name }}
+Résumé : {{ conv.summary or "Aucun résumé disponible" }}
+{% if conv.tags %}Tags : {{ conv.tags }}{% endif %}
+{% if conv.created_at %}Créé : {{ conv.created_at }}{% endif %}
+{% endfor %}
+---
+
+Instructions :
+- Analysez attentivement chaque résumé de conversation
+- Sélectionnez les conversations qui sont pertinentes ou potentiellement pertinentes pour les requêtes de l'utilisateur
+- Incluez les conversations avec une pertinence directe ainsi que celles avec une pertinence partielle ou indirecte
+- Considérez les synonymes, les sujets connexes et les liens contextuels
+- Si une conversation peut contenir des informations utiles pour répondre aux requêtes, incluez-la
+- N'excluez que les conversations qui ne sont clairement liées à aucune des requêtes
+- Si AUCUNE conversation n'est pertinente, renvoyez une liste vide
+
+Répondez avec un objet JSON contenant uniquement la liste des IDs de conversations pertinentes :
+{
+ "selected_conversation_ids": ["id1", "id2", ...]
+}
+
+Renvoyez UNIQUEMENT l'objet JSON, aucun autre texte.
+
diff --git a/echo/server/prompt_templates/auto_select_conversations.nl.jinja b/echo/server/prompt_templates/auto_select_conversations.nl.jinja
new file mode 100644
index 00000000..a990f9e5
--- /dev/null
+++ b/echo/server/prompt_templates/auto_select_conversations.nl.jinja
@@ -0,0 +1,34 @@
+U bent een assistent voor het selecteren van gesprekken. Uw taak is om te identificeren welke gesprekken het meest relevant zijn voor de vragen van de gebruiker.
+
+Gebruikersvragen:
+{% for query in user_queries %}
+- {{ query }}
+{% endfor %}
+
+Beschikbare gesprekken:
+{% for conv in conversations %}
+---
+Gesprek-ID: {{ conv.id }}
+Deelnemer: {{ conv.participant_name }}
+Samenvatting: {{ conv.summary or "Geen samenvatting beschikbaar" }}
+{% if conv.tags %}Tags: {{ conv.tags }}{% endif %}
+{% if conv.created_at %}Gemaakt: {{ conv.created_at }}{% endif %}
+{% endfor %}
+---
+
+Instructies:
+- Analyseer elke gesprekssamenvatting zorgvuldig
+- Selecteer gesprekken die relevant of potentieel relevant zijn voor de vragen van de gebruiker
+- Neem gesprekken op met directe relevantie evenals die met gedeeltelijke of indirecte relevantie
+- Overweeg synoniemen, gerelateerde onderwerpen en contextuele verbindingen
+- Als een gesprek informatie kan bevatten die nuttig is voor het beantwoorden van de vragen, neem het dan op
+- Sluit alleen gesprekken uit die duidelijk aan geen enkele vraag gerelateerd zijn
+- Als GEEN gesprekken relevant zijn, retourneer dan een lege lijst
+
+Reageer met een JSON-object dat alleen de lijst met relevante gesprek-ID's bevat:
+{
+ "selected_conversation_ids": ["id1", "id2", ...]
+}
+
+Retourneer ALLEEN het JSON-object, geen andere tekst.
+
diff --git a/echo/server/prompt_templates/is_followup_question.de.jinja b/echo/server/prompt_templates/is_followup_question.de.jinja
new file mode 100644
index 00000000..375312db
--- /dev/null
+++ b/echo/server/prompt_templates/is_followup_question.de.jinja
@@ -0,0 +1,20 @@
+Sie analysieren, ob die Frage eines Benutzers eine Folgefrage zu vorherigen Konversationsnachrichten ist.
+
+Vorherige Nachrichten:
+{% for msg in previous_messages %}
+{{ msg.role }}: {{ msg.content }}
+{% endfor %}
+
+Aktuelle Frage: {{ current_question }}
+
+Ist die aktuelle Frage eine Folgefrage oder Klärung im Zusammenhang mit den vorherigen Nachrichten?
+Antworten Sie NUR mit einem JSON-Objekt:
+{"is_followup": true} wenn es eine Folgefrage/Klärung ist
+{"is_followup": false} wenn es eine neue unabhängige Frage ist
+
+Beispiele für Folgefragen: "was noch?", "können Sie das näher erläutern?", "erzählen Sie mir mehr", "und was ist mit X in diesem Kontext?"
+Beispiele für neue Fragen: "wie sieht es mit der Bereitstellung aus?", "erzählen Sie mir über das Testen", "welche Konversationen besprechen Y?"
+
+Geben Sie NUR das JSON-Objekt zurück, keinen anderen Text.
+
+
diff --git a/echo/server/prompt_templates/is_followup_question.en.jinja b/echo/server/prompt_templates/is_followup_question.en.jinja
new file mode 100644
index 00000000..4ce9e5c7
--- /dev/null
+++ b/echo/server/prompt_templates/is_followup_question.en.jinja
@@ -0,0 +1,19 @@
+You are analyzing if a user's question is a follow-up to previous conversation messages.
+
+Previous messages:
+{% for msg in previous_messages %}
+{{ msg.role }}: {{ msg.content }}
+{% endfor %}
+
+Current question: {{ current_question }}
+
+Is the current question a follow-up or clarification related to the previous messages?
+Answer ONLY with a JSON object:
+{"is_followup": true} if it's a follow-up/clarification
+{"is_followup": false} if it's a new independent question
+
+Examples of follow-ups: "what else?", "can you elaborate?", "tell me more", "and what about X in that context?"
+Examples of new questions: "what about deployment?", "tell me about testing", "which conversations discuss Y?"
+
+Return ONLY the JSON object, no other text.
+
diff --git a/echo/server/prompt_templates/is_followup_question.es.jinja b/echo/server/prompt_templates/is_followup_question.es.jinja
new file mode 100644
index 00000000..6a89c018
--- /dev/null
+++ b/echo/server/prompt_templates/is_followup_question.es.jinja
@@ -0,0 +1,20 @@
+Estás analizando si la pregunta de un usuario es un seguimiento de mensajes de conversación anteriores.
+
+Mensajes anteriores:
+{% for msg in previous_messages %}
+{{ msg.role }}: {{ msg.content }}
+{% endfor %}
+
+Pregunta actual: {{ current_question }}
+
+¿Es la pregunta actual un seguimiento o aclaración relacionada con los mensajes anteriores?
+Responde SOLO con un objeto JSON:
+{"is_followup": true} si es un seguimiento/aclaración
+{"is_followup": false} si es una pregunta nueva e independiente
+
+Ejemplos de seguimientos: "¿qué más?", "¿puedes elaborar?", "cuéntame más", "¿y qué hay de X en ese contexto?"
+Ejemplos de preguntas nuevas: "¿qué hay del despliegue?", "cuéntame sobre las pruebas", "¿qué conversaciones discuten Y?"
+
+Devuelve SOLO el objeto JSON, ningún otro texto.
+
+
diff --git a/echo/server/prompt_templates/is_followup_question.fr.jinja b/echo/server/prompt_templates/is_followup_question.fr.jinja
new file mode 100644
index 00000000..cf96c81d
--- /dev/null
+++ b/echo/server/prompt_templates/is_followup_question.fr.jinja
@@ -0,0 +1,20 @@
+Vous analysez si la question d'un utilisateur fait suite à des messages de conversation précédents.
+
+Messages précédents :
+{% for msg in previous_messages %}
+{{ msg.role }}: {{ msg.content }}
+{% endfor %}
+
+Question actuelle : {{ current_question }}
+
+La question actuelle est-elle une suite ou une clarification liée aux messages précédents ?
+Répondez UNIQUEMENT avec un objet JSON :
+{"is_followup": true} si c'est une suite/clarification
+{"is_followup": false} si c'est une nouvelle question indépendante
+
+Exemples de suites : "quoi d'autre ?", "pouvez-vous développer ?", "dites-m'en plus", "et qu'en est-il de X dans ce contexte ?"
+Exemples de nouvelles questions : "qu'en est-il du déploiement ?", "parlez-moi des tests", "quelles conversations discutent de Y ?"
+
+Retournez UNIQUEMENT l'objet JSON, aucun autre texte.
+
+
diff --git a/echo/server/prompt_templates/is_followup_question.nl.jinja b/echo/server/prompt_templates/is_followup_question.nl.jinja
new file mode 100644
index 00000000..4b8e6dc0
--- /dev/null
+++ b/echo/server/prompt_templates/is_followup_question.nl.jinja
@@ -0,0 +1,20 @@
+U analyseert of de vraag van een gebruiker een vervolgvraag is op eerdere gespreksberichten.
+
+Eerdere berichten:
+{% for msg in previous_messages %}
+{{ msg.role }}: {{ msg.content }}
+{% endfor %}
+
+Huidige vraag: {{ current_question }}
+
+Is de huidige vraag een vervolgvraag of verduidelijking gerelateerd aan de eerdere berichten?
+Antwoord ALLEEN met een JSON-object:
+{"is_followup": true} als het een vervolgvraag/verduidelijking is
+{"is_followup": false} als het een nieuwe onafhankelijke vraag is
+
+Voorbeelden van vervolgvragen: "wat nog meer?", "kun je dat uitwerken?", "vertel me meer", "en hoe zit het met X in die context?"
+Voorbeelden van nieuwe vragen: "hoe zit het met deployment?", "vertel me over testen", "welke gesprekken bespreken Y?"
+
+Geef ALLEEN het JSON-object terug, geen andere tekst.
+
+