6 changes: 3 additions & 3 deletions src/cortex-app-server/src/auth.rs
@@ -45,7 +45,7 @@ impl Claims {
pub fn new(user_id: impl Into<String>, expiry_seconds: u64) -> Self {
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.unwrap_or_default()
.as_secs();

Self {
@@ -75,7 +75,7 @@ impl Claims {
pub fn is_expired(&self) -> bool {
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.unwrap_or_default()
.as_secs();
self.exp < now
}
@@ -187,7 +187,7 @@ impl AuthService {
pub async fn cleanup_revoked_tokens(&self) {
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.unwrap_or_default()
.as_secs();

let mut revoked = self.revoked_tokens.write().await;
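The three call sites above now share an identical pattern for reading the clock. A minimal sketch of a helper that could centralize it (hypothetical, not part of this diff); `unwrap_or_default()` maps the unreachable before-epoch case to 0 instead of panicking:

```rust
use std::time::{SystemTime, UNIX_EPOCH};

/// Current time as whole seconds since the UNIX epoch.
/// Hypothetical helper, not part of this change: `unwrap_or_default()`
/// yields 0 instead of panicking if the system clock reads before the epoch.
fn now_unix_secs() -> u64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs()
}
```

With such a helper, `Claims::is_expired` would reduce to `self.exp < now_unix_secs()`.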
10 changes: 10 additions & 0 deletions src/cortex-app-server/src/config.rs
@@ -49,12 +49,18 @@ pub struct ServerConfig {
pub max_body_size: usize,

/// Request timeout in seconds (applies to full request lifecycle).
///
/// See `cortex_common::http_client` module documentation for the complete
/// timeout hierarchy across Cortex services.
#[serde(default = "default_request_timeout")]
pub request_timeout: u64,

/// Read timeout for individual chunks in seconds.
/// Applies to chunked transfer encoding to prevent indefinite hangs
/// when clients disconnect without sending the terminal chunk.
///
/// See `cortex_common::http_client` module documentation for the complete
/// timeout hierarchy across Cortex services.
#[serde(default = "default_read_timeout")]
pub read_timeout: u64,

@@ -71,12 +77,16 @@ pub struct ServerConfig {
pub cors_origins: Vec<String>,

/// Graceful shutdown timeout in seconds.
///
/// See `cortex_common::http_client` module documentation for the complete
/// timeout hierarchy across Cortex services.
#[serde(default = "default_shutdown_timeout")]
pub shutdown_timeout: u64,
}

fn default_shutdown_timeout() -> u64 {
30 // 30 seconds for graceful shutdown
// See cortex_common::http_client for timeout hierarchy documentation
}

fn default_listen_addr() -> String {
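For reference, the `#[serde(default = "...")]` attributes mean any omitted field falls back to its documented default. A standalone sketch, assuming the default values listed in the timeout hierarchy below and using JSON input for brevity (the real `ServerConfig` has more fields than shown here):

```rust
use serde::Deserialize;

// Trimmed-down stand-in for ServerConfig, for illustration only.
#[derive(Debug, Deserialize)]
struct Timeouts {
    #[serde(default = "default_request_timeout")]
    request_timeout: u64,
    #[serde(default = "default_read_timeout")]
    read_timeout: u64,
    #[serde(default = "default_shutdown_timeout")]
    shutdown_timeout: u64,
}

// Assumed defaults, taken from the timeout hierarchy table in cortex_common::http_client.
fn default_request_timeout() -> u64 { 300 }
fn default_read_timeout() -> u64 { 30 }
fn default_shutdown_timeout() -> u64 { 30 }

fn main() {
    // An empty document picks up every default.
    let cfg: Timeouts = serde_json::from_str("{}").unwrap();
    assert_eq!(cfg.request_timeout, 300);

    // A partial document overrides only the fields it names.
    let cfg: Timeouts = serde_json::from_str(r#"{"read_timeout": 10}"#).unwrap();
    assert_eq!((cfg.read_timeout, cfg.shutdown_timeout), (10, 30));
}
```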
3 changes: 0 additions & 3 deletions src/cortex-app-server/src/storage.rs
@@ -47,8 +47,6 @@ pub struct StoredToolCall {

/// Session storage manager.
pub struct SessionStorage {
#[allow(dead_code)]
base_dir: PathBuf,
sessions_dir: PathBuf,
history_dir: PathBuf,
}
@@ -66,7 +64,6 @@ impl SessionStorage {
info!("Session storage initialized at {:?}", base_dir);

Ok(Self {
base_dir,
sessions_dir,
history_dir,
})
11 changes: 0 additions & 11 deletions src/cortex-apply-patch/src/hunk.rs
@@ -250,9 +250,6 @@ pub struct SearchReplace {
pub search: String,
/// The text to replace with.
pub replace: String,
/// Replace all occurrences (true) or just the first (false).
#[allow(dead_code)]
pub replace_all: bool,
}

impl SearchReplace {
@@ -266,16 +263,8 @@ impl SearchReplace {
path: path.into(),
search: search.into(),
replace: replace.into(),
replace_all: false,
}
}

/// Set whether to replace all occurrences.
#[allow(dead_code)]
pub fn with_replace_all(mut self, replace_all: bool) -> Self {
self.replace_all = replace_all;
self
}
}

#[cfg(test)]
48 changes: 48 additions & 0 deletions src/cortex-common/src/http_client.rs
@@ -9,6 +9,54 @@
//!
//! DNS caching is configured with reasonable TTL to allow failover and load
//! balancer updates (#2177).
//!
//! # Timeout Configuration Guide
//!
//! This section documents the timeout hierarchy across the Cortex codebase. Use this
//! as a reference when configuring timeouts for new features or debugging timeout issues.
//!
//! ## Timeout Hierarchy
//!
//! | Use Case | Timeout | Constant/Location | Rationale |
//! |-----------------------------|---------|--------------------------------------------|-----------------------------------------|
//! | Health checks | 5s | `HEALTH_CHECK_TIMEOUT` (this module) | Quick validation of service status |
//! | Standard HTTP requests | 30s | `DEFAULT_TIMEOUT` (this module) | Normal API calls with reasonable margin |
//! | Per-chunk read (streaming) | 30s | `read_timeout` (cortex-app-server/config) | Individual chunk timeout during stream |
//! | Pool idle timeout | 60s | `POOL_IDLE_TIMEOUT` (this module) | DNS re-resolution for failover |
//! | LLM Request (non-streaming) | 120s | `DEFAULT_REQUEST_TIMEOUT_SECS` (cortex-exec/runner) | Model inference takes time |
//! | LLM Streaming total | 300s | `STREAMING_TIMEOUT` (this module) | Long-running streaming responses |
//! | Server request lifecycle | 300s | `request_timeout` (cortex-app-server/config) | Full HTTP request/response cycle |
//! | Entire exec session | 600s | `DEFAULT_TIMEOUT_SECS` (cortex-exec/runner) | Multi-turn conversation limit |
//! | Graceful shutdown | 30s | `shutdown_timeout` (cortex-app-server/config) | Time for cleanup on shutdown |
//!
//! ## Module-Specific Timeouts
//!
//! ### cortex-common (this module)
//! - `DEFAULT_TIMEOUT` (30s): Use for standard API calls.
//! - `STREAMING_TIMEOUT` (300s): Use for LLM streaming endpoints.
//! - `HEALTH_CHECK_TIMEOUT` (5s): Use for health/readiness checks.
//! - `POOL_IDLE_TIMEOUT` (60s): Connection pool cleanup for DNS freshness.
//!
//! ### cortex-exec (runner.rs)
//! - `DEFAULT_TIMEOUT_SECS` (600s): Maximum duration for entire exec session.
//! - `DEFAULT_REQUEST_TIMEOUT_SECS` (120s): Single LLM request timeout.
//!
//! ### cortex-app-server (config.rs)
//! - `request_timeout` (300s): Full request lifecycle timeout.
//! - `read_timeout` (30s): Per-chunk timeout for streaming reads.
//! - `shutdown_timeout` (30s): Graceful shutdown duration.
//!
//! ### cortex-engine (api_client.rs)
//! - Re-exports constants from this module for consistency.
//!
//! ## Recommendations
//!
//! When adding new timeout configurations:
//! 1. Use constants from this module when possible for consistency.
//! 2. Document any new timeout constants with their rationale.
//! 3. Consider the timeout hierarchy - inner timeouts should be shorter than outer ones.
//! 4. For LLM operations, use longer timeouts (120s-300s) to accommodate model inference.
//! 5. For health checks and quick validations, use short timeouts (5s-10s).

use reqwest::Client;
use std::time::Duration;
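As a usage illustration, a client for standard (non-streaming) calls would combine the whole-request and pool-idle timeouts described above. This is a sketch with constant values assumed from the table; the builder calls are plain reqwest, and the real wiring lives in this module's constructors:

```rust
use reqwest::Client;
use std::time::Duration;

// Values assumed from the hierarchy documented above.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
const POOL_IDLE_TIMEOUT: Duration = Duration::from_secs(60);

/// Build a client for standard API calls.
fn standard_client() -> reqwest::Result<Client> {
    Client::builder()
        // Whole-request deadline for normal API calls.
        .timeout(DEFAULT_TIMEOUT)
        // Drop idle pooled connections so DNS is re-resolved for failover.
        .pool_idle_timeout(POOL_IDLE_TIMEOUT)
        .build()
}
```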
2 changes: 2 additions & 0 deletions src/cortex-engine/src/tools/handlers/glob.rs
@@ -17,8 +17,10 @@ pub struct GlobHandler;
#[derive(Debug, Deserialize)]
struct GlobArgs {
patterns: Vec<String>,
#[serde(alias = "folder")]
directory: Option<String>,
#[serde(default)]
#[serde(alias = "excludePatterns")]
exclude_patterns: Vec<String>,
}

1 change: 1 addition & 0 deletions src/cortex-engine/src/tools/handlers/grep.rs
@@ -29,6 +29,7 @@ struct GrepArgs {
glob_pattern: Option<String>,
#[serde(default = "default_output_mode")]
output_mode: String,
#[serde(alias = "head_limit")]
max_results: Option<usize>,
#[serde(default)]
multiline: bool,
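Both handler changes rely on `#[serde(alias = "...")]`, which accepts an alternative key during deserialization while keeping the canonical field name in code. A self-contained sketch (the struct is a stand-in, not the real `GrepArgs`):

```rust
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct Args {
    // Canonical name is `max_results`; `head_limit` is accepted as an alias.
    #[serde(alias = "head_limit")]
    max_results: Option<usize>,
}

fn main() -> Result<(), serde_json::Error> {
    let a: Args = serde_json::from_str(r#"{"max_results": 5}"#)?;
    let b: Args = serde_json::from_str(r#"{"head_limit": 5}"#)?;
    assert_eq!(a.max_results, b.max_results);
    Ok(())
}
```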
8 changes: 8 additions & 0 deletions src/cortex-exec/src/runner.rs
@@ -27,9 +27,17 @@ use cortex_protocol::ConversationId;
use crate::output::{OutputFormat, OutputWriter};

/// Default timeout for the entire execution (10 minutes).
///
/// This is the maximum duration for a multi-turn exec session.
/// See `cortex_common::http_client` module documentation for the complete
/// timeout hierarchy across Cortex services.
const DEFAULT_TIMEOUT_SECS: u64 = 600;

/// Default timeout for a single LLM request (2 minutes).
///
/// Allows sufficient time for model inference while preventing indefinite hangs.
/// See `cortex_common::http_client` module documentation for the complete
/// timeout hierarchy across Cortex services.
const DEFAULT_REQUEST_TIMEOUT_SECS: u64 = 120;

/// Maximum retries for transient errors.
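To illustrate how the two constants relate, here is a sketch of nesting the per-request deadline inside the session deadline with `tokio::time::timeout`; the functions are placeholders, not the actual runner code, and the only point is that the inner timeout expires before the outer one, as the http_client guide recommends:

```rust
use std::time::Duration;
use tokio::time::timeout;

const DEFAULT_TIMEOUT_SECS: u64 = 600;
const DEFAULT_REQUEST_TIMEOUT_SECS: u64 = 120;

// Placeholder for a single LLM call; the real runner drives the provider client.
async fn send_request(turn: u32) -> Result<String, String> {
    Ok(format!("response to turn {turn}"))
}

async fn run_session() -> Result<(), String> {
    for turn in 0..3 {
        // Inner, per-request deadline (120s).
        let _reply = timeout(
            Duration::from_secs(DEFAULT_REQUEST_TIMEOUT_SECS),
            send_request(turn),
        )
        .await
        .map_err(|_| "request timed out".to_string())??;
    }
    Ok(())
}

#[tokio::main]
async fn main() -> Result<(), String> {
    // Outer, whole-session deadline (600s).
    timeout(Duration::from_secs(DEFAULT_TIMEOUT_SECS), run_session())
        .await
        .map_err(|_| "session timed out".to_string())?
}
```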
103 changes: 1 addition & 102 deletions src/cortex-mcp-client/src/transport.rs
@@ -20,8 +20,7 @@ use cortex_mcp_types::{
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
use tokio::process::{Child, Command};
use tokio::sync::{Mutex, RwLock};
use tokio::time::sleep;
use tracing::{debug, error, info, warn};
use tracing::{debug, info, warn};

// ============================================================================
// Transport Trait
@@ -199,61 +198,6 @@ impl StdioTransport {
Ok(())
}

/// Reconnect with exponential backoff.
///
/// Properly cleans up existing connections before each attempt to prevent
/// file descriptor leaks (#2198).
#[allow(dead_code)]
async fn reconnect(&self) -> Result<()> {
if !self.reconnect_config.enabled {
return Err(anyhow!("Reconnection disabled"));
}

let mut attempt = 0;
let mut delay = self.reconnect_config.initial_delay;

while attempt < self.reconnect_config.max_attempts {
attempt += 1;
info!(
attempt,
max = self.reconnect_config.max_attempts,
"Attempting reconnection"
);

// Clean up any existing connection before attempting reconnect
// This prevents file descriptor leaks on repeated failures (#2198)
{
let mut process_guard = self.process.lock().await;
if let Some(mut child) = process_guard.take() {
// Kill the process and wait for it to clean up
let _ = child.kill().await;
// Wait a short time for resources to be released
drop(child);
}
self.connected.store(false, Ordering::SeqCst);
}

// Clear any stale pending responses
self.pending_responses.write().await.clear();

match self.connect().await {
Ok(()) => {
info!("Reconnection successful");
return Ok(());
}
Err(e) => {
error!(error = %e, attempt, "Reconnection failed");
if attempt < self.reconnect_config.max_attempts {
sleep(delay).await;
delay = (delay * 2).min(self.reconnect_config.max_delay);
}
}
}
}

Err(anyhow!("Failed to reconnect after {} attempts", attempt))
}

/// Send a request and wait for response.
async fn send_request(&self, request: JsonRpcRequest) -> Result<JsonRpcResponse> {
// Ensure connected
@@ -516,51 +460,6 @@ impl HttpTransport {
fn next_request_id(&self) -> RequestId {
RequestId::Number(self.request_id.fetch_add(1, Ordering::SeqCst) as i64)
}

/// Test connection.
#[allow(dead_code)]
async fn test_connection(&self) -> Result<()> {
let request = JsonRpcRequest::new(self.next_request_id(), methods::PING);
self.send_request(request).await?;
Ok(())
}

/// Reconnect with exponential backoff.
#[allow(dead_code)]
async fn reconnect(&self) -> Result<()> {
if !self.reconnect_config.enabled {
return Err(anyhow!("Reconnection disabled"));
}

let mut attempt = 0;
let mut delay = self.reconnect_config.initial_delay;

while attempt < self.reconnect_config.max_attempts {
attempt += 1;
info!(
attempt,
max = self.reconnect_config.max_attempts,
"Attempting HTTP reconnection"
);

match self.test_connection().await {
Ok(()) => {
info!("HTTP reconnection successful");
self.connected.store(true, Ordering::SeqCst);
return Ok(());
}
Err(e) => {
error!(error = %e, attempt, "HTTP reconnection failed");
if attempt < self.reconnect_config.max_attempts {
sleep(delay).await;
delay = (delay * 2).min(self.reconnect_config.max_delay);
}
}
}
}

Err(anyhow!("Failed to reconnect after {} attempts", attempt))
}
}

#[async_trait]
16 changes: 12 additions & 4 deletions src/cortex-tui/src/runner/event_loop/streaming.rs
@@ -20,6 +20,14 @@ use cortex_engine::streaming::StreamEvent;

use super::core::{EventLoop, PendingToolCall, simplify_error_message};

/// Initial connection timeout for streaming requests.
/// See cortex_common::http_client for timeout hierarchy documentation.
const STREAMING_CONNECTION_TIMEOUT: Duration = Duration::from_secs(60);

/// Per-chunk timeout during streaming responses.
/// See cortex_common::http_client for timeout hierarchy documentation.
const STREAMING_CHUNK_TIMEOUT: Duration = Duration::from_secs(30);

impl EventLoop {
/// Handles message submission using the new provider system.
///
@@ -176,7 +184,7 @@ impl EventLoop {

// Start the completion request with timeout
let stream_result = tokio::time::timeout(
Duration::from_secs(60), // 60 second timeout for initial connection
STREAMING_CONNECTION_TIMEOUT,
client.complete(request),
)
.await;
@@ -215,7 +223,7 @@

// Wait for next event with timeout
let event = tokio::time::timeout(
Duration::from_secs(30), // 30 second timeout between chunks
STREAMING_CHUNK_TIMEOUT,
stream.next(),
)
.await;
@@ -738,7 +746,7 @@ impl EventLoop {
};

let stream_result =
tokio::time::timeout(Duration::from_secs(60), client.complete(request)).await;
tokio::time::timeout(STREAMING_CONNECTION_TIMEOUT, client.complete(request)).await;

let mut stream = match stream_result {
Ok(Ok(s)) => s,
@@ -770,7 +778,7 @@ impl EventLoop {
break;
}

let event = tokio::time::timeout(Duration::from_secs(30), stream.next()).await;
let event = tokio::time::timeout(STREAMING_CHUNK_TIMEOUT, stream.next()).await;

match event {
Ok(Some(Ok(ResponseEvent::Delta(delta)))) => {