CortexLM · echobt · Feb 4, 2026
diff --git a/src/cortex-app-server/src/config.rs b/src/cortex-app-server/src/config.rs
@@ -49,12 +49,18 @@ pub struct ServerConfig {
     pub max_body_size: usize,
 
     /// Request timeout in seconds (applies to full request lifecycle).
+    ///
+    /// See `cortex_common::http_client` module documentation for the complete
+    /// timeout hierarchy across Cortex services.
     #[serde(default = "default_request_timeout")]
     pub request_timeout: u64,
 
     /// Read timeout for individual chunks in seconds.
     /// Applies to chunked transfer encoding to prevent indefinite hangs
     /// when clients disconnect without sending the terminal chunk.
+    ///
+    /// See `cortex_common::http_client` module documentation for the complete
+    /// timeout hierarchy across Cortex services.
     #[serde(default = "default_read_timeout")]
     pub read_timeout: u64,
 
@@ -71,12 +77,16 @@ pub struct ServerConfig {
     pub cors_origins: Vec<String>,
 
     /// Graceful shutdown timeout in seconds.
+    ///
+    /// See `cortex_common::http_client` module documentation for the complete
+    /// timeout hierarchy across Cortex services.
     #[serde(default = "default_shutdown_timeout")]
     pub shutdown_timeout: u64,
 }
 
 fn default_shutdown_timeout() -> u64 {
     30 // 30 seconds for graceful shutdown
+       // See cortex_common::http_client for timeout hierarchy documentation
 }
 
 fn default_listen_addr() -> String {

diff --git a/src/cortex-common/src/http_client.rs b/src/cortex-common/src/http_client.rs
@@ -9,25 +9,99 @@
 //!
 //! DNS caching is configured with reasonable TTL to allow failover and load
 //! balancer updates (#2177).
+//!
+//! # Timeout Configuration Guide
+//!
+//! This section documents the timeout hierarchy across the Cortex codebase. Use this
+//! as a reference when configuring timeouts for new features or debugging timeout issues.
+//!
+//! ## Timeout Hierarchy
+//!
+//! | Use Case                    | Timeout | Constant/Location                          | Rationale                               |
+//! |-----------------------------|---------|--------------------------------------------|-----------------------------------------|
+//! | Health checks               | 5s      | `HEALTH_CHECK_TIMEOUT` (this module)       | Quick validation of service status      |
+//! | Standard HTTP requests      | 30s     | `DEFAULT_TIMEOUT` (this module)            | Normal API calls with reasonable margin |
+//! | Per-chunk read (streaming)  | 30s     | `read_timeout` (cortex-app-server/config)  | Individual chunk timeout during stream  |
+//! | Pool idle timeout           | 60s     | `POOL_IDLE_TIMEOUT` (this module)          | DNS re-resolution for failover          |
+//! | LLM Request (non-streaming) | 120s    | `DEFAULT_REQUEST_TIMEOUT_SECS` (cortex-exec/runner) | Model inference takes time |
+//! | LLM Streaming total         | 300s    | `STREAMING_TIMEOUT` (this module)          | Long-running streaming responses        |
+//! | Server request lifecycle    | 300s    | `request_timeout` (cortex-app-server/config) | Full HTTP request/response cycle      |
+//! | Entire exec session         | 600s    | `DEFAULT_TIMEOUT_SECS` (cortex-exec/runner) | Multi-turn conversation limit          |
+//! | Graceful shutdown           | 30s     | `shutdown_timeout` (cortex-app-server/config) | Time for cleanup on shutdown        |
+//!
+//! ## Module-Specific Timeouts
+//!
+//! ### cortex-common (this module)
+//! - `DEFAULT_TIMEOUT` (30s): Use for standard API calls.
+//! - `STREAMING_TIMEOUT` (300s): Use for LLM streaming endpoints.
+//! - `HEALTH_CHECK_TIMEOUT` (5s): Use for health/readiness checks.
+//! - `POOL_IDLE_TIMEOUT` (60s): Connection pool cleanup for DNS freshness.
+//!
+//! ### cortex-exec (runner.rs)
+//! - `DEFAULT_TIMEOUT_SECS` (600s): Maximum duration for entire exec session.
+//! - `DEFAULT_REQUEST_TIMEOUT_SECS` (120s): Single LLM request timeout.
+//!
+//! ### cortex-app-server (config.rs)
+//! - `request_timeout` (300s): Full request lifecycle timeout.
+//! - `read_timeout` (30s): Per-chunk timeout for streaming reads.
+//! - `shutdown_timeout` (30s): Graceful shutdown duration.
+//!
+//! ### cortex-engine (api_client.rs)
+//! - Re-exports constants from this module for consistency.
+//!
+//! ## Recommendations
+//!
+//! When adding new timeout configurations:
+//! 1. Use constants from this module when possible for consistency.
+//! 2. Document any new timeout constants with their rationale.
+//! 3. Consider the timeout hierarchy - inner timeouts should be shorter than outer ones.
+//! 4. For LLM operations, use longer timeouts (120s-300s) to accommodate model inference.
+//! 5. For health checks and quick validations, use short timeouts (5s-10s).
 
 use reqwest::Client;
 use std::time::Duration;
 
+// ============================================================
+// Timeout Configuration Constants
+// ============================================================
+//
+// Timeout Hierarchy Documentation
+// ===============================
+//
+// This module defines the authoritative timeout constants for HTTP operations.
+// Other modules should reference these constants or document deviations.
+//
+// Timeout Categories:
+// - Request timeouts: Total time for a complete request/response cycle
+// - Connection timeouts: Time to establish TCP connection
+// - Read timeouts: Time to receive response data
+// - Pool timeouts: How long idle connections stay in pool
+//
+// Recommended Naming Convention:
+// - Constants: SCREAMING_SNAKE_CASE with Duration type
+// - Config fields: snake_case with _secs suffix for u64 values
+//
+// ============================================================
+
 /// User-Agent string for all HTTP requests
 pub const USER_AGENT: &str = concat!("cortex-cli/", env!("CARGO_PKG_VERSION"));
 
-/// Default timeout for standard API requests (30 seconds)
+/// Default timeout for standard HTTP requests (30 seconds).
+/// Used for non-streaming API calls, health checks with retries, etc.
 pub const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
 
-/// Extended timeout for LLM streaming requests (5 minutes)
+/// Timeout for streaming HTTP requests (5 minutes).
+/// Longer duration to accommodate LLM inference time.
 pub const STREAMING_TIMEOUT: Duration = Duration::from_secs(300);
 
-/// Short timeout for health checks (5 seconds)
+/// Timeout for health check requests (5 seconds).
+/// Short timeout since health endpoints should respond quickly.
 pub const HEALTH_CHECK_TIMEOUT: Duration = Duration::from_secs(5);
 
-/// Connection pool idle timeout to ensure DNS is re-resolved periodically.
+/// Idle timeout for connection pool (60 seconds).
+/// Connections are closed after being idle for this duration
+/// to allow DNS re-resolution for services behind load balancers.
 /// This helps with failover scenarios where DNS records change (#2177).
-/// Set to 60 seconds to balance between performance and DNS freshness.
 pub const POOL_IDLE_TIMEOUT: Duration = Duration::from_secs(60);
 
 /// Creates an HTTP client with default configuration (30s timeout).

diff --git a/src/cortex-exec/src/runner.rs b/src/cortex-exec/src/runner.rs
@@ -27,9 +27,17 @@ use cortex_protocol::ConversationId;
 use crate::output::{OutputFormat, OutputWriter};
 
 /// Default timeout for the entire execution (10 minutes).
+///
+/// This is the maximum duration for a multi-turn exec session.
+/// See `cortex_common::http_client` module documentation for the complete
+/// timeout hierarchy across Cortex services.
 const DEFAULT_TIMEOUT_SECS: u64 = 600;
 
 /// Default timeout for a single LLM request (2 minutes).
+///
+/// Allows sufficient time for model inference while preventing indefinite hangs.
+/// See `cortex_common::http_client` module documentation for the complete
+/// timeout hierarchy across Cortex services.
 const DEFAULT_REQUEST_TIMEOUT_SECS: u64 = 120;
 
 /// Maximum retries for transient errors.