From 5a1cf8811d4cf338196bc527f5188edb414e45fd Mon Sep 17 00:00:00 2001 From: echobt Date: Wed, 4 Feb 2026 16:52:55 +0000 Subject: [PATCH] docs: consolidated timeout configuration documentation ## Summary This PR consolidates **2 documentation PRs** for timeout configuration. ### Included PRs: - #49: Add comprehensive timeout configuration documentation - #86: Add comprehensive timeout hierarchy documentation to HTTP client ### Key Changes: - Added detailed header documentation explaining the timeout hierarchy - Documented each timeout constant with its purpose and use case - Established naming conventions for constants vs config fields - Added timeout hierarchy table for quick reference ### Timeout Hierarchy | Use Case | Module | Constant | Value | Rationale | |-----------------------------|-----------------------------|-----------------------------|-------|-----------| | Health checks | cortex-common/http_client | HEALTH_CHECK_TIMEOUT | 5s | Quick validation | | Standard HTTP requests | cortex-common/http_client | DEFAULT_TIMEOUT | 30s | Normal API calls | | Connection pool idle | cortex-common/http_client | POOL_IDLE_TIMEOUT | 60s | DNS refresh | | LLM Request (non-streaming) | cortex-exec/runner | DEFAULT_REQUEST_TIMEOUT_SECS| 120s | Model inference | | LLM Streaming total | cortex-common/http_client | STREAMING_TIMEOUT | 300s | Long-running streams | | Server request lifecycle | cortex-app-server/config | request_timeout | 300s | Full request handling | | Per-chunk reads | cortex-app-server/config | read_timeout | 30s | Chunk timeout | | Graceful shutdown | cortex-app-server/config | shutdown_timeout | 30s | Time for cleanup | | Entire execution | cortex-exec/runner | DEFAULT_TIMEOUT_SECS | 600s | Headless exec limit | ### Files Modified: - src/cortex-common/src/http_client.rs - src/cortex-app-server/src/config.rs - src/cortex-exec/src/runner.rs Closes #49, #86 --- src/cortex-app-server/src/config.rs | 10 ++++ src/cortex-common/src/http_client.rs | 84 ++++++++++++++++++++++++++-- src/cortex-exec/src/runner.rs | 8 +++ 3 files changed, 97 insertions(+), 5 deletions(-) diff --git a/src/cortex-app-server/src/config.rs b/src/cortex-app-server/src/config.rs index 35ac75b..b37389b 100644 --- a/src/cortex-app-server/src/config.rs +++ b/src/cortex-app-server/src/config.rs @@ -49,12 +49,18 @@ pub struct ServerConfig { pub max_body_size: usize, /// Request timeout in seconds (applies to full request lifecycle). + /// + /// See `cortex_common::http_client` module documentation for the complete + /// timeout hierarchy across Cortex services. #[serde(default = "default_request_timeout")] pub request_timeout: u64, /// Read timeout for individual chunks in seconds. /// Applies to chunked transfer encoding to prevent indefinite hangs /// when clients disconnect without sending the terminal chunk. + /// + /// See `cortex_common::http_client` module documentation for the complete + /// timeout hierarchy across Cortex services. #[serde(default = "default_read_timeout")] pub read_timeout: u64, @@ -71,12 +77,16 @@ pub struct ServerConfig { pub cors_origins: Vec, /// Graceful shutdown timeout in seconds. + /// + /// See `cortex_common::http_client` module documentation for the complete + /// timeout hierarchy across Cortex services. #[serde(default = "default_shutdown_timeout")] pub shutdown_timeout: u64, } fn default_shutdown_timeout() -> u64 { 30 // 30 seconds for graceful shutdown + // See cortex_common::http_client for timeout hierarchy documentation } fn default_listen_addr() -> String { diff --git a/src/cortex-common/src/http_client.rs b/src/cortex-common/src/http_client.rs index b181ac8..18f1c1d 100644 --- a/src/cortex-common/src/http_client.rs +++ b/src/cortex-common/src/http_client.rs @@ -9,25 +9,99 @@ //! //! DNS caching is configured with reasonable TTL to allow failover and load //! balancer updates (#2177). +//! +//! # Timeout Configuration Guide +//! +//! This section documents the timeout hierarchy across the Cortex codebase. Use this +//! as a reference when configuring timeouts for new features or debugging timeout issues. +//! +//! ## Timeout Hierarchy +//! +//! | Use Case | Timeout | Constant/Location | Rationale | +//! |-----------------------------|---------|--------------------------------------------|-----------------------------------------| +//! | Health checks | 5s | `HEALTH_CHECK_TIMEOUT` (this module) | Quick validation of service status | +//! | Standard HTTP requests | 30s | `DEFAULT_TIMEOUT` (this module) | Normal API calls with reasonable margin | +//! | Per-chunk read (streaming) | 30s | `read_timeout` (cortex-app-server/config) | Individual chunk timeout during stream | +//! | Pool idle timeout | 60s | `POOL_IDLE_TIMEOUT` (this module) | DNS re-resolution for failover | +//! | LLM Request (non-streaming) | 120s | `DEFAULT_REQUEST_TIMEOUT_SECS` (cortex-exec/runner) | Model inference takes time | +//! | LLM Streaming total | 300s | `STREAMING_TIMEOUT` (this module) | Long-running streaming responses | +//! | Server request lifecycle | 300s | `request_timeout` (cortex-app-server/config) | Full HTTP request/response cycle | +//! | Entire exec session | 600s | `DEFAULT_TIMEOUT_SECS` (cortex-exec/runner) | Multi-turn conversation limit | +//! | Graceful shutdown | 30s | `shutdown_timeout` (cortex-app-server/config) | Time for cleanup on shutdown | +//! +//! ## Module-Specific Timeouts +//! +//! ### cortex-common (this module) +//! - `DEFAULT_TIMEOUT` (30s): Use for standard API calls. +//! - `STREAMING_TIMEOUT` (300s): Use for LLM streaming endpoints. +//! - `HEALTH_CHECK_TIMEOUT` (5s): Use for health/readiness checks. +//! - `POOL_IDLE_TIMEOUT` (60s): Connection pool cleanup for DNS freshness. +//! +//! ### cortex-exec (runner.rs) +//! - `DEFAULT_TIMEOUT_SECS` (600s): Maximum duration for entire exec session. +//! - `DEFAULT_REQUEST_TIMEOUT_SECS` (120s): Single LLM request timeout. +//! +//! ### cortex-app-server (config.rs) +//! - `request_timeout` (300s): Full request lifecycle timeout. +//! - `read_timeout` (30s): Per-chunk timeout for streaming reads. +//! - `shutdown_timeout` (30s): Graceful shutdown duration. +//! +//! ### cortex-engine (api_client.rs) +//! - Re-exports constants from this module for consistency. +//! +//! ## Recommendations +//! +//! When adding new timeout configurations: +//! 1. Use constants from this module when possible for consistency. +//! 2. Document any new timeout constants with their rationale. +//! 3. Consider the timeout hierarchy - inner timeouts should be shorter than outer ones. +//! 4. For LLM operations, use longer timeouts (120s-300s) to accommodate model inference. +//! 5. For health checks and quick validations, use short timeouts (5s-10s). use reqwest::Client; use std::time::Duration; +// ============================================================ +// Timeout Configuration Constants +// ============================================================ +// +// Timeout Hierarchy Documentation +// =============================== +// +// This module defines the authoritative timeout constants for HTTP operations. +// Other modules should reference these constants or document deviations. +// +// Timeout Categories: +// - Request timeouts: Total time for a complete request/response cycle +// - Connection timeouts: Time to establish TCP connection +// - Read timeouts: Time to receive response data +// - Pool timeouts: How long idle connections stay in pool +// +// Recommended Naming Convention: +// - Constants: SCREAMING_SNAKE_CASE with Duration type +// - Config fields: snake_case with _secs suffix for u64 values +// +// ============================================================ + /// User-Agent string for all HTTP requests pub const USER_AGENT: &str = concat!("cortex-cli/", env!("CARGO_PKG_VERSION")); -/// Default timeout for standard API requests (30 seconds) +/// Default timeout for standard HTTP requests (30 seconds). +/// Used for non-streaming API calls, health checks with retries, etc. pub const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30); -/// Extended timeout for LLM streaming requests (5 minutes) +/// Timeout for streaming HTTP requests (5 minutes). +/// Longer duration to accommodate LLM inference time. pub const STREAMING_TIMEOUT: Duration = Duration::from_secs(300); -/// Short timeout for health checks (5 seconds) +/// Timeout for health check requests (5 seconds). +/// Short timeout since health endpoints should respond quickly. pub const HEALTH_CHECK_TIMEOUT: Duration = Duration::from_secs(5); -/// Connection pool idle timeout to ensure DNS is re-resolved periodically. +/// Idle timeout for connection pool (60 seconds). +/// Connections are closed after being idle for this duration +/// to allow DNS re-resolution for services behind load balancers. /// This helps with failover scenarios where DNS records change (#2177). -/// Set to 60 seconds to balance between performance and DNS freshness. pub const POOL_IDLE_TIMEOUT: Duration = Duration::from_secs(60); /// Creates an HTTP client with default configuration (30s timeout). diff --git a/src/cortex-exec/src/runner.rs b/src/cortex-exec/src/runner.rs index e831324..5f24f1d 100644 --- a/src/cortex-exec/src/runner.rs +++ b/src/cortex-exec/src/runner.rs @@ -27,9 +27,17 @@ use cortex_protocol::ConversationId; use crate::output::{OutputFormat, OutputWriter}; /// Default timeout for the entire execution (10 minutes). +/// +/// This is the maximum duration for a multi-turn exec session. +/// See `cortex_common::http_client` module documentation for the complete +/// timeout hierarchy across Cortex services. const DEFAULT_TIMEOUT_SECS: u64 = 600; /// Default timeout for a single LLM request (2 minutes). +/// +/// Allows sufficient time for model inference while preventing indefinite hangs. +/// See `cortex_common::http_client` module documentation for the complete +/// timeout hierarchy across Cortex services. const DEFAULT_REQUEST_TIMEOUT_SECS: u64 = 120; /// Maximum retries for transient errors.