Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/cortex-app-server/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,18 @@ pub struct ServerConfig {
pub max_body_size: usize,

/// Request timeout in seconds (applies to full request lifecycle).
///
/// See `cortex_common::http_client` module documentation for the complete
/// timeout hierarchy across Cortex services.
#[serde(default = "default_request_timeout")]
pub request_timeout: u64,

/// Read timeout for individual chunks in seconds.
/// Applies to chunked transfer encoding to prevent indefinite hangs
/// when clients disconnect without sending the terminal chunk.
///
/// See `cortex_common::http_client` module documentation for the complete
/// timeout hierarchy across Cortex services.
#[serde(default = "default_read_timeout")]
pub read_timeout: u64,

Expand All @@ -71,12 +77,16 @@ pub struct ServerConfig {
pub cors_origins: Vec<String>,

/// Graceful shutdown timeout in seconds.
///
/// See `cortex_common::http_client` module documentation for the complete
/// timeout hierarchy across Cortex services.
#[serde(default = "default_shutdown_timeout")]
pub shutdown_timeout: u64,
}

fn default_shutdown_timeout() -> u64 {
30 // 30 seconds for graceful shutdown
// See cortex_common::http_client for timeout hierarchy documentation
}

fn default_listen_addr() -> String {
Expand Down
84 changes: 79 additions & 5 deletions src/cortex-common/src/http_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,99 @@
//!
//! DNS caching is configured with reasonable TTL to allow failover and load
//! balancer updates (#2177).
//!
//! # Timeout Configuration Guide
//!
//! This section documents the timeout hierarchy across the Cortex codebase. Use this
//! as a reference when configuring timeouts for new features or debugging timeout issues.
//!
//! ## Timeout Hierarchy
//!
//! | Use Case | Timeout | Constant/Location | Rationale |
//! |-----------------------------|---------|--------------------------------------------|-----------------------------------------|
//! | Health checks | 5s | `HEALTH_CHECK_TIMEOUT` (this module) | Quick validation of service status |
//! | Standard HTTP requests | 30s | `DEFAULT_TIMEOUT` (this module) | Normal API calls with reasonable margin |
//! | Per-chunk read (streaming) | 30s | `read_timeout` (cortex-app-server/config) | Individual chunk timeout during stream |
//! | Pool idle timeout | 60s | `POOL_IDLE_TIMEOUT` (this module) | DNS re-resolution for failover |
//! | LLM Request (non-streaming) | 120s | `DEFAULT_REQUEST_TIMEOUT_SECS` (cortex-exec/runner) | Model inference takes time |
//! | LLM Streaming total | 300s | `STREAMING_TIMEOUT` (this module) | Long-running streaming responses |
//! | Server request lifecycle | 300s | `request_timeout` (cortex-app-server/config) | Full HTTP request/response cycle |
//! | Entire exec session | 600s | `DEFAULT_TIMEOUT_SECS` (cortex-exec/runner) | Multi-turn conversation limit |
//! | Graceful shutdown | 30s | `shutdown_timeout` (cortex-app-server/config) | Time for cleanup on shutdown |
//!
//! ## Module-Specific Timeouts
//!
//! ### cortex-common (this module)
//! - `DEFAULT_TIMEOUT` (30s): Use for standard API calls.
//! - `STREAMING_TIMEOUT` (300s): Use for LLM streaming endpoints.
//! - `HEALTH_CHECK_TIMEOUT` (5s): Use for health/readiness checks.
//! - `POOL_IDLE_TIMEOUT` (60s): Connection pool cleanup for DNS freshness.
//!
//! ### cortex-exec (runner.rs)
//! - `DEFAULT_TIMEOUT_SECS` (600s): Maximum duration for entire exec session.
//! - `DEFAULT_REQUEST_TIMEOUT_SECS` (120s): Single LLM request timeout.
//!
//! ### cortex-app-server (config.rs)
//! - `request_timeout` (300s): Full request lifecycle timeout.
//! - `read_timeout` (30s): Per-chunk timeout for streaming reads.
//! - `shutdown_timeout` (30s): Graceful shutdown duration.
//!
//! ### cortex-engine (api_client.rs)
//! - Re-exports constants from this module for consistency.
//!
//! ## Recommendations
//!
//! When adding new timeout configurations:
//! 1. Use constants from this module when possible for consistency.
//! 2. Document any new timeout constants with their rationale.
//! 3. Consider the timeout hierarchy - inner timeouts should be shorter than outer ones.
//! 4. For LLM operations, use longer timeouts (120s-300s) to accommodate model inference.
//! 5. For health checks and quick validations, use short timeouts (5s-10s).

use reqwest::Client;
use std::time::Duration;

// ============================================================
// Timeout Configuration Constants
// ============================================================
//
// Timeout Hierarchy Documentation
// ===============================
//
// This module defines the authoritative timeout constants for HTTP operations.
// Other modules should reference these constants or document deviations.
//
// Timeout Categories:
// - Request timeouts: Total time for a complete request/response cycle
// - Connection timeouts: Time to establish TCP connection
// - Read timeouts: Time to receive response data
// - Pool timeouts: How long idle connections stay in pool
//
// Recommended Naming Convention:
// - Constants: SCREAMING_SNAKE_CASE with Duration type
// - Config fields: snake_case with _secs suffix for u64 values
//
// ============================================================

/// User-Agent string for all HTTP requests
pub const USER_AGENT: &str = concat!("cortex-cli/", env!("CARGO_PKG_VERSION"));

/// Default timeout for standard API requests (30 seconds)
/// Default timeout for standard HTTP requests (30 seconds).
/// Used for non-streaming API calls, health checks with retries, etc.
pub const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);

/// Extended timeout for LLM streaming requests (5 minutes)
/// Timeout for streaming HTTP requests (5 minutes).
/// Longer duration to accommodate LLM inference time.
pub const STREAMING_TIMEOUT: Duration = Duration::from_secs(300);

/// Short timeout for health checks (5 seconds)
/// Timeout for health check requests (5 seconds).
/// Short timeout since health endpoints should respond quickly.
pub const HEALTH_CHECK_TIMEOUT: Duration = Duration::from_secs(5);

/// Connection pool idle timeout to ensure DNS is re-resolved periodically.
/// Idle timeout for connection pool (60 seconds).
/// Connections are closed after being idle for this duration
/// to allow DNS re-resolution for services behind load balancers.
/// This helps with failover scenarios where DNS records change (#2177).
/// Set to 60 seconds to balance between performance and DNS freshness.
pub const POOL_IDLE_TIMEOUT: Duration = Duration::from_secs(60);

/// Creates an HTTP client with default configuration (30s timeout).
Expand Down
8 changes: 8 additions & 0 deletions src/cortex-exec/src/runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,17 @@ use cortex_protocol::ConversationId;
use crate::output::{OutputFormat, OutputWriter};

/// Default timeout for the entire execution (10 minutes).
///
/// This is the maximum duration for a multi-turn exec session.
/// See `cortex_common::http_client` module documentation for the complete
/// timeout hierarchy across Cortex services.
const DEFAULT_TIMEOUT_SECS: u64 = 600;

/// Default timeout for a single LLM request (2 minutes).
///
/// Allows sufficient time for model inference while preventing indefinite hangs.
/// See `cortex_common::http_client` module documentation for the complete
/// timeout hierarchy across Cortex services.
const DEFAULT_REQUEST_TIMEOUT_SECS: u64 = 120;

/// Maximum retries for transient errors.
Expand Down
Loading