Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 3 additions & 25 deletions pkg/tools/mcp/mcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,23 +113,6 @@ func NewRemoteToolset(name, urlString, transport string, headers map[string]stri
// because there is no live connection to monitor.
var errServerUnavailable = errors.New("MCP server unavailable")

const (
// mcpInitTimeout is the maximum time allowed for the MCP initialization
// handshake (connect + initialize). If a remote server accepts the TCP
// connection but never responds, this prevents the agent from hanging
// indefinitely.
mcpInitTimeout = 2 * time.Minute

// mcpCallToolTimeout is the maximum time allowed for a single tool call.
// Tool calls may be long-running (e.g. code execution), so this is
// deliberately generous.
mcpCallToolTimeout = 10 * time.Minute

// mcpListTimeout is the maximum time allowed for listing tools, prompts
// or fetching a single prompt from the MCP server.
mcpListTimeout = 1 * time.Minute
)

// Describe returns a short, user-visible description of this toolset instance.
// It never includes secrets.
func (ts *Toolset) Describe() string {
Expand Down Expand Up @@ -193,11 +176,6 @@ func (ts *Toolset) doStart(ctx context.Context) error {
// This is critical for OAuth flows where the toolset connection needs to remain alive after the initial HTTP request completes.
ctx = context.WithoutCancel(ctx)

// Apply an initialization timeout so we don't hang forever if the
// remote server accepts the connection but never responds.
initCtx, cancel := context.WithTimeout(ctx, mcpInitTimeout)
defer cancel()

slog.Debug("Starting MCP toolset", "server", ts.logID)

// Register notification handlers to invalidate caches when the server
Expand Down Expand Up @@ -241,7 +219,7 @@ func (ts *Toolset) doStart(ctx context.Context) error {
const maxRetries = 3
for attempt := 0; ; attempt++ {
var err error
result, err = ts.mcpClient.Initialize(initCtx, initRequest)
result, err = ts.mcpClient.Initialize(ctx, initRequest)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 HIGH: Initialize can hang indefinitely

Removing mcpInitTimeout means this Initialize call has no timeout protection. If the remote MCP server accepts the connection but never responds to the Initialize handshake, this will hang forever.

The context comes from context.WithoutCancel(ctx) (line 177), which means ctx.Done() will never fire unless the parent is canceled. This blocks agent startup indefinitely.

Impact: Agent cannot start if any MCP server is unresponsive.

Fix: Restore timeout protection or use a separate timeout context for the Initialize call.

if err == nil {
break
}
Expand Down Expand Up @@ -269,8 +247,8 @@ func (ts *Toolset) doStart(ctx context.Context) error {
slog.Debug("MCP initialize failed to send initialized notification; retrying", "id", ts.logID, "attempt", attempt+1, "backoff_ms", backoff.Milliseconds())
select {
case <-time.After(backoff):
case <-initCtx.Done():
return fmt.Errorf("failed to initialize MCP client: %w", initCtx.Err())
case <-ctx.Done():
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 HIGH: Initialize retry loop can run forever

The retry loop uses ctx.Done() to detect cancellation, but ctx comes from context.WithoutCancel(ctx) (line 177). This means ctx.Done() will never fire unless the parent context is canceled.

If Initialize repeatedly fails with retriable errors, the loop will run forever with exponential backoff, but no timeout.

Impact: Agent startup hangs indefinitely if MCP server keeps accepting connections but never completes the handshake.

Fix: Use a separate timeout context for the initialization phase.

return fmt.Errorf("failed to initialize MCP client: %w", ctx.Err())
}
}

Expand Down
25 changes: 2 additions & 23 deletions pkg/tools/mcp/session_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,7 @@ func (c *sessionClient) Close(context.Context) error {

func (c *sessionClient) ListTools(ctx context.Context, request *gomcp.ListToolsParams) iter.Seq2[*gomcp.Tool, error] {
if s := c.getSession(); s != nil {
ctx, cancel := context.WithTimeout(ctx, mcpListTimeout)
// Wrap the iterator so the cancel fires after iteration completes.
return func(yield func(*gomcp.Tool, error) bool) {
defer cancel()
for t, err := range s.Tools(ctx, request) {
if !yield(t, err) {
return
}
}
}
return s.Tools(ctx, request)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 HIGH: ListTools can block indefinitely

Removing the mcpListTimeout wrapper means this iterator has no timeout protection. If the remote MCP server never yields results, any code iterating over tools will hang forever.

Impact: Agent startup blocks when enumerating available tools from unresponsive MCP servers.

Fix: Restore timeout protection (original was 1 minute).

}
return func(yield func(*gomcp.Tool, error) bool) {
yield(nil, errors.New("session not initialized"))
Expand All @@ -112,24 +103,14 @@ func (c *sessionClient) ListTools(ctx context.Context, request *gomcp.ListToolsP

func (c *sessionClient) CallTool(ctx context.Context, request *gomcp.CallToolParams) (*gomcp.CallToolResult, error) {
if s := c.getSession(); s != nil {
ctx, cancel := context.WithTimeout(ctx, mcpCallToolTimeout)
defer cancel()
return s.CallTool(ctx, request)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 HIGH: CallTool can hang indefinitely

Removing the mcpCallToolTimeout wrapper means this call has no timeout protection. If the remote MCP server accepts the request but never returns a response, the agent will wait forever.

Impact: Agent freezes mid-conversation, blocking all further tool calls and interactions. User must restart the process.

Fix: Restore timeout protection (original was 10 minutes, which is appropriate for long-running tool executions).

}
return nil, errors.New("session not initialized")
}

func (c *sessionClient) ListPrompts(ctx context.Context, request *gomcp.ListPromptsParams) iter.Seq2[*gomcp.Prompt, error] {
if s := c.getSession(); s != nil {
ctx, cancel := context.WithTimeout(ctx, mcpListTimeout)
return func(yield func(*gomcp.Prompt, error) bool) {
defer cancel()
for p, err := range s.Prompts(ctx, request) {
if !yield(p, err) {
return
}
}
}
return s.Prompts(ctx, request)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 MEDIUM: ListPrompts can block indefinitely

Removing the mcpListTimeout wrapper means this iterator has no timeout protection. If the remote MCP server never responds, code iterating over prompts will hang indefinitely.

Impact: UI freezes when enumerating available prompts from unresponsive servers.

Fix: Restore timeout protection (original was 1 minute).

}
return func(yield func(*gomcp.Prompt, error) bool) {
yield(nil, errors.New("session not initialized"))
Expand All @@ -138,8 +119,6 @@ func (c *sessionClient) ListPrompts(ctx context.Context, request *gomcp.ListProm

func (c *sessionClient) GetPrompt(ctx context.Context, request *gomcp.GetPromptParams) (*gomcp.GetPromptResult, error) {
if s := c.getSession(); s != nil {
ctx, cancel := context.WithTimeout(ctx, mcpListTimeout)
defer cancel()
return s.GetPrompt(ctx, request)
}
return nil, errors.New("session not initialized")
Expand Down
Loading