diff --git a/internal/launcher/launcher.go b/internal/launcher/launcher.go index 010df56f..70b9a3cd 100644 --- a/internal/launcher/launcher.go +++ b/internal/launcher/launcher.go @@ -25,6 +25,13 @@ type connectionResult struct { err error } +// ServerState represents the observed runtime state of a backend server. +type ServerState struct { + Status string // "running" | "stopped" | "error" + StartedAt time.Time // zero value means never started + LastError string // most recent error message, if any +} + // Launcher manages backend MCP server connections type Launcher struct { ctx context.Context @@ -35,6 +42,8 @@ type Launcher struct { runningInContainer bool startupTimeout time.Duration // Timeout for backend server startup oidcProvider *oidc.Provider + serverStartTimes map[string]time.Time // tracks when each server was successfully launched + serverErrors map[string]string // tracks the most recent error per server } // New creates a new Launcher @@ -84,6 +93,8 @@ func New(ctx context.Context, cfg *config.Config) *Launcher { runningInContainer: inContainer, startupTimeout: startupTimeout, oidcProvider: oidcProvider, + serverStartTimes: make(map[string]time.Time), + serverErrors: make(map[string]string), } } @@ -120,6 +131,7 @@ func GetOrLaunch(l *Launcher, serverID string) (*mcp.Connection, error) { if oidcProvider == nil { logger.LogErrorWithServer(serverID, "backend", "Server %q requires OIDC auth but ACTIONS_ID_TOKEN_REQUEST_URL is not set", serverID) + l.recordError(serverID, "OIDC provider not available") return nil, fmt.Errorf( "server %q requires OIDC authentication but ACTIONS_ID_TOKEN_REQUEST_URL is not set; "+ "OIDC auth is only available in GitHub Actions with `permissions: { id-token: write }`", @@ -133,17 +145,25 @@ func GetOrLaunch(l *Launcher, serverID string) (*mcp.Connection, error) { logger.LogErrorWithServer(serverID, "backend", "Failed to create HTTP connection: %s, error=%v", serverID, err) log.Printf("[LAUNCHER] ❌ FAILED to create HTTP connection for '%s'", serverID) log.Printf("[LAUNCHER] Error: %v", err) + l.recordError(serverID, err.Error()) return nil, fmt.Errorf("failed to create HTTP connection: %w", err) } logger.LogInfoWithServer(serverID, "backend", "Successfully configured HTTP MCP backend: %s", serverID) log.Printf("[LAUNCHER] Successfully configured HTTP backend: %s", serverID) + l.recordStart(serverID) return conn, nil } // stdio backends from this point - return l.launchStdioConnection(serverID, "", serverCfg) + conn, err := l.launchStdioConnection(serverID, "", serverCfg) + if err != nil { + l.recordError(serverID, err.Error()) + return nil, err + } + l.recordStart(serverID) + return conn, nil }) } @@ -190,13 +210,15 @@ func GetOrLaunchForSession(l *Launcher, serverID, sessionID string) (*mcp.Connec conn, err := l.launchStdioConnection(serverID, sessionID, serverCfg) if err != nil { - // Record error in session pool + // Record error in session pool and server state l.sessionPool.RecordError(serverID, sessionID) + l.recordError(serverID, err.Error()) return nil, err } - // Add to session pool + // Add to session pool and record start l.sessionPool.Set(serverID, sessionID, conn) + l.recordStart(serverID) return conn, nil } @@ -283,3 +305,41 @@ func (l *Launcher) Close() { } logLauncher.Print("Launcher closed successfully") } + +// recordStart records a successful server launch time. +// Only the first successful launch is recorded; subsequent calls are no-ops. +func (l *Launcher) recordStart(serverID string) { + if _, exists := l.serverStartTimes[serverID]; !exists { + l.serverStartTimes[serverID] = time.Now() + delete(l.serverErrors, serverID) + logLauncher.Printf("Recorded server start: serverID=%s", serverID) + } +} + +// recordError records a server launch failure. +func (l *Launcher) recordError(serverID string, errMsg string) { + l.serverErrors[serverID] = errMsg + logLauncher.Printf("Recorded server error: serverID=%s, error=%s", serverID, errMsg) +} + +// GetServerState returns the observed runtime state for a single server. +func (l *Launcher) GetServerState(serverID string) ServerState { + l.mu.RLock() + defer l.mu.RUnlock() + + if errMsg, hasErr := l.serverErrors[serverID]; hasErr { + return ServerState{ + Status: "error", + LastError: errMsg, + } + } + + if startedAt, ok := l.serverStartTimes[serverID]; ok { + return ServerState{ + Status: "running", + StartedAt: startedAt, + } + } + + return ServerState{Status: "stopped"} +} diff --git a/internal/launcher/launcher_test.go b/internal/launcher/launcher_test.go index 7bb3e048..7d0717fe 100644 --- a/internal/launcher/launcher_test.go +++ b/internal/launcher/launcher_test.go @@ -773,3 +773,103 @@ func TestLauncher_MixedAuthTypes(t *testing.T) { assert.NotNil(t, l, "Launcher should be created even with OIDC servers and missing env vars") assert.Nil(t, l.oidcProvider, "OIDC provider should be nil without env vars") } + +func TestGetServerState_StoppedByDefault(t *testing.T) { + cfg := newTestConfig(map[string]*config.ServerConfig{ + "test-server": {Type: "stdio", Command: "echo"}, + }) + l := New(context.Background(), cfg) + defer l.Close() + + state := l.GetServerState("test-server") + assert.Equal(t, "stopped", state.Status) + assert.True(t, state.StartedAt.IsZero()) + assert.Empty(t, state.LastError) +} + +func TestGetServerState_UnknownServer(t *testing.T) { + cfg := newTestConfig(map[string]*config.ServerConfig{}) + l := New(context.Background(), cfg) + defer l.Close() + + state := l.GetServerState("nonexistent") + assert.Equal(t, "stopped", state.Status) +} + +func TestRecordStart_SetsRunningState(t *testing.T) { + cfg := newTestConfig(map[string]*config.ServerConfig{ + "test-server": {Type: "stdio", Command: "echo"}, + }) + l := New(context.Background(), cfg) + defer l.Close() + + l.recordStart("test-server") + + state := l.GetServerState("test-server") + assert.Equal(t, "running", state.Status) + assert.False(t, state.StartedAt.IsZero()) + assert.Empty(t, state.LastError) +} + +func TestRecordStart_OnlyRecordsFirstLaunch(t *testing.T) { + cfg := newTestConfig(map[string]*config.ServerConfig{ + "test-server": {Type: "stdio", Command: "echo"}, + }) + l := New(context.Background(), cfg) + defer l.Close() + + l.recordStart("test-server") + firstStart := l.serverStartTimes["test-server"] + + // Second call should be a no-op + l.recordStart("test-server") + secondStart := l.serverStartTimes["test-server"] + + assert.Equal(t, firstStart, secondStart) +} + +func TestRecordError_SetsErrorState(t *testing.T) { + cfg := newTestConfig(map[string]*config.ServerConfig{ + "test-server": {Type: "stdio", Command: "echo"}, + }) + l := New(context.Background(), cfg) + defer l.Close() + + l.recordError("test-server", "connection refused") + + state := l.GetServerState("test-server") + assert.Equal(t, "error", state.Status) + assert.Equal(t, "connection refused", state.LastError) +} + +func TestRecordStart_ClearsError(t *testing.T) { + cfg := newTestConfig(map[string]*config.ServerConfig{ + "test-server": {Type: "stdio", Command: "echo"}, + }) + l := New(context.Background(), cfg) + defer l.Close() + + l.recordError("test-server", "connection refused") + state := l.GetServerState("test-server") + assert.Equal(t, "error", state.Status) + + l.recordStart("test-server") + state = l.GetServerState("test-server") + assert.Equal(t, "running", state.Status) + assert.Empty(t, state.LastError) +} + +func TestGetServerState_ErrorTakesPrecedenceOverStart(t *testing.T) { + cfg := newTestConfig(map[string]*config.ServerConfig{ + "test-server": {Type: "stdio", Command: "echo"}, + }) + l := New(context.Background(), cfg) + defer l.Close() + + l.recordStart("test-server") + l.recordError("test-server", "server crashed") + + state := l.GetServerState("test-server") + assert.Equal(t, "error", state.Status) + assert.Equal(t, "server crashed", state.LastError) +} diff --git a/internal/server/health_test.go b/internal/server/health_test.go index ce10afe7..6954fe57 100644 --- a/internal/server/health_test.go +++ b/internal/server/health_test.go @@ -465,3 +465,45 @@ func TestBuildHealthResponse_HealthyStatus(t *testing.T) { assert.NotEmpty(response.GatewayVersion, "gatewayVersion should not be empty") assert.NotNil(response.Servers, "Servers map should not be nil") } + +// TestGetServerStatus_ErrorOnFailedLaunch verifies servers report "error" when launch fails +func TestGetServerStatus_ErrorOnFailedLaunch(t *testing.T) { + assert := assert.New(t) + require := require.New(t) + + cfg := &config.Config{ + Servers: map[string]*config.ServerConfig{ + "server-a": {Type: "stdio", Command: "docker", Args: []string{"run", "test"}}, + }, + } + + ctx := context.Background() + us, err := NewUnified(ctx, cfg) + require.NoError(err) + t.Cleanup(func() { us.Close() }) + + status := us.GetServerStatus() + require.Contains(status, "server-a") + assert.Equal("error", status["server-a"].Status, "Server should report 'error' after failed launch") + assert.Equal(0, status["server-a"].Uptime, "Uptime should be 0 for errored server") +} + +// TestBuildHealthResponse_UnhealthyOnError verifies that failed servers make the gateway unhealthy +func TestBuildHealthResponse_UnhealthyOnError(t *testing.T) { + assert := assert.New(t) + require := require.New(t) + + cfg := &config.Config{ + Servers: map[string]*config.ServerConfig{ + "server-a": {Type: "stdio", Command: "docker", Args: []string{"run", "test"}}, + }, + } + + ctx := context.Background() + us, err := NewUnified(ctx, cfg) + require.NoError(err) + t.Cleanup(func() { us.Close() }) + + response := BuildHealthResponse(us) + assert.Equal("unhealthy", response.Status, "Gateway should be unhealthy when a server has errors") +} diff --git a/internal/server/unified.go b/internal/server/unified.go index 9aadf701..297232c8 100644 --- a/internal/server/unified.go +++ b/internal/server/unified.go @@ -585,18 +585,17 @@ func (us *UnifiedServer) GetServerIDs() []string { func (us *UnifiedServer) GetServerStatus() map[string]ServerStatus { status := make(map[string]ServerStatus) - // Get all configured servers serverIDs := us.launcher.ServerIDs() for _, serverID := range serverIDs { - // Check if server has been launched by checking launcher connections - // For now, we'll return "running" for all configured servers - // and track uptime from when the gateway started - // This is a simple implementation - a more sophisticated version - // would track actual connection state per server + state := us.launcher.GetServerState(serverID) + uptime := 0 + if !state.StartedAt.IsZero() { + uptime = int(time.Since(state.StartedAt).Seconds()) + } status[serverID] = ServerStatus{ - Status: "running", - Uptime: 0, // Will be properly tracked when servers are actually launched + Status: state.Status, + Uptime: uptime, } }