diff --git a/core/application/config_file_watcher.go b/core/application/config_file_watcher.go index 30b3e5ad6f62..4a19cc128d96 100644 --- a/core/application/config_file_watcher.go +++ b/core/application/config_file_watcher.go @@ -185,33 +185,6 @@ func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHan return handler } -type runtimeSettings struct { - WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"` - WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"` - WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"` - WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` - WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"` - SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead - MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited, 1 = single backend mode) - ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"` - Threads *int `json:"threads,omitempty"` - ContextSize *int `json:"context_size,omitempty"` - F16 *bool `json:"f16,omitempty"` - Debug *bool `json:"debug,omitempty"` - CORS *bool `json:"cors,omitempty"` - CSRF *bool `json:"csrf,omitempty"` - CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"` - P2PToken *string `json:"p2p_token,omitempty"` - P2PNetworkID *string `json:"p2p_network_id,omitempty"` - Federated *bool `json:"federated,omitempty"` - Galleries *[]config.Gallery `json:"galleries,omitempty"` - BackendGalleries *[]config.Gallery `json:"backend_galleries,omitempty"` - AutoloadGalleries *bool `json:"autoload_galleries,omitempty"` - AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"` - ApiKeys *[]string `json:"api_keys,omitempty"` - AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"` -} - func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHandler { handler := 
func(fileContent []byte, appConfig *config.ApplicationConfig) error { log.Debug().Msg("processing runtime_settings.json") @@ -227,6 +200,8 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand envSingleBackend := appConfig.SingleBackend == startupAppConfig.SingleBackend envMaxActiveBackends := appConfig.MaxActiveBackends == startupAppConfig.MaxActiveBackends envParallelRequests := appConfig.ParallelBackendRequests == startupAppConfig.ParallelBackendRequests + envMemoryReclaimerEnabled := appConfig.MemoryReclaimerEnabled == startupAppConfig.MemoryReclaimerEnabled + envMemoryReclaimerThreshold := appConfig.MemoryReclaimerThreshold == startupAppConfig.MemoryReclaimerThreshold envThreads := appConfig.Threads == startupAppConfig.Threads envContextSize := appConfig.ContextSize == startupAppConfig.ContextSize envF16 := appConfig.F16 == startupAppConfig.F16 @@ -242,7 +217,7 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand envAgentJobRetentionDays := appConfig.AgentJobRetentionDays == startupAppConfig.AgentJobRetentionDays if len(fileContent) > 0 { - var settings runtimeSettings + var settings config.RuntimeSettings err := json.Unmarshal(fileContent, &settings) if err != nil { return err @@ -294,6 +269,15 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand if settings.ParallelBackendRequests != nil && !envParallelRequests { appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests } + if settings.MemoryReclaimerEnabled != nil && !envMemoryReclaimerEnabled { + appConfig.MemoryReclaimerEnabled = *settings.MemoryReclaimerEnabled + if appConfig.MemoryReclaimerEnabled { + appConfig.WatchDog = true // Memory reclaimer requires watchdog + } + } + if settings.MemoryReclaimerThreshold != nil && !envMemoryReclaimerThreshold { + appConfig.MemoryReclaimerThreshold = *settings.MemoryReclaimerThreshold + } if settings.Threads != nil && !envThreads { appConfig.Threads = 
*settings.Threads } diff --git a/core/application/startup.go b/core/application/startup.go index 3a238655d28c..d5e06c4e2b94 100644 --- a/core/application/startup.go +++ b/core/application/startup.go @@ -218,17 +218,7 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) { return } - var settings struct { - WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"` - WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"` - WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"` - WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` - WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"` - SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead - MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited) - ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"` - AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"` - } + var settings config.RuntimeSettings if err := json.Unmarshal(fileContent, &settings); err != nil { log.Warn().Err(err).Msg("failed to parse runtime_settings.json") @@ -281,6 +271,16 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) { } } } + if settings.WatchdogInterval != nil { + if options.WatchDogInterval == 0 { + dur, err := time.ParseDuration(*settings.WatchdogInterval) + if err == nil { + options.WatchDogInterval = dur + } else { + log.Warn().Err(err).Str("interval", *settings.WatchdogInterval).Msg("invalid watchdog interval in runtime_settings.json") + } + } + } // Handle MaxActiveBackends (new) and SingleBackend (deprecated) if settings.MaxActiveBackends != nil { // Only apply if current value is default (0), suggesting it wasn't set from env var @@ -303,6 +303,21 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) { options.ParallelBackendRequests = *settings.ParallelBackendRequests } } + if 
settings.MemoryReclaimerEnabled != nil { + // Only apply if current value is default (false), suggesting it wasn't set from env var + if !options.MemoryReclaimerEnabled { + options.MemoryReclaimerEnabled = *settings.MemoryReclaimerEnabled + if options.MemoryReclaimerEnabled { + options.WatchDog = true // Memory reclaimer requires watchdog + } + } + } + if settings.MemoryReclaimerThreshold != nil { + // Only apply if current value is default (0), suggesting it wasn't set from env var + if options.MemoryReclaimerThreshold == 0 { + options.MemoryReclaimerThreshold = *settings.MemoryReclaimerThreshold + } + } if settings.AgentJobRetentionDays != nil { // Only apply if current value is default (0), suggesting it wasn't set from env var if options.AgentJobRetentionDays == 0 { @@ -323,19 +338,24 @@ func initializeWatchdog(application *Application, options *config.ApplicationCon // Get effective max active backends (considers both MaxActiveBackends and deprecated SingleBackend) lruLimit := options.GetEffectiveMaxActiveBackends() - // Create watchdog if enabled OR if LRU limit is set - if options.WatchDog || lruLimit > 0 { + // Create watchdog if enabled OR if LRU limit is set OR if memory reclaimer is enabled + if options.WatchDog || lruLimit > 0 || options.MemoryReclaimerEnabled { wd := model.NewWatchDog( - application.ModelLoader(), - options.WatchDogBusyTimeout, - options.WatchDogIdleTimeout, - options.WatchDogBusy, - options.WatchDogIdle, - lruLimit) + model.WithProcessManager(application.ModelLoader()), + model.WithBusyTimeout(options.WatchDogBusyTimeout), + model.WithIdleTimeout(options.WatchDogIdleTimeout), + model.WithWatchdogInterval(options.WatchDogInterval), + model.WithBusyCheck(options.WatchDogBusy), + model.WithIdleCheck(options.WatchDogIdle), + model.WithLRULimit(lruLimit), + model.WithMemoryReclaimer(options.MemoryReclaimerEnabled, options.MemoryReclaimerThreshold), + ) application.ModelLoader().SetWatchDog(wd) - // Start watchdog goroutine only if 
busy/idle checks are enabled - if options.WatchDogBusy || options.WatchDogIdle { + // Start watchdog goroutine if any periodic checks are enabled + // LRU eviction doesn't need the Run() loop - it's triggered on model load + // But memory reclaimer needs the Run() loop for periodic checking + if options.WatchDogBusy || options.WatchDogIdle || options.MemoryReclaimerEnabled { go wd.Run() } diff --git a/core/application/watchdog.go b/core/application/watchdog.go index e82ac28dcaef..bceb06e19c4b 100644 --- a/core/application/watchdog.go +++ b/core/application/watchdog.go @@ -23,24 +23,28 @@ func (a *Application) startWatchdog() error { // Get effective max active backends (considers both MaxActiveBackends and deprecated SingleBackend) lruLimit := appConfig.GetEffectiveMaxActiveBackends() - // Create watchdog if enabled OR if LRU limit is set + // Create watchdog if enabled OR if LRU limit is set OR if memory reclaimer is enabled // LRU eviction requires watchdog infrastructure even without busy/idle checks - if appConfig.WatchDog || lruLimit > 0 { + if appConfig.WatchDog || lruLimit > 0 || appConfig.MemoryReclaimerEnabled { wd := model.NewWatchDog( - a.modelLoader, - appConfig.WatchDogBusyTimeout, - appConfig.WatchDogIdleTimeout, - appConfig.WatchDogBusy, - appConfig.WatchDogIdle, - lruLimit) + model.WithProcessManager(a.modelLoader), + model.WithBusyTimeout(appConfig.WatchDogBusyTimeout), + model.WithIdleTimeout(appConfig.WatchDogIdleTimeout), + model.WithWatchdogInterval(appConfig.WatchDogInterval), + model.WithBusyCheck(appConfig.WatchDogBusy), + model.WithIdleCheck(appConfig.WatchDogIdle), + model.WithLRULimit(lruLimit), + model.WithMemoryReclaimer(appConfig.MemoryReclaimerEnabled, appConfig.MemoryReclaimerThreshold), + ) a.modelLoader.SetWatchDog(wd) // Create new stop channel a.watchdogStop = make(chan bool, 1) - // Start watchdog goroutine only if busy/idle checks are enabled + // Start watchdog goroutine if any periodic checks are enabled // LRU eviction 
doesn't need the Run() loop - it's triggered on model load - if appConfig.WatchDogBusy || appConfig.WatchDogIdle { + // But memory reclaimer needs the Run() loop for periodic checking + if appConfig.WatchDogBusy || appConfig.WatchDogIdle || appConfig.MemoryReclaimerEnabled { go wd.Run() } @@ -56,7 +60,14 @@ func (a *Application) startWatchdog() error { } }() - log.Info().Int("lruLimit", lruLimit).Bool("busyCheck", appConfig.WatchDogBusy).Bool("idleCheck", appConfig.WatchDogIdle).Msg("Watchdog started with new settings") + log.Info(). + Int("lruLimit", lruLimit). + Bool("busyCheck", appConfig.WatchDogBusy). + Bool("idleCheck", appConfig.WatchDogIdle). + Bool("memoryReclaimer", appConfig.MemoryReclaimerEnabled). + Float64("memoryThreshold", appConfig.MemoryReclaimerThreshold). + Dur("interval", appConfig.WatchDogInterval). + Msg("Watchdog started with new settings") } else { log.Info().Msg("Watchdog disabled") } diff --git a/core/cli/run.go b/core/cli/run.go index 4df4fbdf3ba1..a37a19d3512c 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -72,6 +72,8 @@ type RunCMD struct { WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"` EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"` WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"` + EnableMemoryReclaimer bool `env:"LOCALAI_MEMORY_RECLAIMER,MEMORY_RECLAIMER,LOCALAI_GPU_RECLAIMER,GPU_RECLAIMER" default:"false" help:"Enable memory threshold monitoring to auto-evict backends when memory usage exceeds threshold (uses GPU VRAM if available, otherwise RAM)" group:"backends"` + MemoryReclaimerThreshold float64 
`env:"LOCALAI_MEMORY_RECLAIMER_THRESHOLD,MEMORY_RECLAIMER_THRESHOLD,LOCALAI_GPU_RECLAIMER_THRESHOLD,GPU_RECLAIMER_THRESHOLD" default:"0.95" help:"Memory usage threshold (0.0-1.0) that triggers backend eviction (default 0.95 = 95%%)" group:"backends"` Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"` DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"` MachineTag string `env:"LOCALAI_MACHINE_TAG,MACHINE_TAG" help:"Add Machine-Tag header to each response which is useful to track the machine in the P2P network" group:"api"` @@ -200,6 +202,12 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { opts = append(opts, config.SetWatchDogBusyTimeout(dur)) } } + + // Handle memory reclaimer (uses GPU VRAM if available, otherwise RAM) + if r.EnableMemoryReclaimer { + opts = append(opts, config.WithMemoryReclaimer(true, r.MemoryReclaimerThreshold)) + } + if r.ParallelRequests { opts = append(opts, config.EnableParallelBackendRequests) } diff --git a/core/config/application_config.go b/core/config/application_config.go index c67e24f5c697..e70f721babd8 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -60,9 +60,14 @@ type ApplicationConfig struct { WatchDogBusy bool WatchDog bool + // Memory Reclaimer settings (works with GPU if available, otherwise RAM) + MemoryReclaimerEnabled bool // Enable memory threshold monitoring + MemoryReclaimerThreshold float64 // Threshold 0.0-1.0 (e.g., 0.95 = 95%) + ModelsURL []string WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration + WatchDogInterval time.Duration // Interval between watchdog checks MachineTag string @@ -187,6 +192,39 @@ func SetWatchDogIdleTimeout(t time.Duration) AppOption { } } +// EnableMemoryReclaimer enables memory threshold monitoring. +// When enabled, the watchdog will evict backends if memory usage exceeds the threshold. 
+// Works with GPU VRAM if available, otherwise uses system RAM. +var EnableMemoryReclaimer = func(o *ApplicationConfig) { + o.MemoryReclaimerEnabled = true + o.WatchDog = true // Memory reclaimer requires watchdog infrastructure +} + +// SetMemoryReclaimerThreshold sets the memory usage threshold (0.0-1.0). +// When memory usage exceeds this threshold, backends will be evicted using LRU strategy. +func SetMemoryReclaimerThreshold(threshold float64) AppOption { + return func(o *ApplicationConfig) { + if threshold > 0 && threshold <= 1.0 { + o.MemoryReclaimerThreshold = threshold + o.MemoryReclaimerEnabled = true + o.WatchDog = true // Memory reclaimer requires watchdog infrastructure + } + } +} + +// WithMemoryReclaimer configures the memory reclaimer with the given settings +func WithMemoryReclaimer(enabled bool, threshold float64) AppOption { + return func(o *ApplicationConfig) { + o.MemoryReclaimerEnabled = enabled + if threshold > 0 && threshold <= 1.0 { + o.MemoryReclaimerThreshold = threshold + } + if enabled { + o.WatchDog = true // Memory reclaimer requires watchdog infrastructure + } + } +} + // EnableSingleBackend is deprecated: use SetMaxActiveBackends(1) instead. // This is kept for backward compatibility. var EnableSingleBackend = func(o *ApplicationConfig) { @@ -454,6 +492,208 @@ func (o *ApplicationConfig) ToConfigLoaderOptions() []ConfigLoaderOption { } } +// ToRuntimeSettings converts ApplicationConfig to RuntimeSettings for API responses and JSON serialization. +// This provides a single source of truth - ApplicationConfig holds the live values, +// and this method creates a RuntimeSettings snapshot for external consumption. 
+func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings { + // Create local copies for pointer fields + watchdogEnabled := o.WatchDog + watchdogIdle := o.WatchDogIdle + watchdogBusy := o.WatchDogBusy + singleBackend := o.SingleBackend + maxActiveBackends := o.MaxActiveBackends + parallelBackendRequests := o.ParallelBackendRequests + memoryReclaimerEnabled := o.MemoryReclaimerEnabled + memoryReclaimerThreshold := o.MemoryReclaimerThreshold + threads := o.Threads + contextSize := o.ContextSize + f16 := o.F16 + debug := o.Debug + cors := o.CORS + csrf := o.CSRF + corsAllowOrigins := o.CORSAllowOrigins + p2pToken := o.P2PToken + p2pNetworkID := o.P2PNetworkID + federated := o.Federated + galleries := o.Galleries + backendGalleries := o.BackendGalleries + autoloadGalleries := o.AutoloadGalleries + autoloadBackendGalleries := o.AutoloadBackendGalleries + apiKeys := o.ApiKeys + agentJobRetentionDays := o.AgentJobRetentionDays + + // Format timeouts as strings + var idleTimeout, busyTimeout, watchdogInterval string + if o.WatchDogIdleTimeout > 0 { + idleTimeout = o.WatchDogIdleTimeout.String() + } else { + idleTimeout = "15m" // default + } + if o.WatchDogBusyTimeout > 0 { + busyTimeout = o.WatchDogBusyTimeout.String() + } else { + busyTimeout = "5m" // default + } + if o.WatchDogInterval > 0 { + watchdogInterval = o.WatchDogInterval.String() + } else { + watchdogInterval = "2s" // default + } + + return RuntimeSettings{ + WatchdogEnabled: &watchdogEnabled, + WatchdogIdleEnabled: &watchdogIdle, + WatchdogBusyEnabled: &watchdogBusy, + WatchdogIdleTimeout: &idleTimeout, + WatchdogBusyTimeout: &busyTimeout, + WatchdogInterval: &watchdogInterval, + SingleBackend: &singleBackend, + MaxActiveBackends: &maxActiveBackends, + ParallelBackendRequests: ¶llelBackendRequests, + MemoryReclaimerEnabled: &memoryReclaimerEnabled, + MemoryReclaimerThreshold: &memoryReclaimerThreshold, + Threads: &threads, + ContextSize: &contextSize, + F16: &f16, + Debug: &debug, + CORS: &cors, + 
CSRF: &csrf, + CORSAllowOrigins: &corsAllowOrigins, + P2PToken: &p2pToken, + P2PNetworkID: &p2pNetworkID, + Federated: &federated, + Galleries: &galleries, + BackendGalleries: &backendGalleries, + AutoloadGalleries: &autoloadGalleries, + AutoloadBackendGalleries: &autoloadBackendGalleries, + ApiKeys: &apiKeys, + AgentJobRetentionDays: &agentJobRetentionDays, + } +} + +// ApplyRuntimeSettings applies RuntimeSettings to ApplicationConfig. +// Only non-nil fields in RuntimeSettings are applied. +// Returns true if watchdog-related settings changed (requiring restart). +func (o *ApplicationConfig) ApplyRuntimeSettings(settings *RuntimeSettings) (requireRestart bool) { + if settings == nil { + return false + } + + if settings.WatchdogEnabled != nil { + o.WatchDog = *settings.WatchdogEnabled + requireRestart = true + } + if settings.WatchdogIdleEnabled != nil { + o.WatchDogIdle = *settings.WatchdogIdleEnabled + if o.WatchDogIdle { + o.WatchDog = true + } + requireRestart = true + } + if settings.WatchdogBusyEnabled != nil { + o.WatchDogBusy = *settings.WatchdogBusyEnabled + if o.WatchDogBusy { + o.WatchDog = true + } + requireRestart = true + } + if settings.WatchdogIdleTimeout != nil { + if dur, err := time.ParseDuration(*settings.WatchdogIdleTimeout); err == nil { + o.WatchDogIdleTimeout = dur + requireRestart = true + } + } + if settings.WatchdogBusyTimeout != nil { + if dur, err := time.ParseDuration(*settings.WatchdogBusyTimeout); err == nil { + o.WatchDogBusyTimeout = dur + requireRestart = true + } + } + if settings.WatchdogInterval != nil { + if dur, err := time.ParseDuration(*settings.WatchdogInterval); err == nil { + o.WatchDogInterval = dur + requireRestart = true + } + } + if settings.MaxActiveBackends != nil { + o.MaxActiveBackends = *settings.MaxActiveBackends + o.SingleBackend = (*settings.MaxActiveBackends == 1) + requireRestart = true + } else if settings.SingleBackend != nil { + o.SingleBackend = *settings.SingleBackend + if *settings.SingleBackend { + 
o.MaxActiveBackends = 1 + } else { + o.MaxActiveBackends = 0 + } + requireRestart = true + } + if settings.ParallelBackendRequests != nil { + o.ParallelBackendRequests = *settings.ParallelBackendRequests + } + if settings.MemoryReclaimerEnabled != nil { + o.MemoryReclaimerEnabled = *settings.MemoryReclaimerEnabled + if *settings.MemoryReclaimerEnabled { + o.WatchDog = true + } + requireRestart = true + } + if settings.MemoryReclaimerThreshold != nil { + if *settings.MemoryReclaimerThreshold > 0 && *settings.MemoryReclaimerThreshold <= 1.0 { + o.MemoryReclaimerThreshold = *settings.MemoryReclaimerThreshold + requireRestart = true + } + } + if settings.Threads != nil { + o.Threads = *settings.Threads + } + if settings.ContextSize != nil { + o.ContextSize = *settings.ContextSize + } + if settings.F16 != nil { + o.F16 = *settings.F16 + } + if settings.Debug != nil { + o.Debug = *settings.Debug + } + if settings.CORS != nil { + o.CORS = *settings.CORS + } + if settings.CSRF != nil { + o.CSRF = *settings.CSRF + } + if settings.CORSAllowOrigins != nil { + o.CORSAllowOrigins = *settings.CORSAllowOrigins + } + if settings.P2PToken != nil { + o.P2PToken = *settings.P2PToken + } + if settings.P2PNetworkID != nil { + o.P2PNetworkID = *settings.P2PNetworkID + } + if settings.Federated != nil { + o.Federated = *settings.Federated + } + if settings.Galleries != nil { + o.Galleries = *settings.Galleries + } + if settings.BackendGalleries != nil { + o.BackendGalleries = *settings.BackendGalleries + } + if settings.AutoloadGalleries != nil { + o.AutoloadGalleries = *settings.AutoloadGalleries + } + if settings.AutoloadBackendGalleries != nil { + o.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries + } + if settings.AgentJobRetentionDays != nil { + o.AgentJobRetentionDays = *settings.AgentJobRetentionDays + } + // Note: ApiKeys requires special handling (merging with startup keys) - handled in caller + + return requireRestart +} + // func WithMetrics(meter 
*metrics.Metrics) AppOption { // return func(o *StartupOptions) { // o.Metrics = meter diff --git a/core/config/application_config_test.go b/core/config/application_config_test.go new file mode 100644 index 000000000000..c6d4fbecd6bc --- /dev/null +++ b/core/config/application_config_test.go @@ -0,0 +1,577 @@ +package config + +import ( + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("ApplicationConfig RuntimeSettings Conversion", func() { + Describe("ToRuntimeSettings", func() { + It("should convert all fields correctly", func() { + appConfig := &ApplicationConfig{ + WatchDog: true, + WatchDogIdle: true, + WatchDogBusy: true, + WatchDogIdleTimeout: 20 * time.Minute, + WatchDogBusyTimeout: 10 * time.Minute, + SingleBackend: false, + MaxActiveBackends: 5, + ParallelBackendRequests: true, + MemoryReclaimerEnabled: true, + MemoryReclaimerThreshold: 0.85, + Threads: 8, + ContextSize: 4096, + F16: true, + Debug: true, + CORS: true, + CSRF: true, + CORSAllowOrigins: "https://example.com", + P2PToken: "test-token", + P2PNetworkID: "test-network", + Federated: true, + Galleries: []Gallery{{Name: "test-gallery", URL: "https://example.com"}}, + BackendGalleries: []Gallery{{Name: "backend-gallery", URL: "https://example.com/backend"}}, + AutoloadGalleries: true, + AutoloadBackendGalleries: true, + ApiKeys: []string{"key1", "key2"}, + AgentJobRetentionDays: 30, + } + + rs := appConfig.ToRuntimeSettings() + + Expect(rs.WatchdogEnabled).ToNot(BeNil()) + Expect(*rs.WatchdogEnabled).To(BeTrue()) + + Expect(rs.WatchdogIdleEnabled).ToNot(BeNil()) + Expect(*rs.WatchdogIdleEnabled).To(BeTrue()) + + Expect(rs.WatchdogBusyEnabled).ToNot(BeNil()) + Expect(*rs.WatchdogBusyEnabled).To(BeTrue()) + + Expect(rs.WatchdogIdleTimeout).ToNot(BeNil()) + Expect(*rs.WatchdogIdleTimeout).To(Equal("20m0s")) + + Expect(rs.WatchdogBusyTimeout).ToNot(BeNil()) + Expect(*rs.WatchdogBusyTimeout).To(Equal("10m0s")) + + Expect(rs.SingleBackend).ToNot(BeNil()) + 
Expect(*rs.SingleBackend).To(BeFalse()) + + Expect(rs.MaxActiveBackends).ToNot(BeNil()) + Expect(*rs.MaxActiveBackends).To(Equal(5)) + + Expect(rs.ParallelBackendRequests).ToNot(BeNil()) + Expect(*rs.ParallelBackendRequests).To(BeTrue()) + + Expect(rs.MemoryReclaimerEnabled).ToNot(BeNil()) + Expect(*rs.MemoryReclaimerEnabled).To(BeTrue()) + + Expect(rs.MemoryReclaimerThreshold).ToNot(BeNil()) + Expect(*rs.MemoryReclaimerThreshold).To(Equal(0.85)) + + Expect(rs.Threads).ToNot(BeNil()) + Expect(*rs.Threads).To(Equal(8)) + + Expect(rs.ContextSize).ToNot(BeNil()) + Expect(*rs.ContextSize).To(Equal(4096)) + + Expect(rs.F16).ToNot(BeNil()) + Expect(*rs.F16).To(BeTrue()) + + Expect(rs.Debug).ToNot(BeNil()) + Expect(*rs.Debug).To(BeTrue()) + + Expect(rs.CORS).ToNot(BeNil()) + Expect(*rs.CORS).To(BeTrue()) + + Expect(rs.CSRF).ToNot(BeNil()) + Expect(*rs.CSRF).To(BeTrue()) + + Expect(rs.CORSAllowOrigins).ToNot(BeNil()) + Expect(*rs.CORSAllowOrigins).To(Equal("https://example.com")) + + Expect(rs.P2PToken).ToNot(BeNil()) + Expect(*rs.P2PToken).To(Equal("test-token")) + + Expect(rs.P2PNetworkID).ToNot(BeNil()) + Expect(*rs.P2PNetworkID).To(Equal("test-network")) + + Expect(rs.Federated).ToNot(BeNil()) + Expect(*rs.Federated).To(BeTrue()) + + Expect(rs.Galleries).ToNot(BeNil()) + Expect(*rs.Galleries).To(HaveLen(1)) + Expect((*rs.Galleries)[0].Name).To(Equal("test-gallery")) + + Expect(rs.BackendGalleries).ToNot(BeNil()) + Expect(*rs.BackendGalleries).To(HaveLen(1)) + Expect((*rs.BackendGalleries)[0].Name).To(Equal("backend-gallery")) + + Expect(rs.AutoloadGalleries).ToNot(BeNil()) + Expect(*rs.AutoloadGalleries).To(BeTrue()) + + Expect(rs.AutoloadBackendGalleries).ToNot(BeNil()) + Expect(*rs.AutoloadBackendGalleries).To(BeTrue()) + + Expect(rs.ApiKeys).ToNot(BeNil()) + Expect(*rs.ApiKeys).To(HaveLen(2)) + Expect(*rs.ApiKeys).To(ContainElements("key1", "key2")) + + Expect(rs.AgentJobRetentionDays).ToNot(BeNil()) + Expect(*rs.AgentJobRetentionDays).To(Equal(30)) + }) + + 
It("should use default timeouts when not set", func() { + appConfig := &ApplicationConfig{} + + rs := appConfig.ToRuntimeSettings() + + Expect(rs.WatchdogIdleTimeout).ToNot(BeNil()) + Expect(*rs.WatchdogIdleTimeout).To(Equal("15m")) + + Expect(rs.WatchdogBusyTimeout).ToNot(BeNil()) + Expect(*rs.WatchdogBusyTimeout).To(Equal("5m")) + }) + }) + + Describe("ApplyRuntimeSettings", func() { + It("should return false when settings is nil", func() { + appConfig := &ApplicationConfig{} + changed := appConfig.ApplyRuntimeSettings(nil) + Expect(changed).To(BeFalse()) + }) + + It("should only apply non-nil fields", func() { + appConfig := &ApplicationConfig{ + WatchDog: false, + Threads: 4, + ContextSize: 2048, + } + + watchdogEnabled := true + rs := &RuntimeSettings{ + WatchdogEnabled: &watchdogEnabled, + // Leave other fields nil + } + + changed := appConfig.ApplyRuntimeSettings(rs) + + Expect(changed).To(BeTrue()) + Expect(appConfig.WatchDog).To(BeTrue()) + // Unchanged fields should remain + Expect(appConfig.Threads).To(Equal(4)) + Expect(appConfig.ContextSize).To(Equal(2048)) + }) + + It("should apply watchdog settings and return changed=true", func() { + appConfig := &ApplicationConfig{} + + watchdogEnabled := true + watchdogIdle := true + watchdogBusy := true + idleTimeout := "30m" + busyTimeout := "15m" + + rs := &RuntimeSettings{ + WatchdogEnabled: &watchdogEnabled, + WatchdogIdleEnabled: &watchdogIdle, + WatchdogBusyEnabled: &watchdogBusy, + WatchdogIdleTimeout: &idleTimeout, + WatchdogBusyTimeout: &busyTimeout, + } + + changed := appConfig.ApplyRuntimeSettings(rs) + + Expect(changed).To(BeTrue()) + Expect(appConfig.WatchDog).To(BeTrue()) + Expect(appConfig.WatchDogIdle).To(BeTrue()) + Expect(appConfig.WatchDogBusy).To(BeTrue()) + Expect(appConfig.WatchDogIdleTimeout).To(Equal(30 * time.Minute)) + Expect(appConfig.WatchDogBusyTimeout).To(Equal(15 * time.Minute)) + }) + + It("should enable watchdog when idle is enabled", func() { + appConfig := 
&ApplicationConfig{WatchDog: false} + + watchdogIdle := true + rs := &RuntimeSettings{ + WatchdogIdleEnabled: &watchdogIdle, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.WatchDog).To(BeTrue()) + Expect(appConfig.WatchDogIdle).To(BeTrue()) + }) + + It("should enable watchdog when busy is enabled", func() { + appConfig := &ApplicationConfig{WatchDog: false} + + watchdogBusy := true + rs := &RuntimeSettings{ + WatchdogBusyEnabled: &watchdogBusy, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.WatchDog).To(BeTrue()) + Expect(appConfig.WatchDogBusy).To(BeTrue()) + }) + + It("should handle MaxActiveBackends and update SingleBackend accordingly", func() { + appConfig := &ApplicationConfig{} + + maxBackends := 1 + rs := &RuntimeSettings{ + MaxActiveBackends: &maxBackends, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.MaxActiveBackends).To(Equal(1)) + Expect(appConfig.SingleBackend).To(BeTrue()) + + // Test with multiple backends + maxBackends = 5 + rs = &RuntimeSettings{ + MaxActiveBackends: &maxBackends, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.MaxActiveBackends).To(Equal(5)) + Expect(appConfig.SingleBackend).To(BeFalse()) + }) + + It("should handle SingleBackend and update MaxActiveBackends accordingly", func() { + appConfig := &ApplicationConfig{} + + singleBackend := true + rs := &RuntimeSettings{ + SingleBackend: &singleBackend, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.SingleBackend).To(BeTrue()) + Expect(appConfig.MaxActiveBackends).To(Equal(1)) + + // Test disabling single backend + singleBackend = false + rs = &RuntimeSettings{ + SingleBackend: &singleBackend, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.SingleBackend).To(BeFalse()) + Expect(appConfig.MaxActiveBackends).To(Equal(0)) + }) + + It("should enable watchdog when memory reclaimer is enabled", func() { + appConfig := &ApplicationConfig{WatchDog: false} + + memoryEnabled := true + threshold 
:= 0.90 + rs := &RuntimeSettings{ + MemoryReclaimerEnabled: &memoryEnabled, + MemoryReclaimerThreshold: &threshold, + } + + changed := appConfig.ApplyRuntimeSettings(rs) + + Expect(changed).To(BeTrue()) + Expect(appConfig.WatchDog).To(BeTrue()) + Expect(appConfig.MemoryReclaimerEnabled).To(BeTrue()) + Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.90)) + }) + + It("should reject invalid memory threshold values", func() { + appConfig := &ApplicationConfig{MemoryReclaimerThreshold: 0.50} + + // Test threshold > 1.0 + invalidThreshold := 1.5 + rs := &RuntimeSettings{ + MemoryReclaimerThreshold: &invalidThreshold, + } + appConfig.ApplyRuntimeSettings(rs) + Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.50)) // Should remain unchanged + + // Test threshold <= 0 + invalidThreshold = 0.0 + rs = &RuntimeSettings{ + MemoryReclaimerThreshold: &invalidThreshold, + } + appConfig.ApplyRuntimeSettings(rs) + Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.50)) // Should remain unchanged + + // Test negative threshold + invalidThreshold = -0.5 + rs = &RuntimeSettings{ + MemoryReclaimerThreshold: &invalidThreshold, + } + appConfig.ApplyRuntimeSettings(rs) + Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.50)) // Should remain unchanged + }) + + It("should accept valid memory threshold at boundary", func() { + appConfig := &ApplicationConfig{} + + // Test threshold = 1.0 (maximum valid) + threshold := 1.0 + rs := &RuntimeSettings{ + MemoryReclaimerThreshold: &threshold, + } + appConfig.ApplyRuntimeSettings(rs) + Expect(appConfig.MemoryReclaimerThreshold).To(Equal(1.0)) + + // Test threshold just above 0 + threshold = 0.01 + rs = &RuntimeSettings{ + MemoryReclaimerThreshold: &threshold, + } + appConfig.ApplyRuntimeSettings(rs) + Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.01)) + }) + + It("should apply performance settings without triggering watchdog change", func() { + appConfig := &ApplicationConfig{} + + threads := 16 + contextSize := 8192 + f16 
:= true + debug := true + + rs := &RuntimeSettings{ + Threads: &threads, + ContextSize: &contextSize, + F16: &f16, + Debug: &debug, + } + + changed := appConfig.ApplyRuntimeSettings(rs) + + // These settings don't require watchdog restart + Expect(changed).To(BeFalse()) + Expect(appConfig.Threads).To(Equal(16)) + Expect(appConfig.ContextSize).To(Equal(8192)) + Expect(appConfig.F16).To(BeTrue()) + Expect(appConfig.Debug).To(BeTrue()) + }) + + It("should apply CORS and security settings", func() { + appConfig := &ApplicationConfig{} + + cors := true + csrf := true + origins := "https://example.com,https://other.com" + + rs := &RuntimeSettings{ + CORS: &cors, + CSRF: &csrf, + CORSAllowOrigins: &origins, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.CORS).To(BeTrue()) + Expect(appConfig.CSRF).To(BeTrue()) + Expect(appConfig.CORSAllowOrigins).To(Equal("https://example.com,https://other.com")) + }) + + It("should apply P2P settings", func() { + appConfig := &ApplicationConfig{} + + token := "p2p-test-token" + networkID := "p2p-test-network" + federated := true + + rs := &RuntimeSettings{ + P2PToken: &token, + P2PNetworkID: &networkID, + Federated: &federated, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.P2PToken).To(Equal("p2p-test-token")) + Expect(appConfig.P2PNetworkID).To(Equal("p2p-test-network")) + Expect(appConfig.Federated).To(BeTrue()) + }) + + It("should apply gallery settings", func() { + appConfig := &ApplicationConfig{} + + galleries := []Gallery{ + {Name: "gallery1", URL: "https://gallery1.com"}, + {Name: "gallery2", URL: "https://gallery2.com"}, + } + backendGalleries := []Gallery{ + {Name: "backend-gallery", URL: "https://backend.com"}, + } + autoload := true + autoloadBackend := true + + rs := &RuntimeSettings{ + Galleries: &galleries, + BackendGalleries: &backendGalleries, + AutoloadGalleries: &autoload, + AutoloadBackendGalleries: &autoloadBackend, + } + + appConfig.ApplyRuntimeSettings(rs) + + 
Expect(appConfig.Galleries).To(HaveLen(2)) + Expect(appConfig.Galleries[0].Name).To(Equal("gallery1")) + Expect(appConfig.BackendGalleries).To(HaveLen(1)) + Expect(appConfig.AutoloadGalleries).To(BeTrue()) + Expect(appConfig.AutoloadBackendGalleries).To(BeTrue()) + }) + + It("should apply agent settings", func() { + appConfig := &ApplicationConfig{} + + retentionDays := 14 + + rs := &RuntimeSettings{ + AgentJobRetentionDays: &retentionDays, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.AgentJobRetentionDays).To(Equal(14)) + }) + }) + + Describe("Round-trip conversion", func() { + It("should maintain values through ToRuntimeSettings -> ApplyRuntimeSettings", func() { + original := &ApplicationConfig{ + WatchDog: true, + WatchDogIdle: true, + WatchDogBusy: false, + WatchDogIdleTimeout: 25 * time.Minute, + WatchDogBusyTimeout: 12 * time.Minute, + SingleBackend: false, + MaxActiveBackends: 3, + ParallelBackendRequests: true, + MemoryReclaimerEnabled: true, + MemoryReclaimerThreshold: 0.92, + Threads: 12, + ContextSize: 6144, + F16: true, + Debug: false, + CORS: true, + CSRF: false, + CORSAllowOrigins: "https://test.com", + P2PToken: "round-trip-token", + P2PNetworkID: "round-trip-network", + Federated: true, + AutoloadGalleries: true, + AutoloadBackendGalleries: false, + AgentJobRetentionDays: 60, + } + + // Convert to RuntimeSettings + rs := original.ToRuntimeSettings() + + // Apply to a new ApplicationConfig + target := &ApplicationConfig{} + target.ApplyRuntimeSettings(&rs) + + // Verify all values match + Expect(target.WatchDog).To(Equal(original.WatchDog)) + Expect(target.WatchDogIdle).To(Equal(original.WatchDogIdle)) + Expect(target.WatchDogBusy).To(Equal(original.WatchDogBusy)) + Expect(target.WatchDogIdleTimeout).To(Equal(original.WatchDogIdleTimeout)) + Expect(target.WatchDogBusyTimeout).To(Equal(original.WatchDogBusyTimeout)) + Expect(target.MaxActiveBackends).To(Equal(original.MaxActiveBackends)) + 
Expect(target.ParallelBackendRequests).To(Equal(original.ParallelBackendRequests)) + Expect(target.MemoryReclaimerEnabled).To(Equal(original.MemoryReclaimerEnabled)) + Expect(target.MemoryReclaimerThreshold).To(Equal(original.MemoryReclaimerThreshold)) + Expect(target.Threads).To(Equal(original.Threads)) + Expect(target.ContextSize).To(Equal(original.ContextSize)) + Expect(target.F16).To(Equal(original.F16)) + Expect(target.Debug).To(Equal(original.Debug)) + Expect(target.CORS).To(Equal(original.CORS)) + Expect(target.CSRF).To(Equal(original.CSRF)) + Expect(target.CORSAllowOrigins).To(Equal(original.CORSAllowOrigins)) + Expect(target.P2PToken).To(Equal(original.P2PToken)) + Expect(target.P2PNetworkID).To(Equal(original.P2PNetworkID)) + Expect(target.Federated).To(Equal(original.Federated)) + Expect(target.AutoloadGalleries).To(Equal(original.AutoloadGalleries)) + Expect(target.AutoloadBackendGalleries).To(Equal(original.AutoloadBackendGalleries)) + Expect(target.AgentJobRetentionDays).To(Equal(original.AgentJobRetentionDays)) + }) + + It("should handle empty galleries correctly in round-trip", func() { + original := &ApplicationConfig{ + Galleries: []Gallery{}, + BackendGalleries: []Gallery{}, + ApiKeys: []string{}, + } + + rs := original.ToRuntimeSettings() + target := &ApplicationConfig{} + target.ApplyRuntimeSettings(&rs) + + Expect(target.Galleries).To(BeEmpty()) + Expect(target.BackendGalleries).To(BeEmpty()) + }) + }) + + Describe("Edge cases", func() { + It("should handle invalid timeout string in ApplyRuntimeSettings", func() { + appConfig := &ApplicationConfig{ + WatchDogIdleTimeout: 10 * time.Minute, + } + + invalidTimeout := "not-a-duration" + rs := &RuntimeSettings{ + WatchdogIdleTimeout: &invalidTimeout, + } + + appConfig.ApplyRuntimeSettings(rs) + + // Should remain unchanged due to parse error + Expect(appConfig.WatchDogIdleTimeout).To(Equal(10 * time.Minute)) + }) + + It("should handle zero values in ApplicationConfig", func() { + appConfig := 
&ApplicationConfig{ + // All zero values + } + + rs := appConfig.ToRuntimeSettings() + + // Should still have non-nil pointers with zero/default values + Expect(rs.WatchdogEnabled).ToNot(BeNil()) + Expect(*rs.WatchdogEnabled).To(BeFalse()) + + Expect(rs.Threads).ToNot(BeNil()) + Expect(*rs.Threads).To(Equal(0)) + + Expect(rs.MemoryReclaimerThreshold).ToNot(BeNil()) + Expect(*rs.MemoryReclaimerThreshold).To(Equal(0.0)) + }) + + It("should prefer MaxActiveBackends over SingleBackend when both are set", func() { + appConfig := &ApplicationConfig{} + + maxBackends := 3 + singleBackend := true + + rs := &RuntimeSettings{ + MaxActiveBackends: &maxBackends, + SingleBackend: &singleBackend, + } + + appConfig.ApplyRuntimeSettings(rs) + + // MaxActiveBackends should take precedence + Expect(appConfig.MaxActiveBackends).To(Equal(3)) + Expect(appConfig.SingleBackend).To(BeFalse()) // 3 != 1, so single backend is false + }) + }) +}) diff --git a/core/config/runtime_settings.go b/core/config/runtime_settings.go new file mode 100644 index 000000000000..c02d4fcd7c20 --- /dev/null +++ b/core/config/runtime_settings.go @@ -0,0 +1,56 @@ +package config + +// RuntimeSettings represents runtime configuration that can be changed dynamically. +// This struct is used for: +// - API responses (GET /api/settings) +// - API requests (POST /api/settings) +// - Persisting to runtime_settings.json +// - Loading from runtime_settings.json on startup +// +// All fields are pointers to distinguish between "not set" and "set to zero/false value". 
+type RuntimeSettings struct { + // Watchdog settings + WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"` + WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"` + WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"` + WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` + WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"` + WatchdogInterval *string `json:"watchdog_interval,omitempty"` // Interval between watchdog checks (e.g., 2s, 30s) + + // Backend management + SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead + MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited, 1 = single backend mode) + ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"` + + // Memory Reclaimer settings (works with GPU if available, otherwise RAM) + MemoryReclaimerEnabled *bool `json:"memory_reclaimer_enabled,omitempty"` // Enable memory threshold monitoring + MemoryReclaimerThreshold *float64 `json:"memory_reclaimer_threshold,omitempty"` // Threshold 0.0-1.0 (e.g., 0.95 = 95%) + + // Performance settings + Threads *int `json:"threads,omitempty"` + ContextSize *int `json:"context_size,omitempty"` + F16 *bool `json:"f16,omitempty"` + Debug *bool `json:"debug,omitempty"` + + // Security/CORS settings + CORS *bool `json:"cors,omitempty"` + CSRF *bool `json:"csrf,omitempty"` + CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"` + + // P2P settings + P2PToken *string `json:"p2p_token,omitempty"` + P2PNetworkID *string `json:"p2p_network_id,omitempty"` + Federated *bool `json:"federated,omitempty"` + + // Gallery settings + Galleries *[]Gallery `json:"galleries,omitempty"` + BackendGalleries *[]Gallery `json:"backend_galleries,omitempty"` + AutoloadGalleries *bool `json:"autoload_galleries,omitempty"` + AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"` + + 
// API keys - No omitempty as we need to save empty arrays to clear keys + ApiKeys *[]string `json:"api_keys"` + + // Agent settings + AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"` +} diff --git a/core/http/endpoints/localai/settings.go b/core/http/endpoints/localai/settings.go index dee77646ed62..1cc7666e02c7 100644 --- a/core/http/endpoints/localai/settings.go +++ b/core/http/endpoints/localai/settings.go @@ -12,115 +12,15 @@ import ( "github.com/mudler/LocalAI/core/application" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/p2p" + "github.com/mudler/LocalAI/core/schema" "github.com/rs/zerolog/log" ) -type SettingsResponse struct { - Success bool `json:"success"` - Error string `json:"error,omitempty"` - Message string `json:"message,omitempty"` -} - -type RuntimeSettings struct { - WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"` - WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"` - WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"` - WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` - WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"` - SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead - MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited, 1 = single backend mode) - ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"` - Threads *int `json:"threads,omitempty"` - ContextSize *int `json:"context_size,omitempty"` - F16 *bool `json:"f16,omitempty"` - Debug *bool `json:"debug,omitempty"` - CORS *bool `json:"cors,omitempty"` - CSRF *bool `json:"csrf,omitempty"` - CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"` - P2PToken *string `json:"p2p_token,omitempty"` - P2PNetworkID *string `json:"p2p_network_id,omitempty"` - Federated *bool `json:"federated,omitempty"` - Galleries *[]config.Gallery 
`json:"galleries,omitempty"` - BackendGalleries *[]config.Gallery `json:"backend_galleries,omitempty"` - AutoloadGalleries *bool `json:"autoload_galleries,omitempty"` - AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"` - ApiKeys *[]string `json:"api_keys"` // No omitempty - we need to save empty arrays to clear keys - AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"` -} - // GetSettingsEndpoint returns current settings with precedence (env > file > defaults) func GetSettingsEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { appConfig := app.ApplicationConfig() - startupConfig := app.StartupConfig() - - if startupConfig == nil { - // Fallback if startup config not available - startupConfig = appConfig - } - - settings := RuntimeSettings{} - - // Set all current values (using pointers for RuntimeSettings) - watchdogIdle := appConfig.WatchDogIdle - watchdogBusy := appConfig.WatchDogBusy - watchdogEnabled := appConfig.WatchDog - singleBackend := appConfig.SingleBackend - maxActiveBackends := appConfig.MaxActiveBackends - parallelBackendRequests := appConfig.ParallelBackendRequests - threads := appConfig.Threads - contextSize := appConfig.ContextSize - f16 := appConfig.F16 - debug := appConfig.Debug - cors := appConfig.CORS - csrf := appConfig.CSRF - corsAllowOrigins := appConfig.CORSAllowOrigins - p2pToken := appConfig.P2PToken - p2pNetworkID := appConfig.P2PNetworkID - federated := appConfig.Federated - galleries := appConfig.Galleries - backendGalleries := appConfig.BackendGalleries - autoloadGalleries := appConfig.AutoloadGalleries - autoloadBackendGalleries := appConfig.AutoloadBackendGalleries - apiKeys := appConfig.ApiKeys - agentJobRetentionDays := appConfig.AgentJobRetentionDays - - settings.WatchdogIdleEnabled = &watchdogIdle - settings.WatchdogBusyEnabled = &watchdogBusy - settings.WatchdogEnabled = &watchdogEnabled - settings.SingleBackend = &singleBackend - 
settings.MaxActiveBackends = &maxActiveBackends - settings.ParallelBackendRequests = ¶llelBackendRequests - settings.Threads = &threads - settings.ContextSize = &contextSize - settings.F16 = &f16 - settings.Debug = &debug - settings.CORS = &cors - settings.CSRF = &csrf - settings.CORSAllowOrigins = &corsAllowOrigins - settings.P2PToken = &p2pToken - settings.P2PNetworkID = &p2pNetworkID - settings.Federated = &federated - settings.Galleries = &galleries - settings.BackendGalleries = &backendGalleries - settings.AutoloadGalleries = &autoloadGalleries - settings.AutoloadBackendGalleries = &autoloadBackendGalleries - settings.ApiKeys = &apiKeys - settings.AgentJobRetentionDays = &agentJobRetentionDays - - var idleTimeout, busyTimeout string - if appConfig.WatchDogIdleTimeout > 0 { - idleTimeout = appConfig.WatchDogIdleTimeout.String() - } else { - idleTimeout = "15m" // default - } - if appConfig.WatchDogBusyTimeout > 0 { - busyTimeout = appConfig.WatchDogBusyTimeout.String() - } else { - busyTimeout = "5m" // default - } - settings.WatchdogIdleTimeout = &idleTimeout - settings.WatchdogBusyTimeout = &busyTimeout + settings := appConfig.ToRuntimeSettings() return c.JSON(http.StatusOK, settings) } } @@ -132,21 +32,20 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { startupConfig := app.StartupConfig() if startupConfig == nil { - // Fallback if startup config not available startupConfig = appConfig } body, err := io.ReadAll(c.Request().Body) if err != nil { - return c.JSON(http.StatusBadRequest, SettingsResponse{ + return c.JSON(http.StatusBadRequest, schema.SettingsResponse{ Success: false, Error: "Failed to read request body: " + err.Error(), }) } - var settings RuntimeSettings + var settings config.RuntimeSettings if err := json.Unmarshal(body, &settings); err != nil { - return c.JSON(http.StatusBadRequest, SettingsResponse{ + return c.JSON(http.StatusBadRequest, schema.SettingsResponse{ Success: false, Error: "Failed to parse JSON: " + 
err.Error(), }) @@ -154,27 +53,33 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { // Validate timeouts if provided if settings.WatchdogIdleTimeout != nil { - _, err := time.ParseDuration(*settings.WatchdogIdleTimeout) - if err != nil { - return c.JSON(http.StatusBadRequest, SettingsResponse{ + if _, err := time.ParseDuration(*settings.WatchdogIdleTimeout); err != nil { + return c.JSON(http.StatusBadRequest, schema.SettingsResponse{ Success: false, Error: "Invalid watchdog_idle_timeout format: " + err.Error(), }) } } if settings.WatchdogBusyTimeout != nil { - _, err := time.ParseDuration(*settings.WatchdogBusyTimeout) - if err != nil { - return c.JSON(http.StatusBadRequest, SettingsResponse{ + if _, err := time.ParseDuration(*settings.WatchdogBusyTimeout); err != nil { + return c.JSON(http.StatusBadRequest, schema.SettingsResponse{ Success: false, Error: "Invalid watchdog_busy_timeout format: " + err.Error(), }) } } + if settings.WatchdogInterval != nil { + if _, err := time.ParseDuration(*settings.WatchdogInterval); err != nil { + return c.JSON(http.StatusBadRequest, schema.SettingsResponse{ + Success: false, + Error: "Invalid watchdog_interval format: " + err.Error(), + }) + } + } // Save to file if appConfig.DynamicConfigsDir == "" { - return c.JSON(http.StatusBadRequest, SettingsResponse{ + return c.JSON(http.StatusBadRequest, schema.SettingsResponse{ Success: false, Error: "DynamicConfigsDir is not set", }) @@ -183,133 +88,38 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { settingsFile := filepath.Join(appConfig.DynamicConfigsDir, "runtime_settings.json") settingsJSON, err := json.MarshalIndent(settings, "", " ") if err != nil { - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, Error: "Failed to marshal settings: " + err.Error(), }) } if err := os.WriteFile(settingsFile, settingsJSON, 0600); err 
!= nil { - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, Error: "Failed to write settings file: " + err.Error(), }) } - // Apply settings immediately, checking env var overrides per field - watchdogChanged := false - if settings.WatchdogEnabled != nil { - appConfig.WatchDog = *settings.WatchdogEnabled - watchdogChanged = true - } - if settings.WatchdogIdleEnabled != nil { - appConfig.WatchDogIdle = *settings.WatchdogIdleEnabled - if appConfig.WatchDogIdle { - appConfig.WatchDog = true - } - watchdogChanged = true - } - if settings.WatchdogBusyEnabled != nil { - appConfig.WatchDogBusy = *settings.WatchdogBusyEnabled - if appConfig.WatchDogBusy { - appConfig.WatchDog = true - } - watchdogChanged = true - } - if settings.WatchdogIdleTimeout != nil { - dur, _ := time.ParseDuration(*settings.WatchdogIdleTimeout) - appConfig.WatchDogIdleTimeout = dur - watchdogChanged = true - } - if settings.WatchdogBusyTimeout != nil { - dur, _ := time.ParseDuration(*settings.WatchdogBusyTimeout) - appConfig.WatchDogBusyTimeout = dur - watchdogChanged = true - } - if settings.MaxActiveBackends != nil { - appConfig.MaxActiveBackends = *settings.MaxActiveBackends - // For backward compatibility, update SingleBackend too - appConfig.SingleBackend = (*settings.MaxActiveBackends == 1) - watchdogChanged = true // LRU limit is managed by watchdog - } else if settings.SingleBackend != nil { - // Legacy support: SingleBackend maps to MaxActiveBackends = 1 - appConfig.SingleBackend = *settings.SingleBackend - if *settings.SingleBackend { - appConfig.MaxActiveBackends = 1 - } else { - appConfig.MaxActiveBackends = 0 - } - watchdogChanged = true // LRU limit is managed by watchdog - } - if settings.ParallelBackendRequests != nil { - appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests - } - if settings.Threads != nil { - appConfig.Threads = *settings.Threads - } - if 
settings.ContextSize != nil { - appConfig.ContextSize = *settings.ContextSize - } - if settings.F16 != nil { - appConfig.F16 = *settings.F16 - } - if settings.Debug != nil { - appConfig.Debug = *settings.Debug - } - if settings.CORS != nil { - appConfig.CORS = *settings.CORS - } - if settings.CSRF != nil { - appConfig.CSRF = *settings.CSRF - } - if settings.CORSAllowOrigins != nil { - appConfig.CORSAllowOrigins = *settings.CORSAllowOrigins - } - if settings.P2PToken != nil { - appConfig.P2PToken = *settings.P2PToken - } - if settings.P2PNetworkID != nil { - appConfig.P2PNetworkID = *settings.P2PNetworkID - } - if settings.Federated != nil { - appConfig.Federated = *settings.Federated - } - if settings.Galleries != nil { - appConfig.Galleries = *settings.Galleries - } - if settings.BackendGalleries != nil { - appConfig.BackendGalleries = *settings.BackendGalleries - } - if settings.AutoloadGalleries != nil { - appConfig.AutoloadGalleries = *settings.AutoloadGalleries - } - if settings.AutoloadBackendGalleries != nil { - appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries - } - agentJobChanged := false - if settings.AgentJobRetentionDays != nil { - appConfig.AgentJobRetentionDays = *settings.AgentJobRetentionDays - agentJobChanged = true - } + // Apply settings using centralized method + watchdogChanged := appConfig.ApplyRuntimeSettings(&settings) + + // Handle API keys specially (merge with startup keys) if settings.ApiKeys != nil { - // API keys from env vars (startup) should be kept, runtime settings keys are added - // Combine startup keys (env vars) with runtime settings keys envKeys := startupConfig.ApiKeys runtimeKeys := *settings.ApiKeys - // Merge: env keys first (they take precedence), then runtime keys appConfig.ApiKeys = append(envKeys, runtimeKeys...) - - // Note: We only save to runtime_settings.json (not api_keys.json) to avoid duplication - // The runtime_settings.json is the unified config file. 
If api_keys.json exists, - // it will be loaded first, but runtime_settings.json takes precedence and deduplicates. } + // Check if agent job retention changed + agentJobChanged := settings.AgentJobRetentionDays != nil + // Restart watchdog if settings changed if watchdogChanged { - if settings.WatchdogEnabled != nil && !*settings.WatchdogEnabled || settings.WatchdogEnabled == nil { + if settings.WatchdogEnabled != nil && !*settings.WatchdogEnabled { if err := app.StopWatchdog(); err != nil { log.Error().Err(err).Msg("Failed to stop watchdog") - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, Error: "Settings saved but failed to stop watchdog: " + err.Error(), }) @@ -317,7 +127,7 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { } else { if err := app.RestartWatchdog(); err != nil { log.Error().Err(err).Msg("Failed to restart watchdog") - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, Error: "Settings saved but failed to restart watchdog: " + err.Error(), }) @@ -329,7 +139,7 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { if agentJobChanged { if err := app.RestartAgentJobService(); err != nil { log.Error().Err(err).Msg("Failed to restart agent job service") - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, Error: "Settings saved but failed to restart agent job service: " + err.Error(), }) @@ -340,33 +150,30 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { p2pChanged := settings.P2PToken != nil || settings.P2PNetworkID != nil || settings.Federated != nil if p2pChanged { if settings.P2PToken != nil && *settings.P2PToken == "" { - // stop P2P if err := app.StopP2P(); 
err != nil { log.Error().Err(err).Msg("Failed to stop P2P") - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, Error: "Settings saved but failed to stop P2P: " + err.Error(), }) } } else { if settings.P2PToken != nil && *settings.P2PToken == "0" { - // generate a token if users sets 0 (disabled) token := p2p.GenerateToken(60, 60) settings.P2PToken = &token appConfig.P2PToken = token } - // Stop existing P2P if err := app.RestartP2P(); err != nil { - log.Error().Err(err).Msg("Failed to stop P2P") - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + log.Error().Err(err).Msg("Failed to restart P2P") + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, - Error: "Settings saved but failed to stop P2P: " + err.Error(), + Error: "Settings saved but failed to restart P2P: " + err.Error(), }) } } } - return c.JSON(http.StatusOK, SettingsResponse{ + return c.JSON(http.StatusOK, schema.SettingsResponse{ Success: true, Message: "Settings updated successfully", }) diff --git a/core/http/routes/ui_api.go b/core/http/routes/ui_api.go index 9287b31742f9..ae6f868aa2d5 100644 --- a/core/http/routes/ui_api.go +++ b/core/http/routes/ui_api.go @@ -19,6 +19,7 @@ import ( "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/pkg/model" + "github.com/mudler/LocalAI/pkg/xsysinfo" "github.com/rs/zerolog/log" ) @@ -917,6 +918,30 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model }) }) + // Resources API endpoint - unified memory info (GPU if available, otherwise RAM) + app.GET("/api/resources", func(c echo.Context) error { + resourceInfo := xsysinfo.GetResourceInfo() + + // Format watchdog interval + watchdogInterval := "2s" // default + if appConfig.WatchDogInterval > 0 { + watchdogInterval = appConfig.WatchDogInterval.String() + } + + response := 
map[string]interface{}{ + "type": resourceInfo.Type, // "gpu" or "ram" + "available": resourceInfo.Available, + "gpus": resourceInfo.GPUs, + "ram": resourceInfo.RAM, + "aggregate": resourceInfo.Aggregate, + "reclaimer_enabled": appConfig.MemoryReclaimerEnabled, + "reclaimer_threshold": appConfig.MemoryReclaimerThreshold, + "watchdog_interval": watchdogInterval, + } + + return c.JSON(200, response) + }) + if !appConfig.DisableRuntimeSettings { // Settings API app.GET("/api/settings", localai.GetSettingsEndpoint(applicationInstance)) diff --git a/core/http/views/index.html b/core/http/views/index.html index 598a7b0adc67..f5222c3639c0 100644 --- a/core/http/views/index.html +++ b/core/http/views/index.html @@ -462,6 +462,27 @@

How can I help you today?

+ +
+ +
+ {{ $loadedModels := .LoadedModels }}
How can I help you today? // Make functions available globally for Alpine.js window.stopModel = stopModel; window.stopAllModels = stopAllModels; + +// Resource Monitor component (GPU if available, otherwise RAM) +function resourceMonitor() { + return { + resourceData: null, + pollInterval: null, + + async fetchResourceData() { + try { + const response = await fetch('/api/resources'); + if (response.ok) { + this.resourceData = await response.json(); + } + } catch (error) { + console.error('Error fetching resource data:', error); + } + }, + + startPolling() { + // Initial fetch + this.fetchResourceData(); + // Poll every 5 seconds + this.pollInterval = setInterval(() => this.fetchResourceData(), 5000); + }, + + stopPolling() { + if (this.pollInterval) { + clearInterval(this.pollInterval); + } + } + } +} diff --git a/core/http/views/manage.html b/core/http/views/manage.html index 87e077c6bf9e..224f762fe16f 100644 --- a/core/http/views/manage.html +++ b/core/http/views/manage.html @@ -73,6 +73,106 @@

+ +
+ +
+
{{template "views/partials/inprogress" .}} @@ -426,6 +526,47 @@

No backends installed yet

diff --git a/core/schema/localai.go b/core/schema/localai.go index 5eb56d91bf5d..29e1faf3f1cd 100644 --- a/core/schema/localai.go +++ b/core/schema/localai.go @@ -163,3 +163,10 @@ type ImportModelRequest struct { URI string `json:"uri"` Preferences json.RawMessage `json:"preferences,omitempty"` } + +// SettingsResponse is the response type for settings API operations +type SettingsResponse struct { + Success bool `json:"success"` + Error string `json:"error,omitempty"` + Message string `json:"message,omitempty"` +} diff --git a/pkg/model/watchdog.go b/pkg/model/watchdog.go index 4feb49c35dd6..77ae4572b96c 100644 --- a/pkg/model/watchdog.go +++ b/pkg/model/watchdog.go @@ -5,6 +5,7 @@ import ( "sync" "time" + "github.com/mudler/LocalAI/pkg/xsysinfo" process "github.com/mudler/go-processmanager" "github.com/rs/zerolog/log" ) @@ -17,6 +18,9 @@ import ( // force a reload of the model. // The watchdog also supports LRU (Least Recently Used) eviction when a maximum // number of active backends is configured. +// The watchdog also supports memory threshold monitoring - when memory usage +// (GPU VRAM if available, otherwise system RAM) exceeds the threshold, +// it will evict backends using the LRU strategy. 
// The watchdog runs as a separate go routine, // and the GRPC client talks to it via a channel to send status updates type WatchDog struct { @@ -32,26 +36,48 @@ type WatchDog struct { busyCheck, idleCheck bool lruLimit int // Maximum number of active backends (0 = unlimited) + + // Memory reclaimer settings (works with GPU if available, otherwise RAM) + memoryReclaimerEnabled bool // Enable memory threshold monitoring + memoryReclaimerThreshold float64 // Threshold 0.0-1.0 (e.g., 0.95 = 95%) + watchdogInterval time.Duration } type ProcessManager interface { ShutdownModel(modelName string) error } -func NewWatchDog(pm ProcessManager, timeoutBusy, timeoutIdle time.Duration, busy, idle bool, lruLimit int) *WatchDog { +// NewWatchDog creates a new WatchDog with the provided options. +// Example usage: +// +// wd := NewWatchDog( +// WithProcessManager(pm), +// WithBusyTimeout(5*time.Minute), +// WithIdleTimeout(15*time.Minute), +// WithBusyCheck(true), +// WithIdleCheck(true), +// WithLRULimit(3), +// WithMemoryReclaimer(true, 0.95), +// ) +func NewWatchDog(opts ...WatchDogOption) *WatchDog { + o := NewWatchDogOptions(opts...) 
+ return &WatchDog{ - timeout: timeoutBusy, - idletimeout: timeoutIdle, - pm: pm, - busyTime: make(map[string]time.Time), - idleTime: make(map[string]time.Time), - lastUsed: make(map[string]time.Time), - addressMap: make(map[string]*process.Process), - busyCheck: busy, - idleCheck: idle, - lruLimit: lruLimit, - addressModelMap: make(map[string]string), - stop: make(chan bool, 1), + timeout: o.busyTimeout, + idletimeout: o.idleTimeout, + pm: o.processManager, + busyTime: make(map[string]time.Time), + idleTime: make(map[string]time.Time), + lastUsed: make(map[string]time.Time), + addressMap: make(map[string]*process.Process), + busyCheck: o.busyCheck, + idleCheck: o.idleCheck, + lruLimit: o.lruLimit, + addressModelMap: make(map[string]string), + stop: make(chan bool, 1), + memoryReclaimerEnabled: o.memoryReclaimerEnabled, + memoryReclaimerThreshold: o.memoryReclaimerThreshold, + watchdogInterval: o.watchdogInterval, } } @@ -69,6 +95,21 @@ func (wd *WatchDog) GetLRULimit() int { return wd.lruLimit } +// SetMemoryReclaimer updates the memory reclaimer settings dynamically +func (wd *WatchDog) SetMemoryReclaimer(enabled bool, threshold float64) { + wd.Lock() + defer wd.Unlock() + wd.memoryReclaimerEnabled = enabled + wd.memoryReclaimerThreshold = threshold +} + +// GetMemoryReclaimerSettings returns the current memory reclaimer settings +func (wd *WatchDog) GetMemoryReclaimerSettings() (enabled bool, threshold float64) { + wd.Lock() + defer wd.Unlock() + return wd.memoryReclaimerEnabled, wd.memoryReclaimerThreshold +} + func (wd *WatchDog) Shutdown() { wd.Lock() defer wd.Unlock() @@ -202,17 +243,27 @@ func (wd *WatchDog) Run() { case <-wd.stop: log.Info().Msg("[WatchDog] Stopping watchdog") return - case <-time.After(30 * time.Second): - if !wd.busyCheck && !wd.idleCheck { + case <-time.After(wd.watchdogInterval): + // Check if any monitoring is enabled + wd.Lock() + busyCheck := wd.busyCheck + idleCheck := wd.idleCheck + memoryCheck := wd.memoryReclaimerEnabled + 
wd.Unlock() + + if !busyCheck && !idleCheck && !memoryCheck { log.Info().Msg("[WatchDog] No checks enabled, stopping watchdog") return } - if wd.busyCheck { + if busyCheck { wd.checkBusy() } - if wd.idleCheck { + if idleCheck { wd.checkIdle() } + if memoryCheck { + wd.checkMemory() + } } } } @@ -278,6 +329,105 @@ func (wd *WatchDog) checkBusy() { } } +// checkMemory monitors memory usage (GPU VRAM if available, otherwise RAM) and evicts backends when usage exceeds threshold +func (wd *WatchDog) checkMemory() { + wd.Lock() + threshold := wd.memoryReclaimerThreshold + enabled := wd.memoryReclaimerEnabled + modelCount := len(wd.addressModelMap) + wd.Unlock() + + if !enabled || threshold <= 0 || modelCount == 0 { + return + } + + // Get current memory usage (GPU if available, otherwise RAM) + aggregate := xsysinfo.GetResourceAggregateInfo() + if aggregate.TotalMemory == 0 { + log.Debug().Msg("[WatchDog] No memory information available for memory reclaimer") + return + } + + // Convert threshold from 0.0-1.0 to percentage + thresholdPercent := threshold * 100 + + memoryType := "GPU" + if aggregate.GPUCount == 0 { + memoryType = "RAM" + } + + log.Debug(). + Str("type", memoryType). + Float64("usage_percent", aggregate.UsagePercent). + Float64("threshold_percent", thresholdPercent). + Int("loaded_models", modelCount). + Msg("[WatchDog] Memory check") + + // Check if usage exceeds threshold + if aggregate.UsagePercent > thresholdPercent { + log.Warn(). + Str("type", memoryType). + Float64("usage_percent", aggregate.UsagePercent). + Float64("threshold_percent", thresholdPercent). 
+ Msg("[WatchDog] Memory usage exceeds threshold, evicting LRU backend") + + // Evict the least recently used model + wd.evictLRUModel() + } +} + +// evictLRUModel evicts the least recently used model +func (wd *WatchDog) evictLRUModel() { + wd.Lock() + + if len(wd.addressModelMap) == 0 { + wd.Unlock() + return + } + + // Build a list of models sorted by last used time (oldest first) + var models []modelUsageInfo + for address, model := range wd.addressModelMap { + lastUsed := wd.lastUsed[address] + if lastUsed.IsZero() { + lastUsed = time.Time{} + } + models = append(models, modelUsageInfo{ + address: address, + model: model, + lastUsed: lastUsed, + }) + } + + if len(models) == 0 { + wd.Unlock() + return + } + + // Sort by lastUsed time (oldest first) + sort.Slice(models, func(i, j int) bool { + return models[i].lastUsed.Before(models[j].lastUsed) + }) + + // Get the LRU model + lruModel := models[0] + log.Info(). + Str("model", lruModel.model). + Time("lastUsed", lruModel.lastUsed). + Msg("[WatchDog] Memory reclaimer evicting LRU model") + + // Untrack the model + wd.untrack(lruModel.address) + wd.Unlock() + + // Shutdown the model + if err := wd.pm.ShutdownModel(lruModel.model); err != nil { + log.Error().Err(err).Str("model", lruModel.model).Msg("[WatchDog] error shutting down model during memory reclamation") + } else { + log.Info().Str("model", lruModel.model).Msg("[WatchDog] Memory reclaimer eviction complete") + } +} + func (wd *WatchDog) untrack(address string) { delete(wd.busyTime, address) delete(wd.idleTime, address) diff --git a/pkg/model/watchdog_options.go b/pkg/model/watchdog_options.go new file mode 100644 index 000000000000..68e1a78b1b9a --- /dev/null +++ b/pkg/model/watchdog_options.go @@ -0,0 +1,124 @@ +package model + +import ( + "time" +) + +// WatchDogOptions contains all configuration for the WatchDog +type WatchDogOptions struct { + processManager ProcessManager + + // Timeout settings + busyTimeout time.Duration + idleTimeout time.Duration 
+ watchdogInterval time.Duration + + // Check toggles + busyCheck bool + idleCheck bool + + // LRU settings + lruLimit int // Maximum number of active backends (0 = unlimited) + + // Memory reclaimer settings (works with GPU if available, otherwise RAM) + memoryReclaimerEnabled bool // Enable memory threshold monitoring + memoryReclaimerThreshold float64 // Threshold 0.0-1.0 (e.g., 0.95 = 95%) +} + +// WatchDogOption is a function that configures WatchDogOptions +type WatchDogOption func(*WatchDogOptions) + +// WithProcessManager sets the process manager for the watchdog +func WithProcessManager(pm ProcessManager) WatchDogOption { + return func(o *WatchDogOptions) { + o.processManager = pm + } +} + +// WithBusyTimeout sets the busy timeout duration +func WithBusyTimeout(timeout time.Duration) WatchDogOption { + return func(o *WatchDogOptions) { + o.busyTimeout = timeout + } +} + +// WithIdleTimeout sets the idle timeout duration +func WithIdleTimeout(timeout time.Duration) WatchDogOption { + return func(o *WatchDogOptions) { + o.idleTimeout = timeout + } +} + +// WithWatchdogCheck sets the watchdog check duration +func WithWatchdogInterval(interval time.Duration) WatchDogOption { + return func(o *WatchDogOptions) { + o.watchdogInterval = interval + } +} + +// WithBusyCheck enables or disables busy checking +func WithBusyCheck(enabled bool) WatchDogOption { + return func(o *WatchDogOptions) { + o.busyCheck = enabled + } +} + +// WithIdleCheck enables or disables idle checking +func WithIdleCheck(enabled bool) WatchDogOption { + return func(o *WatchDogOptions) { + o.idleCheck = enabled + } +} + +// WithLRULimit sets the maximum number of active backends (0 = unlimited) +func WithLRULimit(limit int) WatchDogOption { + return func(o *WatchDogOptions) { + o.lruLimit = limit + } +} + +// WithMemoryReclaimer enables memory threshold monitoring with the specified threshold +// Works with GPU VRAM if available, otherwise uses system RAM +func WithMemoryReclaimer(enabled 
bool, threshold float64) WatchDogOption { + return func(o *WatchDogOptions) { + o.memoryReclaimerEnabled = enabled + o.memoryReclaimerThreshold = threshold + } +} + +// WithMemoryReclaimerEnabled enables or disables memory threshold monitoring +func WithMemoryReclaimerEnabled(enabled bool) WatchDogOption { + return func(o *WatchDogOptions) { + o.memoryReclaimerEnabled = enabled + } +} + +// WithMemoryReclaimerThreshold sets the memory threshold (0.0-1.0) +func WithMemoryReclaimerThreshold(threshold float64) WatchDogOption { + return func(o *WatchDogOptions) { + o.memoryReclaimerThreshold = threshold + } +} + +// DefaultWatchDogOptions returns default options for the watchdog +func DefaultWatchDogOptions() *WatchDogOptions { + return &WatchDogOptions{ + busyTimeout: 5 * time.Minute, + idleTimeout: 15 * time.Minute, + watchdogInterval: 2 * time.Second, + busyCheck: false, + idleCheck: false, + lruLimit: 0, + memoryReclaimerEnabled: false, + memoryReclaimerThreshold: 0.95, + } +} + +// NewWatchDogOptions creates WatchDogOptions with the provided options applied +func NewWatchDogOptions(opts ...WatchDogOption) *WatchDogOptions { + o := DefaultWatchDogOptions() + for _, opt := range opts { + opt(o) + } + return o +} diff --git a/pkg/model/watchdog_options_test.go b/pkg/model/watchdog_options_test.go new file mode 100644 index 000000000000..2710408f6dc3 --- /dev/null +++ b/pkg/model/watchdog_options_test.go @@ -0,0 +1,187 @@ +package model_test + +import ( + "time" + + "github.com/mudler/LocalAI/pkg/model" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +var _ = Describe("WatchDogOptions", func() { + Context("DefaultWatchDogOptions", func() { + It("should return sensible defaults", func() { + opts := model.DefaultWatchDogOptions() + + Expect(opts).ToNot(BeNil()) + }) + }) + + Context("NewWatchDogOptions", func() { + It("should apply options in order", func() { + pm := newMockProcessManager() + opts := model.NewWatchDogOptions( + model.WithProcessManager(pm), + model.WithBusyTimeout(10*time.Minute), + model.WithIdleTimeout(20*time.Minute), + model.WithBusyCheck(true), + model.WithIdleCheck(true), + model.WithLRULimit(5), + model.WithMemoryReclaimer(true, 0.85), + ) + + Expect(opts).ToNot(BeNil()) + }) + + It("should allow overriding options", func() { + opts := model.NewWatchDogOptions( + model.WithLRULimit(3), + model.WithLRULimit(7), // override + ) + + // Create watchdog to verify + wd := model.NewWatchDog( + model.WithProcessManager(newMockProcessManager()), + model.WithLRULimit(3), + model.WithLRULimit(7), // override + ) + Expect(wd.GetLRULimit()).To(Equal(7)) + + Expect(opts).ToNot(BeNil()) + }) + }) + + Context("Individual Options", func() { + var pm *mockProcessManager + + BeforeEach(func() { + pm = newMockProcessManager() + }) + + It("WithProcessManager should set process manager", func() { + wd := model.NewWatchDog( + model.WithProcessManager(pm), + ) + Expect(wd).ToNot(BeNil()) + }) + + It("WithBusyTimeout should set busy timeout", func() { + wd := model.NewWatchDog( + model.WithProcessManager(pm), + model.WithBusyTimeout(7*time.Minute), + ) + Expect(wd).ToNot(BeNil()) + }) + + It("WithIdleTimeout should set idle timeout", func() { + wd := model.NewWatchDog( + model.WithProcessManager(pm), + model.WithIdleTimeout(25*time.Minute), + ) + Expect(wd).ToNot(BeNil()) + }) + + It("WithBusyCheck should enable busy checking", func() { + wd := model.NewWatchDog( + model.WithProcessManager(pm), + model.WithBusyCheck(true), + ) + Expect(wd).ToNot(BeNil()) + }) + + It("WithIdleCheck 
should enable idle checking", func() { + wd := model.NewWatchDog( + model.WithProcessManager(pm), + model.WithIdleCheck(true), + ) + Expect(wd).ToNot(BeNil()) + }) + + It("WithLRULimit should set LRU limit", func() { + wd := model.NewWatchDog( + model.WithProcessManager(pm), + model.WithLRULimit(10), + ) + Expect(wd.GetLRULimit()).To(Equal(10)) + }) + + It("WithMemoryReclaimer should set both enabled and threshold", func() { + wd := model.NewWatchDog( + model.WithProcessManager(pm), + model.WithMemoryReclaimer(true, 0.88), + ) + enabled, threshold := wd.GetMemoryReclaimerSettings() + Expect(enabled).To(BeTrue()) + Expect(threshold).To(Equal(0.88)) + }) + + It("WithMemoryReclaimerEnabled should set enabled flag only", func() { + wd := model.NewWatchDog( + model.WithProcessManager(pm), + model.WithMemoryReclaimerEnabled(true), + ) + enabled, _ := wd.GetMemoryReclaimerSettings() + Expect(enabled).To(BeTrue()) + }) + + It("WithMemoryReclaimerThreshold should set threshold only", func() { + wd := model.NewWatchDog( + model.WithProcessManager(pm), + model.WithMemoryReclaimerThreshold(0.75), + ) + _, threshold := wd.GetMemoryReclaimerSettings() + Expect(threshold).To(Equal(0.75)) + }) + }) + + Context("Option Combinations", func() { + It("should work with all options combined", func() { + pm := newMockProcessManager() + wd := model.NewWatchDog( + model.WithProcessManager(pm), + model.WithBusyTimeout(3*time.Minute), + model.WithIdleTimeout(10*time.Minute), + model.WithBusyCheck(true), + model.WithIdleCheck(true), + model.WithLRULimit(2), + model.WithMemoryReclaimerEnabled(true), + model.WithMemoryReclaimerThreshold(0.92), + ) + + Expect(wd).ToNot(BeNil()) + Expect(wd.GetLRULimit()).To(Equal(2)) + + enabled, threshold := wd.GetMemoryReclaimerSettings() + Expect(enabled).To(BeTrue()) + Expect(threshold).To(Equal(0.92)) + }) + + It("should work with no options (all defaults)", func() { + wd := model.NewWatchDog() + + Expect(wd).ToNot(BeNil()) + 
Expect(wd.GetLRULimit()).To(Equal(0)) + + enabled, threshold := wd.GetMemoryReclaimerSettings() + Expect(enabled).To(BeFalse()) + Expect(threshold).To(Equal(0.95)) // default + }) + + It("should allow partial configuration", func() { + pm := newMockProcessManager() + wd := model.NewWatchDog( + model.WithProcessManager(pm), + model.WithLRULimit(3), + ) + + Expect(wd).ToNot(BeNil()) + Expect(wd.GetLRULimit()).To(Equal(3)) + + // Memory reclaimer should use defaults + enabled, threshold := wd.GetMemoryReclaimerSettings() + Expect(enabled).To(BeFalse()) + Expect(threshold).To(Equal(0.95)) + }) + }) +}) + diff --git a/pkg/model/watchdog_test.go b/pkg/model/watchdog_test.go index 30d7ffc6667a..da25bdf4d8d8 100644 --- a/pkg/model/watchdog_test.go +++ b/pkg/model/watchdog_test.go @@ -53,25 +53,82 @@ var _ = Describe("WatchDog", func() { Context("LRU Limit", func() { It("should create watchdog with LRU limit", func() { - wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 2) + wd = model.NewWatchDog( + model.WithProcessManager(pm), + model.WithBusyTimeout(5*time.Minute), + model.WithIdleTimeout(15*time.Minute), + model.WithLRULimit(2), + ) Expect(wd.GetLRULimit()).To(Equal(2)) }) It("should allow updating LRU limit dynamically", func() { - wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 2) + wd = model.NewWatchDog( + model.WithProcessManager(pm), + model.WithLRULimit(2), + ) wd.SetLRULimit(5) Expect(wd.GetLRULimit()).To(Equal(5)) }) It("should return 0 for disabled LRU", func() { - wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 0) + wd = model.NewWatchDog( + model.WithProcessManager(pm), + model.WithLRULimit(0), + ) Expect(wd.GetLRULimit()).To(Equal(0)) }) }) + Context("Memory Reclaimer Options", func() { + It("should create watchdog with memory reclaimer settings", func() { + wd = model.NewWatchDog( + model.WithProcessManager(pm), + model.WithMemoryReclaimer(true, 0.85), + ) + enabled, threshold := 
wd.GetMemoryReclaimerSettings() + Expect(enabled).To(BeTrue()) + Expect(threshold).To(Equal(0.85)) + }) + + It("should allow setting memory reclaimer via separate options", func() { + wd = model.NewWatchDog( + model.WithProcessManager(pm), + model.WithMemoryReclaimerEnabled(true), + model.WithMemoryReclaimerThreshold(0.90), + ) + enabled, threshold := wd.GetMemoryReclaimerSettings() + Expect(enabled).To(BeTrue()) + Expect(threshold).To(Equal(0.90)) + }) + + It("should use default threshold when not specified", func() { + wd = model.NewWatchDog( + model.WithProcessManager(pm), + ) + _, threshold := wd.GetMemoryReclaimerSettings() + Expect(threshold).To(Equal(0.95)) // default + }) + + It("should allow updating memory reclaimer settings dynamically", func() { + wd = model.NewWatchDog( + model.WithProcessManager(pm), + ) + wd.SetMemoryReclaimer(true, 0.80) + enabled, threshold := wd.GetMemoryReclaimerSettings() + Expect(enabled).To(BeTrue()) + Expect(threshold).To(Equal(0.80)) + }) + }) + Context("Model Tracking", func() { BeforeEach(func() { - wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 3) + wd = model.NewWatchDog( + model.WithProcessManager(pm), + model.WithBusyTimeout(5*time.Minute), + model.WithIdleTimeout(15*time.Minute), + model.WithLRULimit(3), + ) }) It("should track loaded models count", func() { @@ -108,7 +165,12 @@ var _ = Describe("WatchDog", func() { Context("EnforceLRULimit", func() { BeforeEach(func() { - wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 2) + wd = model.NewWatchDog( + model.WithProcessManager(pm), + model.WithBusyTimeout(5*time.Minute), + model.WithIdleTimeout(15*time.Minute), + model.WithLRULimit(2), + ) }) It("should not evict when under limit", func() { @@ -218,7 +280,12 @@ var _ = Describe("WatchDog", func() { Context("Single Backend Mode (LRU=1)", func() { BeforeEach(func() { - wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 1) + wd = model.NewWatchDog( + 
model.WithProcessManager(pm), + model.WithBusyTimeout(5*time.Minute), + model.WithIdleTimeout(15*time.Minute), + model.WithLRULimit(1), + ) }) It("should evict existing model when loading new one", func() { @@ -241,4 +308,36 @@ var _ = Describe("WatchDog", func() { Expect(len(pm.getShutdownCalls())).To(Equal(5)) }) }) + + Context("Functional Options", func() { + It("should use default options when none provided", func() { + wd = model.NewWatchDog( + model.WithProcessManager(pm), + ) + Expect(wd.GetLRULimit()).To(Equal(0)) + + enabled, threshold := wd.GetMemoryReclaimerSettings() + Expect(enabled).To(BeFalse()) + Expect(threshold).To(Equal(0.95)) + }) + + It("should allow combining multiple options", func() { + wd = model.NewWatchDog( + model.WithProcessManager(pm), + model.WithBusyTimeout(10*time.Minute), + model.WithIdleTimeout(30*time.Minute), + model.WithBusyCheck(true), + model.WithIdleCheck(true), + model.WithLRULimit(5), + model.WithMemoryReclaimerEnabled(true), + model.WithMemoryReclaimerThreshold(0.80), + ) + + Expect(wd.GetLRULimit()).To(Equal(5)) + + enabled, threshold := wd.GetMemoryReclaimerSettings() + Expect(enabled).To(BeTrue()) + Expect(threshold).To(Equal(0.80)) + }) + }) }) diff --git a/pkg/xsysinfo/gpu.go b/pkg/xsysinfo/gpu.go index bfcf9a59d191..560377044ce5 100644 --- a/pkg/xsysinfo/gpu.go +++ b/pkg/xsysinfo/gpu.go @@ -1,13 +1,83 @@ package xsysinfo import ( + "bytes" + "encoding/json" + "os/exec" + "strconv" "strings" "sync" "github.com/jaypipes/ghw" "github.com/jaypipes/ghw/pkg/gpu" + "github.com/rs/zerolog/log" ) +// GPU vendor constants +const ( + VendorNVIDIA = "nvidia" + VendorAMD = "amd" + VendorIntel = "intel" + VendorVulkan = "vulkan" + VendorUnknown = "unknown" +) + +// UnifiedMemoryDevices is a list of GPU device name patterns that use unified memory +// (shared with system RAM). When these devices are detected and report N/A for VRAM, +// we fall back to system RAM information. 
+var UnifiedMemoryDevices = []string{ + "NVIDIA GB10", + "GB10", + // Add more unified memory devices here as needed +} + +// GPUMemoryInfo contains real-time GPU memory usage information +type GPUMemoryInfo struct { + Index int `json:"index"` + Name string `json:"name"` + Vendor string `json:"vendor"` + TotalVRAM uint64 `json:"total_vram"` // Total VRAM in bytes + UsedVRAM uint64 `json:"used_vram"` // Used VRAM in bytes + FreeVRAM uint64 `json:"free_vram"` // Free VRAM in bytes + UsagePercent float64 `json:"usage_percent"` // Usage as percentage (0-100) +} + +// GPUAggregateInfo contains aggregate GPU information across all GPUs +type GPUAggregateInfo struct { + TotalVRAM uint64 `json:"total_vram"` + UsedVRAM uint64 `json:"used_vram"` + FreeVRAM uint64 `json:"free_vram"` + UsagePercent float64 `json:"usage_percent"` + GPUCount int `json:"gpu_count"` +} + +// SystemRAMInfo contains system RAM usage information +type SystemRAMInfo struct { + Total uint64 `json:"total"` + Used uint64 `json:"used"` + Free uint64 `json:"free"` + Available uint64 `json:"available"` + UsagePercent float64 `json:"usage_percent"` +} + +// AggregateMemoryInfo contains aggregate memory information (unified for GPU/RAM) +type AggregateMemoryInfo struct { + TotalMemory uint64 `json:"total_memory"` + UsedMemory uint64 `json:"used_memory"` + FreeMemory uint64 `json:"free_memory"` + UsagePercent float64 `json:"usage_percent"` + GPUCount int `json:"gpu_count"` +} + +// ResourceInfo represents unified memory resource information +type ResourceInfo struct { + Type string `json:"type"` // "gpu" or "ram" + Available bool `json:"available"` + GPUs []GPUMemoryInfo `json:"gpus,omitempty"` + RAM *SystemRAMInfo `json:"ram,omitempty"` + Aggregate AggregateMemoryInfo `json:"aggregate"` +} + var ( gpuCache []*gpu.GraphicsCard gpuCacheOnce sync.Once @@ -60,3 +130,632 @@ func HasGPU(vendor string) bool { } return false } + +// isUnifiedMemoryDevice checks if the given GPU name matches any known unified memory 
device +func isUnifiedMemoryDevice(gpuName string) bool { + gpuNameUpper := strings.ToUpper(gpuName) + for _, pattern := range UnifiedMemoryDevices { + if strings.Contains(gpuNameUpper, strings.ToUpper(pattern)) { + return true + } + } + return false +} + +// getSystemRAM returns system RAM information using ghw +func getSystemRAM() (total, used, free uint64, err error) { + memory, err := ghw.Memory() + if err != nil { + return 0, 0, 0, err + } + + total = uint64(memory.TotalUsableBytes) + // ghw doesn't provide used/free directly, but we can estimate + // For unified memory GPUs, we report total system RAM as available VRAM + // since the GPU can potentially use all of it + free = total + used = 0 + + return total, used, free, nil +} + +// GetGPUMemoryUsage returns real-time GPU memory usage for all detected GPUs. +// It tries multiple vendor-specific tools in order: NVIDIA, AMD, Intel, Vulkan. +// Returns an empty slice if no GPU monitoring tools are available. +func GetGPUMemoryUsage() []GPUMemoryInfo { + var gpus []GPUMemoryInfo + + // Try NVIDIA first + nvidiaGPUs := getNVIDIAGPUMemory() + if len(nvidiaGPUs) > 0 { + gpus = append(gpus, nvidiaGPUs...) + } + + // XXX: Note - I could not test this with AMD and Intel GPUs, so I'm not sure if it works and it was added with the help of AI. + + // Try AMD ROCm + amdGPUs := getAMDGPUMemory() + if len(amdGPUs) > 0 { + // Adjust indices to continue from NVIDIA GPUs + startIdx := len(gpus) + for i := range amdGPUs { + amdGPUs[i].Index = startIdx + i + } + gpus = append(gpus, amdGPUs...) + } + + // Try Intel + intelGPUs := getIntelGPUMemory() + if len(intelGPUs) > 0 { + startIdx := len(gpus) + for i := range intelGPUs { + intelGPUs[i].Index = startIdx + i + } + gpus = append(gpus, intelGPUs...) + } + + // Try Vulkan as fallback for device detection (limited real-time data) + if len(gpus) == 0 { + vulkanGPUs := getVulkanGPUMemory() + gpus = append(gpus, vulkanGPUs...) 
+ } + + return gpus +} + +// GetGPUAggregateInfo returns aggregate GPU information across all GPUs +func GetGPUAggregateInfo() GPUAggregateInfo { + gpus := GetGPUMemoryUsage() + + var aggregate GPUAggregateInfo + aggregate.GPUCount = len(gpus) + + for _, gpu := range gpus { + aggregate.TotalVRAM += gpu.TotalVRAM + aggregate.UsedVRAM += gpu.UsedVRAM + aggregate.FreeVRAM += gpu.FreeVRAM + } + + if aggregate.TotalVRAM > 0 { + aggregate.UsagePercent = float64(aggregate.UsedVRAM) / float64(aggregate.TotalVRAM) * 100 + } + + return aggregate +} + +// getNVIDIAGPUMemory queries NVIDIA GPUs using nvidia-smi +func getNVIDIAGPUMemory() []GPUMemoryInfo { + // Check if nvidia-smi is available + if _, err := exec.LookPath("nvidia-smi"); err != nil { + return nil + } + + cmd := exec.Command("nvidia-smi", + "--query-gpu=index,name,memory.total,memory.used,memory.free", + "--format=csv,noheader,nounits") + + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + if err := cmd.Run(); err != nil { + log.Debug().Err(err).Str("stderr", stderr.String()).Msg("nvidia-smi failed") + return nil + } + + var gpus []GPUMemoryInfo + lines := strings.Split(strings.TrimSpace(stdout.String()), "\n") + + for _, line := range lines { + if line == "" { + continue + } + + parts := strings.Split(line, ", ") + if len(parts) < 5 { + continue + } + + idx, _ := strconv.Atoi(strings.TrimSpace(parts[0])) + name := strings.TrimSpace(parts[1]) + totalStr := strings.TrimSpace(parts[2]) + usedStr := strings.TrimSpace(parts[3]) + freeStr := strings.TrimSpace(parts[4]) + + var totalBytes, usedBytes, freeBytes uint64 + var usagePercent float64 + + // Check if memory values are N/A (unified memory devices like GB10) + isNA := totalStr == "[N/A]" || usedStr == "[N/A]" || freeStr == "[N/A]" + + if isNA && isUnifiedMemoryDevice(name) { + // Unified memory device - fall back to system RAM + sysTotal, sysUsed, sysFree, err := getSystemRAM() + if err != nil { + 
log.Debug().Err(err).Str("device", name).Msg("failed to get system RAM for unified memory device") + // Still add the GPU but with zero memory info + gpus = append(gpus, GPUMemoryInfo{ + Index: idx, + Name: name, + Vendor: VendorNVIDIA, + TotalVRAM: 0, + UsedVRAM: 0, + FreeVRAM: 0, + UsagePercent: 0, + }) + continue + } + + totalBytes = sysTotal + usedBytes = sysUsed + freeBytes = sysFree + if totalBytes > 0 { + usagePercent = float64(usedBytes) / float64(totalBytes) * 100 + } + + log.Debug(). + Str("device", name). + Uint64("system_ram_bytes", totalBytes). + Msg("using system RAM for unified memory GPU") + } else if isNA { + // Unknown device with N/A values - skip memory info + log.Debug().Str("device", name).Msg("nvidia-smi returned N/A for unknown device") + gpus = append(gpus, GPUMemoryInfo{ + Index: idx, + Name: name, + Vendor: VendorNVIDIA, + TotalVRAM: 0, + UsedVRAM: 0, + FreeVRAM: 0, + UsagePercent: 0, + }) + continue + } else { + // Normal GPU with dedicated VRAM + totalMB, _ := strconv.ParseFloat(totalStr, 64) + usedMB, _ := strconv.ParseFloat(usedStr, 64) + freeMB, _ := strconv.ParseFloat(freeStr, 64) + + // Convert MB to bytes + totalBytes = uint64(totalMB * 1024 * 1024) + usedBytes = uint64(usedMB * 1024 * 1024) + freeBytes = uint64(freeMB * 1024 * 1024) + + if totalBytes > 0 { + usagePercent = float64(usedBytes) / float64(totalBytes) * 100 + } + } + + gpus = append(gpus, GPUMemoryInfo{ + Index: idx, + Name: name, + Vendor: VendorNVIDIA, + TotalVRAM: totalBytes, + UsedVRAM: usedBytes, + FreeVRAM: freeBytes, + UsagePercent: usagePercent, + }) + } + + return gpus +} + +// getAMDGPUMemory queries AMD GPUs using rocm-smi +func getAMDGPUMemory() []GPUMemoryInfo { + // Check if rocm-smi is available + if _, err := exec.LookPath("rocm-smi"); err != nil { + return nil + } + + // Try CSV format first + cmd := exec.Command("rocm-smi", "--showmeminfo", "vram", "--csv") + + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + if err 
:= cmd.Run(); err != nil {
		log.Debug().Err(err).Str("stderr", stderr.String()).Msg("rocm-smi failed")
		return nil
	}

	var gpus []GPUMemoryInfo
	// Each non-header line of the rocm-smi output is expected to be a
	// comma-separated record: index, used memory, total memory.
	lines := strings.Split(strings.TrimSpace(stdout.String()), "\n")

	// Skip header line
	for i, line := range lines {
		if i == 0 || line == "" {
			continue
		}

		parts := strings.Split(line, ",")
		if len(parts) < 3 {
			continue
		}

		// Parse GPU index from first column (usually "GPU[0]" format)
		idxStr := strings.TrimSpace(parts[0])
		idx := 0
		if strings.HasPrefix(idxStr, "GPU[") {
			idxStr = strings.TrimPrefix(idxStr, "GPU[")
			idxStr = strings.TrimSuffix(idxStr, "]")
			idx, _ = strconv.Atoi(idxStr) // on parse failure idx stays 0
		}

		// Parse memory values (in bytes or MB depending on rocm-smi version)
		usedBytes, _ := strconv.ParseUint(strings.TrimSpace(parts[1]), 10, 64)
		totalBytes, _ := strconv.ParseUint(strings.TrimSpace(parts[2]), 10, 64)

		// If values seem like MB, convert to bytes
		// NOTE(review): heuristic — a total below 1,000,000 is assumed to be
		// MB (i.e. no GPU would report < ~1 MB of VRAM in bytes). A card
		// reporting < 1 TB in MB units still satisfies this; confirm against
		// the rocm-smi versions actually supported.
		if totalBytes < 1000000 {
			usedBytes *= 1024 * 1024
			totalBytes *= 1024 * 1024
		}

		// Clamp so free never underflows when used > total (bad tool output).
		freeBytes := uint64(0)
		if totalBytes > usedBytes {
			freeBytes = totalBytes - usedBytes
		}

		usagePercent := 0.0
		if totalBytes > 0 {
			usagePercent = float64(usedBytes) / float64(totalBytes) * 100
		}

		gpus = append(gpus, GPUMemoryInfo{
			Index:        idx,
			Name:         "AMD GPU", // rocm-smi CSV path does not expose the model name
			Vendor:       VendorAMD,
			TotalVRAM:    totalBytes,
			UsedVRAM:     usedBytes,
			FreeVRAM:     freeBytes,
			UsagePercent: usagePercent,
		})
	}

	return gpus
}

// getIntelGPUMemory queries Intel GPUs using xpu-smi or intel_gpu_top.
// It returns nil when neither tool is installed or neither yields usable data.
func getIntelGPUMemory() []GPUMemoryInfo {
	// Try xpu-smi first (Intel's official GPU management tool)
	gpus := getIntelXPUSMI()
	if len(gpus) > 0 {
		return gpus
	}

	// Fallback to intel_gpu_top
	return getIntelGPUTop()
}

// getIntelXPUSMI queries Intel GPUs using xpu-smi.
// It runs "xpu-smi discovery --json" to enumerate devices and, per device,
// "xpu-smi stats -d <id> --json" for current memory usage. Stats failures are
// tolerated: the device is still reported, with UsedVRAM left at 0.
func getIntelXPUSMI() []GPUMemoryInfo {
	if _, err := exec.LookPath("xpu-smi"); err != nil {
		return nil
	}

	// Get device list
	cmd := exec.Command("xpu-smi", "discovery", "--json")

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		log.Debug().Err(err).Str("stderr", stderr.String()).Msg("xpu-smi discovery failed")
		return nil
	}

	// Parse JSON output
	var result struct {
		DeviceList []struct {
			DeviceID   int    `json:"device_id"`
			DeviceName string `json:"device_name"`
			VendorName string `json:"vendor_name"`
			// Note: the JSON key really is singular "byte" in xpu-smi output.
			MemoryPhysicalSizeBytes uint64 `json:"memory_physical_size_byte"`
		} `json:"device_list"`
	}

	if err := json.Unmarshal(stdout.Bytes(), &result); err != nil {
		log.Debug().Err(err).Msg("failed to parse xpu-smi discovery output")
		return nil
	}

	var gpus []GPUMemoryInfo

	for _, device := range result.DeviceList {
		// Get memory usage for this device
		statsCmd := exec.Command("xpu-smi", "stats", "-d", strconv.Itoa(device.DeviceID), "--json")

		var statsStdout bytes.Buffer
		statsCmd.Stdout = &statsStdout

		// Best-effort: a failed stats call leaves usedBytes at 0 rather than
		// dropping the device from the result.
		usedBytes := uint64(0)
		if err := statsCmd.Run(); err == nil {
			var stats struct {
				DeviceID   int    `json:"device_id"`
				MemoryUsed uint64 `json:"memory_used"`
			}
			if err := json.Unmarshal(statsStdout.Bytes(), &stats); err == nil {
				usedBytes = stats.MemoryUsed
			}
		}

		totalBytes := device.MemoryPhysicalSizeBytes
		freeBytes := uint64(0)
		if totalBytes > usedBytes {
			freeBytes = totalBytes - usedBytes
		}

		usagePercent := 0.0
		if totalBytes > 0 {
			usagePercent = float64(usedBytes) / float64(totalBytes) * 100
		}

		gpus = append(gpus, GPUMemoryInfo{
			Index:        device.DeviceID,
			Name:         device.DeviceName,
			Vendor:       VendorIntel,
			TotalVRAM:    totalBytes,
			UsedVRAM:     usedBytes,
			FreeVRAM:     freeBytes,
			UsagePercent: usagePercent,
		})
	}

	return gpus
}

// getIntelGPUTop queries Intel GPUs using intel_gpu_top.
//
// NOTE(review): as written this function can only ever return nil —
// intel_gpu_top's NDJSON output is located and unmarshalled, but the parsed
// result is discarded and the final statement returns nil unconditionally
// (intel_gpu_top does not reliably expose memory figures). The command
// execution and parsing are therefore wasted work; consider either completing
// the memory extraction or removing this fallback from getIntelGPUMemory.
func getIntelGPUTop() []GPUMemoryInfo {
	if _, err := exec.LookPath("intel_gpu_top"); err != nil {
		return nil
	}

	// intel_gpu_top with -J outputs JSON, -s 1 for single sample
	cmd := exec.Command("intel_gpu_top", "-J", "-s", "1")

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		log.Debug().Err(err).Str("stderr", stderr.String()).Msg("intel_gpu_top failed")
		return nil
	}

	// Parse JSON output - intel_gpu_top outputs NDJSON
	lines := strings.Split(strings.TrimSpace(stdout.String()), "\n")
	if len(lines) == 0 {
		return nil
	}

	// Take the last complete JSON object
	// (scan backwards for the most recent line that starts an object).
	var lastJSON string
	for i := len(lines) - 1; i >= 0; i-- {
		if strings.HasPrefix(strings.TrimSpace(lines[i]), "{") {
			lastJSON = lines[i]
			break
		}
	}

	if lastJSON == "" {
		return nil
	}

	var result struct {
		Engines map[string]interface{} `json:"engines"`
		// Memory info if available
	}

	if err := json.Unmarshal([]byte(lastJSON), &result); err != nil {
		log.Debug().Err(err).Msg("failed to parse intel_gpu_top output")
		return nil
	}

	// intel_gpu_top doesn't always provide memory info
	// Return empty if we can't get useful data
	return nil
}

// GetSystemRAMInfo returns real-time system RAM usage.
// Totals come from ghw; used/available/free figures are refined from
// /proc/meminfo when readable (Linux), otherwise the whole total is
// reported as available/free. Returns an error only if ghw itself fails.
func GetSystemRAMInfo() (*SystemRAMInfo, error) {
	memory, err := ghw.Memory()
	if err != nil {
		return nil, err
	}

	total := uint64(memory.TotalUsableBytes)

	// Try to get more accurate memory info from /proc/meminfo on Linux
	used, available, free := getDetailedMemoryInfo(total)

	usagePercent := 0.0
	if total > 0 {
		usagePercent = float64(used) / float64(total) * 100
	}

	return &SystemRAMInfo{
		Total:        total,
		Used:         used,
		Free:         free,
		Available:    available,
		UsagePercent: usagePercent,
	}, nil
}

// getDetailedMemoryInfo tries to get detailed memory info from /proc/meminfo on Linux
// Returns used, available, and free memory in bytes.
// On any read failure it falls back to (used=0, available=total, free=total),
// i.e. it optimistically reports all memory as free.
//
// NOTE(review): this shells out to `cat /proc/meminfo`; os.ReadFile would
// avoid the fork/exec and work without `cat` in PATH — confirm the `os`
// package is importable here before changing.
func getDetailedMemoryInfo(total uint64) (used, available, free uint64) {
	// Try to read /proc/meminfo for more accurate data
	cmd := exec.Command("cat", "/proc/meminfo")
	var stdout bytes.Buffer
	cmd.Stdout = &stdout

	if err := cmd.Run(); err != nil {
		// Fallback: assume all memory is available
		return 0, total, total
	}

	lines := strings.Split(stdout.String(), "\n")
	memInfo := make(map[string]uint64)

	// Each /proc/meminfo line looks like "MemTotal:  16384 kB";
	// store every parseable key, converted from kB to bytes.
	for _, line := range lines {
		parts := strings.Fields(line)
		if len(parts) < 2 {
			continue
		}
		key := strings.TrimSuffix(parts[0], ":")
		value, err := strconv.ParseUint(parts[1], 10, 64)
		if err != nil {
			continue
		}
		// Values in /proc/meminfo are in kB
		memInfo[key] = value * 1024
	}

	// Get MemAvailable if present (preferred), otherwise calculate from free + buffers + cached
	if avail, ok := memInfo["MemAvailable"]; ok {
		available = avail
	} else {
		available = memInfo["MemFree"] + memInfo["Buffers"] + memInfo["Cached"]
	}

	free = memInfo["MemFree"]

	// Calculate used memory (clamped at 0 when available exceeds ghw's total)
	if total > available {
		used = total - available
	} else {
		used = 0
	}

	return used, available, free
}

// GetResourceInfo returns GPU info if available, otherwise system RAM info.
// The result's Type field is "gpu" or "ram"; Aggregate always carries the
// combined totals for whichever resource was selected.
func GetResourceInfo() ResourceInfo {
	gpus := GetGPUMemoryUsage()

	if len(gpus) > 0 {
		// GPU available - return GPU info
		aggregate := GetGPUAggregateInfo()
		return ResourceInfo{
			Type:      "gpu",
			Available: true,
			GPUs:      gpus,
			RAM:       nil,
			Aggregate: AggregateMemoryInfo{
				TotalMemory:  aggregate.TotalVRAM,
				UsedMemory:   aggregate.UsedVRAM,
				FreeMemory:   aggregate.FreeVRAM,
				UsagePercent: aggregate.UsagePercent,
				GPUCount:     aggregate.GPUCount,
			},
		}
	}

	// No GPU - fall back to system RAM
	ramInfo, err := GetSystemRAMInfo()
	if err != nil {
		log.Debug().Err(err).Msg("failed to get system RAM info")
		// Available=false with a zero Aggregate signals "no usable data".
		return ResourceInfo{
			Type:      "ram",
			Available: false,
			Aggregate: AggregateMemoryInfo{},
		}
	}

	return ResourceInfo{
		Type:      "ram",
		Available: true,
		GPUs:      nil,
		RAM:       ramInfo,
		Aggregate: AggregateMemoryInfo{
			TotalMemory:  ramInfo.Total,
			UsedMemory:   ramInfo.Used,
			FreeMemory:   ramInfo.Free,
			UsagePercent: ramInfo.UsagePercent,
			GPUCount:     0,
		},
	}
}

// GetResourceAggregateInfo returns aggregate memory info (GPU if available, otherwise RAM).
// This is used by the memory reclaimer to check memory usage.
func GetResourceAggregateInfo() AggregateMemoryInfo {
	resourceInfo := GetResourceInfo()
	return resourceInfo.Aggregate
}

// getVulkanGPUMemory queries GPUs using vulkaninfo as a fallback.
// Note: Vulkan provides memory heap info but not real-time usage, so the
// returned entries always report UsedVRAM=0 and FreeVRAM=TotalVRAM.
func getVulkanGPUMemory() []GPUMemoryInfo {
	if _, err := exec.LookPath("vulkaninfo"); err != nil {
		return nil
	}

	cmd := exec.Command("vulkaninfo", "--json")

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		log.Debug().Err(err).Str("stderr", stderr.String()).Msg("vulkaninfo failed")
		return nil
	}

	// Parse Vulkan JSON output
	var result struct {
		VkPhysicalDevices []struct {
			DeviceName                     string `json:"deviceName"`
			DeviceType                     string `json:"deviceType"`
			VkPhysicalDeviceMemoryProperties struct {
				MemoryHeaps []struct {
					Flags int    `json:"flags"`
					Size  uint64 `json:"size"`
				} `json:"memoryHeaps"`
			} `json:"VkPhysicalDeviceMemoryProperties"`
		} `json:"VkPhysicalDevices"`
	}

	if err := json.Unmarshal(stdout.Bytes(), &result); err != nil {
		log.Debug().Err(err).Msg("failed to parse vulkaninfo output")
		return nil
	}

	var gpus []GPUMemoryInfo

	for i, device := range result.VkPhysicalDevices {
		// Skip non-discrete/integrated GPUs if possible
		if device.DeviceType == "VK_PHYSICAL_DEVICE_TYPE_CPU" {
			continue
		}

		// Sum up device-local memory heaps
		var totalVRAM uint64
		for _, heap := range device.VkPhysicalDeviceMemoryProperties.MemoryHeaps {
			// Flag 1 = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
			if heap.Flags&1 != 0 {
				totalVRAM += heap.Size
			}
		}

		// A device with no device-local heap has no VRAM worth reporting.
		if totalVRAM == 0 {
			continue
		}

		gpus = append(gpus, GPUMemoryInfo{
			Index:        i,
			Name:         device.DeviceName,
			Vendor:       VendorVulkan,
			TotalVRAM:    totalVRAM,
			UsedVRAM:     0, // Vulkan doesn't provide real-time usage
			FreeVRAM:     totalVRAM,
			UsagePercent: 0,
		})
	}

	return gpus
}