diff --git a/core/application/config_file_watcher.go b/core/application/config_file_watcher.go index 30b3e5ad6f62..4a19cc128d96 100644 --- a/core/application/config_file_watcher.go +++ b/core/application/config_file_watcher.go @@ -185,33 +185,6 @@ func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHan return handler } -type runtimeSettings struct { - WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"` - WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"` - WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"` - WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` - WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"` - SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead - MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited, 1 = single backend mode) - ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"` - Threads *int `json:"threads,omitempty"` - ContextSize *int `json:"context_size,omitempty"` - F16 *bool `json:"f16,omitempty"` - Debug *bool `json:"debug,omitempty"` - CORS *bool `json:"cors,omitempty"` - CSRF *bool `json:"csrf,omitempty"` - CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"` - P2PToken *string `json:"p2p_token,omitempty"` - P2PNetworkID *string `json:"p2p_network_id,omitempty"` - Federated *bool `json:"federated,omitempty"` - Galleries *[]config.Gallery `json:"galleries,omitempty"` - BackendGalleries *[]config.Gallery `json:"backend_galleries,omitempty"` - AutoloadGalleries *bool `json:"autoload_galleries,omitempty"` - AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"` - ApiKeys *[]string `json:"api_keys,omitempty"` - AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"` -} - func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHandler { handler := 
func(fileContent []byte, appConfig *config.ApplicationConfig) error { log.Debug().Msg("processing runtime_settings.json") @@ -227,6 +200,8 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand envSingleBackend := appConfig.SingleBackend == startupAppConfig.SingleBackend envMaxActiveBackends := appConfig.MaxActiveBackends == startupAppConfig.MaxActiveBackends envParallelRequests := appConfig.ParallelBackendRequests == startupAppConfig.ParallelBackendRequests + envMemoryReclaimerEnabled := appConfig.MemoryReclaimerEnabled == startupAppConfig.MemoryReclaimerEnabled + envMemoryReclaimerThreshold := appConfig.MemoryReclaimerThreshold == startupAppConfig.MemoryReclaimerThreshold envThreads := appConfig.Threads == startupAppConfig.Threads envContextSize := appConfig.ContextSize == startupAppConfig.ContextSize envF16 := appConfig.F16 == startupAppConfig.F16 @@ -242,7 +217,7 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand envAgentJobRetentionDays := appConfig.AgentJobRetentionDays == startupAppConfig.AgentJobRetentionDays if len(fileContent) > 0 { - var settings runtimeSettings + var settings config.RuntimeSettings err := json.Unmarshal(fileContent, &settings) if err != nil { return err @@ -294,6 +269,15 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand if settings.ParallelBackendRequests != nil && !envParallelRequests { appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests } + if settings.MemoryReclaimerEnabled != nil && !envMemoryReclaimerEnabled { + appConfig.MemoryReclaimerEnabled = *settings.MemoryReclaimerEnabled + if appConfig.MemoryReclaimerEnabled { + appConfig.WatchDog = true // Memory reclaimer requires watchdog + } + } + if settings.MemoryReclaimerThreshold != nil && !envMemoryReclaimerThreshold { + appConfig.MemoryReclaimerThreshold = *settings.MemoryReclaimerThreshold + } if settings.Threads != nil && !envThreads { appConfig.Threads = 
*settings.Threads } diff --git a/core/application/startup.go b/core/application/startup.go index 3a238655d28c..d5e06c4e2b94 100644 --- a/core/application/startup.go +++ b/core/application/startup.go @@ -218,17 +218,7 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) { return } - var settings struct { - WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"` - WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"` - WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"` - WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` - WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"` - SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead - MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited) - ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"` - AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"` - } + var settings config.RuntimeSettings if err := json.Unmarshal(fileContent, &settings); err != nil { log.Warn().Err(err).Msg("failed to parse runtime_settings.json") @@ -281,6 +271,16 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) { } } } + if settings.WatchdogInterval != nil { + if options.WatchDogInterval == 0 { + dur, err := time.ParseDuration(*settings.WatchdogInterval) + if err == nil { + options.WatchDogInterval = dur + } else { + log.Warn().Err(err).Str("interval", *settings.WatchdogInterval).Msg("invalid watchdog interval in runtime_settings.json") + } + } + } // Handle MaxActiveBackends (new) and SingleBackend (deprecated) if settings.MaxActiveBackends != nil { // Only apply if current value is default (0), suggesting it wasn't set from env var @@ -303,6 +303,21 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) { options.ParallelBackendRequests = *settings.ParallelBackendRequests } } + if 
settings.MemoryReclaimerEnabled != nil { + // Only apply if current value is default (false), suggesting it wasn't set from env var + if !options.MemoryReclaimerEnabled { + options.MemoryReclaimerEnabled = *settings.MemoryReclaimerEnabled + if options.MemoryReclaimerEnabled { + options.WatchDog = true // Memory reclaimer requires watchdog + } + } + } + if settings.MemoryReclaimerThreshold != nil { + // Only apply if current value is default (0), suggesting it wasn't set from env var + if options.MemoryReclaimerThreshold == 0 { + options.MemoryReclaimerThreshold = *settings.MemoryReclaimerThreshold + } + } if settings.AgentJobRetentionDays != nil { // Only apply if current value is default (0), suggesting it wasn't set from env var if options.AgentJobRetentionDays == 0 { @@ -323,19 +338,24 @@ func initializeWatchdog(application *Application, options *config.ApplicationCon // Get effective max active backends (considers both MaxActiveBackends and deprecated SingleBackend) lruLimit := options.GetEffectiveMaxActiveBackends() - // Create watchdog if enabled OR if LRU limit is set - if options.WatchDog || lruLimit > 0 { + // Create watchdog if enabled OR if LRU limit is set OR if memory reclaimer is enabled + if options.WatchDog || lruLimit > 0 || options.MemoryReclaimerEnabled { wd := model.NewWatchDog( - application.ModelLoader(), - options.WatchDogBusyTimeout, - options.WatchDogIdleTimeout, - options.WatchDogBusy, - options.WatchDogIdle, - lruLimit) + model.WithProcessManager(application.ModelLoader()), + model.WithBusyTimeout(options.WatchDogBusyTimeout), + model.WithIdleTimeout(options.WatchDogIdleTimeout), + model.WithWatchdogInterval(options.WatchDogInterval), + model.WithBusyCheck(options.WatchDogBusy), + model.WithIdleCheck(options.WatchDogIdle), + model.WithLRULimit(lruLimit), + model.WithMemoryReclaimer(options.MemoryReclaimerEnabled, options.MemoryReclaimerThreshold), + ) application.ModelLoader().SetWatchDog(wd) - // Start watchdog goroutine only if 
busy/idle checks are enabled - if options.WatchDogBusy || options.WatchDogIdle { + // Start watchdog goroutine if any periodic checks are enabled + // LRU eviction doesn't need the Run() loop - it's triggered on model load + // But memory reclaimer needs the Run() loop for periodic checking + if options.WatchDogBusy || options.WatchDogIdle || options.MemoryReclaimerEnabled { go wd.Run() } diff --git a/core/application/watchdog.go b/core/application/watchdog.go index e82ac28dcaef..bceb06e19c4b 100644 --- a/core/application/watchdog.go +++ b/core/application/watchdog.go @@ -23,24 +23,28 @@ func (a *Application) startWatchdog() error { // Get effective max active backends (considers both MaxActiveBackends and deprecated SingleBackend) lruLimit := appConfig.GetEffectiveMaxActiveBackends() - // Create watchdog if enabled OR if LRU limit is set + // Create watchdog if enabled OR if LRU limit is set OR if memory reclaimer is enabled // LRU eviction requires watchdog infrastructure even without busy/idle checks - if appConfig.WatchDog || lruLimit > 0 { + if appConfig.WatchDog || lruLimit > 0 || appConfig.MemoryReclaimerEnabled { wd := model.NewWatchDog( - a.modelLoader, - appConfig.WatchDogBusyTimeout, - appConfig.WatchDogIdleTimeout, - appConfig.WatchDogBusy, - appConfig.WatchDogIdle, - lruLimit) + model.WithProcessManager(a.modelLoader), + model.WithBusyTimeout(appConfig.WatchDogBusyTimeout), + model.WithIdleTimeout(appConfig.WatchDogIdleTimeout), + model.WithWatchdogInterval(appConfig.WatchDogInterval), + model.WithBusyCheck(appConfig.WatchDogBusy), + model.WithIdleCheck(appConfig.WatchDogIdle), + model.WithLRULimit(lruLimit), + model.WithMemoryReclaimer(appConfig.MemoryReclaimerEnabled, appConfig.MemoryReclaimerThreshold), + ) a.modelLoader.SetWatchDog(wd) // Create new stop channel a.watchdogStop = make(chan bool, 1) - // Start watchdog goroutine only if busy/idle checks are enabled + // Start watchdog goroutine if any periodic checks are enabled // LRU eviction 
doesn't need the Run() loop - it's triggered on model load - if appConfig.WatchDogBusy || appConfig.WatchDogIdle { + // But memory reclaimer needs the Run() loop for periodic checking + if appConfig.WatchDogBusy || appConfig.WatchDogIdle || appConfig.MemoryReclaimerEnabled { go wd.Run() } @@ -56,7 +60,14 @@ func (a *Application) startWatchdog() error { } }() - log.Info().Int("lruLimit", lruLimit).Bool("busyCheck", appConfig.WatchDogBusy).Bool("idleCheck", appConfig.WatchDogIdle).Msg("Watchdog started with new settings") + log.Info(). + Int("lruLimit", lruLimit). + Bool("busyCheck", appConfig.WatchDogBusy). + Bool("idleCheck", appConfig.WatchDogIdle). + Bool("memoryReclaimer", appConfig.MemoryReclaimerEnabled). + Float64("memoryThreshold", appConfig.MemoryReclaimerThreshold). + Dur("interval", appConfig.WatchDogInterval). + Msg("Watchdog started with new settings") } else { log.Info().Msg("Watchdog disabled") } diff --git a/core/cli/run.go b/core/cli/run.go index 4df4fbdf3ba1..a37a19d3512c 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -72,6 +72,8 @@ type RunCMD struct { WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"` EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"` WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"` + EnableMemoryReclaimer bool `env:"LOCALAI_MEMORY_RECLAIMER,MEMORY_RECLAIMER,LOCALAI_GPU_RECLAIMER,GPU_RECLAIMER" default:"false" help:"Enable memory threshold monitoring to auto-evict backends when memory usage exceeds threshold (uses GPU VRAM if available, otherwise RAM)" group:"backends"` + MemoryReclaimerThreshold float64 
`env:"LOCALAI_MEMORY_RECLAIMER_THRESHOLD,MEMORY_RECLAIMER_THRESHOLD,LOCALAI_GPU_RECLAIMER_THRESHOLD,GPU_RECLAIMER_THRESHOLD" default:"0.95" help:"Memory usage threshold (0.0-1.0) that triggers backend eviction (default 0.95 = 95%%)" group:"backends"` Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"` DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"` MachineTag string `env:"LOCALAI_MACHINE_TAG,MACHINE_TAG" help:"Add Machine-Tag header to each response which is useful to track the machine in the P2P network" group:"api"` @@ -200,6 +202,12 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { opts = append(opts, config.SetWatchDogBusyTimeout(dur)) } } + + // Handle memory reclaimer (uses GPU VRAM if available, otherwise RAM) + if r.EnableMemoryReclaimer { + opts = append(opts, config.WithMemoryReclaimer(true, r.MemoryReclaimerThreshold)) + } + if r.ParallelRequests { opts = append(opts, config.EnableParallelBackendRequests) } diff --git a/core/config/application_config.go b/core/config/application_config.go index c67e24f5c697..e70f721babd8 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -60,9 +60,14 @@ type ApplicationConfig struct { WatchDogBusy bool WatchDog bool + // Memory Reclaimer settings (works with GPU if available, otherwise RAM) + MemoryReclaimerEnabled bool // Enable memory threshold monitoring + MemoryReclaimerThreshold float64 // Threshold 0.0-1.0 (e.g., 0.95 = 95%) + ModelsURL []string WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration + WatchDogInterval time.Duration // Interval between watchdog checks MachineTag string @@ -187,6 +192,39 @@ func SetWatchDogIdleTimeout(t time.Duration) AppOption { } } +// EnableMemoryReclaimer enables memory threshold monitoring. +// When enabled, the watchdog will evict backends if memory usage exceeds the threshold. 
+// Works with GPU VRAM if available, otherwise uses system RAM. +var EnableMemoryReclaimer = func(o *ApplicationConfig) { + o.MemoryReclaimerEnabled = true + o.WatchDog = true // Memory reclaimer requires watchdog infrastructure +} + +// SetMemoryReclaimerThreshold sets the memory usage threshold (0.0-1.0). +// When memory usage exceeds this threshold, backends will be evicted using LRU strategy. +func SetMemoryReclaimerThreshold(threshold float64) AppOption { + return func(o *ApplicationConfig) { + if threshold > 0 && threshold <= 1.0 { + o.MemoryReclaimerThreshold = threshold + o.MemoryReclaimerEnabled = true + o.WatchDog = true // Memory reclaimer requires watchdog infrastructure + } + } +} + +// WithMemoryReclaimer configures the memory reclaimer with the given settings +func WithMemoryReclaimer(enabled bool, threshold float64) AppOption { + return func(o *ApplicationConfig) { + o.MemoryReclaimerEnabled = enabled + if threshold > 0 && threshold <= 1.0 { + o.MemoryReclaimerThreshold = threshold + } + if enabled { + o.WatchDog = true // Memory reclaimer requires watchdog infrastructure + } + } +} + // EnableSingleBackend is deprecated: use SetMaxActiveBackends(1) instead. // This is kept for backward compatibility. var EnableSingleBackend = func(o *ApplicationConfig) { @@ -454,6 +492,208 @@ func (o *ApplicationConfig) ToConfigLoaderOptions() []ConfigLoaderOption { } } +// ToRuntimeSettings converts ApplicationConfig to RuntimeSettings for API responses and JSON serialization. +// This provides a single source of truth - ApplicationConfig holds the live values, +// and this method creates a RuntimeSettings snapshot for external consumption. 
+func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings { + // Create local copies for pointer fields + watchdogEnabled := o.WatchDog + watchdogIdle := o.WatchDogIdle + watchdogBusy := o.WatchDogBusy + singleBackend := o.SingleBackend + maxActiveBackends := o.MaxActiveBackends + parallelBackendRequests := o.ParallelBackendRequests + memoryReclaimerEnabled := o.MemoryReclaimerEnabled + memoryReclaimerThreshold := o.MemoryReclaimerThreshold + threads := o.Threads + contextSize := o.ContextSize + f16 := o.F16 + debug := o.Debug + cors := o.CORS + csrf := o.CSRF + corsAllowOrigins := o.CORSAllowOrigins + p2pToken := o.P2PToken + p2pNetworkID := o.P2PNetworkID + federated := o.Federated + galleries := o.Galleries + backendGalleries := o.BackendGalleries + autoloadGalleries := o.AutoloadGalleries + autoloadBackendGalleries := o.AutoloadBackendGalleries + apiKeys := o.ApiKeys + agentJobRetentionDays := o.AgentJobRetentionDays + + // Format timeouts as strings + var idleTimeout, busyTimeout, watchdogInterval string + if o.WatchDogIdleTimeout > 0 { + idleTimeout = o.WatchDogIdleTimeout.String() + } else { + idleTimeout = "15m" // default + } + if o.WatchDogBusyTimeout > 0 { + busyTimeout = o.WatchDogBusyTimeout.String() + } else { + busyTimeout = "5m" // default + } + if o.WatchDogInterval > 0 { + watchdogInterval = o.WatchDogInterval.String() + } else { + watchdogInterval = "2s" // default + } + + return RuntimeSettings{ + WatchdogEnabled: &watchdogEnabled, + WatchdogIdleEnabled: &watchdogIdle, + WatchdogBusyEnabled: &watchdogBusy, + WatchdogIdleTimeout: &idleTimeout, + WatchdogBusyTimeout: &busyTimeout, + WatchdogInterval: &watchdogInterval, + SingleBackend: &singleBackend, + MaxActiveBackends: &maxActiveBackends, + ParallelBackendRequests: &parallelBackendRequests, + MemoryReclaimerEnabled: &memoryReclaimerEnabled, + MemoryReclaimerThreshold: &memoryReclaimerThreshold, + Threads: &threads, + ContextSize: &contextSize, + F16: &f16, + Debug: &debug, + CORS: &cors, + 
CSRF: &csrf, + CORSAllowOrigins: &corsAllowOrigins, + P2PToken: &p2pToken, + P2PNetworkID: &p2pNetworkID, + Federated: &federated, + Galleries: &galleries, + BackendGalleries: &backendGalleries, + AutoloadGalleries: &autoloadGalleries, + AutoloadBackendGalleries: &autoloadBackendGalleries, + ApiKeys: &apiKeys, + AgentJobRetentionDays: &agentJobRetentionDays, + } +} + +// ApplyRuntimeSettings applies RuntimeSettings to ApplicationConfig. +// Only non-nil fields in RuntimeSettings are applied. +// Returns true if watchdog-related settings changed (requiring restart). +func (o *ApplicationConfig) ApplyRuntimeSettings(settings *RuntimeSettings) (requireRestart bool) { + if settings == nil { + return false + } + + if settings.WatchdogEnabled != nil { + o.WatchDog = *settings.WatchdogEnabled + requireRestart = true + } + if settings.WatchdogIdleEnabled != nil { + o.WatchDogIdle = *settings.WatchdogIdleEnabled + if o.WatchDogIdle { + o.WatchDog = true + } + requireRestart = true + } + if settings.WatchdogBusyEnabled != nil { + o.WatchDogBusy = *settings.WatchdogBusyEnabled + if o.WatchDogBusy { + o.WatchDog = true + } + requireRestart = true + } + if settings.WatchdogIdleTimeout != nil { + if dur, err := time.ParseDuration(*settings.WatchdogIdleTimeout); err == nil { + o.WatchDogIdleTimeout = dur + requireRestart = true + } + } + if settings.WatchdogBusyTimeout != nil { + if dur, err := time.ParseDuration(*settings.WatchdogBusyTimeout); err == nil { + o.WatchDogBusyTimeout = dur + requireRestart = true + } + } + if settings.WatchdogInterval != nil { + if dur, err := time.ParseDuration(*settings.WatchdogInterval); err == nil { + o.WatchDogInterval = dur + requireRestart = true + } + } + if settings.MaxActiveBackends != nil { + o.MaxActiveBackends = *settings.MaxActiveBackends + o.SingleBackend = (*settings.MaxActiveBackends == 1) + requireRestart = true + } else if settings.SingleBackend != nil { + o.SingleBackend = *settings.SingleBackend + if *settings.SingleBackend { + 
o.MaxActiveBackends = 1 + } else { + o.MaxActiveBackends = 0 + } + requireRestart = true + } + if settings.ParallelBackendRequests != nil { + o.ParallelBackendRequests = *settings.ParallelBackendRequests + } + if settings.MemoryReclaimerEnabled != nil { + o.MemoryReclaimerEnabled = *settings.MemoryReclaimerEnabled + if *settings.MemoryReclaimerEnabled { + o.WatchDog = true + } + requireRestart = true + } + if settings.MemoryReclaimerThreshold != nil { + if *settings.MemoryReclaimerThreshold > 0 && *settings.MemoryReclaimerThreshold <= 1.0 { + o.MemoryReclaimerThreshold = *settings.MemoryReclaimerThreshold + requireRestart = true + } + } + if settings.Threads != nil { + o.Threads = *settings.Threads + } + if settings.ContextSize != nil { + o.ContextSize = *settings.ContextSize + } + if settings.F16 != nil { + o.F16 = *settings.F16 + } + if settings.Debug != nil { + o.Debug = *settings.Debug + } + if settings.CORS != nil { + o.CORS = *settings.CORS + } + if settings.CSRF != nil { + o.CSRF = *settings.CSRF + } + if settings.CORSAllowOrigins != nil { + o.CORSAllowOrigins = *settings.CORSAllowOrigins + } + if settings.P2PToken != nil { + o.P2PToken = *settings.P2PToken + } + if settings.P2PNetworkID != nil { + o.P2PNetworkID = *settings.P2PNetworkID + } + if settings.Federated != nil { + o.Federated = *settings.Federated + } + if settings.Galleries != nil { + o.Galleries = *settings.Galleries + } + if settings.BackendGalleries != nil { + o.BackendGalleries = *settings.BackendGalleries + } + if settings.AutoloadGalleries != nil { + o.AutoloadGalleries = *settings.AutoloadGalleries + } + if settings.AutoloadBackendGalleries != nil { + o.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries + } + if settings.AgentJobRetentionDays != nil { + o.AgentJobRetentionDays = *settings.AgentJobRetentionDays + } + // Note: ApiKeys requires special handling (merging with startup keys) - handled in caller + + return requireRestart +} + // func WithMetrics(meter 
*metrics.Metrics) AppOption { // return func(o *StartupOptions) { // o.Metrics = meter diff --git a/core/config/application_config_test.go b/core/config/application_config_test.go new file mode 100644 index 000000000000..c6d4fbecd6bc --- /dev/null +++ b/core/config/application_config_test.go @@ -0,0 +1,577 @@ +package config + +import ( + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("ApplicationConfig RuntimeSettings Conversion", func() { + Describe("ToRuntimeSettings", func() { + It("should convert all fields correctly", func() { + appConfig := &ApplicationConfig{ + WatchDog: true, + WatchDogIdle: true, + WatchDogBusy: true, + WatchDogIdleTimeout: 20 * time.Minute, + WatchDogBusyTimeout: 10 * time.Minute, + SingleBackend: false, + MaxActiveBackends: 5, + ParallelBackendRequests: true, + MemoryReclaimerEnabled: true, + MemoryReclaimerThreshold: 0.85, + Threads: 8, + ContextSize: 4096, + F16: true, + Debug: true, + CORS: true, + CSRF: true, + CORSAllowOrigins: "https://example.com", + P2PToken: "test-token", + P2PNetworkID: "test-network", + Federated: true, + Galleries: []Gallery{{Name: "test-gallery", URL: "https://example.com"}}, + BackendGalleries: []Gallery{{Name: "backend-gallery", URL: "https://example.com/backend"}}, + AutoloadGalleries: true, + AutoloadBackendGalleries: true, + ApiKeys: []string{"key1", "key2"}, + AgentJobRetentionDays: 30, + } + + rs := appConfig.ToRuntimeSettings() + + Expect(rs.WatchdogEnabled).ToNot(BeNil()) + Expect(*rs.WatchdogEnabled).To(BeTrue()) + + Expect(rs.WatchdogIdleEnabled).ToNot(BeNil()) + Expect(*rs.WatchdogIdleEnabled).To(BeTrue()) + + Expect(rs.WatchdogBusyEnabled).ToNot(BeNil()) + Expect(*rs.WatchdogBusyEnabled).To(BeTrue()) + + Expect(rs.WatchdogIdleTimeout).ToNot(BeNil()) + Expect(*rs.WatchdogIdleTimeout).To(Equal("20m0s")) + + Expect(rs.WatchdogBusyTimeout).ToNot(BeNil()) + Expect(*rs.WatchdogBusyTimeout).To(Equal("10m0s")) + + Expect(rs.SingleBackend).ToNot(BeNil()) + 
Expect(*rs.SingleBackend).To(BeFalse()) + + Expect(rs.MaxActiveBackends).ToNot(BeNil()) + Expect(*rs.MaxActiveBackends).To(Equal(5)) + + Expect(rs.ParallelBackendRequests).ToNot(BeNil()) + Expect(*rs.ParallelBackendRequests).To(BeTrue()) + + Expect(rs.MemoryReclaimerEnabled).ToNot(BeNil()) + Expect(*rs.MemoryReclaimerEnabled).To(BeTrue()) + + Expect(rs.MemoryReclaimerThreshold).ToNot(BeNil()) + Expect(*rs.MemoryReclaimerThreshold).To(Equal(0.85)) + + Expect(rs.Threads).ToNot(BeNil()) + Expect(*rs.Threads).To(Equal(8)) + + Expect(rs.ContextSize).ToNot(BeNil()) + Expect(*rs.ContextSize).To(Equal(4096)) + + Expect(rs.F16).ToNot(BeNil()) + Expect(*rs.F16).To(BeTrue()) + + Expect(rs.Debug).ToNot(BeNil()) + Expect(*rs.Debug).To(BeTrue()) + + Expect(rs.CORS).ToNot(BeNil()) + Expect(*rs.CORS).To(BeTrue()) + + Expect(rs.CSRF).ToNot(BeNil()) + Expect(*rs.CSRF).To(BeTrue()) + + Expect(rs.CORSAllowOrigins).ToNot(BeNil()) + Expect(*rs.CORSAllowOrigins).To(Equal("https://example.com")) + + Expect(rs.P2PToken).ToNot(BeNil()) + Expect(*rs.P2PToken).To(Equal("test-token")) + + Expect(rs.P2PNetworkID).ToNot(BeNil()) + Expect(*rs.P2PNetworkID).To(Equal("test-network")) + + Expect(rs.Federated).ToNot(BeNil()) + Expect(*rs.Federated).To(BeTrue()) + + Expect(rs.Galleries).ToNot(BeNil()) + Expect(*rs.Galleries).To(HaveLen(1)) + Expect((*rs.Galleries)[0].Name).To(Equal("test-gallery")) + + Expect(rs.BackendGalleries).ToNot(BeNil()) + Expect(*rs.BackendGalleries).To(HaveLen(1)) + Expect((*rs.BackendGalleries)[0].Name).To(Equal("backend-gallery")) + + Expect(rs.AutoloadGalleries).ToNot(BeNil()) + Expect(*rs.AutoloadGalleries).To(BeTrue()) + + Expect(rs.AutoloadBackendGalleries).ToNot(BeNil()) + Expect(*rs.AutoloadBackendGalleries).To(BeTrue()) + + Expect(rs.ApiKeys).ToNot(BeNil()) + Expect(*rs.ApiKeys).To(HaveLen(2)) + Expect(*rs.ApiKeys).To(ContainElements("key1", "key2")) + + Expect(rs.AgentJobRetentionDays).ToNot(BeNil()) + Expect(*rs.AgentJobRetentionDays).To(Equal(30)) + }) + + 
It("should use default timeouts when not set", func() { + appConfig := &ApplicationConfig{} + + rs := appConfig.ToRuntimeSettings() + + Expect(rs.WatchdogIdleTimeout).ToNot(BeNil()) + Expect(*rs.WatchdogIdleTimeout).To(Equal("15m")) + + Expect(rs.WatchdogBusyTimeout).ToNot(BeNil()) + Expect(*rs.WatchdogBusyTimeout).To(Equal("5m")) + }) + }) + + Describe("ApplyRuntimeSettings", func() { + It("should return false when settings is nil", func() { + appConfig := &ApplicationConfig{} + changed := appConfig.ApplyRuntimeSettings(nil) + Expect(changed).To(BeFalse()) + }) + + It("should only apply non-nil fields", func() { + appConfig := &ApplicationConfig{ + WatchDog: false, + Threads: 4, + ContextSize: 2048, + } + + watchdogEnabled := true + rs := &RuntimeSettings{ + WatchdogEnabled: &watchdogEnabled, + // Leave other fields nil + } + + changed := appConfig.ApplyRuntimeSettings(rs) + + Expect(changed).To(BeTrue()) + Expect(appConfig.WatchDog).To(BeTrue()) + // Unchanged fields should remain + Expect(appConfig.Threads).To(Equal(4)) + Expect(appConfig.ContextSize).To(Equal(2048)) + }) + + It("should apply watchdog settings and return changed=true", func() { + appConfig := &ApplicationConfig{} + + watchdogEnabled := true + watchdogIdle := true + watchdogBusy := true + idleTimeout := "30m" + busyTimeout := "15m" + + rs := &RuntimeSettings{ + WatchdogEnabled: &watchdogEnabled, + WatchdogIdleEnabled: &watchdogIdle, + WatchdogBusyEnabled: &watchdogBusy, + WatchdogIdleTimeout: &idleTimeout, + WatchdogBusyTimeout: &busyTimeout, + } + + changed := appConfig.ApplyRuntimeSettings(rs) + + Expect(changed).To(BeTrue()) + Expect(appConfig.WatchDog).To(BeTrue()) + Expect(appConfig.WatchDogIdle).To(BeTrue()) + Expect(appConfig.WatchDogBusy).To(BeTrue()) + Expect(appConfig.WatchDogIdleTimeout).To(Equal(30 * time.Minute)) + Expect(appConfig.WatchDogBusyTimeout).To(Equal(15 * time.Minute)) + }) + + It("should enable watchdog when idle is enabled", func() { + appConfig := 
&ApplicationConfig{WatchDog: false} + + watchdogIdle := true + rs := &RuntimeSettings{ + WatchdogIdleEnabled: &watchdogIdle, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.WatchDog).To(BeTrue()) + Expect(appConfig.WatchDogIdle).To(BeTrue()) + }) + + It("should enable watchdog when busy is enabled", func() { + appConfig := &ApplicationConfig{WatchDog: false} + + watchdogBusy := true + rs := &RuntimeSettings{ + WatchdogBusyEnabled: &watchdogBusy, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.WatchDog).To(BeTrue()) + Expect(appConfig.WatchDogBusy).To(BeTrue()) + }) + + It("should handle MaxActiveBackends and update SingleBackend accordingly", func() { + appConfig := &ApplicationConfig{} + + maxBackends := 1 + rs := &RuntimeSettings{ + MaxActiveBackends: &maxBackends, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.MaxActiveBackends).To(Equal(1)) + Expect(appConfig.SingleBackend).To(BeTrue()) + + // Test with multiple backends + maxBackends = 5 + rs = &RuntimeSettings{ + MaxActiveBackends: &maxBackends, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.MaxActiveBackends).To(Equal(5)) + Expect(appConfig.SingleBackend).To(BeFalse()) + }) + + It("should handle SingleBackend and update MaxActiveBackends accordingly", func() { + appConfig := &ApplicationConfig{} + + singleBackend := true + rs := &RuntimeSettings{ + SingleBackend: &singleBackend, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.SingleBackend).To(BeTrue()) + Expect(appConfig.MaxActiveBackends).To(Equal(1)) + + // Test disabling single backend + singleBackend = false + rs = &RuntimeSettings{ + SingleBackend: &singleBackend, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.SingleBackend).To(BeFalse()) + Expect(appConfig.MaxActiveBackends).To(Equal(0)) + }) + + It("should enable watchdog when memory reclaimer is enabled", func() { + appConfig := &ApplicationConfig{WatchDog: false} + + memoryEnabled := true + threshold 
:= 0.90 + rs := &RuntimeSettings{ + MemoryReclaimerEnabled: &memoryEnabled, + MemoryReclaimerThreshold: &threshold, + } + + changed := appConfig.ApplyRuntimeSettings(rs) + + Expect(changed).To(BeTrue()) + Expect(appConfig.WatchDog).To(BeTrue()) + Expect(appConfig.MemoryReclaimerEnabled).To(BeTrue()) + Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.90)) + }) + + It("should reject invalid memory threshold values", func() { + appConfig := &ApplicationConfig{MemoryReclaimerThreshold: 0.50} + + // Test threshold > 1.0 + invalidThreshold := 1.5 + rs := &RuntimeSettings{ + MemoryReclaimerThreshold: &invalidThreshold, + } + appConfig.ApplyRuntimeSettings(rs) + Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.50)) // Should remain unchanged + + // Test threshold <= 0 + invalidThreshold = 0.0 + rs = &RuntimeSettings{ + MemoryReclaimerThreshold: &invalidThreshold, + } + appConfig.ApplyRuntimeSettings(rs) + Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.50)) // Should remain unchanged + + // Test negative threshold + invalidThreshold = -0.5 + rs = &RuntimeSettings{ + MemoryReclaimerThreshold: &invalidThreshold, + } + appConfig.ApplyRuntimeSettings(rs) + Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.50)) // Should remain unchanged + }) + + It("should accept valid memory threshold at boundary", func() { + appConfig := &ApplicationConfig{} + + // Test threshold = 1.0 (maximum valid) + threshold := 1.0 + rs := &RuntimeSettings{ + MemoryReclaimerThreshold: &threshold, + } + appConfig.ApplyRuntimeSettings(rs) + Expect(appConfig.MemoryReclaimerThreshold).To(Equal(1.0)) + + // Test threshold just above 0 + threshold = 0.01 + rs = &RuntimeSettings{ + MemoryReclaimerThreshold: &threshold, + } + appConfig.ApplyRuntimeSettings(rs) + Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.01)) + }) + + It("should apply performance settings without triggering watchdog change", func() { + appConfig := &ApplicationConfig{} + + threads := 16 + contextSize := 8192 + f16 
:= true + debug := true + + rs := &RuntimeSettings{ + Threads: &threads, + ContextSize: &contextSize, + F16: &f16, + Debug: &debug, + } + + changed := appConfig.ApplyRuntimeSettings(rs) + + // These settings don't require watchdog restart + Expect(changed).To(BeFalse()) + Expect(appConfig.Threads).To(Equal(16)) + Expect(appConfig.ContextSize).To(Equal(8192)) + Expect(appConfig.F16).To(BeTrue()) + Expect(appConfig.Debug).To(BeTrue()) + }) + + It("should apply CORS and security settings", func() { + appConfig := &ApplicationConfig{} + + cors := true + csrf := true + origins := "https://example.com,https://other.com" + + rs := &RuntimeSettings{ + CORS: &cors, + CSRF: &csrf, + CORSAllowOrigins: &origins, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.CORS).To(BeTrue()) + Expect(appConfig.CSRF).To(BeTrue()) + Expect(appConfig.CORSAllowOrigins).To(Equal("https://example.com,https://other.com")) + }) + + It("should apply P2P settings", func() { + appConfig := &ApplicationConfig{} + + token := "p2p-test-token" + networkID := "p2p-test-network" + federated := true + + rs := &RuntimeSettings{ + P2PToken: &token, + P2PNetworkID: &networkID, + Federated: &federated, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.P2PToken).To(Equal("p2p-test-token")) + Expect(appConfig.P2PNetworkID).To(Equal("p2p-test-network")) + Expect(appConfig.Federated).To(BeTrue()) + }) + + It("should apply gallery settings", func() { + appConfig := &ApplicationConfig{} + + galleries := []Gallery{ + {Name: "gallery1", URL: "https://gallery1.com"}, + {Name: "gallery2", URL: "https://gallery2.com"}, + } + backendGalleries := []Gallery{ + {Name: "backend-gallery", URL: "https://backend.com"}, + } + autoload := true + autoloadBackend := true + + rs := &RuntimeSettings{ + Galleries: &galleries, + BackendGalleries: &backendGalleries, + AutoloadGalleries: &autoload, + AutoloadBackendGalleries: &autoloadBackend, + } + + appConfig.ApplyRuntimeSettings(rs) + + 
Expect(appConfig.Galleries).To(HaveLen(2)) + Expect(appConfig.Galleries[0].Name).To(Equal("gallery1")) + Expect(appConfig.BackendGalleries).To(HaveLen(1)) + Expect(appConfig.AutoloadGalleries).To(BeTrue()) + Expect(appConfig.AutoloadBackendGalleries).To(BeTrue()) + }) + + It("should apply agent settings", func() { + appConfig := &ApplicationConfig{} + + retentionDays := 14 + + rs := &RuntimeSettings{ + AgentJobRetentionDays: &retentionDays, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.AgentJobRetentionDays).To(Equal(14)) + }) + }) + + Describe("Round-trip conversion", func() { + It("should maintain values through ToRuntimeSettings -> ApplyRuntimeSettings", func() { + original := &ApplicationConfig{ + WatchDog: true, + WatchDogIdle: true, + WatchDogBusy: false, + WatchDogIdleTimeout: 25 * time.Minute, + WatchDogBusyTimeout: 12 * time.Minute, + SingleBackend: false, + MaxActiveBackends: 3, + ParallelBackendRequests: true, + MemoryReclaimerEnabled: true, + MemoryReclaimerThreshold: 0.92, + Threads: 12, + ContextSize: 6144, + F16: true, + Debug: false, + CORS: true, + CSRF: false, + CORSAllowOrigins: "https://test.com", + P2PToken: "round-trip-token", + P2PNetworkID: "round-trip-network", + Federated: true, + AutoloadGalleries: true, + AutoloadBackendGalleries: false, + AgentJobRetentionDays: 60, + } + + // Convert to RuntimeSettings + rs := original.ToRuntimeSettings() + + // Apply to a new ApplicationConfig + target := &ApplicationConfig{} + target.ApplyRuntimeSettings(&rs) + + // Verify all values match + Expect(target.WatchDog).To(Equal(original.WatchDog)) + Expect(target.WatchDogIdle).To(Equal(original.WatchDogIdle)) + Expect(target.WatchDogBusy).To(Equal(original.WatchDogBusy)) + Expect(target.WatchDogIdleTimeout).To(Equal(original.WatchDogIdleTimeout)) + Expect(target.WatchDogBusyTimeout).To(Equal(original.WatchDogBusyTimeout)) + Expect(target.MaxActiveBackends).To(Equal(original.MaxActiveBackends)) + 
Expect(target.ParallelBackendRequests).To(Equal(original.ParallelBackendRequests)) + Expect(target.MemoryReclaimerEnabled).To(Equal(original.MemoryReclaimerEnabled)) + Expect(target.MemoryReclaimerThreshold).To(Equal(original.MemoryReclaimerThreshold)) + Expect(target.Threads).To(Equal(original.Threads)) + Expect(target.ContextSize).To(Equal(original.ContextSize)) + Expect(target.F16).To(Equal(original.F16)) + Expect(target.Debug).To(Equal(original.Debug)) + Expect(target.CORS).To(Equal(original.CORS)) + Expect(target.CSRF).To(Equal(original.CSRF)) + Expect(target.CORSAllowOrigins).To(Equal(original.CORSAllowOrigins)) + Expect(target.P2PToken).To(Equal(original.P2PToken)) + Expect(target.P2PNetworkID).To(Equal(original.P2PNetworkID)) + Expect(target.Federated).To(Equal(original.Federated)) + Expect(target.AutoloadGalleries).To(Equal(original.AutoloadGalleries)) + Expect(target.AutoloadBackendGalleries).To(Equal(original.AutoloadBackendGalleries)) + Expect(target.AgentJobRetentionDays).To(Equal(original.AgentJobRetentionDays)) + }) + + It("should handle empty galleries correctly in round-trip", func() { + original := &ApplicationConfig{ + Galleries: []Gallery{}, + BackendGalleries: []Gallery{}, + ApiKeys: []string{}, + } + + rs := original.ToRuntimeSettings() + target := &ApplicationConfig{} + target.ApplyRuntimeSettings(&rs) + + Expect(target.Galleries).To(BeEmpty()) + Expect(target.BackendGalleries).To(BeEmpty()) + }) + }) + + Describe("Edge cases", func() { + It("should handle invalid timeout string in ApplyRuntimeSettings", func() { + appConfig := &ApplicationConfig{ + WatchDogIdleTimeout: 10 * time.Minute, + } + + invalidTimeout := "not-a-duration" + rs := &RuntimeSettings{ + WatchdogIdleTimeout: &invalidTimeout, + } + + appConfig.ApplyRuntimeSettings(rs) + + // Should remain unchanged due to parse error + Expect(appConfig.WatchDogIdleTimeout).To(Equal(10 * time.Minute)) + }) + + It("should handle zero values in ApplicationConfig", func() { + appConfig := 
&ApplicationConfig{ + // All zero values + } + + rs := appConfig.ToRuntimeSettings() + + // Should still have non-nil pointers with zero/default values + Expect(rs.WatchdogEnabled).ToNot(BeNil()) + Expect(*rs.WatchdogEnabled).To(BeFalse()) + + Expect(rs.Threads).ToNot(BeNil()) + Expect(*rs.Threads).To(Equal(0)) + + Expect(rs.MemoryReclaimerThreshold).ToNot(BeNil()) + Expect(*rs.MemoryReclaimerThreshold).To(Equal(0.0)) + }) + + It("should prefer MaxActiveBackends over SingleBackend when both are set", func() { + appConfig := &ApplicationConfig{} + + maxBackends := 3 + singleBackend := true + + rs := &RuntimeSettings{ + MaxActiveBackends: &maxBackends, + SingleBackend: &singleBackend, + } + + appConfig.ApplyRuntimeSettings(rs) + + // MaxActiveBackends should take precedence + Expect(appConfig.MaxActiveBackends).To(Equal(3)) + Expect(appConfig.SingleBackend).To(BeFalse()) // 3 != 1, so single backend is false + }) + }) +}) diff --git a/core/config/runtime_settings.go b/core/config/runtime_settings.go new file mode 100644 index 000000000000..c02d4fcd7c20 --- /dev/null +++ b/core/config/runtime_settings.go @@ -0,0 +1,56 @@ +package config + +// RuntimeSettings represents runtime configuration that can be changed dynamically. +// This struct is used for: +// - API responses (GET /api/settings) +// - API requests (POST /api/settings) +// - Persisting to runtime_settings.json +// - Loading from runtime_settings.json on startup +// +// All fields are pointers to distinguish between "not set" and "set to zero/false value". 
+type RuntimeSettings struct { + // Watchdog settings + WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"` + WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"` + WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"` + WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` + WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"` + WatchdogInterval *string `json:"watchdog_interval,omitempty"` // Interval between watchdog checks (e.g., 2s, 30s) + + // Backend management + SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead + MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited, 1 = single backend mode) + ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"` + + // Memory Reclaimer settings (works with GPU if available, otherwise RAM) + MemoryReclaimerEnabled *bool `json:"memory_reclaimer_enabled,omitempty"` // Enable memory threshold monitoring + MemoryReclaimerThreshold *float64 `json:"memory_reclaimer_threshold,omitempty"` // Threshold 0.0-1.0 (e.g., 0.95 = 95%) + + // Performance settings + Threads *int `json:"threads,omitempty"` + ContextSize *int `json:"context_size,omitempty"` + F16 *bool `json:"f16,omitempty"` + Debug *bool `json:"debug,omitempty"` + + // Security/CORS settings + CORS *bool `json:"cors,omitempty"` + CSRF *bool `json:"csrf,omitempty"` + CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"` + + // P2P settings + P2PToken *string `json:"p2p_token,omitempty"` + P2PNetworkID *string `json:"p2p_network_id,omitempty"` + Federated *bool `json:"federated,omitempty"` + + // Gallery settings + Galleries *[]Gallery `json:"galleries,omitempty"` + BackendGalleries *[]Gallery `json:"backend_galleries,omitempty"` + AutoloadGalleries *bool `json:"autoload_galleries,omitempty"` + AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"` + + 
// API keys - No omitempty as we need to save empty arrays to clear keys + ApiKeys *[]string `json:"api_keys"` + + // Agent settings + AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"` +} diff --git a/core/http/endpoints/localai/settings.go b/core/http/endpoints/localai/settings.go index dee77646ed62..1cc7666e02c7 100644 --- a/core/http/endpoints/localai/settings.go +++ b/core/http/endpoints/localai/settings.go @@ -12,115 +12,15 @@ import ( "github.com/mudler/LocalAI/core/application" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/p2p" + "github.com/mudler/LocalAI/core/schema" "github.com/rs/zerolog/log" ) -type SettingsResponse struct { - Success bool `json:"success"` - Error string `json:"error,omitempty"` - Message string `json:"message,omitempty"` -} - -type RuntimeSettings struct { - WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"` - WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"` - WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"` - WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` - WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"` - SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead - MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited, 1 = single backend mode) - ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"` - Threads *int `json:"threads,omitempty"` - ContextSize *int `json:"context_size,omitempty"` - F16 *bool `json:"f16,omitempty"` - Debug *bool `json:"debug,omitempty"` - CORS *bool `json:"cors,omitempty"` - CSRF *bool `json:"csrf,omitempty"` - CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"` - P2PToken *string `json:"p2p_token,omitempty"` - P2PNetworkID *string `json:"p2p_network_id,omitempty"` - Federated *bool `json:"federated,omitempty"` - Galleries *[]config.Gallery 
`json:"galleries,omitempty"` - BackendGalleries *[]config.Gallery `json:"backend_galleries,omitempty"` - AutoloadGalleries *bool `json:"autoload_galleries,omitempty"` - AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"` - ApiKeys *[]string `json:"api_keys"` // No omitempty - we need to save empty arrays to clear keys - AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"` -} - // GetSettingsEndpoint returns current settings with precedence (env > file > defaults) func GetSettingsEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { appConfig := app.ApplicationConfig() - startupConfig := app.StartupConfig() - - if startupConfig == nil { - // Fallback if startup config not available - startupConfig = appConfig - } - - settings := RuntimeSettings{} - - // Set all current values (using pointers for RuntimeSettings) - watchdogIdle := appConfig.WatchDogIdle - watchdogBusy := appConfig.WatchDogBusy - watchdogEnabled := appConfig.WatchDog - singleBackend := appConfig.SingleBackend - maxActiveBackends := appConfig.MaxActiveBackends - parallelBackendRequests := appConfig.ParallelBackendRequests - threads := appConfig.Threads - contextSize := appConfig.ContextSize - f16 := appConfig.F16 - debug := appConfig.Debug - cors := appConfig.CORS - csrf := appConfig.CSRF - corsAllowOrigins := appConfig.CORSAllowOrigins - p2pToken := appConfig.P2PToken - p2pNetworkID := appConfig.P2PNetworkID - federated := appConfig.Federated - galleries := appConfig.Galleries - backendGalleries := appConfig.BackendGalleries - autoloadGalleries := appConfig.AutoloadGalleries - autoloadBackendGalleries := appConfig.AutoloadBackendGalleries - apiKeys := appConfig.ApiKeys - agentJobRetentionDays := appConfig.AgentJobRetentionDays - - settings.WatchdogIdleEnabled = &watchdogIdle - settings.WatchdogBusyEnabled = &watchdogBusy - settings.WatchdogEnabled = &watchdogEnabled - settings.SingleBackend = &singleBackend - 
settings.MaxActiveBackends = &maxActiveBackends - settings.ParallelBackendRequests = &parallelBackendRequests - settings.Threads = &threads - settings.ContextSize = &contextSize - settings.F16 = &f16 - settings.Debug = &debug - settings.CORS = &cors - settings.CSRF = &csrf - settings.CORSAllowOrigins = &corsAllowOrigins - settings.P2PToken = &p2pToken - settings.P2PNetworkID = &p2pNetworkID - settings.Federated = &federated - settings.Galleries = &galleries - settings.BackendGalleries = &backendGalleries - settings.AutoloadGalleries = &autoloadGalleries - settings.AutoloadBackendGalleries = &autoloadBackendGalleries - settings.ApiKeys = &apiKeys - settings.AgentJobRetentionDays = &agentJobRetentionDays - - var idleTimeout, busyTimeout string - if appConfig.WatchDogIdleTimeout > 0 { - idleTimeout = appConfig.WatchDogIdleTimeout.String() - } else { - idleTimeout = "15m" // default - } - if appConfig.WatchDogBusyTimeout > 0 { - busyTimeout = appConfig.WatchDogBusyTimeout.String() - } else { - busyTimeout = "5m" // default - } - settings.WatchdogIdleTimeout = &idleTimeout - settings.WatchdogBusyTimeout = &busyTimeout + settings := appConfig.ToRuntimeSettings() return c.JSON(http.StatusOK, settings) } } @@ -132,21 +32,20 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { startupConfig := app.StartupConfig() if startupConfig == nil { - // Fallback if startup config not available startupConfig = appConfig } body, err := io.ReadAll(c.Request().Body) if err != nil { - return c.JSON(http.StatusBadRequest, SettingsResponse{ + return c.JSON(http.StatusBadRequest, schema.SettingsResponse{ Success: false, Error: "Failed to read request body: " + err.Error(), }) } - var settings RuntimeSettings + var settings config.RuntimeSettings if err := json.Unmarshal(body, &settings); err != nil { - return c.JSON(http.StatusBadRequest, SettingsResponse{ + return c.JSON(http.StatusBadRequest, schema.SettingsResponse{ Success: false, Error: "Failed to parse JSON: " +
err.Error(), }) @@ -154,27 +53,33 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { // Validate timeouts if provided if settings.WatchdogIdleTimeout != nil { - _, err := time.ParseDuration(*settings.WatchdogIdleTimeout) - if err != nil { - return c.JSON(http.StatusBadRequest, SettingsResponse{ + if _, err := time.ParseDuration(*settings.WatchdogIdleTimeout); err != nil { + return c.JSON(http.StatusBadRequest, schema.SettingsResponse{ Success: false, Error: "Invalid watchdog_idle_timeout format: " + err.Error(), }) } } if settings.WatchdogBusyTimeout != nil { - _, err := time.ParseDuration(*settings.WatchdogBusyTimeout) - if err != nil { - return c.JSON(http.StatusBadRequest, SettingsResponse{ + if _, err := time.ParseDuration(*settings.WatchdogBusyTimeout); err != nil { + return c.JSON(http.StatusBadRequest, schema.SettingsResponse{ Success: false, Error: "Invalid watchdog_busy_timeout format: " + err.Error(), }) } } + if settings.WatchdogInterval != nil { + if _, err := time.ParseDuration(*settings.WatchdogInterval); err != nil { + return c.JSON(http.StatusBadRequest, schema.SettingsResponse{ + Success: false, + Error: "Invalid watchdog_interval format: " + err.Error(), + }) + } + } // Save to file if appConfig.DynamicConfigsDir == "" { - return c.JSON(http.StatusBadRequest, SettingsResponse{ + return c.JSON(http.StatusBadRequest, schema.SettingsResponse{ Success: false, Error: "DynamicConfigsDir is not set", }) @@ -183,133 +88,38 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { settingsFile := filepath.Join(appConfig.DynamicConfigsDir, "runtime_settings.json") settingsJSON, err := json.MarshalIndent(settings, "", " ") if err != nil { - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, Error: "Failed to marshal settings: " + err.Error(), }) } if err := os.WriteFile(settingsFile, settingsJSON, 0600); err 
!= nil { - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, Error: "Failed to write settings file: " + err.Error(), }) } - // Apply settings immediately, checking env var overrides per field - watchdogChanged := false - if settings.WatchdogEnabled != nil { - appConfig.WatchDog = *settings.WatchdogEnabled - watchdogChanged = true - } - if settings.WatchdogIdleEnabled != nil { - appConfig.WatchDogIdle = *settings.WatchdogIdleEnabled - if appConfig.WatchDogIdle { - appConfig.WatchDog = true - } - watchdogChanged = true - } - if settings.WatchdogBusyEnabled != nil { - appConfig.WatchDogBusy = *settings.WatchdogBusyEnabled - if appConfig.WatchDogBusy { - appConfig.WatchDog = true - } - watchdogChanged = true - } - if settings.WatchdogIdleTimeout != nil { - dur, _ := time.ParseDuration(*settings.WatchdogIdleTimeout) - appConfig.WatchDogIdleTimeout = dur - watchdogChanged = true - } - if settings.WatchdogBusyTimeout != nil { - dur, _ := time.ParseDuration(*settings.WatchdogBusyTimeout) - appConfig.WatchDogBusyTimeout = dur - watchdogChanged = true - } - if settings.MaxActiveBackends != nil { - appConfig.MaxActiveBackends = *settings.MaxActiveBackends - // For backward compatibility, update SingleBackend too - appConfig.SingleBackend = (*settings.MaxActiveBackends == 1) - watchdogChanged = true // LRU limit is managed by watchdog - } else if settings.SingleBackend != nil { - // Legacy support: SingleBackend maps to MaxActiveBackends = 1 - appConfig.SingleBackend = *settings.SingleBackend - if *settings.SingleBackend { - appConfig.MaxActiveBackends = 1 - } else { - appConfig.MaxActiveBackends = 0 - } - watchdogChanged = true // LRU limit is managed by watchdog - } - if settings.ParallelBackendRequests != nil { - appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests - } - if settings.Threads != nil { - appConfig.Threads = *settings.Threads - } - if 
settings.ContextSize != nil { - appConfig.ContextSize = *settings.ContextSize - } - if settings.F16 != nil { - appConfig.F16 = *settings.F16 - } - if settings.Debug != nil { - appConfig.Debug = *settings.Debug - } - if settings.CORS != nil { - appConfig.CORS = *settings.CORS - } - if settings.CSRF != nil { - appConfig.CSRF = *settings.CSRF - } - if settings.CORSAllowOrigins != nil { - appConfig.CORSAllowOrigins = *settings.CORSAllowOrigins - } - if settings.P2PToken != nil { - appConfig.P2PToken = *settings.P2PToken - } - if settings.P2PNetworkID != nil { - appConfig.P2PNetworkID = *settings.P2PNetworkID - } - if settings.Federated != nil { - appConfig.Federated = *settings.Federated - } - if settings.Galleries != nil { - appConfig.Galleries = *settings.Galleries - } - if settings.BackendGalleries != nil { - appConfig.BackendGalleries = *settings.BackendGalleries - } - if settings.AutoloadGalleries != nil { - appConfig.AutoloadGalleries = *settings.AutoloadGalleries - } - if settings.AutoloadBackendGalleries != nil { - appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries - } - agentJobChanged := false - if settings.AgentJobRetentionDays != nil { - appConfig.AgentJobRetentionDays = *settings.AgentJobRetentionDays - agentJobChanged = true - } + // Apply settings using centralized method + watchdogChanged := appConfig.ApplyRuntimeSettings(&settings) + + // Handle API keys specially (merge with startup keys) if settings.ApiKeys != nil { - // API keys from env vars (startup) should be kept, runtime settings keys are added - // Combine startup keys (env vars) with runtime settings keys envKeys := startupConfig.ApiKeys runtimeKeys := *settings.ApiKeys - // Merge: env keys first (they take precedence), then runtime keys appConfig.ApiKeys = append(envKeys, runtimeKeys...) - - // Note: We only save to runtime_settings.json (not api_keys.json) to avoid duplication - // The runtime_settings.json is the unified config file. 
If api_keys.json exists, - // it will be loaded first, but runtime_settings.json takes precedence and deduplicates. } + // Check if agent job retention changed + agentJobChanged := settings.AgentJobRetentionDays != nil + // Restart watchdog if settings changed if watchdogChanged { - if settings.WatchdogEnabled != nil && !*settings.WatchdogEnabled || settings.WatchdogEnabled == nil { + if settings.WatchdogEnabled != nil && !*settings.WatchdogEnabled { if err := app.StopWatchdog(); err != nil { log.Error().Err(err).Msg("Failed to stop watchdog") - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, Error: "Settings saved but failed to stop watchdog: " + err.Error(), }) @@ -317,7 +127,7 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { } else { if err := app.RestartWatchdog(); err != nil { log.Error().Err(err).Msg("Failed to restart watchdog") - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, Error: "Settings saved but failed to restart watchdog: " + err.Error(), }) @@ -329,7 +139,7 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { if agentJobChanged { if err := app.RestartAgentJobService(); err != nil { log.Error().Err(err).Msg("Failed to restart agent job service") - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, Error: "Settings saved but failed to restart agent job service: " + err.Error(), }) @@ -340,33 +150,30 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { p2pChanged := settings.P2PToken != nil || settings.P2PNetworkID != nil || settings.Federated != nil if p2pChanged { if settings.P2PToken != nil && *settings.P2PToken == "" { - // stop P2P if err := app.StopP2P(); 
err != nil { log.Error().Err(err).Msg("Failed to stop P2P") - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, Error: "Settings saved but failed to stop P2P: " + err.Error(), }) } } else { if settings.P2PToken != nil && *settings.P2PToken == "0" { - // generate a token if users sets 0 (disabled) token := p2p.GenerateToken(60, 60) settings.P2PToken = &token appConfig.P2PToken = token } - // Stop existing P2P if err := app.RestartP2P(); err != nil { - log.Error().Err(err).Msg("Failed to stop P2P") - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + log.Error().Err(err).Msg("Failed to restart P2P") + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, - Error: "Settings saved but failed to stop P2P: " + err.Error(), + Error: "Settings saved but failed to restart P2P: " + err.Error(), }) } } } - return c.JSON(http.StatusOK, SettingsResponse{ + return c.JSON(http.StatusOK, schema.SettingsResponse{ Success: true, Message: "Settings updated successfully", }) diff --git a/core/http/routes/ui_api.go b/core/http/routes/ui_api.go index 9287b31742f9..ae6f868aa2d5 100644 --- a/core/http/routes/ui_api.go +++ b/core/http/routes/ui_api.go @@ -19,6 +19,7 @@ import ( "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/pkg/model" + "github.com/mudler/LocalAI/pkg/xsysinfo" "github.com/rs/zerolog/log" ) @@ -917,6 +918,30 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model }) }) + // Resources API endpoint - unified memory info (GPU if available, otherwise RAM) + app.GET("/api/resources", func(c echo.Context) error { + resourceInfo := xsysinfo.GetResourceInfo() + + // Format watchdog interval + watchdogInterval := "2s" // default + if appConfig.WatchDogInterval > 0 { + watchdogInterval = appConfig.WatchDogInterval.String() + } + + response := 
map[string]interface{}{ + "type": resourceInfo.Type, // "gpu" or "ram" + "available": resourceInfo.Available, + "gpus": resourceInfo.GPUs, + "ram": resourceInfo.RAM, + "aggregate": resourceInfo.Aggregate, + "reclaimer_enabled": appConfig.MemoryReclaimerEnabled, + "reclaimer_threshold": appConfig.MemoryReclaimerThreshold, + "watchdog_interval": watchdogInterval, + } + + return c.JSON(200, response) + }) + if !appConfig.DisableRuntimeSettings { // Settings API app.GET("/api/settings", localai.GetSettingsEndpoint(applicationInstance)) diff --git a/core/http/views/index.html b/core/http/views/index.html index 598a7b0adc67..f5222c3639c0 100644 --- a/core/http/views/index.html +++ b/core/http/views/index.html @@ -462,6 +462,27 @@