diff --git a/core/application/config_file_watcher.go b/core/application/config_file_watcher.go index 30b3e5ad6f62..4a19cc128d96 100644 --- a/core/application/config_file_watcher.go +++ b/core/application/config_file_watcher.go @@ -185,33 +185,6 @@ func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHan return handler } -type runtimeSettings struct { - WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"` - WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"` - WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"` - WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` - WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"` - SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead - MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited, 1 = single backend mode) - ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"` - Threads *int `json:"threads,omitempty"` - ContextSize *int `json:"context_size,omitempty"` - F16 *bool `json:"f16,omitempty"` - Debug *bool `json:"debug,omitempty"` - CORS *bool `json:"cors,omitempty"` - CSRF *bool `json:"csrf,omitempty"` - CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"` - P2PToken *string `json:"p2p_token,omitempty"` - P2PNetworkID *string `json:"p2p_network_id,omitempty"` - Federated *bool `json:"federated,omitempty"` - Galleries *[]config.Gallery `json:"galleries,omitempty"` - BackendGalleries *[]config.Gallery `json:"backend_galleries,omitempty"` - AutoloadGalleries *bool `json:"autoload_galleries,omitempty"` - AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"` - ApiKeys *[]string `json:"api_keys,omitempty"` - AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"` -} - func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHandler { handler := 
func(fileContent []byte, appConfig *config.ApplicationConfig) error { log.Debug().Msg("processing runtime_settings.json") @@ -227,6 +200,8 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand envSingleBackend := appConfig.SingleBackend == startupAppConfig.SingleBackend envMaxActiveBackends := appConfig.MaxActiveBackends == startupAppConfig.MaxActiveBackends envParallelRequests := appConfig.ParallelBackendRequests == startupAppConfig.ParallelBackendRequests + envMemoryReclaimerEnabled := appConfig.MemoryReclaimerEnabled == startupAppConfig.MemoryReclaimerEnabled + envMemoryReclaimerThreshold := appConfig.MemoryReclaimerThreshold == startupAppConfig.MemoryReclaimerThreshold envThreads := appConfig.Threads == startupAppConfig.Threads envContextSize := appConfig.ContextSize == startupAppConfig.ContextSize envF16 := appConfig.F16 == startupAppConfig.F16 @@ -242,7 +217,7 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand envAgentJobRetentionDays := appConfig.AgentJobRetentionDays == startupAppConfig.AgentJobRetentionDays if len(fileContent) > 0 { - var settings runtimeSettings + var settings config.RuntimeSettings err := json.Unmarshal(fileContent, &settings) if err != nil { return err @@ -294,6 +269,15 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand if settings.ParallelBackendRequests != nil && !envParallelRequests { appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests } + if settings.MemoryReclaimerEnabled != nil && !envMemoryReclaimerEnabled { + appConfig.MemoryReclaimerEnabled = *settings.MemoryReclaimerEnabled + if appConfig.MemoryReclaimerEnabled { + appConfig.WatchDog = true // Memory reclaimer requires watchdog + } + } + if settings.MemoryReclaimerThreshold != nil && !envMemoryReclaimerThreshold { + appConfig.MemoryReclaimerThreshold = *settings.MemoryReclaimerThreshold + } if settings.Threads != nil && !envThreads { appConfig.Threads = 
*settings.Threads } diff --git a/core/application/startup.go b/core/application/startup.go index 3a238655d28c..d5e06c4e2b94 100644 --- a/core/application/startup.go +++ b/core/application/startup.go @@ -218,17 +218,7 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) { return } - var settings struct { - WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"` - WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"` - WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"` - WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` - WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"` - SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead - MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited) - ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"` - AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"` - } + var settings config.RuntimeSettings if err := json.Unmarshal(fileContent, &settings); err != nil { log.Warn().Err(err).Msg("failed to parse runtime_settings.json") @@ -281,6 +271,16 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) { } } } + if settings.WatchdogInterval != nil { + if options.WatchDogInterval == 0 { + dur, err := time.ParseDuration(*settings.WatchdogInterval) + if err == nil { + options.WatchDogInterval = dur + } else { + log.Warn().Err(err).Str("interval", *settings.WatchdogInterval).Msg("invalid watchdog interval in runtime_settings.json") + } + } + } // Handle MaxActiveBackends (new) and SingleBackend (deprecated) if settings.MaxActiveBackends != nil { // Only apply if current value is default (0), suggesting it wasn't set from env var @@ -303,6 +303,21 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) { options.ParallelBackendRequests = *settings.ParallelBackendRequests } } + if 
settings.MemoryReclaimerEnabled != nil { + // Only apply if current value is default (false), suggesting it wasn't set from env var + if !options.MemoryReclaimerEnabled { + options.MemoryReclaimerEnabled = *settings.MemoryReclaimerEnabled + if options.MemoryReclaimerEnabled { + options.WatchDog = true // Memory reclaimer requires watchdog + } + } + } + if settings.MemoryReclaimerThreshold != nil { + // Only apply if current value is default (0), suggesting it wasn't set from env var + if options.MemoryReclaimerThreshold == 0 { + options.MemoryReclaimerThreshold = *settings.MemoryReclaimerThreshold + } + } if settings.AgentJobRetentionDays != nil { // Only apply if current value is default (0), suggesting it wasn't set from env var if options.AgentJobRetentionDays == 0 { @@ -323,19 +338,24 @@ func initializeWatchdog(application *Application, options *config.ApplicationCon // Get effective max active backends (considers both MaxActiveBackends and deprecated SingleBackend) lruLimit := options.GetEffectiveMaxActiveBackends() - // Create watchdog if enabled OR if LRU limit is set - if options.WatchDog || lruLimit > 0 { + // Create watchdog if enabled OR if LRU limit is set OR if memory reclaimer is enabled + if options.WatchDog || lruLimit > 0 || options.MemoryReclaimerEnabled { wd := model.NewWatchDog( - application.ModelLoader(), - options.WatchDogBusyTimeout, - options.WatchDogIdleTimeout, - options.WatchDogBusy, - options.WatchDogIdle, - lruLimit) + model.WithProcessManager(application.ModelLoader()), + model.WithBusyTimeout(options.WatchDogBusyTimeout), + model.WithIdleTimeout(options.WatchDogIdleTimeout), + model.WithWatchdogInterval(options.WatchDogInterval), + model.WithBusyCheck(options.WatchDogBusy), + model.WithIdleCheck(options.WatchDogIdle), + model.WithLRULimit(lruLimit), + model.WithMemoryReclaimer(options.MemoryReclaimerEnabled, options.MemoryReclaimerThreshold), + ) application.ModelLoader().SetWatchDog(wd) - // Start watchdog goroutine only if 
busy/idle checks are enabled - if options.WatchDogBusy || options.WatchDogIdle { + // Start watchdog goroutine if any periodic checks are enabled + // LRU eviction doesn't need the Run() loop - it's triggered on model load + // But memory reclaimer needs the Run() loop for periodic checking + if options.WatchDogBusy || options.WatchDogIdle || options.MemoryReclaimerEnabled { go wd.Run() } diff --git a/core/application/watchdog.go b/core/application/watchdog.go index e82ac28dcaef..bceb06e19c4b 100644 --- a/core/application/watchdog.go +++ b/core/application/watchdog.go @@ -23,24 +23,28 @@ func (a *Application) startWatchdog() error { // Get effective max active backends (considers both MaxActiveBackends and deprecated SingleBackend) lruLimit := appConfig.GetEffectiveMaxActiveBackends() - // Create watchdog if enabled OR if LRU limit is set + // Create watchdog if enabled OR if LRU limit is set OR if memory reclaimer is enabled // LRU eviction requires watchdog infrastructure even without busy/idle checks - if appConfig.WatchDog || lruLimit > 0 { + if appConfig.WatchDog || lruLimit > 0 || appConfig.MemoryReclaimerEnabled { wd := model.NewWatchDog( - a.modelLoader, - appConfig.WatchDogBusyTimeout, - appConfig.WatchDogIdleTimeout, - appConfig.WatchDogBusy, - appConfig.WatchDogIdle, - lruLimit) + model.WithProcessManager(a.modelLoader), + model.WithBusyTimeout(appConfig.WatchDogBusyTimeout), + model.WithIdleTimeout(appConfig.WatchDogIdleTimeout), + model.WithWatchdogInterval(appConfig.WatchDogInterval), + model.WithBusyCheck(appConfig.WatchDogBusy), + model.WithIdleCheck(appConfig.WatchDogIdle), + model.WithLRULimit(lruLimit), + model.WithMemoryReclaimer(appConfig.MemoryReclaimerEnabled, appConfig.MemoryReclaimerThreshold), + ) a.modelLoader.SetWatchDog(wd) // Create new stop channel a.watchdogStop = make(chan bool, 1) - // Start watchdog goroutine only if busy/idle checks are enabled + // Start watchdog goroutine if any periodic checks are enabled // LRU eviction 
doesn't need the Run() loop - it's triggered on model load - if appConfig.WatchDogBusy || appConfig.WatchDogIdle { + // But memory reclaimer needs the Run() loop for periodic checking + if appConfig.WatchDogBusy || appConfig.WatchDogIdle || appConfig.MemoryReclaimerEnabled { go wd.Run() } @@ -56,7 +60,14 @@ func (a *Application) startWatchdog() error { } }() - log.Info().Int("lruLimit", lruLimit).Bool("busyCheck", appConfig.WatchDogBusy).Bool("idleCheck", appConfig.WatchDogIdle).Msg("Watchdog started with new settings") + log.Info(). + Int("lruLimit", lruLimit). + Bool("busyCheck", appConfig.WatchDogBusy). + Bool("idleCheck", appConfig.WatchDogIdle). + Bool("memoryReclaimer", appConfig.MemoryReclaimerEnabled). + Float64("memoryThreshold", appConfig.MemoryReclaimerThreshold). + Dur("interval", appConfig.WatchDogInterval). + Msg("Watchdog started with new settings") } else { log.Info().Msg("Watchdog disabled") } diff --git a/core/cli/run.go b/core/cli/run.go index 4df4fbdf3ba1..a37a19d3512c 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -72,6 +72,8 @@ type RunCMD struct { WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"` EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"` WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"` + EnableMemoryReclaimer bool `env:"LOCALAI_MEMORY_RECLAIMER,MEMORY_RECLAIMER,LOCALAI_GPU_RECLAIMER,GPU_RECLAIMER" default:"false" help:"Enable memory threshold monitoring to auto-evict backends when memory usage exceeds threshold (uses GPU VRAM if available, otherwise RAM)" group:"backends"` + MemoryReclaimerThreshold float64 
`env:"LOCALAI_MEMORY_RECLAIMER_THRESHOLD,MEMORY_RECLAIMER_THRESHOLD,LOCALAI_GPU_RECLAIMER_THRESHOLD,GPU_RECLAIMER_THRESHOLD" default:"0.95" help:"Memory usage threshold (0.0-1.0) that triggers backend eviction (default 0.95 = 95%%)" group:"backends"` Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"` DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"` MachineTag string `env:"LOCALAI_MACHINE_TAG,MACHINE_TAG" help:"Add Machine-Tag header to each response which is useful to track the machine in the P2P network" group:"api"` @@ -200,6 +202,12 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { opts = append(opts, config.SetWatchDogBusyTimeout(dur)) } } + + // Handle memory reclaimer (uses GPU VRAM if available, otherwise RAM) + if r.EnableMemoryReclaimer { + opts = append(opts, config.WithMemoryReclaimer(true, r.MemoryReclaimerThreshold)) + } + if r.ParallelRequests { opts = append(opts, config.EnableParallelBackendRequests) } diff --git a/core/config/application_config.go b/core/config/application_config.go index c67e24f5c697..e70f721babd8 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -60,9 +60,14 @@ type ApplicationConfig struct { WatchDogBusy bool WatchDog bool + // Memory Reclaimer settings (works with GPU if available, otherwise RAM) + MemoryReclaimerEnabled bool // Enable memory threshold monitoring + MemoryReclaimerThreshold float64 // Threshold 0.0-1.0 (e.g., 0.95 = 95%) + ModelsURL []string WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration + WatchDogInterval time.Duration // Interval between watchdog checks MachineTag string @@ -187,6 +192,39 @@ func SetWatchDogIdleTimeout(t time.Duration) AppOption { } } +// EnableMemoryReclaimer enables memory threshold monitoring. +// When enabled, the watchdog will evict backends if memory usage exceeds the threshold. 
+// Works with GPU VRAM if available, otherwise uses system RAM. +var EnableMemoryReclaimer = func(o *ApplicationConfig) { + o.MemoryReclaimerEnabled = true + o.WatchDog = true // Memory reclaimer requires watchdog infrastructure +} + +// SetMemoryReclaimerThreshold sets the memory usage threshold (0.0-1.0). +// When memory usage exceeds this threshold, backends will be evicted using LRU strategy. +func SetMemoryReclaimerThreshold(threshold float64) AppOption { + return func(o *ApplicationConfig) { + if threshold > 0 && threshold <= 1.0 { + o.MemoryReclaimerThreshold = threshold + o.MemoryReclaimerEnabled = true + o.WatchDog = true // Memory reclaimer requires watchdog infrastructure + } + } +} + +// WithMemoryReclaimer configures the memory reclaimer with the given settings +func WithMemoryReclaimer(enabled bool, threshold float64) AppOption { + return func(o *ApplicationConfig) { + o.MemoryReclaimerEnabled = enabled + if threshold > 0 && threshold <= 1.0 { + o.MemoryReclaimerThreshold = threshold + } + if enabled { + o.WatchDog = true // Memory reclaimer requires watchdog infrastructure + } + } +} + // EnableSingleBackend is deprecated: use SetMaxActiveBackends(1) instead. // This is kept for backward compatibility. var EnableSingleBackend = func(o *ApplicationConfig) { @@ -454,6 +492,208 @@ func (o *ApplicationConfig) ToConfigLoaderOptions() []ConfigLoaderOption { } } +// ToRuntimeSettings converts ApplicationConfig to RuntimeSettings for API responses and JSON serialization. +// This provides a single source of truth - ApplicationConfig holds the live values, +// and this method creates a RuntimeSettings snapshot for external consumption. 
+func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings { + // Create local copies for pointer fields + watchdogEnabled := o.WatchDog + watchdogIdle := o.WatchDogIdle + watchdogBusy := o.WatchDogBusy + singleBackend := o.SingleBackend + maxActiveBackends := o.MaxActiveBackends + parallelBackendRequests := o.ParallelBackendRequests + memoryReclaimerEnabled := o.MemoryReclaimerEnabled + memoryReclaimerThreshold := o.MemoryReclaimerThreshold + threads := o.Threads + contextSize := o.ContextSize + f16 := o.F16 + debug := o.Debug + cors := o.CORS + csrf := o.CSRF + corsAllowOrigins := o.CORSAllowOrigins + p2pToken := o.P2PToken + p2pNetworkID := o.P2PNetworkID + federated := o.Federated + galleries := o.Galleries + backendGalleries := o.BackendGalleries + autoloadGalleries := o.AutoloadGalleries + autoloadBackendGalleries := o.AutoloadBackendGalleries + apiKeys := o.ApiKeys + agentJobRetentionDays := o.AgentJobRetentionDays + + // Format timeouts as strings + var idleTimeout, busyTimeout, watchdogInterval string + if o.WatchDogIdleTimeout > 0 { + idleTimeout = o.WatchDogIdleTimeout.String() + } else { + idleTimeout = "15m" // default + } + if o.WatchDogBusyTimeout > 0 { + busyTimeout = o.WatchDogBusyTimeout.String() + } else { + busyTimeout = "5m" // default + } + if o.WatchDogInterval > 0 { + watchdogInterval = o.WatchDogInterval.String() + } else { + watchdogInterval = "2s" // default + } + + return RuntimeSettings{ + WatchdogEnabled: &watchdogEnabled, + WatchdogIdleEnabled: &watchdogIdle, + WatchdogBusyEnabled: &watchdogBusy, + WatchdogIdleTimeout: &idleTimeout, + WatchdogBusyTimeout: &busyTimeout, + WatchdogInterval: &watchdogInterval, + SingleBackend: &singleBackend, + MaxActiveBackends: &maxActiveBackends, + ParallelBackendRequests: ¶llelBackendRequests, + MemoryReclaimerEnabled: &memoryReclaimerEnabled, + MemoryReclaimerThreshold: &memoryReclaimerThreshold, + Threads: &threads, + ContextSize: &contextSize, + F16: &f16, + Debug: &debug, + CORS: &cors, + 
CSRF: &csrf, + CORSAllowOrigins: &corsAllowOrigins, + P2PToken: &p2pToken, + P2PNetworkID: &p2pNetworkID, + Federated: &federated, + Galleries: &galleries, + BackendGalleries: &backendGalleries, + AutoloadGalleries: &autoloadGalleries, + AutoloadBackendGalleries: &autoloadBackendGalleries, + ApiKeys: &apiKeys, + AgentJobRetentionDays: &agentJobRetentionDays, + } +} + +// ApplyRuntimeSettings applies RuntimeSettings to ApplicationConfig. +// Only non-nil fields in RuntimeSettings are applied. +// Returns true if watchdog-related settings changed (requiring restart). +func (o *ApplicationConfig) ApplyRuntimeSettings(settings *RuntimeSettings) (requireRestart bool) { + if settings == nil { + return false + } + + if settings.WatchdogEnabled != nil { + o.WatchDog = *settings.WatchdogEnabled + requireRestart = true + } + if settings.WatchdogIdleEnabled != nil { + o.WatchDogIdle = *settings.WatchdogIdleEnabled + if o.WatchDogIdle { + o.WatchDog = true + } + requireRestart = true + } + if settings.WatchdogBusyEnabled != nil { + o.WatchDogBusy = *settings.WatchdogBusyEnabled + if o.WatchDogBusy { + o.WatchDog = true + } + requireRestart = true + } + if settings.WatchdogIdleTimeout != nil { + if dur, err := time.ParseDuration(*settings.WatchdogIdleTimeout); err == nil { + o.WatchDogIdleTimeout = dur + requireRestart = true + } + } + if settings.WatchdogBusyTimeout != nil { + if dur, err := time.ParseDuration(*settings.WatchdogBusyTimeout); err == nil { + o.WatchDogBusyTimeout = dur + requireRestart = true + } + } + if settings.WatchdogInterval != nil { + if dur, err := time.ParseDuration(*settings.WatchdogInterval); err == nil { + o.WatchDogInterval = dur + requireRestart = true + } + } + if settings.MaxActiveBackends != nil { + o.MaxActiveBackends = *settings.MaxActiveBackends + o.SingleBackend = (*settings.MaxActiveBackends == 1) + requireRestart = true + } else if settings.SingleBackend != nil { + o.SingleBackend = *settings.SingleBackend + if *settings.SingleBackend { + 
o.MaxActiveBackends = 1 + } else { + o.MaxActiveBackends = 0 + } + requireRestart = true + } + if settings.ParallelBackendRequests != nil { + o.ParallelBackendRequests = *settings.ParallelBackendRequests + } + if settings.MemoryReclaimerEnabled != nil { + o.MemoryReclaimerEnabled = *settings.MemoryReclaimerEnabled + if *settings.MemoryReclaimerEnabled { + o.WatchDog = true + } + requireRestart = true + } + if settings.MemoryReclaimerThreshold != nil { + if *settings.MemoryReclaimerThreshold > 0 && *settings.MemoryReclaimerThreshold <= 1.0 { + o.MemoryReclaimerThreshold = *settings.MemoryReclaimerThreshold + requireRestart = true + } + } + if settings.Threads != nil { + o.Threads = *settings.Threads + } + if settings.ContextSize != nil { + o.ContextSize = *settings.ContextSize + } + if settings.F16 != nil { + o.F16 = *settings.F16 + } + if settings.Debug != nil { + o.Debug = *settings.Debug + } + if settings.CORS != nil { + o.CORS = *settings.CORS + } + if settings.CSRF != nil { + o.CSRF = *settings.CSRF + } + if settings.CORSAllowOrigins != nil { + o.CORSAllowOrigins = *settings.CORSAllowOrigins + } + if settings.P2PToken != nil { + o.P2PToken = *settings.P2PToken + } + if settings.P2PNetworkID != nil { + o.P2PNetworkID = *settings.P2PNetworkID + } + if settings.Federated != nil { + o.Federated = *settings.Federated + } + if settings.Galleries != nil { + o.Galleries = *settings.Galleries + } + if settings.BackendGalleries != nil { + o.BackendGalleries = *settings.BackendGalleries + } + if settings.AutoloadGalleries != nil { + o.AutoloadGalleries = *settings.AutoloadGalleries + } + if settings.AutoloadBackendGalleries != nil { + o.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries + } + if settings.AgentJobRetentionDays != nil { + o.AgentJobRetentionDays = *settings.AgentJobRetentionDays + } + // Note: ApiKeys requires special handling (merging with startup keys) - handled in caller + + return requireRestart +} + // func WithMetrics(meter 
*metrics.Metrics) AppOption { // return func(o *StartupOptions) { // o.Metrics = meter diff --git a/core/config/application_config_test.go b/core/config/application_config_test.go new file mode 100644 index 000000000000..c6d4fbecd6bc --- /dev/null +++ b/core/config/application_config_test.go @@ -0,0 +1,577 @@ +package config + +import ( + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("ApplicationConfig RuntimeSettings Conversion", func() { + Describe("ToRuntimeSettings", func() { + It("should convert all fields correctly", func() { + appConfig := &ApplicationConfig{ + WatchDog: true, + WatchDogIdle: true, + WatchDogBusy: true, + WatchDogIdleTimeout: 20 * time.Minute, + WatchDogBusyTimeout: 10 * time.Minute, + SingleBackend: false, + MaxActiveBackends: 5, + ParallelBackendRequests: true, + MemoryReclaimerEnabled: true, + MemoryReclaimerThreshold: 0.85, + Threads: 8, + ContextSize: 4096, + F16: true, + Debug: true, + CORS: true, + CSRF: true, + CORSAllowOrigins: "https://example.com", + P2PToken: "test-token", + P2PNetworkID: "test-network", + Federated: true, + Galleries: []Gallery{{Name: "test-gallery", URL: "https://example.com"}}, + BackendGalleries: []Gallery{{Name: "backend-gallery", URL: "https://example.com/backend"}}, + AutoloadGalleries: true, + AutoloadBackendGalleries: true, + ApiKeys: []string{"key1", "key2"}, + AgentJobRetentionDays: 30, + } + + rs := appConfig.ToRuntimeSettings() + + Expect(rs.WatchdogEnabled).ToNot(BeNil()) + Expect(*rs.WatchdogEnabled).To(BeTrue()) + + Expect(rs.WatchdogIdleEnabled).ToNot(BeNil()) + Expect(*rs.WatchdogIdleEnabled).To(BeTrue()) + + Expect(rs.WatchdogBusyEnabled).ToNot(BeNil()) + Expect(*rs.WatchdogBusyEnabled).To(BeTrue()) + + Expect(rs.WatchdogIdleTimeout).ToNot(BeNil()) + Expect(*rs.WatchdogIdleTimeout).To(Equal("20m0s")) + + Expect(rs.WatchdogBusyTimeout).ToNot(BeNil()) + Expect(*rs.WatchdogBusyTimeout).To(Equal("10m0s")) + + Expect(rs.SingleBackend).ToNot(BeNil()) + 
Expect(*rs.SingleBackend).To(BeFalse()) + + Expect(rs.MaxActiveBackends).ToNot(BeNil()) + Expect(*rs.MaxActiveBackends).To(Equal(5)) + + Expect(rs.ParallelBackendRequests).ToNot(BeNil()) + Expect(*rs.ParallelBackendRequests).To(BeTrue()) + + Expect(rs.MemoryReclaimerEnabled).ToNot(BeNil()) + Expect(*rs.MemoryReclaimerEnabled).To(BeTrue()) + + Expect(rs.MemoryReclaimerThreshold).ToNot(BeNil()) + Expect(*rs.MemoryReclaimerThreshold).To(Equal(0.85)) + + Expect(rs.Threads).ToNot(BeNil()) + Expect(*rs.Threads).To(Equal(8)) + + Expect(rs.ContextSize).ToNot(BeNil()) + Expect(*rs.ContextSize).To(Equal(4096)) + + Expect(rs.F16).ToNot(BeNil()) + Expect(*rs.F16).To(BeTrue()) + + Expect(rs.Debug).ToNot(BeNil()) + Expect(*rs.Debug).To(BeTrue()) + + Expect(rs.CORS).ToNot(BeNil()) + Expect(*rs.CORS).To(BeTrue()) + + Expect(rs.CSRF).ToNot(BeNil()) + Expect(*rs.CSRF).To(BeTrue()) + + Expect(rs.CORSAllowOrigins).ToNot(BeNil()) + Expect(*rs.CORSAllowOrigins).To(Equal("https://example.com")) + + Expect(rs.P2PToken).ToNot(BeNil()) + Expect(*rs.P2PToken).To(Equal("test-token")) + + Expect(rs.P2PNetworkID).ToNot(BeNil()) + Expect(*rs.P2PNetworkID).To(Equal("test-network")) + + Expect(rs.Federated).ToNot(BeNil()) + Expect(*rs.Federated).To(BeTrue()) + + Expect(rs.Galleries).ToNot(BeNil()) + Expect(*rs.Galleries).To(HaveLen(1)) + Expect((*rs.Galleries)[0].Name).To(Equal("test-gallery")) + + Expect(rs.BackendGalleries).ToNot(BeNil()) + Expect(*rs.BackendGalleries).To(HaveLen(1)) + Expect((*rs.BackendGalleries)[0].Name).To(Equal("backend-gallery")) + + Expect(rs.AutoloadGalleries).ToNot(BeNil()) + Expect(*rs.AutoloadGalleries).To(BeTrue()) + + Expect(rs.AutoloadBackendGalleries).ToNot(BeNil()) + Expect(*rs.AutoloadBackendGalleries).To(BeTrue()) + + Expect(rs.ApiKeys).ToNot(BeNil()) + Expect(*rs.ApiKeys).To(HaveLen(2)) + Expect(*rs.ApiKeys).To(ContainElements("key1", "key2")) + + Expect(rs.AgentJobRetentionDays).ToNot(BeNil()) + Expect(*rs.AgentJobRetentionDays).To(Equal(30)) + }) + + 
It("should use default timeouts when not set", func() { + appConfig := &ApplicationConfig{} + + rs := appConfig.ToRuntimeSettings() + + Expect(rs.WatchdogIdleTimeout).ToNot(BeNil()) + Expect(*rs.WatchdogIdleTimeout).To(Equal("15m")) + + Expect(rs.WatchdogBusyTimeout).ToNot(BeNil()) + Expect(*rs.WatchdogBusyTimeout).To(Equal("5m")) + }) + }) + + Describe("ApplyRuntimeSettings", func() { + It("should return false when settings is nil", func() { + appConfig := &ApplicationConfig{} + changed := appConfig.ApplyRuntimeSettings(nil) + Expect(changed).To(BeFalse()) + }) + + It("should only apply non-nil fields", func() { + appConfig := &ApplicationConfig{ + WatchDog: false, + Threads: 4, + ContextSize: 2048, + } + + watchdogEnabled := true + rs := &RuntimeSettings{ + WatchdogEnabled: &watchdogEnabled, + // Leave other fields nil + } + + changed := appConfig.ApplyRuntimeSettings(rs) + + Expect(changed).To(BeTrue()) + Expect(appConfig.WatchDog).To(BeTrue()) + // Unchanged fields should remain + Expect(appConfig.Threads).To(Equal(4)) + Expect(appConfig.ContextSize).To(Equal(2048)) + }) + + It("should apply watchdog settings and return changed=true", func() { + appConfig := &ApplicationConfig{} + + watchdogEnabled := true + watchdogIdle := true + watchdogBusy := true + idleTimeout := "30m" + busyTimeout := "15m" + + rs := &RuntimeSettings{ + WatchdogEnabled: &watchdogEnabled, + WatchdogIdleEnabled: &watchdogIdle, + WatchdogBusyEnabled: &watchdogBusy, + WatchdogIdleTimeout: &idleTimeout, + WatchdogBusyTimeout: &busyTimeout, + } + + changed := appConfig.ApplyRuntimeSettings(rs) + + Expect(changed).To(BeTrue()) + Expect(appConfig.WatchDog).To(BeTrue()) + Expect(appConfig.WatchDogIdle).To(BeTrue()) + Expect(appConfig.WatchDogBusy).To(BeTrue()) + Expect(appConfig.WatchDogIdleTimeout).To(Equal(30 * time.Minute)) + Expect(appConfig.WatchDogBusyTimeout).To(Equal(15 * time.Minute)) + }) + + It("should enable watchdog when idle is enabled", func() { + appConfig := 
&ApplicationConfig{WatchDog: false} + + watchdogIdle := true + rs := &RuntimeSettings{ + WatchdogIdleEnabled: &watchdogIdle, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.WatchDog).To(BeTrue()) + Expect(appConfig.WatchDogIdle).To(BeTrue()) + }) + + It("should enable watchdog when busy is enabled", func() { + appConfig := &ApplicationConfig{WatchDog: false} + + watchdogBusy := true + rs := &RuntimeSettings{ + WatchdogBusyEnabled: &watchdogBusy, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.WatchDog).To(BeTrue()) + Expect(appConfig.WatchDogBusy).To(BeTrue()) + }) + + It("should handle MaxActiveBackends and update SingleBackend accordingly", func() { + appConfig := &ApplicationConfig{} + + maxBackends := 1 + rs := &RuntimeSettings{ + MaxActiveBackends: &maxBackends, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.MaxActiveBackends).To(Equal(1)) + Expect(appConfig.SingleBackend).To(BeTrue()) + + // Test with multiple backends + maxBackends = 5 + rs = &RuntimeSettings{ + MaxActiveBackends: &maxBackends, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.MaxActiveBackends).To(Equal(5)) + Expect(appConfig.SingleBackend).To(BeFalse()) + }) + + It("should handle SingleBackend and update MaxActiveBackends accordingly", func() { + appConfig := &ApplicationConfig{} + + singleBackend := true + rs := &RuntimeSettings{ + SingleBackend: &singleBackend, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.SingleBackend).To(BeTrue()) + Expect(appConfig.MaxActiveBackends).To(Equal(1)) + + // Test disabling single backend + singleBackend = false + rs = &RuntimeSettings{ + SingleBackend: &singleBackend, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.SingleBackend).To(BeFalse()) + Expect(appConfig.MaxActiveBackends).To(Equal(0)) + }) + + It("should enable watchdog when memory reclaimer is enabled", func() { + appConfig := &ApplicationConfig{WatchDog: false} + + memoryEnabled := true + threshold 
:= 0.90 + rs := &RuntimeSettings{ + MemoryReclaimerEnabled: &memoryEnabled, + MemoryReclaimerThreshold: &threshold, + } + + changed := appConfig.ApplyRuntimeSettings(rs) + + Expect(changed).To(BeTrue()) + Expect(appConfig.WatchDog).To(BeTrue()) + Expect(appConfig.MemoryReclaimerEnabled).To(BeTrue()) + Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.90)) + }) + + It("should reject invalid memory threshold values", func() { + appConfig := &ApplicationConfig{MemoryReclaimerThreshold: 0.50} + + // Test threshold > 1.0 + invalidThreshold := 1.5 + rs := &RuntimeSettings{ + MemoryReclaimerThreshold: &invalidThreshold, + } + appConfig.ApplyRuntimeSettings(rs) + Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.50)) // Should remain unchanged + + // Test threshold <= 0 + invalidThreshold = 0.0 + rs = &RuntimeSettings{ + MemoryReclaimerThreshold: &invalidThreshold, + } + appConfig.ApplyRuntimeSettings(rs) + Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.50)) // Should remain unchanged + + // Test negative threshold + invalidThreshold = -0.5 + rs = &RuntimeSettings{ + MemoryReclaimerThreshold: &invalidThreshold, + } + appConfig.ApplyRuntimeSettings(rs) + Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.50)) // Should remain unchanged + }) + + It("should accept valid memory threshold at boundary", func() { + appConfig := &ApplicationConfig{} + + // Test threshold = 1.0 (maximum valid) + threshold := 1.0 + rs := &RuntimeSettings{ + MemoryReclaimerThreshold: &threshold, + } + appConfig.ApplyRuntimeSettings(rs) + Expect(appConfig.MemoryReclaimerThreshold).To(Equal(1.0)) + + // Test threshold just above 0 + threshold = 0.01 + rs = &RuntimeSettings{ + MemoryReclaimerThreshold: &threshold, + } + appConfig.ApplyRuntimeSettings(rs) + Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.01)) + }) + + It("should apply performance settings without triggering watchdog change", func() { + appConfig := &ApplicationConfig{} + + threads := 16 + contextSize := 8192 + f16 
:= true + debug := true + + rs := &RuntimeSettings{ + Threads: &threads, + ContextSize: &contextSize, + F16: &f16, + Debug: &debug, + } + + changed := appConfig.ApplyRuntimeSettings(rs) + + // These settings don't require watchdog restart + Expect(changed).To(BeFalse()) + Expect(appConfig.Threads).To(Equal(16)) + Expect(appConfig.ContextSize).To(Equal(8192)) + Expect(appConfig.F16).To(BeTrue()) + Expect(appConfig.Debug).To(BeTrue()) + }) + + It("should apply CORS and security settings", func() { + appConfig := &ApplicationConfig{} + + cors := true + csrf := true + origins := "https://example.com,https://other.com" + + rs := &RuntimeSettings{ + CORS: &cors, + CSRF: &csrf, + CORSAllowOrigins: &origins, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.CORS).To(BeTrue()) + Expect(appConfig.CSRF).To(BeTrue()) + Expect(appConfig.CORSAllowOrigins).To(Equal("https://example.com,https://other.com")) + }) + + It("should apply P2P settings", func() { + appConfig := &ApplicationConfig{} + + token := "p2p-test-token" + networkID := "p2p-test-network" + federated := true + + rs := &RuntimeSettings{ + P2PToken: &token, + P2PNetworkID: &networkID, + Federated: &federated, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.P2PToken).To(Equal("p2p-test-token")) + Expect(appConfig.P2PNetworkID).To(Equal("p2p-test-network")) + Expect(appConfig.Federated).To(BeTrue()) + }) + + It("should apply gallery settings", func() { + appConfig := &ApplicationConfig{} + + galleries := []Gallery{ + {Name: "gallery1", URL: "https://gallery1.com"}, + {Name: "gallery2", URL: "https://gallery2.com"}, + } + backendGalleries := []Gallery{ + {Name: "backend-gallery", URL: "https://backend.com"}, + } + autoload := true + autoloadBackend := true + + rs := &RuntimeSettings{ + Galleries: &galleries, + BackendGalleries: &backendGalleries, + AutoloadGalleries: &autoload, + AutoloadBackendGalleries: &autoloadBackend, + } + + appConfig.ApplyRuntimeSettings(rs) + + 
Expect(appConfig.Galleries).To(HaveLen(2)) + Expect(appConfig.Galleries[0].Name).To(Equal("gallery1")) + Expect(appConfig.BackendGalleries).To(HaveLen(1)) + Expect(appConfig.AutoloadGalleries).To(BeTrue()) + Expect(appConfig.AutoloadBackendGalleries).To(BeTrue()) + }) + + It("should apply agent settings", func() { + appConfig := &ApplicationConfig{} + + retentionDays := 14 + + rs := &RuntimeSettings{ + AgentJobRetentionDays: &retentionDays, + } + + appConfig.ApplyRuntimeSettings(rs) + + Expect(appConfig.AgentJobRetentionDays).To(Equal(14)) + }) + }) + + Describe("Round-trip conversion", func() { + It("should maintain values through ToRuntimeSettings -> ApplyRuntimeSettings", func() { + original := &ApplicationConfig{ + WatchDog: true, + WatchDogIdle: true, + WatchDogBusy: false, + WatchDogIdleTimeout: 25 * time.Minute, + WatchDogBusyTimeout: 12 * time.Minute, + SingleBackend: false, + MaxActiveBackends: 3, + ParallelBackendRequests: true, + MemoryReclaimerEnabled: true, + MemoryReclaimerThreshold: 0.92, + Threads: 12, + ContextSize: 6144, + F16: true, + Debug: false, + CORS: true, + CSRF: false, + CORSAllowOrigins: "https://test.com", + P2PToken: "round-trip-token", + P2PNetworkID: "round-trip-network", + Federated: true, + AutoloadGalleries: true, + AutoloadBackendGalleries: false, + AgentJobRetentionDays: 60, + } + + // Convert to RuntimeSettings + rs := original.ToRuntimeSettings() + + // Apply to a new ApplicationConfig + target := &ApplicationConfig{} + target.ApplyRuntimeSettings(&rs) + + // Verify all values match + Expect(target.WatchDog).To(Equal(original.WatchDog)) + Expect(target.WatchDogIdle).To(Equal(original.WatchDogIdle)) + Expect(target.WatchDogBusy).To(Equal(original.WatchDogBusy)) + Expect(target.WatchDogIdleTimeout).To(Equal(original.WatchDogIdleTimeout)) + Expect(target.WatchDogBusyTimeout).To(Equal(original.WatchDogBusyTimeout)) + Expect(target.MaxActiveBackends).To(Equal(original.MaxActiveBackends)) + 
Expect(target.ParallelBackendRequests).To(Equal(original.ParallelBackendRequests)) + Expect(target.MemoryReclaimerEnabled).To(Equal(original.MemoryReclaimerEnabled)) + Expect(target.MemoryReclaimerThreshold).To(Equal(original.MemoryReclaimerThreshold)) + Expect(target.Threads).To(Equal(original.Threads)) + Expect(target.ContextSize).To(Equal(original.ContextSize)) + Expect(target.F16).To(Equal(original.F16)) + Expect(target.Debug).To(Equal(original.Debug)) + Expect(target.CORS).To(Equal(original.CORS)) + Expect(target.CSRF).To(Equal(original.CSRF)) + Expect(target.CORSAllowOrigins).To(Equal(original.CORSAllowOrigins)) + Expect(target.P2PToken).To(Equal(original.P2PToken)) + Expect(target.P2PNetworkID).To(Equal(original.P2PNetworkID)) + Expect(target.Federated).To(Equal(original.Federated)) + Expect(target.AutoloadGalleries).To(Equal(original.AutoloadGalleries)) + Expect(target.AutoloadBackendGalleries).To(Equal(original.AutoloadBackendGalleries)) + Expect(target.AgentJobRetentionDays).To(Equal(original.AgentJobRetentionDays)) + }) + + It("should handle empty galleries correctly in round-trip", func() { + original := &ApplicationConfig{ + Galleries: []Gallery{}, + BackendGalleries: []Gallery{}, + ApiKeys: []string{}, + } + + rs := original.ToRuntimeSettings() + target := &ApplicationConfig{} + target.ApplyRuntimeSettings(&rs) + + Expect(target.Galleries).To(BeEmpty()) + Expect(target.BackendGalleries).To(BeEmpty()) + }) + }) + + Describe("Edge cases", func() { + It("should handle invalid timeout string in ApplyRuntimeSettings", func() { + appConfig := &ApplicationConfig{ + WatchDogIdleTimeout: 10 * time.Minute, + } + + invalidTimeout := "not-a-duration" + rs := &RuntimeSettings{ + WatchdogIdleTimeout: &invalidTimeout, + } + + appConfig.ApplyRuntimeSettings(rs) + + // Should remain unchanged due to parse error + Expect(appConfig.WatchDogIdleTimeout).To(Equal(10 * time.Minute)) + }) + + It("should handle zero values in ApplicationConfig", func() { + appConfig := 
&ApplicationConfig{ + // All zero values + } + + rs := appConfig.ToRuntimeSettings() + + // Should still have non-nil pointers with zero/default values + Expect(rs.WatchdogEnabled).ToNot(BeNil()) + Expect(*rs.WatchdogEnabled).To(BeFalse()) + + Expect(rs.Threads).ToNot(BeNil()) + Expect(*rs.Threads).To(Equal(0)) + + Expect(rs.MemoryReclaimerThreshold).ToNot(BeNil()) + Expect(*rs.MemoryReclaimerThreshold).To(Equal(0.0)) + }) + + It("should prefer MaxActiveBackends over SingleBackend when both are set", func() { + appConfig := &ApplicationConfig{} + + maxBackends := 3 + singleBackend := true + + rs := &RuntimeSettings{ + MaxActiveBackends: &maxBackends, + SingleBackend: &singleBackend, + } + + appConfig.ApplyRuntimeSettings(rs) + + // MaxActiveBackends should take precedence + Expect(appConfig.MaxActiveBackends).To(Equal(3)) + Expect(appConfig.SingleBackend).To(BeFalse()) // 3 != 1, so single backend is false + }) + }) +}) diff --git a/core/config/runtime_settings.go b/core/config/runtime_settings.go new file mode 100644 index 000000000000..c02d4fcd7c20 --- /dev/null +++ b/core/config/runtime_settings.go @@ -0,0 +1,56 @@ +package config + +// RuntimeSettings represents runtime configuration that can be changed dynamically. +// This struct is used for: +// - API responses (GET /api/settings) +// - API requests (POST /api/settings) +// - Persisting to runtime_settings.json +// - Loading from runtime_settings.json on startup +// +// All fields are pointers to distinguish between "not set" and "set to zero/false value". 
+type RuntimeSettings struct { + // Watchdog settings + WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"` + WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"` + WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"` + WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` + WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"` + WatchdogInterval *string `json:"watchdog_interval,omitempty"` // Interval between watchdog checks (e.g., 2s, 30s) + + // Backend management + SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead + MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited, 1 = single backend mode) + ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"` + + // Memory Reclaimer settings (works with GPU if available, otherwise RAM) + MemoryReclaimerEnabled *bool `json:"memory_reclaimer_enabled,omitempty"` // Enable memory threshold monitoring + MemoryReclaimerThreshold *float64 `json:"memory_reclaimer_threshold,omitempty"` // Threshold 0.0-1.0 (e.g., 0.95 = 95%) + + // Performance settings + Threads *int `json:"threads,omitempty"` + ContextSize *int `json:"context_size,omitempty"` + F16 *bool `json:"f16,omitempty"` + Debug *bool `json:"debug,omitempty"` + + // Security/CORS settings + CORS *bool `json:"cors,omitempty"` + CSRF *bool `json:"csrf,omitempty"` + CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"` + + // P2P settings + P2PToken *string `json:"p2p_token,omitempty"` + P2PNetworkID *string `json:"p2p_network_id,omitempty"` + Federated *bool `json:"federated,omitempty"` + + // Gallery settings + Galleries *[]Gallery `json:"galleries,omitempty"` + BackendGalleries *[]Gallery `json:"backend_galleries,omitempty"` + AutoloadGalleries *bool `json:"autoload_galleries,omitempty"` + AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"` + + 
// API keys - No omitempty as we need to save empty arrays to clear keys + ApiKeys *[]string `json:"api_keys"` + + // Agent settings + AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"` +} diff --git a/core/http/endpoints/localai/settings.go b/core/http/endpoints/localai/settings.go index dee77646ed62..1cc7666e02c7 100644 --- a/core/http/endpoints/localai/settings.go +++ b/core/http/endpoints/localai/settings.go @@ -12,115 +12,15 @@ import ( "github.com/mudler/LocalAI/core/application" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/p2p" + "github.com/mudler/LocalAI/core/schema" "github.com/rs/zerolog/log" ) -type SettingsResponse struct { - Success bool `json:"success"` - Error string `json:"error,omitempty"` - Message string `json:"message,omitempty"` -} - -type RuntimeSettings struct { - WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"` - WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"` - WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"` - WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` - WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"` - SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead - MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited, 1 = single backend mode) - ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"` - Threads *int `json:"threads,omitempty"` - ContextSize *int `json:"context_size,omitempty"` - F16 *bool `json:"f16,omitempty"` - Debug *bool `json:"debug,omitempty"` - CORS *bool `json:"cors,omitempty"` - CSRF *bool `json:"csrf,omitempty"` - CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"` - P2PToken *string `json:"p2p_token,omitempty"` - P2PNetworkID *string `json:"p2p_network_id,omitempty"` - Federated *bool `json:"federated,omitempty"` - Galleries *[]config.Gallery 
`json:"galleries,omitempty"` - BackendGalleries *[]config.Gallery `json:"backend_galleries,omitempty"` - AutoloadGalleries *bool `json:"autoload_galleries,omitempty"` - AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"` - ApiKeys *[]string `json:"api_keys"` // No omitempty - we need to save empty arrays to clear keys - AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"` -} - // GetSettingsEndpoint returns current settings with precedence (env > file > defaults) func GetSettingsEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { appConfig := app.ApplicationConfig() - startupConfig := app.StartupConfig() - - if startupConfig == nil { - // Fallback if startup config not available - startupConfig = appConfig - } - - settings := RuntimeSettings{} - - // Set all current values (using pointers for RuntimeSettings) - watchdogIdle := appConfig.WatchDogIdle - watchdogBusy := appConfig.WatchDogBusy - watchdogEnabled := appConfig.WatchDog - singleBackend := appConfig.SingleBackend - maxActiveBackends := appConfig.MaxActiveBackends - parallelBackendRequests := appConfig.ParallelBackendRequests - threads := appConfig.Threads - contextSize := appConfig.ContextSize - f16 := appConfig.F16 - debug := appConfig.Debug - cors := appConfig.CORS - csrf := appConfig.CSRF - corsAllowOrigins := appConfig.CORSAllowOrigins - p2pToken := appConfig.P2PToken - p2pNetworkID := appConfig.P2PNetworkID - federated := appConfig.Federated - galleries := appConfig.Galleries - backendGalleries := appConfig.BackendGalleries - autoloadGalleries := appConfig.AutoloadGalleries - autoloadBackendGalleries := appConfig.AutoloadBackendGalleries - apiKeys := appConfig.ApiKeys - agentJobRetentionDays := appConfig.AgentJobRetentionDays - - settings.WatchdogIdleEnabled = &watchdogIdle - settings.WatchdogBusyEnabled = &watchdogBusy - settings.WatchdogEnabled = &watchdogEnabled - settings.SingleBackend = &singleBackend - 
settings.MaxActiveBackends = &maxActiveBackends - settings.ParallelBackendRequests = ¶llelBackendRequests - settings.Threads = &threads - settings.ContextSize = &contextSize - settings.F16 = &f16 - settings.Debug = &debug - settings.CORS = &cors - settings.CSRF = &csrf - settings.CORSAllowOrigins = &corsAllowOrigins - settings.P2PToken = &p2pToken - settings.P2PNetworkID = &p2pNetworkID - settings.Federated = &federated - settings.Galleries = &galleries - settings.BackendGalleries = &backendGalleries - settings.AutoloadGalleries = &autoloadGalleries - settings.AutoloadBackendGalleries = &autoloadBackendGalleries - settings.ApiKeys = &apiKeys - settings.AgentJobRetentionDays = &agentJobRetentionDays - - var idleTimeout, busyTimeout string - if appConfig.WatchDogIdleTimeout > 0 { - idleTimeout = appConfig.WatchDogIdleTimeout.String() - } else { - idleTimeout = "15m" // default - } - if appConfig.WatchDogBusyTimeout > 0 { - busyTimeout = appConfig.WatchDogBusyTimeout.String() - } else { - busyTimeout = "5m" // default - } - settings.WatchdogIdleTimeout = &idleTimeout - settings.WatchdogBusyTimeout = &busyTimeout + settings := appConfig.ToRuntimeSettings() return c.JSON(http.StatusOK, settings) } } @@ -132,21 +32,20 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { startupConfig := app.StartupConfig() if startupConfig == nil { - // Fallback if startup config not available startupConfig = appConfig } body, err := io.ReadAll(c.Request().Body) if err != nil { - return c.JSON(http.StatusBadRequest, SettingsResponse{ + return c.JSON(http.StatusBadRequest, schema.SettingsResponse{ Success: false, Error: "Failed to read request body: " + err.Error(), }) } - var settings RuntimeSettings + var settings config.RuntimeSettings if err := json.Unmarshal(body, &settings); err != nil { - return c.JSON(http.StatusBadRequest, SettingsResponse{ + return c.JSON(http.StatusBadRequest, schema.SettingsResponse{ Success: false, Error: "Failed to parse JSON: " + 
err.Error(), }) @@ -154,27 +53,33 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { // Validate timeouts if provided if settings.WatchdogIdleTimeout != nil { - _, err := time.ParseDuration(*settings.WatchdogIdleTimeout) - if err != nil { - return c.JSON(http.StatusBadRequest, SettingsResponse{ + if _, err := time.ParseDuration(*settings.WatchdogIdleTimeout); err != nil { + return c.JSON(http.StatusBadRequest, schema.SettingsResponse{ Success: false, Error: "Invalid watchdog_idle_timeout format: " + err.Error(), }) } } if settings.WatchdogBusyTimeout != nil { - _, err := time.ParseDuration(*settings.WatchdogBusyTimeout) - if err != nil { - return c.JSON(http.StatusBadRequest, SettingsResponse{ + if _, err := time.ParseDuration(*settings.WatchdogBusyTimeout); err != nil { + return c.JSON(http.StatusBadRequest, schema.SettingsResponse{ Success: false, Error: "Invalid watchdog_busy_timeout format: " + err.Error(), }) } } + if settings.WatchdogInterval != nil { + if _, err := time.ParseDuration(*settings.WatchdogInterval); err != nil { + return c.JSON(http.StatusBadRequest, schema.SettingsResponse{ + Success: false, + Error: "Invalid watchdog_interval format: " + err.Error(), + }) + } + } // Save to file if appConfig.DynamicConfigsDir == "" { - return c.JSON(http.StatusBadRequest, SettingsResponse{ + return c.JSON(http.StatusBadRequest, schema.SettingsResponse{ Success: false, Error: "DynamicConfigsDir is not set", }) @@ -183,133 +88,38 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { settingsFile := filepath.Join(appConfig.DynamicConfigsDir, "runtime_settings.json") settingsJSON, err := json.MarshalIndent(settings, "", " ") if err != nil { - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, Error: "Failed to marshal settings: " + err.Error(), }) } if err := os.WriteFile(settingsFile, settingsJSON, 0600); err 
!= nil { - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, Error: "Failed to write settings file: " + err.Error(), }) } - // Apply settings immediately, checking env var overrides per field - watchdogChanged := false - if settings.WatchdogEnabled != nil { - appConfig.WatchDog = *settings.WatchdogEnabled - watchdogChanged = true - } - if settings.WatchdogIdleEnabled != nil { - appConfig.WatchDogIdle = *settings.WatchdogIdleEnabled - if appConfig.WatchDogIdle { - appConfig.WatchDog = true - } - watchdogChanged = true - } - if settings.WatchdogBusyEnabled != nil { - appConfig.WatchDogBusy = *settings.WatchdogBusyEnabled - if appConfig.WatchDogBusy { - appConfig.WatchDog = true - } - watchdogChanged = true - } - if settings.WatchdogIdleTimeout != nil { - dur, _ := time.ParseDuration(*settings.WatchdogIdleTimeout) - appConfig.WatchDogIdleTimeout = dur - watchdogChanged = true - } - if settings.WatchdogBusyTimeout != nil { - dur, _ := time.ParseDuration(*settings.WatchdogBusyTimeout) - appConfig.WatchDogBusyTimeout = dur - watchdogChanged = true - } - if settings.MaxActiveBackends != nil { - appConfig.MaxActiveBackends = *settings.MaxActiveBackends - // For backward compatibility, update SingleBackend too - appConfig.SingleBackend = (*settings.MaxActiveBackends == 1) - watchdogChanged = true // LRU limit is managed by watchdog - } else if settings.SingleBackend != nil { - // Legacy support: SingleBackend maps to MaxActiveBackends = 1 - appConfig.SingleBackend = *settings.SingleBackend - if *settings.SingleBackend { - appConfig.MaxActiveBackends = 1 - } else { - appConfig.MaxActiveBackends = 0 - } - watchdogChanged = true // LRU limit is managed by watchdog - } - if settings.ParallelBackendRequests != nil { - appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests - } - if settings.Threads != nil { - appConfig.Threads = *settings.Threads - } - if 
settings.ContextSize != nil { - appConfig.ContextSize = *settings.ContextSize - } - if settings.F16 != nil { - appConfig.F16 = *settings.F16 - } - if settings.Debug != nil { - appConfig.Debug = *settings.Debug - } - if settings.CORS != nil { - appConfig.CORS = *settings.CORS - } - if settings.CSRF != nil { - appConfig.CSRF = *settings.CSRF - } - if settings.CORSAllowOrigins != nil { - appConfig.CORSAllowOrigins = *settings.CORSAllowOrigins - } - if settings.P2PToken != nil { - appConfig.P2PToken = *settings.P2PToken - } - if settings.P2PNetworkID != nil { - appConfig.P2PNetworkID = *settings.P2PNetworkID - } - if settings.Federated != nil { - appConfig.Federated = *settings.Federated - } - if settings.Galleries != nil { - appConfig.Galleries = *settings.Galleries - } - if settings.BackendGalleries != nil { - appConfig.BackendGalleries = *settings.BackendGalleries - } - if settings.AutoloadGalleries != nil { - appConfig.AutoloadGalleries = *settings.AutoloadGalleries - } - if settings.AutoloadBackendGalleries != nil { - appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries - } - agentJobChanged := false - if settings.AgentJobRetentionDays != nil { - appConfig.AgentJobRetentionDays = *settings.AgentJobRetentionDays - agentJobChanged = true - } + // Apply settings using centralized method + watchdogChanged := appConfig.ApplyRuntimeSettings(&settings) + + // Handle API keys specially (merge with startup keys) if settings.ApiKeys != nil { - // API keys from env vars (startup) should be kept, runtime settings keys are added - // Combine startup keys (env vars) with runtime settings keys envKeys := startupConfig.ApiKeys runtimeKeys := *settings.ApiKeys - // Merge: env keys first (they take precedence), then runtime keys appConfig.ApiKeys = append(envKeys, runtimeKeys...) - - // Note: We only save to runtime_settings.json (not api_keys.json) to avoid duplication - // The runtime_settings.json is the unified config file. 
If api_keys.json exists, - // it will be loaded first, but runtime_settings.json takes precedence and deduplicates. } + // Check if agent job retention changed + agentJobChanged := settings.AgentJobRetentionDays != nil + // Restart watchdog if settings changed if watchdogChanged { - if settings.WatchdogEnabled != nil && !*settings.WatchdogEnabled || settings.WatchdogEnabled == nil { + if settings.WatchdogEnabled != nil && !*settings.WatchdogEnabled { if err := app.StopWatchdog(); err != nil { log.Error().Err(err).Msg("Failed to stop watchdog") - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, Error: "Settings saved but failed to stop watchdog: " + err.Error(), }) @@ -317,7 +127,7 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { } else { if err := app.RestartWatchdog(); err != nil { log.Error().Err(err).Msg("Failed to restart watchdog") - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, Error: "Settings saved but failed to restart watchdog: " + err.Error(), }) @@ -329,7 +139,7 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { if agentJobChanged { if err := app.RestartAgentJobService(); err != nil { log.Error().Err(err).Msg("Failed to restart agent job service") - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, Error: "Settings saved but failed to restart agent job service: " + err.Error(), }) @@ -340,33 +150,30 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { p2pChanged := settings.P2PToken != nil || settings.P2PNetworkID != nil || settings.Federated != nil if p2pChanged { if settings.P2PToken != nil && *settings.P2PToken == "" { - // stop P2P if err := app.StopP2P(); 
err != nil { log.Error().Err(err).Msg("Failed to stop P2P") - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, Error: "Settings saved but failed to stop P2P: " + err.Error(), }) } } else { if settings.P2PToken != nil && *settings.P2PToken == "0" { - // generate a token if users sets 0 (disabled) token := p2p.GenerateToken(60, 60) settings.P2PToken = &token appConfig.P2PToken = token } - // Stop existing P2P if err := app.RestartP2P(); err != nil { - log.Error().Err(err).Msg("Failed to stop P2P") - return c.JSON(http.StatusInternalServerError, SettingsResponse{ + log.Error().Err(err).Msg("Failed to restart P2P") + return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{ Success: false, - Error: "Settings saved but failed to stop P2P: " + err.Error(), + Error: "Settings saved but failed to restart P2P: " + err.Error(), }) } } } - return c.JSON(http.StatusOK, SettingsResponse{ + return c.JSON(http.StatusOK, schema.SettingsResponse{ Success: true, Message: "Settings updated successfully", }) diff --git a/core/http/routes/ui_api.go b/core/http/routes/ui_api.go index 9287b31742f9..ae6f868aa2d5 100644 --- a/core/http/routes/ui_api.go +++ b/core/http/routes/ui_api.go @@ -19,6 +19,7 @@ import ( "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/pkg/model" + "github.com/mudler/LocalAI/pkg/xsysinfo" "github.com/rs/zerolog/log" ) @@ -917,6 +918,30 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model }) }) + // Resources API endpoint - unified memory info (GPU if available, otherwise RAM) + app.GET("/api/resources", func(c echo.Context) error { + resourceInfo := xsysinfo.GetResourceInfo() + + // Format watchdog interval + watchdogInterval := "2s" // default + if appConfig.WatchDogInterval > 0 { + watchdogInterval = appConfig.WatchDogInterval.String() + } + + response := 
map[string]interface{}{ + "type": resourceInfo.Type, // "gpu" or "ram" + "available": resourceInfo.Available, + "gpus": resourceInfo.GPUs, + "ram": resourceInfo.RAM, + "aggregate": resourceInfo.Aggregate, + "reclaimer_enabled": appConfig.MemoryReclaimerEnabled, + "reclaimer_threshold": appConfig.MemoryReclaimerThreshold, + "watchdog_interval": watchdogInterval, + } + + return c.JSON(200, response) + }) + if !appConfig.DisableRuntimeSettings { // Settings API app.GET("/api/settings", localai.GetSettingsEndpoint(applicationInstance)) diff --git a/core/http/views/index.html b/core/http/views/index.html index 598a7b0adc67..f5222c3639c0 100644 --- a/core/http/views/index.html +++ b/core/http/views/index.html @@ -462,6 +462,27 @@

How can I help you today?

+ +
+ +
+ {{ $loadedModels := .LoadedModels }}
How can I help you today? // Make functions available globally for Alpine.js window.stopModel = stopModel; window.stopAllModels = stopAllModels; + +// Resource Monitor component (GPU if available, otherwise RAM) +function resourceMonitor() { + return { + resourceData: null, + pollInterval: null, + + async fetchResourceData() { + try { + const response = await fetch('/api/resources'); + if (response.ok) { + this.resourceData = await response.json(); + } + } catch (error) { + console.error('Error fetching resource data:', error); + } + }, + + startPolling() { + // Initial fetch + this.fetchResourceData(); + // Poll every 5 seconds + this.pollInterval = setInterval(() => this.fetchResourceData(), 5000); + }, + + stopPolling() { + if (this.pollInterval) { + clearInterval(this.pollInterval); + } + } + } +} diff --git a/core/http/views/manage.html b/core/http/views/manage.html index 87e077c6bf9e..224f762fe16f 100644 --- a/core/http/views/manage.html +++ b/core/http/views/manage.html @@ -73,6 +73,106 @@

+ +
+ +
+
{{template "views/partials/inprogress" .}} @@ -426,6 +526,47 @@

No backends installed yet

diff --git a/core/schema/localai.go b/core/schema/localai.go index 5eb56d91bf5d..29e1faf3f1cd 100644 --- a/core/schema/localai.go +++ b/core/schema/localai.go @@ -163,3 +163,10 @@ type ImportModelRequest struct { URI string `json:"uri"` Preferences json.RawMessage `json:"preferences,omitempty"` } + +// SettingsResponse is the response type for settings API operations +type SettingsResponse struct { + Success bool `json:"success"` + Error string `json:"error,omitempty"` + Message string `json:"message,omitempty"` +} diff --git a/pkg/model/watchdog.go b/pkg/model/watchdog.go index 4feb49c35dd6..77ae4572b96c 100644 --- a/pkg/model/watchdog.go +++ b/pkg/model/watchdog.go @@ -5,6 +5,7 @@ import ( "sync" "time" + "github.com/mudler/LocalAI/pkg/xsysinfo" process "github.com/mudler/go-processmanager" "github.com/rs/zerolog/log" ) @@ -17,6 +18,9 @@ import ( // force a reload of the model. // The watchdog also supports LRU (Least Recently Used) eviction when a maximum // number of active backends is configured. +// The watchdog also supports memory threshold monitoring - when memory usage +// (GPU VRAM if available, otherwise system RAM) exceeds the threshold, +// it will evict backends using the LRU strategy. 
// The watchdog runs as a separate go routine, // and the GRPC client talks to it via a channel to send status updates type WatchDog struct { @@ -32,26 +36,48 @@ type WatchDog struct { busyCheck, idleCheck bool lruLimit int // Maximum number of active backends (0 = unlimited) + + // Memory reclaimer settings (works with GPU if available, otherwise RAM) + memoryReclaimerEnabled bool // Enable memory threshold monitoring + memoryReclaimerThreshold float64 // Threshold 0.0-1.0 (e.g., 0.95 = 95%) + watchdogInterval time.Duration } type ProcessManager interface { ShutdownModel(modelName string) error } -func NewWatchDog(pm ProcessManager, timeoutBusy, timeoutIdle time.Duration, busy, idle bool, lruLimit int) *WatchDog { +// NewWatchDog creates a new WatchDog with the provided options. +// Example usage: +// +// wd := NewWatchDog( +// WithProcessManager(pm), +// WithBusyTimeout(5*time.Minute), +// WithIdleTimeout(15*time.Minute), +// WithBusyCheck(true), +// WithIdleCheck(true), +// WithLRULimit(3), +// WithMemoryReclaimer(true, 0.95), +// ) +func NewWatchDog(opts ...WatchDogOption) *WatchDog { + o := NewWatchDogOptions(opts...) 
+ return &WatchDog{ - timeout: timeoutBusy, - idletimeout: timeoutIdle, - pm: pm, - busyTime: make(map[string]time.Time), - idleTime: make(map[string]time.Time), - lastUsed: make(map[string]time.Time), - addressMap: make(map[string]*process.Process), - busyCheck: busy, - idleCheck: idle, - lruLimit: lruLimit, - addressModelMap: make(map[string]string), - stop: make(chan bool, 1), + timeout: o.busyTimeout, + idletimeout: o.idleTimeout, + pm: o.processManager, + busyTime: make(map[string]time.Time), + idleTime: make(map[string]time.Time), + lastUsed: make(map[string]time.Time), + addressMap: make(map[string]*process.Process), + busyCheck: o.busyCheck, + idleCheck: o.idleCheck, + lruLimit: o.lruLimit, + addressModelMap: make(map[string]string), + stop: make(chan bool, 1), + memoryReclaimerEnabled: o.memoryReclaimerEnabled, + memoryReclaimerThreshold: o.memoryReclaimerThreshold, + watchdogInterval: o.watchdogInterval, } } @@ -69,6 +95,21 @@ func (wd *WatchDog) GetLRULimit() int { return wd.lruLimit } +// SetMemoryReclaimer updates the memory reclaimer settings dynamically +func (wd *WatchDog) SetMemoryReclaimer(enabled bool, threshold float64) { + wd.Lock() + defer wd.Unlock() + wd.memoryReclaimerEnabled = enabled + wd.memoryReclaimerThreshold = threshold +} + +// GetMemoryReclaimerSettings returns the current memory reclaimer settings +func (wd *WatchDog) GetMemoryReclaimerSettings() (enabled bool, threshold float64) { + wd.Lock() + defer wd.Unlock() + return wd.memoryReclaimerEnabled, wd.memoryReclaimerThreshold +} + func (wd *WatchDog) Shutdown() { wd.Lock() defer wd.Unlock() @@ -202,17 +243,27 @@ func (wd *WatchDog) Run() { case <-wd.stop: log.Info().Msg("[WatchDog] Stopping watchdog") return - case <-time.After(30 * time.Second): - if !wd.busyCheck && !wd.idleCheck { + case <-time.After(wd.watchdogInterval): + // Check if any monitoring is enabled + wd.Lock() + busyCheck := wd.busyCheck + idleCheck := wd.idleCheck + memoryCheck := wd.memoryReclaimerEnabled + 
wd.Unlock() + + if !busyCheck && !idleCheck && !memoryCheck { log.Info().Msg("[WatchDog] No checks enabled, stopping watchdog") return } - if wd.busyCheck { + if busyCheck { wd.checkBusy() } - if wd.idleCheck { + if idleCheck { wd.checkIdle() } + if memoryCheck { + wd.checkMemory() + } } } } @@ -278,6 +329,105 @@ func (wd *WatchDog) checkBusy() { } } +// checkMemory monitors memory usage (GPU VRAM if available, otherwise RAM) and evicts backends when usage exceeds threshold +func (wd *WatchDog) checkMemory() { + wd.Lock() + threshold := wd.memoryReclaimerThreshold + enabled := wd.memoryReclaimerEnabled + modelCount := len(wd.addressModelMap) + wd.Unlock() + + if !enabled || threshold <= 0 || modelCount == 0 { + return + } + + // Get current memory usage (GPU if available, otherwise RAM) + aggregate := xsysinfo.GetResourceAggregateInfo() + if aggregate.TotalMemory == 0 { + log.Debug().Msg("[WatchDog] No memory information available for memory reclaimer") + return + } + + // Convert threshold from 0.0-1.0 to percentage + thresholdPercent := threshold * 100 + + memoryType := "GPU" + if aggregate.GPUCount == 0 { + memoryType = "RAM" + } + + log.Debug(). + Str("type", memoryType). + Float64("usage_percent", aggregate.UsagePercent). + Float64("threshold_percent", thresholdPercent). + Int("loaded_models", modelCount). + Msg("[WatchDog] Memory check") + + // Check if usage exceeds threshold + if aggregate.UsagePercent > thresholdPercent { + log.Warn(). + Str("type", memoryType). + Float64("usage_percent", aggregate.UsagePercent). + Float64("threshold_percent", thresholdPercent). 
+ Msg("[WatchDog] Memory usage exceeds threshold, evicting LRU backend") + + // Evict the least recently used model + wd.evictLRUModel() + } +} + +// evictLRUModel evicts the least recently used model +func (wd *WatchDog) evictLRUModel() { + wd.Lock() + + if len(wd.addressModelMap) == 0 { + wd.Unlock() + return + } + + // Build a list of models sorted by last used time (oldest first) + var models []modelUsageInfo + for address, model := range wd.addressModelMap { + lastUsed := wd.lastUsed[address] + if lastUsed.IsZero() { + lastUsed = time.Time{} + } + models = append(models, modelUsageInfo{ + address: address, + model: model, + lastUsed: lastUsed, + }) + } + + if len(models) == 0 { + wd.Unlock() + return + } + + // Sort by lastUsed time (oldest first) + sort.Slice(models, func(i, j int) bool { + return models[i].lastUsed.Before(models[j].lastUsed) + }) + + // Get the LRU model + lruModel := models[0] + log.Info(). + Str("model", lruModel.model). + Time("lastUsed", lruModel.lastUsed). + Msg("[WatchDog] Memory reclaimer evicting LRU model") + + // Untrack the model + wd.untrack(lruModel.address) + wd.Unlock() + + // Shutdown the model + if err := wd.pm.ShutdownModel(lruModel.model); err != nil { + log.Error().Err(err).Str("model", lruModel.model).Msg("[WatchDog] error shutting down model during memory reclamation") + } else { + log.Info().Str("model", lruModel.model).Msg("[WatchDog] Memory reclaimer eviction complete") + } +} + func (wd *WatchDog) untrack(address string) { delete(wd.busyTime, address) delete(wd.idleTime, address) diff --git a/pkg/model/watchdog_options.go b/pkg/model/watchdog_options.go new file mode 100644 index 000000000000..68e1a78b1b9a --- /dev/null +++ b/pkg/model/watchdog_options.go @@ -0,0 +1,124 @@ +package model + +import ( + "time" +) + +// WatchDogOptions contains all configuration for the WatchDog +type WatchDogOptions struct { + processManager ProcessManager + + // Timeout settings + busyTimeout time.Duration + idleTimeout time.Duration 
+ watchdogInterval time.Duration + + // Check toggles + busyCheck bool + idleCheck bool + + // LRU settings + lruLimit int // Maximum number of active backends (0 = unlimited) + + // Memory reclaimer settings (works with GPU if available, otherwise RAM) + memoryReclaimerEnabled bool // Enable memory threshold monitoring + memoryReclaimerThreshold float64 // Threshold 0.0-1.0 (e.g., 0.95 = 95%) +} + +// WatchDogOption is a function that configures WatchDogOptions +type WatchDogOption func(*WatchDogOptions) + +// WithProcessManager sets the process manager for the watchdog +func WithProcessManager(pm ProcessManager) WatchDogOption { + return func(o *WatchDogOptions) { + o.processManager = pm + } +} + +// WithBusyTimeout sets the busy timeout duration +func WithBusyTimeout(timeout time.Duration) WatchDogOption { + return func(o *WatchDogOptions) { + o.busyTimeout = timeout + } +} + +// WithIdleTimeout sets the idle timeout duration +func WithIdleTimeout(timeout time.Duration) WatchDogOption { + return func(o *WatchDogOptions) { + o.idleTimeout = timeout + } +} + +// WithWatchdogCheck sets the watchdog check duration +func WithWatchdogInterval(interval time.Duration) WatchDogOption { + return func(o *WatchDogOptions) { + o.watchdogInterval = interval + } +} + +// WithBusyCheck enables or disables busy checking +func WithBusyCheck(enabled bool) WatchDogOption { + return func(o *WatchDogOptions) { + o.busyCheck = enabled + } +} + +// WithIdleCheck enables or disables idle checking +func WithIdleCheck(enabled bool) WatchDogOption { + return func(o *WatchDogOptions) { + o.idleCheck = enabled + } +} + +// WithLRULimit sets the maximum number of active backends (0 = unlimited) +func WithLRULimit(limit int) WatchDogOption { + return func(o *WatchDogOptions) { + o.lruLimit = limit + } +} + +// WithMemoryReclaimer enables memory threshold monitoring with the specified threshold +// Works with GPU VRAM if available, otherwise uses system RAM +func WithMemoryReclaimer(enabled 
bool, threshold float64) WatchDogOption { + return func(o *WatchDogOptions) { + o.memoryReclaimerEnabled = enabled + o.memoryReclaimerThreshold = threshold + } +} + +// WithMemoryReclaimerEnabled enables or disables memory threshold monitoring +func WithMemoryReclaimerEnabled(enabled bool) WatchDogOption { + return func(o *WatchDogOptions) { + o.memoryReclaimerEnabled = enabled + } +} + +// WithMemoryReclaimerThreshold sets the memory threshold (0.0-1.0) +func WithMemoryReclaimerThreshold(threshold float64) WatchDogOption { + return func(o *WatchDogOptions) { + o.memoryReclaimerThreshold = threshold + } +} + +// DefaultWatchDogOptions returns default options for the watchdog +func DefaultWatchDogOptions() *WatchDogOptions { + return &WatchDogOptions{ + busyTimeout: 5 * time.Minute, + idleTimeout: 15 * time.Minute, + watchdogInterval: 2 * time.Second, + busyCheck: false, + idleCheck: false, + lruLimit: 0, + memoryReclaimerEnabled: false, + memoryReclaimerThreshold: 0.95, + } +} + +// NewWatchDogOptions creates WatchDogOptions with the provided options applied +func NewWatchDogOptions(opts ...WatchDogOption) *WatchDogOptions { + o := DefaultWatchDogOptions() + for _, opt := range opts { + opt(o) + } + return o +} diff --git a/pkg/model/watchdog_options_test.go b/pkg/model/watchdog_options_test.go new file mode 100644 index 000000000000..2710408f6dc3 --- /dev/null +++ b/pkg/model/watchdog_options_test.go @@ -0,0 +1,187 @@ +package model_test + +import ( + "time" + + "github.com/mudler/LocalAI/pkg/model" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +var _ = Describe("WatchDogOptions", func() { + Context("DefaultWatchDogOptions", func() { + It("should return sensible defaults", func() { + opts := model.DefaultWatchDogOptions() + + Expect(opts).ToNot(BeNil()) + }) + }) + + Context("NewWatchDogOptions", func() { + It("should apply options in order", func() { + pm := newMockProcessManager() + opts := model.NewWatchDogOptions( + model.WithProcessManager(pm), + model.WithBusyTimeout(10*time.Minute), + model.WithIdleTimeout(20*time.Minute), + model.WithBusyCheck(true), + model.WithIdleCheck(true), + model.WithLRULimit(5), + model.WithMemoryReclaimer(true, 0.85), + ) + + Expect(opts).ToNot(BeNil()) + }) + + It("should allow overriding options", func() { + opts := model.NewWatchDogOptions( + model.WithLRULimit(3), + model.WithLRULimit(7), // override + ) + + // Create watchdog to verify + wd := model.NewWatchDog( + model.WithProcessManager(newMockProcessManager()), + model.WithLRULimit(3), + model.WithLRULimit(7), // override + ) + Expect(wd.GetLRULimit()).To(Equal(7)) + + Expect(opts).ToNot(BeNil()) + }) + }) + + Context("Individual Options", func() { + var pm *mockProcessManager + + BeforeEach(func() { + pm = newMockProcessManager() + }) + + It("WithProcessManager should set process manager", func() { + wd := model.NewWatchDog( + model.WithProcessManager(pm), + ) + Expect(wd).ToNot(BeNil()) + }) + + It("WithBusyTimeout should set busy timeout", func() { + wd := model.NewWatchDog( + model.WithProcessManager(pm), + model.WithBusyTimeout(7*time.Minute), + ) + Expect(wd).ToNot(BeNil()) + }) + + It("WithIdleTimeout should set idle timeout", func() { + wd := model.NewWatchDog( + model.WithProcessManager(pm), + model.WithIdleTimeout(25*time.Minute), + ) + Expect(wd).ToNot(BeNil()) + }) + + It("WithBusyCheck should enable busy checking", func() { + wd := model.NewWatchDog( + model.WithProcessManager(pm), + model.WithBusyCheck(true), + ) + Expect(wd).ToNot(BeNil()) + }) + + It("WithIdleCheck 
should enable idle checking", func() { + wd := model.NewWatchDog( + model.WithProcessManager(pm), + model.WithIdleCheck(true), + ) + Expect(wd).ToNot(BeNil()) + }) + + It("WithLRULimit should set LRU limit", func() { + wd := model.NewWatchDog( + model.WithProcessManager(pm), + model.WithLRULimit(10), + ) + Expect(wd.GetLRULimit()).To(Equal(10)) + }) + + It("WithMemoryReclaimer should set both enabled and threshold", func() { + wd := model.NewWatchDog( + model.WithProcessManager(pm), + model.WithMemoryReclaimer(true, 0.88), + ) + enabled, threshold := wd.GetMemoryReclaimerSettings() + Expect(enabled).To(BeTrue()) + Expect(threshold).To(Equal(0.88)) + }) + + It("WithMemoryReclaimerEnabled should set enabled flag only", func() { + wd := model.NewWatchDog( + model.WithProcessManager(pm), + model.WithMemoryReclaimerEnabled(true), + ) + enabled, _ := wd.GetMemoryReclaimerSettings() + Expect(enabled).To(BeTrue()) + }) + + It("WithMemoryReclaimerThreshold should set threshold only", func() { + wd := model.NewWatchDog( + model.WithProcessManager(pm), + model.WithMemoryReclaimerThreshold(0.75), + ) + _, threshold := wd.GetMemoryReclaimerSettings() + Expect(threshold).To(Equal(0.75)) + }) + }) + + Context("Option Combinations", func() { + It("should work with all options combined", func() { + pm := newMockProcessManager() + wd := model.NewWatchDog( + model.WithProcessManager(pm), + model.WithBusyTimeout(3*time.Minute), + model.WithIdleTimeout(10*time.Minute), + model.WithBusyCheck(true), + model.WithIdleCheck(true), + model.WithLRULimit(2), + model.WithMemoryReclaimerEnabled(true), + model.WithMemoryReclaimerThreshold(0.92), + ) + + Expect(wd).ToNot(BeNil()) + Expect(wd.GetLRULimit()).To(Equal(2)) + + enabled, threshold := wd.GetMemoryReclaimerSettings() + Expect(enabled).To(BeTrue()) + Expect(threshold).To(Equal(0.92)) + }) + + It("should work with no options (all defaults)", func() { + wd := model.NewWatchDog() + + Expect(wd).ToNot(BeNil()) + 
Expect(wd.GetLRULimit()).To(Equal(0)) + + enabled, threshold := wd.GetMemoryReclaimerSettings() + Expect(enabled).To(BeFalse()) + Expect(threshold).To(Equal(0.95)) // default + }) + + It("should allow partial configuration", func() { + pm := newMockProcessManager() + wd := model.NewWatchDog( + model.WithProcessManager(pm), + model.WithLRULimit(3), + ) + + Expect(wd).ToNot(BeNil()) + Expect(wd.GetLRULimit()).To(Equal(3)) + + // Memory reclaimer should use defaults + enabled, threshold := wd.GetMemoryReclaimerSettings() + Expect(enabled).To(BeFalse()) + Expect(threshold).To(Equal(0.95)) + }) + }) +}) + diff --git a/pkg/model/watchdog_test.go b/pkg/model/watchdog_test.go index 30d7ffc6667a..da25bdf4d8d8 100644 --- a/pkg/model/watchdog_test.go +++ b/pkg/model/watchdog_test.go @@ -53,25 +53,82 @@ var _ = Describe("WatchDog", func() { Context("LRU Limit", func() { It("should create watchdog with LRU limit", func() { - wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 2) + wd = model.NewWatchDog( + model.WithProcessManager(pm), + model.WithBusyTimeout(5*time.Minute), + model.WithIdleTimeout(15*time.Minute), + model.WithLRULimit(2), + ) Expect(wd.GetLRULimit()).To(Equal(2)) }) It("should allow updating LRU limit dynamically", func() { - wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 2) + wd = model.NewWatchDog( + model.WithProcessManager(pm), + model.WithLRULimit(2), + ) wd.SetLRULimit(5) Expect(wd.GetLRULimit()).To(Equal(5)) }) It("should return 0 for disabled LRU", func() { - wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 0) + wd = model.NewWatchDog( + model.WithProcessManager(pm), + model.WithLRULimit(0), + ) Expect(wd.GetLRULimit()).To(Equal(0)) }) }) + Context("Memory Reclaimer Options", func() { + It("should create watchdog with memory reclaimer settings", func() { + wd = model.NewWatchDog( + model.WithProcessManager(pm), + model.WithMemoryReclaimer(true, 0.85), + ) + enabled, threshold := 
wd.GetMemoryReclaimerSettings() + Expect(enabled).To(BeTrue()) + Expect(threshold).To(Equal(0.85)) + }) + + It("should allow setting memory reclaimer via separate options", func() { + wd = model.NewWatchDog( + model.WithProcessManager(pm), + model.WithMemoryReclaimerEnabled(true), + model.WithMemoryReclaimerThreshold(0.90), + ) + enabled, threshold := wd.GetMemoryReclaimerSettings() + Expect(enabled).To(BeTrue()) + Expect(threshold).To(Equal(0.90)) + }) + + It("should use default threshold when not specified", func() { + wd = model.NewWatchDog( + model.WithProcessManager(pm), + ) + _, threshold := wd.GetMemoryReclaimerSettings() + Expect(threshold).To(Equal(0.95)) // default + }) + + It("should allow updating memory reclaimer settings dynamically", func() { + wd = model.NewWatchDog( + model.WithProcessManager(pm), + ) + wd.SetMemoryReclaimer(true, 0.80) + enabled, threshold := wd.GetMemoryReclaimerSettings() + Expect(enabled).To(BeTrue()) + Expect(threshold).To(Equal(0.80)) + }) + }) + Context("Model Tracking", func() { BeforeEach(func() { - wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 3) + wd = model.NewWatchDog( + model.WithProcessManager(pm), + model.WithBusyTimeout(5*time.Minute), + model.WithIdleTimeout(15*time.Minute), + model.WithLRULimit(3), + ) }) It("should track loaded models count", func() { @@ -108,7 +165,12 @@ var _ = Describe("WatchDog", func() { Context("EnforceLRULimit", func() { BeforeEach(func() { - wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 2) + wd = model.NewWatchDog( + model.WithProcessManager(pm), + model.WithBusyTimeout(5*time.Minute), + model.WithIdleTimeout(15*time.Minute), + model.WithLRULimit(2), + ) }) It("should not evict when under limit", func() { @@ -218,7 +280,12 @@ var _ = Describe("WatchDog", func() { Context("Single Backend Mode (LRU=1)", func() { BeforeEach(func() { - wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 1) + wd = model.NewWatchDog( + 
model.WithProcessManager(pm), + model.WithBusyTimeout(5*time.Minute), + model.WithIdleTimeout(15*time.Minute), + model.WithLRULimit(1), + ) }) It("should evict existing model when loading new one", func() { @@ -241,4 +308,36 @@ var _ = Describe("WatchDog", func() { Expect(len(pm.getShutdownCalls())).To(Equal(5)) }) }) + + Context("Functional Options", func() { + It("should use default options when none provided", func() { + wd = model.NewWatchDog( + model.WithProcessManager(pm), + ) + Expect(wd.GetLRULimit()).To(Equal(0)) + + enabled, threshold := wd.GetMemoryReclaimerSettings() + Expect(enabled).To(BeFalse()) + Expect(threshold).To(Equal(0.95)) + }) + + It("should allow combining multiple options", func() { + wd = model.NewWatchDog( + model.WithProcessManager(pm), + model.WithBusyTimeout(10*time.Minute), + model.WithIdleTimeout(30*time.Minute), + model.WithBusyCheck(true), + model.WithIdleCheck(true), + model.WithLRULimit(5), + model.WithMemoryReclaimerEnabled(true), + model.WithMemoryReclaimerThreshold(0.80), + ) + + Expect(wd.GetLRULimit()).To(Equal(5)) + + enabled, threshold := wd.GetMemoryReclaimerSettings() + Expect(enabled).To(BeTrue()) + Expect(threshold).To(Equal(0.80)) + }) + }) }) diff --git a/pkg/xsysinfo/gpu.go b/pkg/xsysinfo/gpu.go index bfcf9a59d191..560377044ce5 100644 --- a/pkg/xsysinfo/gpu.go +++ b/pkg/xsysinfo/gpu.go @@ -1,13 +1,83 @@ package xsysinfo import ( + "bytes" + "encoding/json" + "os/exec" + "strconv" "strings" "sync" "github.com/jaypipes/ghw" "github.com/jaypipes/ghw/pkg/gpu" + "github.com/rs/zerolog/log" ) +// GPU vendor constants +const ( + VendorNVIDIA = "nvidia" + VendorAMD = "amd" + VendorIntel = "intel" + VendorVulkan = "vulkan" + VendorUnknown = "unknown" +) + +// UnifiedMemoryDevices is a list of GPU device name patterns that use unified memory +// (shared with system RAM). When these devices are detected and report N/A for VRAM, +// we fall back to system RAM information. 
+var UnifiedMemoryDevices = []string{ + "NVIDIA GB10", + "GB10", + // Add more unified memory devices here as needed +} + +// GPUMemoryInfo contains real-time GPU memory usage information +type GPUMemoryInfo struct { + Index int `json:"index"` + Name string `json:"name"` + Vendor string `json:"vendor"` + TotalVRAM uint64 `json:"total_vram"` // Total VRAM in bytes + UsedVRAM uint64 `json:"used_vram"` // Used VRAM in bytes + FreeVRAM uint64 `json:"free_vram"` // Free VRAM in bytes + UsagePercent float64 `json:"usage_percent"` // Usage as percentage (0-100) +} + +// GPUAggregateInfo contains aggregate GPU information across all GPUs +type GPUAggregateInfo struct { + TotalVRAM uint64 `json:"total_vram"` + UsedVRAM uint64 `json:"used_vram"` + FreeVRAM uint64 `json:"free_vram"` + UsagePercent float64 `json:"usage_percent"` + GPUCount int `json:"gpu_count"` +} + +// SystemRAMInfo contains system RAM usage information +type SystemRAMInfo struct { + Total uint64 `json:"total"` + Used uint64 `json:"used"` + Free uint64 `json:"free"` + Available uint64 `json:"available"` + UsagePercent float64 `json:"usage_percent"` +} + +// AggregateMemoryInfo contains aggregate memory information (unified for GPU/RAM) +type AggregateMemoryInfo struct { + TotalMemory uint64 `json:"total_memory"` + UsedMemory uint64 `json:"used_memory"` + FreeMemory uint64 `json:"free_memory"` + UsagePercent float64 `json:"usage_percent"` + GPUCount int `json:"gpu_count"` +} + +// ResourceInfo represents unified memory resource information +type ResourceInfo struct { + Type string `json:"type"` // "gpu" or "ram" + Available bool `json:"available"` + GPUs []GPUMemoryInfo `json:"gpus,omitempty"` + RAM *SystemRAMInfo `json:"ram,omitempty"` + Aggregate AggregateMemoryInfo `json:"aggregate"` +} + var ( gpuCache []*gpu.GraphicsCard gpuCacheOnce sync.Once @@ -60,3 +130,632 @@ func HasGPU(vendor string) bool { } return false } + +// isUnifiedMemoryDevice checks if the given GPU name matches any known unified memory 
device +func isUnifiedMemoryDevice(gpuName string) bool { + gpuNameUpper := strings.ToUpper(gpuName) + for _, pattern := range UnifiedMemoryDevices { + if strings.Contains(gpuNameUpper, strings.ToUpper(pattern)) { + return true + } + } + return false +} + +// getSystemRAM returns system RAM information using ghw +func getSystemRAM() (total, used, free uint64, err error) { + memory, err := ghw.Memory() + if err != nil { + return 0, 0, 0, err + } + + total = uint64(memory.TotalUsableBytes) + // ghw doesn't provide used/free directly, but we can estimate + // For unified memory GPUs, we report total system RAM as available VRAM + // since the GPU can potentially use all of it + free = total + used = 0 + + return total, used, free, nil +} + +// GetGPUMemoryUsage returns real-time GPU memory usage for all detected GPUs. +// It tries multiple vendor-specific tools in order: NVIDIA, AMD, Intel, Vulkan. +// Returns an empty slice if no GPU monitoring tools are available. +func GetGPUMemoryUsage() []GPUMemoryInfo { + var gpus []GPUMemoryInfo + + // Try NVIDIA first + nvidiaGPUs := getNVIDIAGPUMemory() + if len(nvidiaGPUs) > 0 { + gpus = append(gpus, nvidiaGPUs...) + } + + // XXX: Note - I could not test this with AMD and Intel GPUs, so I'm not sure if it works and it was added with the help of AI. + + // Try AMD ROCm + amdGPUs := getAMDGPUMemory() + if len(amdGPUs) > 0 { + // Adjust indices to continue from NVIDIA GPUs + startIdx := len(gpus) + for i := range amdGPUs { + amdGPUs[i].Index = startIdx + i + } + gpus = append(gpus, amdGPUs...) + } + + // Try Intel + intelGPUs := getIntelGPUMemory() + if len(intelGPUs) > 0 { + startIdx := len(gpus) + for i := range intelGPUs { + intelGPUs[i].Index = startIdx + i + } + gpus = append(gpus, intelGPUs...) + } + + // Try Vulkan as fallback for device detection (limited real-time data) + if len(gpus) == 0 { + vulkanGPUs := getVulkanGPUMemory() + gpus = append(gpus, vulkanGPUs...) 
+ } + + return gpus +} + +// GetGPUAggregateInfo returns aggregate GPU information across all GPUs +func GetGPUAggregateInfo() GPUAggregateInfo { + gpus := GetGPUMemoryUsage() + + var aggregate GPUAggregateInfo + aggregate.GPUCount = len(gpus) + + for _, gpu := range gpus { + aggregate.TotalVRAM += gpu.TotalVRAM + aggregate.UsedVRAM += gpu.UsedVRAM + aggregate.FreeVRAM += gpu.FreeVRAM + } + + if aggregate.TotalVRAM > 0 { + aggregate.UsagePercent = float64(aggregate.UsedVRAM) / float64(aggregate.TotalVRAM) * 100 + } + + return aggregate +} + +// getNVIDIAGPUMemory queries NVIDIA GPUs using nvidia-smi +func getNVIDIAGPUMemory() []GPUMemoryInfo { + // Check if nvidia-smi is available + if _, err := exec.LookPath("nvidia-smi"); err != nil { + return nil + } + + cmd := exec.Command("nvidia-smi", + "--query-gpu=index,name,memory.total,memory.used,memory.free", + "--format=csv,noheader,nounits") + + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + if err := cmd.Run(); err != nil { + log.Debug().Err(err).Str("stderr", stderr.String()).Msg("nvidia-smi failed") + return nil + } + + var gpus []GPUMemoryInfo + lines := strings.Split(strings.TrimSpace(stdout.String()), "\n") + + for _, line := range lines { + if line == "" { + continue + } + + parts := strings.Split(line, ", ") + if len(parts) < 5 { + continue + } + + idx, _ := strconv.Atoi(strings.TrimSpace(parts[0])) + name := strings.TrimSpace(parts[1]) + totalStr := strings.TrimSpace(parts[2]) + usedStr := strings.TrimSpace(parts[3]) + freeStr := strings.TrimSpace(parts[4]) + + var totalBytes, usedBytes, freeBytes uint64 + var usagePercent float64 + + // Check if memory values are N/A (unified memory devices like GB10) + isNA := totalStr == "[N/A]" || usedStr == "[N/A]" || freeStr == "[N/A]" + + if isNA && isUnifiedMemoryDevice(name) { + // Unified memory device - fall back to system RAM + sysTotal, sysUsed, sysFree, err := getSystemRAM() + if err != nil { + 
log.Debug().Err(err).Str("device", name).Msg("failed to get system RAM for unified memory device") + // Still add the GPU but with zero memory info + gpus = append(gpus, GPUMemoryInfo{ + Index: idx, + Name: name, + Vendor: VendorNVIDIA, + TotalVRAM: 0, + UsedVRAM: 0, + FreeVRAM: 0, + UsagePercent: 0, + }) + continue + } + + totalBytes = sysTotal + usedBytes = sysUsed + freeBytes = sysFree + if totalBytes > 0 { + usagePercent = float64(usedBytes) / float64(totalBytes) * 100 + } + + log.Debug(). + Str("device", name). + Uint64("system_ram_bytes", totalBytes). + Msg("using system RAM for unified memory GPU") + } else if isNA { + // Unknown device with N/A values - skip memory info + log.Debug().Str("device", name).Msg("nvidia-smi returned N/A for unknown device") + gpus = append(gpus, GPUMemoryInfo{ + Index: idx, + Name: name, + Vendor: VendorNVIDIA, + TotalVRAM: 0, + UsedVRAM: 0, + FreeVRAM: 0, + UsagePercent: 0, + }) + continue + } else { + // Normal GPU with dedicated VRAM + totalMB, _ := strconv.ParseFloat(totalStr, 64) + usedMB, _ := strconv.ParseFloat(usedStr, 64) + freeMB, _ := strconv.ParseFloat(freeStr, 64) + + // Convert MB to bytes + totalBytes = uint64(totalMB * 1024 * 1024) + usedBytes = uint64(usedMB * 1024 * 1024) + freeBytes = uint64(freeMB * 1024 * 1024) + + if totalBytes > 0 { + usagePercent = float64(usedBytes) / float64(totalBytes) * 100 + } + } + + gpus = append(gpus, GPUMemoryInfo{ + Index: idx, + Name: name, + Vendor: VendorNVIDIA, + TotalVRAM: totalBytes, + UsedVRAM: usedBytes, + FreeVRAM: freeBytes, + UsagePercent: usagePercent, + }) + } + + return gpus +} + +// getAMDGPUMemory queries AMD GPUs using rocm-smi +func getAMDGPUMemory() []GPUMemoryInfo { + // Check if rocm-smi is available + if _, err := exec.LookPath("rocm-smi"); err != nil { + return nil + } + + // Try CSV format first + cmd := exec.Command("rocm-smi", "--showmeminfo", "vram", "--csv") + + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + if err 
:= cmd.Run(); err != nil {
		log.Debug().Err(err).Str("stderr", stderr.String()).Msg("rocm-smi failed")
		return nil
	}

	var gpus []GPUMemoryInfo
	// Each non-header line of the rocm-smi output is expected to be a
	// comma-separated record: index, used memory, total memory.
	lines := strings.Split(strings.TrimSpace(stdout.String()), "\n")

	// Skip header line
	for i, line := range lines {
		if i == 0 || line == "" {
			continue
		}

		parts := strings.Split(line, ",")
		if len(parts) < 3 {
			continue
		}

		// Parse GPU index from first column (usually "GPU[0]" format)
		idxStr := strings.TrimSpace(parts[0])
		idx := 0
		if strings.HasPrefix(idxStr, "GPU[") {
			idxStr = strings.TrimPrefix(idxStr, "GPU[")
			idxStr = strings.TrimSuffix(idxStr, "]")
			idx, _ = strconv.Atoi(idxStr) // on parse failure idx stays 0
		}

		// Parse memory values (in bytes or MB depending on rocm-smi version)
		usedBytes, _ := strconv.ParseUint(strings.TrimSpace(parts[1]), 10, 64)
		totalBytes, _ := strconv.ParseUint(strings.TrimSpace(parts[2]), 10, 64)

		// If values seem like MB, convert to bytes
		// NOTE(review): heuristic — a total below 1,000,000 is assumed to be
		// MB (i.e. no GPU would report < ~1 MB of VRAM in bytes). A card
		// reporting < 1 TB in MB units still satisfies this; confirm against
		// the rocm-smi versions actually supported.
		if totalBytes < 1000000 {
			usedBytes *= 1024 * 1024
			totalBytes *= 1024 * 1024
		}

		// Clamp so free never underflows when used > total (bad tool output).
		freeBytes := uint64(0)
		if totalBytes > usedBytes {
			freeBytes = totalBytes - usedBytes
		}

		usagePercent := 0.0
		if totalBytes > 0 {
			usagePercent = float64(usedBytes) / float64(totalBytes) * 100
		}

		gpus = append(gpus, GPUMemoryInfo{
			Index:        idx,
			Name:         "AMD GPU", // rocm-smi CSV path does not expose the model name
			Vendor:       VendorAMD,
			TotalVRAM:    totalBytes,
			UsedVRAM:     usedBytes,
			FreeVRAM:     freeBytes,
			UsagePercent: usagePercent,
		})
	}

	return gpus
}

// getIntelGPUMemory queries Intel GPUs using xpu-smi or intel_gpu_top.
// It returns nil when neither tool is installed or neither yields usable data.
func getIntelGPUMemory() []GPUMemoryInfo {
	// Try xpu-smi first (Intel's official GPU management tool)
	gpus := getIntelXPUSMI()
	if len(gpus) > 0 {
		return gpus
	}

	// Fallback to intel_gpu_top
	return getIntelGPUTop()
}

// getIntelXPUSMI queries Intel GPUs using xpu-smi.
// It runs "xpu-smi discovery --json" to enumerate devices and, per device,
// "xpu-smi stats -d <id> --json" for current memory usage. Stats failures are
// tolerated: the device is still reported, with UsedVRAM left at 0.
func getIntelXPUSMI() []GPUMemoryInfo {
	if _, err := exec.LookPath("xpu-smi"); err != nil {
		return nil
	}

	// Get device list
	cmd := exec.Command("xpu-smi", "discovery", "--json")

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		log.Debug().Err(err).Str("stderr", stderr.String()).Msg("xpu-smi discovery failed")
		return nil
	}

	// Parse JSON output
	var result struct {
		DeviceList []struct {
			DeviceID   int    `json:"device_id"`
			DeviceName string `json:"device_name"`
			VendorName string `json:"vendor_name"`
			// Note: the JSON key really is singular "byte" in xpu-smi output.
			MemoryPhysicalSizeBytes uint64 `json:"memory_physical_size_byte"`
		} `json:"device_list"`
	}

	if err := json.Unmarshal(stdout.Bytes(), &result); err != nil {
		log.Debug().Err(err).Msg("failed to parse xpu-smi discovery output")
		return nil
	}

	var gpus []GPUMemoryInfo

	for _, device := range result.DeviceList {
		// Get memory usage for this device
		statsCmd := exec.Command("xpu-smi", "stats", "-d", strconv.Itoa(device.DeviceID), "--json")

		var statsStdout bytes.Buffer
		statsCmd.Stdout = &statsStdout

		// Best-effort: a failed stats call leaves usedBytes at 0 rather than
		// dropping the device from the result.
		usedBytes := uint64(0)
		if err := statsCmd.Run(); err == nil {
			var stats struct {
				DeviceID   int    `json:"device_id"`
				MemoryUsed uint64 `json:"memory_used"`
			}
			if err := json.Unmarshal(statsStdout.Bytes(), &stats); err == nil {
				usedBytes = stats.MemoryUsed
			}
		}

		totalBytes := device.MemoryPhysicalSizeBytes
		freeBytes := uint64(0)
		if totalBytes > usedBytes {
			freeBytes = totalBytes - usedBytes
		}

		usagePercent := 0.0
		if totalBytes > 0 {
			usagePercent = float64(usedBytes) / float64(totalBytes) * 100
		}

		gpus = append(gpus, GPUMemoryInfo{
			Index:        device.DeviceID,
			Name:         device.DeviceName,
			Vendor:       VendorIntel,
			TotalVRAM:    totalBytes,
			UsedVRAM:     usedBytes,
			FreeVRAM:     freeBytes,
			UsagePercent: usagePercent,
		})
	}

	return gpus
}

// getIntelGPUTop queries Intel GPUs using intel_gpu_top.
//
// NOTE(review): as written this function can only ever return nil —
// intel_gpu_top's NDJSON output is located and unmarshalled, but the parsed
// result is discarded and the final statement returns nil unconditionally
// (intel_gpu_top does not reliably expose memory figures). The command
// execution and parsing are therefore wasted work; consider either completing
// the memory extraction or removing this fallback from getIntelGPUMemory.
func getIntelGPUTop() []GPUMemoryInfo {
	if _, err := exec.LookPath("intel_gpu_top"); err != nil {
		return nil
	}

	// intel_gpu_top with -J outputs JSON, -s 1 for single sample
	cmd := exec.Command("intel_gpu_top", "-J", "-s", "1")

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		log.Debug().Err(err).Str("stderr", stderr.String()).Msg("intel_gpu_top failed")
		return nil
	}

	// Parse JSON output - intel_gpu_top outputs NDJSON
	lines := strings.Split(strings.TrimSpace(stdout.String()), "\n")
	if len(lines) == 0 {
		return nil
	}

	// Take the last complete JSON object
	// (scan backwards for the most recent line that starts an object).
	var lastJSON string
	for i := len(lines) - 1; i >= 0; i-- {
		if strings.HasPrefix(strings.TrimSpace(lines[i]), "{") {
			lastJSON = lines[i]
			break
		}
	}

	if lastJSON == "" {
		return nil
	}

	var result struct {
		Engines map[string]interface{} `json:"engines"`
		// Memory info if available
	}

	if err := json.Unmarshal([]byte(lastJSON), &result); err != nil {
		log.Debug().Err(err).Msg("failed to parse intel_gpu_top output")
		return nil
	}

	// intel_gpu_top doesn't always provide memory info
	// Return empty if we can't get useful data
	return nil
}

// GetSystemRAMInfo returns real-time system RAM usage.
// Totals come from ghw; used/available/free figures are refined from
// /proc/meminfo when readable (Linux), otherwise the whole total is
// reported as available/free. Returns an error only if ghw itself fails.
func GetSystemRAMInfo() (*SystemRAMInfo, error) {
	memory, err := ghw.Memory()
	if err != nil {
		return nil, err
	}

	total := uint64(memory.TotalUsableBytes)

	// Try to get more accurate memory info from /proc/meminfo on Linux
	used, available, free := getDetailedMemoryInfo(total)

	usagePercent := 0.0
	if total > 0 {
		usagePercent = float64(used) / float64(total) * 100
	}

	return &SystemRAMInfo{
		Total:        total,
		Used:         used,
		Free:         free,
		Available:    available,
		UsagePercent: usagePercent,
	}, nil
}

// getDetailedMemoryInfo tries to get detailed memory info from /proc/meminfo on Linux
// Returns used, available, and free memory in bytes.
// On any read failure it falls back to (used=0, available=total, free=total),
// i.e. it optimistically reports all memory as free.
//
// NOTE(review): this shells out to `cat /proc/meminfo`; os.ReadFile would
// avoid the fork/exec and work without `cat` in PATH — confirm the `os`
// package is importable here before changing.
func getDetailedMemoryInfo(total uint64) (used, available, free uint64) {
	// Try to read /proc/meminfo for more accurate data
	cmd := exec.Command("cat", "/proc/meminfo")
	var stdout bytes.Buffer
	cmd.Stdout = &stdout

	if err := cmd.Run(); err != nil {
		// Fallback: assume all memory is available
		return 0, total, total
	}

	lines := strings.Split(stdout.String(), "\n")
	memInfo := make(map[string]uint64)

	// Each /proc/meminfo line looks like "MemTotal:  16384 kB";
	// store every parseable key, converted from kB to bytes.
	for _, line := range lines {
		parts := strings.Fields(line)
		if len(parts) < 2 {
			continue
		}
		key := strings.TrimSuffix(parts[0], ":")
		value, err := strconv.ParseUint(parts[1], 10, 64)
		if err != nil {
			continue
		}
		// Values in /proc/meminfo are in kB
		memInfo[key] = value * 1024
	}

	// Get MemAvailable if present (preferred), otherwise calculate from free + buffers + cached
	if avail, ok := memInfo["MemAvailable"]; ok {
		available = avail
	} else {
		available = memInfo["MemFree"] + memInfo["Buffers"] + memInfo["Cached"]
	}

	free = memInfo["MemFree"]

	// Calculate used memory (clamped at 0 when available exceeds ghw's total)
	if total > available {
		used = total - available
	} else {
		used = 0
	}

	return used, available, free
}

// GetResourceInfo returns GPU info if available, otherwise system RAM info.
// The result's Type field is "gpu" or "ram"; Aggregate always carries the
// combined totals for whichever resource was selected.
func GetResourceInfo() ResourceInfo {
	gpus := GetGPUMemoryUsage()

	if len(gpus) > 0 {
		// GPU available - return GPU info
		aggregate := GetGPUAggregateInfo()
		return ResourceInfo{
			Type:      "gpu",
			Available: true,
			GPUs:      gpus,
			RAM:       nil,
			Aggregate: AggregateMemoryInfo{
				TotalMemory:  aggregate.TotalVRAM,
				UsedMemory:   aggregate.UsedVRAM,
				FreeMemory:   aggregate.FreeVRAM,
				UsagePercent: aggregate.UsagePercent,
				GPUCount:     aggregate.GPUCount,
			},
		}
	}

	// No GPU - fall back to system RAM
	ramInfo, err := GetSystemRAMInfo()
	if err != nil {
		log.Debug().Err(err).Msg("failed to get system RAM info")
		// Available=false with a zero Aggregate signals "no usable data".
		return ResourceInfo{
			Type:      "ram",
			Available: false,
			Aggregate: AggregateMemoryInfo{},
		}
	}

	return ResourceInfo{
		Type:      "ram",
		Available: true,
		GPUs:      nil,
		RAM:       ramInfo,
		Aggregate: AggregateMemoryInfo{
			TotalMemory:  ramInfo.Total,
			UsedMemory:   ramInfo.Used,
			FreeMemory:   ramInfo.Free,
			UsagePercent: ramInfo.UsagePercent,
			GPUCount:     0,
		},
	}
}

// GetResourceAggregateInfo returns aggregate memory info (GPU if available, otherwise RAM).
// This is used by the memory reclaimer to check memory usage.
func GetResourceAggregateInfo() AggregateMemoryInfo {
	resourceInfo := GetResourceInfo()
	return resourceInfo.Aggregate
}

// getVulkanGPUMemory queries GPUs using vulkaninfo as a fallback.
// Note: Vulkan provides memory heap info but not real-time usage, so the
// returned entries always report UsedVRAM=0 and FreeVRAM=TotalVRAM.
func getVulkanGPUMemory() []GPUMemoryInfo {
	if _, err := exec.LookPath("vulkaninfo"); err != nil {
		return nil
	}

	cmd := exec.Command("vulkaninfo", "--json")

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		log.Debug().Err(err).Str("stderr", stderr.String()).Msg("vulkaninfo failed")
		return nil
	}

	// Parse Vulkan JSON output
	var result struct {
		VkPhysicalDevices []struct {
			DeviceName                     string `json:"deviceName"`
			DeviceType                     string `json:"deviceType"`
			VkPhysicalDeviceMemoryProperties struct {
				MemoryHeaps []struct {
					Flags int    `json:"flags"`
					Size  uint64 `json:"size"`
				} `json:"memoryHeaps"`
			} `json:"VkPhysicalDeviceMemoryProperties"`
		} `json:"VkPhysicalDevices"`
	}

	if err := json.Unmarshal(stdout.Bytes(), &result); err != nil {
		log.Debug().Err(err).Msg("failed to parse vulkaninfo output")
		return nil
	}

	var gpus []GPUMemoryInfo

	for i, device := range result.VkPhysicalDevices {
		// Skip non-discrete/integrated GPUs if possible
		if device.DeviceType == "VK_PHYSICAL_DEVICE_TYPE_CPU" {
			continue
		}

		// Sum up device-local memory heaps
		var totalVRAM uint64
		for _, heap := range device.VkPhysicalDeviceMemoryProperties.MemoryHeaps {
			// Flag 1 = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
			if heap.Flags&1 != 0 {
				totalVRAM += heap.Size
			}
		}

		// A device with no device-local heap has no VRAM worth reporting.
		if totalVRAM == 0 {
			continue
		}

		gpus = append(gpus, GPUMemoryInfo{
			Index:        i,
			Name:         device.DeviceName,
			Vendor:       VendorVulkan,
			TotalVRAM:    totalVRAM,
			UsedVRAM:     0, // Vulkan doesn't provide real-time usage
			FreeVRAM:     totalVRAM,
			UsagePercent: 0,
		})
	}

	return gpus
}