Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 27 additions & 14 deletions cmd/mnemonic/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,26 @@ func buildEncodingConfig(cfg *config.Config) encoding.EncodingConfig {
}
}

// newAPIProvider creates an API-based LLM provider from config.
// It targets an OpenAI-compatible endpoint (LM Studio, etc.) using the
// endpoint, models, API key, and concurrency limit from cfg.LLM.
func newAPIProvider(cfg *config.Config) llm.Provider {
	// Default to 30s when TimeoutSec is unset or invalid (zero or
	// negative) — a negative timeout would be rejected by the HTTP client.
	timeout := time.Duration(cfg.LLM.TimeoutSec) * time.Second
	if timeout <= 0 {
		timeout = 30 * time.Second
	}
	return llm.NewLMStudioProvider(
		cfg.LLM.Endpoint,
		cfg.LLM.ChatModel,
		cfg.LLM.EmbeddingModel,
		cfg.LLM.APIKey,
		timeout,
		cfg.LLM.MaxConcurrent,
	)
}

// newLLMProvider creates the appropriate LLM provider based on config.
// For "api" (default), it creates an LMStudioProvider for OpenAI-compatible APIs.
// For "embedded", it creates an EmbeddedProvider for in-process llama.cpp inference.
// For "embedded", it creates a SwitchableProvider with embedded as primary
// and API as a fallback that can be toggled at runtime.
func newLLMProvider(cfg *config.Config) llm.Provider {
switch cfg.LLM.Provider {
case "embedded":
Expand All @@ -215,19 +232,15 @@ func newLLMProvider(cfg *config.Config) llm.Provider {
} else {
slog.Warn("embedded provider selected but llama.cpp not compiled in (build with: make build-embedded)")
}
return ep
default: // "api" or ""
timeout := time.Duration(cfg.LLM.TimeoutSec) * time.Second
if timeout == 0 {
timeout = 30 * time.Second

// Create API provider as runtime fallback (Gemini, etc.)
var apiProvider llm.Provider
if cfg.LLM.Endpoint != "" {
apiProvider = newAPIProvider(cfg)
}
return llm.NewLMStudioProvider(
cfg.LLM.Endpoint,
cfg.LLM.ChatModel,
cfg.LLM.EmbeddingModel,
cfg.LLM.APIKey,
timeout,
cfg.LLM.MaxConcurrent,
)

return llm.NewSwitchableProvider(ep, apiProvider, cfg.LLM.ChatModel)
default: // "api" or ""
return newAPIProvider(cfg)
}
}
6 changes: 6 additions & 0 deletions cmd/mnemonic/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,12 @@ func serveCommand(configPath string) {
StartTime: time.Now(),
Log: log,
}
// Wire model manager if using switchable/embedded provider
if sp, ok := llmProvider.(*llm.SwitchableProvider); ok {
apiDeps.ModelManager = sp
} else if ep, ok := llmProvider.(*llm.EmbeddedProvider); ok {
apiDeps.ModelManager = ep
}
// Only set Consolidator if it's non-nil (avoids Go nil-interface trap)
if consolidator != nil {
apiDeps.Consolidator = consolidator
Expand Down
119 changes: 119 additions & 0 deletions internal/api/routes/models.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
package routes

import (
"encoding/json"
"log/slog"
"net/http"

"github.com/appsprout-dev/mnemonic/internal/llm"
)

// HandleListModels returns available GGUF models in the models directory,
// along with the currently active model and the provider mode.
//
// When mgr is nil (no embedded provider wired in), it responds 200 with
// enabled=false so clients can degrade gracefully instead of erroring.
func HandleListModels(mgr llm.ModelManager, log *slog.Logger) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		if mgr == nil {
			writeJSON(w, http.StatusOK, map[string]any{
				"models":  []any{},
				"enabled": false,
				"message": "embedded provider not active",
			})
			return
		}

		models, err := mgr.ListAvailableModels()
		if err != nil {
			log.Error("failed to list models", "error", err)
			writeError(w, http.StatusInternalServerError, "failed to list models: "+err.Error(), "MODEL_ERROR")
			return
		}

		writeJSON(w, http.StatusOK, map[string]any{
			"models":  models,
			"active":  mgr.ActiveModel(),
			"enabled": true,
			"mode":    mgr.ProviderMode(),
		})
	}
}

// HandleActiveModel returns the currently loaded model status.
//
// When mgr is nil (no embedded provider wired in), it responds 200 with
// enabled=false so clients can degrade gracefully instead of erroring.
func HandleActiveModel(mgr llm.ModelManager, log *slog.Logger) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		if mgr == nil {
			writeJSON(w, http.StatusOK, map[string]any{
				"enabled": false,
				"message": "embedded provider not active",
			})
			return
		}

		writeJSON(w, http.StatusOK, map[string]any{
			"active":  mgr.ActiveModel(),
			"enabled": true,
		})
	}
}

// swapModelRequest is the JSON body for POST /api/v1/models/active.
// All fields are optional, but the handler rejects a request in which
// every field is empty.
type swapModelRequest struct {
	ChatModel  string `json:"chat_model"`  // chat model to hot-swap to; empty = leave unchanged
	EmbedModel string `json:"embed_model"` // embedding model to hot-swap to; empty = leave unchanged
	Mode       string `json:"mode"`        // "embedded" or "api" — switches provider
}

// HandleSwapModel hot-swaps the active chat and/or embedding model and can
// switch the provider mode at runtime.
//
// The JSON body is a swapModelRequest; at least one of chat_model,
// embed_model, or mode must be set. The mode switch is applied first so a
// model swap in the same request targets the newly selected provider.
// Responds 200 with the resulting active model on success; 400 when mgr is
// nil, the body is invalid, or the mode switch fails; 500 when a swap fails.
func HandleSwapModel(mgr llm.ModelManager, log *slog.Logger) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		if mgr == nil {
			writeError(w, http.StatusBadRequest, "embedded provider not active — model swap unavailable", "MODEL_ERROR")
			return
		}

		// This endpoint expects a tiny JSON payload; cap the body so a
		// hostile or buggy client cannot stream an unbounded request.
		r.Body = http.MaxBytesReader(w, r.Body, 1<<20)

		var req swapModelRequest
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			writeError(w, http.StatusBadRequest, "invalid JSON body: "+err.Error(), "INVALID_PARAM")
			return
		}

		if req.ChatModel == "" && req.EmbedModel == "" && req.Mode == "" {
			writeError(w, http.StatusBadRequest, "specify chat_model, embed_model, or mode", "INVALID_PARAM")
			return
		}

		// Apply the mode switch before any swap so the swaps act on the
		// provider the caller asked for.
		if req.Mode != "" {
			log.Info("switching provider mode", "mode", req.Mode)
			if err := mgr.SetProviderMode(req.Mode); err != nil {
				log.Error("failed to switch provider mode", "error", err)
				writeError(w, http.StatusBadRequest, "failed to switch mode: "+err.Error(), "MODEL_ERROR")
				return
			}
		}

		if req.ChatModel != "" {
			log.Info("swapping chat model", "model", req.ChatModel)
			if err := mgr.SwapChatModel(req.ChatModel); err != nil {
				log.Error("failed to swap chat model", "error", err)
				writeError(w, http.StatusInternalServerError, "failed to swap chat model: "+err.Error(), "MODEL_ERROR")
				return
			}
		}

		if req.EmbedModel != "" {
			log.Info("swapping embed model", "model", req.EmbedModel)
			if err := mgr.SwapEmbedModel(req.EmbedModel); err != nil {
				log.Error("failed to swap embed model", "error", err)
				writeError(w, http.StatusInternalServerError, "failed to swap embed model: "+err.Error(), "MODEL_ERROR")
				return
			}
		}

		writeJSON(w, http.StatusOK, map[string]any{
			"status": "ok",
			"active": mgr.ActiveModel(),
		})
	}
}
6 changes: 6 additions & 0 deletions internal/api/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ type ServerConfig struct {
type ServerDeps struct {
Store store.Store
LLM llm.Provider
ModelManager llm.ModelManager // can be nil if not using embedded provider
Bus events.Bus
Retriever *retrieval.RetrievalAgent
Consolidator routes.ConsolidationRunner // can be nil if disabled
Expand Down Expand Up @@ -132,6 +133,11 @@ func (s *Server) registerRoutes() {
s.mux.HandleFunc("GET /api/v1/abstractions", routes.HandleListAbstractions(s.deps.Store, s.deps.Log))
s.mux.HandleFunc("GET /api/v1/projects", routes.HandleListProjects(s.deps.Store, s.deps.Log))

// Model management (control center)
s.mux.HandleFunc("GET /api/v1/models", routes.HandleListModels(s.deps.ModelManager, s.deps.Log))
s.mux.HandleFunc("GET /api/v1/models/active", routes.HandleActiveModel(s.deps.ModelManager, s.deps.Log))
s.mux.HandleFunc("POST /api/v1/models/active", routes.HandleSwapModel(s.deps.ModelManager, s.deps.Log))

// LLM usage monitoring
s.mux.HandleFunc("GET /api/v1/llm/usage", routes.HandleLLMUsage(s.deps.Store, s.deps.Log))

Expand Down
Loading