From 38bd2214f5f04b20c554666642c89e20d858e053 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 5 Jan 2024 18:12:17 +0100 Subject: [PATCH 01/17] move downloader out --- api/api.go | 5 +- api/config/config.go | 7 +-- embedded/embedded.go | 53 +++++++++++++++++++ embedded/model_library.yaml | 9 ++++ embedded/models/llava.yaml | 31 ++++++++++++ embedded/models/mistral-openorca.yaml | 24 +++++++++ pkg/downloader/progress.go | 26 ++++++++++ pkg/{utils => downloader}/uri.go | 73 ++++++++++++++------------- pkg/{utils => downloader}/uri_test.go | 4 +- pkg/gallery/models.go | 3 +- pkg/utils/hash.go | 10 ++++ 11 files changed, 202 insertions(+), 43 deletions(-) create mode 100644 embedded/embedded.go create mode 100644 embedded/model_library.yaml create mode 100644 embedded/models/llava.yaml create mode 100644 embedded/models/mistral-openorca.yaml create mode 100644 pkg/downloader/progress.go rename pkg/{utils => downloader}/uri.go (87%) rename pkg/{utils => downloader}/uri_test.go (93%) create mode 100644 pkg/utils/hash.go diff --git a/api/api.go b/api/api.go index 365346bdbb41..3a7366f7002d 100644 --- a/api/api.go +++ b/api/api.go @@ -16,6 +16,7 @@ import ( "github.com/go-skynet/LocalAI/internal" "github.com/go-skynet/LocalAI/metrics" "github.com/go-skynet/LocalAI/pkg/assets" + "github.com/go-skynet/LocalAI/pkg/downloader" "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/utils" @@ -41,13 +42,13 @@ func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader, modelPath := options.Loader.ModelPath if len(options.ModelsURL) > 0 { for _, url := range options.ModelsURL { - if utils.LooksLikeURL(url) { + if downloader.LooksLikeURL(url) { // md5 of model name md5Name := utils.MD5(url) // check if file exists if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) { - err := utils.DownloadFile(url, filepath.Join(modelPath, md5Name)+".yaml", "", func(fileName, current, total string, percent 
float64) { + err := downloader.DownloadFile(url, filepath.Join(modelPath, md5Name)+".yaml", "", func(fileName, current, total string, percent float64) { utils.DisplayDownloadFunction(fileName, current, total, percent) }) if err != nil { diff --git a/api/config/config.go b/api/config/config.go index ab62841b9f22..0b49b039b2f5 100644 --- a/api/config/config.go +++ b/api/config/config.go @@ -9,6 +9,7 @@ import ( "strings" "sync" + "github.com/go-skynet/LocalAI/pkg/downloader" "github.com/go-skynet/LocalAI/pkg/utils" "github.com/rs/zerolog/log" "gopkg.in/yaml.v3" @@ -300,7 +301,7 @@ func (cm *ConfigLoader) Preload(modelPath string) error { // Create file path filePath := filepath.Join(modelPath, file.Filename) - if err := utils.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil { + if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil { return err } } @@ -308,13 +309,13 @@ func (cm *ConfigLoader) Preload(modelPath string) error { modelURL := config.PredictionOptions.Model modelURL = utils.ConvertURL(modelURL) - if utils.LooksLikeURL(modelURL) { + if downloader.LooksLikeURL(modelURL) { // md5 of model name md5Name := utils.MD5(modelURL) // check if file exists if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) { - err := utils.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status) + err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status) if err != nil { return err } diff --git a/embedded/embedded.go b/embedded/embedded.go new file mode 100644 index 000000000000..a76e87cd43f1 --- /dev/null +++ b/embedded/embedded.go @@ -0,0 +1,53 @@ +package embedded + +import ( + "embed" + "fmt" + "slices" + "strings" + + "github.com/go-skynet/LocalAI/pkg/assets" + "gopkg.in/yaml.v3" +) + +var modelShorteners map[string]string + +//go:embed model_library.yaml +var modelLibrary []byte + +//go:embed models/* +var embeddedModels embed.FS + +func 
ModelShortURL(s string) string { + if _, ok := modelShorteners[s]; ok { + s = modelShorteners[s] + } + + return s +} + +func init() { + yaml.Unmarshal(modelLibrary, &modelShorteners) +} + +// ExistsInModelsLibrary checks if a model exists in the embedded models library +func ExistsInModelsLibrary(s string) bool { + f := fmt.Sprintf("%s.yaml", s) + + a := []string{} + + for _, j := range assets.ListFiles(embeddedModels) { + a = append(a, strings.TrimPrefix(j, "models/")) + } + + return slices.Contains(a, f) +} + +// ResolveContent returns the content in the embedded model library +func ResolveContent(s string) ([]byte, error) { + if ExistsInModelsLibrary(s) { + return embeddedModels.ReadFile(fmt.Sprintf("models/%s.yaml", s)) + } + + return nil, fmt.Errorf("cannot find model %s", s) +} diff --git a/embedded/model_library.yaml b/embedded/model_library.yaml new file mode 100644 index 000000000000..68e966559d75 --- /dev/null +++ b/embedded/model_library.yaml @@ -0,0 +1,9 @@ +### +### +### This file contains the list of models that are available in the library +### The URLs are automatically expanded when local-ai is being called with the key as argument +### +### For models with an entire YAML file to be embededd, put the file inside the `models` +### directory, it will be automatically available with the file name as key (without the .yaml extension) + +phi-2: "github://mudler/LocalAI/examples/configurations/phi-2.yaml@master" \ No newline at end of file diff --git a/embedded/models/llava.yaml b/embedded/models/llava.yaml new file mode 100644 index 000000000000..662cac832a53 --- /dev/null +++ b/embedded/models/llava.yaml @@ -0,0 +1,31 @@ +backend: llama-cpp +context_size: 4096 +f16: true + +gpu_layers: 90 +mmap: true +name: llava + +roles: + user: "USER:" + assistant: "ASSISTANT:" + system: "SYSTEM:" + +mmproj: bakllava-mmproj.gguf +parameters: + model: bakllava.gguf + temperature: 0.2 + top_k: 40 + top_p: 0.95 + +template: + chat: | + A chat between a curious human 
and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. + {{.Input}} + ASSISTANT: + +download_files: +- filename: bakllava.gguf + uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf +- filename: bakllava-mmproj.gguf + uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf \ No newline at end of file diff --git a/embedded/models/mistral-openorca.yaml b/embedded/models/mistral-openorca.yaml new file mode 100644 index 000000000000..abde9e8b49d3 --- /dev/null +++ b/embedded/models/mistral-openorca.yaml @@ -0,0 +1,24 @@ +name: mistral-openorca +mmap: true +parameters: + model: huggingface://TheBloke/Mistral-7B-OpenOrca-GGUF/mistral-7b-openorca.Q6_K.gguf + temperature: 0.2 + top_k: 40 + top_p: 0.95 +template: + chat_message: | + <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} + {{if .Content}}{{.Content}}{{end}} + <|im_end|> + + chat: | + {{.Input}} + <|im_start|>assistant + + completion: | + {{.Input}} +context_size: 4096 +f16: true +stopwords: +- <|im_end|> +threads: 4 diff --git a/pkg/downloader/progress.go b/pkg/downloader/progress.go new file mode 100644 index 000000000000..6806f5863311 --- /dev/null +++ b/pkg/downloader/progress.go @@ -0,0 +1,26 @@ +package downloader + +import "hash" + +type progressWriter struct { + fileName string + total int64 + written int64 + downloadStatus func(string, string, string, float64) + hash hash.Hash +} + +func (pw *progressWriter) Write(p []byte) (n int, err error) { + n, err = pw.hash.Write(p) + pw.written += int64(n) + + if pw.total > 0 { + percentage := float64(pw.written) / float64(pw.total) * 100 + //log.Debug().Msgf("Downloading %s: %s/%s (%.2f%%)", pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage) + pw.downloadStatus(pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage) + } else { + pw.downloadStatus(pw.fileName, 
formatBytes(pw.written), "", 0) + } + + return +} diff --git a/pkg/utils/uri.go b/pkg/downloader/uri.go similarity index 87% rename from pkg/utils/uri.go rename to pkg/downloader/uri.go index 185e44b9610f..80214f5b860d 100644 --- a/pkg/utils/uri.go +++ b/pkg/downloader/uri.go @@ -1,10 +1,9 @@ -package utils +package downloader import ( - "crypto/md5" "crypto/sha256" + "encoding/base64" "fmt" - "hash" "io" "net/http" "os" @@ -12,9 +11,18 @@ import ( "strconv" "strings" + "github.com/go-skynet/LocalAI/pkg/utils" "github.com/rs/zerolog/log" ) +const ( + HuggingFacePrefix = "huggingface://" + HTTPPrefix = "http://" + HTTPSPrefix = "https://" + GithubURI = "github:" + GithubURI2 = "github://" +) + func GetURI(url string, f func(url string, i []byte) error) error { url = ConvertURL(url) @@ -52,14 +60,6 @@ func GetURI(url string, f func(url string, i []byte) error) error { return f(url, body) } -const ( - HuggingFacePrefix = "huggingface://" - HTTPPrefix = "http://" - HTTPSPrefix = "https://" - GithubURI = "github:" - GithubURI2 = "github://" -) - func LooksLikeURL(s string) bool { return strings.HasPrefix(s, HTTPPrefix) || strings.HasPrefix(s, HTTPSPrefix) || @@ -229,10 +229,10 @@ func DownloadFile(url string, filePath, sha string, downloadStatus func(string, } log.Info().Msgf("File %q downloaded and verified", filePath) - if IsArchive(filePath) { + if utils.IsArchive(filePath) { basePath := filepath.Dir(filePath) log.Info().Msgf("File %q is an archive, uncompressing to %s", filePath, basePath) - if err := ExtractArchive(filePath, basePath); err != nil { + if err := utils.ExtractArchive(filePath, basePath); err != nil { log.Debug().Msgf("Failed decompressing %q: %s", filePath, err.Error()) return err } @@ -241,32 +241,35 @@ func DownloadFile(url string, filePath, sha string, downloadStatus func(string, return nil } -type progressWriter struct { - fileName string - total int64 - written int64 - downloadStatus func(string, string, string, float64) - hash hash.Hash -} +// 
this function check if the string is an URL, if it's an URL downloads the image in memory +// encodes it in base64 and returns the base64 string +func GetBase64Image(s string) (string, error) { + if strings.HasPrefix(s, "http") { + // download the image + resp, err := http.Get(s) + if err != nil { + return "", err + } + defer resp.Body.Close() -func (pw *progressWriter) Write(p []byte) (n int, err error) { - n, err = pw.hash.Write(p) - pw.written += int64(n) + // read the image data into memory + data, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } - if pw.total > 0 { - percentage := float64(pw.written) / float64(pw.total) * 100 - //log.Debug().Msgf("Downloading %s: %s/%s (%.2f%%)", pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage) - pw.downloadStatus(pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage) - } else { - pw.downloadStatus(pw.fileName, formatBytes(pw.written), "", 0) - } + // encode the image data in base64 + encoded := base64.StdEncoding.EncodeToString(data) - return -} + // return the base64 string + return encoded, nil + } -// MD5 of a string -func MD5(s string) string { - return fmt.Sprintf("%x", md5.Sum([]byte(s))) + // if the string instead is prefixed with "data:image/jpeg;base64,", drop it + if strings.HasPrefix(s, "data:image/jpeg;base64,") { + return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil + } + return "", fmt.Errorf("not valid string") } func formatBytes(bytes int64) string { diff --git a/pkg/utils/uri_test.go b/pkg/downloader/uri_test.go similarity index 93% rename from pkg/utils/uri_test.go rename to pkg/downloader/uri_test.go index 79a9f4ae5c73..cd17b7ca45dc 100644 --- a/pkg/utils/uri_test.go +++ b/pkg/downloader/uri_test.go @@ -1,7 +1,7 @@ -package utils_test +package downloader_test import ( - . "github.com/go-skynet/LocalAI/pkg/utils" + . "github.com/go-skynet/LocalAI/pkg/downloader" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" ) diff --git a/pkg/gallery/models.go b/pkg/gallery/models.go index 9a1697981614..ba9146e05334 100644 --- a/pkg/gallery/models.go +++ b/pkg/gallery/models.go @@ -9,6 +9,7 @@ import ( "path/filepath" "strconv" + "github.com/go-skynet/LocalAI/pkg/downloader" "github.com/go-skynet/LocalAI/pkg/utils" "github.com/imdario/mergo" "github.com/rs/zerolog/log" @@ -114,7 +115,7 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides // Create file path filePath := filepath.Join(basePath, file.Filename) - if err := utils.DownloadFile(file.URI, filePath, file.SHA256, downloadStatus); err != nil { + if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, downloadStatus); err != nil { return err } } diff --git a/pkg/utils/hash.go b/pkg/utils/hash.go new file mode 100644 index 000000000000..5e86fb187b35 --- /dev/null +++ b/pkg/utils/hash.go @@ -0,0 +1,10 @@ +package utils + +import ( + "crypto/md5" + "fmt" +) + +func MD5(s string) string { + return fmt.Sprintf("%x", md5.Sum([]byte(s))) +} From 9822cb44148a63ed795bc6c565a46fde910debca Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 5 Jan 2024 18:17:49 +0100 Subject: [PATCH 02/17] separate startup functions for preloading configuration files --- api/api.go | 24 +---------- pkg/startup/model_preload.go | 54 +++++++++++++++++++++++++ pkg/startup/model_preload_test.go | 66 +++++++++++++++++++++++++++++++ pkg/startup/startup_suite_test.go | 13 ++++++ 4 files changed, 135 insertions(+), 22 deletions(-) create mode 100644 pkg/startup/model_preload.go create mode 100644 pkg/startup/model_preload_test.go create mode 100644 pkg/startup/startup_suite_test.go diff --git a/api/api.go b/api/api.go index 3a7366f7002d..fed962db616b 100644 --- a/api/api.go +++ b/api/api.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "os" - "path/filepath" "strings" config "github.com/go-skynet/LocalAI/api/config" @@ -16,9 +15,8 @@ import ( "github.com/go-skynet/LocalAI/internal" 
"github.com/go-skynet/LocalAI/metrics" "github.com/go-skynet/LocalAI/pkg/assets" - "github.com/go-skynet/LocalAI/pkg/downloader" "github.com/go-skynet/LocalAI/pkg/model" - "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/go-skynet/LocalAI/pkg/startup" "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/cors" @@ -39,25 +37,7 @@ func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader, log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.Loader.ModelPath) log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion()) - modelPath := options.Loader.ModelPath - if len(options.ModelsURL) > 0 { - for _, url := range options.ModelsURL { - if downloader.LooksLikeURL(url) { - // md5 of model name - md5Name := utils.MD5(url) - - // check if file exists - if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) { - err := downloader.DownloadFile(url, filepath.Join(modelPath, md5Name)+".yaml", "", func(fileName, current, total string, percent float64) { - utils.DisplayDownloadFunction(fileName, current, total, percent) - }) - if err != nil { - log.Error().Msgf("error loading model: %s", err.Error()) - } - } - } - } - } + startup.PreloadModelsConfigurations(options.Loader.ModelPath, options.ModelsURL...) 
cl := config.NewConfigLoader() if err := cl.LoadConfigs(options.Loader.ModelPath); err != nil { diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go new file mode 100644 index 000000000000..ceec9deede2c --- /dev/null +++ b/pkg/startup/model_preload.go @@ -0,0 +1,54 @@ +package startup + +import ( + "errors" + "os" + "path/filepath" + + "github.com/go-skynet/LocalAI/core/embedded" + "github.com/go-skynet/LocalAI/pkg/downloader" + "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/rs/zerolog/log" +) + +// PreloadModelsConfigurations will preload models from the given list of URLs +// It will download the model if it is not already present in the model path +// It will also try to resolve if the model is an embedded model YAML configuration +func PreloadModelsConfigurations(modelPath string, models ...string) { + for _, url := range models { + url = embedded.ModelShortURL(url) + + switch { + case embedded.ExistsInModelsLibrary(url): + modelYAML, err := embedded.ResolveContent(url) + // If we resolve something, just save it to disk and continue + if err != nil { + log.Error().Msgf("error loading model: %s", err.Error()) + continue + } + + log.Debug().Msgf("[startup] resolved embedded model: %s", url) + md5Name := utils.MD5(url) + if err := os.WriteFile(filepath.Join(modelPath, md5Name)+".yaml", modelYAML, os.ModePerm); err != nil { + log.Error().Msgf("error loading model: %s", err.Error()) + } + case downloader.LooksLikeURL(url): + log.Debug().Msgf("[startup] resolved model to download: %s", url) + + // md5 of model name + md5Name := utils.MD5(url) + + // check if file exists + if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) { + err := downloader.DownloadFile(url, filepath.Join(modelPath, md5Name)+".yaml", "", func(fileName, current, total string, percent float64) { + utils.DisplayDownloadFunction(fileName, current, total, percent) + }) + if err != nil { + log.Error().Msgf("error loading model: %s", 
err.Error()) + } + } + default: + log.Warn().Msgf("[startup] failed resolving model '%s'", url) + } + } +} diff --git a/pkg/startup/model_preload_test.go b/pkg/startup/model_preload_test.go new file mode 100644 index 000000000000..9d11083497ad --- /dev/null +++ b/pkg/startup/model_preload_test.go @@ -0,0 +1,66 @@ +package startup_test + +import ( + "fmt" + "os" + "path/filepath" + + . "github.com/go-skynet/LocalAI/core/startup" + "github.com/go-skynet/LocalAI/pkg/utils" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("Preload test", func() { + + Context("Preloading from strings", func() { + It("loads from embedded full-urls", func() { + tmpdir, err := os.MkdirTemp("", "") + Expect(err).ToNot(HaveOccurred()) + url := "https://raw.githubusercontent.com/mudler/LocalAI/master/examples/configurations/phi-2.yaml" + fileName := fmt.Sprintf("%s.yaml", utils.MD5(url)) + + PreloadModelsConfigurations(tmpdir, url) + + resultFile := filepath.Join(tmpdir, fileName) + + content, err := os.ReadFile(resultFile) + Expect(err).ToNot(HaveOccurred()) + + Expect(string(content)).To(ContainSubstring("name: phi-2")) + }) + It("loads from embedded short-urls", func() { + tmpdir, err := os.MkdirTemp("", "") + Expect(err).ToNot(HaveOccurred()) + url := "phi-2" + + PreloadModelsConfigurations(tmpdir, url) + + entry, err := os.ReadDir(tmpdir) + Expect(err).ToNot(HaveOccurred()) + Expect(entry).To(HaveLen(1)) + resultFile := entry[0].Name() + + content, err := os.ReadFile(filepath.Join(tmpdir, resultFile)) + Expect(err).ToNot(HaveOccurred()) + + Expect(string(content)).To(ContainSubstring("name: phi-2")) + }) + It("loads from embedded models", func() { + tmpdir, err := os.MkdirTemp("", "") + Expect(err).ToNot(HaveOccurred()) + url := "mistral-openorca" + fileName := fmt.Sprintf("%s.yaml", utils.MD5(url)) + + PreloadModelsConfigurations(tmpdir, url) + + resultFile := filepath.Join(tmpdir, fileName) + + content, err := os.ReadFile(resultFile) + 
Expect(err).ToNot(HaveOccurred()) + + Expect(string(content)).To(ContainSubstring("name: mistral-openorca")) + }) + }) +}) diff --git a/pkg/startup/startup_suite_test.go b/pkg/startup/startup_suite_test.go new file mode 100644 index 000000000000..00aec8da8d3d --- /dev/null +++ b/pkg/startup/startup_suite_test.go @@ -0,0 +1,13 @@ +package startup_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestStartup(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "LocalAI startup test") +} From 17a46439ec034f1cdbb89ec207f337d40ca247a7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 1 Jan 2024 21:53:48 +0100 Subject: [PATCH 03/17] docs: add popular model examples Signed-off-by: Ettore Di Giacinto --- docs/content/getting_started/_index.en.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/content/getting_started/_index.en.md b/docs/content/getting_started/_index.en.md index 60ea3d47c377..27c077a50e93 100644 --- a/docs/content/getting_started/_index.en.md +++ b/docs/content/getting_started/_index.en.md @@ -115,6 +115,13 @@ helm install local-ai go-skynet/local-ai -f values.yaml {{< /tabs >}} +### Popular models + +| Model | CPU | CUDA11 | CUDA12 | +| --- | --- | --- | --- | +| phi-2 | `docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg-core --debug --models-path /models --context-size 2048 --threads 4 github://mudler/LocalAI/examples/configurations/phi-2.yaml@master` | `docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-core --debug --models-path /models --context-size 2048 --threads 4 github://mudler/LocalAI/examples/configurations/phi-2.yaml@master` | `docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-core --debug --models-path /models --context-size 2048 --threads 4 
github://mudler/LocalAI/examples/configurations/phi-2.yaml@master` | + + ### Container images LocalAI has a set of images to support CUDA, ffmpeg and 'vanilla' (CPU-only). The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags): From 283c6762b84250a6f7704fdb3aa31b696a89641b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 1 Jan 2024 22:22:27 +0100 Subject: [PATCH 04/17] shorteners --- docs/content/advanced/_index.en.md | 8 +++++++- docs/content/build/_index.en.md | 8 ++++++++ docs/content/getting_started/_index.en.md | 24 ++++++++++++++++++++++- 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/docs/content/advanced/_index.en.md b/docs/content/advanced/_index.en.md index 3b00d24e89cb..608254bc26e2 100644 --- a/docs/content/advanced/_index.en.md +++ b/docs/content/advanced/_index.en.md @@ -9,7 +9,7 @@ weight = 6 In order to define default prompts, model parameters (such as custom default `top_p` or `top_k`), LocalAI can be configured to serve user-defined models with a set of default parameters and templates. -You can create multiple `yaml` files in the models path or either specify a single YAML configuration file. +In order to configure a model, you can create multiple `yaml` files in the models path or either specify a single YAML configuration file. Consider the following `models` folder in the `example/chatbot-ui`: ``` @@ -96,6 +96,12 @@ Specifying a `config-file` via CLI allows to declare models in a single file as See also [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) as an example on how to use config files. 
+It is possible to specify a full URL or a short-hand URL to a YAML model configuration file and use it when starting local-ai, for example to use phi-2: + +``` +local-ai github://mudler/LocalAI/examples/configurations/phi-2.yaml@master +``` + ### Full config model file reference ```yaml diff --git a/docs/content/build/_index.en.md b/docs/content/build/_index.en.md index 2697468f01d6..a10c96cf9ab4 100644 --- a/docs/content/build/_index.en.md +++ b/docs/content/build/_index.en.md @@ -235,6 +235,14 @@ make GRPC_BACKENDS=backend-assets/grpc/llama-cpp build By default, all the backends are built. +### Specific llama.cpp version + +To build with a specific version of llama.cpp, set `CPPLLAMA_VERSION` to the desired tag or commit SHA: + +``` +CPPLLAMA_VERSION=<sha> make build +``` + ### Windows compatibility Make sure to give enough resources to the running container. See https://github.com/go-skynet/LocalAI/issues/2 diff --git a/docs/content/getting_started/_index.en.md b/docs/content/getting_started/_index.en.md index 27c077a50e93..75d0c86bef1b 100644 --- a/docs/content/getting_started/_index.en.md +++ b/docs/content/getting_started/_index.en.md @@ -119,8 +119,30 @@ helm install local-ai go-skynet/local-ai -f values.yaml | Model | CPU | CUDA11 | CUDA12 | | --- | --- | --- | --- | -| phi-2 | `docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg-core --debug --models-path /models --context-size 2048 --threads 4 github://mudler/LocalAI/examples/configurations/phi-2.yaml@master` | `docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-core --debug --models-path /models --context-size 2048 --threads 4 github://mudler/LocalAI/examples/configurations/phi-2.yaml@master` | `docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-core --debug --models-path /models --context-size 2048 --threads 4 
github://mudler/LocalAI/examples/configurations/phi-2.yaml@master` | +| phi-2 | docker run -p 8080:8080 -ti
--rm quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg-core
phi-2
| docker run -p 8080:8080 -ti
--rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-core
phi-2
| docker run -p 8080:8080 -ti
--rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-core
phi-2
| +{{% notice note %}} + +LocalAI can be started (either the container image or the binary) with a list of model config file URLs or our short-hand format (e.g. `huggingface://`, `github://`). It works by passing the URLs as arguments or as an environment variable, for example: + +``` +local-ai github://owner/repo/file.yaml@branch + +# Env +MODELS="github://owner/repo/file.yaml@branch,github://owner/repo/file.yaml@branch" local-ai + +# Args +local-ai --models github://owner/repo/file.yaml@branch --models github://owner/repo/file.yaml@branch +``` + +For example, to start LocalAI with phi-2, it is also possible to use a full config file from a gist: + +```bash +./local-ai https://gist.githubusercontent.com/mudler/ad601a0488b497b69ec549150d9edd18/raw/a8a8869ef1bb7e3830bf5c0bae29a0cce991ff8d/phi-2.yaml +``` + +The file should be a valid YAML configuration file; for the full syntax see [advanced]({{%relref "advanced" %}}). +{{% /notice %}} ### Container images From a47ffbf491dfd00ff4d008118ae9157c7871a94b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 2 Jan 2024 19:13:04 +0100 Subject: [PATCH 05/17] Add llava --- docs/content/getting_started/_index.en.md | 25 ++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/docs/content/getting_started/_index.en.md b/docs/content/getting_started/_index.en.md index 75d0c86bef1b..6e3ccdb50048 100644 --- a/docs/content/getting_started/_index.en.md +++ b/docs/content/getting_started/_index.en.md @@ -14,6 +14,8 @@ See also our [How to]({{%relref "howtos" %}}) section for end-to-end guided exam The easiest way to run LocalAI is by using [`docker compose`](https://docs.docker.com/compose/install/) or with [Docker](https://docs.docker.com/engine/install/) (to build locally, see the [build section]({{%relref "build" %}})). +LocalAI needs at least a model file to work, or a configuration YAML file, or both. 
You can customize further model defaults and specific settings with a configuration file (see [advanced]({{%relref "advanced" %}})). + {{% notice note %}} To run with GPU Accelleration, see [GPU acceleration]({{%relref "features/gpu-acceleration" %}}). {{% /notice %}} @@ -117,9 +119,26 @@ helm install local-ai go-skynet/local-ai -f values.yaml ### Popular models -| Model | CPU | CUDA11 | CUDA12 | -| --- | --- | --- | --- | -| phi-2 | docker run -p 8080:8080 -ti
--rm quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg-core
phi-2
| docker run -p 8080:8080 -ti
--rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-core
phi-2
| docker run -p 8080:8080 -ti
--rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-core
phi-2
| +#### CPU-only + +| Model | Docker command | +| --- | --- | +| phi2 | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg-core phi-2``` | +| llava | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg-core llava``` | + +#### GPU (CUDA 11) + +| Model | Docker command | +| --- | --- | +| phi-2 | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-core phi-2``` | +| llava | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-core llava``` | + +#### GPU (CUDA 12) + +| Model | Docker command | +| --- | --- | +| phi-2 | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-core phi-2``` | +| llava | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-core llava``` | {{% notice note %}} From a9d0ab69fcecb5c1545811b11479b0c43c166e51 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 2 Jan 2024 19:22:59 +0100 Subject: [PATCH 06/17] Add mistral-openorca --- docs/content/getting_started/_index.en.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/content/getting_started/_index.en.md b/docs/content/getting_started/_index.en.md index 6e3ccdb50048..348d20913f8d 100644 --- a/docs/content/getting_started/_index.en.md +++ b/docs/content/getting_started/_index.en.md @@ -125,6 +125,7 @@ helm install local-ai go-skynet/local-ai -f values.yaml | --- | --- | | phi2 | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg-core phi-2``` | | llava | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg-core llava``` | +| mistral-openorca | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg-core mistral-openorca``` | #### GPU (CUDA 11) @@ -132,6 +133,7 @@ helm install local-ai go-skynet/local-ai -f values.yaml | --- | --- | | phi-2 | ```docker run 
-p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-core phi-2``` | | llava | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-core llava``` | +| mistral-openorca | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-core mistral-openorca``` | #### GPU (CUDA 12) @@ -139,6 +141,7 @@ helm install local-ai go-skynet/local-ai -f values.yaml | --- | --- | | phi-2 | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-core phi-2``` | | llava | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-core llava``` | +| mistral-openorca | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-core mistral-openorca``` | {{% notice note %}} From 3e19d99ce8f5e3760177f94ee42f6af1c173cd1a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 2 Jan 2024 19:27:18 +0100 Subject: [PATCH 07/17] Better link to build section --- docs/content/getting_started/_index.en.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/content/getting_started/_index.en.md b/docs/content/getting_started/_index.en.md index 348d20913f8d..884e982403b8 100644 --- a/docs/content/getting_started/_index.en.md +++ b/docs/content/getting_started/_index.en.md @@ -115,9 +115,15 @@ helm install local-ai go-skynet/local-ai -f values.yaml {{% /tab %}} +{{% tab name="From source" %}} + +See the [build section]({{%relref "build" %}}). + +{{% /tab %}} + {{< /tabs >}} -### Popular models +### Running Popular models (one-click!) #### CPU-only @@ -408,10 +414,6 @@ affinity: {} -### Build from source - -See the [build section]({{%relref "build" %}}). 
- ### Other examples ![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png) From 7dfae7c6bc57698b6631a1e8de9c0e01a4866bb4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 5 Jan 2024 16:02:21 +0100 Subject: [PATCH 08/17] docs: update --- docs/content/getting_started/_index.en.md | 16 +++++++++++----- docs/content/model-compatibility/diffusers.md | 11 ----------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/docs/content/getting_started/_index.en.md b/docs/content/getting_started/_index.en.md index 884e982403b8..ebd10541bcea 100644 --- a/docs/content/getting_started/_index.en.md +++ b/docs/content/getting_started/_index.en.md @@ -125,6 +125,12 @@ See the [build section]({{%relref "build" %}}). ### Running Popular models (one-click!) +{{% notice note %}} + +Note: this feature currently is available only on master builds. + +{{% /notice %}} + #### CPU-only | Model | Docker command | @@ -137,17 +143,17 @@ See the [build section]({{%relref "build" %}}). 
| Model | Docker command | | --- | --- | -| phi-2 | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-core phi-2``` | +| phi-2 | ```docker run -p 8080:8080 --gpus all -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-core phi-2``` | | llava | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-core llava``` | -| mistral-openorca | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-core mistral-openorca``` | +| mistral-openorca | ```docker run -p 8080:8080 --gpus all -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-core mistral-openorca``` | #### GPU (CUDA 12) | Model | Docker command | | --- | --- | -| phi-2 | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-core phi-2``` | -| llava | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-core llava``` | -| mistral-openorca | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-core mistral-openorca``` | +| phi-2 | ```docker run -p 8080:8080 -ti --gpus all --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-core phi-2``` | +| llava | ```docker run -p 8080:8080 -ti --gpus all --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-core llava``` | +| mistral-openorca | ```docker run -p 8080:8080 --gpus all -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-core mistral-openorca``` | {{% notice note %}} diff --git a/docs/content/model-compatibility/diffusers.md b/docs/content/model-compatibility/diffusers.md index fb07688794be..c7a84dc4f9c1 100644 --- a/docs/content/model-compatibility/diffusers.md +++ b/docs/content/model-compatibility/diffusers.md @@ -167,11 +167,6 @@ curl -H "Content-Type: application/json" -d @- http://localhost:8080/v1/images/ ## img2vid -{{% notice note %}} - -Experimental and available 
only on master builds. See: https://github.com/mudler/LocalAI/pull/1442 - -{{% /notice %}} ```yaml name: img2vid @@ -193,12 +188,6 @@ curl -H "Content-Type: application/json" -X POST -d @- http://localhost:8080/v1/ ## txt2vid -{{% notice note %}} - -Experimental and available only on master builds. See: https://github.com/mudler/LocalAI/pull/1442 - -{{% /notice %}} - ```yaml name: txt2vid parameters: From c37420cd35300f368519cc56ac7545cb587eb782 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 5 Jan 2024 18:23:22 +0100 Subject: [PATCH 09/17] fixup --- pkg/assets/list.go | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 pkg/assets/list.go diff --git a/pkg/assets/list.go b/pkg/assets/list.go new file mode 100644 index 000000000000..7b705b49a95e --- /dev/null +++ b/pkg/assets/list.go @@ -0,0 +1,22 @@ +package assets + +import ( + "embed" + "io/fs" +) + +func ListFiles(content embed.FS) (files []string) { + fs.WalkDir(content, ".", func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + + if d.IsDir() { + return nil + } + + files = append(files, path) + return nil + }) + return +} From f104f39732f4ed74df36b895732b427fb4bdeba7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 5 Jan 2024 18:24:25 +0100 Subject: [PATCH 10/17] Drop code dups --- pkg/gallery/models.go | 55 ------------------------------------------- 1 file changed, 55 deletions(-) diff --git a/pkg/gallery/models.go b/pkg/gallery/models.go index ba9146e05334..db9112794b34 100644 --- a/pkg/gallery/models.go +++ b/pkg/gallery/models.go @@ -1,13 +1,9 @@ package gallery import ( - "crypto/sha256" "fmt" - "hash" - "io" "os" "path/filepath" - "strconv" "github.com/go-skynet/LocalAI/pkg/downloader" "github.com/go-skynet/LocalAI/pkg/utils" @@ -184,54 +180,3 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides return nil } - -type progressWriter struct { - fileName string - total int64 - written int64 
- downloadStatus func(string, string, string, float64) - hash hash.Hash -} - -func (pw *progressWriter) Write(p []byte) (n int, err error) { - n, err = pw.hash.Write(p) - pw.written += int64(n) - - if pw.total > 0 { - percentage := float64(pw.written) / float64(pw.total) * 100 - //log.Debug().Msgf("Downloading %s: %s/%s (%.2f%%)", pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage) - pw.downloadStatus(pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage) - } else { - pw.downloadStatus(pw.fileName, formatBytes(pw.written), "", 0) - } - - return -} - -func formatBytes(bytes int64) string { - const unit = 1024 - if bytes < unit { - return strconv.FormatInt(bytes, 10) + " B" - } - div, exp := int64(unit), 0 - for n := bytes / unit; n >= unit; n /= unit { - div *= unit - exp++ - } - return fmt.Sprintf("%.1f %ciB", float64(bytes)/float64(div), "KMGTPE"[exp]) -} - -func calculateSHA(filePath string) (string, error) { - file, err := os.Open(filePath) - if err != nil { - return "", err - } - defer file.Close() - - hash := sha256.New() - if _, err := io.Copy(hash, file); err != nil { - return "", err - } - - return fmt.Sprintf("%x", hash.Sum(nil)), nil -} From 3c58089e5943a7e59f1e9bfa6b3767c2a4358bd4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 5 Jan 2024 18:29:20 +0100 Subject: [PATCH 11/17] Minor fixups --- api/api_test.go | 4 ++-- api/config/config.go | 2 +- pkg/gallery/gallery.go | 6 +++--- pkg/gallery/models.go | 2 +- pkg/startup/model_preload.go | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/api/api_test.go b/api/api_test.go index a71b450ada7d..491a56b5a2ea 100644 --- a/api/api_test.go +++ b/api/api_test.go @@ -16,9 +16,9 @@ import ( . 
"github.com/go-skynet/LocalAI/api" "github.com/go-skynet/LocalAI/api/options" "github.com/go-skynet/LocalAI/metrics" + "github.com/go-skynet/LocalAI/pkg/downloader" "github.com/go-skynet/LocalAI/pkg/gallery" "github.com/go-skynet/LocalAI/pkg/model" - "github.com/go-skynet/LocalAI/pkg/utils" "github.com/gofiber/fiber/v2" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -61,7 +61,7 @@ func getModelStatus(url string) (response map[string]interface{}) { } func getModels(url string) (response []gallery.GalleryModel) { - utils.GetURI(url, func(url string, i []byte) error { + downloader.GetURI(url, func(url string, i []byte) error { // Unmarshal YAML data into a struct return json.Unmarshal(i, &response) }) diff --git a/api/config/config.go b/api/config/config.go index 0b49b039b2f5..6aeb48d1d34c 100644 --- a/api/config/config.go +++ b/api/config/config.go @@ -307,7 +307,7 @@ func (cm *ConfigLoader) Preload(modelPath string) error { } modelURL := config.PredictionOptions.Model - modelURL = utils.ConvertURL(modelURL) + modelURL = downloader.ConvertURL(modelURL) if downloader.LooksLikeURL(modelURL) { // md5 of model name diff --git a/pkg/gallery/gallery.go b/pkg/gallery/gallery.go index 7957ed59d638..c45758175caf 100644 --- a/pkg/gallery/gallery.go +++ b/pkg/gallery/gallery.go @@ -6,7 +6,7 @@ import ( "path/filepath" "strings" - "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/go-skynet/LocalAI/pkg/downloader" "github.com/imdario/mergo" "github.com/rs/zerolog/log" "gopkg.in/yaml.v2" @@ -140,7 +140,7 @@ func AvailableGalleryModels(galleries []Gallery, basePath string) ([]*GalleryMod func findGalleryURLFromReferenceURL(url string) (string, error) { var refFile string - err := utils.GetURI(url, func(url string, d []byte) error { + err := downloader.GetURI(url, func(url string, d []byte) error { refFile = string(d) if len(refFile) == 0 { return fmt.Errorf("invalid reference file at url %s: %s", url, d) @@ -163,7 +163,7 @@ func getGalleryModels(gallery Gallery, 
basePath string) ([]*GalleryModel, error) } } - err := utils.GetURI(gallery.URL, func(url string, d []byte) error { + err := downloader.GetURI(gallery.URL, func(url string, d []byte) error { return yaml.Unmarshal(d, &models) }) if err != nil { diff --git a/pkg/gallery/models.go b/pkg/gallery/models.go index db9112794b34..65d0401f2bf4 100644 --- a/pkg/gallery/models.go +++ b/pkg/gallery/models.go @@ -63,7 +63,7 @@ type PromptTemplate struct { func GetGalleryConfigFromURL(url string) (Config, error) { var config Config - err := utils.GetURI(url, func(url string, d []byte) error { + err := downloader.GetURI(url, func(url string, d []byte) error { return yaml.Unmarshal(d, &config) }) if err != nil { diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go index ceec9deede2c..c23b7b410cbc 100644 --- a/pkg/startup/model_preload.go +++ b/pkg/startup/model_preload.go @@ -5,7 +5,7 @@ import ( "os" "path/filepath" - "github.com/go-skynet/LocalAI/core/embedded" + "github.com/go-skynet/LocalAI/embedded" "github.com/go-skynet/LocalAI/pkg/downloader" "github.com/go-skynet/LocalAI/pkg/utils" "github.com/rs/zerolog/log" From 6c651766f05e2740d4a901c153ecac300385a682 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 5 Jan 2024 18:52:11 +0100 Subject: [PATCH 12/17] Apply suggestions from code review Signed-off-by: Ettore Di Giacinto --- embedded/models/mistral-openorca.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/embedded/models/mistral-openorca.yaml b/embedded/models/mistral-openorca.yaml index abde9e8b49d3..66a42ab1bd9a 100644 --- a/embedded/models/mistral-openorca.yaml +++ b/embedded/models/mistral-openorca.yaml @@ -21,4 +21,3 @@ context_size: 4096 f16: true stopwords: - <|im_end|> -threads: 4 From 3b5fa1a036b423581869c6eedd884d2b40a951ab Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 5 Jan 2024 19:23:46 +0100 Subject: [PATCH 13/17] ci: try to cache gRPC build during tests Signed-off-by: Ettore Di Giacinto --- 
.github/workflows/test.yml | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 89c6c512df28..6753071739ac 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -86,11 +86,23 @@ jobs: sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \ # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn) GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build - + - name: Cache grpc + id: cache-grpc + uses: actions/cache@v3 + with: + path: grpc + key: ${{ runner.os }}-grpc + - name: Build grpc + if: steps.cache-grpc.outputs.cache-hit != 'true' + run: | + git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ + cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \ + -DgRPC_BUILD_TESTS=OFF \ + ../.. && sudo make -j12 + - name: Install gRPC + run: | git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ - cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \ - -DgRPC_BUILD_TESTS=OFF \ - ../.. 
&& sudo make -j12 install + cd grpc && cd cmake/build && sudo make -j12 install - name: Test run: | GO_TAGS="stablediffusion tts" make test From 9c35ff83382b9701b703eb534b2c6ec8984fdebf Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 5 Jan 2024 19:30:12 +0100 Subject: [PATCH 14/17] ci: do not build all images for tests, just necessary --- .github/workflows/image-pr.yml | 86 ++++++++++++++++++++++++++++++++++ .github/workflows/image.yml | 1 - 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/image-pr.yml diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml new file mode 100644 index 000000000000..c95608c73b8b --- /dev/null +++ b/.github/workflows/image-pr.yml @@ -0,0 +1,86 @@ +--- +name: 'build container images tests' + +on: + pull_request: + +concurrency: + group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }} + cancel-in-progress: true + +jobs: + extras-image-build: + uses: ./.github/workflows/image_build.yml + with: + tag-latest: ${{ matrix.tag-latest }} + tag-suffix: ${{ matrix.tag-suffix }} + ffmpeg: ${{ matrix.ffmpeg }} + image-type: ${{ matrix.image-type }} + build-type: ${{ matrix.build-type }} + cuda-major-version: ${{ matrix.cuda-major-version }} + cuda-minor-version: ${{ matrix.cuda-minor-version }} + platforms: ${{ matrix.platforms }} + runs-on: ${{ matrix.runs-on }} + secrets: + dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} + dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} + quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} + quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} + strategy: + # Pushing with all jobs in parallel + # eats the bandwidth of all the nodes + max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }} + matrix: + include: + - build-type: '' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-ffmpeg' + ffmpeg: 'true' + image-type: 'extras' + runs-on: 'arc-runner-set' + - build-type: 'cublas' + 
cuda-major-version: "12" + cuda-minor-version: "1" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-cublas-cuda12-ffmpeg' + ffmpeg: 'true' + image-type: 'extras' + runs-on: 'arc-runner-set' + core-image-build: + uses: ./.github/workflows/image_build.yml + with: + tag-latest: ${{ matrix.tag-latest }} + tag-suffix: ${{ matrix.tag-suffix }} + ffmpeg: ${{ matrix.ffmpeg }} + image-type: ${{ matrix.image-type }} + build-type: ${{ matrix.build-type }} + cuda-major-version: ${{ matrix.cuda-major-version }} + cuda-minor-version: ${{ matrix.cuda-minor-version }} + platforms: ${{ matrix.platforms }} + runs-on: ${{ matrix.runs-on }} + secrets: + dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} + dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} + quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} + quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} + strategy: + matrix: + include: + - build-type: '' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-ffmpeg-core' + ffmpeg: 'true' + image-type: 'core' + runs-on: 'ubuntu-latest' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "1" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-cublas-cuda12-ffmpeg-core' + ffmpeg: 'true' + image-type: 'core' + runs-on: 'ubuntu-latest' diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 5fe86590c07a..ad13ce0599fc 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -2,7 +2,6 @@ name: 'build container images' on: - pull_request: push: branches: - master From 21dc2499139d0afc8eb16ce41a54907bd3ca4ce7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 5 Jan 2024 19:32:23 +0100 Subject: [PATCH 15/17] ci: cache gRPC also in release pipeline --- .github/workflows/release.yaml | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index e5fd84c425b7..929cabf3f33a 100644 
--- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -34,10 +34,23 @@ jobs: sudo apt-get update sudo apt-get install build-essential ffmpeg + - name: Cache grpc + id: cache-grpc + uses: actions/cache@v3 + with: + path: grpc + key: ${{ runner.os }}-grpc + - name: Build grpc + if: steps.cache-grpc.outputs.cache-hit != 'true' + run: | + git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ + cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \ + -DgRPC_BUILD_TESTS=OFF \ + ../.. && sudo make -j12 + - name: Install gRPC + run: | git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ - cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \ - -DgRPC_BUILD_TESTS=OFF \ - ../.. && sudo make -j12 install + cd grpc && cd cmake/build && sudo make -j12 install - name: Build id: build From 315de23cf19031ec78fd25594f3b69c96e7985d8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 5 Jan 2024 19:50:03 +0100 Subject: [PATCH 16/17] fixes --- .github/workflows/release.yaml | 1 - .github/workflows/test.yml | 1 - docs/content/features/GPU-acceleration.md | 12 ++++++++++-- docs/content/getting_started/_index.en.md | 20 ++++++++++++++++++++ 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 929cabf3f33a..6c66138ca333 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -49,7 +49,6 @@ jobs: ../.. 
&& sudo make -j12 - name: Install gRPC run: | - git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ cd grpc && cd cmake/build && sudo make -j12 install - name: Build diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6753071739ac..2a2cc6c89ff0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -101,7 +101,6 @@ jobs: ../.. && sudo make -j12 - name: Install gRPC run: | - git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ cd grpc && cd cmake/build && sudo make -j12 install - name: Test run: | diff --git a/docs/content/features/GPU-acceleration.md b/docs/content/features/GPU-acceleration.md index 1e8b90cf2f6b..5a6ebe41a667 100644 --- a/docs/content/features/GPU-acceleration.md +++ b/docs/content/features/GPU-acceleration.md @@ -15,11 +15,19 @@ This section contains instruction on how to use LocalAI with GPU acceleration. For accelleration for AMD or Metal HW there are no specific container images, see the [build]({{%relref "build/#acceleration" %}}) {{% /notice %}} -### CUDA +### CUDA(NVIDIA) acceleration Requirement: nvidia-container-toolkit (installation instructions [1](https://www.server-world.info/en/note?os=Ubuntu_22.04&p=nvidia&f=2) [2](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)) -To use CUDA, use the images with the `cublas` tag. +To check which CUDA version you need, you can either run `nvidia-smi` or `nvcc --version`. + +Alternatively, you can also check nvidia-smi with docker: + +``` +docker run --runtime=nvidia --rm nvidia/cuda nvidia-smi +``` + +To use CUDA, use the images with the `cublas` tag, for example.
The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags): diff --git a/docs/content/getting_started/_index.en.md b/docs/content/getting_started/_index.en.md index ebd10541bcea..5e085dfa7404 100644 --- a/docs/content/getting_started/_index.en.md +++ b/docs/content/getting_started/_index.en.md @@ -131,8 +131,12 @@ Note: this feature currently is available only on master builds. {{% /notice %}} +You can run `local-ai` directly with a model name, and it will download the model and start the API with the model loaded. + #### CPU-only +> You can use these images which are lighter and do not have Nvidia dependencies + | Model | Docker command | | --- | --- | | phi2 | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg-core phi-2``` | @@ -141,6 +145,10 @@ Note: this feature currently is available only on master builds. #### GPU (CUDA 11) +For accelerated images with Nvidia and CUDA11, use the following images. + +> If you do not know which version of CUDA you have available, you can check with `nvidia-smi` or `nvcc --version` + | Model | Docker command | | --- | --- | | phi-2 | ```docker run -p 8080:8080 --gpus all -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-core phi-2``` | @@ -149,6 +157,8 @@ Note: this feature currently is available only on master builds.
#### GPU (CUDA 12) +> If you do not know which version of CUDA you have available, you can check with `nvidia-smi` or `nvcc --version` + | Model | Docker command | | --- | --- | | phi-2 | ```docker run -p 8080:8080 -ti --gpus all --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-core phi-2``` | @@ -194,6 +204,11 @@ Core Images - Smaller images without predownload python dependencies {{% /tab %}} {{% tab name="GPU Images CUDA 11" %}} + +Images with Nvidia acceleration support + +> If you do not know which version of CUDA you have available, you can check with `nvidia-smi` or `nvcc --version` + - `master-cublas-cuda11` - `master-cublas-cuda11-core` - `{{< version >}}-cublas-cuda11` @@ -205,6 +220,11 @@ Core Images - Smaller images without predownload python dependencies {{% /tab %}} {{% tab name="GPU Images CUDA 12" %}} + +Images with Nvidia acceleration support + +> If you do not know which version of CUDA you have available, you can check with `nvidia-smi` or `nvcc --version` + - `master-cublas-cuda12` - `master-cublas-cuda12-core` - `{{< version >}}-cublas-cuda12` From 9f9c5ce58bb4494b7a96165e14b5c09d96dd76b4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 5 Jan 2024 22:01:07 +0100 Subject: [PATCH 17/17] Update model_preload_test.go Signed-off-by: Ettore Di Giacinto --- pkg/startup/model_preload_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/startup/model_preload_test.go b/pkg/startup/model_preload_test.go index 9d11083497ad..d1e0eab31ae0 100644 --- a/pkg/startup/model_preload_test.go +++ b/pkg/startup/model_preload_test.go @@ -5,7 +5,7 @@ import ( "os" "path/filepath" - . "github.com/go-skynet/LocalAI/core/startup" + . "github.com/go-skynet/LocalAI/pkg/startup" "github.com/go-skynet/LocalAI/pkg/utils" . "github.com/onsi/ginkgo/v2"