Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/bump_deps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ jobs:
- repository: "ggerganov/whisper.cpp"
variable: "WHISPER_CPP_VERSION"
branch: "master"
- repository: "go-skynet/go-bert.cpp"
variable: "BERT_VERSION"
branch: "master"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
Expand Down
25 changes: 20 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=af62fcc432be2847acb6e0688b2c2491d6588d58
WHISPER_CPP_VERSION?=bf2449dfae35a46b2cd92ab22661ce81a48d4993
BERT_VERSION?=ec771ec715576ac050263bb7bb74bfd616a5ba13


GREEN := $(shell tput -Txterm setaf 2)
Expand All @@ -17,8 +18,8 @@ WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)

C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert

# Use this if you want to set the default behavior
ifndef BUILD_TYPE
Expand Down Expand Up @@ -49,6 +50,14 @@ go-gpt4all-j:
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +

## BERT embeddings
go-bert:
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp go-bert
cd go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
@find ./go-bert -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
@find ./go-bert -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
@find ./go-bert -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +

## RWKV
go-rwkv:
git clone --recurse-submodules $(RWKV_REPO) go-rwkv
Expand All @@ -60,6 +69,9 @@ go-rwkv:
go-rwkv/librwkv.a: go-rwkv
cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. && cp ggml/src/libggml.a ..

go-bert/libgobert.a: go-bert
$(MAKE) -C go-bert libgobert.a

go-gpt4all-j/libgptj.a: go-gpt4all-j
$(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a

Expand Down Expand Up @@ -98,8 +110,9 @@ replace:
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert

prepare-sources: go-llama go-gpt2 go-gpt4all-j go-rwkv whisper.cpp
prepare-sources: go-llama go-gpt2 go-gpt4all-j go-rwkv whisper.cpp go-bert
$(GOCMD) mod download

## GENERIC
Expand All @@ -109,15 +122,17 @@ rebuild: ## Rebuilds the project
$(MAKE) -C go-gpt2 clean
$(MAKE) -C go-rwkv clean
$(MAKE) -C whisper.cpp clean
$(MAKE) -C go-bert clean
$(MAKE) build

prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a replace ## Prepares for building
prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a replace ## Prepares for building

clean: ## Remove build related file
rm -fr ./go-llama
rm -rf ./go-gpt4all-j
rm -rf ./go-gpt2
rm -rf ./go-rwkv
rm -rf ./go-bert
rm -rf $(BINARY_NAME)

## Build:
Expand All @@ -141,7 +156,7 @@ test-models/testmodel:

test: prepare test-models/testmodel
cp tests/fixtures/* test-models
@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./...
@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./api

## Help:
help: ## Show this help.
Expand Down
13 changes: 9 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

[![](https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted)](https://discord.gg/uJAeKSAGDy)

**LocalAI** is a drop-in replacement REST API compatible with OpenAI for local CPU inferencing. It allows to run models locally or on-prem with consumer grade hardware. It is based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all), [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp) and [ggml](https://github.com/ggerganov/ggml), including support GPT4ALL-J which is licensed under Apache 2.0.
**LocalAI** is a drop-in replacement REST API compatible with OpenAI for local CPU inferencing. It allows to run models locally or on-prem with consumer grade hardware, supporting multiple models families. Supports also GPT4ALL-J which is licensed under Apache 2.0.

- OpenAI compatible API
- Supports multiple models
Expand All @@ -19,10 +19,14 @@

LocalAI is a community-driven project, focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome! It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).

LocalAI uses C++ bindings for optimizing speed. It is based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all), [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp), [ggml](https://github.com/ggerganov/ggml), [whisper.cpp](https://github.com/ggerganov/whisper.cpp) for audio transcriptions, and [bert.cpp](https://github.com/skeskinen/bert.cpp) for embedding.

See [examples on how to integrate LocalAI](https://github.com/go-skynet/LocalAI/tree/master/examples/).

## News

- 10-05-2023: Added support for fast and accurate embeddings with `bert.cpp` ( https://github.com/go-skynet/LocalAI/pull/222 )
- 09-05-2023: Added experimental support for transcriptions endpoint ( https://github.com/go-skynet/LocalAI/pull/211 )
- 08-05-2023: Support for embeddings with models using the `llama.cpp` backend ( https://github.com/go-skynet/LocalAI/pull/207 )
- 02-05-2023: Support for `rwkv.cpp` models ( https://github.com/go-skynet/LocalAI/pull/158 ) and for `/edits` endpoint
- 01-05-2023: Support for SSE stream of tokens in `llama.cpp` backends ( https://github.com/go-skynet/LocalAI/pull/152 )
Expand Down Expand Up @@ -534,18 +538,18 @@ curl http://localhost:8080/v1/models

<details>

The embedding endpoint is experimental and enabled only if the model is configured with `emebddings: true` in its `yaml` file, for example:
The embedding endpoint is experimental and enabled only if the model is configured with `embeddings: true` in its `yaml` file, for example:

```yaml
name: text-embedding-ada-002
parameters:
model: wizardLM-7B.ggml.q5_1.bin
model: bert
embeddings: true
```

There is an example available [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/).

Note: embeddings is supported only with `llama.cpp` compatible models. (doesn't work with gpt4-all-j, yet).
Note: embeddings is supported only with `llama.cpp` compatible models and `bert` models. bert is more performant and available independently of the LLM model.

</details>

Expand Down Expand Up @@ -666,6 +670,7 @@ MIT
- [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
- [go-skynet/go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp)
- [go-skynet/go-gpt2.cpp](https://github.com/go-skynet/go-gpt2.cpp)
- [go-skynet/go-bert.cpp](https://github.com/go-skynet/go-bert.cpp)
- [donomii/go-rwkv.cpp](https://github.com/donomii/go-rwkv.cpp)

## Acknowledgements
Expand Down
2 changes: 1 addition & 1 deletion api/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ var _ = Describe("API test", func() {
It("returns errors", func() {
_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 5 errors occurred:"))
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 6 errors occurred:"))
})

})
Expand Down
9 changes: 9 additions & 0 deletions api/prediction.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

"github.com/donomii/go-rwkv.cpp"
model "github.com/go-skynet/LocalAI/pkg/model"
bert "github.com/go-skynet/go-bert.cpp"
gpt2 "github.com/go-skynet/go-gpt2.cpp"
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
llama "github.com/go-skynet/go-llama.cpp"
Expand Down Expand Up @@ -62,6 +63,14 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config)
}
return model.Embeddings(s, predictOptions...)
}
// bert embeddings
case *bert.Bert:
fn = func() ([]float32, error) {
if len(tokens) > 0 {
return nil, fmt.Errorf("embeddings endpoint for this model supports only string")
}
return model.Embeddings(s, bert.SetThreads(c.Threads))
}
default:
fn = func() ([]float32, error) {
return nil, fmt.Errorf("embeddings not supported by the backend")
Expand Down
9 changes: 3 additions & 6 deletions examples/query_data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,7 @@ Summary of the steps:

## Requirements

For this in order to work, you will need LocalAI and a model compatible with the `llama.cpp` backend. This is will not work with gpt4all, however you can mix models (use a llama.cpp one to build the index database, and gpt4all to query it).

The example uses `WizardLM` for both embeddings and Q&A. Edit the config files in `models/` accordingly to specify the model you use (change `HERE` in the configuration files).

You will also need a training data set. Copy that over `data`.
You will need a training data set. Copy that over `data`.

## Setup

Expand All @@ -28,7 +24,8 @@ git clone https://github.com/go-skynet/LocalAI

cd LocalAI/examples/query_data

# Copy your models, edit config files accordingly
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j

# start with docker-compose
docker-compose up -d --build
Expand Down
16 changes: 2 additions & 14 deletions examples/query_data/models/embeddings.yaml
Original file line number Diff line number Diff line change
@@ -1,18 +1,6 @@
name: text-embedding-ada-002
parameters:
model: HERE
top_k: 80
temperature: 0.2
top_p: 0.7
context_size: 1024
model: bert
threads: 14
stopwords:
- "HUMAN:"
- "GPT:"
roles:
user: " "
system: " "
backend: bert-embeddings
embeddings: true
template:
completion: completion
chat: gpt4all
5 changes: 2 additions & 3 deletions examples/query_data/models/gpt-3.5-turbo.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
name: gpt-3.5-turbo
parameters:
model: HERE
model: ggml-gpt4all-j
top_k: 80
temperature: 0.2
top_p: 0.7
context_size: 1024
threads: 14
embeddings: true
stopwords:
- "HUMAN:"
- "GPT:"
Expand All @@ -15,4 +14,4 @@ roles:
system: " "
template:
completion: completion
chat: wizardlm
chat: gpt4all
3 changes: 0 additions & 3 deletions examples/query_data/models/wizardlm.tmpl

This file was deleted.

2 changes: 1 addition & 1 deletion examples/query_data/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))

# Configure prompt parameters and initialise helper
max_input_size = 1024
max_input_size = 500
num_output = 256
max_chunk_overlap = 20

Expand Down
6 changes: 3 additions & 3 deletions examples/query_data/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))

# Configure prompt parameters and initialise helper
max_input_size = 512
num_output = 512
max_input_size = 400
num_output = 400
max_chunk_overlap = 30

prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)

# Load documents from the 'data' directory
documents = SimpleDirectoryReader('data').load_data()
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 512)
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 400)
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
index.storage_context.persist(persist_dir="./storage")

1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ require (
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35
github.com/go-audio/wav v1.1.0
github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
github.com/go-skynet/go-llama.cpp v0.0.0-20230509080828-f4d26f43f1d3
Expand Down
13 changes: 2 additions & 11 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be h1:3Hic97PY6hcw/SY44RuR7kyONkxd744RFeRrqckzwNQ=
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35 h1:sMg/SgnMPS/HNUO/2kGm72vl8R9TmNIwgLFr2TNwR3g=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
Expand All @@ -38,14 +36,6 @@ github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7
github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM=
github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708 h1:cfOi4TWvQ6JsAm9Q1A8I8j9YfNy10bmIfwOiyGyU5wQ=
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c h1:48I7jpLNGiQeBmF0SFVVbREh8vlG0zN13v9LH5ctXis=
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
github.com/go-skynet/go-llama.cpp v0.0.0-20230508165257-c03e8adbc45c h1:JoW2+LKrSemoV32QRwrEC5f53erym96NCsUSM3wSVbM=
github.com/go-skynet/go-llama.cpp v0.0.0-20230508165257-c03e8adbc45c/go.mod h1:DLfsPD7tYYnpksERH83HSf7qVNW3FIwmz7/zfYO0/6I=
github.com/go-skynet/go-llama.cpp v0.0.0-20230509080828-f4d26f43f1d3 h1:YNi1oetK5kGJoUgT3/r/Wj3XPOICWf3nwHsz5v89iSs=
github.com/go-skynet/go-llama.cpp v0.0.0-20230509080828-f4d26f43f1d3/go.mod h1:DLfsPD7tYYnpksERH83HSf7qVNW3FIwmz7/zfYO0/6I=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
Expand Down Expand Up @@ -197,8 +187,9 @@ golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8T
google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b h1:QRR6H1YWRnHb4Y/HeNFCTJLFVxaq6wH4YuVdsUOr75U=
gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
Expand Down
Loading