3 changes: 3 additions & 0 deletions .github/workflows/bump_deps.yaml
@@ -30,6 +30,9 @@ jobs:
- repository: "go-skynet/bloomz.cpp"
variable: "BLOOMZ_VERSION"
branch: "main"
- repository: "go-skynet/gpt4all"
variable: "GPT4ALL_VERSION"
branch: "main"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
54 changes: 31 additions & 23 deletions Makefile
@@ -4,7 +4,8 @@ GOVET=$(GOCMD) vet
BINARY_NAME=local-ai

GOLLAMA_VERSION?=c03e8adbc45c866e0f6d876af1887d6b01d57eb4
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
GPT4ALL_REPO?=https://github.com/go-skynet/gpt4all
GPT4ALL_VERSION?=3657f9417e17edf378c27d0a9274a1bf41caa914
GOGPT2_VERSION?=abf038a7d8efa4eefdc7c891f05ad33d4e59e49d
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47
@@ -19,8 +20,8 @@ WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)

C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz

# Use this if you want to set the default behavior
ifndef BUILD_TYPE
@@ -37,19 +38,26 @@ endif

all: help

## GPT4ALL-J
go-gpt4all-j:
git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j
cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION) && git submodule update --init --recursive --depth 1
## GPT4ALL
gpt4all:
git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
# This is hackish, but needed as both go-llama and gpt4all bundle their own copy of ggml.
@find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
@find ./gpt4all -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
@find ./gpt4all -type f -name "*.go" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
@find ./gpt4all -type f -name "*.txt" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
mv ./gpt4all/gpt4all-backend/llama.cpp/llama_util.h ./gpt4all/gpt4all-backend/llama.cpp/gptjllama_util.h
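
For context on the sed pass above: local-ai statically links several ggml-based backends into one binary, and each vendors its own ggml, so identical exported C symbols (ggml_init, llama_*, gpt_*) would otherwise collide at link time. The renames move gpt4all's copy into a gptj-prefixed namespace. A minimal sketch of what this enables, assuming only the import paths used elsewhere in this PR:

```go
// Sketch: after the renames, both bindings can be linked into one binary,
// because gpt4all's private ggml copy no longer exports the same symbols
// as go-llama's.
package main

import (
	llama "github.com/go-skynet/go-llama.cpp"
	gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang"
)

func main() {
	// Referencing both constructors pulls both static archives into the
	// final link, which previously failed with duplicate ggml_* symbols.
	_ = llama.New
	_ = gpt4all.New
}
```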

## BERT embeddings
go-bert:
@@ -85,8 +93,8 @@ bloomz/libbloomz.a: bloomz
go-bert/libgobert.a: go-bert
$(MAKE) -C go-bert libgobert.a

go-gpt4all-j/libgptj.a: go-gpt4all-j
$(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a
gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ $(GENERIC_PREFIX)libgpt4all.a

## CEREBRAS GPT
go-gpt2:
@@ -119,32 +127,32 @@ go-llama/libbinding.a: go-llama

replace:
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
$(GOCMD) mod edit -replace github.com/nomic/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz

prepare-sources: go-llama go-gpt2 go-gpt4all-j go-rwkv whisper.cpp go-bert bloomz
prepare-sources: go-llama go-gpt2 gpt4all go-rwkv whisper.cpp go-bert bloomz replace
$(GOCMD) mod download
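
Note the ordering fix folded in here: replace is now a prerequisite of prepare-sources (and dropped from prepare below), so the go.mod rewrites are in place before $(GOCMD) mod download runs; presumably this keeps the placeholder gpt4all module from ever being resolved against a module proxy.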

## GENERIC
rebuild: ## Rebuilds the project
$(MAKE) -C go-llama clean
$(MAKE) -C go-gpt4all-j clean
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean
$(MAKE) -C go-gpt2 clean
$(MAKE) -C go-rwkv clean
$(MAKE) -C whisper.cpp clean
$(MAKE) -C go-bert clean
$(MAKE) -C bloomz clean
$(MAKE) build

prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a replace ## Prepares for building
prepare: prepare-sources gpt4all/gpt4all-bindings/golang/libgpt4all.a go-llama/libbinding.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building

clean: ## Remove build-related files
rm -fr ./go-llama
rm -rf ./go-gpt4all-j
rm -rf ./gpt4all
rm -rf ./go-gpt2
rm -rf ./go-rwkv
rm -rf ./go-bert
@@ -156,7 +164,7 @@ clean: ## Remove build-related files
build: prepare ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -x -o $(BINARY_NAME) ./
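
(go build -x prints every command it executes; having it here makes the long cgo link line visible, which helps when one of the vendored backends still exports a colliding symbol.)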

generic-build: ## Build the project using generic
BUILD_TYPE="generic" $(MAKE) build
2 changes: 1 addition & 1 deletion api/api_test.go
@@ -79,7 +79,7 @@ var _ = Describe("API test", func() {
It("returns errors", func() {
_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 9 errors occurred:"))
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 10 errors occurred:"))
})

})
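
Why 10 now: GreedyLoader (pkg/model/loader.go below) drops the single go-gpt4all-j attempt, adds three gpt4all model types (GPT-J, LLaMA, MPT), and stops autoloading bloomz, so the fallback chain grows from 9 attempts to 9 - 1 + 3 - 1 = 10.
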
34 changes: 20 additions & 14 deletions api/prediction.go
@@ -11,8 +11,8 @@ import (
"github.com/go-skynet/bloomz.cpp"
bert "github.com/go-skynet/go-bert.cpp"
gpt2 "github.com/go-skynet/go-gpt2.cpp"
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
llama "github.com/go-skynet/go-llama.cpp"
gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang"
)

// mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
@@ -315,29 +315,35 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
predictOptions...,
)
}
case *gptj.GPTJ:
case *gpt4all.Model:
supportStreams = true

fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gptj.PredictOption{
gptj.SetTemperature(c.Temperature),
gptj.SetTopP(c.TopP),
gptj.SetTopK(c.TopK),
gptj.SetTokens(c.Maxtokens),
gptj.SetThreads(c.Threads),
if tokenCallback != nil {
model.SetTokenCallback(tokenCallback)
}

if c.Batch != 0 {
predictOptions = append(predictOptions, gptj.SetBatch(c.Batch))
// Generate the prediction using the language model
predictOptions := []gpt4all.PredictOption{
gpt4all.SetTemperature(c.Temperature),
gpt4all.SetTopP(c.TopP),
gpt4all.SetTopK(c.TopK),
gpt4all.SetTokens(c.Maxtokens),
}

if c.Seed != 0 {
predictOptions = append(predictOptions, gptj.SetSeed(c.Seed))
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt4all.SetBatch(c.Batch))
}

return model.Predict(
str, er := model.Predict(
s,
predictOptions...,
)
// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
// after a stream event has occurred
model.SetTokenCallback(nil)
return str, er
}
case *llama.LLama:
supportStreams = true
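
For readers unfamiliar with the binding, here is a standalone sketch of the flow this case branch implements: register the streaming callback, predict, then clear the callback so a stale function cannot fire on a closed channel. Everything below is assumed from the calls visible in this diff; the model path and option values are illustrative.

```go
package main

import (
	"fmt"

	gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang"
)

func main() {
	// Load a GPT-J-family model; threads and type mirror the loader changes below.
	model, err := gpt4all.New("ggml-gpt4all-j.bin",
		gpt4all.SetThreads(4),
		gpt4all.SetModelType(gpt4all.GPTJType),
	)
	if err != nil {
		panic(err)
	}

	// Stream tokens as they arrive; returning true continues generation.
	model.SetTokenCallback(func(token string) bool {
		fmt.Print(token)
		return true
	})

	res, err := model.Predict("The capital of France is",
		gpt4all.SetTemperature(0.7),
		gpt4all.SetTopP(0.9),
		gpt4all.SetTopK(40),
		gpt4all.SetTokens(64),
	)

	// Clear the callback after every call, the same fix this diff applies,
	// so nothing registered can later send on a closed channel.
	model.SetTokenCallback(nil)

	if err != nil {
		panic(err)
	}
	fmt.Println(res)
}
```
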
1 change: 1 addition & 0 deletions go.mod
@@ -49,6 +49,7 @@ require (
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.18 // indirect
github.com/mattn/go-runewidth v0.0.14 // indirect
github.com/nomic/gpt4all/gpt4all-bindings/golang v0.0.0-00010101000000-000000000000 // indirect
github.com/philhofer/fwd v1.1.2 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
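
The pseudo-version v0.0.0-00010101000000-000000000000 is the zero placeholder Go writes for a module that is never fetched from a proxy: it only resolves through the replace directive added by the Makefile's replace target. Roughly, the resulting pair of directives looks like this sketch (the local path is assumed from the Makefile above):

```go.mod
require github.com/nomic/gpt4all/gpt4all-bindings/golang v0.0.0-00010101000000-000000000000 // indirect

replace github.com/nomic/gpt4all/gpt4all-bindings/golang => ./gpt4all/gpt4all-bindings/golang
```
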
46 changes: 33 additions & 13 deletions pkg/model/loader.go
@@ -14,9 +14,9 @@ import (
bloomz "github.com/go-skynet/bloomz.cpp"
bert "github.com/go-skynet/go-bert.cpp"
gpt2 "github.com/go-skynet/go-gpt2.cpp"
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
llama "github.com/go-skynet/go-llama.cpp"
"github.com/hashicorp/go-multierror"
gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang"
"github.com/rs/zerolog/log"
)

Expand All @@ -25,7 +25,7 @@ type ModelLoader struct {
mu sync.Mutex
// TODO: this needs generics
models map[string]*llama.LLama
gptmodels map[string]*gptj.GPTJ
gptmodels map[string]*gpt4all.Model
gpt2models map[string]*gpt2.GPT2
gptstablelmmodels map[string]*gpt2.StableLM
dollymodels map[string]*gpt2.Dolly
@@ -41,7 +41,7 @@ func NewModelLoader(modelPath string) *ModelLoader {
return &ModelLoader{
ModelPath: modelPath,
gpt2models: make(map[string]*gpt2.GPT2),
gptmodels: make(map[string]*gptj.GPTJ),
gptmodels: make(map[string]*gpt4all.Model),
gptstablelmmodels: make(map[string]*gpt2.StableLM),
dollymodels: make(map[string]*gpt2.Dolly),
redpajama: make(map[string]*gpt2.RedPajama),
@@ -326,7 +326,7 @@ func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
return model, err
}

func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
func (ml *ModelLoader) LoadGPT4AllModel(modelName string, opts ...gpt4all.ModelOption) (*gpt4all.Model, error) {
ml.mu.Lock()
defer ml.mu.Unlock()

@@ -344,7 +344,7 @@ func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
modelFile := filepath.Join(ml.ModelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

model, err := gptj.New(modelFile)
model, err := gpt4all.New(modelFile, opts...)
if err != nil {
return nil, err
}
@@ -441,8 +441,12 @@ func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, lla
return ml.LoadRedPajama(modelFile)
case "gpt2":
return ml.LoadGPT2Model(modelFile)
case "gptj":
return ml.LoadGPTJModel(modelFile)
case "gpt4all-llama":
return ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType))
case "gpt4all-mpt":
return ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.MPTType))
case "gpt4all-j":
return ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.GPTJType))
case "bert-embeddings":
return ml.LoadBERT(modelFile)
case "rwkv":
@@ -475,53 +479,69 @@ func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOpt
err = multierror.Append(err, modelerr)
}

model, modelerr = ml.LoadGPTJModel(modelFile)
model, modelerr = ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.GPTJType))
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}

model, modelerr = ml.LoadGPT2Model(modelFile)
model, modelerr = ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType))
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}

model, modelerr = ml.LoadStableLMModel(modelFile)
model, modelerr = ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.MPTType))
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}

model, modelerr = ml.LoadDollyModel(modelFile)
model, modelerr = ml.LoadGPT2Model(modelFile)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}

model, modelerr = ml.LoadRedPajama(modelFile)
model, modelerr = ml.LoadStableLMModel(modelFile)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}

model, modelerr = ml.LoadBloomz(modelFile)
model, modelerr = ml.LoadDollyModel(modelFile)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}

model, modelerr = ml.LoadRedPajama(modelFile)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}
// Do not autoload bloomz
//model, modelerr = ml.LoadBloomz(modelFile)
//if modelerr == nil {
// updateModels(model)
// return model, nil
//} else {
// err = multierror.Append(err, modelerr)
//}

model, modelerr = ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
if modelerr == nil {