From 718d568c6650f7658a96d7271862bd11db04571c Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Thu, 9 Nov 2023 04:13:51 -0800 Subject: [PATCH 1/5] Use llama-tokenizer-js to count tokens --- app/page.js | 11 ++++------- app/src/tokenizer.js | 5 +++++ package-lock.json | 11 +++++++++++ package.json | 1 + 4 files changed, 21 insertions(+), 7 deletions(-) create mode 100644 app/src/tokenizer.js diff --git a/app/page.js b/app/page.js index 1d6db1c..6429449 100644 --- a/app/page.js +++ b/app/page.js @@ -8,10 +8,7 @@ import EmptyState from "./components/EmptyState"; import { Cog6ToothIcon, CodeBracketIcon } from "@heroicons/react/20/solid"; import { useCompletion } from "ai/react"; import { Toaster, toast } from "react-hot-toast"; - -function approximateTokenCount(text) { - return Math.ceil(text.length * 0.4); -} +import { countTokens } from "./src/tokenizer.js"; const VERSIONS = [ { @@ -177,7 +174,7 @@ export default function HomePage() { // Generate initial prompt and calculate tokens let prompt = `${generatePrompt(messageHistory)}\n`; // Check if we exceed max tokens and truncate the message history if so. - while (approximateTokenCount(prompt) > MAX_TOKENS) { + while (countTokens(prompt) > MAX_TOKENS) { if (messageHistory.length < 3) { setError( "Your message is too long. Please try again with a shorter message." @@ -224,8 +221,8 @@ export default function HomePage() { {size.shortened == "Llava" ? "🌋" : size.shortened == "Salmonn" - ? "🐟" - : "🦙"}{" "} + ? "🐟" + : "🦙"}{" "} Chat with{" "}
@@ -309,9 +309,9 @@ export default function HomePage() { setMaxTokens={setMaxTokens} topP={topP} setTopP={setTopP} - versions={VERSIONS} - size={size} - setSize={setSize} + models={MODELS} + size={model} + setSize={setModel} /> {image && ( From a516fc6d06e42965d6a74a00ba382b2af9d36adf Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Wed, 6 Dec 2023 10:52:55 -0800 Subject: [PATCH 5/5] Use official model endpoints for llamas --- app/api/route.js | 14 +++----------- app/components/SlideOver.js | 4 ++-- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/app/api/route.js b/app/api/route.js index e06e2e4..e8a7890 100644 --- a/app/api/route.js +++ b/app/api/route.js @@ -14,9 +14,6 @@ if (!process.env.REPLICATE_API_TOKEN) { } const VERSIONS = { - "meta/llama-2-7b-chat": "13c3cdee13ee059ab779f0291d29054dab00a47dad8261375654de5540165fb0", - "meta/llama-2-13b-chat": "f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d", - "meta/llama-2-70b-chat": "02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3", "yorickvp/llava-13b": "e272157381e2a3bf12df3a8edd1f38d1dbd736bbb7437277c8b34175f8fce358", "nateraw/salmonn": "ad1d3f9d2bd683628242b68d890bef7f7bd97f738a7c2ccbf1743a594c723d83", }; @@ -46,8 +43,9 @@ async function runLlama({ }) { console.log("running llama"); - return await replicate.predictions.create({ - // IMPORTANT! You must enable streaming. + const [owner, name] = model.split("/"); + + return await replicate.models.predictions.create(owner, name, { stream: true, input: { prompt: `${prompt}`, @@ -57,8 +55,6 @@ async function runLlama({ repetition_penalty: 1, top_p: topP, }, - // IMPORTANT! The model must support streaming. See https://replicate.com/docs/streaming - version: VERSIONS[model], }); } @@ -66,7 +62,6 @@ async function runLlava({ prompt, maxTokens, temperature, topP, image }) { console.log("running llava"); return await replicate.predictions.create({ - // IMPORTANT! You must enable streaming. stream: true, input: { prompt: `${prompt}`, @@ -75,7 +70,6 @@ async function runLlava({ prompt, maxTokens, temperature, topP, image }) { max_tokens: maxTokens, image: image, }, - // IMPORTANT! The model must support streaming. See https://replicate.com/docs/streaming version: models["yorickvp/llava-13b"] }); } @@ -84,7 +78,6 @@ async function runSalmonn({ prompt, maxTokens, temperature, topP, audio }) { console.log("running salmonn"); return await replicate.predictions.create({ - // IMPORTANT! You must enable streaming. stream: true, input: { prompt: `${prompt}`, @@ -93,7 +86,6 @@ async function runSalmonn({ prompt, maxTokens, temperature, topP, audio }) { max_length: maxTokens, wav_path: audio, }, - // IMPORTANT! The model must support streaming. See https://replicate.com/docs/streaming version: models["nateraw/salmonn"] }); } diff --git a/app/components/SlideOver.js b/app/components/SlideOver.js index 60b7f34..e46d778 100644 --- a/app/components/SlideOver.js +++ b/app/components/SlideOver.js @@ -125,8 +125,8 @@ export default function SlideOver({ <> {model.name}