From 20011d53c40fb2bab8b38aadf11e4a50645a0472 Mon Sep 17 00:00:00 2001 From: jakmro Date: Tue, 25 Feb 2025 12:54:11 +0100 Subject: [PATCH 1/2] docs: Add LLM messageHistory --- docs/docs/hookless-api/LLMModule.md | 22 ++++--- docs/docs/llms/{running-llms.md => useLLM.md} | 59 ++++++++++++++++++- 2 files changed, 71 insertions(+), 10 deletions(-) rename docs/docs/llms/{running-llms.md => useLLM.md} (86%) diff --git a/docs/docs/hookless-api/LLMModule.md b/docs/docs/hookless-api/LLMModule.md index 7656db14be..bd36239f2d 100644 --- a/docs/docs/hookless-api/LLMModule.md +++ b/docs/docs/hookless-api/LLMModule.md @@ -39,20 +39,25 @@ LLMModule.delete(); ### Methods -| Method | Type | Description | -| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------ | -| `load` | `(modelSource: ResourceSource, tokenizerSource: ResourceSource, systemPrompt?: string, contextWindowLength?: number): Promise` | Loads the model. Checkout the [loading the model](#loading-the-model) section for details. | -| `onDownloadProgress` | `(callback: (downloadProgress: number) => void): any` | Subscribe to the download progress event. | -| `generate` | `(input: string): Promise` | Method to start generating a response with the given input string. | -| `onToken` | (callback: (data: string | undefined) => void): any | Subscribe to the token generation event. | -| `interrupt` | `(): void` | Method to interrupt the current inference | -| `delete` | `(): void` | Method to delete the model from memory. 
| +| Method               | Type                                                                                                                                                                                 | Description                                                                                | +| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------ | +| `load`               | `LLMModule.load(modelSource: ResourceSource, tokenizerSource: ResourceSource, systemPrompt?: string, messageHistory?: MessageType[], contextWindowLength?: number): Promise<void>` | Loads the model. Check out the [loading the model](#loading-the-model) section for details. | +| `onDownloadProgress` | `(callback: (downloadProgress: number) => void): any`                                                                                                                                | Subscribe to the download progress event.                                                  | +| `generate`           | `(input: string): Promise<void>`                                                                                                                                                     | Method to start generating a response with the given input string.                         | +| `onToken`            | `(callback: (data: string \| undefined) => void): any`                                                                                                                               | Subscribe to the token generation event.                                                   | +| `interrupt`          | `(): void`                                                                                                                                                                           | Method to interrupt the current inference.                                                 | +| `delete`             | `(): void`                                                                                                                                                                           | Method to delete the model from memory.                                                    |
Type definitions ```typescript type ResourceSource = string | number; + +interface MessageType { + role: 'user' | 'assistant'; + content: string; +} ```
@@ -64,6 +69,7 @@ To load the model, use the `load` method. It accepts: - `modelSource` - A string that specifies the location of the model binary. For more information, take a look at [loading models](../fundamentals/loading-models.md) page. - `tokenizerSource` - URL to the binary file which contains the tokenizer - `systemPrompt` - Often used to tell the model what is its purpose, for example - "Be a helpful translator" +- `messageHistory` - An array of `MessageType` objects that represent the conversation history. - `contextWindowLength` - The number of messages from the current conversation that the model will use to generate a response. The higher the number, the more context the model will have. Keep in mind that using larger context windows will result in longer inference time and higher memory usage. This method returns a promise, which can resolve to an error or void. diff --git a/docs/docs/llms/running-llms.md b/docs/docs/llms/useLLM.md similarity index 86% rename from docs/docs/llms/running-llms.md rename to docs/docs/llms/useLLM.md index 36016f2c50..63d86f5218 100644 --- a/docs/docs/llms/running-llms.md +++ b/docs/docs/llms/useLLM.md @@ -1,5 +1,5 @@ --- -title: Running LLMs +title: useLLM sidebar_position: 1 --- @@ -16,13 +16,62 @@ In order to load a model into the app, you need to run the following code: ```typescript import { useLLM, LLAMA3_2_1B } from 'react-native-executorch'; +const messageHistory = [ + { role: 'user', content: 'Hello' }, + { role: 'assistant', content: 'Hi, how can I help you?' }, +]; + const llama = useLLM({ modelSource: LLAMA3_2_1B, tokenizerSource: require('../assets/tokenizer.bin'), + systemPrompt: 'Be a helpful assistant', + messageHistory: messageHistory, contextWindowLength: 3, }); ``` +
+Type definitions + +```typescript +const useLLM: ({ + modelSource, + tokenizerSource, + systemPrompt, + messageHistory, + contextWindowLength, +}: { + modelSource: ResourceSource; + tokenizerSource: ResourceSource; + systemPrompt?: string; + messageHistory?: MessageType[]; + contextWindowLength?: number; +}) => Model; + +interface Model { + generate: (input: string) => Promise; + response: string; + downloadProgress: number; + error: string | null; + isModelGenerating: boolean; + isGenerating: boolean; + isModelReady: boolean; + isReady: boolean; + interrupt: () => void; +} + +type ResourceSource = string | number; + +interface MessageType { + role: 'user' | 'assistant'; + content: string; +} +``` + +
+ +
+ The code snippet above fetches the model from the specified URL, loads it into memory, and returns an object with various methods and properties for controlling the model. You can monitor the loading progress by checking the `llama.downloadProgress` and `llama.isReady` property, and if anything goes wrong, the `llama.error` property will contain the error message. :::danger @@ -39,9 +88,15 @@ Given computational constraints, our architecture is designed to support only on **`tokenizerSource`** - URL to the binary file which contains the tokenizer +**`systemPrompt`** - Often used to tell the model what is its purpose, for example - "Be a helpful translator" + +**`messageHistory`** - An array of `MessageType` objects that represent the conversation history. + **`contextWindowLength`** - The number of messages from the current conversation that the model will use to generate a response. The higher the number, the more context the model will have. Keep in mind that using larger context windows will result in longer inference time and higher memory usage. -**`systemPrompt`** - Often used to tell the model what is its purpose, for example - "Be a helpful translator" +:::note +Make sure that the reference to the `messageHistory` array is stable. Depending on your use case, you might use `useState` or `useRef` to store the message history. +::: ### Returns From 77cefb3fc15b9217a21ee5f1b97370374c2a35da Mon Sep 17 00:00:00 2001 From: jakmro Date: Wed, 26 Feb 2025 09:56:25 +0100 Subject: [PATCH 2/2] Add suggested changes --- docs/docs/hookless-api/LLMModule.md | 2 +- docs/docs/llms/useLLM.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/docs/hookless-api/LLMModule.md b/docs/docs/hookless-api/LLMModule.md index bd36239f2d..d52e2e0376 100644 --- a/docs/docs/hookless-api/LLMModule.md +++ b/docs/docs/hookless-api/LLMModule.md @@ -69,7 +69,7 @@ To load the model, use the `load` method. 
It accepts: - `modelSource` - A string that specifies the location of the model binary. For more information, take a look at [loading models](../fundamentals/loading-models.md) page. - `tokenizerSource` - URL to the binary file which contains the tokenizer - `systemPrompt` - Often used to tell the model what is its purpose, for example - "Be a helpful translator" -- `messageHistory` - An array of `MessageType` objects that represent the conversation history. +- `messageHistory` - An array of `MessageType` objects that represent the conversation history. This can be used to provide context to the model. - `contextWindowLength` - The number of messages from the current conversation that the model will use to generate a response. The higher the number, the more context the model will have. Keep in mind that using larger context windows will result in longer inference time and higher memory usage. This method returns a promise, which can resolve to an error or void. diff --git a/docs/docs/llms/useLLM.md b/docs/docs/llms/useLLM.md index 63d86f5218..cb32bff784 100644 --- a/docs/docs/llms/useLLM.md +++ b/docs/docs/llms/useLLM.md @@ -90,7 +90,7 @@ Given computational constraints, our architecture is designed to support only on **`systemPrompt`** - Often used to tell the model what is its purpose, for example - "Be a helpful translator" -**`messageHistory`** - An array of `MessageType` objects that represent the conversation history. +**`messageHistory`** - An array of `MessageType` objects that represent the conversation history. This can be used to provide context to the model. **`contextWindowLength`** - The number of messages from the current conversation that the model will use to generate a response. The higher the number, the more context the model will have. Keep in mind that using larger context windows will result in longer inference time and higher memory usage.