22 changes: 14 additions & 8 deletions docs/docs/hookless-api/LLMModule.md
@@ -39,20 +39,25 @@ LLMModule.delete();

### Methods

| Method               | Type                                                                                                                                                                               | Description                                                                                  |
| -------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------- |
| `load`               | `LLMModule.load(modelSource: ResourceSource, tokenizerSource: ResourceSource, systemPrompt?: string, messageHistory?: MessageType[], contextWindowLength?: number): Promise<void>` | Loads the model. Check out the [loading the model](#loading-the-model) section for details.  |
| `onDownloadProgress` | `(callback: (downloadProgress: number) => void): any`                                                                                                                              | Subscribes to the download progress event.                                                   |
| `generate`           | `(input: string): Promise<void>`                                                                                                                                                   | Starts generating a response for the given input string.                                     |
| `onToken`            | <code>(callback: (data: string &#124; undefined) => void): any</code>                                                                                                              | Subscribes to the token generation event.                                                    |
| `interrupt`          | `(): void`                                                                                                                                                                         | Interrupts the current inference.                                                            |
| `delete`             | `(): void`                                                                                                                                                                         | Deletes the model from memory.                                                               |

<details>
<summary>Type definitions</summary>

```typescript
type ResourceSource = string | number;

interface MessageType {
role: 'user' | 'assistant';
content: string;
}
```

</details>
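A common pattern with `onToken` is to accumulate streamed tokens into a full response string. The accumulation itself is plain TypeScript; the helper below is hypothetical (not part of the library), and the commented usage assumes the `LLMModule` API from the table above:

```typescript
// Hypothetical accumulator for tokens emitted via onToken.
function makeAccumulator() {
  let response = '';
  return {
    // onToken may deliver undefined; ignore those events.
    push(token: string | undefined) {
      if (token !== undefined) {
        response += token;
      }
    },
    get(): string {
      return response;
    },
  };
}

// Sketch of wiring it up (assumes the model is already loaded):
// const acc = makeAccumulator();
// LLMModule.onToken((token) => acc.push(token));
// await LLMModule.generate('Hello');
// console.log(acc.get());
```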
@@ -64,6 +69,7 @@ To load the model, use the `load` method. It accepts:
- `modelSource` - A string that specifies the location of the model binary. For more information, take a look at [loading models](../fundamentals/loading-models.md) page.
- `tokenizerSource` - URL to the binary file that contains the tokenizer.
- `systemPrompt` - Often used to tell the model what its purpose is, for example: "Be a helpful translator".
- `messageHistory` - An array of `MessageType` objects that represent the conversation history. This can be used to provide context to the model.
- `contextWindowLength` - The number of messages from the current conversation that the model will use to generate a response. The higher the number, the more context the model will have. Keep in mind that using larger context windows will result in longer inference time and higher memory usage.

This method returns a promise that resolves once the model has loaded, or rejects with an error.
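The effect of `contextWindowLength` can be pictured as keeping only the most recent messages from the conversation. The helper below is a sketch of that idea, not the library's actual implementation:

```typescript
interface MessageType {
  role: 'user' | 'assistant';
  content: string;
}

// Keep only the last `contextWindowLength` messages
// (assumes contextWindowLength > 0).
function windowedHistory(
  history: MessageType[],
  contextWindowLength: number
): MessageType[] {
  return history.slice(-contextWindowLength);
}
```

A larger window passes more of `history` to the model, at the cost of longer inference time and higher memory usage, as noted above.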
59 changes: 57 additions & 2 deletions docs/docs/llms/running-llms.md → docs/docs/llms/useLLM.md
@@ -1,5 +1,5 @@
---
title: Running LLMs
title: useLLM
sidebar_position: 1
---

@@ -16,13 +16,62 @@ In order to load a model into the app, you need to run the following code:
```typescript
import { useLLM, LLAMA3_2_1B } from 'react-native-executorch';

const messageHistory = [
{ role: 'user', content: 'Hello' },
{ role: 'assistant', content: 'Hi, how can I help you?' },
];

const llama = useLLM({
modelSource: LLAMA3_2_1B,
tokenizerSource: require('../assets/tokenizer.bin'),
systemPrompt: 'Be a helpful assistant',
messageHistory: messageHistory,
contextWindowLength: 3,
});
```

<details>
<summary>Type definitions</summary>

```typescript
const useLLM: ({
modelSource,
tokenizerSource,
systemPrompt,
messageHistory,
contextWindowLength,
}: {
modelSource: ResourceSource;
tokenizerSource: ResourceSource;
systemPrompt?: string;
messageHistory?: MessageType[];
contextWindowLength?: number;
}) => Model;

interface Model {
generate: (input: string) => Promise<void>;
response: string;
downloadProgress: number;
error: string | null;
isModelGenerating: boolean;
isGenerating: boolean;
isModelReady: boolean;
isReady: boolean;
interrupt: () => void;
}

type ResourceSource = string | number;

interface MessageType {
role: 'user' | 'assistant';
content: string;
}
```

</details>

<br/>

The code snippet above fetches the model from the specified URL, loads it into memory, and returns an object with various methods and properties for controlling the model. You can monitor the loading progress by checking the `llama.downloadProgress` and `llama.isReady` properties; if anything goes wrong, the `llama.error` property will contain the error message.
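For display purposes, you might convert the 0–1 `downloadProgress` value into a percentage. The helper below is hypothetical, not part of the library:

```typescript
// Format a 0–1 progress value as a percentage string for the UI.
function formatProgress(downloadProgress: number): string {
  return `${Math.round(downloadProgress * 100)}%`;
}

// e.g. render `Downloading model: ${formatProgress(llama.downloadProgress)}`
```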

:::danger
@@ -39,9 +88,15 @@ Given computational constraints, our architecture is designed to support only on

**`tokenizerSource`** - URL to the binary file that contains the tokenizer.

**`systemPrompt`** - Often used to tell the model what its purpose is, for example: "Be a helpful translator".

**`messageHistory`** - An array of `MessageType` objects that represent the conversation history. This can be used to provide context to the model.

**`contextWindowLength`** - The number of messages from the current conversation that the model will use to generate a response. The higher the number, the more context the model will have. Keep in mind that using larger context windows will result in longer inference time and higher memory usage.

:::note
Make sure that the reference to the `messageHistory` array is stable. Depending on your use case, you might use `useState` or `useRef` to store the message history.
:::
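To see why reference stability matters: recreating the array on every render produces a new object each time, so a reference comparison treats it as changed even when the contents are identical. A plain-TypeScript illustration (the function name is made up for the example):

```typescript
// Each call returns a brand-new array object.
function makeHistory() {
  return [{ role: 'user' as const, content: 'Hello' }];
}

const a = makeHistory();
const b = makeHistory();
console.log(a === b);                                 // false: different references
console.log(JSON.stringify(a) === JSON.stringify(b)); // true: identical contents
```

Storing the array with `useRef` (or in `useState`) keeps the same reference across renders.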

### Returns
