diff --git a/docs/versioned_docs/version-0.8.x/03-hooks/01-natural-language-processing/useLLM.md b/docs/versioned_docs/version-0.8.x/03-hooks/01-natural-language-processing/useLLM.md
index 714404e20f..d7fd5fe80e 100644
--- a/docs/versioned_docs/version-0.8.x/03-hooks/01-natural-language-processing/useLLM.md
+++ b/docs/versioned_docs/version-0.8.x/03-hooks/01-natural-language-processing/useLLM.md
@@ -211,7 +211,15 @@ To configure model (i.e. change system prompt, load initial conversation history
 
   - [`temperature`](../../06-api-reference/interfaces/GenerationConfig.md#temperature) - Scales output logits by the inverse of temperature. Controls the randomness / creativity of text generation.
 
-  - [`topp`](../../06-api-reference/interfaces/GenerationConfig.md#topp) - Only samples from the smallest set of tokens whose cumulative probability exceeds topp.
+  - `topP` - Samples only from the smallest set of tokens whose cumulative probability exceeds `topP`. Range `[0, 1]`. Values of `0` or `1` disable top-p filtering.
+
+  - `minP` - Minimum-probability threshold applied after softmax: tokens whose probability falls below `minP * max_prob` are excluded from sampling. Range `[0, 1]`. Default `0` disables the filter. Stacks with `topP` when both are set.
+
+  - `repetitionPenalty` - Multiplicative penalty applied to the logits of tokens that already appeared in the prompt or the generated text. Values greater than `1` discourage repetition; default `1` disables the penalty.
+
+:::info[Built-in models ship with sampling defaults]
+Model presets expose an optional [`generationConfig`](../../06-api-reference/interfaces/LLMProps.md) on the `model` prop. Whenever the upstream model card publishes recommended values (currently Qwen3 and LFM2-VL), the preset carries them and `useLLM` applies them automatically before `isReady` flips, so you don't need to call `configure` just to get sensible defaults. Any fields you then pass to `configure` still override on a per-field basis.
+:::
 
 ### Model configuration example
 
@@ -282,7 +290,9 @@ useEffect(() => {
       outputTokenBatchSize: 15,
       batchTimeInterval: 100,
       temperature: 0.7,
-      topp: 0.9,
+      topP: 0.9,
+      minP: 0.05,
+      repetitionPenalty: 1.05,
     },
   });
 }, [configure]);
@@ -491,9 +501,9 @@ Some models support multimodal input — text and images together. To use them,
 ### Loading a VLM
 
 ```tsx
-import { useLLM, LFM2_VL_1_6B_QUANTIZED } from 'react-native-executorch';
+import { useLLM, LFM2_5_VL_1_6B_QUANTIZED } from 'react-native-executorch';
 
-const llm = useLLM({ model: LFM2_VL_1_6B_QUANTIZED });
+const llm = useLLM({ model: LFM2_5_VL_1_6B_QUANTIZED });
 ```
 
 The `capabilities` field is already set on the model constant. You can also construct the model object explicitly:
@@ -514,7 +524,7 @@ Passing `capabilities` unlocks the typed `media` argument on `sendMessage`.
 ### Sending a message with an image
 
 ```tsx
-const llm = useLLM({ model: LFM2_VL_1_6B_QUANTIZED });
+const llm = useLLM({ model: LFM2_5_VL_1_6B_QUANTIZED });
 
 const send = () => {
   llm.sendMessage('What is in this image?', {
@@ -537,7 +547,7 @@ The `imagePath` should be a local file path on the device.
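Both `sendMessage` examples above pass an `imagePath`, and the docs require it to be a local file path on the device. A minimal sketch of producing such a path, assuming the host app uses `expo-image-picker` (an assumption made here for illustration; the picker is not part of `react-native-executorch`):

```tsx
// Hypothetical glue code: obtain a local image URI to use as `imagePath`.
// Assumes expo-image-picker is installed; any picker that returns a local
// `file://` URI works the same way.
import * as ImagePicker from 'expo-image-picker';

const pickLocalImagePath = async (): Promise<string | null> => {
  const result = await ImagePicker.launchImageLibraryAsync({
    mediaTypes: ImagePicker.MediaTypeOptions.Images,
  });
  if (result.canceled || result.assets.length === 0) {
    return null;
  }
  // The asset URI already points at a file on the device.
  return result.assets[0].uri;
};
```

The returned URI can then be passed as `imagePath` in `sendMessage`, or as `mediaPath` in the `generate` flow shown next.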
 You can also use `generate` directly by setting `mediaPath` on user messages:
 
 ```tsx
-const llm = useLLM({ model: LFM2_VL_1_6B_QUANTIZED });
+const llm = useLLM({ model: LFM2_5_VL_1_6B_QUANTIZED });
 
 const handleGenerate = async () => {
   const chat: Message[] = [
diff --git a/docs/versioned_docs/version-0.8.x/04-typescript-api/01-natural-language-processing/LLMModule.md b/docs/versioned_docs/version-0.8.x/04-typescript-api/01-natural-language-processing/LLMModule.md
index 3a479e8ae4..c015de5149 100644
--- a/docs/versioned_docs/version-0.8.x/04-typescript-api/01-natural-language-processing/LLMModule.md
+++ b/docs/versioned_docs/version-0.8.x/04-typescript-api/01-natural-language-processing/LLMModule.md
@@ -107,17 +107,25 @@ To configure model (i.e. change system prompt, load initial conversation history
 
   - [`temperature`](../../06-api-reference/interfaces/GenerationConfig.md#temperature) - Scales output logits by the inverse of temperature. Controls the randomness / creativity of text generation.
 
-  - [`topp`](../../06-api-reference/interfaces/GenerationConfig.md#topp) - Only samples from the smallest set of tokens whose cumulative probability exceeds topp.
+  - `topP` - Samples only from the smallest set of tokens whose cumulative probability exceeds `topP`. Range `[0, 1]`. Values of `0` or `1` disable top-p filtering.
+
+  - `minP` - Minimum-probability threshold applied after softmax: tokens whose probability falls below `minP * max_prob` are excluded from sampling. Range `[0, 1]`. Default `0` disables the filter. Stacks with `topP` when both are set.
+
+  - `repetitionPenalty` - Multiplicative penalty applied to the logits of tokens that already appeared in the prompt or the generated text. Values greater than `1` discourage repetition; default `1` disables the penalty.
+
+:::info[Built-in models ship with sampling defaults]
+Model presets expose an optional `generationConfig` that `LLMModule.fromModelName` applies automatically when available; for Qwen3 and LFM2-VL this means the model-card-recommended sampling settings are in effect without any explicit `configure` call. Any fields you pass to `configure` still override on a per-field basis.
+:::
 
 ## Vision-Language Models (VLM)
 
 Some models support multimodal input — text and images together. To use them, pass `capabilities` in the model object when calling [`fromModelName`](../../06-api-reference/classes/LLMModule.md#frommodelname):
 
 ```typescript
-import { LLMModule, LFM2_VL_1_6B_QUANTIZED } from 'react-native-executorch';
+import { LLMModule, LFM2_5_VL_1_6B_QUANTIZED } from 'react-native-executorch';
 
 const llm = await LLMModule.fromModelName(
-  LFM2_VL_1_6B_QUANTIZED,
+  LFM2_5_VL_1_6B_QUANTIZED,
   undefined,
   (token) => console.log(token)
 );
diff --git a/packages/react-native-executorch/src/hooks/computer_vision/useInstanceSegmentation.ts b/packages/react-native-executorch/src/hooks/computer_vision/useInstanceSegmentation.ts
index 36827828d5..ab63912fae 100644
--- a/packages/react-native-executorch/src/hooks/computer_vision/useInstanceSegmentation.ts
+++ b/packages/react-native-executorch/src/hooks/computer_vision/useInstanceSegmentation.ts
@@ -15,7 +15,7 @@ import { useModuleFactory } from '../useModuleFactory';
  * @param props - Configuration object containing `model` config and optional `preventLoad` flag.
  * @returns An object with model state (`error`, `isReady`, `isGenerating`, `downloadProgress`), a typed `forward` function, `getAvailableInputSizes` helper, and a `runOnFrame` worklet for VisionCamera integration.
  * @example
- * ```ts
+ * ```tsx
  * const { isReady, isGenerating, forward, error, downloadProgress, getAvailableInputSizes, runOnFrame } =
  *   useInstanceSegmentation({
  *     model: {
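The `minP` and `topP` bullets added in both docs above describe a two-stage filter, and the interaction is easiest to see in code. A minimal sketch of that composition in TypeScript (illustrative only; the library's actual sampler runs in native code and may differ in detail):

```tsx
// Illustrative sketch, not the library's implementation: shows how the
// `minP` and `topP` filters described above compose over one softmax output.
function sampleWithFilters(
  probs: number[], // softmax output over the vocabulary, sums to 1
  topP: number,
  minP: number
): number {
  // min-p: drop tokens whose probability is below `minP * max_prob`.
  const maxProb = Math.max(...probs);
  let candidates = probs
    .map((p, i) => ({ p, i }))
    .filter(({ p }) => p >= minP * maxProb);

  // top-p: keep the smallest high-probability prefix whose cumulative
  // mass exceeds `topP`; values of 0 or 1 disable the filter.
  candidates.sort((a, b) => b.p - a.p);
  if (topP > 0 && topP < 1) {
    const kept: { p: number; i: number }[] = [];
    let cumulative = 0;
    for (const c of candidates) {
      kept.push(c);
      cumulative += c.p;
      if (cumulative > topP) break;
    }
    candidates = kept;
  }

  // Renormalize the survivors and sample one token index.
  const mass = candidates.reduce((sum, c) => sum + c.p, 0);
  let r = Math.random() * mass;
  for (const c of candidates) {
    r -= c.p;
    if (r <= 0) return c.i;
  }
  return candidates[candidates.length - 1].i;
}
```

With the values from the configuration example (`topP: 0.9`, `minP: 0.05`), a token must clear both hurdles to stay in the candidate pool, which is what "stacks with `topP`" means in the bullet above.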