Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions apps/llm/app/multimodal_llm/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import {
import { launchImageLibrary } from 'react-native-image-picker';
import { useIsFocused } from '@react-navigation/native';
import { useSafeAreaInsets } from 'react-native-safe-area-context';
import { useLLM, LFM2_VL_1_6B_QUANTIZED } from 'react-native-executorch';
import { useLLM, LFM2_5_VL_1_6B_QUANTIZED } from 'react-native-executorch';
import SendIcon from '../../assets/icons/send_icon.svg';
import PauseIcon from '../../assets/icons/pause_icon.svg';
import ColorPalette from '../../colors';
Expand Down Expand Up @@ -50,7 +50,7 @@ function MultimodalLLMScreen() {
const [error, setError] = useState<string | null>(null);

const vlm = useLLM({
model: LFM2_VL_1_6B_QUANTIZED,
model: LFM2_5_VL_1_6B_QUANTIZED,
});
const tokenCount = vlm.isReady ? vlm.getGeneratedTokenCount() : 0;
const { stats, onMessageSend } = useLLMStats(
Expand Down
22 changes: 16 additions & 6 deletions docs/docs/03-hooks/01-natural-language-processing/useLLM.md
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,15 @@ To configure model (i.e. change system prompt, load initial conversation history

- [`temperature`](../../06-api-reference/interfaces/GenerationConfig.md#temperature) - Scales output logits by the inverse of temperature. Controls the randomness / creativity of text generation.

- [`topp`](../../06-api-reference/interfaces/GenerationConfig.md#topp) - Only samples from the smallest set of tokens whose cumulative probability exceeds topp.
- [`topP`](../../06-api-reference/interfaces/GenerationConfig.md#topp) - Only samples from the smallest set of tokens whose cumulative probability exceeds topP. Range `[0, 1]`. Values of `0` or `1` disable top-p filtering.

- [`minP`](../../06-api-reference/interfaces/GenerationConfig.md#minp) - Minimum-probability threshold applied after softmax: tokens whose probability is below `minP * max_prob` are excluded from sampling. Range `[0, 1]`. Default `0` disables the filter. Stacks with `topP` when both are set.

- [`repetitionPenalty`](../../06-api-reference/interfaces/GenerationConfig.md#repetitionpenalty) - Multiplicative penalty applied to logits of tokens that already appeared in the prompt or the generated text. Values greater than `1` discourage repetition; default `1` disables the penalty.

:::info[Built-in models ship with sampling defaults]
Model presets expose an optional [`generationConfig`](../../06-api-reference/interfaces/LLMProps.md) on the `model` prop. Whenever the upstream model card publishes recommended values (currently Qwen3 and LFM2-VL), the preset carries them and `useLLM` applies them automatically before `isReady` becomes `true` — you don't need to call `configure` just to get sensible defaults. Any fields you later pass to `configure` still override the preset values on a per-field basis.
:::

### Model configuration example

Expand Down Expand Up @@ -279,7 +287,9 @@ useEffect(() => {
outputTokenBatchSize: 15,
batchTimeInterval: 100,
temperature: 0.7,
topp: 0.9,
topP: 0.9,
minP: 0.05,
repetitionPenalty: 1.05,
},
});
}, [configure]);
Expand Down Expand Up @@ -488,9 +498,9 @@ Some models support multimodal input — text and images together. To use them,
### Loading a VLM

```tsx
import { useLLM, LFM2_VL_1_6B_QUANTIZED } from 'react-native-executorch';
import { useLLM, LFM2_5_VL_1_6B_QUANTIZED } from 'react-native-executorch';

const llm = useLLM({ model: LFM2_VL_1_6B_QUANTIZED });
const llm = useLLM({ model: LFM2_5_VL_1_6B_QUANTIZED });
```

The `capabilities` field is already set on the model constant. You can also construct the model object explicitly:
Expand All @@ -511,7 +521,7 @@ Passing `capabilities` unlocks the typed `media` argument on `sendMessage`.
### Sending a message with an image

```tsx
const llm = useLLM({ model: LFM2_VL_1_6B_QUANTIZED });
const llm = useLLM({ model: LFM2_5_VL_1_6B_QUANTIZED });

const send = () => {
llm.sendMessage('What is in this image?', {
Expand All @@ -534,7 +544,7 @@ The `imagePath` should be a local file path on the device.
You can also use `generate` directly by setting `mediaPath` on user messages:

```tsx
const llm = useLLM({ model: LFM2_VL_1_6B_QUANTIZED });
const llm = useLLM({ model: LFM2_5_VL_1_6B_QUANTIZED });

const handleGenerate = async () => {
const chat: Message[] = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,17 +107,25 @@ To configure model (i.e. change system prompt, load initial conversation history

- [`temperature`](../../06-api-reference/interfaces/GenerationConfig.md#temperature) - Scales output logits by the inverse of temperature. Controls the randomness / creativity of text generation.

- [`topp`](../../06-api-reference/interfaces/GenerationConfig.md#topp) - Only samples from the smallest set of tokens whose cumulative probability exceeds topp.
- [`topP`](../../06-api-reference/interfaces/GenerationConfig.md#topp) - Only samples from the smallest set of tokens whose cumulative probability exceeds topP. Range `[0, 1]`. Values of `0` or `1` disable top-p filtering.

- [`minP`](../../06-api-reference/interfaces/GenerationConfig.md#minp) - Minimum-probability threshold applied after softmax: tokens whose probability is below `minP * max_prob` are excluded from sampling. Range `[0, 1]`. Default `0` disables the filter. Stacks with `topP` when both are set.

- [`repetitionPenalty`](../../06-api-reference/interfaces/GenerationConfig.md#repetitionpenalty) - Multiplicative penalty applied to logits of tokens that already appeared in the prompt or the generated text. Values greater than `1` discourage repetition; default `1` disables the penalty.

:::info[Built-in models ship with sampling defaults]
Model presets expose an optional `generationConfig` that `LLMModule.fromModelName` applies automatically when available — for Qwen3 and LFM2-VL this means the model-card recommended sampling settings are in effect without any explicit `configure` call. Any fields you pass to `configure` still override on a per-field basis.
:::

## Vision-Language Models (VLM)

Some models support multimodal input — text and images together. To use them, pass `capabilities` in the model object when calling [`fromModelName`](../../06-api-reference/classes/LLMModule.md#frommodelname):

```typescript
import { LLMModule, LFM2_VL_1_6B_QUANTIZED } from 'react-native-executorch';
import { LLMModule, LFM2_5_VL_1_6B_QUANTIZED } from 'react-native-executorch';

const llm = await LLMModule.fromModelName(
LFM2_VL_1_6B_QUANTIZED,
LFM2_5_VL_1_6B_QUANTIZED,
undefined,
(token) => console.log(token)
);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package com.swmansion.rnexecutorch

import com.facebook.react.bridge.ReactApplicationContext
import com.facebook.react.bridge.ReactMethod
import com.facebook.react.common.annotations.FrameworkAPI
import com.facebook.react.module.annotations.ReactModule

/**
 * Stand-in TurboModule used when the native ExecuTorch libraries cannot be
 * loaded (e.g. 32-bit Android devices when only arm64-v8a binaries ship).
 * It implements the same spec as ETInstaller, so the JS side still resolves
 * a real linked module, while install() reports false so callers can tell
 * "unsupported ABI" apart from "package not linked".
 */
@OptIn(FrameworkAPI::class)
@ReactModule(name = ETInstallerUnavailable.NAME)
class ETInstallerUnavailable(
  reactContext: ReactApplicationContext,
) : NativeETInstallerSpec(reactContext) {
  companion object {
    const val NAME = NativeETInstallerSpec.NAME
  }

  // Synchronous so JS learns about the unavailability immediately.
  @ReactMethod(isBlockingSynchronousMethod = true)
  override fun install(): Boolean = false
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,18 @@ class RnExecutorchPackage : TurboReactPackage() {
reactContext: ReactApplicationContext,
): NativeModule? =
if (name == ETInstaller.NAME) {
ETInstaller(reactContext)
try {
ETInstaller(reactContext)
} catch (e: RuntimeException) {
if (e.cause is UnsatisfiedLinkError) {
// Native library not available (e.g. 32-bit device without arm64-v8a .so).
// Return a fallback module whose install() returns false so JS can
// distinguish "unsupported ABI" from "package not linked."
ETInstallerUnavailable(reactContext)
} else {
throw e
}
}
} else {
null
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,15 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
synchronousHostFunction<&Model::setTopp>,
"setTopp"));

addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
synchronousHostFunction<&Model::setMinP>,
"setMinP"));

addFunctions(JSI_EXPORT_FUNCTION(
ModelHostObject<Model>,
synchronousHostFunction<&Model::setRepetitionPenalty>,
"setRepetitionPenalty"));

addFunctions(JSI_EXPORT_FUNCTION(
ModelHostObject<Model>,
synchronousHostFunction<&Model::getMaxContextLength>,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,30 @@ void LLM::setTopp(float topp) {
runner_->set_topp(topp);
}

// Sets the min-p sampling threshold on the underlying runner.
// Throws ModuleNotLoaded if no model is loaded and InvalidConfig if the
// value lies outside [0, 1]. (NaN passes through unchanged, matching the
// other setters in this file.)
void LLM::setMinP(float minP) {
  if (runner_ == nullptr || !runner_->is_loaded()) {
    throw RnExecutorchError(RnExecutorchErrorCode::ModuleNotLoaded,
                            "Can't configure a model that's not loaded");
  }
  const bool belowRange = minP < 0.0f;
  const bool aboveRange = minP > 1.0f;
  if (belowRange || aboveRange) {
    throw RnExecutorchError(RnExecutorchErrorCode::InvalidConfig,
                            "Min-p must be between 0.0 and 1.0");
  }
  runner_->set_min_p(minP);
}

// Sets the multiplicative repetition penalty on the underlying runner.
// Throws ModuleNotLoaded if no model is loaded and InvalidConfig for
// negative values; any non-negative value is forwarded as-is.
void LLM::setRepetitionPenalty(float repetitionPenalty) {
  if (runner_ == nullptr || !runner_->is_loaded()) {
    throw RnExecutorchError(RnExecutorchErrorCode::ModuleNotLoaded,
                            "Can't configure a model that's not loaded");
  }
  const bool isNegative = repetitionPenalty < 0.0f;
  if (isNegative) {
    throw RnExecutorchError(RnExecutorchErrorCode::InvalidConfig,
                            "Repetition penalty must be non-negative");
  }
  runner_->set_repetition_penalty(repetitionPenalty);
}

int32_t LLM::getMaxContextLength() const {
if (!runner_ || !runner_->is_loaded()) {
throw RnExecutorchError(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ class LLM : public BaseModel {
void setCountInterval(size_t countInterval);
void setTemperature(float temperature);
void setTopp(float topp);
void setMinP(float minP);
void setRepetitionPenalty(float repetitionPenalty);
void setTimeInterval(size_t timeInterval);
int32_t getMaxContextLength() const;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,12 @@ add_rn_test(RunnerTests unit/RunnerTest.cpp
integration/stubs/jsi_stubs.cpp
LIBS tokenizers_deps
)
# Unit tests for the sampler extensions (min-p and repetition penalty).
# NOTE(review): LIBS is left empty here while the sibling RunnerTests entry
# passes tokenizers_deps — confirm an empty LIBS keyword is accepted by
# add_rn_test and that sampler.cpp truly needs no extra link targets.
add_rn_test(SamplerTests unit/SamplerTest.cpp
SOURCES
${COMMON_DIR}/runner/sampler.cpp
${COMMON_DIR}/runner/arange_util.cpp
LIBS
)
add_rn_test(LogTests unit/LogTest.cpp)
add_rn_test(FileUtilsTest unit/FileUtilsTest.cpp)
add_rn_test(ImageProcessingTest unit/ImageProcessingTest.cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,31 @@ TEST_F(LLMTest, SetToppInvalidThrows) {
EXPECT_THROW(model.setTopp(1.1f), RnExecutorchError);
}

// Min-p accepts the full inclusive range [0, 1]; both boundary values are
// valid (0 disables the filter per the docs).
TEST_F(LLMTest, SetMinP) {
  LLM model(kValidModelPath, kValidTokenizerPath, {}, mockInvoker_);
  EXPECT_NO_THROW(model.setMinP(0.0f));
  EXPECT_NO_THROW(model.setMinP(0.15f));
  EXPECT_NO_THROW(model.setMinP(1.0f));
}

// Values outside [0, 1] must be rejected with RnExecutorchError.
TEST_F(LLMTest, SetMinPInvalidThrows) {
  LLM model(kValidModelPath, kValidTokenizerPath, {}, mockInvoker_);
  EXPECT_THROW(model.setMinP(-0.1f), RnExecutorchError);
  EXPECT_THROW(model.setMinP(1.1f), RnExecutorchError);
}

// Repetition penalty accepts any non-negative value; 1.0 is the neutral
// (penalty-disabled) setting.
TEST_F(LLMTest, SetRepetitionPenalty) {
  LLM model(kValidModelPath, kValidTokenizerPath, {}, mockInvoker_);
  EXPECT_NO_THROW(model.setRepetitionPenalty(1.0f));
  EXPECT_NO_THROW(model.setRepetitionPenalty(1.05f));
  EXPECT_NO_THROW(model.setRepetitionPenalty(2.0f));
}

// Only negative penalties are invalid (note: 0 is currently allowed).
TEST_F(LLMTest, SetRepetitionPenaltyInvalidThrows) {
  LLM model(kValidModelPath, kValidTokenizerPath, {}, mockInvoker_);
  EXPECT_THROW(model.setRepetitionPenalty(-0.1f), RnExecutorchError);
}

TEST_F(LLMTest, SetCountInterval) {
LLM model(kValidModelPath, kValidTokenizerPath, {}, mockInvoker_);
EXPECT_NO_THROW(model.setCountInterval(5));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,11 @@ class StubRunner : public ::executorch::extension::llm::BaseLLMRunner {
return ::executorch::runtime::Error::Ok;
}
void stop_impl() override {}
void set_temperature_impl(float t) override { last_temp_ = t; }
void set_topp_impl(float) override {}
void set_count_interval_impl(size_t) override {}
void set_time_interval_impl(size_t) override {}

int32_t resolve_max(int32_t prompt, int32_t seq_len, int32_t ctx_len,
int32_t max_new = -1) const {
return resolve_max_new_tokens(prompt, seq_len, ctx_len, max_new);
}

bool loaded_ = false;
float last_temp_ = -1.f;
};
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,10 @@ TEST(MultimodalInputTest, EmptyStringIsStillText) {
// BaseLLMRunner via StubRunner
// ============================================================================

TEST(BaseLLMRunnerTest, SetTemperatureUpdatesConfigAndCallsImpl) {
// set_temperature should persist the value into the runner's config struct.
// (The per-impl callback check was dropped along with set_temperature_impl.)
TEST(BaseLLMRunnerTest, SetTemperatureUpdatesConfig) {
  StubRunner runner(nullptr, "dummy");
  runner.set_temperature(0.42f);
  EXPECT_FLOAT_EQ(runner.config_.temperature, 0.42f);
}

TEST(BaseLLMRunnerTest, SetToppUpdatesConfig) {
Expand All @@ -89,3 +88,15 @@ TEST(BaseLLMRunnerTest, GenerateEmptyStringReturnsError) {
auto err = runner.generate("", {}, {}, {});
EXPECT_NE(err, ::executorch::runtime::Error::Ok);
}


// set_min_p should persist the value into the runner's config struct.
TEST(BaseLLMRunnerTest, SetMinPUpdatesConfig) {
  StubRunner runner(nullptr, "dummy");
  runner.set_min_p(0.15f);
  EXPECT_FLOAT_EQ(runner.config_.min_p, 0.15f);
}

// set_repetition_penalty should persist the value into the runner's config.
TEST(BaseLLMRunnerTest, SetRepetitionPenaltyUpdatesConfig) {
  StubRunner runner(nullptr, "dummy");
  runner.set_repetition_penalty(1.05f);
  EXPECT_FLOAT_EQ(runner.config_.repetition_penalty, 1.05f);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <gtest/gtest.h>
#include <runner/sampler.h>
#include <vector>

using namespace executorch::extension::llm;

// Helper: draw `n` samples from `s` and tally how often each vocab index
// wins. The logits are copied fresh for every draw — presumably because
// Sampler::sample modifies its buffer in place (the original did the same;
// TODO confirm against sampler.h).
template <typename T>
std::vector<int> sampleMany(Sampler &s, std::vector<T> logits,
                            const std::vector<uint64_t> &recent, int n) {
  std::vector<int> counts(logits.size(), 0);
  for (int draw = 0; draw < n; ++draw) {
    std::vector<T> scratch(logits);
    ++counts[s.sample(scratch.data(), recent)];
  }
  return counts;
}

// 1. Repetition penalty on a positive logit: token 0 appears in `recent`,
//    so with otherwise equal logits it should win noticeably fewer than
//    half the 2000 draws. The 1200 bound leaves statistical headroom.
//    Sampler ctor args appear to be (vocabSize, temperature, topP, seed,
//    minP, repetitionPenalty) — inferred from the other tests here;
//    confirm against sampler.h.
TEST(SamplerTest, RepetitionPenaltyReducesPositiveLogit) {
  Sampler s(2, 1.0f, 1.0f, 0, 0.0f, 1.3f);
  std::vector<float> logits = {1.0f, 1.0f};
  std::vector<uint64_t> recent = {0};
  auto counts = sampleMany(s, logits, recent, 2000);
  EXPECT_LT(counts[0], 1200);
}

// 2. Repetition penalty on a negative logit: the already-unlikely token 1
//    is penalised further, so it should appear in well under 10% of draws.
TEST(SamplerTest, RepetitionPenaltyMultipliesNegativeLogit) {
  Sampler s(2, 1.0f, 1.0f, 0, 0.0f, 1.5f);
  std::vector<float> logits = {0.0f, -1.0f};
  std::vector<uint64_t> recent = {1};
  auto counts = sampleMany(s, logits, recent, 2000);
  EXPECT_LT(counts[1], 200);
}

// 3. Empty `recent` list — the penalty has nothing to apply to, so a
//    penalised sampler must match the baseline distribution (tolerance 300
//    of 2000 draws absorbs sampling noise).
TEST(SamplerTest, RepetitionPenaltyNoRecentTokensHasNoEffect) {
  Sampler baseline(2, 1.0f, 1.0f, 0, 0.0f, 1.0f);
  Sampler penalised(2, 1.0f, 1.0f, 0, 0.0f, 2.0f);
  std::vector<float> logits_b = {1.0f, 1.0f};
  std::vector<float> logits_p = {1.0f, 1.0f};
  std::vector<uint64_t> recent = {};
  auto cb = sampleMany(baseline, logits_b, recent, 2000);
  auto cp = sampleMany(penalised, logits_p, recent, 2000);
  EXPECT_NEAR(cb[0], cp[0], 300);
}

// 4. Min-p truncation: with logits {5, -5, -5} the tail tokens carry
//    ~4.5e-5 probability each, far below 0.1 * max_prob (~0.1), so they
//    must never be sampled — token 0 takes all 1000 draws.
TEST(SamplerTest, MinPFiltersTailTokens) {
  Sampler s(3, 1.0f, 1.0f, 0, 0.1f, 1.0f);
  std::vector<float> logits = {5.0f, -5.0f, -5.0f};
  std::vector<uint64_t> recent = {};
  auto counts = sampleMany(s, logits, recent, 1000);
  EXPECT_EQ(counts[1], 0);
  EXPECT_EQ(counts[2], 0);
  EXPECT_EQ(counts[0], 1000);
}

// 5. Min-p = 0 disables filtering. NOTE(review): temperature is 0 here
//    (second ctor arg), unlike the other tests — presumably that makes
//    selection greedy/deterministic, which is why a single sample() call
//    with an exact EXPECT_EQ suffices; confirm against sampler.h.
TEST(SamplerTest, MinPZeroDisablesFiltering) {
  Sampler s(3, 0.0f, 1.0f, 0, 0.0f, 1.0f);
  std::vector<float> logits = {1.0f, -1000.0f, -1000.0f};
  std::vector<uint64_t> recent = {};
  EXPECT_EQ(s.sample(logits.data(), recent), 0);
}

// 6. Min-p (0.2) and top-p (0.5) stacked: either filter alone already
//    excludes the two tail tokens, so they must never be drawn.
TEST(SamplerTest, MinPAndToppStack) {
  Sampler s(4, 1.0f, 0.5f, 0, 0.2f, 1.0f);
  std::vector<float> logits = {5.0f, 2.0f, -2.0f, -5.0f};
  std::vector<uint64_t> recent = {};
  auto counts = sampleMany(s, logits, recent, 2000);
  EXPECT_EQ(counts[2], 0);
  EXPECT_EQ(counts[3], 0);
}
Loading
Loading