Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions apps/llm/app/multimodal_llm/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import {
import { launchImageLibrary } from 'react-native-image-picker';
import { useIsFocused } from '@react-navigation/native';
import { useSafeAreaInsets } from 'react-native-safe-area-context';
import { useLLM, LFM2_VL_1_6B_QUANTIZED } from 'react-native-executorch';
import { useLLM, LFM2_5_VL_1_6B_QUANTIZED } from 'react-native-executorch';
import SendIcon from '../../assets/icons/send_icon.svg';
import PauseIcon from '../../assets/icons/pause_icon.svg';
import ColorPalette from '../../colors';
Expand Down Expand Up @@ -50,7 +50,7 @@ function MultimodalLLMScreen() {
const [error, setError] = useState<string | null>(null);

const vlm = useLLM({
model: LFM2_VL_1_6B_QUANTIZED,
model: LFM2_5_VL_1_6B_QUANTIZED,
});
const tokenCount = vlm.isReady ? vlm.getGeneratedTokenCount() : 0;
const { stats, onMessageSend } = useLLMStats(
Expand Down
22 changes: 16 additions & 6 deletions docs/docs/03-hooks/01-natural-language-processing/useLLM.md
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,15 @@ To configure model (i.e. change system prompt, load initial conversation history

- [`temperature`](../../06-api-reference/interfaces/GenerationConfig.md#temperature) - Scales output logits by the inverse of temperature. Controls the randomness / creativity of text generation.

- [`topp`](../../06-api-reference/interfaces/GenerationConfig.md#topp) - Only samples from the smallest set of tokens whose cumulative probability exceeds topp.
- [`topP`](../../06-api-reference/interfaces/GenerationConfig.md#topp) - Only samples from the smallest set of tokens whose cumulative probability exceeds topP. Range `[0, 1]`. Values of `0` or `1` disable top-p filtering.

- [`minP`](../../06-api-reference/interfaces/GenerationConfig.md#minp) - Minimum-probability threshold applied after softmax: tokens whose probability is below `minP * max_prob` are excluded from sampling. Range `[0, 1]`. Default `0` disables the filter. Stacks with `topP` when both are set.

- [`repetitionPenalty`](../../06-api-reference/interfaces/GenerationConfig.md#repetitionpenalty) - Multiplicative penalty applied to logits of tokens that already appeared in the prompt or the generated text. Values greater than `1` discourage repetition; default `1` disables the penalty.

:::info[Built-in models ship with sampling defaults]
Model presets expose an optional [`generationConfig`](../../06-api-reference/interfaces/LLMProps.md) on the `model` prop. Whenever the upstream model card publishes recommended values (currently Qwen3 and LFM2-VL), the preset carries them and `useLLM` applies them automatically before `isReady` becomes `true` — you don't need to call `configure` just to get sensible defaults. Any fields you later pass to `configure` still override the preset values on a per-field basis.
:::

### Model configuration example

Expand Down Expand Up @@ -279,7 +287,9 @@ useEffect(() => {
outputTokenBatchSize: 15,
batchTimeInterval: 100,
temperature: 0.7,
topp: 0.9,
topP: 0.9,
minP: 0.05,
repetitionPenalty: 1.05,
},
});
}, [configure]);
Expand Down Expand Up @@ -488,9 +498,9 @@ Some models support multimodal input — text and images together. To use them,
### Loading a VLM

```tsx
import { useLLM, LFM2_VL_1_6B_QUANTIZED } from 'react-native-executorch';
import { useLLM, LFM2_5_VL_1_6B_QUANTIZED } from 'react-native-executorch';

const llm = useLLM({ model: LFM2_VL_1_6B_QUANTIZED });
const llm = useLLM({ model: LFM2_5_VL_1_6B_QUANTIZED });
```

The `capabilities` field is already set on the model constant. You can also construct the model object explicitly:
Expand All @@ -511,7 +521,7 @@ Passing `capabilities` unlocks the typed `media` argument on `sendMessage`.
### Sending a message with an image

```tsx
const llm = useLLM({ model: LFM2_VL_1_6B_QUANTIZED });
const llm = useLLM({ model: LFM2_5_VL_1_6B_QUANTIZED });

const send = () => {
llm.sendMessage('What is in this image?', {
Expand All @@ -534,7 +544,7 @@ The `imagePath` should be a local file path on the device.
You can also use `generate` directly by setting `mediaPath` on user messages:

```tsx
const llm = useLLM({ model: LFM2_VL_1_6B_QUANTIZED });
const llm = useLLM({ model: LFM2_5_VL_1_6B_QUANTIZED });

const handleGenerate = async () => {
const chat: Message[] = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,17 +107,25 @@ To configure model (i.e. change system prompt, load initial conversation history

- [`temperature`](../../06-api-reference/interfaces/GenerationConfig.md#temperature) - Scales output logits by the inverse of temperature. Controls the randomness / creativity of text generation.

- [`topp`](../../06-api-reference/interfaces/GenerationConfig.md#topp) - Only samples from the smallest set of tokens whose cumulative probability exceeds topp.
- [`topP`](../../06-api-reference/interfaces/GenerationConfig.md#topp) - Only samples from the smallest set of tokens whose cumulative probability exceeds topP. Range `[0, 1]`. Values of `0` or `1` disable top-p filtering.

- [`minP`](../../06-api-reference/interfaces/GenerationConfig.md#minp) - Minimum-probability threshold applied after softmax: tokens whose probability is below `minP * max_prob` are excluded from sampling. Range `[0, 1]`. Default `0` disables the filter. Stacks with `topP` when both are set.

- [`repetitionPenalty`](../../06-api-reference/interfaces/GenerationConfig.md#repetitionpenalty) - Multiplicative penalty applied to logits of tokens that already appeared in the prompt or the generated text. Values greater than `1` discourage repetition; default `1` disables the penalty.

:::info[Built-in models ship with sampling defaults]
Model presets expose an optional `generationConfig` that `LLMModule.fromModelName` applies automatically when available — for Qwen3 and LFM2-VL this means the model-card recommended sampling settings are in effect without any explicit `configure` call. Any fields you pass to `configure` still override on a per-field basis.
:::

## Vision-Language Models (VLM)

Some models support multimodal input — text and images together. To use them, pass `capabilities` in the model object when calling [`fromModelName`](../../06-api-reference/classes/LLMModule.md#frommodelname):

```typescript
import { LLMModule, LFM2_VL_1_6B_QUANTIZED } from 'react-native-executorch';
import { LLMModule, LFM2_5_VL_1_6B_QUANTIZED } from 'react-native-executorch';

const llm = await LLMModule.fromModelName(
LFM2_VL_1_6B_QUANTIZED,
LFM2_5_VL_1_6B_QUANTIZED,
undefined,
(token) => console.log(token)
);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package com.swmansion.rnexecutorch

import com.facebook.react.bridge.ReactApplicationContext
import com.facebook.react.bridge.ReactMethod
import com.facebook.react.common.annotations.FrameworkAPI
import com.facebook.react.module.annotations.ReactModule

/**
 * Stand-in TurboModule used when the native ExecuTorch libraries cannot be
 * loaded (e.g. 32-bit Android devices when only arm64-v8a binaries ship).
 * It implements the same spec as ETInstaller, so the JS side still resolves
 * a real linked module, while install() reports false so callers can tell
 * "unsupported ABI" apart from "package not linked".
 */
@OptIn(FrameworkAPI::class)
@ReactModule(name = ETInstallerUnavailable.NAME)
class ETInstallerUnavailable(
  reactContext: ReactApplicationContext,
) : NativeETInstallerSpec(reactContext) {
  companion object {
    const val NAME = NativeETInstallerSpec.NAME
  }

  // Synchronous so JS learns about the unavailability immediately.
  @ReactMethod(isBlockingSynchronousMethod = true)
  override fun install(): Boolean = false
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,18 @@ class RnExecutorchPackage : TurboReactPackage() {
reactContext: ReactApplicationContext,
): NativeModule? =
if (name == ETInstaller.NAME) {
ETInstaller(reactContext)
try {
ETInstaller(reactContext)
} catch (e: RuntimeException) {
if (e.cause is UnsatisfiedLinkError) {
// Native library not available (e.g. 32-bit device without arm64-v8a .so).
// Return a fallback module whose install() returns false so JS can
// distinguish "unsupported ABI" from "package not linked."
ETInstallerUnavailable(reactContext)
} else {
throw e
}
}
} else {
null
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,15 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
synchronousHostFunction<&Model::setTopp>,
"setTopp"));

addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
synchronousHostFunction<&Model::setMinP>,
"setMinP"));

addFunctions(JSI_EXPORT_FUNCTION(
ModelHostObject<Model>,
synchronousHostFunction<&Model::setRepetitionPenalty>,
"setRepetitionPenalty"));

addFunctions(JSI_EXPORT_FUNCTION(
ModelHostObject<Model>,
synchronousHostFunction<&Model::getMaxContextLength>,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,30 @@ void LLM::setTopp(float topp) {
runner_->set_topp(topp);
}

// Sets the min-p sampling threshold on the underlying runner.
// Throws ModuleNotLoaded if no model is loaded and InvalidConfig if the
// value lies outside [0, 1]. (NaN passes through unchanged, matching the
// other setters in this file.)
void LLM::setMinP(float minP) {
  if (runner_ == nullptr || !runner_->is_loaded()) {
    throw RnExecutorchError(RnExecutorchErrorCode::ModuleNotLoaded,
                            "Can't configure a model that's not loaded");
  }
  const bool belowRange = minP < 0.0f;
  const bool aboveRange = minP > 1.0f;
  if (belowRange || aboveRange) {
    throw RnExecutorchError(RnExecutorchErrorCode::InvalidConfig,
                            "Min-p must be between 0.0 and 1.0");
  }
  runner_->set_min_p(minP);
}

// Sets the multiplicative repetition penalty on the underlying runner.
// Throws ModuleNotLoaded if no model is loaded and InvalidConfig for
// negative values; any non-negative value is forwarded as-is.
void LLM::setRepetitionPenalty(float repetitionPenalty) {
  if (runner_ == nullptr || !runner_->is_loaded()) {
    throw RnExecutorchError(RnExecutorchErrorCode::ModuleNotLoaded,
                            "Can't configure a model that's not loaded");
  }
  const bool isNegative = repetitionPenalty < 0.0f;
  if (isNegative) {
    throw RnExecutorchError(RnExecutorchErrorCode::InvalidConfig,
                            "Repetition penalty must be non-negative");
  }
  runner_->set_repetition_penalty(repetitionPenalty);
}

int32_t LLM::getMaxContextLength() const {
if (!runner_ || !runner_->is_loaded()) {
throw RnExecutorchError(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ class LLM : public BaseModel {
void setCountInterval(size_t countInterval);
void setTemperature(float temperature);
void setTopp(float topp);
void setMinP(float minP);
void setRepetitionPenalty(float repetitionPenalty);
void setTimeInterval(size_t timeInterval);
int32_t getMaxContextLength() const;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,12 @@ add_rn_test(RunnerTests unit/RunnerTest.cpp
integration/stubs/jsi_stubs.cpp
LIBS tokenizers_deps
)
# Unit tests for the sampler extensions (min-p and repetition penalty).
# NOTE(review): LIBS is left empty here while the sibling RunnerTests entry
# passes tokenizers_deps — confirm an empty LIBS keyword is accepted by
# add_rn_test and that sampler.cpp truly needs no extra link targets.
add_rn_test(SamplerTests unit/SamplerTest.cpp
SOURCES
${COMMON_DIR}/runner/sampler.cpp
${COMMON_DIR}/runner/arange_util.cpp
LIBS
)
add_rn_test(LogTests unit/LogTest.cpp)
add_rn_test(FileUtilsTest unit/FileUtilsTest.cpp)
add_rn_test(ImageProcessingTest unit/ImageProcessingTest.cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,31 @@ TEST_F(LLMTest, SetToppInvalidThrows) {
EXPECT_THROW(model.setTopp(1.1f), RnExecutorchError);
}

// Min-p accepts the full inclusive range [0, 1]; both boundary values are
// valid (0 disables the filter per the docs).
TEST_F(LLMTest, SetMinP) {
  LLM model(kValidModelPath, kValidTokenizerPath, {}, mockInvoker_);
  EXPECT_NO_THROW(model.setMinP(0.0f));
  EXPECT_NO_THROW(model.setMinP(0.15f));
  EXPECT_NO_THROW(model.setMinP(1.0f));
}

// Values outside [0, 1] must be rejected with RnExecutorchError.
TEST_F(LLMTest, SetMinPInvalidThrows) {
  LLM model(kValidModelPath, kValidTokenizerPath, {}, mockInvoker_);
  EXPECT_THROW(model.setMinP(-0.1f), RnExecutorchError);
  EXPECT_THROW(model.setMinP(1.1f), RnExecutorchError);
}

// Repetition penalty accepts any non-negative value; 1.0 is the neutral
// (penalty-disabled) setting.
TEST_F(LLMTest, SetRepetitionPenalty) {
  LLM model(kValidModelPath, kValidTokenizerPath, {}, mockInvoker_);
  EXPECT_NO_THROW(model.setRepetitionPenalty(1.0f));
  EXPECT_NO_THROW(model.setRepetitionPenalty(1.05f));
  EXPECT_NO_THROW(model.setRepetitionPenalty(2.0f));
}

// Only negative penalties are invalid (note: 0 is currently allowed).
TEST_F(LLMTest, SetRepetitionPenaltyInvalidThrows) {
  LLM model(kValidModelPath, kValidTokenizerPath, {}, mockInvoker_);
  EXPECT_THROW(model.setRepetitionPenalty(-0.1f), RnExecutorchError);
}

TEST_F(LLMTest, SetCountInterval) {
LLM model(kValidModelPath, kValidTokenizerPath, {}, mockInvoker_);
EXPECT_NO_THROW(model.setCountInterval(5));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,11 @@ class StubRunner : public ::executorch::extension::llm::BaseLLMRunner {
return ::executorch::runtime::Error::Ok;
}
void stop_impl() override {}
void set_temperature_impl(float t) override { last_temp_ = t; }
void set_topp_impl(float) override {}
void set_count_interval_impl(size_t) override {}
void set_time_interval_impl(size_t) override {}

int32_t resolve_max(int32_t prompt, int32_t seq_len, int32_t ctx_len,
int32_t max_new = -1) const {
return resolve_max_new_tokens(prompt, seq_len, ctx_len, max_new);
}

bool loaded_ = false;
float last_temp_ = -1.f;
};
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,10 @@ TEST(MultimodalInputTest, EmptyStringIsStillText) {
// BaseLLMRunner via StubRunner
// ============================================================================

TEST(BaseLLMRunnerTest, SetTemperatureUpdatesConfigAndCallsImpl) {
// set_temperature should persist the value into the runner's config struct.
// (The per-impl callback check was dropped along with set_temperature_impl.)
TEST(BaseLLMRunnerTest, SetTemperatureUpdatesConfig) {
  StubRunner runner(nullptr, "dummy");
  runner.set_temperature(0.42f);
  EXPECT_FLOAT_EQ(runner.config_.temperature, 0.42f);
}

TEST(BaseLLMRunnerTest, SetToppUpdatesConfig) {
Expand All @@ -89,3 +88,15 @@ TEST(BaseLLMRunnerTest, GenerateEmptyStringReturnsError) {
auto err = runner.generate("", {}, {}, {});
EXPECT_NE(err, ::executorch::runtime::Error::Ok);
}


// set_min_p should persist the value into the runner's config struct.
TEST(BaseLLMRunnerTest, SetMinPUpdatesConfig) {
  StubRunner runner(nullptr, "dummy");
  runner.set_min_p(0.15f);
  EXPECT_FLOAT_EQ(runner.config_.min_p, 0.15f);
}

// set_repetition_penalty should persist the value into the runner's config.
TEST(BaseLLMRunnerTest, SetRepetitionPenaltyUpdatesConfig) {
  StubRunner runner(nullptr, "dummy");
  runner.set_repetition_penalty(1.05f);
  EXPECT_FLOAT_EQ(runner.config_.repetition_penalty, 1.05f);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <gtest/gtest.h>
#include <runner/sampler.h>
#include <vector>

using namespace executorch::extension::llm;

// Helper: draw `n` samples from `s` and tally how often each vocab index
// wins. The logits are copied fresh for every draw — presumably because
// Sampler::sample modifies its buffer in place (the original did the same;
// TODO confirm against sampler.h).
template <typename T>
std::vector<int> sampleMany(Sampler &s, std::vector<T> logits,
                            const std::vector<uint64_t> &recent, int n) {
  std::vector<int> counts(logits.size(), 0);
  for (int draw = 0; draw < n; ++draw) {
    std::vector<T> scratch(logits);
    ++counts[s.sample(scratch.data(), recent)];
  }
  return counts;
}

// 1. Repetition penalty on a positive logit: token 0 appears in `recent`,
//    so with otherwise equal logits it should win noticeably fewer than
//    half the 2000 draws. The 1200 bound leaves statistical headroom.
//    Sampler ctor args appear to be (vocabSize, temperature, topP, seed,
//    minP, repetitionPenalty) — inferred from the other tests here;
//    confirm against sampler.h.
TEST(SamplerTest, RepetitionPenaltyReducesPositiveLogit) {
  Sampler s(2, 1.0f, 1.0f, 0, 0.0f, 1.3f);
  std::vector<float> logits = {1.0f, 1.0f};
  std::vector<uint64_t> recent = {0};
  auto counts = sampleMany(s, logits, recent, 2000);
  EXPECT_LT(counts[0], 1200);
}

// 2. Repetition penalty on a negative logit: the already-unlikely token 1
//    is penalised further, so it should appear in well under 10% of draws.
TEST(SamplerTest, RepetitionPenaltyMultipliesNegativeLogit) {
  Sampler s(2, 1.0f, 1.0f, 0, 0.0f, 1.5f);
  std::vector<float> logits = {0.0f, -1.0f};
  std::vector<uint64_t> recent = {1};
  auto counts = sampleMany(s, logits, recent, 2000);
  EXPECT_LT(counts[1], 200);
}

// 3. Empty `recent` list — the penalty has nothing to apply to, so a
//    penalised sampler must match the baseline distribution (tolerance 300
//    of 2000 draws absorbs sampling noise).
TEST(SamplerTest, RepetitionPenaltyNoRecentTokensHasNoEffect) {
  Sampler baseline(2, 1.0f, 1.0f, 0, 0.0f, 1.0f);
  Sampler penalised(2, 1.0f, 1.0f, 0, 0.0f, 2.0f);
  std::vector<float> logits_b = {1.0f, 1.0f};
  std::vector<float> logits_p = {1.0f, 1.0f};
  std::vector<uint64_t> recent = {};
  auto cb = sampleMany(baseline, logits_b, recent, 2000);
  auto cp = sampleMany(penalised, logits_p, recent, 2000);
  EXPECT_NEAR(cb[0], cp[0], 300);
}

// 4. Min-p truncation: with logits {5, -5, -5} the tail tokens carry
//    ~4.5e-5 probability each, far below 0.1 * max_prob (~0.1), so they
//    must never be sampled — token 0 takes all 1000 draws.
TEST(SamplerTest, MinPFiltersTailTokens) {
  Sampler s(3, 1.0f, 1.0f, 0, 0.1f, 1.0f);
  std::vector<float> logits = {5.0f, -5.0f, -5.0f};
  std::vector<uint64_t> recent = {};
  auto counts = sampleMany(s, logits, recent, 1000);
  EXPECT_EQ(counts[1], 0);
  EXPECT_EQ(counts[2], 0);
  EXPECT_EQ(counts[0], 1000);
}

// 5. Min-p = 0 disables filtering. NOTE(review): temperature is 0 here
//    (second ctor arg), unlike the other tests — presumably that makes
//    selection greedy/deterministic, which is why a single sample() call
//    with an exact EXPECT_EQ suffices; confirm against sampler.h.
TEST(SamplerTest, MinPZeroDisablesFiltering) {
  Sampler s(3, 0.0f, 1.0f, 0, 0.0f, 1.0f);
  std::vector<float> logits = {1.0f, -1000.0f, -1000.0f};
  std::vector<uint64_t> recent = {};
  EXPECT_EQ(s.sample(logits.data(), recent), 0);
}

// 6. Min-p (0.2) and top-p (0.5) stacked: either filter alone already
//    excludes the two tail tokens, so they must never be drawn.
TEST(SamplerTest, MinPAndToppStack) {
  Sampler s(4, 1.0f, 0.5f, 0, 0.2f, 1.0f);
  std::vector<float> logits = {5.0f, 2.0f, -2.0f, -5.0f};
  std::vector<uint64_t> recent = {};
  auto counts = sampleMany(s, logits, recent, 2000);
  EXPECT_EQ(counts[2], 0);
  EXPECT_EQ(counts[3], 0);
}
Loading
Loading