From 15ddd07ba8dc131bab05f1a3e6b21a19649ec518 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= <mateusz.sluszniak@swmansion.com>
Date: Fri, 24 Apr 2026 13:08:31 +0200
Subject: [PATCH 1/6] feat: add DISTILUSE_BASE_MULTILINGUAL_CASED_V2 text
 embeddings model
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses the multilingual half of #945. Shipping only the WordPiece
tokenizer model for now — paraphrase-multilingual-MiniLM-L12-v2 needs
Unigram/Precompiled/Metaspace support in executorch/extension/llm/
tokenizers, which is in-flight upstream.

The model lives at
software-mansion/react-native-executorch-distiluse-base-multilingual-cased-v2
under tag v0.9.0, so the constant uses NEXT_VERSION_TAG.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .cspell-wordlist.txt                              |  3 +++
 .../text-embeddings/app/text-embeddings/index.tsx |  5 +++++
 .../useTextEmbeddings.md                          | 15 ++++++++-------
 .../src/constants/modelUrls.ts                    | 14 +++++++++++++-
 .../src/types/textEmbeddings.ts                   |  1 +
 5 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/.cspell-wordlist.txt b/.cspell-wordlist.txt
index f1d55b4c98..5b815c3381 100644
--- a/.cspell-wordlist.txt
+++ b/.cspell-wordlist.txt
@@ -53,6 +53,9 @@ Lexend
 finetuned
 MINILM
 MPNET
+DISTILUSE
+distiluse
+Distil
 QINT
 FNUZ
 wordlist
diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx
index e31097940c..756c93c48e 100644
--- a/apps/text-embeddings/app/text-embeddings/index.tsx
+++ b/apps/text-embeddings/app/text-embeddings/index.tsx
@@ -18,6 +18,7 @@ import {
   ALL_MPNET_BASE_V2,
   MULTI_QA_MINILM_L6_COS_V1,
   MULTI_QA_MPNET_BASE_DOT_V1,
+  DISTILUSE_BASE_MULTILINGUAL_CASED_V2,
   TextEmbeddingsProps,
 } from 'react-native-executorch';
 
@@ -28,6 +29,10 @@ const MODELS: { label: string; value: TextEmbeddingModel }[] = [
   { label: 'MPNet Base', value: ALL_MPNET_BASE_V2 },
   { label: 'MultiQA MiniLM', value: MULTI_QA_MINILM_L6_COS_V1 },
   { label: 'MultiQA MPNet', value: MULTI_QA_MPNET_BASE_DOT_V1 },
+  {
+    label: 'Multilingual DistilUSE',
+    value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2,
+  },
 ];
 import { useIsFocused } from '@react-navigation/native';
 import { dotProduct } from '../../utils/math';
diff --git a/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md b/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md
index 84c8499808..2f92eb6e8f 100644
--- a/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md
+++ b/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md
@@ -101,13 +101,14 @@ function App() {
 
 ## Supported models
 
-| Model                                                                                                 | Language | Max Tokens | Embedding Dimensions | Description                                                                                                                                                                                                                                                                                                                                                                                                                      |
-| ----------------------------------------------------------------------------------------------------- | :------: | :--------: | :------------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| [all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)                     | English  |    254     |         384          | All-round model tuned for many use-cases. Trained on a large and diverse dataset of over 1 billion training pairs.                                                                                                                                                                                                                                                                                                               |
-| [all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2)                   | English  |    382     |         768          | All-round model tuned for many use-cases. Trained on a large and diverse dataset of over 1 billion training pairs.                                                                                                                                                                                                                                                                                                               |
-| [multi-qa-MiniLM-L6-cos-v1](https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1)   | English  |    509     |         384          | This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.                                                                                                                                                                                                                                                          |
-| [multi-qa-mpnet-base-dot-v1](https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1) | English  |    510     |         768          | This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.                                                                                                                                                                                                                                                          |
-| [clip-vit-base-patch32-text](https://huggingface.co/openai/clip-vit-base-patch32)                     | English  |     74     |         512          | CLIP (Contrastive Language-Image Pre-Training) is a neural network trained on a variety of (image, text) pairs. CLIP allows to embed images and text into the same vector space. This allows to find similar images as well as to implement image search. This is the text encoder part of the CLIP model. To embed images checkout [clip-vit-base-patch32-image](../02-computer-vision/useImageEmbeddings.md#supported-models). |
+| Model                                                                                                                     |   Language    | Max Tokens | Embedding Dimensions | Description                                                                                                                                                                                                                                                                                                                                                                                                                      |
+| ------------------------------------------------------------------------------------------------------------------------- | :-----------: | :--------: | :------------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| [all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)                                         |    English    |    254     |         384          | All-round model tuned for many use-cases. Trained on a large and diverse dataset of over 1 billion training pairs.                                                                                                                                                                                                                                                                                                               |
+| [all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2)                                       |    English    |    382     |         768          | All-round model tuned for many use-cases. Trained on a large and diverse dataset of over 1 billion training pairs.                                                                                                                                                                                                                                                                                                               |
+| [multi-qa-MiniLM-L6-cos-v1](https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1)                       |    English    |    509     |         384          | This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.                                                                                                                                                                                                                                                          |
+| [multi-qa-mpnet-base-dot-v1](https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1)                     |    English    |    510     |         768          | This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.                                                                                                                                                                                                                                                          |
+| [distiluse-base-multilingual-cased-v2](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2) | 50+ languages |    126     |         512          | Multilingual DistilBERT with a 768→512 projection head. Recommended when broader language coverage matters more than the exact English quality of MiniLM/MPNet.                                                                                                                                                                                                                                                                  |
+| [clip-vit-base-patch32-text](https://huggingface.co/openai/clip-vit-base-patch32)                                         |    English    |     74     |         512          | CLIP (Contrastive Language-Image Pre-Training) is a neural network trained on a variety of (image, text) pairs. CLIP allows to embed images and text into the same vector space. This allows to find similar images as well as to implement image search. This is the text encoder part of the CLIP model. To embed images checkout [clip-vit-base-patch32-image](../02-computer-vision/useImageEmbeddings.md#supported-models). |
 
 **`Max Tokens`** - The maximum number of tokens that can be processed by the model. If the input text exceeds this limit, it will be truncated.
 
diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts
index 92cd95bd7a..fcd6c14ef4 100644
--- a/packages/react-native-executorch/src/constants/modelUrls.ts
+++ b/packages/react-native-executorch/src/constants/modelUrls.ts
@@ -1,5 +1,5 @@
 import { Platform } from 'react-native';
-import { URL_PREFIX, VERSION_TAG } from './versions';
+import { URL_PREFIX, VERSION_TAG, NEXT_VERSION_TAG } from './versions';
 
 // LLMs
 
@@ -987,6 +987,8 @@ const MULTI_QA_MINILM_L6_COS_V1_MODEL = `${URL_PREFIX}-multi-qa-MiniLM-L6-cos-v1
 const MULTI_QA_MINILM_L6_COS_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-MiniLM-L6-cos-v1/${VERSION_TAG}/tokenizer.json`;
 const MULTI_QA_MPNET_BASE_DOT_V1_MODEL = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${VERSION_TAG}/multi-qa-mpnet-base-dot-v1_xnnpack.pte`;
 const MULTI_QA_MPNET_BASE_DOT_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${VERSION_TAG}/tokenizer.json`;
+const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/xnnpack/distiluse-base-multilingual-cased-v2_xnnpack_fp32.pte`;
+const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/tokenizer.json`;
 const CLIP_VIT_BASE_PATCH32_TEXT_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/xnnpack/clip_vit_base_patch32_text_xnnpack_fp32.pte`;
 const CLIP_VIT_BASE_PATCH32_TEXT_TOKENIZER = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/tokenizer.json`;
 
@@ -1026,6 +1028,15 @@ export const MULTI_QA_MPNET_BASE_DOT_V1 = {
   tokenizerSource: MULTI_QA_MPNET_BASE_DOT_V1_TOKENIZER,
 } as const;
 
+/**
+ * @category Models - Text Embeddings
+ */
+export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2 = {
+  modelName: 'distiluse-base-multilingual-cased-v2',
+  modelSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_MODEL,
+  tokenizerSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER,
+} as const;
+
 /**
  * @category Models - Text Embeddings
  */
@@ -1175,6 +1186,7 @@ export const MODEL_REGISTRY = {
     ALL_MPNET_BASE_V2,
     MULTI_QA_MINILM_L6_COS_V1,
     MULTI_QA_MPNET_BASE_DOT_V1,
+    DISTILUSE_BASE_MULTILINGUAL_CASED_V2,
     CLIP_VIT_BASE_PATCH32_TEXT,
     BK_SDM_TINY_VPRED_512,
     BK_SDM_TINY_VPRED_256,
diff --git a/packages/react-native-executorch/src/types/textEmbeddings.ts b/packages/react-native-executorch/src/types/textEmbeddings.ts
index 87b5d6375f..3bdf8df079 100644
--- a/packages/react-native-executorch/src/types/textEmbeddings.ts
+++ b/packages/react-native-executorch/src/types/textEmbeddings.ts
@@ -10,6 +10,7 @@ export type TextEmbeddingsModelName =
   | 'all-mpnet-base-v2'
   | 'multi-qa-minilm-l6-cos-v1'
   | 'multi-qa-mpnet-base-dot-v1'
+  | 'distiluse-base-multilingual-cased-v2'
   | 'clip-vit-base-patch32-text';
 
 /**

From 1a01d54ed482effb82a81238f074f0cfa74bcbdb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= <mateusz.sluszniak@swmansion.com>
Date: Fri, 24 Apr 2026 16:38:39 +0200
Subject: [PATCH 2/6] feat: expose distiluse 8da4w + CoreML fp32/fp16 variants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follows the same scheme-suffix convention used for LLaMA (`_QLORA`,
`_SPINQUANT`) — each variant has its own constant so the caller picks
exactly the quantization / backend combo they want:

  DISTILUSE_BASE_MULTILINGUAL_CASED_V2               xnnpack fp32 (baseline)
  DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W         xnnpack 8da4w
  DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32   coreml  fp32 (iOS/macOS)
  DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16   coreml  fp16 (iOS/macOS)

All four point at the same HF repo tag v0.9.0; tokenizer.json is shared.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .cspell-wordlist.txt                          |  1 +
 .../app/text-embeddings/index.tsx             | 15 +++++++++
 .../useTextEmbeddings.md                      | 13 ++++++++
 .../src/constants/modelUrls.ts                | 33 +++++++++++++++++++
 .../src/types/textEmbeddings.ts               |  3 ++
 5 files changed, 65 insertions(+)

diff --git a/.cspell-wordlist.txt b/.cspell-wordlist.txt
index 5b815c3381..d611f986b6 100644
--- a/.cspell-wordlist.txt
+++ b/.cspell-wordlist.txt
@@ -56,6 +56,7 @@ MPNET
 DISTILUSE
 distiluse
 Distil
+torchao
 QINT
 FNUZ
 wordlist
diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx
index 756c93c48e..da48b27ceb 100644
--- a/apps/text-embeddings/app/text-embeddings/index.tsx
+++ b/apps/text-embeddings/app/text-embeddings/index.tsx
@@ -19,6 +19,9 @@ import {
   MULTI_QA_MINILM_L6_COS_V1,
   MULTI_QA_MPNET_BASE_DOT_V1,
   DISTILUSE_BASE_MULTILINGUAL_CASED_V2,
+  DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W,
+  DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32,
+  DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16,
   TextEmbeddingsProps,
 } from 'react-native-executorch';
 
@@ -33,6 +36,18 @@ const MODELS: { label: string; value: TextEmbeddingModel }[] = [
     label: 'Multilingual DistilUSE',
     value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2,
   },
+  {
+    label: 'Multilingual DistilUSE (8da4w)',
+    value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W,
+  },
+  {
+    label: 'Multilingual DistilUSE (CoreML fp32)',
+    value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32,
+  },
+  {
+    label: 'Multilingual DistilUSE (CoreML fp16)',
+    value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16,
+  },
 ];
 import { useIsFocused } from '@react-navigation/native';
 import { dotProduct } from '../../utils/math';
diff --git a/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md b/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md
index 2f92eb6e8f..b519d8a073 100644
--- a/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md
+++ b/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md
@@ -117,3 +117,16 @@ function App() {
 :::note
 For the supported models, the returned embedding vector is normalized, meaning that its length is equal to 1. This allows for easier comparison of vectors using cosine similarity, just calculate the dot product of two vectors to get the cosine similarity score.
 :::
+
+### distiluse-base-multilingual-cased-v2 variants
+
+`distiluse-base-multilingual-cased-v2` ships in four flavours so you can trade size, latency, and platform. All share the same tokenizer and embedding dimension — only the `.pte` differs.
+
+| Constant                                           | Backend | Precision | Size   | Platforms              |
+| -------------------------------------------------- | ------- | --------- | ------ | ---------------------- |
+| `DISTILUSE_BASE_MULTILINGUAL_CASED_V2`             | XNNPACK | fp32      | 516 MB | iOS, Android (default) |
+| `DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W`       | XNNPACK | 8da4w     | 375 MB | iOS, Android           |
+| `DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32` | CoreML  | fp32      | 516 MB | iOS / macOS only       |
+| `DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16` | CoreML  | fp16      | 258 MB | iOS / macOS only       |
+
+`8da4w` is Int8 dynamic activation + Int4 weight (torchao), group_size 32 — only `nn.Linear` layers are quantized, embeddings stay fp32. CoreML variants only load on Apple platforms; pick the XNNPACK baseline if you need a single artifact that runs everywhere.
diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts
index fcd6c14ef4..90dee0f230 100644
--- a/packages/react-native-executorch/src/constants/modelUrls.ts
+++ b/packages/react-native-executorch/src/constants/modelUrls.ts
@@ -988,6 +988,9 @@ const MULTI_QA_MINILM_L6_COS_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-MiniLM-L6-co
 const MULTI_QA_MPNET_BASE_DOT_V1_MODEL = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${VERSION_TAG}/multi-qa-mpnet-base-dot-v1_xnnpack.pte`;
 const MULTI_QA_MPNET_BASE_DOT_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${VERSION_TAG}/tokenizer.json`;
 const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/xnnpack/distiluse-base-multilingual-cased-v2_xnnpack_fp32.pte`;
+const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/xnnpack/distiluse-base-multilingual-cased-v2_xnnpack_8da4w.pte`;
+const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/coreml/distiluse-base-multilingual-cased-v2_coreml_fp32.pte`;
+const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/coreml/distiluse-base-multilingual-cased-v2_coreml_fp16.pte`;
 const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/tokenizer.json`;
 const CLIP_VIT_BASE_PATCH32_TEXT_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/xnnpack/clip_vit_base_patch32_text_xnnpack_fp32.pte`;
 const CLIP_VIT_BASE_PATCH32_TEXT_TOKENIZER = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/tokenizer.json`;
@@ -1037,6 +1040,33 @@ export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2 = {
   tokenizerSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER,
 } as const;
 
+/**
+ * @category Models - Text Embeddings
+ */
+export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W = {
+  modelName: 'distiluse-base-multilingual-cased-v2-8da4w',
+  modelSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W_MODEL,
+  tokenizerSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER,
+} as const;
+
+/**
+ * @category Models - Text Embeddings
+ */
+export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32 = {
+  modelName: 'distiluse-base-multilingual-cased-v2-coreml-fp32',
+  modelSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32_MODEL,
+  tokenizerSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER,
+} as const;
+
+/**
+ * @category Models - Text Embeddings
+ */
+export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16 = {
+  modelName: 'distiluse-base-multilingual-cased-v2-coreml-fp16',
+  modelSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16_MODEL,
+  tokenizerSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER,
+} as const;
+
 /**
  * @category Models - Text Embeddings
  */
@@ -1187,6 +1217,9 @@ export const MODEL_REGISTRY = {
     MULTI_QA_MINILM_L6_COS_V1,
     MULTI_QA_MPNET_BASE_DOT_V1,
     DISTILUSE_BASE_MULTILINGUAL_CASED_V2,
+    DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W,
+    DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32,
+    DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16,
     CLIP_VIT_BASE_PATCH32_TEXT,
     BK_SDM_TINY_VPRED_512,
     BK_SDM_TINY_VPRED_256,
diff --git a/packages/react-native-executorch/src/types/textEmbeddings.ts b/packages/react-native-executorch/src/types/textEmbeddings.ts
index 3bdf8df079..c2993900c1 100644
--- a/packages/react-native-executorch/src/types/textEmbeddings.ts
+++ b/packages/react-native-executorch/src/types/textEmbeddings.ts
@@ -11,6 +11,9 @@ export type TextEmbeddingsModelName =
   | 'multi-qa-minilm-l6-cos-v1'
   | 'multi-qa-mpnet-base-dot-v1'
   | 'distiluse-base-multilingual-cased-v2'
+  | 'distiluse-base-multilingual-cased-v2-8da4w'
+  | 'distiluse-base-multilingual-cased-v2-coreml-fp32'
+  | 'distiluse-base-multilingual-cased-v2-coreml-fp16'
   | 'clip-vit-base-patch32-text';
 
 /**

From e4a6c4d1899125260b6fc66d1411c7c139319d06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= <mateusz.sluszniak@swmansion.com>
Date: Fri, 24 Apr 2026 16:50:20 +0200
Subject: [PATCH 3/6] docs: move distiluse variant sizes from useTextEmbeddings
 hook page to benchmarks

Size data belongs next to the other model sizes, not inline in the hook
reference. The useTextEmbeddings page now only lists the model family
(one row) and leaves variant enumeration to the API reference + the
model-size benchmark table.

The size column in model-size.md is renamed from "XNNPACK [MB]" to just
"Size [MB]" since the table now mixes XNNPACK and CoreML rows.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 docs/docs/02-benchmarks/model-size.md          | 18 +++++++++++-------
 .../useTextEmbeddings.md                       | 13 -------------
 2 files changed, 11 insertions(+), 20 deletions(-)

diff --git a/docs/docs/02-benchmarks/model-size.md b/docs/docs/02-benchmarks/model-size.md
index f9f5e4701f..3c5d5a5e92 100644
--- a/docs/docs/02-benchmarks/model-size.md
+++ b/docs/docs/02-benchmarks/model-size.md
@@ -89,13 +89,17 @@ title: Model Size
 
 ## Text Embeddings
 
-| Model                      | XNNPACK [MB] |
-| -------------------------- | :----------: |
-| ALL_MINILM_L6_V2           |      91      |
-| ALL_MPNET_BASE_V2          |     438      |
-| MULTI_QA_MINILM_L6_COS_V1  |      91      |
-| MULTI_QA_MPNET_BASE_DOT_V1 |     438      |
-| CLIP_VIT_BASE_PATCH32_TEXT |     254      |
+| Model                                            |   Size [MB]   |
+| ------------------------------------------------ | :-----------: |
+| ALL_MINILM_L6_V2                                 |      91       |
+| ALL_MPNET_BASE_V2                                |      438      |
+| MULTI_QA_MINILM_L6_COS_V1                        |      91       |
+| MULTI_QA_MPNET_BASE_DOT_V1                       |      438      |
+| CLIP_VIT_BASE_PATCH32_TEXT                       |      254      |
+| DISTILUSE_BASE_MULTILINGUAL_CASED_V2             |      541      |
+| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W       |      393      |
+| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32 |      541      |
+| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16 |      271      |
 
 ## Image Embeddings
 
diff --git a/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md b/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md
index b519d8a073..2f92eb6e8f 100644
--- a/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md
+++ b/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md
@@ -117,16 +117,3 @@ function App() {
 :::note
 For the supported models, the returned embedding vector is normalized, meaning that its length is equal to 1. This allows for easier comparison of vectors using cosine similarity, just calculate the dot product of two vectors to get the cosine similarity score.
 :::
-
-### distiluse-base-multilingual-cased-v2 variants
-
-`distiluse-base-multilingual-cased-v2` ships in four flavours so you can trade size, latency, and platform. All share the same tokenizer and embedding dimension — only the `.pte` differs.
-
-| Constant                                           | Backend | Precision | Size   | Platforms              |
-| -------------------------------------------------- | ------- | --------- | ------ | ---------------------- |
-| `DISTILUSE_BASE_MULTILINGUAL_CASED_V2`             | XNNPACK | fp32      | 516 MB | iOS, Android (default) |
-| `DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W`       | XNNPACK | 8da4w     | 375 MB | iOS, Android           |
-| `DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32` | CoreML  | fp32      | 516 MB | iOS / macOS only       |
-| `DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16` | CoreML  | fp16      | 258 MB | iOS / macOS only       |
-
-`8da4w` is Int8 dynamic activation + Int4 weight (torchao), group_size 32 — only `nn.Linear` layers are quantized, embeddings stay fp32. CoreML variants only load on Apple platforms; pick the XNNPACK baseline if you need a single artifact that runs everywhere.

From cf144a01581484d442c1d24868f719e2c3598b22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= <mateusz.sluszniak@swmansion.com>
Date: Mon, 27 Apr 2026 17:31:57 +0200
Subject: [PATCH 4/6] docs: add DISTILUSE benchmarks for OnePlus 12 and iPhone
 17 Pro

Adds inference-time and memory-usage rows for all four
distiluse-base-multilingual-cased-v2 variants (XNNPACK fp32, XNNPACK
8da4w, Core ML fp32, Core ML fp16). Captured on a OnePlus 12 (Android,
debug build) and an iPhone 17 Pro (iOS, debug build) with a fixed
~80-token sentence over 100 measured forward passes; timings are JS-side
wall-clock around model.forward(). The memory column reports the peak
resident-set delta vs the pre-model-load baseline, sampled with adb
dumpsys meminfo on Android and Xcode's Debug Navigator on iOS.

Also normalizes the text-embeddings table headers to match the
Classification section convention: column header drops the
"(XNNPACK)" suffix and the backend now lives in the per-row label,
which lets multi-backend models (fp32 / 8da4w / Core ML) share a
single table without an artificial column split.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 docs/docs/02-benchmarks/inference-time.md | 18 +++++++++++-------
 docs/docs/02-benchmarks/memory-usage.md   | 22 +++++++++++++---------
 2 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/docs/docs/02-benchmarks/inference-time.md b/docs/docs/02-benchmarks/inference-time.md
index dbc06dc85f..838e920b57 100644
--- a/docs/docs/02-benchmarks/inference-time.md
+++ b/docs/docs/02-benchmarks/inference-time.md
@@ -150,13 +150,17 @@ Average time to synthesize speech from an input text of approximately 60 tokens,
 Benchmark times for text embeddings are highly dependent on the sentence length. The numbers below are based on a sentence of around 80 tokens. For shorter or longer sentences, inference time may vary accordingly.
 :::
 
-| Model                      | iPhone 17 Pro (XNNPACK) [ms] | OnePlus 12 (XNNPACK) [ms] |
-| -------------------------- | :--------------------------: | :-----------------------: |
-| ALL_MINILM_L6_V2           |              7               |            21             |
-| ALL_MPNET_BASE_V2          |              24              |            90             |
-| MULTI_QA_MINILM_L6_COS_V1  |              7               |            19             |
-| MULTI_QA_MPNET_BASE_DOT_V1 |              24              |            88             |
-| CLIP_VIT_BASE_PATCH32_TEXT |              14              |            39             |
+| Model / Device                                       | iPhone 17 Pro [ms] | OnePlus 12 [ms] |
+| ---------------------------------------------------- | :----------------: | :-------------: |
+| ALL_MINILM_L6_V2 (XNNPACK)                           |         7          |       21        |
+| ALL_MPNET_BASE_V2 (XNNPACK)                          |         24         |       90        |
+| MULTI_QA_MINILM_L6_COS_V1 (XNNPACK)                  |         7          |       19        |
+| MULTI_QA_MPNET_BASE_DOT_V1 (XNNPACK)                 |         24         |       88        |
+| CLIP_VIT_BASE_PATCH32_TEXT (XNNPACK)                 |         14         |       39        |
+| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (XNNPACK FP32)  |         47         |       41        |
+| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (XNNPACK 8da4w) |         16         |       15        |
+| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (Core ML FP32)  |         15         |        -        |
+| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (Core ML FP16)  |         19         |        -        |
 
 ## Image Embeddings
 
diff --git a/docs/docs/02-benchmarks/memory-usage.md b/docs/docs/02-benchmarks/memory-usage.md
index a225b724d1..52507ce9c1 100644
--- a/docs/docs/02-benchmarks/memory-usage.md
+++ b/docs/docs/02-benchmarks/memory-usage.md
@@ -98,13 +98,17 @@ The reported memory usage values include the memory footprint of the Phonemis pa
 
 ## Text Embeddings
 
-| Model / Device                       | iPhone 17 Pro [MB] | OnePlus 12 [MB] |
-| ------------------------------------ | :----------------: | :-------------: |
-| ALL_MINILM_L6_V2 (XNNPACK)           |        110         |       95        |
-| ALL_MPNET_BASE_V2 (XNNPACK)          |        455         |       405       |
-| MULTI_QA_MINILM_L6_COS_V1 (XNNPACK)  |        140         |       120       |
-| MULTI_QA_MPNET_BASE_DOT_V1 (XNNPACK) |        455         |       435       |
-| CLIP_VIT_BASE_PATCH32_TEXT (XNNPACK) |        280         |       200       |
+| Model / Device                                       | iPhone 17 Pro [MB] | OnePlus 12 [MB] |
+| ---------------------------------------------------- | :----------------: | :-------------: |
+| ALL_MINILM_L6_V2 (XNNPACK)                           |        110         |       95        |
+| ALL_MPNET_BASE_V2 (XNNPACK)                          |        455         |       405       |
+| MULTI_QA_MINILM_L6_COS_V1 (XNNPACK)                  |        140         |       120       |
+| MULTI_QA_MPNET_BASE_DOT_V1 (XNNPACK)                 |        455         |       435       |
+| CLIP_VIT_BASE_PATCH32_TEXT (XNNPACK)                 |        280         |       200       |
+| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (XNNPACK FP32)  |        175         |       196       |
+| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (XNNPACK 8da4w) |         36         |       44        |
+| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (Core ML FP32)  |         55         |        -        |
+| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (Core ML FP16)  |        143         |        -        |
 
 ## Image Embeddings
 
@@ -120,8 +124,8 @@ output. When resize is enabled, expect higher memory usage and inference time
 with higher resolutions.
 :::
 
-| Model / Device              | iPhone 17 Pro [MB] | OnePlus 12 [MB] |
-| --------------------------- | :----------------: | :-------------: |
+| Model / Device               | iPhone 17 Pro [MB] | OnePlus 12 [MB] |
+| ---------------------------- | :----------------: | :-------------: |
 | DEEPLABV3_RESNET50 (XNNPACK) |        660         |       930       |
 
 ## Instance Segmentation

From e318d6a3f6a9959923f77a36528232481bcc92cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= <mateusz.sluszniak@swmansion.com>
Date: Tue, 28 Apr 2026 13:38:50 +0200
Subject: [PATCH 5/6] refactor: drop fp32 suffix from CoreML default-precision
 variant
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Renames `DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32` →
`DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML` to match the existing
convention where the default-precision XNNPACK variant has no precision
suffix. The fp16 variant keeps its suffix since it's non-default.

Per review feedback on #1098.
---
 .../app/text-embeddings/index.tsx             |  6 ++---
 docs/docs/02-benchmarks/model-size.md         | 22 +++++++++----------
 .../src/constants/modelUrls.ts                | 10 ++++-----
 .../src/types/textEmbeddings.ts               |  2 +-
 4 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx
index da48b27ceb..d049b0c131 100644
--- a/apps/text-embeddings/app/text-embeddings/index.tsx
+++ b/apps/text-embeddings/app/text-embeddings/index.tsx
@@ -20,7 +20,7 @@ import {
   MULTI_QA_MPNET_BASE_DOT_V1,
   DISTILUSE_BASE_MULTILINGUAL_CASED_V2,
   DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W,
-  DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32,
+  DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML,
   DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16,
   TextEmbeddingsProps,
 } from 'react-native-executorch';
@@ -41,8 +41,8 @@ const MODELS: { label: string; value: TextEmbeddingModel }[] = [
     value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W,
   },
   {
-    label: 'Multilingual DistilUSE (CoreML fp32)',
-    value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32,
+    label: 'Multilingual DistilUSE (CoreML)',
+    value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML,
   },
   {
     label: 'Multilingual DistilUSE (CoreML fp16)',
diff --git a/docs/docs/02-benchmarks/model-size.md b/docs/docs/02-benchmarks/model-size.md
index 3c5d5a5e92..a4f5a6c0eb 100644
--- a/docs/docs/02-benchmarks/model-size.md
+++ b/docs/docs/02-benchmarks/model-size.md
@@ -89,17 +89,17 @@ title: Model Size
 
 ## Text Embeddings
 
-| Model                                            |   Size [MB]   |
-| ------------------------------------------------ | :-----------: |
-| ALL_MINILM_L6_V2                                 |      91       |
-| ALL_MPNET_BASE_V2                                |      438      |
-| MULTI_QA_MINILM_L6_COS_V1                        |      91       |
-| MULTI_QA_MPNET_BASE_DOT_V1                       |      438      |
-| CLIP_VIT_BASE_PATCH32_TEXT                       |      254      |
-| DISTILUSE_BASE_MULTILINGUAL_CASED_V2             |      541      |
-| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W       |      393      |
-| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32 |      541      |
-| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16 |      271      |
+| Model                                            | Size [MB] |
+| ------------------------------------------------ | :-------: |
+| ALL_MINILM_L6_V2                                 |    91     |
+| ALL_MPNET_BASE_V2                                |    438    |
+| MULTI_QA_MINILM_L6_COS_V1                        |    91     |
+| MULTI_QA_MPNET_BASE_DOT_V1                       |    438    |
+| CLIP_VIT_BASE_PATCH32_TEXT                       |    254    |
+| DISTILUSE_BASE_MULTILINGUAL_CASED_V2             |    541    |
+| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W       |    393    |
+| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML      |    541    |
+| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16 |    271    |
 
 ## Image Embeddings
 
diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts
index 90dee0f230..6e693a07d8 100644
--- a/packages/react-native-executorch/src/constants/modelUrls.ts
+++ b/packages/react-native-executorch/src/constants/modelUrls.ts
@@ -989,7 +989,7 @@ const MULTI_QA_MPNET_BASE_DOT_V1_MODEL = `${URL_PREFIX}-multi-qa-mpnet-base-dot-
 const MULTI_QA_MPNET_BASE_DOT_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${VERSION_TAG}/tokenizer.json`;
 const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/xnnpack/distiluse-base-multilingual-cased-v2_xnnpack_fp32.pte`;
 const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/xnnpack/distiluse-base-multilingual-cased-v2_xnnpack_8da4w.pte`;
-const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/coreml/distiluse-base-multilingual-cased-v2_coreml_fp32.pte`;
+const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/coreml/distiluse-base-multilingual-cased-v2_coreml_fp32.pte`;
 const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/coreml/distiluse-base-multilingual-cased-v2_coreml_fp16.pte`;
 const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/tokenizer.json`;
 const CLIP_VIT_BASE_PATCH32_TEXT_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/xnnpack/clip_vit_base_patch32_text_xnnpack_fp32.pte`;
@@ -1052,9 +1052,9 @@ export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W = {
 /**
  * @category Models - Text Embeddings
  */
-export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32 = {
-  modelName: 'distiluse-base-multilingual-cased-v2-coreml-fp32',
-  modelSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32_MODEL,
+export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML = {
+  modelName: 'distiluse-base-multilingual-cased-v2-coreml',
+  modelSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_MODEL,
   tokenizerSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER,
 } as const;
 
@@ -1218,7 +1218,7 @@ export const MODEL_REGISTRY = {
     MULTI_QA_MPNET_BASE_DOT_V1,
     DISTILUSE_BASE_MULTILINGUAL_CASED_V2,
     DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W,
-    DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32,
+    DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML,
     DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16,
     CLIP_VIT_BASE_PATCH32_TEXT,
     BK_SDM_TINY_VPRED_512,
diff --git a/packages/react-native-executorch/src/types/textEmbeddings.ts b/packages/react-native-executorch/src/types/textEmbeddings.ts
index c2993900c1..6a2fb7af32 100644
--- a/packages/react-native-executorch/src/types/textEmbeddings.ts
+++ b/packages/react-native-executorch/src/types/textEmbeddings.ts
@@ -12,7 +12,7 @@ export type TextEmbeddingsModelName =
   | 'multi-qa-mpnet-base-dot-v1'
   | 'distiluse-base-multilingual-cased-v2'
   | 'distiluse-base-multilingual-cased-v2-8da4w'
-  | 'distiluse-base-multilingual-cased-v2-coreml-fp32'
+  | 'distiluse-base-multilingual-cased-v2-coreml'
   | 'distiluse-base-multilingual-cased-v2-coreml-fp16'
   | 'clip-vit-base-patch32-text';
 

From 8c71bfbf5623b5e04b3e605c9eaa03679b8f7572 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= <mateusz.sluszniak@swmansion.com>
Date: Tue, 28 Apr 2026 14:30:08 +0200
Subject: [PATCH 6/6] refactor: drop XNNPACK fp32 and CoreML fp16 distiluse
 variants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tatoeba bitext mining (eng↔X for X ∈ {pol, deu, fra, spa, rus, jpn},
1000 pairs each) shows that all 4 variants land within 0.2 percentage
points of each other on Recall@1 and within 0.1 percentage points on
Recall@10. CoreML fp32 is bit-exact with XNNPACK fp32; CoreML fp16
differs at the 5th decimal; 8da4w drifts ~1% in cosine similarity, but
retrieval is unaffected.

Drop:
- XNNPACK fp32 (bare _V2): Pareto-dominated on iPhone by COREML and on
  Android by 8DA4W (speed and memory). No retained quality benefit.
- COREML_FP16: identical retrieval quality to COREML fp32 but slower
  on iPhone (19 vs 15 ms) and uses more memory (143 vs 55 MB).

Only two variants remain: _8DA4W (XNNPACK; the pick for Android, also
runs on iOS) and _COREML (iOS only).
---
 .../app/text-embeddings/index.tsx             | 10 ---------
 docs/docs/02-benchmarks/inference-time.md     |  2 --
 docs/docs/02-benchmarks/memory-usage.md       |  2 --
 docs/docs/02-benchmarks/model-size.md         | 20 ++++++++---------
 .../src/constants/modelUrls.ts                | 22 -------------------
 .../src/types/textEmbeddings.ts               |  2 --
 6 files changed, 9 insertions(+), 49 deletions(-)

diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx
index d049b0c131..772c9d39f2 100644
--- a/apps/text-embeddings/app/text-embeddings/index.tsx
+++ b/apps/text-embeddings/app/text-embeddings/index.tsx
@@ -18,10 +18,8 @@ import {
   ALL_MPNET_BASE_V2,
   MULTI_QA_MINILM_L6_COS_V1,
   MULTI_QA_MPNET_BASE_DOT_V1,
-  DISTILUSE_BASE_MULTILINGUAL_CASED_V2,
   DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W,
   DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML,
-  DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16,
   TextEmbeddingsProps,
 } from 'react-native-executorch';
 
@@ -32,10 +30,6 @@ const MODELS: { label: string; value: TextEmbeddingModel }[] = [
   { label: 'MPNet Base', value: ALL_MPNET_BASE_V2 },
   { label: 'MultiQA MiniLM', value: MULTI_QA_MINILM_L6_COS_V1 },
   { label: 'MultiQA MPNet', value: MULTI_QA_MPNET_BASE_DOT_V1 },
-  {
-    label: 'Multilingual DistilUSE',
-    value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2,
-  },
   {
     label: 'Multilingual DistilUSE (8da4w)',
     value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W,
@@ -44,10 +38,6 @@ const MODELS: { label: string; value: TextEmbeddingModel }[] = [
     label: 'Multilingual DistilUSE (CoreML)',
     value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML,
   },
-  {
-    label: 'Multilingual DistilUSE (CoreML fp16)',
-    value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16,
-  },
 ];
 import { useIsFocused } from '@react-navigation/native';
 import { dotProduct } from '../../utils/math';
diff --git a/docs/docs/02-benchmarks/inference-time.md b/docs/docs/02-benchmarks/inference-time.md
index 838e920b57..6f23dd92d8 100644
--- a/docs/docs/02-benchmarks/inference-time.md
+++ b/docs/docs/02-benchmarks/inference-time.md
@@ -157,10 +157,8 @@ Benchmark times for text embeddings are highly dependent on the sentence length.
 | MULTI_QA_MINILM_L6_COS_V1 (XNNPACK)                  |         7          |       19        |
 | MULTI_QA_MPNET_BASE_DOT_V1 (XNNPACK)                 |         24         |       88        |
 | CLIP_VIT_BASE_PATCH32_TEXT (XNNPACK)                 |         14         |       39        |
-| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (XNNPACK FP32)  |         47         |       41        |
 | DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (XNNPACK 8da4w) |         16         |       15        |
 | DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (Core ML FP32)  |         15         |        -        |
-| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (Core ML FP16)  |         19         |        -        |
 
 ## Image Embeddings
 
diff --git a/docs/docs/02-benchmarks/memory-usage.md b/docs/docs/02-benchmarks/memory-usage.md
index 52507ce9c1..88cd19698e 100644
--- a/docs/docs/02-benchmarks/memory-usage.md
+++ b/docs/docs/02-benchmarks/memory-usage.md
@@ -105,10 +105,8 @@ The reported memory usage values include the memory footprint of the Phonemis pa
 | MULTI_QA_MINILM_L6_COS_V1 (XNNPACK)                  |        140         |       120       |
 | MULTI_QA_MPNET_BASE_DOT_V1 (XNNPACK)                 |        455         |       435       |
 | CLIP_VIT_BASE_PATCH32_TEXT (XNNPACK)                 |        280         |       200       |
-| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (XNNPACK FP32)  |        175         |       196       |
 | DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (XNNPACK 8da4w) |         36         |       44        |
 | DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (Core ML FP32)  |         55         |        -        |
-| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (Core ML FP16)  |        143         |        -        |
 
 ## Image Embeddings
 
diff --git a/docs/docs/02-benchmarks/model-size.md b/docs/docs/02-benchmarks/model-size.md
index a4f5a6c0eb..14c1777689 100644
--- a/docs/docs/02-benchmarks/model-size.md
+++ b/docs/docs/02-benchmarks/model-size.md
@@ -89,17 +89,15 @@ title: Model Size
 
 ## Text Embeddings
 
-| Model                                            | Size [MB] |
-| ------------------------------------------------ | :-------: |
-| ALL_MINILM_L6_V2                                 |    91     |
-| ALL_MPNET_BASE_V2                                |    438    |
-| MULTI_QA_MINILM_L6_COS_V1                        |    91     |
-| MULTI_QA_MPNET_BASE_DOT_V1                       |    438    |
-| CLIP_VIT_BASE_PATCH32_TEXT                       |    254    |
-| DISTILUSE_BASE_MULTILINGUAL_CASED_V2             |    541    |
-| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W       |    393    |
-| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML      |    541    |
-| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16 |    271    |
+| Model                                       | Size [MB] |
+| ------------------------------------------- | :-------: |
+| ALL_MINILM_L6_V2                            |    91     |
+| ALL_MPNET_BASE_V2                           |    438    |
+| MULTI_QA_MINILM_L6_COS_V1                   |    91     |
+| MULTI_QA_MPNET_BASE_DOT_V1                  |    438    |
+| CLIP_VIT_BASE_PATCH32_TEXT                  |    254    |
+| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W  |    393    |
+| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML |    541    |
 
 ## Image Embeddings
 
diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts
index 6e693a07d8..2f05fe3a63 100644
--- a/packages/react-native-executorch/src/constants/modelUrls.ts
+++ b/packages/react-native-executorch/src/constants/modelUrls.ts
@@ -987,10 +987,8 @@ const MULTI_QA_MINILM_L6_COS_V1_MODEL = `${URL_PREFIX}-multi-qa-MiniLM-L6-cos-v1
 const MULTI_QA_MINILM_L6_COS_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-MiniLM-L6-cos-v1/${VERSION_TAG}/tokenizer.json`;
 const MULTI_QA_MPNET_BASE_DOT_V1_MODEL = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${VERSION_TAG}/multi-qa-mpnet-base-dot-v1_xnnpack.pte`;
 const MULTI_QA_MPNET_BASE_DOT_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${VERSION_TAG}/tokenizer.json`;
-const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/xnnpack/distiluse-base-multilingual-cased-v2_xnnpack_fp32.pte`;
 const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/xnnpack/distiluse-base-multilingual-cased-v2_xnnpack_8da4w.pte`;
 const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/coreml/distiluse-base-multilingual-cased-v2_coreml_fp32.pte`;
-const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/coreml/distiluse-base-multilingual-cased-v2_coreml_fp16.pte`;
 const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/tokenizer.json`;
 const CLIP_VIT_BASE_PATCH32_TEXT_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/xnnpack/clip_vit_base_patch32_text_xnnpack_fp32.pte`;
 const CLIP_VIT_BASE_PATCH32_TEXT_TOKENIZER = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/tokenizer.json`;
@@ -1031,15 +1029,6 @@ export const MULTI_QA_MPNET_BASE_DOT_V1 = {
   tokenizerSource: MULTI_QA_MPNET_BASE_DOT_V1_TOKENIZER,
 } as const;
 
-/**
- * @category Models - Text Embeddings
- */
-export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2 = {
-  modelName: 'distiluse-base-multilingual-cased-v2',
-  modelSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_MODEL,
-  tokenizerSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER,
-} as const;
-
 /**
  * @category Models - Text Embeddings
  */
@@ -1058,15 +1047,6 @@ export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML = {
   tokenizerSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER,
 } as const;
 
-/**
- * @category Models - Text Embeddings
- */
-export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16 = {
-  modelName: 'distiluse-base-multilingual-cased-v2-coreml-fp16',
-  modelSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16_MODEL,
-  tokenizerSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER,
-} as const;
-
 /**
  * @category Models - Text Embeddings
  */
@@ -1216,10 +1196,8 @@ export const MODEL_REGISTRY = {
     ALL_MPNET_BASE_V2,
     MULTI_QA_MINILM_L6_COS_V1,
     MULTI_QA_MPNET_BASE_DOT_V1,
-    DISTILUSE_BASE_MULTILINGUAL_CASED_V2,
     DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W,
     DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML,
-    DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16,
     CLIP_VIT_BASE_PATCH32_TEXT,
     BK_SDM_TINY_VPRED_512,
     BK_SDM_TINY_VPRED_256,
diff --git a/packages/react-native-executorch/src/types/textEmbeddings.ts b/packages/react-native-executorch/src/types/textEmbeddings.ts
index 6a2fb7af32..45f636c8e5 100644
--- a/packages/react-native-executorch/src/types/textEmbeddings.ts
+++ b/packages/react-native-executorch/src/types/textEmbeddings.ts
@@ -10,10 +10,8 @@ export type TextEmbeddingsModelName =
   | 'all-mpnet-base-v2'
   | 'multi-qa-minilm-l6-cos-v1'
   | 'multi-qa-mpnet-base-dot-v1'
-  | 'distiluse-base-multilingual-cased-v2'
   | 'distiluse-base-multilingual-cased-v2-8da4w'
   | 'distiluse-base-multilingual-cased-v2-coreml'
-  | 'distiluse-base-multilingual-cased-v2-coreml-fp16'
   | 'clip-vit-base-patch32-text';
 
 /**