4 changes: 4 additions & 0 deletions .cspell-wordlist.txt
@@ -53,6 +53,10 @@ Lexend
finetuned
MINILM
MPNET
DISTILUSE
distiluse
Distil
torchao
QINT
FNUZ
wordlist
10 changes: 10 additions & 0 deletions apps/text-embeddings/app/text-embeddings/index.tsx
@@ -18,6 +18,8 @@ import {
ALL_MPNET_BASE_V2,
MULTI_QA_MINILM_L6_COS_V1,
MULTI_QA_MPNET_BASE_DOT_V1,
DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W,
DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML,
TextEmbeddingsProps,
} from 'react-native-executorch';

@@ -28,6 +30,14 @@ const MODELS: { label: string; value: TextEmbeddingModel }[] = [
{ label: 'MPNet Base', value: ALL_MPNET_BASE_V2 },
{ label: 'MultiQA MiniLM', value: MULTI_QA_MINILM_L6_COS_V1 },
{ label: 'MultiQA MPNet', value: MULTI_QA_MPNET_BASE_DOT_V1 },
{
label: 'Multilingual DistilUSE (8da4w)',
value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W,
},
{
label: 'Multilingual DistilUSE (CoreML)',
value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML,
},
];
import { useIsFocused } from '@react-navigation/native';
import { dotProduct } from '../../utils/math';
16 changes: 9 additions & 7 deletions docs/docs/02-benchmarks/inference-time.md
@@ -150,13 +150,15 @@ Average time to synthesize speech from an input text of approximately 60 tokens,
Benchmark times for text embeddings are highly dependent on the sentence length. The numbers below are based on a sentence of around 80 tokens. For shorter or longer sentences, inference time may vary accordingly.
:::

| Model | iPhone 17 Pro (XNNPACK) [ms] | OnePlus 12 (XNNPACK) [ms] |
| -------------------------- | :--------------------------: | :-----------------------: |
| ALL_MINILM_L6_V2 | 7 | 21 |
| ALL_MPNET_BASE_V2 | 24 | 90 |
| MULTI_QA_MINILM_L6_COS_V1 | 7 | 19 |
| MULTI_QA_MPNET_BASE_DOT_V1 | 24 | 88 |
| CLIP_VIT_BASE_PATCH32_TEXT | 14 | 39 |
| Model / Device | iPhone 17 Pro [ms] | OnePlus 12 [ms] |
| ---------------------------------------------------- | :----------------: | :-------------: |
| ALL_MINILM_L6_V2 (XNNPACK) | 7 | 21 |
| ALL_MPNET_BASE_V2 (XNNPACK) | 24 | 90 |
| MULTI_QA_MINILM_L6_COS_V1 (XNNPACK) | 7 | 19 |
| MULTI_QA_MPNET_BASE_DOT_V1 (XNNPACK) | 24 | 88 |
| CLIP_VIT_BASE_PATCH32_TEXT (XNNPACK) | 14 | 39 |
| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (XNNPACK 8da4w) | 16 | 15 |
| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (Core ML FP32) | 15 | - |
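
Tokenized input length dominates these timings, as the note above says. A minimal sketch of reproducing a single measurement in-app, assuming the `useTextEmbeddings` hook accepts one of the exported model objects via a `model` prop and exposes an async `forward(text)` method; check the hook reference for the exact API of your installed version.

```tsx
import { useTextEmbeddings, ALL_MINILM_L6_V2 } from 'react-native-executorch';

// Hypothetical helper: times one forward pass through the loaded model.
function useEmbeddingTimer() {
  const embeddings = useTextEmbeddings({ model: ALL_MINILM_L6_V2 });

  const timeOnce = async (sentence: string) => {
    const start = performance.now();
    await embeddings.forward(sentence);
    return performance.now() - start; // elapsed wall-clock time in ms
  };

  return { isReady: embeddings.isReady, timeOnce };
}
```

Averaging several runs after a warm-up pass gives numbers comparable to the table; the first call includes one-time setup and is not representative.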

## Image Embeddings

20 changes: 11 additions & 9 deletions docs/docs/02-benchmarks/memory-usage.md
@@ -98,13 +98,15 @@ The reported memory usage values include the memory footprint of the Phonemis pa

## Text Embeddings

| Model / Device | iPhone 17 Pro [MB] | OnePlus 12 [MB] |
| ------------------------------------ | :----------------: | :-------------: |
| ALL_MINILM_L6_V2 (XNNPACK) | 110 | 95 |
| ALL_MPNET_BASE_V2 (XNNPACK) | 455 | 405 |
| MULTI_QA_MINILM_L6_COS_V1 (XNNPACK) | 140 | 120 |
| MULTI_QA_MPNET_BASE_DOT_V1 (XNNPACK) | 455 | 435 |
| CLIP_VIT_BASE_PATCH32_TEXT (XNNPACK) | 280 | 200 |
| Model / Device | iPhone 17 Pro [MB] | OnePlus 12 [MB] |
| ---------------------------------------------------- | :----------------: | :-------------: |
| ALL_MINILM_L6_V2 (XNNPACK) | 110 | 95 |
| ALL_MPNET_BASE_V2 (XNNPACK) | 455 | 405 |
| MULTI_QA_MINILM_L6_COS_V1 (XNNPACK) | 140 | 120 |
| MULTI_QA_MPNET_BASE_DOT_V1 (XNNPACK) | 455 | 435 |
| CLIP_VIT_BASE_PATCH32_TEXT (XNNPACK) | 280 | 200 |
| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (XNNPACK 8da4w) | 36 | 44 |
| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (Core ML FP32) | 55 | - |

## Image Embeddings

@@ -120,8 +122,8 @@ output. When resize is enabled, expect higher memory usage and inference time
with higher resolutions.
:::

| Model / Device | iPhone 17 Pro [MB] | OnePlus 12 [MB] |
| --------------------------- | :----------------: | :-------------: |
| Model / Device | iPhone 17 Pro [MB] | OnePlus 12 [MB] |
| ---------------------------- | :----------------: | :-------------: |
| DEEPLABV3_RESNET50 (XNNPACK) | 660 | 930 |

## Instance Segmentation
16 changes: 9 additions & 7 deletions docs/docs/02-benchmarks/model-size.md
@@ -89,13 +89,15 @@ title: Model Size

## Text Embeddings

| Model | XNNPACK [MB] |
| -------------------------- | :----------: |
| ALL_MINILM_L6_V2 | 91 |
| ALL_MPNET_BASE_V2 | 438 |
| MULTI_QA_MINILM_L6_COS_V1 | 91 |
| MULTI_QA_MPNET_BASE_DOT_V1 | 438 |
| CLIP_VIT_BASE_PATCH32_TEXT | 254 |
| Model | Size [MB] |
| ------------------------------------------- | :-------: |
| ALL_MINILM_L6_V2 | 91 |
| ALL_MPNET_BASE_V2 | 438 |
| MULTI_QA_MINILM_L6_COS_V1 | 91 |
| MULTI_QA_MPNET_BASE_DOT_V1 | 438 |
| CLIP_VIT_BASE_PATCH32_TEXT | 254 |
| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W | 393 |
| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML | 541 |

## Image Embeddings

@@ -101,13 +101,14 @@ function App() {

## Supported models

| Model | Language | Max Tokens | Embedding Dimensions | Description |
| ----------------------------------------------------------------------------------------------------- | :------: | :--------: | :------------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| [all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | English | 254 | 384 | All-round model tuned for many use-cases. Trained on a large and diverse dataset of over 1 billion training pairs. |
| [all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) | English | 382 | 768 | All-round model tuned for many use-cases. Trained on a large and diverse dataset of over 1 billion training pairs. |
| [multi-qa-MiniLM-L6-cos-v1](https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1) | English | 509 | 384 | This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs. |
| [multi-qa-mpnet-base-dot-v1](https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1) | English | 510 | 768 | This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs. |
| [clip-vit-base-patch32-text](https://huggingface.co/openai/clip-vit-base-patch32) | English | 74 | 512 | CLIP (Contrastive Language-Image Pre-Training) is a neural network trained on a variety of (image, text) pairs. CLIP allows to embed images and text into the same vector space. This allows to find similar images as well as to implement image search. This is the text encoder part of the CLIP model. To embed images checkout [clip-vit-base-patch32-image](../02-computer-vision/useImageEmbeddings.md#supported-models). |
| Model | Language | Max Tokens | Embedding Dimensions | Description |
| ------------------------------------------------------------------------------------------------------------------------- | :-----------: | :--------: | :------------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| [all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | English | 254 | 384 | All-round model tuned for many use-cases. Trained on a large and diverse dataset of over 1 billion training pairs. |
| [all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) | English | 382 | 768 | All-round model tuned for many use-cases. Trained on a large and diverse dataset of over 1 billion training pairs. |
| [multi-qa-MiniLM-L6-cos-v1](https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1) | English | 509 | 384 | This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs. |
| [multi-qa-mpnet-base-dot-v1](https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1) | English | 510 | 768 | This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs. |
| [distiluse-base-multilingual-cased-v2](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2) | 50+ languages | 126 | 512 | Multilingual DistilBERT with a 768→512 projection head. Recommended when broad language coverage matters more than the peak English-only quality of the MiniLM/MPNet models. |
| [clip-vit-base-patch32-text](https://huggingface.co/openai/clip-vit-base-patch32) | English | 74 | 512 | CLIP (Contrastive Language-Image Pre-Training) is a neural network trained on a variety of (image, text) pairs. CLIP embeds images and text into the same vector space, which makes it possible to find similar images and to implement image search. This is the text encoder part of the CLIP model. To embed images, check out [clip-vit-base-patch32-image](../02-computer-vision/useImageEmbeddings.md#supported-models). |

**`Max Tokens`** - The maximum number of tokens that can be processed by the model. If the input text exceeds this limit, it will be truncated.
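
A hedged usage sketch for the new multilingual entry: embed a query and a passage written in different languages with the same model and compare them with cosine similarity. It assumes `useTextEmbeddings` accepts one of the exported model objects via a `model` prop and that `forward` resolves to an array of numbers; adjust to the actual hook API of your version.

```tsx
import {
  useTextEmbeddings,
  DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W,
} from 'react-native-executorch';

// Cosine similarity; safe whether or not the embeddings come back normalized.
const cosine = (a: ArrayLike<number>, b: ArrayLike<number>) => {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
};

function useCrossLingualSimilarity() {
  const embeddings = useTextEmbeddings({
    model: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W,
  });

  // English query vs. German passage — both land in the same 512-dim space.
  const compare = async () => {
    const query = await embeddings.forward('How do I reset my password?');
    const passage = await embeddings.forward(
      'Das Passwort lässt sich in den Kontoeinstellungen zurücksetzen.'
    );
    return cosine(query, passage);
  };

  return { isReady: embeddings.isReady, compare };
}
```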

25 changes: 24 additions & 1 deletion packages/react-native-executorch/src/constants/modelUrls.ts
@@ -1,5 +1,5 @@
import { Platform } from 'react-native';
import { URL_PREFIX, VERSION_TAG } from './versions';
import { URL_PREFIX, VERSION_TAG, NEXT_VERSION_TAG } from './versions';

// LLMs

@@ -987,6 +987,9 @@ const MULTI_QA_MINILM_L6_COS_V1_MODEL = `${URL_PREFIX}-multi-qa-MiniLM-L6-cos-v1
const MULTI_QA_MINILM_L6_COS_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-MiniLM-L6-cos-v1/${VERSION_TAG}/tokenizer.json`;
const MULTI_QA_MPNET_BASE_DOT_V1_MODEL = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${VERSION_TAG}/multi-qa-mpnet-base-dot-v1_xnnpack.pte`;
const MULTI_QA_MPNET_BASE_DOT_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${VERSION_TAG}/tokenizer.json`;
const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/xnnpack/distiluse-base-multilingual-cased-v2_xnnpack_8da4w.pte`;
const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/coreml/distiluse-base-multilingual-cased-v2_coreml_fp32.pte`;
const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/tokenizer.json`;
const CLIP_VIT_BASE_PATCH32_TEXT_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/xnnpack/clip_vit_base_patch32_text_xnnpack_fp32.pte`;
const CLIP_VIT_BASE_PATCH32_TEXT_TOKENIZER = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/tokenizer.json`;

@@ -1026,6 +1029,24 @@ export const MULTI_QA_MPNET_BASE_DOT_V1 = {
tokenizerSource: MULTI_QA_MPNET_BASE_DOT_V1_TOKENIZER,
} as const;

/**
* @category Models - Text Embeddings
*/
export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W = {
modelName: 'distiluse-base-multilingual-cased-v2-8da4w',
modelSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W_MODEL,
tokenizerSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER,
} as const;

/**
* @category Models - Text Embeddings
*/
export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML = {
modelName: 'distiluse-base-multilingual-cased-v2-coreml',
modelSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_MODEL,
tokenizerSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER,
} as const;

/**
* @category Models - Text Embeddings
*/
@@ -1175,6 +1196,8 @@ export const MODEL_REGISTRY = {
ALL_MPNET_BASE_V2,
MULTI_QA_MINILM_L6_COS_V1,
MULTI_QA_MPNET_BASE_DOT_V1,
DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W,
DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML,
CLIP_VIT_BASE_PATCH32_TEXT,
BK_SDM_TINY_VPRED_512,
BK_SDM_TINY_VPRED_256,
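The two new exports point at different backends: a quantized (8da4w) XNNPACK program that runs on both platforms, and a Core ML FP32 program that only applies on iOS. An app that wants a single symbol for both platforms could select per platform; a small sketch under that assumption, with a hypothetical alias name:

```ts
import { Platform } from 'react-native';
import {
  DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W,
  DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML,
} from 'react-native-executorch';

// Hypothetical convenience alias: prefer the Core ML export on iOS and fall
// back to the portable 8da4w XNNPACK export elsewhere (Core ML is iOS-only).
export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_DEFAULT = Platform.select({
  ios: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML,
  default: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W,
});
```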
2 changes: 2 additions & 0 deletions packages/react-native-executorch/src/types/textEmbeddings.ts
@@ -10,6 +10,8 @@ export type TextEmbeddingsModelName =
| 'all-mpnet-base-v2'
| 'multi-qa-minilm-l6-cos-v1'
| 'multi-qa-mpnet-base-dot-v1'
| 'distiluse-base-multilingual-cased-v2-8da4w'
| 'distiluse-base-multilingual-cased-v2-coreml'
| 'clip-vit-base-patch32-text';

/**