From 15ddd07ba8dc131bab05f1a3e6b21a19649ec518 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= Date: Fri, 24 Apr 2026 13:08:31 +0200 Subject: [PATCH 1/6] feat: add DISTILUSE_BASE_MULTILINGUAL_CASED_V2 text embeddings model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the multilingual half of #945. Shipping only the WordPiece tokenizer model for now — paraphrase-multilingual-MiniLM-L12-v2 needs Unigram/Precompiled/Metaspace support in executorch/extension/llm/ tokenizers, which is in-flight upstream. The model lives at software-mansion/react-native-executorch-distiluse-base-multilingual-cased-v2 under tag v0.9.0, so the constant uses NEXT_VERSION_TAG. Co-Authored-By: Claude Opus 4.7 (1M context) --- .cspell-wordlist.txt | 3 +++ .../text-embeddings/app/text-embeddings/index.tsx | 5 +++++ .../useTextEmbeddings.md | 15 ++++++++------- .../src/constants/modelUrls.ts | 14 +++++++++++++- .../src/types/textEmbeddings.ts | 1 + 5 files changed, 30 insertions(+), 8 deletions(-) diff --git a/.cspell-wordlist.txt b/.cspell-wordlist.txt index f1d55b4c98..5b815c3381 100644 --- a/.cspell-wordlist.txt +++ b/.cspell-wordlist.txt @@ -53,6 +53,9 @@ Lexend finetuned MINILM MPNET +DISTILUSE +distiluse +Distil QINT FNUZ wordlist diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx index e31097940c..756c93c48e 100644 --- a/apps/text-embeddings/app/text-embeddings/index.tsx +++ b/apps/text-embeddings/app/text-embeddings/index.tsx @@ -18,6 +18,7 @@ import { ALL_MPNET_BASE_V2, MULTI_QA_MINILM_L6_COS_V1, MULTI_QA_MPNET_BASE_DOT_V1, + DISTILUSE_BASE_MULTILINGUAL_CASED_V2, TextEmbeddingsProps, } from 'react-native-executorch'; @@ -28,6 +29,10 @@ const MODELS: { label: string; value: TextEmbeddingModel }[] = [ { label: 'MPNet Base', value: ALL_MPNET_BASE_V2 }, { label: 'MultiQA MiniLM', value: MULTI_QA_MINILM_L6_COS_V1 }, { label: 'MultiQA MPNet', value: 
MULTI_QA_MPNET_BASE_DOT_V1 }, + { + label: 'Multilingual DistilUSE', + value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2, + }, ]; import { useIsFocused } from '@react-navigation/native'; import { dotProduct } from '../../utils/math'; diff --git a/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md b/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md index 84c8499808..2f92eb6e8f 100644 --- a/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md +++ b/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md @@ -101,13 +101,14 @@ function App() { ## Supported models -| Model | Language | Max Tokens | Embedding Dimensions | Description | -| ----------------------------------------------------------------------------------------------------- | :------: | :--------: | :------------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| [all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | English | 254 | 384 | All-round model tuned for many use-cases. Trained on a large and diverse dataset of over 1 billion training pairs. | -| [all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) | English | 382 | 768 | All-round model tuned for many use-cases. Trained on a large and diverse dataset of over 1 billion training pairs. | -| [multi-qa-MiniLM-L6-cos-v1](https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1) | English | 509 | 384 | This model was tuned for semantic search: Given a query/question, it can find relevant passages. 
It was trained on a large and diverse set of (question, answer) pairs. | -| [multi-qa-mpnet-base-dot-v1](https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1) | English | 510 | 768 | This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs. | -| [clip-vit-base-patch32-text](https://huggingface.co/openai/clip-vit-base-patch32) | English | 74 | 512 | CLIP (Contrastive Language-Image Pre-Training) is a neural network trained on a variety of (image, text) pairs. CLIP allows to embed images and text into the same vector space. This allows to find similar images as well as to implement image search. This is the text encoder part of the CLIP model. To embed images checkout [clip-vit-base-patch32-image](../02-computer-vision/useImageEmbeddings.md#supported-models). | +| Model | Language | Max Tokens | Embedding Dimensions | Description | +| ------------------------------------------------------------------------------------------------------------------------- | :-----------: | :--------: | :------------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | English | 254 | 384 | All-round model tuned for many use-cases. Trained on a large and diverse dataset of over 1 billion training pairs. | +| [all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) | English | 382 | 768 | All-round model tuned for many use-cases. 
Trained on a large and diverse dataset of over 1 billion training pairs. | +| [multi-qa-MiniLM-L6-cos-v1](https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1) | English | 509 | 384 | This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs. | +| [multi-qa-mpnet-base-dot-v1](https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1) | English | 510 | 768 | This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs. | +| [distiluse-base-multilingual-cased-v2](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2) | 50+ languages | 126 | 512 | Multilingual DistilBERT with a 768→512 projection head. Recommended when broader language coverage matters more than the exact English quality of MiniLM/MPNet. | +| [clip-vit-base-patch32-text](https://huggingface.co/openai/clip-vit-base-patch32) | English | 74 | 512 | CLIP (Contrastive Language-Image Pre-Training) is a neural network trained on a variety of (image, text) pairs. CLIP allows to embed images and text into the same vector space. This allows to find similar images as well as to implement image search. This is the text encoder part of the CLIP model. To embed images checkout [clip-vit-base-patch32-image](../02-computer-vision/useImageEmbeddings.md#supported-models). | **`Max Tokens`** - The maximum number of tokens that can be processed by the model. If the input text exceeds this limit, it will be truncated. 
diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts index 92cd95bd7a..fcd6c14ef4 100644 --- a/packages/react-native-executorch/src/constants/modelUrls.ts +++ b/packages/react-native-executorch/src/constants/modelUrls.ts @@ -1,5 +1,5 @@ import { Platform } from 'react-native'; -import { URL_PREFIX, VERSION_TAG } from './versions'; +import { URL_PREFIX, VERSION_TAG, NEXT_VERSION_TAG } from './versions'; // LLMs @@ -987,6 +987,8 @@ const MULTI_QA_MINILM_L6_COS_V1_MODEL = `${URL_PREFIX}-multi-qa-MiniLM-L6-cos-v1 const MULTI_QA_MINILM_L6_COS_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-MiniLM-L6-cos-v1/${VERSION_TAG}/tokenizer.json`; const MULTI_QA_MPNET_BASE_DOT_V1_MODEL = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${VERSION_TAG}/multi-qa-mpnet-base-dot-v1_xnnpack.pte`; const MULTI_QA_MPNET_BASE_DOT_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${VERSION_TAG}/tokenizer.json`; +const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/xnnpack/distiluse-base-multilingual-cased-v2_xnnpack_fp32.pte`; +const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/tokenizer.json`; const CLIP_VIT_BASE_PATCH32_TEXT_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/xnnpack/clip_vit_base_patch32_text_xnnpack_fp32.pte`; const CLIP_VIT_BASE_PATCH32_TEXT_TOKENIZER = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/tokenizer.json`; @@ -1026,6 +1028,15 @@ export const MULTI_QA_MPNET_BASE_DOT_V1 = { tokenizerSource: MULTI_QA_MPNET_BASE_DOT_V1_TOKENIZER, } as const; +/** + * @category Models - Text Embeddings + */ +export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2 = { + modelName: 'distiluse-base-multilingual-cased-v2', + modelSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_MODEL, + tokenizerSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER, +} as const; + 
/** * @category Models - Text Embeddings */ @@ -1175,6 +1186,7 @@ export const MODEL_REGISTRY = { ALL_MPNET_BASE_V2, MULTI_QA_MINILM_L6_COS_V1, MULTI_QA_MPNET_BASE_DOT_V1, + DISTILUSE_BASE_MULTILINGUAL_CASED_V2, CLIP_VIT_BASE_PATCH32_TEXT, BK_SDM_TINY_VPRED_512, BK_SDM_TINY_VPRED_256, diff --git a/packages/react-native-executorch/src/types/textEmbeddings.ts b/packages/react-native-executorch/src/types/textEmbeddings.ts index 87b5d6375f..3bdf8df079 100644 --- a/packages/react-native-executorch/src/types/textEmbeddings.ts +++ b/packages/react-native-executorch/src/types/textEmbeddings.ts @@ -10,6 +10,7 @@ export type TextEmbeddingsModelName = | 'all-mpnet-base-v2' | 'multi-qa-minilm-l6-cos-v1' | 'multi-qa-mpnet-base-dot-v1' + | 'distiluse-base-multilingual-cased-v2' | 'clip-vit-base-patch32-text'; /** From 1a01d54ed482effb82a81238f074f0cfa74bcbdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= Date: Fri, 24 Apr 2026 16:38:39 +0200 Subject: [PATCH 2/6] feat: expose distiluse 8da4w + CoreML fp32/fp16 variants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follows the same scheme-suffix convention used for LLaMA (`_QLORA`, `_SPINQUANT`) — each variant has its own constant so the caller picks exactly the quantization / backend combo they want: DISTILUSE_BASE_MULTILINGUAL_CASED_V2 xnnpack fp32 (baseline) DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W xnnpack 8da4w DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32 coreml fp32 (iOS/macOS) DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16 coreml fp16 (iOS/macOS) All four point at the same HF repo tag v0.9.0; tokenizer.json is shared. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .cspell-wordlist.txt | 1 + .../app/text-embeddings/index.tsx | 15 +++++++++ .../useTextEmbeddings.md | 13 ++++++++ .../src/constants/modelUrls.ts | 33 +++++++++++++++++++ .../src/types/textEmbeddings.ts | 3 ++ 5 files changed, 65 insertions(+) diff --git a/.cspell-wordlist.txt b/.cspell-wordlist.txt index 5b815c3381..d611f986b6 100644 --- a/.cspell-wordlist.txt +++ b/.cspell-wordlist.txt @@ -56,6 +56,7 @@ MPNET DISTILUSE distiluse Distil +torchao QINT FNUZ wordlist diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx index 756c93c48e..da48b27ceb 100644 --- a/apps/text-embeddings/app/text-embeddings/index.tsx +++ b/apps/text-embeddings/app/text-embeddings/index.tsx @@ -19,6 +19,9 @@ import { MULTI_QA_MINILM_L6_COS_V1, MULTI_QA_MPNET_BASE_DOT_V1, DISTILUSE_BASE_MULTILINGUAL_CASED_V2, + DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W, + DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32, + DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16, TextEmbeddingsProps, } from 'react-native-executorch'; @@ -33,6 +36,18 @@ const MODELS: { label: string; value: TextEmbeddingModel }[] = [ label: 'Multilingual DistilUSE', value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2, }, + { + label: 'Multilingual DistilUSE (8da4w)', + value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W, + }, + { + label: 'Multilingual DistilUSE (CoreML fp32)', + value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32, + }, + { + label: 'Multilingual DistilUSE (CoreML fp16)', + value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16, + }, ]; import { useIsFocused } from '@react-navigation/native'; import { dotProduct } from '../../utils/math'; diff --git a/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md b/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md index 2f92eb6e8f..b519d8a073 100644 --- a/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md +++ 
b/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md @@ -117,3 +117,16 @@ function App() { :::note For the supported models, the returned embedding vector is normalized, meaning that its length is equal to 1. This allows for easier comparison of vectors using cosine similarity, just calculate the dot product of two vectors to get the cosine similarity score. ::: + +### distiluse-base-multilingual-cased-v2 variants + +`distiluse-base-multilingual-cased-v2` ships in four flavours so you can trade size, latency, and platform. All share the same tokenizer and embedding dimension — only the `.pte` differs. + +| Constant | Backend | Precision | Size | Platforms | +| -------------------------------------------------- | ------- | --------- | ------ | ---------------------- | +| `DISTILUSE_BASE_MULTILINGUAL_CASED_V2` | XNNPACK | fp32 | 516 MB | iOS, Android (default) | +| `DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W` | XNNPACK | 8da4w | 375 MB | iOS, Android | +| `DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32` | CoreML | fp32 | 516 MB | iOS / macOS only | +| `DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16` | CoreML | fp16 | 258 MB | iOS / macOS only | + +`8da4w` is Int8 dynamic activation + Int4 weight (torchao), group_size 32 — only `nn.Linear` layers are quantized, embeddings stay fp32. CoreML variants only load on Apple platforms; pick the XNNPACK baseline if you need a single artifact that runs everywhere. 
diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts index fcd6c14ef4..90dee0f230 100644 --- a/packages/react-native-executorch/src/constants/modelUrls.ts +++ b/packages/react-native-executorch/src/constants/modelUrls.ts @@ -988,6 +988,9 @@ const MULTI_QA_MINILM_L6_COS_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-MiniLM-L6-co const MULTI_QA_MPNET_BASE_DOT_V1_MODEL = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${VERSION_TAG}/multi-qa-mpnet-base-dot-v1_xnnpack.pte`; const MULTI_QA_MPNET_BASE_DOT_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${VERSION_TAG}/tokenizer.json`; const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/xnnpack/distiluse-base-multilingual-cased-v2_xnnpack_fp32.pte`; +const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/xnnpack/distiluse-base-multilingual-cased-v2_xnnpack_8da4w.pte`; +const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/coreml/distiluse-base-multilingual-cased-v2_coreml_fp32.pte`; +const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/coreml/distiluse-base-multilingual-cased-v2_coreml_fp16.pte`; const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/tokenizer.json`; const CLIP_VIT_BASE_PATCH32_TEXT_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/xnnpack/clip_vit_base_patch32_text_xnnpack_fp32.pte`; const CLIP_VIT_BASE_PATCH32_TEXT_TOKENIZER = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/tokenizer.json`; @@ -1037,6 +1040,33 @@ export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2 = { tokenizerSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER, } as const; 
+/** + * @category Models - Text Embeddings + */ +export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W = { + modelName: 'distiluse-base-multilingual-cased-v2-8da4w', + modelSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W_MODEL, + tokenizerSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER, +} as const; + +/** + * @category Models - Text Embeddings + */ +export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32 = { + modelName: 'distiluse-base-multilingual-cased-v2-coreml-fp32', + modelSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32_MODEL, + tokenizerSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER, +} as const; + +/** + * @category Models - Text Embeddings + */ +export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16 = { + modelName: 'distiluse-base-multilingual-cased-v2-coreml-fp16', + modelSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16_MODEL, + tokenizerSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER, +} as const; + /** * @category Models - Text Embeddings */ @@ -1187,6 +1217,9 @@ export const MODEL_REGISTRY = { MULTI_QA_MINILM_L6_COS_V1, MULTI_QA_MPNET_BASE_DOT_V1, DISTILUSE_BASE_MULTILINGUAL_CASED_V2, + DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W, + DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32, + DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16, CLIP_VIT_BASE_PATCH32_TEXT, BK_SDM_TINY_VPRED_512, BK_SDM_TINY_VPRED_256, diff --git a/packages/react-native-executorch/src/types/textEmbeddings.ts b/packages/react-native-executorch/src/types/textEmbeddings.ts index 3bdf8df079..c2993900c1 100644 --- a/packages/react-native-executorch/src/types/textEmbeddings.ts +++ b/packages/react-native-executorch/src/types/textEmbeddings.ts @@ -11,6 +11,9 @@ export type TextEmbeddingsModelName = | 'multi-qa-minilm-l6-cos-v1' | 'multi-qa-mpnet-base-dot-v1' | 'distiluse-base-multilingual-cased-v2' + | 'distiluse-base-multilingual-cased-v2-8da4w' + | 'distiluse-base-multilingual-cased-v2-coreml-fp32' + | 
'distiluse-base-multilingual-cased-v2-coreml-fp16' | 'clip-vit-base-patch32-text'; /** From e4a6c4d1899125260b6fc66d1411c7c139319d06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= Date: Fri, 24 Apr 2026 16:50:20 +0200 Subject: [PATCH 3/6] docs: move distiluse variant sizes from useTextEmbeddings hook page to benchmarks Size data belongs next to the other model sizes, not inline in the hook reference. The useTextEmbeddings page now only lists the model family (one row) and leaves variant enumeration to the API reference + the model-size benchmark table. The size column header in model-size.md is renamed from "XNNPACK [MB]" to just "Size [MB]" since the table now mixes XNNPACK and CoreML rows. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/docs/02-benchmarks/model-size.md | 18 +++++++++++------- .../useTextEmbeddings.md | 13 ------------- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/docs/docs/02-benchmarks/model-size.md b/docs/docs/02-benchmarks/model-size.md index f9f5e4701f..3c5d5a5e92 100644 --- a/docs/docs/02-benchmarks/model-size.md +++ b/docs/docs/02-benchmarks/model-size.md @@ -89,13 +89,17 @@ title: Model Size ## Text Embeddings -| Model | XNNPACK [MB] | -| -------------------------- | :----------: | -| ALL_MINILM_L6_V2 | 91 | -| ALL_MPNET_BASE_V2 | 438 | -| MULTI_QA_MINILM_L6_COS_V1 | 91 | -| MULTI_QA_MPNET_BASE_DOT_V1 | 438 | -| CLIP_VIT_BASE_PATCH32_TEXT | 254 | +| Model | Size [MB] | +| ------------------------------------------------ | :-----------: | +| ALL_MINILM_L6_V2 | 91 | +| ALL_MPNET_BASE_V2 | 438 | +| MULTI_QA_MINILM_L6_COS_V1 | 91 | +| MULTI_QA_MPNET_BASE_DOT_V1 | 438 | +| CLIP_VIT_BASE_PATCH32_TEXT | 254 | +| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 | 541 | +| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W | 393 | +| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32 | 541 | +| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16 | 271 | ## Image Embeddings diff --git
a/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md b/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md index b519d8a073..2f92eb6e8f 100644 --- a/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md +++ b/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md @@ -117,16 +117,3 @@ function App() { :::note For the supported models, the returned embedding vector is normalized, meaning that its length is equal to 1. This allows for easier comparison of vectors using cosine similarity, just calculate the dot product of two vectors to get the cosine similarity score. ::: - -### distiluse-base-multilingual-cased-v2 variants - -`distiluse-base-multilingual-cased-v2` ships in four flavours so you can trade size, latency, and platform. All share the same tokenizer and embedding dimension — only the `.pte` differs. - -| Constant | Backend | Precision | Size | Platforms | -| -------------------------------------------------- | ------- | --------- | ------ | ---------------------- | -| `DISTILUSE_BASE_MULTILINGUAL_CASED_V2` | XNNPACK | fp32 | 516 MB | iOS, Android (default) | -| `DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W` | XNNPACK | 8da4w | 375 MB | iOS, Android | -| `DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32` | CoreML | fp32 | 516 MB | iOS / macOS only | -| `DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16` | CoreML | fp16 | 258 MB | iOS / macOS only | - -`8da4w` is Int8 dynamic activation + Int4 weight (torchao), group_size 32 — only `nn.Linear` layers are quantized, embeddings stay fp32. CoreML variants only load on Apple platforms; pick the XNNPACK baseline if you need a single artifact that runs everywhere. 
From cf144a01581484d442c1d24868f719e2c3598b22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= Date: Mon, 27 Apr 2026 17:31:57 +0200 Subject: [PATCH 4/6] docs: add DISTILUSE benchmarks for OnePlus 12 and iPhone 17 Pro Adds inference-time and memory-usage rows for all four distiluse-base-multilingual-cased-v2 variants (XNNPACK fp32, XNNPACK 8da4w, Core ML fp32, Core ML fp16). Captured on a OnePlus 12 (Android, debug build) and iPhone 17 Pro (iOS, debug build) with a fixed ~80-token sentence over 100 measured forwards, JS-side wall-clock around model.forward(). Memory column reports peak resident-set delta vs the pre-model-load baseline, sampled with adb dumpsys meminfo on Android and Xcode's Debug Navigator on iOS. Also normalizes the text-embeddings table headers to match the Classification section convention: column header drops the "(XNNPACK)" suffix and the backend now lives in the per-row label, which lets multi-backend models (fp32 / 8da4w / Core ML) share a single table without an artificial column split. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/docs/02-benchmarks/inference-time.md | 18 +++++++++++------- docs/docs/02-benchmarks/memory-usage.md | 22 +++++++++++++--------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/docs/docs/02-benchmarks/inference-time.md b/docs/docs/02-benchmarks/inference-time.md index dbc06dc85f..838e920b57 100644 --- a/docs/docs/02-benchmarks/inference-time.md +++ b/docs/docs/02-benchmarks/inference-time.md @@ -150,13 +150,17 @@ Average time to synthesize speech from an input text of approximately 60 tokens, Benchmark times for text embeddings are highly dependent on the sentence length. The numbers below are based on a sentence of around 80 tokens. For shorter or longer sentences, inference time may vary accordingly. 
::: -| Model | iPhone 17 Pro (XNNPACK) [ms] | OnePlus 12 (XNNPACK) [ms] | -| -------------------------- | :--------------------------: | :-----------------------: | -| ALL_MINILM_L6_V2 | 7 | 21 | -| ALL_MPNET_BASE_V2 | 24 | 90 | -| MULTI_QA_MINILM_L6_COS_V1 | 7 | 19 | -| MULTI_QA_MPNET_BASE_DOT_V1 | 24 | 88 | -| CLIP_VIT_BASE_PATCH32_TEXT | 14 | 39 | +| Model / Device | iPhone 17 Pro [ms] | OnePlus 12 [ms] | +| ---------------------------------------------------- | :----------------: | :-------------: | +| ALL_MINILM_L6_V2 (XNNPACK) | 7 | 21 | +| ALL_MPNET_BASE_V2 (XNNPACK) | 24 | 90 | +| MULTI_QA_MINILM_L6_COS_V1 (XNNPACK) | 7 | 19 | +| MULTI_QA_MPNET_BASE_DOT_V1 (XNNPACK) | 24 | 88 | +| CLIP_VIT_BASE_PATCH32_TEXT (XNNPACK) | 14 | 39 | +| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (XNNPACK FP32) | 47 | 41 | +| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (XNNPACK 8da4w) | 16 | 15 | +| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (Core ML FP32) | 15 | - | +| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (Core ML FP16) | 19 | - | ## Image Embeddings diff --git a/docs/docs/02-benchmarks/memory-usage.md b/docs/docs/02-benchmarks/memory-usage.md index a225b724d1..52507ce9c1 100644 --- a/docs/docs/02-benchmarks/memory-usage.md +++ b/docs/docs/02-benchmarks/memory-usage.md @@ -98,13 +98,17 @@ The reported memory usage values include the memory footprint of the Phonemis pa ## Text Embeddings -| Model / Device | iPhone 17 Pro [MB] | OnePlus 12 [MB] | -| ------------------------------------ | :----------------: | :-------------: | -| ALL_MINILM_L6_V2 (XNNPACK) | 110 | 95 | -| ALL_MPNET_BASE_V2 (XNNPACK) | 455 | 405 | -| MULTI_QA_MINILM_L6_COS_V1 (XNNPACK) | 140 | 120 | -| MULTI_QA_MPNET_BASE_DOT_V1 (XNNPACK) | 455 | 435 | -| CLIP_VIT_BASE_PATCH32_TEXT (XNNPACK) | 280 | 200 | +| Model / Device | iPhone 17 Pro [MB] | OnePlus 12 [MB] | +| ---------------------------------------------------- | :----------------: | :-------------: | +| ALL_MINILM_L6_V2 (XNNPACK) | 110 | 95 | +| ALL_MPNET_BASE_V2 (XNNPACK) | 
455 | 405 | +| MULTI_QA_MINILM_L6_COS_V1 (XNNPACK) | 140 | 120 | +| MULTI_QA_MPNET_BASE_DOT_V1 (XNNPACK) | 455 | 435 | +| CLIP_VIT_BASE_PATCH32_TEXT (XNNPACK) | 280 | 200 | +| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (XNNPACK FP32) | 175 | 196 | +| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (XNNPACK 8da4w) | 36 | 44 | +| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (Core ML FP32) | 55 | - | +| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (Core ML FP16) | 143 | - | ## Image Embeddings @@ -120,8 +124,8 @@ output. When resize is enabled, expect higher memory usage and inference time with higher resolutions. ::: -| Model / Device | iPhone 17 Pro [MB] | OnePlus 12 [MB] | -| --------------------------- | :----------------: | :-------------: | +| Model / Device | iPhone 17 Pro [MB] | OnePlus 12 [MB] | +| ---------------------------- | :----------------: | :-------------: | | DEEPLABV3_RESNET50 (XNNPACK) | 660 | 930 | ## Instance Segmentation From e318d6a3f6a9959923f77a36528232481bcc92cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= Date: Tue, 28 Apr 2026 13:38:50 +0200 Subject: [PATCH 5/6] refactor: drop fp32 suffix from CoreML default-precision variant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renames `DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32` → `DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML` to match the existing convention where the default-precision XNNPACK variant has no precision suffix. The fp16 variant keeps its suffix since it's non-default. Per review feedback on #1098. 
--- .../app/text-embeddings/index.tsx | 6 ++--- docs/docs/02-benchmarks/model-size.md | 22 +++++++++---------- .../src/constants/modelUrls.ts | 10 ++++----- .../src/types/textEmbeddings.ts | 2 +- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx index da48b27ceb..d049b0c131 100644 --- a/apps/text-embeddings/app/text-embeddings/index.tsx +++ b/apps/text-embeddings/app/text-embeddings/index.tsx @@ -20,7 +20,7 @@ import { MULTI_QA_MPNET_BASE_DOT_V1, DISTILUSE_BASE_MULTILINGUAL_CASED_V2, DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W, - DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32, + DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML, DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16, TextEmbeddingsProps, } from 'react-native-executorch'; @@ -41,8 +41,8 @@ const MODELS: { label: string; value: TextEmbeddingModel }[] = [ value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W, }, { - label: 'Multilingual DistilUSE (CoreML fp32)', - value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32, + label: 'Multilingual DistilUSE (CoreML)', + value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML, }, { label: 'Multilingual DistilUSE (CoreML fp16)', diff --git a/docs/docs/02-benchmarks/model-size.md b/docs/docs/02-benchmarks/model-size.md index 3c5d5a5e92..a4f5a6c0eb 100644 --- a/docs/docs/02-benchmarks/model-size.md +++ b/docs/docs/02-benchmarks/model-size.md @@ -89,17 +89,17 @@ title: Model Size ## Text Embeddings -| Model | Size [MB] | -| ------------------------------------------------ | :-----------: | -| ALL_MINILM_L6_V2 | 91 | -| ALL_MPNET_BASE_V2 | 438 | -| MULTI_QA_MINILM_L6_COS_V1 | 91 | -| MULTI_QA_MPNET_BASE_DOT_V1 | 438 | -| CLIP_VIT_BASE_PATCH32_TEXT | 254 | -| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 | 541 | -| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W | 393 | -| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32 | 541 | -| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16 
| 271 | +| Model | Size [MB] | +| ------------------------------------------------ | :-------: | +| ALL_MINILM_L6_V2 | 91 | +| ALL_MPNET_BASE_V2 | 438 | +| MULTI_QA_MINILM_L6_COS_V1 | 91 | +| MULTI_QA_MPNET_BASE_DOT_V1 | 438 | +| CLIP_VIT_BASE_PATCH32_TEXT | 254 | +| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 | 541 | +| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W | 393 | +| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML | 541 | +| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16 | 271 | ## Image Embeddings diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts index 90dee0f230..6e693a07d8 100644 --- a/packages/react-native-executorch/src/constants/modelUrls.ts +++ b/packages/react-native-executorch/src/constants/modelUrls.ts @@ -989,7 +989,7 @@ const MULTI_QA_MPNET_BASE_DOT_V1_MODEL = `${URL_PREFIX}-multi-qa-mpnet-base-dot- const MULTI_QA_MPNET_BASE_DOT_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${VERSION_TAG}/tokenizer.json`; const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/xnnpack/distiluse-base-multilingual-cased-v2_xnnpack_fp32.pte`; const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/xnnpack/distiluse-base-multilingual-cased-v2_xnnpack_8da4w.pte`; -const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/coreml/distiluse-base-multilingual-cased-v2_coreml_fp32.pte`; +const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/coreml/distiluse-base-multilingual-cased-v2_coreml_fp32.pte`; const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/coreml/distiluse-base-multilingual-cased-v2_coreml_fp16.pte`; const 
DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/tokenizer.json`; const CLIP_VIT_BASE_PATCH32_TEXT_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/xnnpack/clip_vit_base_patch32_text_xnnpack_fp32.pte`; @@ -1052,9 +1052,9 @@ export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W = { /** * @category Models - Text Embeddings */ -export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32 = { - modelName: 'distiluse-base-multilingual-cased-v2-coreml-fp32', - modelSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32_MODEL, +export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML = { + modelName: 'distiluse-base-multilingual-cased-v2-coreml', + modelSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_MODEL, tokenizerSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER, } as const; @@ -1218,7 +1218,7 @@ export const MODEL_REGISTRY = { MULTI_QA_MPNET_BASE_DOT_V1, DISTILUSE_BASE_MULTILINGUAL_CASED_V2, DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W, - DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP32, + DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML, DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16, CLIP_VIT_BASE_PATCH32_TEXT, BK_SDM_TINY_VPRED_512, diff --git a/packages/react-native-executorch/src/types/textEmbeddings.ts b/packages/react-native-executorch/src/types/textEmbeddings.ts index c2993900c1..6a2fb7af32 100644 --- a/packages/react-native-executorch/src/types/textEmbeddings.ts +++ b/packages/react-native-executorch/src/types/textEmbeddings.ts @@ -12,7 +12,7 @@ export type TextEmbeddingsModelName = | 'multi-qa-mpnet-base-dot-v1' | 'distiluse-base-multilingual-cased-v2' | 'distiluse-base-multilingual-cased-v2-8da4w' - | 'distiluse-base-multilingual-cased-v2-coreml-fp32' + | 'distiluse-base-multilingual-cased-v2-coreml' | 'distiluse-base-multilingual-cased-v2-coreml-fp16' | 'clip-vit-base-patch32-text'; From 8c71bfbf5623b5e04b3e605c9eaa03679b8f7572 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Mateusz=20S=C5=82uszniak?= Date: Tue, 28 Apr 2026 14:30:08 +0200 Subject: [PATCH 6/6] refactor: drop XNNPACK fp32 and CoreML fp16 distiluse variants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tatoeba bitext-mining (eng↔X for X ∈ {pol, deu, fra, spa, rus, jpn}, 1000 pairs each) shows all 4 variants land within 0.2 pp R@1 / 0.1 pp R@10 of each other. CoreML fp32 is bit-exact with XNNPACK fp32; CoreML fp16 differs at the 5th decimal; 8da4w drifts ~1% cosine but retrieval is unaffected. Drop: - XNNPACK fp32 (bare _V2): Pareto-dominated on iPhone by COREML and on Android by 8DA4W (speed and memory). No retained quality benefit. - COREML_FP16: identical retrieval quality to COREML fp32 but slower on iPhone (19 vs 15 ms) and uses more memory (143 vs 55 MB). Ships as _8DA4W (Android) and _COREML (iOS) only. --- .../app/text-embeddings/index.tsx | 10 --------- docs/docs/02-benchmarks/inference-time.md | 2 -- docs/docs/02-benchmarks/memory-usage.md | 2 -- docs/docs/02-benchmarks/model-size.md | 20 ++++++++--------- .../src/constants/modelUrls.ts | 22 ------------------- .../src/types/textEmbeddings.ts | 2 -- 6 files changed, 9 insertions(+), 49 deletions(-) diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx index d049b0c131..772c9d39f2 100644 --- a/apps/text-embeddings/app/text-embeddings/index.tsx +++ b/apps/text-embeddings/app/text-embeddings/index.tsx @@ -18,10 +18,8 @@ import { ALL_MPNET_BASE_V2, MULTI_QA_MINILM_L6_COS_V1, MULTI_QA_MPNET_BASE_DOT_V1, - DISTILUSE_BASE_MULTILINGUAL_CASED_V2, DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W, DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML, - DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16, TextEmbeddingsProps, } from 'react-native-executorch'; @@ -32,10 +30,6 @@ const MODELS: { label: string; value: TextEmbeddingModel }[] = [ { label: 'MPNet Base', value: ALL_MPNET_BASE_V2 }, { label: 'MultiQA MiniLM', value: 
MULTI_QA_MINILM_L6_COS_V1 }, { label: 'MultiQA MPNet', value: MULTI_QA_MPNET_BASE_DOT_V1 }, - { - label: 'Multilingual DistilUSE', - value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2, - }, { label: 'Multilingual DistilUSE (8da4w)', value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W, @@ -44,10 +38,6 @@ const MODELS: { label: string; value: TextEmbeddingModel }[] = [ label: 'Multilingual DistilUSE (CoreML)', value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML, }, - { - label: 'Multilingual DistilUSE (CoreML fp16)', - value: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16, - }, ]; import { useIsFocused } from '@react-navigation/native'; import { dotProduct } from '../../utils/math'; diff --git a/docs/docs/02-benchmarks/inference-time.md b/docs/docs/02-benchmarks/inference-time.md index 838e920b57..6f23dd92d8 100644 --- a/docs/docs/02-benchmarks/inference-time.md +++ b/docs/docs/02-benchmarks/inference-time.md @@ -157,10 +157,8 @@ Benchmark times for text embeddings are highly dependent on the sentence length. 
| MULTI_QA_MINILM_L6_COS_V1 (XNNPACK) | 7 | 19 | | MULTI_QA_MPNET_BASE_DOT_V1 (XNNPACK) | 24 | 88 | | CLIP_VIT_BASE_PATCH32_TEXT (XNNPACK) | 14 | 39 | -| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (XNNPACK FP32) | 47 | 41 | | DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (XNNPACK 8da4w) | 16 | 15 | | DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (Core ML FP32) | 15 | - | -| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (Core ML FP16) | 19 | - | ## Image Embeddings diff --git a/docs/docs/02-benchmarks/memory-usage.md b/docs/docs/02-benchmarks/memory-usage.md index 52507ce9c1..88cd19698e 100644 --- a/docs/docs/02-benchmarks/memory-usage.md +++ b/docs/docs/02-benchmarks/memory-usage.md @@ -105,10 +105,8 @@ The reported memory usage values include the memory footprint of the Phonemis pa | MULTI_QA_MINILM_L6_COS_V1 (XNNPACK) | 140 | 120 | | MULTI_QA_MPNET_BASE_DOT_V1 (XNNPACK) | 455 | 435 | | CLIP_VIT_BASE_PATCH32_TEXT (XNNPACK) | 280 | 200 | -| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (XNNPACK FP32) | 175 | 196 | | DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (XNNPACK 8da4w) | 36 | 44 | | DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (Core ML FP32) | 55 | - | -| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 (Core ML FP16) | 143 | - | ## Image Embeddings diff --git a/docs/docs/02-benchmarks/model-size.md b/docs/docs/02-benchmarks/model-size.md index a4f5a6c0eb..14c1777689 100644 --- a/docs/docs/02-benchmarks/model-size.md +++ b/docs/docs/02-benchmarks/model-size.md @@ -89,17 +89,15 @@ title: Model Size ## Text Embeddings -| Model | Size [MB] | -| ------------------------------------------------ | :-------: | -| ALL_MINILM_L6_V2 | 91 | -| ALL_MPNET_BASE_V2 | 438 | -| MULTI_QA_MINILM_L6_COS_V1 | 91 | -| MULTI_QA_MPNET_BASE_DOT_V1 | 438 | -| CLIP_VIT_BASE_PATCH32_TEXT | 254 | -| DISTILUSE_BASE_MULTILINGUAL_CASED_V2 | 541 | -| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W | 393 | -| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML | 541 | -| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16 | 271 | +| Model | Size [MB] | +| 
------------------------------------------- | :-------: | +| ALL_MINILM_L6_V2 | 91 | +| ALL_MPNET_BASE_V2 | 438 | +| MULTI_QA_MINILM_L6_COS_V1 | 91 | +| MULTI_QA_MPNET_BASE_DOT_V1 | 438 | +| CLIP_VIT_BASE_PATCH32_TEXT | 254 | +| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W | 393 | +| DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML | 541 | ## Image Embeddings diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts index 6e693a07d8..2f05fe3a63 100644 --- a/packages/react-native-executorch/src/constants/modelUrls.ts +++ b/packages/react-native-executorch/src/constants/modelUrls.ts @@ -987,10 +987,8 @@ const MULTI_QA_MINILM_L6_COS_V1_MODEL = `${URL_PREFIX}-multi-qa-MiniLM-L6-cos-v1 const MULTI_QA_MINILM_L6_COS_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-MiniLM-L6-cos-v1/${VERSION_TAG}/tokenizer.json`; const MULTI_QA_MPNET_BASE_DOT_V1_MODEL = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${VERSION_TAG}/multi-qa-mpnet-base-dot-v1_xnnpack.pte`; const MULTI_QA_MPNET_BASE_DOT_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${VERSION_TAG}/tokenizer.json`; -const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/xnnpack/distiluse-base-multilingual-cased-v2_xnnpack_fp32.pte`; const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/xnnpack/distiluse-base-multilingual-cased-v2_xnnpack_8da4w.pte`; const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/coreml/distiluse-base-multilingual-cased-v2_coreml_fp32.pte`; -const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16_MODEL = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/coreml/distiluse-base-multilingual-cased-v2_coreml_fp16.pte`; const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER = 
`${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${NEXT_VERSION_TAG}/tokenizer.json`; const CLIP_VIT_BASE_PATCH32_TEXT_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/xnnpack/clip_vit_base_patch32_text_xnnpack_fp32.pte`; const CLIP_VIT_BASE_PATCH32_TEXT_TOKENIZER = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/tokenizer.json`; @@ -1031,15 +1029,6 @@ export const MULTI_QA_MPNET_BASE_DOT_V1 = { tokenizerSource: MULTI_QA_MPNET_BASE_DOT_V1_TOKENIZER, } as const; -/** - * @category Models - Text Embeddings - */ -export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2 = { - modelName: 'distiluse-base-multilingual-cased-v2', - modelSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_MODEL, - tokenizerSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER, -} as const; - /** * @category Models - Text Embeddings */ @@ -1058,15 +1047,6 @@ export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML = { tokenizerSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER, } as const; -/** - * @category Models - Text Embeddings - */ -export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16 = { - modelName: 'distiluse-base-multilingual-cased-v2-coreml-fp16', - modelSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16_MODEL, - tokenizerSource: DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER, -} as const; - /** * @category Models - Text Embeddings */ @@ -1216,10 +1196,8 @@ export const MODEL_REGISTRY = { ALL_MPNET_BASE_V2, MULTI_QA_MINILM_L6_COS_V1, MULTI_QA_MPNET_BASE_DOT_V1, - DISTILUSE_BASE_MULTILINGUAL_CASED_V2, DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W, DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML, - DISTILUSE_BASE_MULTILINGUAL_CASED_V2_COREML_FP16, CLIP_VIT_BASE_PATCH32_TEXT, BK_SDM_TINY_VPRED_512, BK_SDM_TINY_VPRED_256, diff --git a/packages/react-native-executorch/src/types/textEmbeddings.ts b/packages/react-native-executorch/src/types/textEmbeddings.ts index 6a2fb7af32..45f636c8e5 100644 --- a/packages/react-native-executorch/src/types/textEmbeddings.ts +++ 
b/packages/react-native-executorch/src/types/textEmbeddings.ts @@ -10,10 +10,8 @@ export type TextEmbeddingsModelName = | 'all-mpnet-base-v2' | 'multi-qa-minilm-l6-cos-v1' | 'multi-qa-mpnet-base-dot-v1' - | 'distiluse-base-multilingual-cased-v2' | 'distiluse-base-multilingual-cased-v2-8da4w' | 'distiluse-base-multilingual-cased-v2-coreml' - | 'distiluse-base-multilingual-cased-v2-coreml-fp16' | 'clip-vit-base-patch32-text'; /**