From bb5a49682c2f07f515f02cecaa8fc79c64fdee17 Mon Sep 17 00:00:00 2001 From: fzowl Date: Sat, 20 Dec 2025 18:53:12 +0100 Subject: [PATCH 1/4] Adding VoyageAI's voyage-multimodal-3.5 --- weaviate/collections/classes/config_vectorizers.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index 6d7a10c27..a4641a465 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -62,7 +62,10 @@ "voyage-finance-2", "voyage-multilingual-2", ] -VoyageMultimodalModel: TypeAlias = Literal["voyage-multimodal-3",] +VoyageMultimodalModel: TypeAlias = Literal[ + "voyage-multimodal-3", + "voyage-multimodal-3.5", +] AWSModel: TypeAlias = Literal[ "amazon.titan-embed-text-v1", "cohere.embed-english-v3", From f9406477211b8bb7e8db8fe05460c99d16d73d44 Mon Sep 17 00:00:00 2001 From: fzowl Date: Mon, 22 Dec 2025 16:09:51 +0100 Subject: [PATCH 2/4] Adding VoyageAI's voyage-multimodal-3.5 --- test/collection/test_config.py | 55 +++++++++++++++++++ .../classes/config_named_vectors.py | 6 ++ .../collections/classes/config_vectorizers.py | 20 +++++-- .../collections/classes/config_vectors.py | 7 ++- 4 files changed, 81 insertions(+), 7 deletions(-) diff --git a/test/collection/test_config.py b/test/collection/test_config.py index eab0a82b2..1c978f168 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -127,6 +127,61 @@ def test_basic_config(): } }, ), + ( + Configure.Vectorizer.multi2vec_voyageai( + model="voyage-multimodal-3.5", + truncation=True, + output_encoding="base64", + vectorize_collection_name=True, + base_url="https://api.voyageai.com", + ), + { + "multi2vec-voyageai": { + "model": "voyage-multimodal-3.5", + "truncation": True, + "baseURL": "https://api.voyageai.com/", + } + }, + ), + ( + Configure.Vectorizer.multi2vec_voyageai( + model="voyage-multimodal-3.5", + truncation=True, + text_fields=[Multi2VecField(name="text", weight=0.2)], + image_fields=[Multi2VecField(name="image", weight=0.3)], + video_fields=[Multi2VecField(name="video", weight=0.5)], + ), + { + "multi2vec-voyageai": { + "model": "voyage-multimodal-3.5", + "truncation": True, + "textFields": ["text"], + "imageFields": ["image"], + "videoFields": ["video"], + "weights": { + "textFields": [0.2], + "imageFields": [0.3], + "videoFields": [0.5], + }, + } + }, + ), + ( + Configure.Vectorizer.multi2vec_voyageai( + model="voyage-multimodal-3.5", + dimensions=512, + text_fields=["text"], + video_fields=["video"], + ), + { + "multi2vec-voyageai": { + "model": "voyage-multimodal-3.5", + "dimensions": 512, + "textFields": ["text"], + "videoFields": ["video"], + } + }, + ), ( Configure.Vectorizer.multi2vec_nvidia( model="nvidia/nvclip", diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py index bc1d27cd7..f4552b573 100644 --- a/weaviate/collections/classes/config_named_vectors.py +++ b/weaviate/collections/classes/config_named_vectors.py @@ -700,9 +700,11 @@ def multi2vec_voyageai( base_url: Optional[AnyHttpUrl] = None, model: Optional[Union[VoyageMultimodalModel, str]] = None, truncation: Optional[bool] = None, + dimensions: Optional[int] = None, output_encoding: Optional[str] = None, image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + video_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, vector_index_config: Optional[_VectorIndexConfigCreate] = None, vectorize_collection_name: bool = True, ) -> _NamedVectorConfigCreate: @@ -717,9 +719,11 @@ def multi2vec_voyageai( vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. model: The model to use. Defaults to `None`, which uses the server-defined default. truncation: The truncation strategy to use. Defaults to `None`, which uses the server-defined default. + dimensions: The number of dimensions for the output embeddings. Defaults to `None`, which uses the model's default. base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. image_fields: The image fields to use in vectorization. text_fields: The text fields to use in vectorization. + video_fields: The video fields to use in vectorization. Raises: pydantic.ValidationError: If `model` is not a valid value from the `VoyageaiMultimodalModel` type. @@ -730,8 +734,10 @@ def multi2vec_voyageai( baseURL=base_url, model=model, truncation=truncation, + dimensions=dimensions, imageFields=_map_multi2vec_fields(image_fields), textFields=_map_multi2vec_fields(text_fields), + videoFields=_map_multi2vec_fields(video_fields), ), vector_index_config=vector_index_config, ) diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index a4641a465..2418de830 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -569,6 +569,8 @@ class _Multi2VecVoyageaiConfig(_Multi2VecBase): baseURL: Optional[AnyHttpUrl] model: Optional[str] truncation: Optional[bool] + dimensions: Optional[int] + videoFields: Optional[List[Multi2VecField]] def _to_dict(self) -> Dict[str, Any]: ret_dict = super()._to_dict() @@ -900,37 +902,43 @@ def multi2vec_cohere( @staticmethod def multi2vec_voyageai( *, - model: Optional[Union[CohereMultimodalModel, str]] = None, + model: Optional[Union[VoyageMultimodalModel, str]] = None, truncation: Optional[bool] = None, - output_encoding: Optional[str], + dimensions: Optional[int] = None, + output_encoding: Optional[str] = None, vectorize_collection_name: bool = True, base_url: Optional[AnyHttpUrl] = None, image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + video_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, ) -> _VectorizerConfigCreate: - """Create a `_Multi2VecCohereConfig` object for use when vectorizing using the `multi2vec-cohere` model. + """Create a `_Multi2VecVoyageaiConfig` object for use when vectorizing using the `multi2vec-voyageai` model. - See the [documentation](https://weaviate.io/developers/weaviate/model-providers/cohere/embeddings-multimodal) + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings-multimodal) for detailed usage. Args: model: The model to use. Defaults to `None`, which uses the server-defined default. - truncate: The truncation strategy to use. Defaults to `None`, which uses the server-defined default. + truncation: The truncation strategy to use. Defaults to `None`, which uses the server-defined default. + dimensions: The number of dimensions for the output embeddings. Defaults to `None`, which uses the model's default (1024 for voyage-multimodal-3.5). output_encoding: Deprecated, has no effect. vectorize_collection_name: Deprecated, has no effect. base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. image_fields: The image fields to use in vectorization. text_fields: The text fields to use in vectorization. + video_fields: The video fields to use in vectorization. Raises: - pydantic.ValidationError: If `model` is not a valid value from the `CohereMultimodalModel` type or if `truncate` is not a valid value from the `CohereTruncation` type. + pydantic.ValidationError: If `model` is not a valid value from the `VoyageMultimodalModel` type. """ return _Multi2VecVoyageaiConfig( baseURL=base_url, model=model, truncation=truncation, + dimensions=dimensions, imageFields=_map_multi2vec_fields(image_fields), textFields=_map_multi2vec_fields(text_fields), + videoFields=_map_multi2vec_fields(video_fields), ) @staticmethod diff --git a/weaviate/collections/classes/config_vectors.py b/weaviate/collections/classes/config_vectors.py index 5f3a765ed..cbfe5c8cd 100644 --- a/weaviate/collections/classes/config_vectors.py +++ b/weaviate/collections/classes/config_vectors.py @@ -1114,6 +1114,8 @@ def multi2vec_voyageai( image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, model: Optional[Union[VoyageMultimodalModel, str]] = None, text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + video_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + dimensions: Optional[int] = None, truncation: Optional[bool] = None, vector_index_config: Optional[_VectorIndexConfigCreate] = None, ) -> _VectorConfigCreate: @@ -1128,8 +1130,9 @@ def multi2vec_voyageai( base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. image_fields: The image fields to use in vectorization. model: The model to use. Defaults to `None`, which uses the server-defined default. - output_encoding: The output encoding to use. Defaults to `None`, which uses the server-defined default. text_fields: The text fields to use in vectorization. + video_fields: The video fields to use in vectorization. + dimensions: The number of dimensions for the output embeddings. Defaults to `None`, which uses the model's default. truncation: The truncation strategy to use. Defaults to `None`, which uses the server-defined default. vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default @@ -1142,8 +1145,10 @@ def multi2vec_voyageai( baseURL=base_url, model=model, truncation=truncation, + dimensions=dimensions, imageFields=_map_multi2vec_fields(image_fields), textFields=_map_multi2vec_fields(text_fields), + videoFields=_map_multi2vec_fields(video_fields), ), vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), ) From 3cc1257b3bf5573093c41e527857f7dedfc3e9cf Mon Sep 17 00:00:00 2001 From: JP Hwang Date: Wed, 31 Dec 2025 13:00:08 +0000 Subject: [PATCH 3/4] Remove changes to deprecated vectorizer factories --- test/collection/test_config.py | 108 +++++++++--------- .../classes/config_named_vectors.py | 8 +- .../collections/classes/config_vectorizers.py | 10 +- 3 files changed, 58 insertions(+), 68 deletions(-) diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 1c978f168..e4b2c2a23 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -127,61 +127,6 @@ def test_basic_config(): } }, ), - ( - Configure.Vectorizer.multi2vec_voyageai( - model="voyage-multimodal-3.5", - truncation=True, - output_encoding="base64", - vectorize_collection_name=True, - base_url="https://api.voyageai.com", - ), - { - "multi2vec-voyageai": { - "model": "voyage-multimodal-3.5", - "truncation": True, - "baseURL": "https://api.voyageai.com/", - } - }, - ), - ( - Configure.Vectorizer.multi2vec_voyageai( - model="voyage-multimodal-3.5", - truncation=True, - text_fields=[Multi2VecField(name="text", weight=0.2)], - image_fields=[Multi2VecField(name="image", weight=0.3)], - video_fields=[Multi2VecField(name="video", weight=0.5)], - ), - { - "multi2vec-voyageai": { - "model": "voyage-multimodal-3.5", - "truncation": True, - "textFields": ["text"], - "imageFields": ["image"], - "videoFields": ["video"], - "weights": { - "textFields": [0.2], - "imageFields": [0.3], - "videoFields": [0.5], - }, - } - }, - ), - ( - Configure.Vectorizer.multi2vec_voyageai( - model="voyage-multimodal-3.5", - dimensions=512, - text_fields=["text"], - video_fields=["video"], - ), - { - "multi2vec-voyageai": { - "model": "voyage-multimodal-3.5", - "dimensions": 512, - "textFields": ["text"], - "videoFields": ["video"], - } - }, - ), ( Configure.Vectorizer.multi2vec_nvidia( model="nvidia/nvclip", @@ -2479,6 +2424,59 @@ def test_config_with_named_vectors( } }, ), + ( + Configure.Vectors.multi2vec_voyageai( + model="voyage-multimodal-3.5", + truncation=True, + base_url="https://api.voyageai.com", + ), + { + "multi2vec-voyageai": { + "model": "voyage-multimodal-3.5", + "truncation": True, + "baseURL": "https://api.voyageai.com/", + } + }, + ), + ( + Configure.Vectors.multi2vec_voyageai( + model="voyage-multimodal-3.5", + truncation=True, + text_fields=[Multi2VecField(name="text", weight=0.2)], + image_fields=[Multi2VecField(name="image", weight=0.3)], + video_fields=[Multi2VecField(name="video", weight=0.5)], + ), + { + "multi2vec-voyageai": { + "model": "voyage-multimodal-3.5", + "truncation": True, + "textFields": ["text"], + "imageFields": ["image"], + "videoFields": ["video"], + "weights": { + "textFields": [0.2], + "imageFields": [0.3], + "videoFields": [0.5], + }, + } + }, + ), + ( + Configure.Vectors.multi2vec_voyageai( + model="voyage-multimodal-3.5", + dimensions=512, + text_fields=["text"], + video_fields=["video"], + ), + { + "multi2vec-voyageai": { + "model": "voyage-multimodal-3.5", + "dimensions": 512, + "textFields": ["text"], + "videoFields": ["video"], + } + }, + ), ( [ Configure.Vectors.multi2vec_clip( diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py index f4552b573..de2f5577d 100644 --- a/weaviate/collections/classes/config_named_vectors.py +++ b/weaviate/collections/classes/config_named_vectors.py @@ -700,11 +700,9 @@ def multi2vec_voyageai( base_url: Optional[AnyHttpUrl] = None, model: Optional[Union[VoyageMultimodalModel, str]] = None, truncation: Optional[bool] = None, - dimensions: Optional[int] = None, output_encoding: Optional[str] = None, image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, - video_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, vector_index_config: Optional[_VectorIndexConfigCreate] = None, vectorize_collection_name: bool = True, ) -> _NamedVectorConfigCreate: @@ -719,11 +717,9 @@ def multi2vec_voyageai( vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. model: The model to use. Defaults to `None`, which uses the server-defined default. truncation: The truncation strategy to use. Defaults to `None`, which uses the server-defined default. - dimensions: The number of dimensions for the output embeddings. Defaults to `None`, which uses the model's default. base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. image_fields: The image fields to use in vectorization. text_fields: The text fields to use in vectorization. - video_fields: The video fields to use in vectorization. Raises: pydantic.ValidationError: If `model` is not a valid value from the `VoyageaiMultimodalModel` type. @@ -734,10 +730,10 @@ def multi2vec_voyageai( baseURL=base_url, model=model, truncation=truncation, - dimensions=dimensions, + dimensions=None, imageFields=_map_multi2vec_fields(image_fields), textFields=_map_multi2vec_fields(text_fields), - videoFields=_map_multi2vec_fields(video_fields), + videoFields=None, ), vector_index_config=vector_index_config, ) diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index 2418de830..3fa389095 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -904,13 +904,11 @@ def multi2vec_voyageai( *, model: Optional[Union[VoyageMultimodalModel, str]] = None, truncation: Optional[bool] = None, - dimensions: Optional[int] = None, - output_encoding: Optional[str] = None, + output_encoding: Optional[str], vectorize_collection_name: bool = True, base_url: Optional[AnyHttpUrl] = None, image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, - video_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, ) -> _VectorizerConfigCreate: """Create a `_Multi2VecVoyageaiConfig` object for use when vectorizing using the `multi2vec-voyageai` model. @@ -920,13 +918,11 @@ def multi2vec_voyageai( Args: model: The model to use. Defaults to `None`, which uses the server-defined default. truncation: The truncation strategy to use. Defaults to `None`, which uses the server-defined default. - dimensions: The number of dimensions for the output embeddings. Defaults to `None`, which uses the model's default (1024 for voyage-multimodal-3.5). output_encoding: Deprecated, has no effect. vectorize_collection_name: Deprecated, has no effect. base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. image_fields: The image fields to use in vectorization. text_fields: The text fields to use in vectorization. - video_fields: The video fields to use in vectorization. Raises: pydantic.ValidationError: If `model` is not a valid value from the `VoyageMultimodalModel` type. @@ -935,10 +931,10 @@ def multi2vec_voyageai( baseURL=base_url, model=model, truncation=truncation, - dimensions=dimensions, + dimensions=None, imageFields=_map_multi2vec_fields(image_fields), textFields=_map_multi2vec_fields(text_fields), - videoFields=_map_multi2vec_fields(video_fields), + videoFields=None, ) @staticmethod From b783e080391ed62b52e7b6fe6651a3ae22a0fc8e Mon Sep 17 00:00:00 2001 From: JP Hwang Date: Wed, 31 Dec 2025 13:15:10 +0000 Subject: [PATCH 4/4] update tests --- test/collection/test_config.py | 100 ++++++++++++++++++++------------- 1 file changed, 62 insertions(+), 38 deletions(-) diff --git a/test/collection/test_config.py b/test/collection/test_config.py index e4b2c2a23..a58d794a6 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -2425,55 +2425,79 @@ def test_config_with_named_vectors( }, ), ( - Configure.Vectors.multi2vec_voyageai( - model="voyage-multimodal-3.5", - truncation=True, - base_url="https://api.voyageai.com", - ), + [ + Configure.Vectors.multi2vec_voyageai( + name="test", + model="voyage-multimodal-3.5", + truncation=True, + base_url="https://api.voyageai.com", + ), + ], { - "multi2vec-voyageai": { - "model": "voyage-multimodal-3.5", - "truncation": True, - "baseURL": "https://api.voyageai.com/", + "test": { + "vectorizer": { + "multi2vec-voyageai": { + "model": "voyage-multimodal-3.5", + "truncation": True, + "baseURL": "https://api.voyageai.com/", + } + }, + "vectorIndexType": "hnsw", } }, ), ( - Configure.Vectors.multi2vec_voyageai( - model="voyage-multimodal-3.5", - truncation=True, - text_fields=[Multi2VecField(name="text", weight=0.2)], - image_fields=[Multi2VecField(name="image", weight=0.3)], - video_fields=[Multi2VecField(name="video", weight=0.5)], - ), + [ + Configure.Vectors.multi2vec_voyageai( + name="test", + model="voyage-multimodal-3.5", + truncation=True, + text_fields=[Multi2VecField(name="text", weight=0.2)], + image_fields=[Multi2VecField(name="image", weight=0.3)], + video_fields=[Multi2VecField(name="video", weight=0.5)], + ) + ], { - "multi2vec-voyageai": { - "model": "voyage-multimodal-3.5", - "truncation": True, - "textFields": ["text"], - "imageFields": ["image"], - "videoFields": ["video"], - "weights": { - "textFields": [0.2], - "imageFields": [0.3], - "videoFields": [0.5], + "test": { + "vectorizer": { + "multi2vec-voyageai": { + "model": "voyage-multimodal-3.5", + "truncation": True, + "textFields": ["text"], + "imageFields": ["image"], + "videoFields": ["video"], + "weights": { + "textFields": [0.2], + "imageFields": [0.3], + "videoFields": [0.5], + }, + } }, - } + "vectorIndexType": "hnsw", + }, }, ), ( - Configure.Vectors.multi2vec_voyageai( - model="voyage-multimodal-3.5", - dimensions=512, - text_fields=["text"], - video_fields=["video"], - ), + [ + Configure.Vectors.multi2vec_voyageai( + name="test", + model="voyage-multimodal-3.5", + dimensions=512, + text_fields=["text"], + video_fields=["video"], + ) + ], { - "multi2vec-voyageai": { - "model": "voyage-multimodal-3.5", - "dimensions": 512, - "textFields": ["text"], - "videoFields": ["video"], + "test": { + "vectorizer": { + "multi2vec-voyageai": { + "model": "voyage-multimodal-3.5", + "dimensions": 512, + "textFields": ["text"], + "videoFields": ["video"], + } + }, + "vectorIndexType": "hnsw", } }, ),