From 94d212714b6f3ca6f8d660271d701fa2a9eb7f9c Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Thu, 20 Jun 2024 11:17:24 +0100 Subject: [PATCH 1/6] Adding qdrant vector record store implementation. --- dotnet/Directory.Packages.props | 1 + .../Connectors.Memory.Qdrant.csproj | 1 + .../QdrantRecordMapperType.cs | 21 + .../QdrantVectorRecordStore.cs | 388 ++++++++++++++++++ .../QdrantVectorRecordStoreOptions.cs | 48 +++ .../QdrantVectorStoreRecordMapper.cs | 299 ++++++++++++++ .../QdrantVectorStoreRecordMapperOptions.cs | 27 ++ .../Qdrant/QdrantVectorRecordStoreTests.cs | 282 +++++++++++++ .../QdrantVectorStoreCollectionFixture.cs | 10 + .../Memory/Qdrant/QdrantVectorStoreFixture.cs | 325 +++++++++++++++ .../IntegrationTests/IntegrationTests.csproj | 1 + 11 files changed, 1403 insertions(+) create mode 100644 dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantRecordMapperType.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStoreOptions.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapperOptions.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreCollectionFixture.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs diff --git a/dotnet/Directory.Packages.props b/dotnet/Directory.Packages.props index 38b249cc229a..5a1ba6b7fef8 100644 --- a/dotnet/Directory.Packages.props +++ b/dotnet/Directory.Packages.props @@ -92,6 +92,7 @@ + diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/Connectors.Memory.Qdrant.csproj b/dotnet/src/Connectors/Connectors.Memory.Qdrant/Connectors.Memory.Qdrant.csproj index da803a71b52a..f06d269cdabc 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/Connectors.Memory.Qdrant.csproj +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/Connectors.Memory.Qdrant.csproj @@ -20,6 +20,7 @@ + diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantRecordMapperType.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantRecordMapperType.cs new file mode 100644 index 000000000000..cb8f7bf8b14c --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantRecordMapperType.cs @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Qdrant.Client.Grpc; + +namespace Microsoft.SemanticKernel.Connectors.Qdrant; + +/// +/// The types of mapper supported by . +/// +public enum QdrantRecordMapperType +{ + /// + /// Use the default mapper that is provided by the semantic kernel SDK that uses json as an intermediary to allows automatic mapping to a wide variety of types. + /// + Default, + + /// + /// Use a custom mapper between and the data model. + /// + QdrantPointStructCustomMapper +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs new file mode 100644 index 000000000000..a1aa76a422cd --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs @@ -0,0 +1,388 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; +using Grpc.Core; +using Microsoft.SemanticKernel.Memory; +using Qdrant.Client; +using Qdrant.Client.Grpc; + +namespace Microsoft.SemanticKernel.Connectors.Qdrant; + +/// +/// Service for storing and retrieving vector records, that uses Qdrant as the underlying storage. +/// +/// The data model to use for adding, updating and retrieving data from storage. +public sealed class QdrantVectorRecordStore : IVectorRecordStore, IVectorRecordStore + where TRecord : class +{ + /// Qdrant client that can be used to manage the collections and points in a Qdrant store. + private readonly QdrantClient _qdrantClient; + + /// Optional configuration options for this class. + private readonly QdrantVectorRecordStoreOptions _options; + + /// A mapper to use for converting between qdrant point and consumer models. + private readonly IVectorStoreRecordMapper _mapper; + + /// + /// Initializes a new instance of the class. + /// + /// Qdrant client that can be used to manage the collections and points in a Qdrant store. + /// Optional configuration options for this class. + /// + /// + public QdrantVectorRecordStore(QdrantClient qdrantClient, QdrantVectorRecordStoreOptions? options = null) + { + // Verify. + Verify.NotNull(qdrantClient); + + // Assign. + this._qdrantClient = qdrantClient; + this._options = options ?? new QdrantVectorRecordStoreOptions(); + + // Assign Mapper. + if (this._options.MapperType == QdrantRecordMapperType.QdrantPointStructCustomMapper) + { + // Custom Mapper. + if (this._options.PointStructCustomMapper is null) + { + throw new ArgumentException($"The {nameof(QdrantVectorRecordStoreOptions.PointStructCustomMapper)} option needs to be set if a {nameof(QdrantVectorRecordStoreOptions.MapperType)} of {nameof(QdrantRecordMapperType.QdrantPointStructCustomMapper)} has been chosen.", nameof(options)); + } + + this._mapper = this._options.PointStructCustomMapper; + } + else + { + // Default Mapper. + this._mapper = new QdrantVectorStoreRecordMapper(new QdrantVectorStoreRecordMapperOptions + { + HasNamedVectors = this._options.HasNamedVectors, + VectorStoreRecordDefinition = this._options.VectorStoreRecordDefinition + }); + } + } + + /// + public async Task GetAsync(ulong key, GetRecordOptions? options = null, CancellationToken cancellationToken = default) + { + Verify.NotNull(key); + + var retrievedPoints = await this.GetBatchAsync([key], options, cancellationToken).ToListAsync(cancellationToken).ConfigureAwait(false); + return retrievedPoints[0]; + } + + /// + public async Task GetAsync(Guid key, GetRecordOptions? options = null, CancellationToken cancellationToken = default) + { + Verify.NotNull(key); + + var retrievedPoints = await this.GetBatchAsync([key], options, cancellationToken).ToListAsync(cancellationToken).ConfigureAwait(false); + return retrievedPoints[0]; + } + + /// + public IAsyncEnumerable GetBatchAsync(IEnumerable keys, GetRecordOptions? options = default, CancellationToken cancellationToken = default) + { + return this.GetBatchByPointIdAsync(keys, key => new PointId { Num = key }, options, cancellationToken); + } + + /// + public IAsyncEnumerable GetBatchAsync(IEnumerable keys, GetRecordOptions? options = default, CancellationToken cancellationToken = default) + { + return this.GetBatchByPointIdAsync(keys, key => new PointId { Uuid = key.ToString("D") }, options, cancellationToken); + } + + /// + public Task DeleteAsync(ulong key, DeleteRecordOptions? options = null, CancellationToken cancellationToken = default) + { + Verify.NotNull(key); + + var collectionName = this.ChooseCollectionName(options?.CollectionName); + return RunOperationAsync( + collectionName, + "Delete", + () => this._qdrantClient.DeleteAsync( + collectionName, + key, + wait: true, + cancellationToken: cancellationToken)); + } + + /// + public Task DeleteAsync(Guid key, DeleteRecordOptions? options = null, CancellationToken cancellationToken = default) + { + Verify.NotNull(key); + + var collectionName = this.ChooseCollectionName(options?.CollectionName); + return RunOperationAsync( + collectionName, + "Delete", + () => this._qdrantClient.DeleteAsync( + collectionName, + key, + wait: true, + cancellationToken: cancellationToken)); + } + + /// + public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default) + { + Verify.NotNull(keys); + + var collectionName = this.ChooseCollectionName(options?.CollectionName); + return RunOperationAsync( + collectionName, + "Delete", + () => this._qdrantClient.DeleteAsync( + collectionName, + keys.ToList(), + wait: true, + cancellationToken: cancellationToken)); + } + + /// + public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default) + { + Verify.NotNull(keys); + + var collectionName = this.ChooseCollectionName(options?.CollectionName); + return RunOperationAsync( + collectionName, + "Delete", + () => this._qdrantClient.DeleteAsync( + collectionName, + keys.ToList(), + wait: true, + cancellationToken: cancellationToken)); + } + + /// + public async Task UpsertAsync(TRecord record, UpsertRecordOptions? options = default, CancellationToken cancellationToken = default) + { + Verify.NotNull(record); + + // Create options. + var collectionName = this.ChooseCollectionName(options?.CollectionName); + + // Create point from record. + var pointStruct = RunModelConversion( + collectionName, + "Upsert", + () => this._mapper.MapFromDataToStorageModel(record)); + + // Upsert. + await RunOperationAsync( + collectionName, + "Upsert", + () => this._qdrantClient.UpsertAsync(collectionName, [pointStruct], true, cancellationToken: cancellationToken)).ConfigureAwait(false); + return pointStruct.Id.Num; + } + + /// + async Task IVectorRecordStore.UpsertAsync(TRecord record, UpsertRecordOptions? options, CancellationToken cancellationToken) + { + Verify.NotNull(record); + + // Create options. + var collectionName = this.ChooseCollectionName(options?.CollectionName); + + // Create point from record. + var pointStruct = RunModelConversion( + collectionName, + "Upsert", + () => this._mapper.MapFromDataToStorageModel(record)); + + // Upsert. + await RunOperationAsync( + collectionName, + "Upsert", + () => this._qdrantClient.UpsertAsync(collectionName, [pointStruct], true, cancellationToken: cancellationToken)).ConfigureAwait(false); + return Guid.Parse(pointStruct.Id.Uuid); + } + + /// + public async IAsyncEnumerable UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? options = default, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + Verify.NotNull(records); + + // Create Options + var collectionName = this.ChooseCollectionName(options?.CollectionName); + + // Create points from records. + var pointStructs = RunModelConversion( + collectionName, + "Upsert", + () => records.Select(this._mapper.MapFromDataToStorageModel).ToList()); + + // Upsert. + await RunOperationAsync( + collectionName, + "Upsert", + () => this._qdrantClient.UpsertAsync(collectionName, pointStructs, true, cancellationToken: cancellationToken)).ConfigureAwait(false); + + foreach (var pointStruct in pointStructs) + { + yield return pointStruct.Id.Num; + } + } + + /// + async IAsyncEnumerable IVectorRecordStore.UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? options, [EnumeratorCancellation] CancellationToken cancellationToken) + { + Verify.NotNull(records); + + // Create Options + var collectionName = this.ChooseCollectionName(options?.CollectionName); + + // Create points from records. + var pointStructs = RunModelConversion( + collectionName, + "Upsert", + () => records.Select(this._mapper.MapFromDataToStorageModel).ToList()); + + // Upsert. + await RunOperationAsync( + collectionName, + "Upsert", + () => this._qdrantClient.UpsertAsync(collectionName, pointStructs, true, cancellationToken: cancellationToken)).ConfigureAwait(false); + + foreach (var pointStruct in pointStructs) + { + yield return Guid.Parse(pointStruct.Id.Uuid); + } + } + + /// + /// Get the requested records from the Qdrant store using the provided keys. + /// + /// The keys of the points to retrieve. + /// Function to convert the provided keys to point ids. + /// The retrieval options. + /// The to monitor for cancellation requests. The default is . + /// The retrieved points. + private async IAsyncEnumerable GetBatchByPointIdAsync( + IEnumerable keys, + Func keyConverter, + GetRecordOptions? options, + [EnumeratorCancellation] CancellationToken cancellationToken) + { + Verify.NotNull(keys); + + // Create options. + var collectionName = this.ChooseCollectionName(options?.CollectionName); + var pointsIds = keys.Select(key => keyConverter(key)).ToArray(); + + // Retrieve data points. + var retrievedPoints = await RunOperationAsync( + collectionName, + "Retrieve", + () => this._qdrantClient.RetrieveAsync(collectionName, pointsIds, true, options?.IncludeVectors ?? false, cancellationToken: cancellationToken)).ConfigureAwait(false); + + // Check that we found the required number of values. + if (retrievedPoints.Count != pointsIds.Length) + { + throw new VectorStoreOperationException("Record not found"); + } + + // Convert the retrieved points to the target data model. + foreach (var retrievedPoint in retrievedPoints) + { + var pointStruct = new PointStruct + { + Id = retrievedPoint.Id, + Vectors = retrievedPoint.Vectors, + Payload = { } + }; + + foreach (KeyValuePair payloadEntry in retrievedPoint.Payload) + { + pointStruct.Payload.Add(payloadEntry.Key, payloadEntry.Value); + } + + yield return RunModelConversion( + collectionName, + "Retrieve", + () => this._mapper.MapFromStorageToDataModel(pointStruct, options)); + } + } + + /// + /// Choose the right collection name to use for the operation by using the one provided + /// as part of the operation options, or the default one provided at construction time. + /// + /// The collection name provided on the operation options. + /// The collection name to use. + private string ChooseCollectionName(string? operationCollectionName) + { + var collectionName = operationCollectionName ?? this._options.DefaultCollectionName; + if (collectionName is null) + { +#pragma warning disable CA2208 // Instantiate argument exceptions correctly + throw new ArgumentException("Collection name must be provided in the operation options, since no default was provided at construction time.", "options"); +#pragma warning restore CA2208 // Instantiate argument exceptions correctly + } + + return collectionName; + } + + /// + /// Run the given operation and wrap any with ."/> + /// + /// The response type of the operation. + /// The name of the collection the operation is being run on. + /// The type of database operation being run. + /// The operation to run. + /// The result of the operation. + private static async Task RunOperationAsync(string collectionName, string operationName, Func> operation) + { + try + { + return await operation.Invoke().ConfigureAwait(false); + } + catch (RpcException ex) + { + var wrapperException = new VectorStoreOperationException("Call to vector store failed.", ex); + + // Using Open Telemetry standard for naming of these entries. + // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ + wrapperException.Data.Add("db.system", "Qdrant"); + wrapperException.Data.Add("db.collection.name", collectionName); + wrapperException.Data.Add("db.operation.name", operationName); + + throw wrapperException; + } + } + + /// + /// Run the given model conversion and wrap any exceptions with . + /// + /// The response type of the operation. + /// The name of the collection the operation is being run on. + /// The type of database operation being run. + /// The operation to run. + /// The result of the operation. + private static T RunModelConversion(string collectionName, string operationName, Func operation) + { + try + { + return operation.Invoke(); + } + catch (Exception ex) when (ex is not VectorStoreRecordMappingException) + { + var wrapperException = new VectorStoreRecordMappingException("Failed to convert vector store record.", ex); + + // Using Open Telemetry standard for naming of these entries. + // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ + wrapperException.Data.Add("db.system", "Qdrant"); + wrapperException.Data.Add("db.collection.name", collectionName); + wrapperException.Data.Add("db.operation.name", operationName); + + throw wrapperException; + } + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStoreOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStoreOptions.cs new file mode 100644 index 000000000000..d3e568057976 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStoreOptions.cs @@ -0,0 +1,48 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.SemanticKernel.Memory; +using Qdrant.Client.Grpc; + +namespace Microsoft.SemanticKernel.Connectors.Qdrant; + +/// +/// Options when creating a . +/// +public sealed class QdrantVectorRecordStoreOptions + where TRecord : class +{ + /// + /// Gets or sets the default collection name to use. + /// If not provided here, the collection name will need to be provided for each operation or the operation will throw. + /// + public string? DefaultCollectionName { get; init; } = null; + + /// + /// Gets or sets a value indicating whether the vectors in the store are named and multiple vectors are supported, or whether there is just a single unnamed vector per qdrant point. + /// Defaults to single vector per point. + /// + public bool HasNamedVectors { get; set; } = false; + + /// + /// Gets or sets the choice of mapper to use when converting between the data model and the qdrant point. + /// + public QdrantRecordMapperType MapperType { get; init; } = QdrantRecordMapperType.Default; + + /// + /// Gets or sets an optional custom mapper to use when converting between the data model and the qdrant point. + /// + /// + /// Set to to use this mapper."/> + /// + public IVectorStoreRecordMapper? PointStructCustomMapper { get; init; } = null; + + /// + /// Gets or sets an optional record definition that defines the schema of the record type. + /// + /// + /// If not provided, the schema will be inferred from the record model class using reflection. + /// In this case, the record model properties must be annotated with the appropriate attributes to indicate their usage. + /// See , and . + /// + public VectorStoreRecordDefinition? VectorStoreRecordDefinition { get; init; } = null; +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs new file mode 100644 index 000000000000..29f8d9002cb2 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs @@ -0,0 +1,299 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Reflection; +using System.Text.Json; +using System.Text.Json.Nodes; +using Microsoft.SemanticKernel.Memory; +using Qdrant.Client.Grpc; + +namespace Microsoft.SemanticKernel.Connectors.Qdrant; + +/// +/// Mapper between a Qdrant record and the consumer data model that uses json as an intermediary to allow supporting a wide range of models. +/// +/// The consumer data model to map to or from. +internal sealed class QdrantVectorStoreRecordMapper : IVectorStoreRecordMapper + where TRecord : class +{ + /// A set of types that a key on the provided model may have. + private static readonly HashSet s_supportedKeyTypes = + [ + typeof(ulong), + typeof(Guid) + ]; + + /// A set of types that data properties on the provided model may have. + private static readonly HashSet s_supportedDataTypes = + [ + typeof(List), + typeof(List), + typeof(List), + typeof(List), + typeof(List), + typeof(List), + typeof(string), + typeof(int), + typeof(long), + typeof(double), + typeof(float), + typeof(bool), + typeof(int?), + typeof(long?), + typeof(double?), + typeof(float?), + typeof(bool?) + ]; + + /// A set of types that vectors on the provided model may have. + /// + /// While qdrant supports float32 and uint64, the api only supports float64, therefore + /// any float32 vectors will be converted to float64 before being sent to qdrant. + /// + private static readonly HashSet s_supportedVectorTypes = + [ + typeof(ReadOnlyMemory), + typeof(ReadOnlyMemory?), + typeof(ReadOnlyMemory), + typeof(ReadOnlyMemory?) + ]; + + /// A list of property info objects that point at the payload properties in the current model, and allows easy reading and writing of these properties. + private readonly List _payloadPropertiesInfo = new(); + + /// A list of property info objects that point at the vector properties in the current model, and allows easy reading and writing of these properties. + private readonly List _vectorPropertiesInfo = new(); + + /// A property info object that points at the key property for the current model, allowing easy reading and writing of this property. + private readonly PropertyInfo _keyPropertyInfo; + + /// Optional configuration options for this class. + private readonly QdrantVectorStoreRecordMapperOptions _options; + + /// + /// Initializes a new instance of the class. + /// + /// Optional options to use when doing the model conversion. + public QdrantVectorStoreRecordMapper(QdrantVectorStoreRecordMapperOptions? options) + { + this._options = options ?? new QdrantVectorStoreRecordMapperOptions(); + + // Enumerate public properties using configuration or attributes. + (PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties; + if (this._options.VectorStoreRecordDefinition is not null) + { + properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), this._options.VectorStoreRecordDefinition, supportsMultipleVectors: this._options.HasNamedVectors); + } + else + { + properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), supportsMultipleVectors: this._options.HasNamedVectors); + } + + // Validate property types and store for later use. + VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, s_supportedDataTypes, "Data"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.vectorProperties, s_supportedVectorTypes, "Vector"); + + this._keyPropertyInfo = properties.keyProperty; + this._payloadPropertiesInfo = properties.dataProperties; + this._vectorPropertiesInfo = properties.vectorProperties; + } + + /// + public PointStruct MapFromDataToStorageModel(TRecord dataModel) + { + PointId pointId; + if (this._keyPropertyInfo.PropertyType == typeof(ulong)) + { + var key = this._keyPropertyInfo.GetValue(dataModel) as ulong? ?? throw new VectorStoreRecordMappingException($"Missing key property {this._keyPropertyInfo.Name} on provided record of type {typeof(TRecord).FullName}."); + pointId = new PointId { Num = key }; + } + else if (this._keyPropertyInfo.PropertyType == typeof(Guid)) + { + var key = this._keyPropertyInfo.GetValue(dataModel) as Guid? ?? throw new VectorStoreRecordMappingException($"Missing key property {this._keyPropertyInfo.Name} on provided record of type {typeof(TRecord).FullName}."); + pointId = new PointId { Uuid = key.ToString("D") }; + } + else + { + throw new VectorStoreRecordMappingException($"Unsupported key type {this._keyPropertyInfo.PropertyType.FullName} for key property {this._keyPropertyInfo.Name} on provided record of type {typeof(TRecord).FullName}."); + } + + // Create point. + var pointStruct = new PointStruct + { + Id = pointId, + Vectors = new Vectors(), + Payload = { }, + }; + + // Add point payload. + foreach (var payloadPropertyInfo in this._payloadPropertiesInfo) + { + var propertyName = VectorStoreRecordPropertyReader.GetSerializedPropertyName(payloadPropertyInfo); + var propertyValue = payloadPropertyInfo.GetValue(dataModel); + pointStruct.Payload.Add(propertyName, ConvertToGrpcFieldValue(propertyValue)); + } + + // Add vectors. + if (this._options.HasNamedVectors) + { + var namedVectors = new NamedVectors(); + foreach (var vectorPropertyInfo in this._vectorPropertiesInfo) + { + var propertyName = VectorStoreRecordPropertyReader.GetSerializedPropertyName(vectorPropertyInfo); + var propertyValue = vectorPropertyInfo.GetValue(dataModel); + if (propertyValue is not null) + { + var castPropertyValue = (ReadOnlyMemory)propertyValue; + namedVectors.Vectors.Add(propertyName, castPropertyValue.ToArray()); + } + } + + pointStruct.Vectors.Vectors_ = namedVectors; + } + else + { + var vectorPropertyInfo = this._vectorPropertiesInfo.First(); + if (vectorPropertyInfo.GetValue(dataModel) is ReadOnlyMemory floatROM) + { + pointStruct.Vectors.Vector = floatROM.ToArray(); + } + else + { + throw new VectorStoreRecordMappingException($"Vector property {vectorPropertyInfo.Name} on provided record of type {typeof(TRecord).FullName} may not be null when not using named vectors."); + } + } + + return pointStruct; + } + + /// + public TRecord MapFromStorageToDataModel(PointStruct storageModel, GetRecordOptions? options = default) + { + // Get the key property name and value. + var keyPropertyName = VectorStoreRecordPropertyReader.GetSerializedPropertyName(this._keyPropertyInfo); + var keyPropertyValue = storageModel.Id.HasNum ? storageModel.Id.Num as object : storageModel.Id.Uuid as object; + + // Create a json object to represent the point. + var outputJsonObject = new JsonObject + { + { keyPropertyName, JsonValue.Create(keyPropertyValue) }, + }; + + // Add each vector property if embeddings are included in the point. + if (options?.IncludeVectors is true) + { + foreach (var vectorProperty in this._vectorPropertiesInfo) + { + var propertyName = VectorStoreRecordPropertyReader.GetSerializedPropertyName(vectorProperty); + + if (this._options.HasNamedVectors) + { + if (storageModel.Vectors.Vectors_.Vectors.TryGetValue(propertyName, out var vector)) + { + outputJsonObject.Add(propertyName, new JsonArray(vector.Data.Select(x => JsonValue.Create(x)).ToArray())); + } + } + else + { + outputJsonObject.Add(propertyName, new JsonArray(storageModel.Vectors.Vector.Data.Select(x => JsonValue.Create(x)).ToArray())); + } + } + } + + // Add each payload property. + foreach (var payloadProperty in this._payloadPropertiesInfo) + { + var propertyName = VectorStoreRecordPropertyReader.GetSerializedPropertyName(payloadProperty); + if (storageModel.Payload.TryGetValue(propertyName, out var value)) + { + outputJsonObject.Add(propertyName, ConvertFromGrpcFieldValueToJsonNode(value)); + } + } + + // Convert from json object to the target data model. + return JsonSerializer.Deserialize(outputJsonObject)!; + } + + /// + /// Convert the given to the correct native type based on its properties. + /// + /// The value to convert to a native type. + /// The converted native value. + /// Thrown when an unsupported type is enountered. + private static JsonNode? ConvertFromGrpcFieldValueToJsonNode(Value payloadValue) + { + return payloadValue.KindCase switch + { + Value.KindOneofCase.NullValue => null, + Value.KindOneofCase.IntegerValue => JsonValue.Create(payloadValue.IntegerValue), + Value.KindOneofCase.StringValue => JsonValue.Create(payloadValue.StringValue), + Value.KindOneofCase.DoubleValue => JsonValue.Create(payloadValue.DoubleValue), + Value.KindOneofCase.BoolValue => JsonValue.Create(payloadValue.BoolValue), + Value.KindOneofCase.ListValue => new JsonArray(payloadValue.ListValue.Values.Select(x => ConvertFromGrpcFieldValueToJsonNode(x)).ToArray()), + Value.KindOneofCase.StructValue => new JsonObject(payloadValue.StructValue.Fields.ToDictionary(x => x.Key, x => ConvertFromGrpcFieldValueToJsonNode(x.Value))), + _ => throw new VectorStoreRecordMappingException($"Unsupported grpc value kind {payloadValue.KindCase}."), + }; + } + + /// + /// Convert the given to a object that can be stored in Qdrant. + /// + /// The object to convert. + /// The converted Qdrant value. + /// Thrown when an unsupported type is enountered. + private static Value ConvertToGrpcFieldValue(object? sourceValue) + { + var value = new Value(); + if (sourceValue is null) + { + value.NullValue = NullValue.NullValue; + } + else if (sourceValue is int intValue) + { + value.IntegerValue = intValue; + } + else if (sourceValue is long longValue) + { + value.IntegerValue = longValue; + } + else if (sourceValue is string stringValue) + { + value.StringValue = stringValue; + } + else if (sourceValue is float floatValue) + { + value.DoubleValue = floatValue; + } + else if (sourceValue is double doubleValue) + { + value.DoubleValue = doubleValue; + } + else if (sourceValue is bool boolValue) + { + value.BoolValue = boolValue; + } + else if (sourceValue is IEnumerable || + sourceValue is IEnumerable || + sourceValue is IEnumerable || + sourceValue is IEnumerable || + sourceValue is IEnumerable || + sourceValue is IEnumerable) + { + var listValue = sourceValue as IEnumerable; + value.ListValue = new ListValue(); + foreach (var item in listValue!) + { + value.ListValue.Values.Add(ConvertToGrpcFieldValue(item)); + } + } + else + { + throw new VectorStoreRecordMappingException($"Unsupported source value type {sourceValue?.GetType().FullName}."); + } + + return value; + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapperOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapperOptions.cs new file mode 100644 index 000000000000..c5a9ffa46865 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapperOptions.cs @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.SemanticKernel.Memory; + +namespace Microsoft.SemanticKernel.Connectors.Qdrant; + +/// +/// Options when creating a . +/// +internal sealed class QdrantVectorStoreRecordMapperOptions +{ + /// + /// Gets or sets a value indicating whether the vectors in the store are named, or whether there is just a single vector per qdrant point. + /// Defaults to single vector per point. + /// + public bool HasNamedVectors { get; set; } = false; + + /// + /// Gets or sets an optional record definition that defines the schema of the record type. + /// + /// + /// If not provided, the schema will be inferred from the record model class using reflection. + /// In this case, the record model properties must be annotated with the appropriate attributes to indicate their usage. + /// See , and . + /// + public VectorStoreRecordDefinition? VectorStoreRecordDefinition { get; init; } = null; +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs new file mode 100644 index 000000000000..0cabe2d8514e --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs @@ -0,0 +1,282 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Globalization; +using System.Linq; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Connectors.Qdrant; +using Microsoft.SemanticKernel.Memory; +using Qdrant.Client.Grpc; +using Xunit; +using Xunit.Abstractions; +using static SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant.QdrantVectorStoreFixture; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant; + +/// +/// Contains tests for the class. +/// +/// Used for logging. +/// Redis setup and teardown. +[Collection("QdrantVectorStoreCollection")] +public sealed class QdrantVectorRecordStoreTests(ITestOutputHelper output, QdrantVectorStoreFixture fixture) +{ + [Theory] + [InlineData(true, "singleVectorHotels", false)] + [InlineData(false, "singleVectorHotels", false)] + [InlineData(true, "namedVectorsHotels", true)] + [InlineData(false, "namedVectorsHotels", true)] + public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition, string collectionName, bool hasNamedVectors) + { + // Arrange. + var options = new QdrantVectorRecordStoreOptions + { + HasNamedVectors = hasNamedVectors, + DefaultCollectionName = collectionName, + VectorStoreRecordDefinition = useRecordDefinition ? fixture.HotelVectorStoreRecordDefinition : null + }; + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + + var record = new HotelInfo + { + HotelId = 20, + HotelName = "My Hotel 20", + HotelCode = 20, + HotelRating = 4.3f, + ParkingIncluded = true, + Tags = { "t1", "t2" }, + Description = "This is a great hotel.", + DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f }, + }; + + // Act. + var upsertResult = await sut.UpsertAsync(record); + + // Assert. + var getResult = await sut.GetAsync(20, new GetRecordOptions { IncludeVectors = true }); + Assert.Equal(20ul, upsertResult); + Assert.Equal(record.HotelId, getResult?.HotelId); + Assert.Equal(record.HotelName, getResult?.HotelName); + Assert.Equal(record.HotelCode, getResult?.HotelCode); + Assert.Equal(record.HotelRating, getResult?.HotelRating); + Assert.Equal(record.ParkingIncluded, getResult?.ParkingIncluded); + Assert.Equal(record.Tags.ToArray(), getResult?.Tags.ToArray()); + Assert.Equal(record.Description, getResult?.Description); + + // TODO: figure out why original array is different from the one we get back. + //Assert.Equal(record.DescriptionEmbedding?.ToArray(), getResult?.DescriptionEmbedding?.ToArray()); + + // Output. + output.WriteLine(upsertResult.ToString(CultureInfo.InvariantCulture)); + output.WriteLine(getResult?.ToString()); + } + + [Fact] + public async Task ItCanUpsertAndRemoveDocumentWithGuidIdToVectorStoreAsync() + { + // Arrange. + var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = false, DefaultCollectionName = "singleVectorGuidIdHotels" }; + IVectorRecordStore sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + + var record = new HotelInfoWithGuidId + { + HotelId = Guid.Parse("55555555-5555-5555-5555-555555555555"), + HotelName = "My Hotel 5", + Description = "This is a great hotel.", + DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f }, + }; + + // Act. + var upsertResult = await sut.UpsertAsync(record); + + // Assert. + var getResult = await sut.GetAsync(Guid.Parse("55555555-5555-5555-5555-555555555555"), new GetRecordOptions { IncludeVectors = true }); + Assert.Equal(Guid.Parse("55555555-5555-5555-5555-555555555555"), upsertResult); + Assert.Equal(record.HotelId, getResult?.HotelId); + Assert.Equal(record.HotelName, getResult?.HotelName); + Assert.Equal(record.Description, getResult?.Description); + + // Act. + await sut.DeleteAsync(Guid.Parse("55555555-5555-5555-5555-555555555555")); + + // Assert. + await Assert.ThrowsAsync(async () => await sut.GetAsync(Guid.Parse("55555555-5555-5555-5555-555555555555"))); + + // Output. + output.WriteLine(upsertResult.ToString("D")); + output.WriteLine(getResult?.ToString()); + } + + [Theory] + [InlineData(true, true, "singleVectorHotels", false)] + [InlineData(true, false, "singleVectorHotels", false)] + [InlineData(false, true, "singleVectorHotels", false)] + [InlineData(false, false, "singleVectorHotels", false)] + [InlineData(true, true, "namedVectorsHotels", true)] + [InlineData(true, false, "namedVectorsHotels", true)] + [InlineData(false, true, "namedVectorsHotels", true)] + [InlineData(false, false, "namedVectorsHotels", true)] + public async Task ItCanGetDocumentFromVectorStoreAsync(bool useRecordDefinition, bool withEmbeddings, string collectionName, bool hasNamedVectors) + { + // Arrange. + var options = new QdrantVectorRecordStoreOptions + { + HasNamedVectors = hasNamedVectors, + DefaultCollectionName = collectionName, + VectorStoreRecordDefinition = useRecordDefinition ? fixture.HotelVectorStoreRecordDefinition : null + }; + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + + // Act. + var getResult = await sut.GetAsync(11, new GetRecordOptions { IncludeVectors = withEmbeddings }); + + // Assert. + Assert.Equal(11ul, getResult?.HotelId); + Assert.Equal("My Hotel 11", getResult?.HotelName); + Assert.Equal(11, getResult?.HotelCode); + Assert.True(getResult?.ParkingIncluded); + Assert.Equal(4.5f, getResult?.HotelRating); + Assert.Equal(2, getResult?.Tags.Count); + Assert.Equal("t1", getResult?.Tags[0]); + Assert.Equal("t2", getResult?.Tags[1]); + Assert.Equal("This is a great hotel.", getResult?.Description); + if (withEmbeddings) + { + Assert.NotNull(getResult?.DescriptionEmbedding); + } + else + { + Assert.Null(getResult?.DescriptionEmbedding); + } + + // Output. + output.WriteLine(getResult?.ToString()); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task ItCanGetDocumentWithGuidIdFromVectorStoreAsync(bool useRecordDefinition, bool withEmbeddings) + { + // Arrange. + var options = new QdrantVectorRecordStoreOptions + { + HasNamedVectors = false, + DefaultCollectionName = "singleVectorGuidIdHotels", + VectorStoreRecordDefinition = useRecordDefinition ? fixture.HotelWithGuidIdVectorStoreRecordDefinition : null + }; + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + + // Act. + var getResult = await sut.GetAsync(Guid.Parse("11111111-1111-1111-1111-111111111111"), new GetRecordOptions { IncludeVectors = withEmbeddings }); + + // Assert. + Assert.Equal(Guid.Parse("11111111-1111-1111-1111-111111111111"), getResult?.HotelId); + Assert.Equal("My Hotel 11", getResult?.HotelName); + Assert.Equal("This is a great hotel.", getResult?.Description); + if (withEmbeddings) + { + Assert.NotNull(getResult?.DescriptionEmbedding); + } + else + { + Assert.Null(getResult?.DescriptionEmbedding); + } + + // Output. + output.WriteLine(getResult?.ToString()); + } + + [Fact] + public async Task ItCanGetManyDocumentsFromVectorStoreAsync() + { + // Arrange + var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = true, DefaultCollectionName = "namedVectorsHotels" }; + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + + // Act + var hotels = sut.GetBatchAsync([11, 12], new GetRecordOptions { IncludeVectors = true }); + + // Assert + Assert.NotNull(hotels); + var hotelsList = await hotels.ToListAsync(); + Assert.Equal(2, hotelsList.Count); + + // Output + foreach (var hotel in hotelsList) + { + output.WriteLine(hotel?.ToString() ?? "Null"); + } + } + + [Fact] + public async Task ItThrowsForPartialBatchResultAsync() + { + // Arrange. + var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = true, DefaultCollectionName = "namedVectorsHotels" }; + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + + // Act. + await Assert.ThrowsAsync(async () => await sut.GetBatchAsync([1, 5, 2]).ToListAsync()); + } + + [Theory] + [InlineData(true, "singleVectorHotels", false)] + [InlineData(false, "singleVectorHotels", false)] + [InlineData(true, "namedVectorsHotels", true)] + [InlineData(false, "namedVectorsHotels", true)] + public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefinition, string collectionName, bool hasNamedVectors) + { + // Arrange. + var options = new QdrantVectorRecordStoreOptions + { + HasNamedVectors = hasNamedVectors, + DefaultCollectionName = collectionName, + VectorStoreRecordDefinition = useRecordDefinition ? fixture.HotelVectorStoreRecordDefinition : null + }; + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + + var record = new HotelInfo + { + HotelId = 20, + HotelName = "My Hotel 20", + HotelCode = 20, + ParkingIncluded = true, + Description = "This is a great hotel.", + DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f }, + }; + await sut.UpsertAsync(record); + + // Act. + await sut.DeleteAsync(20); + + // Assert. + await Assert.ThrowsAsync(async () => await sut.GetAsync(20)); + } + + [Fact] + public async Task ItThrowsMappingExceptionForFailedMapperAsync() + { + // Arrange + var options = new QdrantVectorRecordStoreOptions { DefaultCollectionName = "singleVectorHotels", MapperType = QdrantRecordMapperType.QdrantPointStructCustomMapper, PointStructCustomMapper = new FailingMapper() }; + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + + // Act & Assert + await Assert.ThrowsAsync(async () => await sut.GetAsync(11, new GetRecordOptions { IncludeVectors = true })); + } + + private sealed class FailingMapper : IVectorStoreRecordMapper + { + public PointStruct MapFromDataToStorageModel(HotelInfo dataModel) + { + throw new NotImplementedException(); + } + + public HotelInfo MapFromStorageToDataModel(PointStruct storageModel, GetRecordOptions? options = null) + { + throw new NotImplementedException(); + } + } +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreCollectionFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreCollectionFixture.cs new file mode 100644 index 000000000000..a7b565d71c2d --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreCollectionFixture.cs @@ -0,0 +1,10 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Xunit; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant; + +[CollectionDefinition("QdrantVectorStoreCollection")] +public class QdrantVectorStoreCollectionFixture : ICollectionFixture +{ +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs new file mode 100644 index 000000000000..80316f1bd6fd --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs @@ -0,0 +1,325 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Threading.Tasks; +using Docker.DotNet; +using Docker.DotNet.Models; +using Grpc.Core; +using Microsoft.SemanticKernel.Memory; +using Qdrant.Client; +using Qdrant.Client.Grpc; +using Xunit; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant; + +public class QdrantVectorStoreFixture : IAsyncLifetime +{ + /// The docker client we are using to create a qdrant container with. + private readonly DockerClient _client; + + /// The id of the qdrant container that we are testing with. + private string? _containerId = null; + +#pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. + + /// + /// Initializes a new instance of the class. + /// + public QdrantVectorStoreFixture() + { + using var dockerClientConfiguration = new DockerClientConfiguration(); + this._client = dockerClientConfiguration.CreateClient(); + this.HotelVectorStoreRecordDefinition = new VectorStoreRecordDefinition + { + Properties = new List + { + new VectorStoreRecordKeyProperty("HotelId"), + new VectorStoreRecordDataProperty("HotelName"), + new VectorStoreRecordDataProperty("HotelCode"), + new VectorStoreRecordDataProperty("ParkingIncluded"), + new VectorStoreRecordDataProperty("HotelRating"), + new VectorStoreRecordDataProperty("Tags"), + new VectorStoreRecordDataProperty("Description"), + new VectorStoreRecordVectorProperty("DescriptionEmbedding") + } + }; + this.HotelWithGuidIdVectorStoreRecordDefinition = new VectorStoreRecordDefinition + { + Properties = new List + { + new VectorStoreRecordKeyProperty("HotelId"), + new VectorStoreRecordDataProperty("HotelName"), + new VectorStoreRecordDataProperty("Description"), + new VectorStoreRecordVectorProperty("DescriptionEmbedding") + } + }; + } + +#pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. + + /// Gets the qdrant client connection to use for tests. + public QdrantClient QdrantClient { get; private set; } + + /// Gets the manually created vector store record definition for our test model. + public VectorStoreRecordDefinition HotelVectorStoreRecordDefinition { get; private set; } + + /// Gets the manually created vector store record definition for our test model. + public VectorStoreRecordDefinition HotelWithGuidIdVectorStoreRecordDefinition { get; private set; } + + /// + /// Create / Recreate qdrant docker container and run it. + /// + /// An async task. + public async Task InitializeAsync() + { + this._containerId = await SetupQdrantContainerAsync(this._client); + + // Connect to qdrant. + this.QdrantClient = new QdrantClient("localhost"); + + // Create schemas for the vector store. + var vectorParamsMap = new VectorParamsMap(); + vectorParamsMap.Map.Add("DescriptionEmbedding", new VectorParams { Size = 4, Distance = Distance.Cosine }); + + // Wait for the qdrant container to be ready. + var retryCount = 0; + while (retryCount++ < 5) + { + try + { + await this.QdrantClient.ListCollectionsAsync(); + } + catch (RpcException e) + { + if (e.StatusCode != Grpc.Core.StatusCode.Unavailable) + { + throw; + } + + await Task.Delay(1000); + } + } + + await this.QdrantClient.CreateCollectionAsync( + "namedVectorsHotels", + vectorParamsMap); + + await this.QdrantClient.CreateCollectionAsync( + "singleVectorHotels", + new VectorParams { Size = 4, Distance = Distance.Cosine }); + + await this.QdrantClient.CreateCollectionAsync( + "singleVectorGuidIdHotels", + new VectorParams { Size = 4, Distance = Distance.Cosine }); + + // Create test data common to both named and unnamed vectors. + var tags = new ListValue(); + tags.Values.Add("t1"); + tags.Values.Add("t2"); + var tagsValue = new Value(); + tagsValue.ListValue = tags; + + // Create some test data using named vectors. + var embedding = new[] { 30f, 31f, 32f, 33f }; + + var namedVectors1 = new NamedVectors(); + var namedVectors2 = new NamedVectors(); + var namedVectors3 = new NamedVectors(); + + namedVectors1.Vectors.Add("DescriptionEmbedding", embedding); + namedVectors2.Vectors.Add("DescriptionEmbedding", embedding); + namedVectors3.Vectors.Add("DescriptionEmbedding", embedding); + + List namedVectorPoints = + [ + new PointStruct + { + Id = 11, + Vectors = new Vectors { Vectors_ = namedVectors1 }, + Payload = { ["HotelName"] = "My Hotel 11", ["HotelCode"] = 11, ["ParkingIncluded"] = true, ["Tags"] = tagsValue, ["HotelRating"] = 4.5f, ["Description"] = "This is a great hotel." } + }, + new PointStruct + { + Id = 12, + Vectors = new Vectors { Vectors_ = namedVectors2 }, + Payload = { ["HotelName"] = "My Hotel 12", ["HotelCode"] = 12, ["ParkingIncluded"] = false, ["Description"] = "This is a great hotel." } + }, + new PointStruct + { + Id = 13, + Vectors = new Vectors { Vectors_ = namedVectors3 }, + Payload = { ["HotelName"] = "My Hotel 13", ["HotelCode"] = 13, ["ParkingIncluded"] = false, ["Description"] = "This is a great hotel." } + }, + ]; + + await this.QdrantClient.UpsertAsync("namedVectorsHotels", namedVectorPoints); + + // Create some test data using a single unnamed vector. + List unnamedVectorPoints = + [ + new PointStruct + { + Id = 11, + Vectors = embedding, + Payload = { ["HotelName"] = "My Hotel 11", ["HotelCode"] = 11, ["ParkingIncluded"] = true, ["Tags"] = tagsValue, ["HotelRating"] = 4.5f, ["Description"] = "This is a great hotel." } + }, + new PointStruct + { + Id = 12, + Vectors = embedding, + Payload = { ["HotelName"] = "My Hotel 12", ["HotelCode"] = 12, ["ParkingIncluded"] = false, ["Description"] = "This is a great hotel." } + }, + new PointStruct + { + Id = 13, + Vectors = embedding, + Payload = { ["HotelName"] = "My Hotel 13", ["HotelCode"] = 13, ["ParkingIncluded"] = false, ["Description"] = "This is a great hotel." } + }, + ]; + + await this.QdrantClient.UpsertAsync("singleVectorHotels", unnamedVectorPoints); + + // Create some test data using a single unnamed vector and a guid id. + List unnamedVectorGuidIdPoints = + [ + new PointStruct + { + Id = Guid.Parse("11111111-1111-1111-1111-111111111111"), + Vectors = embedding, + Payload = { ["HotelName"] = "My Hotel 11", ["Description"] = "This is a great hotel." } + }, + new PointStruct + { + Id = Guid.Parse("22222222-2222-2222-2222-222222222222"), + Vectors = embedding, + Payload = { ["HotelName"] = "My Hotel 12", ["Description"] = "This is a great hotel." } + }, + new PointStruct + { + Id = Guid.Parse("33333333-3333-3333-3333-333333333333"), + Vectors = embedding, + Payload = { ["HotelName"] = "My Hotel 13", ["Description"] = "This is a great hotel." } + }, + ]; + + await this.QdrantClient.UpsertAsync("singleVectorGuidIdHotels", unnamedVectorGuidIdPoints); + } + + /// + /// Delete the docker container after the test run. + /// + /// An async task. + public async Task DisposeAsync() + { + if (this._containerId != null) + { + await this._client.Containers.StopContainerAsync(this._containerId, new ContainerStopParameters()); + await this._client.Containers.RemoveContainerAsync(this._containerId, new ContainerRemoveParameters()); + } + } + + /// + /// Setup the qdrant container by pulling the image and running it. + /// + /// The docker client to create the container with. + /// The id of the container. + private static async Task SetupQdrantContainerAsync(DockerClient client) + { + await client.Images.CreateImageAsync( + new ImagesCreateParameters + { + FromImage = "qdrant/qdrant", + Tag = "latest", + }, + null, + new Progress()); + + var container = await client.Containers.CreateContainerAsync(new CreateContainerParameters() + { + Image = "qdrant/qdrant", + HostConfig = new HostConfig() + { + PortBindings = new Dictionary> + { + {"6333", new List {new() {HostPort = "6333" } }}, + {"6334", new List {new() {HostPort = "6334" } }} + }, + PublishAllPorts = true + }, + ExposedPorts = new Dictionary + { + { "6333", default }, + { "6334", default } + }, + }); + + await client.Containers.StartContainerAsync( + container.ID, + new ContainerStartParameters()); + + return container.ID; + } + + /// + /// A test model for the qdrant vector store. + /// +#pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. + public record HotelInfo() + { + /// The key of the record. + [VectorStoreRecordKey] + public ulong HotelId { get; init; } + + /// A string metadata field. + [VectorStoreRecordData] + public string? HotelName { get; set; } + + /// An int metadata field. + [VectorStoreRecordData] + public int HotelCode { get; set; } + + /// A float metadata field. + [VectorStoreRecordData] + public float? HotelRating { get; set; } + + /// A bool metadata field. + [VectorStoreRecordData] + public bool ParkingIncluded { get; set; } + + [VectorStoreRecordData] + public List Tags { get; set; } = new List(); + + /// A data field. + [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "DescriptionEmbedding")] + public string Description { get; set; } + + /// A vector field. + [VectorStoreRecordVector] + public ReadOnlyMemory? DescriptionEmbedding { get; set; } + } + + /// + /// A test model for the qdrant vector store. + /// +#pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. + public record HotelInfoWithGuidId() + { + /// The key of the record. + [VectorStoreRecordKey] + public Guid HotelId { get; init; } + + /// A string metadata field. + [VectorStoreRecordData] + public string? HotelName { get; set; } + + /// A data field. + [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "DescriptionEmbedding")] + public string Description { get; set; } + + /// A vector field. + [VectorStoreRecordVector] + public ReadOnlyMemory? DescriptionEmbedding { get; set; } + } +} +#pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. diff --git a/dotnet/src/IntegrationTests/IntegrationTests.csproj b/dotnet/src/IntegrationTests/IntegrationTests.csproj index cd2c947609e8..2b2e03fe26b1 100644 --- a/dotnet/src/IntegrationTests/IntegrationTests.csproj +++ b/dotnet/src/IntegrationTests/IntegrationTests.csproj @@ -64,6 +64,7 @@ + From 304a7aea0c078cc92314f61c1e3e9c9117e543b0 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Thu, 20 Jun 2024 11:22:13 +0100 Subject: [PATCH 2/6] Fix typo --- .../Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs index 29f8d9002cb2..796caf138445 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs @@ -222,7 +222,7 @@ public TRecord MapFromStorageToDataModel(PointStruct storageModel, GetRecordOpti /// /// The value to convert to a native type. /// The converted native value. - /// Thrown when an unsupported type is enountered. + /// Thrown when an unsupported type is encountered. private static JsonNode? ConvertFromGrpcFieldValueToJsonNode(Value payloadValue) { return payloadValue.KindCase switch @@ -243,7 +243,7 @@ public TRecord MapFromStorageToDataModel(PointStruct storageModel, GetRecordOpti /// /// The object to convert. /// The converted Qdrant value. - /// Thrown when an unsupported type is enountered. + /// Thrown when an unsupported type is encountered. private static Value ConvertToGrpcFieldValue(object? sourceValue) { var value = new Value(); From dbb7eaf3b29fa3aa4ca0c249f44d74eb60cdac74 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Thu, 20 Jun 2024 13:50:09 +0100 Subject: [PATCH 3/6] Add qdrant test to verify batch delete with missing items behavior. --- .../Qdrant/QdrantVectorRecordStoreTests.cs | 64 +++++++++++++------ 1 file changed, 44 insertions(+), 20 deletions(-) diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs index 0cabe2d8514e..25e67904d3d0 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs @@ -37,17 +37,7 @@ public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition }; var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); - var record = new HotelInfo - { - HotelId = 20, - HotelName = "My Hotel 20", - HotelCode = 20, - HotelRating = 4.3f, - ParkingIncluded = true, - Tags = { "t1", "t2" }, - Description = "This is a great hotel.", - DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f }, - }; + var record = this.CreateTestHotel(20); // Act. var upsertResult = await sut.UpsertAsync(record); @@ -238,19 +228,38 @@ public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefiniti }; var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); - var record = new HotelInfo + await sut.UpsertAsync(this.CreateTestHotel(20)); + + // Act. + await sut.DeleteAsync(20); + // Also delete a non-existing key to test that the operation does not fail for these. + await sut.DeleteAsync(21); + + // Assert. + await Assert.ThrowsAsync(async () => await sut.GetAsync(20)); + } + + [Theory] + [InlineData(true, "singleVectorHotels", false)] + [InlineData(false, "singleVectorHotels", false)] + [InlineData(true, "namedVectorsHotels", true)] + [InlineData(false, "namedVectorsHotels", true)] + public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync(bool useRecordDefinition, string collectionName, bool hasNamedVectors) + { + // Arrange. + var options = new QdrantVectorRecordStoreOptions { - HotelId = 20, - HotelName = "My Hotel 20", - HotelCode = 20, - ParkingIncluded = true, - Description = "This is a great hotel.", - DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f }, + HasNamedVectors = hasNamedVectors, + DefaultCollectionName = collectionName, + VectorStoreRecordDefinition = useRecordDefinition ? fixture.HotelVectorStoreRecordDefinition : null }; - await sut.UpsertAsync(record); + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + + await sut.UpsertAsync(this.CreateTestHotel(20)); // Act. - await sut.DeleteAsync(20); + // Also delete a non-existing key to test that the operation does not fail for these. + await sut.DeleteBatchAsync([20, 21]); // Assert. await Assert.ThrowsAsync(async () => await sut.GetAsync(20)); @@ -267,6 +276,21 @@ public async Task ItThrowsMappingExceptionForFailedMapperAsync() await Assert.ThrowsAsync(async () => await sut.GetAsync(11, new GetRecordOptions { IncludeVectors = true })); } + private HotelInfo CreateTestHotel(uint hotelId) + { + return new HotelInfo + { + HotelId = hotelId, + HotelName = $"My Hotel {hotelId}", + HotelCode = (int)hotelId, + HotelRating = 4.5f, + ParkingIncluded = true, + Tags = { "t1", "t2" }, + Description = "This is a great hotel.", + DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f }, + }; + } + private sealed class FailingMapper : IVectorStoreRecordMapper { public PointStruct MapFromDataToStorageModel(HotelInfo dataModel) From f8deb610c13f4db007caa432f233e5be1b6ffc84 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Fri, 21 Jun 2024 18:10:29 +0100 Subject: [PATCH 4/6] Updating connectors to support new agreed approach to non-existent records. --- .../AzureAISearchVectorRecordStore.cs | 46 +++++++++++++++++-- .../QdrantVectorRecordStore.cs | 14 ++---- .../RedisVectorRecordStore.cs | 6 +-- .../AzureAISearchVectorRecordStoreTests.cs | 36 +++++++-------- .../Qdrant/QdrantVectorRecordStoreTests.cs | 31 +++++++------ .../Redis/RedisVectorRecordStoreTests.cs | 33 ++++++------- .../Memory/IVectorRecordStore.cs | 7 +-- 7 files changed, 102 insertions(+), 71 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs index ad16bbd8761f..79148890b555 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs @@ -100,7 +100,7 @@ public AzureAISearchVectorRecordStore(SearchIndexClient searchIndexClient, Azure } /// - public Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default) + public Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default) { Verify.NotNullOrWhiteSpace(key); @@ -126,7 +126,13 @@ public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, G var searchClient = this.GetSearchClient(collectionName); var tasks = keys.Select(key => this.GetDocumentAndMapToDataModelAsync(searchClient, collectionName, key, innerOptions, cancellationToken)); var results = await Task.WhenAll(tasks).ConfigureAwait(false); - foreach (var result in results) { yield return result; } + foreach (var result in results) + { + if (result is not null) + { + yield return result; + } + } } /// @@ -203,7 +209,7 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco /// The azure ai search sdk options for getting a document. /// The to monitor for cancellation requests. The default is . /// The retrieved document, mapped to the consumer data model. - private async Task GetDocumentAndMapToDataModelAsync( + private async Task GetDocumentAndMapToDataModelAsync( SearchClient searchClient, string collectionName, string key, @@ -216,7 +222,12 @@ private async Task GetDocumentAndMapToDataModelAsync( var jsonObject = await RunOperationAsync( collectionName, "GetDocument", - () => searchClient.GetDocumentAsync(key, innerOptions, cancellationToken)).ConfigureAwait(false); + () => GetDocumentWithNotFoundHandlingAsync(searchClient, key, innerOptions, cancellationToken)).ConfigureAwait(false); + + if (jsonObject is null) + { + return null; + } return RunModelConversion( collectionName, @@ -228,7 +239,7 @@ private async Task GetDocumentAndMapToDataModelAsync( return await RunOperationAsync( collectionName, "GetDocument", - () => searchClient.GetDocumentAsync(key, innerOptions, cancellationToken)).ConfigureAwait(false); + () => GetDocumentWithNotFoundHandlingAsync(searchClient, key, innerOptions, cancellationToken)).ConfigureAwait(false); } /// @@ -321,6 +332,31 @@ private GetDocumentOptions ConvertGetDocumentOptions(GetRecordOptions? options) return innerOptions; } + /// + /// Get a document with the given key, and return null if it is not found. + /// + /// The type to deserialize the doucment to. + /// The search client to use when fetching the document. + /// The key of the record to get. + /// The azure ai search sdk options for getting a document. + /// The to monitor for cancellation requests. The default is . + /// The retrieved document, mapped to the consumer data model, or null if not found. + private static async Task GetDocumentWithNotFoundHandlingAsync( + SearchClient searchClient, + string key, + GetDocumentOptions innerOptions, + CancellationToken cancellationToken) + { + try + { + return await searchClient.GetDocumentAsync(key, innerOptions, cancellationToken).ConfigureAwait(false); + } + catch (RequestFailedException ex) when (ex.Status == 404) + { + return default; + } + } + /// /// Run the given operation and wrap any with ."/> /// diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs index a1aa76a422cd..e717b25c00ca 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs @@ -68,21 +68,21 @@ public QdrantVectorRecordStore(QdrantClient qdrantClient, QdrantVectorRecordStor } /// - public async Task GetAsync(ulong key, GetRecordOptions? options = null, CancellationToken cancellationToken = default) + public async Task GetAsync(ulong key, GetRecordOptions? options = null, CancellationToken cancellationToken = default) { Verify.NotNull(key); var retrievedPoints = await this.GetBatchAsync([key], options, cancellationToken).ToListAsync(cancellationToken).ConfigureAwait(false); - return retrievedPoints[0]; + return retrievedPoints.FirstOrDefault(); } /// - public async Task GetAsync(Guid key, GetRecordOptions? options = null, CancellationToken cancellationToken = default) + public async Task GetAsync(Guid key, GetRecordOptions? options = null, CancellationToken cancellationToken = default) { Verify.NotNull(key); var retrievedPoints = await this.GetBatchAsync([key], options, cancellationToken).ToListAsync(cancellationToken).ConfigureAwait(false); - return retrievedPoints[0]; + return retrievedPoints.FirstOrDefault(); } /// @@ -283,12 +283,6 @@ private async IAsyncEnumerable GetBatchByPointIdAsync( "Retrieve", () => this._qdrantClient.RetrieveAsync(collectionName, pointsIds, true, options?.IncludeVectors ?? false, cancellationToken: cancellationToken)).ConfigureAwait(false); - // Check that we found the required number of values. - if (retrievedPoints.Count != pointsIds.Length) - { - throw new VectorStoreOperationException("Record not found"); - } - // Convert the retrieved points to the target data model. foreach (var retrievedPoint in retrievedPoints) { diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs index 1198653c088a..1aa0b00d7639 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs @@ -111,7 +111,7 @@ public RedisVectorRecordStore(IDatabase database, RedisVectorRecordStoreOptions< } /// - public async Task GetAsync(string key, GetRecordOptions? options = null, CancellationToken cancellationToken = default) + public async Task GetAsync(string key, GetRecordOptions? options = null, CancellationToken cancellationToken = default) { Verify.NotNullOrWhiteSpace(key); @@ -134,7 +134,7 @@ public async Task GetAsync(string key, GetRecordOptions? options = null // Check if the key was found before trying to parse the result. if (redisResult.IsNull || redisResult is null) { - throw new VectorStoreOperationException($"Could not find document with key '{key}'"); + return null; } // Check if the value contained any json text before trying to parse the result. @@ -183,7 +183,7 @@ public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, G // Check if the key was found before trying to parse the result. if (redisResult.IsNull || redisResult is null) { - throw new VectorStoreOperationException($"Could not find document with key '{key}'"); + continue; } // Check if the value contained any json text before trying to parse the result. diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs index a72859a4dcd2..747076ba9779 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs @@ -147,7 +147,8 @@ public async Task ItCanGetManyDocumentsFromVectorStoreAsync() var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); // Act - var hotels = sut.GetBatchAsync(["BaseSet-1", "BaseSet-2", "BaseSet-3", "BaseSet-4"], new GetRecordOptions { IncludeVectors = true }); + // Also include one non-existing key to test that the operation does not fail for these and returns only the found ones. + var hotels = sut.GetBatchAsync(["BaseSet-1", "BaseSet-2", "BaseSet-3", "BaseSet-5", "BaseSet-4"], new GetRecordOptions { IncludeVectors = true }); // Assert Assert.NotNull(hotels); @@ -161,20 +162,6 @@ public async Task ItCanGetManyDocumentsFromVectorStoreAsync() } } - [Fact] - public async Task ItThrowsForPartialGetBatchResultAsync() - { - // Arrange. - var options = new AzureAISearchVectorRecordStoreOptions - { - DefaultCollectionName = fixture.TestIndexName - }; - var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); - - // Act. - await Assert.ThrowsAsync(async () => await sut.GetBatchAsync(["BaseSet-1", "BaseSet-5", "BaseSet-2"]).ToListAsync()); - } - [Theory(Skip = SkipReason)] [InlineData(true)] [InlineData(false)] @@ -195,7 +182,7 @@ public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefiniti await sut.DeleteAsync("Remove-2"); // Assert - await Assert.ThrowsAsync(async () => await sut.GetAsync("Remove-1", new GetRecordOptions { IncludeVectors = true })); + Assert.Null(await sut.GetAsync("Remove-1", new GetRecordOptions { IncludeVectors = true })); } [Fact(Skip = SkipReason)] @@ -216,9 +203,20 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() await sut.DeleteBatchAsync(["RemoveMany-1", "RemoveMany-2", "RemoveMany-3", "RemoveMany-4"]); // Assert - await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-1", new GetRecordOptions { IncludeVectors = true })); - await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-2", new GetRecordOptions { IncludeVectors = true })); - await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-3", new GetRecordOptions { IncludeVectors = true })); + Assert.Null(await sut.GetAsync("RemoveMany-1", new GetRecordOptions { IncludeVectors = true })); + Assert.Null(await sut.GetAsync("RemoveMany-2", new GetRecordOptions { IncludeVectors = true })); + Assert.Null(await sut.GetAsync("RemoveMany-3", new GetRecordOptions { IncludeVectors = true })); + } + + [Fact(Skip = SkipReason)] + public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() + { + // Arrange + var options = new AzureAISearchVectorRecordStoreOptions { DefaultCollectionName = fixture.TestIndexName }; + var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); + + // Act & Assert + Assert.Null(await sut.GetAsync("BaseSet-5", new GetRecordOptions { IncludeVectors = true })); } [Fact(Skip = SkipReason)] diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs index 25e67904d3d0..33793ca1479e 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs @@ -90,7 +90,7 @@ public async Task ItCanUpsertAndRemoveDocumentWithGuidIdToVectorStoreAsync() await sut.DeleteAsync(Guid.Parse("55555555-5555-5555-5555-555555555555")); // Assert. - await Assert.ThrowsAsync(async () => await sut.GetAsync(Guid.Parse("55555555-5555-5555-5555-555555555555"))); + Assert.Null(await sut.GetAsync(Guid.Parse("55555555-5555-5555-5555-555555555555"))); // Output. output.WriteLine(upsertResult.ToString("D")); @@ -187,7 +187,8 @@ public async Task ItCanGetManyDocumentsFromVectorStoreAsync() var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); // Act - var hotels = sut.GetBatchAsync([11, 12], new GetRecordOptions { IncludeVectors = true }); + // Also include one non-existing key to test that the operation does not fail for these and returns only the found ones. + var hotels = sut.GetBatchAsync([11, 15, 12], new GetRecordOptions { IncludeVectors = true }); // Assert Assert.NotNull(hotels); @@ -201,17 +202,6 @@ public async Task ItCanGetManyDocumentsFromVectorStoreAsync() } } - [Fact] - public async Task ItThrowsForPartialBatchResultAsync() - { - // Arrange. - var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = true, DefaultCollectionName = "namedVectorsHotels" }; - var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); - - // Act. - await Assert.ThrowsAsync(async () => await sut.GetBatchAsync([1, 5, 2]).ToListAsync()); - } - [Theory] [InlineData(true, "singleVectorHotels", false)] [InlineData(false, "singleVectorHotels", false)] @@ -236,7 +226,7 @@ public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefiniti await sut.DeleteAsync(21); // Assert. - await Assert.ThrowsAsync(async () => await sut.GetAsync(20)); + Assert.Null(await sut.GetAsync(20)); } [Theory] @@ -262,7 +252,18 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync(bool useRecordDef await sut.DeleteBatchAsync([20, 21]); // Assert. - await Assert.ThrowsAsync(async () => await sut.GetAsync(20)); + Assert.Null(await sut.GetAsync(20)); + } + + [Fact] + public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() + { + // Arrange + var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = false, DefaultCollectionName = "singleVectorHotels" }; + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + + // Act & Assert + Assert.Null(await sut.GetAsync(15, new GetRecordOptions { IncludeVectors = true })); } [Fact] diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs index 4d0a9f69473f..02cd55df0a41 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs @@ -146,7 +146,8 @@ public async Task ItCanGetManyDocumentsFromVectorStoreAsync() var sut = new RedisVectorRecordStore(fixture.Database, options); // Act - var hotels = sut.GetBatchAsync(["BaseSet-1", "BaseSet-2"], new GetRecordOptions { IncludeVectors = true }); + // Also include one non-existing key to test that the operation does not fail for these and returns only the found ones. + var hotels = sut.GetBatchAsync(["BaseSet-1", "BaseSet-5", "BaseSet-2"], new GetRecordOptions { IncludeVectors = true }); // Assert Assert.NotNull(hotels); @@ -171,17 +172,6 @@ public async Task ItFailsToGetDocumentsWithInvalidSchemaAsync() await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-4-Invalid", new GetRecordOptions { IncludeVectors = true })); } - [Fact] - public async Task ItThrowsForPartialGetBatchResultAsync() - { - // Arrange. - var options = new RedisVectorRecordStoreOptions { DefaultCollectionName = "hotels", PrefixCollectionNameToKeyNames = true }; - var sut = new RedisVectorRecordStore(fixture.Database, options); - - // Act & Assert. - await Assert.ThrowsAsync(async () => await sut.GetBatchAsync(["BaseSet-1", "nonexistent", "BaseSet-2"], new GetRecordOptions { IncludeVectors = true }).ToListAsync()); - } - [Theory] [InlineData(true)] [InlineData(false)] @@ -213,7 +203,7 @@ public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefiniti await sut.DeleteAsync("Remove-2"); // Assert. - await Assert.ThrowsAsync(async () => await sut.GetAsync("Remove-1")); + Assert.Null(await sut.GetAsync("Remove-1")); } [Fact] @@ -231,9 +221,20 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() await sut.DeleteBatchAsync(["RemoveMany-1", "RemoveMany-2", "RemoveMany-3", "RemoveMany-4"]); // Assert - await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-1", new GetRecordOptions { IncludeVectors = true })); - await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-2", new GetRecordOptions { IncludeVectors = true })); - await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-3", new GetRecordOptions { IncludeVectors = true })); + Assert.Null(await sut.GetAsync("RemoveMany-1", new GetRecordOptions { IncludeVectors = true })); + Assert.Null(await sut.GetAsync("RemoveMany-2", new GetRecordOptions { IncludeVectors = true })); + Assert.Null(await sut.GetAsync("RemoveMany-3", new GetRecordOptions { IncludeVectors = true })); + } + + [Fact] + public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() + { + // Arrange + var options = new RedisVectorRecordStoreOptions { DefaultCollectionName = "hotels", PrefixCollectionNameToKeyNames = true }; + var sut = new RedisVectorRecordStore(fixture.Database, options); + + // Act & Assert + Assert.Null(await sut.GetAsync("BaseSet-5", new GetRecordOptions { IncludeVectors = true })); } [Fact] diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorRecordStore.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorRecordStore.cs index 6852ca14b6bc..1caa5fd59018 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorRecordStore.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorRecordStore.cs @@ -18,7 +18,7 @@ public interface IVectorRecordStore { /// /// Gets a record from the vector store. Does not guarantee that the collection exists. - /// Throws if the record is not found. + /// Returns null if the record is not found. /// /// The unique id associated with the record to get. /// Optional options for retrieving the record. @@ -26,12 +26,13 @@ public interface IVectorRecordStore /// The record if found, otherwise null. /// Throw when the command fails to execute for any reason. /// Throw when mapping between the storage model and record data model fails. - Task GetAsync(TKey key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); + Task GetAsync(TKey key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); /// /// Gets a batch of records from the vector store. Does not guarantee that the collection exists. - /// Throws if any of the records are not found. /// Gets will be made in a single request or in a single parallel batch depending on the available store functionality. + /// Only found records will be returned, so the resultset may be smaller than the requested keys. + /// Throws for any issues other than records not being found. /// /// The unique ids associated with the record to get. /// Optional options for retrieving the records. From 03e293ab41e7325540112031b29b249fb9d51bdb Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Fri, 21 Jun 2024 18:13:44 +0100 Subject: [PATCH 5/6] Fix typo. --- .../AzureAISearchVectorRecordStore.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs index 79148890b555..7c1fd90cfcb1 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs @@ -335,7 +335,7 @@ private GetDocumentOptions ConvertGetDocumentOptions(GetRecordOptions? options) /// /// Get a document with the given key, and return null if it is not found. /// - /// The type to deserialize the doucment to. + /// The type to deserialize the document to. /// The search client to use when fetching the document. /// The key of the record to get. /// The azure ai search sdk options for getting a document. From 79915354fa32c870f4ba86e7b6376e4d0766887a Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Mon, 24 Jun 2024 12:09:21 +0100 Subject: [PATCH 6/6] Address code review comments. --- .../AzureAISearchVectorRecordStore.cs | 57 +++++-------- .../QdrantVectorRecordStore.cs | 83 ++++++++----------- .../QdrantVectorStoreRecordMapper.cs | 10 ++- .../RedisVectorRecordStore.cs | 45 +++------- .../Qdrant/QdrantVectorRecordStoreTests.cs | 2 +- .../src/Data/VectorStoreErrorHandler.cs | 45 ++++++++++ .../VectorStoreRecordPropertyReader.cs | 42 +++++++++- .../VectorStoreRecordPropertyReaderTests.cs | 49 ++++++++--- 8 files changed, 194 insertions(+), 139 deletions(-) create mode 100644 dotnet/src/InternalUtilities/src/Data/VectorStoreErrorHandler.cs rename dotnet/src/InternalUtilities/src/{Schema => Data}/VectorStoreRecordPropertyReader.cs (84%) rename dotnet/src/SemanticKernel.UnitTests/{Utilities => Data}/VectorStoreRecordPropertyReaderTests.cs (81%) diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs index 7c1fd90cfcb1..f5cf330e5f3e 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs @@ -24,6 +24,9 @@ namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; public sealed class AzureAISearchVectorRecordStore : IVectorRecordStore where TRecord : class { + /// The name of this database for telemetry purposes. + private const string DatabaseName = "AzureAISearch"; + /// A set of types that a key on the provided model may have. private static readonly HashSet s_supportedKeyTypes = [ @@ -216,12 +219,14 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco GetDocumentOptions innerOptions, CancellationToken cancellationToken) { + const string OperationName = "GetDocument"; + // Use the user provided mapper. if (this._options.MapperType == AzureAISearchRecordMapperType.JsonObjectCustomMapper) { var jsonObject = await RunOperationAsync( collectionName, - "GetDocument", + OperationName, () => GetDocumentWithNotFoundHandlingAsync(searchClient, key, innerOptions, cancellationToken)).ConfigureAwait(false); if (jsonObject is null) @@ -229,16 +234,17 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco return null; } - return RunModelConversion( + return VectorStoreErrorHandler.RunModelConversion( + DatabaseName, collectionName, - "GetDocument", + OperationName, () => this._options.JsonObjectCustomMapper!.MapFromStorageToDataModel(jsonObject)); } // Use the built in Azure AI Search mapper. return await RunOperationAsync( collectionName, - "GetDocument", + OperationName, () => GetDocumentWithNotFoundHandlingAsync(searchClient, key, innerOptions, cancellationToken)).ConfigureAwait(false); } @@ -258,24 +264,27 @@ private Task> MapToStorageModelAndUploadDocumentA IndexDocumentsOptions innerOptions, CancellationToken cancellationToken) { + const string OperationName = "UploadDocuments"; + // Use the user provided mapper. if (this._options.MapperType == AzureAISearchRecordMapperType.JsonObjectCustomMapper) { - var jsonObjects = RunModelConversion( + var jsonObjects = VectorStoreErrorHandler.RunModelConversion( + DatabaseName, collectionName, - "UploadDocuments", + OperationName, () => records.Select(this._options.JsonObjectCustomMapper!.MapFromDataToStorageModel)); return RunOperationAsync( collectionName, - "UploadDocuments", + OperationName, () => searchClient.UploadDocumentsAsync(jsonObjects, innerOptions, cancellationToken)); } // Use the built in Azure AI Search mapper. return RunOperationAsync( collectionName, - "UploadDocuments", + OperationName, () => searchClient.UploadDocumentsAsync(records, innerOptions, cancellationToken)); } @@ -377,7 +386,7 @@ private static async Task RunOperationAsync(string collectionName, string // Using Open Telemetry standard for naming of these entries. // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ - wrapperException.Data.Add("db.system", "AzureAISearch"); + wrapperException.Data.Add("db.system", DatabaseName); wrapperException.Data.Add("db.collection.name", collectionName); wrapperException.Data.Add("db.operation.name", operationName); @@ -389,35 +398,7 @@ private static async Task RunOperationAsync(string collectionName, string // Using Open Telemetry standard for naming of these entries. // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ - wrapperException.Data.Add("db.system", "AzureAISearch"); - wrapperException.Data.Add("db.collection.name", collectionName); - wrapperException.Data.Add("db.operation.name", operationName); - - throw wrapperException; - } - } - - /// - /// Run the given model conversion and wrap any exceptions with . - /// - /// The response type of the operation. - /// The name of the collection the operation is being run on. - /// The type of database operation being run. - /// The operation to run. - /// The result of the operation. - private static T RunModelConversion(string collectionName, string operationName, Func operation) - { - try - { - return operation.Invoke(); - } - catch (Exception ex) - { - var wrapperException = new VectorStoreRecordMappingException("Failed to convert vector store record.", ex); - - // Using Open Telemetry standard for naming of these entries. - // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ - wrapperException.Data.Add("db.system", "AzureAISearch"); + wrapperException.Data.Add("db.system", DatabaseName); wrapperException.Data.Add("db.collection.name", collectionName); wrapperException.Data.Add("db.operation.name", operationName); diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs index e717b25c00ca..b05766ff9b17 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs @@ -20,6 +20,15 @@ namespace Microsoft.SemanticKernel.Connectors.Qdrant; public sealed class QdrantVectorRecordStore : IVectorRecordStore, IVectorRecordStore where TRecord : class { + /// The name of this database for telemetry purposes. + private const string DatabaseName = "Qdrant"; + + /// The name of the upsert operation for telemetry purposes. + private const string UpsertName = "Upsert"; + + /// The name of the Delete operation for telemetry purposes. + private const string DeleteName = "Delete"; + /// Qdrant client that can be used to manage the collections and points in a Qdrant store. private readonly QdrantClient _qdrantClient; @@ -105,7 +114,7 @@ public Task DeleteAsync(ulong key, DeleteRecordOptions? options = null, Cancella var collectionName = this.ChooseCollectionName(options?.CollectionName); return RunOperationAsync( collectionName, - "Delete", + DeleteName, () => this._qdrantClient.DeleteAsync( collectionName, key, @@ -121,7 +130,7 @@ public Task DeleteAsync(Guid key, DeleteRecordOptions? options = null, Cancellat var collectionName = this.ChooseCollectionName(options?.CollectionName); return RunOperationAsync( collectionName, - "Delete", + DeleteName, () => this._qdrantClient.DeleteAsync( collectionName, key, @@ -137,7 +146,7 @@ public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? optio var collectionName = this.ChooseCollectionName(options?.CollectionName); return RunOperationAsync( collectionName, - "Delete", + DeleteName, () => this._qdrantClient.DeleteAsync( collectionName, keys.ToList(), @@ -153,7 +162,7 @@ public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? option var collectionName = this.ChooseCollectionName(options?.CollectionName); return RunOperationAsync( collectionName, - "Delete", + DeleteName, () => this._qdrantClient.DeleteAsync( collectionName, keys.ToList(), @@ -170,15 +179,16 @@ public async Task UpsertAsync(TRecord record, UpsertRecordOptions? option var collectionName = this.ChooseCollectionName(options?.CollectionName); // Create point from record. - var pointStruct = RunModelConversion( + var pointStruct = VectorStoreErrorHandler.RunModelConversion( + DatabaseName, collectionName, - "Upsert", + UpsertName, () => this._mapper.MapFromDataToStorageModel(record)); // Upsert. await RunOperationAsync( collectionName, - "Upsert", + UpsertName, () => this._qdrantClient.UpsertAsync(collectionName, [pointStruct], true, cancellationToken: cancellationToken)).ConfigureAwait(false); return pointStruct.Id.Num; } @@ -192,15 +202,16 @@ async Task IVectorRecordStore.UpsertAsync(TRecord record, U var collectionName = this.ChooseCollectionName(options?.CollectionName); // Create point from record. - var pointStruct = RunModelConversion( + var pointStruct = VectorStoreErrorHandler.RunModelConversion( + DatabaseName, collectionName, - "Upsert", + UpsertName, () => this._mapper.MapFromDataToStorageModel(record)); // Upsert. await RunOperationAsync( collectionName, - "Upsert", + UpsertName, () => this._qdrantClient.UpsertAsync(collectionName, [pointStruct], true, cancellationToken: cancellationToken)).ConfigureAwait(false); return Guid.Parse(pointStruct.Id.Uuid); } @@ -214,15 +225,16 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable recor var collectionName = this.ChooseCollectionName(options?.CollectionName); // Create points from records. - var pointStructs = RunModelConversion( + var pointStructs = VectorStoreErrorHandler.RunModelConversion( + DatabaseName, collectionName, - "Upsert", + UpsertName, () => records.Select(this._mapper.MapFromDataToStorageModel).ToList()); // Upsert. await RunOperationAsync( collectionName, - "Upsert", + UpsertName, () => this._qdrantClient.UpsertAsync(collectionName, pointStructs, true, cancellationToken: cancellationToken)).ConfigureAwait(false); foreach (var pointStruct in pointStructs) @@ -240,15 +252,16 @@ async IAsyncEnumerable IVectorRecordStore.UpsertBatchAsync( var collectionName = this.ChooseCollectionName(options?.CollectionName); // Create points from records. - var pointStructs = RunModelConversion( + var pointStructs = VectorStoreErrorHandler.RunModelConversion( + DatabaseName, collectionName, - "Upsert", + UpsertName, () => records.Select(this._mapper.MapFromDataToStorageModel).ToList()); // Upsert. await RunOperationAsync( collectionName, - "Upsert", + UpsertName, () => this._qdrantClient.UpsertAsync(collectionName, pointStructs, true, cancellationToken: cancellationToken)).ConfigureAwait(false); foreach (var pointStruct in pointStructs) @@ -271,6 +284,7 @@ private async IAsyncEnumerable GetBatchByPointIdAsync( GetRecordOptions? options, [EnumeratorCancellation] CancellationToken cancellationToken) { + const string OperationName = "Retrieve"; Verify.NotNull(keys); // Create options. @@ -280,7 +294,7 @@ private async IAsyncEnumerable GetBatchByPointIdAsync( // Retrieve data points. var retrievedPoints = await RunOperationAsync( collectionName, - "Retrieve", + OperationName, () => this._qdrantClient.RetrieveAsync(collectionName, pointsIds, true, options?.IncludeVectors ?? false, cancellationToken: cancellationToken)).ConfigureAwait(false); // Convert the retrieved points to the target data model. @@ -298,9 +312,10 @@ private async IAsyncEnumerable GetBatchByPointIdAsync( pointStruct.Payload.Add(payloadEntry.Key, payloadEntry.Value); } - yield return RunModelConversion( + yield return VectorStoreErrorHandler.RunModelConversion( + DatabaseName, collectionName, - "Retrieve", + OperationName, () => this._mapper.MapFromStorageToDataModel(pointStruct, options)); } } @@ -344,35 +359,7 @@ private static async Task RunOperationAsync(string collectionName, string // Using Open Telemetry standard for naming of these entries. // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ - wrapperException.Data.Add("db.system", "Qdrant"); - wrapperException.Data.Add("db.collection.name", collectionName); - wrapperException.Data.Add("db.operation.name", operationName); - - throw wrapperException; - } - } - - /// - /// Run the given model conversion and wrap any exceptions with . - /// - /// The response type of the operation. - /// The name of the collection the operation is being run on. - /// The type of database operation being run. - /// The operation to run. - /// The result of the operation. - private static T RunModelConversion(string collectionName, string operationName, Func operation) - { - try - { - return operation.Invoke(); - } - catch (Exception ex) when (ex is not VectorStoreRecordMappingException) - { - var wrapperException = new VectorStoreRecordMappingException("Failed to convert vector store record.", ex); - - // Using Open Telemetry standard for naming of these entries. - // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ - wrapperException.Data.Add("db.system", "Qdrant"); + wrapperException.Data.Add("db.system", DatabaseName); wrapperException.Data.Add("db.collection.name", collectionName); wrapperException.Data.Add("db.operation.name", operationName); diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs index 796caf138445..d7e122a2627a 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs @@ -69,16 +69,17 @@ internal sealed class QdrantVectorStoreRecordMapper : IVectorStoreRecor /// A property info object that points at the key property for the current model, allowing easy reading and writing of this property. private readonly PropertyInfo _keyPropertyInfo; - /// Optional configuration options for this class. + /// Configuration options for this class. private readonly QdrantVectorStoreRecordMapperOptions _options; /// /// Initializes a new instance of the class. /// - /// Optional options to use when doing the model conversion. - public QdrantVectorStoreRecordMapper(QdrantVectorStoreRecordMapperOptions? options) + /// Options to use when doing the model conversion. + public QdrantVectorStoreRecordMapper(QdrantVectorStoreRecordMapperOptions options) { - this._options = options ?? new QdrantVectorStoreRecordMapperOptions(); + Verify.NotNull(options); + this._options = options; // Enumerate public properties using configuration or attributes. (PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties; @@ -155,6 +156,7 @@ public PointStruct MapFromDataToStorageModel(TRecord dataModel) } else { + // We already verified in the constructor via FindProperties that there is exactly one vector property when not using named vectors. var vectorPropertyInfo = this._vectorPropertiesInfo.First(); if (vectorPropertyInfo.GetValue(dataModel) is ReadOnlyMemory floatROM) { diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs index 1aa0b00d7639..7192c2125dc2 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs @@ -23,6 +23,9 @@ namespace Microsoft.SemanticKernel.Connectors.Redis; public sealed class RedisVectorRecordStore : IVectorRecordStore where TRecord : class { + /// The name of this database for telemetry purposes. + private const string DatabaseName = "Redis"; + /// A set of types that a key on the provided model may have. private static readonly HashSet s_supportedKeyTypes = [ @@ -145,7 +148,8 @@ public RedisVectorRecordStore(IDatabase database, RedisVectorRecordStoreOptions< } // Convert to the caller's data model. - return RunModelConversion( + return VectorStoreErrorHandler.RunModelConversion( + DatabaseName, collectionName, "GET", () => @@ -194,7 +198,8 @@ public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, G } // Convert to the caller's data model. - yield return RunModelConversion( + yield return VectorStoreErrorHandler.RunModelConversion( + DatabaseName, collectionName, "MGET", () => @@ -242,7 +247,8 @@ public async Task UpsertAsync(TRecord record, UpsertRecordOptions? optio var collectionName = this.ChooseCollectionName(options?.CollectionName); // Map. - var redisJsonRecord = RunModelConversion( + var redisJsonRecord = VectorStoreErrorHandler.RunModelConversion( + DatabaseName, collectionName, "SET", () => this._mapper.MapFromDataToStorageModel(record)); @@ -274,7 +280,8 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco var redisRecords = new List<(string maybePrefixedKey, string originalKey, JsonNode jsonNode)>(); foreach (var record in records) { - var redisJsonRecord = RunModelConversion( + var redisJsonRecord = VectorStoreErrorHandler.RunModelConversion( + DatabaseName, collectionName, "MSET", () => this._mapper.MapFromDataToStorageModel(record)); @@ -354,35 +361,7 @@ private static async Task RunOperationAsync(string collectionName, string // Using Open Telemetry standard for naming of these entries. // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ - wrapperException.Data.Add("db.system", "Redis"); - wrapperException.Data.Add("db.collection.name", collectionName); - wrapperException.Data.Add("db.operation.name", operationName); - - throw wrapperException; - } - } - - /// - /// Run the given model conversion and wrap any exceptions with . - /// - /// The response type of the operation. - /// The name of the collection the operation is being run on. - /// The type of database operation being run. - /// The operation to run. - /// The result of the operation. - private static T RunModelConversion(string collectionName, string operationName, Func operation) - { - try - { - return operation.Invoke(); - } - catch (Exception ex) - { - var wrapperException = new VectorStoreRecordMappingException("Failed to convert vector store record.", ex); - - // Using Open Telemetry standard for naming of these entries. - // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ - wrapperException.Data.Add("db.system", "Redis"); + wrapperException.Data.Add("db.system", DatabaseName); wrapperException.Data.Add("db.collection.name", collectionName); wrapperException.Data.Add("db.operation.name", operationName); diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs index 33793ca1479e..78b3b7dfeba0 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs @@ -17,7 +17,7 @@ namespace SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant; /// Contains tests for the class. /// /// Used for logging. -/// Redis setup and teardown. +/// Qdrant setup and teardown. [Collection("QdrantVectorStoreCollection")] public sealed class QdrantVectorRecordStoreTests(ITestOutputHelper output, QdrantVectorStoreFixture fixture) { diff --git a/dotnet/src/InternalUtilities/src/Data/VectorStoreErrorHandler.cs b/dotnet/src/InternalUtilities/src/Data/VectorStoreErrorHandler.cs new file mode 100644 index 000000000000..f2fc3f992de7 --- /dev/null +++ b/dotnet/src/InternalUtilities/src/Data/VectorStoreErrorHandler.cs @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; +using Microsoft.SemanticKernel.Memory; + +namespace Microsoft.SemanticKernel; + +/// +/// Contains helpers for reading vector store model properties and their attributes. +/// +[ExcludeFromCodeCoverage] +internal static class VectorStoreErrorHandler +{ + /// + /// Run the given model conversion and wrap any exceptions with . + /// + /// The response type of the operation. + /// The name of the database system the operation is being run on. + /// The name of the collection the operation is being run on. + /// The type of database operation being run. + /// The operation to run. + /// The result of the operation. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static T RunModelConversion(string databaseSystemName, string collectionName, string operationName, Func operation) + { + try + { + return operation.Invoke(); + } + catch (Exception ex) + { + var wrapperException = new VectorStoreRecordMappingException("Failed to convert vector store record.", ex); + + // Using Open Telemetry standard for naming of these entries. + // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ + wrapperException.Data.Add("db.system", databaseSystemName); + wrapperException.Data.Add("db.collection.name", collectionName); + wrapperException.Data.Add("db.operation.name", operationName); + + throw wrapperException; + } + } +} diff --git a/dotnet/src/InternalUtilities/src/Schema/VectorStoreRecordPropertyReader.cs b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs similarity index 84% rename from dotnet/src/InternalUtilities/src/Schema/VectorStoreRecordPropertyReader.cs rename to dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs index fc580c69bf9c..20318c8d385b 100644 --- a/dotnet/src/InternalUtilities/src/Schema/VectorStoreRecordPropertyReader.cs +++ b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs @@ -5,11 +5,7 @@ using System.Diagnostics.CodeAnalysis; using System.Linq; using System.Reflection; -using System.Text.Json; -using System.Text.Json.Nodes; using System.Text.Json.Serialization; -using System.Text.Json.Serialization.Metadata; -using JsonSchemaMapper; using Microsoft.SemanticKernel.Memory; namespace Microsoft.SemanticKernel; @@ -196,6 +192,44 @@ public static (PropertyInfo keyProperty, List dataProperties, List return (keyProperty!, dataProperties, vectorProperties); } + /// + /// Create a by reading the attributes on the properties of the given type. + /// + /// The type to create the definition for. + /// if the store supports multiple vectors, otherwise. + /// The based on the given type. + public static VectorStoreRecordDefinition CreateVectorStoreRecordDefinitionFromType(Type type, bool supportsMultipleVectors) + { + var properties = FindProperties(type, supportsMultipleVectors); + var definitionProperties = new List(); + + definitionProperties.Add(new VectorStoreRecordKeyProperty(properties.keyProperty.Name)); + + foreach (var dataProperty in properties.dataProperties) + { + var dataAttribute = dataProperty.GetCustomAttribute(); + if (dataAttribute is not null) + { + definitionProperties.Add(new VectorStoreRecordDataProperty(dataProperty.Name) + { + HasEmbedding = dataAttribute.HasEmbedding, + EmbeddingPropertyName = dataAttribute.EmbeddingPropertyName, + }); + } + } + + foreach (var vectorProperty in properties.vectorProperties) + { + var vectorAttribute = vectorProperty.GetCustomAttribute(); + if (vectorAttribute is not null) + { + definitionProperties.Add(new VectorStoreRecordVectorProperty(vectorProperty.Name)); + } + } + + return new VectorStoreRecordDefinition { Properties = definitionProperties }; + } + /// /// Verify that the given properties are of the supported types. /// diff --git a/dotnet/src/SemanticKernel.UnitTests/Utilities/VectorStoreRecordPropertyReaderTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs similarity index 81% rename from dotnet/src/SemanticKernel.UnitTests/Utilities/VectorStoreRecordPropertyReaderTests.cs rename to dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs index 232758d77bbb..b702f2b799a0 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Utilities/VectorStoreRecordPropertyReaderTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs @@ -5,7 +5,7 @@ using Microsoft.SemanticKernel.Memory; using Xunit; -namespace SemanticKernel.UnitTests.Utilities; +namespace SemanticKernel.UnitTests.Data; public class VectorStoreRecordPropertyReaderTests { @@ -61,8 +61,8 @@ public void FindPropertiesThrowsForMultipleVectorsWithSingleVectorSupport(bool u // Assert. var expectedMessage = useConfig ? - "Multiple vector properties configured for type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+MultiPropsModel while only one is supported." : - "Multiple vector properties found on type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+MultiPropsModel while only one is supported."; + "Multiple vector properties configured for type SemanticKernel.UnitTests.Data.VectorStoreRecordPropertyReaderTests+MultiPropsModel while only one is supported." : + "Multiple vector properties found on type SemanticKernel.UnitTests.Data.VectorStoreRecordPropertyReaderTests+MultiPropsModel while only one is supported."; Assert.Equal(expectedMessage, ex.Message); } @@ -78,8 +78,8 @@ public void FindPropertiesThrowsOnMultipleKeyProperties(bool useConfig) // Assert. var expectedMessage = useConfig ? - "Multiple key properties configured for type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+MultiKeysModel." : - "Multiple key properties found on type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+MultiKeysModel."; + "Multiple key properties configured for type SemanticKernel.UnitTests.Data.VectorStoreRecordPropertyReaderTests+MultiKeysModel." : + "Multiple key properties found on type SemanticKernel.UnitTests.Data.VectorStoreRecordPropertyReaderTests+MultiKeysModel."; Assert.Equal(expectedMessage, ex.Message); } @@ -95,8 +95,8 @@ public void FindPropertiesThrowsOnNoKeyProperty(bool useConfig) // Assert. var expectedMessage = useConfig ? - "No key property configured for type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+NoKeyModel." : - "No key property found on type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+NoKeyModel."; + "No key property configured for type SemanticKernel.UnitTests.Data.VectorStoreRecordPropertyReaderTests+NoKeyModel." : + "No key property found on type SemanticKernel.UnitTests.Data.VectorStoreRecordPropertyReaderTests+NoKeyModel."; Assert.Equal(expectedMessage, ex.Message); } @@ -112,8 +112,8 @@ public void FindPropertiesThrowsOnNoVectorPropertyWithSingleVectorSupport(bool u // Assert. var expectedMessage = useConfig ? - "No vector property configured for type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+NoVectorModel." : - "No vector property found on type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+NoVectorModel."; + "No vector property configured for type SemanticKernel.UnitTests.Data.VectorStoreRecordPropertyReaderTests+NoVectorModel." : + "No vector property found on type SemanticKernel.UnitTests.Data.VectorStoreRecordPropertyReaderTests+NoVectorModel."; Assert.Equal(expectedMessage, ex.Message); } @@ -138,6 +138,33 @@ public void FindPropertiesUsingConfigThrowsForNotFoundProperties(string property Assert.Throws(() => VectorStoreRecordPropertyReader.FindProperties(typeof(NoKeyModel), definition, false)); } + [Fact] + public void CreateVectorStoreRecordDefinitionFromTypeConvertsAllProps() + { + // Act. + var definition = VectorStoreRecordPropertyReader.CreateVectorStoreRecordDefinitionFromType(typeof(MultiPropsModel), true); + + // Assert. + Assert.Equal(5, definition.Properties.Count); + Assert.Equal("Key", definition.Properties[0].PropertyName); + Assert.Equal("Data1", definition.Properties[1].PropertyName); + Assert.Equal("Data2", definition.Properties[2].PropertyName); + Assert.Equal("Vector1", definition.Properties[3].PropertyName); + Assert.Equal("Vector2", definition.Properties[4].PropertyName); + + Assert.IsType(definition.Properties[0]); + Assert.IsType(definition.Properties[1]); + Assert.IsType(definition.Properties[2]); + Assert.IsType(definition.Properties[3]); + Assert.IsType(definition.Properties[4]); + + var data1 = (VectorStoreRecordDataProperty)definition.Properties[1]; + var data2 = (VectorStoreRecordDataProperty)definition.Properties[2]; + + Assert.True(data1.HasEmbedding); + Assert.False(data2.HasEmbedding); + } + [Fact] public void VerifyPropertyTypesPassForAllowedTypes() { @@ -229,7 +256,7 @@ private sealed class MultiPropsModel [VectorStoreRecordKey] public string Key { get; set; } = string.Empty; - [VectorStoreRecordData] + [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "Vector1")] public string Data1 { get; set; } = string.Empty; [VectorStoreRecordData] @@ -249,7 +276,7 @@ private sealed class MultiPropsModel Properties = [ new VectorStoreRecordKeyProperty("Key"), - new VectorStoreRecordDataProperty("Data1"), + new VectorStoreRecordDataProperty("Data1") { HasEmbedding = true, EmbeddingPropertyName = "Vector1" }, new VectorStoreRecordDataProperty("Data2"), new VectorStoreRecordVectorProperty("Vector1"), new VectorStoreRecordVectorProperty("Vector2")