Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dotnet/Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
<PackageVersion Include="OpenTelemetry.Instrumentation.Http" Version="1.9.0" />
<PackageVersion Include="OpenTelemetry.Instrumentation.Runtime" Version="1.9.0" />
<PackageVersion Include="PdfPig" Version="0.1.10" />
<PackageVersion Include="Pinecone.NET" Version="2.1.1" />
<PackageVersion Include="Pinecone.Client" Version="3.0.0" />
<PackageVersion Include="Prompty.Core" Version="0.0.23-alpha" />
<PackageVersion Include="PuppeteerSharp" Version="20.0.5" />
<PackageVersion Include="System.Diagnostics.DiagnosticSource" Version="8.0.1" />
Expand Down
9 changes: 9 additions & 0 deletions dotnet/SK-dotnet.sln
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ModelContextProtocol", "sam
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SqlServerIntegrationTests", "src\VectorDataIntegrationTests\SqlServerIntegrationTests\SqlServerIntegrationTests.csproj", "{A5E6193C-8431-4C6E-B674-682CB41EAA0C}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "PineconeIntegrationTests", "src\VectorDataIntegrationTests\PineconeIntegrationTests\PineconeIntegrationTests.csproj", "{E9A74E0C-BC02-4DDD-A487-89847EDF8026}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -1334,6 +1336,12 @@ Global
{A5E6193C-8431-4C6E-B674-682CB41EAA0C}.Publish|Any CPU.Build.0 = Debug|Any CPU
{A5E6193C-8431-4C6E-B674-682CB41EAA0C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A5E6193C-8431-4C6E-B674-682CB41EAA0C}.Release|Any CPU.Build.0 = Release|Any CPU
{E9A74E0C-BC02-4DDD-A487-89847EDF8026}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E9A74E0C-BC02-4DDD-A487-89847EDF8026}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E9A74E0C-BC02-4DDD-A487-89847EDF8026}.Publish|Any CPU.ActiveCfg = Release|Any CPU
{E9A74E0C-BC02-4DDD-A487-89847EDF8026}.Publish|Any CPU.Build.0 = Release|Any CPU
{E9A74E0C-BC02-4DDD-A487-89847EDF8026}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E9A74E0C-BC02-4DDD-A487-89847EDF8026}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -1516,6 +1524,7 @@ Global
{8C658E1E-83C8-4127-B8BF-27A638A45DDD} = {6823CD5E-2ABE-41EB-B865-F86EC13F0CF9}
{B16AC373-3DA8-4505-9510-110347CD635D} = {5D4C0700-BBB5-418F-A7B2-F392B9A18263}
{A5E6193C-8431-4C6E-B674-682CB41EAA0C} = {4F381919-F1BE-47D8-8558-3187ED04A84F}
{E9A74E0C-BC02-4DDD-A487-89847EDF8026} = {4F381919-F1BE-47D8-8558-3187ED04A84F}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {FBDC56A3-86AD-4323-AA0F-201E59123B83}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Pinecone.NET" />
<PackageReference Include="Pinecone.Client" />
<PackageReference Include="System.Text.Json" />
</ItemGroup>

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Linq.Expressions;
using System.Reflection;
using System.Runtime.CompilerServices;
using Pinecone;

namespace Microsoft.SemanticKernel.Connectors.Pinecone;

// This class is a modification of MongoDBFilterTranslator that uses the same query language
// (https://docs.pinecone.io/guides/data/understanding-metadata#metadata-query-language),
// with the difference of representing everything as Metadata rather than BsonDocument.
// To represent collections of any kind, we use List<MetadataValue?>,
// as we sometimes need to extend the collection (for example, with another condition).
/// <summary>
/// Translates a LINQ filter lambda over a record into a Pinecone <see cref="Metadata"/> filter.
/// </summary>
/// <remarks>
/// The record parameter of the lambda and the property-name map are stored in instance fields
/// by the <see cref="LambdaExpression"/> overload of Translate and are read by the
/// helper methods while the expression tree is walked.
/// </remarks>
internal class PineconeFilterTranslator
{
    // Maps data model property names to storage (metadata key) names; set on every Translate call.
    private IReadOnlyDictionary<string, string> _storagePropertyNames = null!;

    // The single parameter of the filter lambda (the "record"); set on every Translate call.
    private ParameterExpression _recordParameter = null!;

    /// <summary>
    /// Translates the body of <paramref name="lambdaExpression"/> into a Pinecone metadata filter.
    /// </summary>
    /// <param name="lambdaExpression">The filter lambda; expected to have exactly one parameter (the record).</param>
    /// <param name="storagePropertyNames">Map from data model property name to storage property name.</param>
    /// <returns>The filter expressed as <see cref="Metadata"/>.</returns>
    /// <exception cref="NotSupportedException">The expression contains a construct that cannot be translated.</exception>
    /// <exception cref="InvalidOperationException">A member of the record is not present in <paramref name="storagePropertyNames"/>.</exception>
    internal Metadata Translate(LambdaExpression lambdaExpression, IReadOnlyDictionary<string, string> storagePropertyNames)
    {
        this._storagePropertyNames = storagePropertyNames;

        Debug.Assert(lambdaExpression.Parameters.Count == 1);
        this._recordParameter = lambdaExpression.Parameters[0];

        return this.Translate(lambdaExpression.Body);
    }

    /// <summary>
    /// Dispatches a single expression node to the matching translation routine.
    /// </summary>
    private Metadata Translate(Expression? node)
        => node switch
        {
            BinaryExpression
            {
                NodeType: ExpressionType.Equal or ExpressionType.NotEqual
                    or ExpressionType.GreaterThan or ExpressionType.GreaterThanOrEqual
                    or ExpressionType.LessThan or ExpressionType.LessThanOrEqual
            } binary
                => this.TranslateEqualityComparison(binary),

            BinaryExpression { NodeType: ExpressionType.AndAlso or ExpressionType.OrElse } andOr
                => this.TranslateAndOr(andOr),
            UnaryExpression { NodeType: ExpressionType.Not } not
                => this.TranslateNot(not),

            // MemberExpression is generally handled within e.g. TranslateEqualityComparison; this is used to translate direct bool inside filter (e.g. Filter => r => r.Bool)
            MemberExpression member when member.Type == typeof(bool) && this.TryTranslateFieldAccess(member, out _)
                => this.TranslateEqualityComparison(Expression.Equal(member, Expression.Constant(true))),

            MethodCallExpression methodCall => this.TranslateMethodCall(methodCall),

            _ => throw new NotSupportedException("The following NodeType is unsupported: " + node?.NodeType)
        };

    /// <summary>
    /// Translates an equality or ordering comparison between a record field and a constant.
    /// The field may appear on either side of the operator.
    /// </summary>
    /// <exception cref="NotSupportedException">
    /// The comparison is not between a record field and a constant, or the constant is <see langword="null"/>.
    /// </exception>
    private Metadata TranslateEqualityComparison(BinaryExpression binary)
    {
        string? storagePropertyName;
        object? value;
        bool fieldIsRightOperand;

        if (this.TryTranslateFieldAccess(binary.Left, out storagePropertyName) && TryGetConstant(binary.Right, out value))
        {
            fieldIsRightOperand = false;
        }
        else if (this.TryTranslateFieldAccess(binary.Right, out storagePropertyName) && TryGetConstant(binary.Left, out value))
        {
            fieldIsRightOperand = true;
        }
        else
        {
            throw new NotSupportedException("Invalid equality/comparison");
        }

        if (value is null)
        {
            throw new NotSupportedException("Pinecone does not support null checks in vector search pre-filters");
        }

        // The emitted filter always has the shape { field: { op: constant } }, i.e. "field op constant".
        // When the source expression is "constant op field" the ordering operator has to be mirrored
        // (5 < r.Int means r.Int > 5); equality and inequality are symmetric and stay as they are.
        var nodeType = fieldIsRightOperand
            ? binary.NodeType switch
            {
                ExpressionType.GreaterThan => ExpressionType.LessThan,
                ExpressionType.GreaterThanOrEqual => ExpressionType.LessThanOrEqual,
                ExpressionType.LessThan => ExpressionType.GreaterThan,
                ExpressionType.LessThanOrEqual => ExpressionType.GreaterThanOrEqual,
                var symmetric => symmetric
            }
            : binary.NodeType;

        // Short form of equality (instead of $eq)
        if (nodeType is ExpressionType.Equal)
        {
            return new Metadata { [storagePropertyName] = ToMetadata(value) };
        }

        var filterOperator = nodeType switch
        {
            ExpressionType.NotEqual => "$ne",
            ExpressionType.GreaterThan => "$gt",
            ExpressionType.GreaterThanOrEqual => "$gte",
            ExpressionType.LessThan => "$lt",
            ExpressionType.LessThanOrEqual => "$lte",

            _ => throw new UnreachableException()
        };

        return new Metadata { [storagePropertyName] = new Metadata { [filterOperator] = ToMetadata(value) } };
    }

    /// <summary>
    /// Translates <c>&amp;&amp;</c> / <c>||</c> into <c>$and</c> / <c>$or</c>, flattening operands that are
    /// already a list under the same operator into a single list.
    /// </summary>
    private Metadata TranslateAndOr(BinaryExpression andOr)
    {
        var logicalOperator = andOr.NodeType switch
        {
            ExpressionType.AndAlso => "$and",
            ExpressionType.OrElse => "$or",
            _ => throw new UnreachableException()
        };

        var (left, right) = (this.Translate(andOr.Left), this.Translate(andOr.Right));

        // Non-null when the corresponding side is itself { logicalOperator: [...] }.
        List<MetadataValue?>? nestedLeft = GetListOrNull(left, logicalOperator);
        List<MetadataValue?>? nestedRight = GetListOrNull(right, logicalOperator);

        switch ((nestedLeft, nestedRight))
        {
            case (not null, not null):
                nestedLeft.AddRange(nestedRight);
                return left;
            case (not null, null):
                nestedLeft.Add(right);
                return left;
            case (null, not null):
                // Insert at the front so the operand order of the source expression is preserved.
                nestedRight.Insert(0, left);
                return right;
            case (null, null):
                return new Metadata { [logicalOperator] = new MetadataValue(new List<MetadataValue?> { left, right }) };
        }
    }

    /// <summary>
    /// Translates a logical NOT. Only the forms that can be rewritten without a NOT operator are handled:
    /// negated (in)equality, negated bool field, and negated <c>$in</c> (rewritten to <c>$nin</c>).
    /// </summary>
    /// <exception cref="NotSupportedException">The operand is not one of the rewritable forms.</exception>
    private Metadata TranslateNot(UnaryExpression not)
    {
        switch (not.Operand)
        {
            // Special handling for !(a == b) and !(a != b)
            case BinaryExpression { NodeType: ExpressionType.Equal or ExpressionType.NotEqual } binary:
                return this.TranslateEqualityComparison(
                    Expression.MakeBinary(
                        binary.NodeType is ExpressionType.Equal ? ExpressionType.NotEqual : ExpressionType.Equal,
                        binary.Left,
                        binary.Right));

            // Not over bool field (Filter => r => !r.Bool)
            case MemberExpression member when member.Type == typeof(bool) && this.TryTranslateFieldAccess(member, out _):
                return this.TranslateEqualityComparison(Expression.Equal(member, Expression.Constant(false)));
        }

        var operand = this.Translate(not.Operand);

        // Identify NOT over $in, transform to $nin (https://www.mongodb.com/docs/manual/reference/operator/query/nin/#mongodb-query-op.-nin)
        if (operand.Count == 1 && operand.First() is { Key: var fieldName, Value: MetadataValue nested } && nested.Value is Metadata nestedMetadata
            && GetListOrNull(nestedMetadata, "$in") is List<MetadataValue> values)
        {
            return new Metadata { [fieldName] = new Metadata { ["$nin"] = values } };
        }

        throw new NotSupportedException("Pinecone does not support the NOT operator in vector search pre-filters");
    }

    /// <summary>
    /// Translates supported method calls; currently only <c>Contains</c> on <see cref="Enumerable"/>
    /// and on a generic list instance.
    /// </summary>
    /// <exception cref="NotSupportedException">The method is not a supported <c>Contains</c> overload.</exception>
    private Metadata TranslateMethodCall(MethodCallExpression methodCall)
        => methodCall switch
        {
            // Enumerable.Contains()
            { Method.Name: nameof(Enumerable.Contains), Arguments: [var source, var item] } contains
                when contains.Method.DeclaringType == typeof(Enumerable)
                => this.TranslateContains(source, item),

            // List.Contains()
            {
                Method:
                {
                    Name: nameof(Enumerable.Contains),
                    DeclaringType: { IsGenericType: true } declaringType
                },
                Object: Expression source,
                Arguments: [var item]
            } when declaringType.GetGenericTypeDefinition() == typeof(List<>) => this.TranslateContains(source, item),

            _ => throw new NotSupportedException($"Unsupported method call: {methodCall.Method.DeclaringType?.Name}.{methodCall.Method.Name}")
        };

    /// <summary>
    /// Translates <c>source.Contains(item)</c> where <paramref name="source"/> is an inline or captured
    /// enumerable and <paramref name="item"/> is a record field, producing an <c>$in</c> filter.
    /// </summary>
    /// <exception cref="NotSupportedException">
    /// <paramref name="source"/> is a record field, contains a non-constant element, or is otherwise unsupported;
    /// or <paramref name="item"/> is not a record field.
    /// </exception>
    private Metadata TranslateContains(Expression source, Expression item)
    {
        switch (source)
        {
            // Contains over array column (r => r.Strings.Contains("foo"))
            case var _ when this.TryTranslateFieldAccess(source, out _):
                throw new NotSupportedException("Pinecone does not support Contains within array fields ($elemMatch) in vector search pre-filters");

            // Contains over inline enumerable
            case NewArrayExpression newArray:
                var elements = new object?[newArray.Expressions.Count];

                for (var i = 0; i < newArray.Expressions.Count; i++)
                {
                    if (!TryGetConstant(newArray.Expressions[i], out var elementValue))
                    {
                        throw new NotSupportedException("Invalid element in array");
                    }

                    elements[i] = elementValue;
                }

                return ProcessInlineEnumerable(elements, item);

            // Contains over captured enumerable (we inline)
            case var _ when TryGetConstant(source, out var constantEnumerable)
                && constantEnumerable is IEnumerable enumerable and not string:
                return ProcessInlineEnumerable(enumerable, item);

            default:
                throw new NotSupportedException("Unsupported Contains expression");
        }

        // Builds { field: { "$in": [ ...elements ] } } for the field accessed by item.
        Metadata ProcessInlineEnumerable(IEnumerable elements, Expression item)
        {
            if (!this.TryTranslateFieldAccess(item, out var storagePropertyName))
            {
                throw new NotSupportedException("Unsupported item type in Contains");
            }

            return new Metadata
            {
                [storagePropertyName] = new Metadata
                {
                    ["$in"] = new MetadataValue(elements.Cast<object>().Select(ToMetadata).ToList())
                }
            };
        }
    }

    /// <summary>
    /// Recognizes a direct member access on the record parameter and resolves its storage property name.
    /// </summary>
    /// <returns><see langword="true"/> when <paramref name="expression"/> is a member access on the record parameter.</returns>
    /// <exception cref="InvalidOperationException">The accessed member has no entry in the storage property name map.</exception>
    private bool TryTranslateFieldAccess(Expression expression, [NotNullWhen(true)] out string? storagePropertyName)
    {
        if (expression is MemberExpression memberExpression && memberExpression.Expression == this._recordParameter)
        {
            if (!this._storagePropertyNames.TryGetValue(memberExpression.Member.Name, out storagePropertyName))
            {
                throw new InvalidOperationException($"Property name '{memberExpression.Member.Name}' provided as part of the filter clause is not a valid property name.");
            }

            return true;
        }

        storagePropertyName = null;
        return false;
    }

    /// <summary>
    /// Extracts a constant value from a literal constant or from a field of a compiler-generated closure
    /// (a captured variable).
    /// </summary>
    /// <returns><see langword="true"/> when a value was extracted; the value itself may be <see langword="null"/>.</returns>
    private static bool TryGetConstant(Expression expression, out object? constantValue)
    {
        switch (expression)
        {
            case ConstantExpression { Value: var v }:
                constantValue = v;
                return true;

            // This identifies compiler-generated closure types which contain captured variables.
            case MemberExpression { Expression: ConstantExpression constant, Member: FieldInfo fieldInfo }
                when constant.Type.Attributes.HasFlag(TypeAttributes.NestedPrivate)
                    && Attribute.IsDefined(constant.Type, typeof(CompilerGeneratedAttribute), inherit: true):
                constantValue = fieldInfo.GetValue(constant.Value);
                return true;

            default:
                constantValue = null;
                return false;
        }
    }

    /// <summary>
    /// Converts a CLR value to a <see cref="MetadataValue"/>, passing <see langword="null"/> through unchanged.
    /// </summary>
    private static MetadataValue? ToMetadata(object? value)
        => value is null ? null : PineconeVectorStoreRecordFieldMapping.ConvertToMetadataValue(value);

    /// <summary>
    /// Returns the list stored under <paramref name="operatorName"/> when <paramref name="value"/> consists of
    /// exactly that one entry and its value is a list; otherwise <see langword="null"/>.
    /// </summary>
    private static List<MetadataValue?>? GetListOrNull(Metadata value, string operatorName)
        => value.Count == 1 && value.First() is var element && element.Key == operatorName ? element.Value?.Value as List<MetadataValue?> : null;
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,17 @@ public PineconeGenericDataModelMapper(
/// <inheritdoc />
public Vector MapFromDataToStorageModel(VectorStoreGenericDataModel<string> dataModel)
{
var metadata = new MetadataMap();
var metadata = new Metadata();

// Map data properties.
foreach (var dataProperty in this._propertyReader.DataProperties)
{
if (dataModel.Data.TryGetValue(dataProperty.DataModelPropertyName, out var propertyValue))
{
var propertyStorageName = this._propertyReader.GetStoragePropertyName(dataProperty.DataModelPropertyName);
metadata[propertyStorageName] = propertyValue == null ?
new MetadataValue() :
PineconeVectorStoreRecordFieldMapping.ConvertToMetadataValue(propertyValue);
metadata[propertyStorageName] = propertyValue is not null
? PineconeVectorStoreRecordFieldMapping.ConvertToMetadataValue(propertyValue)
: null;
}
}

Expand All @@ -62,8 +62,8 @@ public Vector MapFromDataToStorageModel(VectorStoreGenericDataModel<string> data
// TODO: what about sparse values?
var result = new Vector
{
Id = (string)dataModel.Key,
Values = values.ToArray(),
Id = dataModel.Key,
Values = values,
Metadata = metadata,
SparseValues = null
};
Expand All @@ -80,7 +80,7 @@ public VectorStoreGenericDataModel<string> MapFromStorageToDataModel(Vector stor
// Set Vector.
if (options?.IncludeVectors is true)
{
dataModel.Vectors.Add(this._propertyReader.FirstVectorPropertyName!, new ReadOnlyMemory<float>(storageModel.Values));
dataModel.Vectors.Add(this._propertyReader.FirstVectorPropertyName!, storageModel.Values);
}

// Set Data.
Expand All @@ -91,9 +91,10 @@ public VectorStoreGenericDataModel<string> MapFromStorageToDataModel(Vector stor
var propertyStorageName = this._propertyReader.GetStoragePropertyName(dataProperty.DataModelPropertyName);
if (storageModel.Metadata.TryGetValue(propertyStorageName, out var propertyValue))
{
dataModel.Data[dataProperty.DataModelPropertyName] = PineconeVectorStoreRecordFieldMapping.ConvertFromMetadataValueToNativeType(
propertyValue,
dataProperty.PropertyType);
dataModel.Data[dataProperty.DataModelPropertyName] =
propertyValue is not null
? PineconeVectorStoreRecordFieldMapping.ConvertFromMetadataValueToNativeType(propertyValue, dataProperty.PropertyType)
: null;
}
}
}
Expand Down
Loading
Loading