diff --git a/extensions/Postgres/Postgres/Internals/PostgresDbClient.cs b/extensions/Postgres/Postgres/Internals/PostgresDbClient.cs index edee5a31f..910557153 100644 --- a/extensions/Postgres/Postgres/Internals/PostgresDbClient.cs +++ b/extensions/Postgres/Postgres/Internals/PostgresDbClient.cs @@ -404,8 +404,8 @@ DO UPDATE SET // Column names string columns = withEmbeddings ? this._columnsListWithEmbeddings : this._columnsListNoEmbeddings; - string similarityActualValue = "__similarity"; - string similarityPlaceholder = "@__min_similarity"; + string colDistance = "__distance"; + string colMaxDistance = "@__max_distance"; // Filtering logic, including filter by similarity filterSql = filterSql?.Trim().Replace(PostgresSchema.PlaceholdersTags, this._colTags, StringComparison.Ordinal); @@ -413,10 +413,12 @@ DO UPDATE SET { filterSql = "TRUE"; } + var maxDistance = 1 - minSimilarity; + filterSql += $" AND {this._colEmbedding} <=> @embedding < {maxDistance}"; if (sqlUserValues == null) { sqlUserValues = new(); } - sqlUserValues[similarityPlaceholder] = minSimilarity; + sqlUserValues[colMaxDistance] = minSimilarity; this._log.LogTrace("Searching by similarity. Table: {0}. Threshold: {1}. Limit: {2}. Offset: {3}. Using SQL filter: {4}", tableName, minSimilarity, limit, offset, string.IsNullOrWhiteSpace(filterSql) ? "false" : "true"); @@ -429,11 +431,16 @@ DO UPDATE SET await using (cmd.ConfigureAwait(false)) { #pragma warning disable CA2100 // SQL reviewed + + // When using 1 - (embedding <=> target) the index is not being used, therefore we calculate + // the similarity (1 - distance) later + // Furthermore, colDistance can't be used in the WHERE clause + // as that causes a "table cannot be found error" cmd.CommandText = @$" - SELECT {columns}, 1 - ({this._colEmbedding} <=> @embedding) AS {similarityActualValue} + SELECT {columns}, {this._colEmbedding} <=> @embedding AS {colDistance} FROM {tableName} WHERE {filterSql} - ORDER BY {similarityActualValue} DESC + ORDER BY {colDistance} ASC LIMIT @limit OFFSET @offset "; @@ -447,7 +454,6 @@ OFFSET @offset cmd.Parameters.AddWithValue(kv.Key, kv.Value); } #pragma warning restore CA2100 - // TODO: rewrite code to stream results (need to combine yield and try-catch) var result = new List<(PostgresMemoryRecord record, double similarity)>(); try @@ -455,16 +461,10 @@ OFFSET @offset NpgsqlDataReader dataReader = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false); await using (dataReader.ConfigureAwait(false)) { - var run = true; - while (run && await dataReader.ReadAsync(cancellationToken).ConfigureAwait(false)) + while (await dataReader.ReadAsync(cancellationToken).ConfigureAwait(false)) { - double similarity = dataReader.GetDouble(dataReader.GetOrdinal(similarityActualValue)); - if (similarity < minSimilarity) - { - run = false; - continue; - } - + double distance = dataReader.GetDouble(dataReader.GetOrdinal(colDistance)); + double similarity = 1 - distance; result.Add((this.ReadEntry(dataReader, withEmbeddings), similarity)); } }