From 48dbcfafd440d76982de58a6b36aa1cea01ef737 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Fri, 23 Aug 2024 18:31:55 +0100 Subject: [PATCH 01/11] support Lucene's (proposed) HNSW search seeding feature --- .../org/apache/solr/schema/DenseVectorField.java | 10 ++++++++-- .../search/neural/AbstractVectorQParserBase.java | 4 ++++ .../org/apache/solr/search/neural/KnnQParser.java | 2 +- .../solr/search/neural/VectorSimilarityQParser.java | 12 ++++++++++-- 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java index 4d528361dd4..7cdc6b75221 100644 --- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java @@ -365,6 +365,11 @@ public ValueSource getValueSource(SchemaField field, QParser parser) { public Query getKnnVectorQuery( String fieldName, String vectorToSearch, int topK, Query filterQuery) { + return getKnnVectorQuery(fieldName, vectorToSearch, topK, filterQuery, null); + } + + public Query getKnnVectorQuery( + String fieldName, String vectorToSearch, int topK, Query filterQuery, Query seedQuery) { DenseVectorParser vectorBuilder = getVectorBuilder(vectorToSearch, DenseVectorParser.BuilderPhase.QUERY); @@ -372,9 +377,10 @@ public Query getKnnVectorQuery( switch (vectorEncoding) { case FLOAT32: return new KnnFloatVectorQuery( - fieldName, vectorBuilder.getFloatVector(), topK, filterQuery); + fieldName, vectorBuilder.getFloatVector(), topK, filterQuery /*, seedQuery */); case BYTE: - return new KnnByteVectorQuery(fieldName, vectorBuilder.getByteVector(), topK, filterQuery); + return new KnnByteVectorQuery( + fieldName, vectorBuilder.getByteVector(), topK, filterQuery /*, seedQuery */); default: throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, diff --git a/solr/core/src/java/org/apache/solr/search/neural/AbstractVectorQParserBase.java b/solr/core/src/java/org/apache/solr/search/neural/AbstractVectorQParserBase.java index 4cafb45744e..b57b5f5fd2e 100644 --- a/solr/core/src/java/org/apache/solr/search/neural/AbstractVectorQParserBase.java +++ b/solr/core/src/java/org/apache/solr/search/neural/AbstractVectorQParserBase.java @@ -76,6 +76,10 @@ protected static DenseVectorField getCheckedFieldType(SchemaField schemaField) { return (DenseVectorField) fieldType; } + protected Query getSeedQuery() throws SolrException, SyntaxError { + return null; // TODO + } + protected Query getFilterQuery() throws SolrException, SyntaxError { // Default behavior of FQ wrapping, and suitability of some local params diff --git a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java index 166dada5b7f..071f93753bb 100644 --- a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java +++ b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java @@ -41,6 +41,6 @@ public Query parse() throws SyntaxError { final int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K); return denseVectorType.getKnnVectorQuery( - schemaField.getName(), vectorToSearch, topK, getFilterQuery()); + schemaField.getName(), vectorToSearch, topK, getFilterQuery(), getSeedQuery()); } } diff --git a/solr/core/src/java/org/apache/solr/search/neural/VectorSimilarityQParser.java b/solr/core/src/java/org/apache/solr/search/neural/VectorSimilarityQParser.java index e3ec2f242f7..074aaa17a4d 100644 --- a/solr/core/src/java/org/apache/solr/search/neural/VectorSimilarityQParser.java +++ b/solr/core/src/java/org/apache/solr/search/neural/VectorSimilarityQParser.java @@ -62,10 +62,18 @@ public Query parse() throws SyntaxError { switch (vectorEncoding) { case FLOAT32: return new FloatVectorSimilarityQuery( - fieldName, vectorBuilder.getFloatVector(), minTraverse, minReturn, getFilterQuery()); + fieldName, + vectorBuilder.getFloatVector(), + minTraverse, + minReturn, + getFilterQuery() /*, getSeedQuery() */); case BYTE: return new ByteVectorSimilarityQuery( - fieldName, vectorBuilder.getByteVector(), minTraverse, minReturn, getFilterQuery()); + fieldName, + vectorBuilder.getByteVector(), + minTraverse, + minReturn, + getFilterQuery() /*, getSeedQuery() */); default: throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, From 7c194a82c9a4163b927b74fba0939280a2d204b1 Mon Sep 17 00:00:00 2001 From: ilariapet Date: Mon, 22 Sep 2025 10:50:14 +0200 Subject: [PATCH 02/11] Added support for SeededKnnVectorQuery --- solr/CHANGES.txt | 2 + .../apache/solr/schema/DenseVectorField.java | 15 ++- .../neural/AbstractVectorQParserBase.java | 4 - .../apache/solr/search/neural/KnnQParser.java | 11 ++ .../neural/VectorSimilarityQParser.java | 12 +-- .../solr/search/neural/KnnQParserTest.java | 102 ++++++++++++++++++ .../pages/dense-vector-search.adoc | 18 ++++ 7 files changed, 146 insertions(+), 18 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 0584672fa1c..8450a48d2c0 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -23,6 +23,8 @@ New Features * SOLR-17023: Use Modern NLP Models from Apache OpenNLP with Solr (Jeff Zemerick, Eric Pugh) +* SOLR-17813: Add support for SeededKnnVectorQuery (Ilaria Petreti via Alessandro Benedetti) + Improvements --------------------- diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java index 2d8f876b4a3..efcd82795ae 100644 --- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java @@ -40,6 +40,7 @@ import org.apache.lucene.search.KnnByteVectorQuery; import org.apache.lucene.search.KnnFloatVectorQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.SeededKnnVectorQuery; import org.apache.lucene.search.SortField; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.hnsw.HnswGraph; @@ -383,11 +384,17 @@ public Query getKnnVectorQuery( switch (vectorEncoding) { case FLOAT32: - return new KnnFloatVectorQuery( - fieldName, vectorBuilder.getFloatVector(), topK, filterQuery /*, seedQuery */); + KnnFloatVectorQuery floatVectorQuery = + new KnnFloatVectorQuery(fieldName, vectorBuilder.getFloatVector(), topK, filterQuery); + return (seedQuery != null) + ? SeededKnnVectorQuery.fromFloatQuery(floatVectorQuery, seedQuery) + : floatVectorQuery; case BYTE: - return new KnnByteVectorQuery( - fieldName, vectorBuilder.getByteVector(), topK, filterQuery /*, seedQuery */); + KnnByteVectorQuery byteVectorQuery = + new KnnByteVectorQuery(fieldName, vectorBuilder.getByteVector(), topK, filterQuery); + return (seedQuery != null) + ? SeededKnnVectorQuery.fromByteQuery(byteVectorQuery, seedQuery) + : byteVectorQuery; default: throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, diff --git a/solr/core/src/java/org/apache/solr/search/neural/AbstractVectorQParserBase.java b/solr/core/src/java/org/apache/solr/search/neural/AbstractVectorQParserBase.java index 105681abc2d..d7ab9c72938 100644 --- a/solr/core/src/java/org/apache/solr/search/neural/AbstractVectorQParserBase.java +++ b/solr/core/src/java/org/apache/solr/search/neural/AbstractVectorQParserBase.java @@ -76,10 +76,6 @@ protected static DenseVectorField getCheckedFieldType(SchemaField schemaField) { return (DenseVectorField) fieldType; } - protected Query getSeedQuery() throws SolrException, SyntaxError { - return null; // TODO - } - protected Query getFilterQuery() throws SolrException, SyntaxError { // Default behavior of FQ wrapping, and suitability of some local params diff --git a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java index 97580a083eb..cd78aeb282a 100644 --- a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java +++ b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java @@ -17,10 +17,12 @@ package org.apache.solr.search.neural; import org.apache.lucene.search.Query; +import org.apache.solr.common.SolrException; import org.apache.solr.common.params.SolrParams; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.DenseVectorField; import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.QParser; import org.apache.solr.search.SyntaxError; public class KnnQParser extends AbstractVectorQParserBase { @@ -28,11 +30,20 @@ public class KnnQParser extends AbstractVectorQParserBase { // retrieve the top K results based on the distance similarity function protected static final String TOP_K = "topK"; protected static final int DEFAULT_TOP_K = 10; + protected static final String SEED = "seed"; public KnnQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { super(qstr, localParams, params, req); } + protected Query getSeedQuery() throws SolrException, SyntaxError { + String seed = localParams.get(SEED); + if (seed == null || seed.isBlank()) return null; + + final QParser seedParser = subQuery(seed, null); + return seedParser.getQuery(); + } + @Override public Query parse() throws SyntaxError { final SchemaField schemaField = req.getCore().getLatestSchema().getField(getFieldName()); diff --git a/solr/core/src/java/org/apache/solr/search/neural/VectorSimilarityQParser.java b/solr/core/src/java/org/apache/solr/search/neural/VectorSimilarityQParser.java index 074aaa17a4d..e3ec2f242f7 100644 --- a/solr/core/src/java/org/apache/solr/search/neural/VectorSimilarityQParser.java +++ b/solr/core/src/java/org/apache/solr/search/neural/VectorSimilarityQParser.java @@ -62,18 +62,10 @@ public Query parse() throws SyntaxError { switch (vectorEncoding) { case FLOAT32: return new FloatVectorSimilarityQuery( - fieldName, - vectorBuilder.getFloatVector(), - minTraverse, - minReturn, - getFilterQuery() /*, getSeedQuery() */); + fieldName, vectorBuilder.getFloatVector(), minTraverse, minReturn, getFilterQuery()); case BYTE: return new ByteVectorSimilarityQuery( - fieldName, - vectorBuilder.getByteVector(), - minTraverse, - minReturn, - getFilterQuery() /*, getSeedQuery() */); + fieldName, vectorBuilder.getByteVector(), minTraverse, minReturn, getFilterQuery()); default: throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, diff --git a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java index f5d5668a7e5..9abeb6df3a6 100644 --- a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java +++ b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java @@ -37,6 +37,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 { String vectorField = "vector"; String vectorField2 = "vector2"; String vectorFieldByteEncoding = "vector_byte_encoding"; + String stringField = "string_field"; @Before public void prepareIndex() throws Exception { @@ -106,6 +107,9 @@ private List prepareDocs() { docs.get(6).addField(vectorFieldByteEncoding, Arrays.asList(18, 2, 4, 4)); docs.get(7).addField(vectorFieldByteEncoding, Arrays.asList(8, 3, 2, 4)); + docs.get(0).addField(stringField, "test"); + docs.get(1).addField(stringField, "test2"); + return docs; } @@ -967,4 +971,102 @@ public void knnQueryAsRerank_shouldAddSimilarityFunctionScore() { "//result/doc[3]/str[@name='id'][.='3']", "//result/doc[4]/str[@name='id'][.='9']"); } + + @Test + public void knnQueryWithSeedQuery_shouldPerformSeededKnnVectorQuery() { + // Test to verify that when the seed parameter is provided, the SeededKnnVectorQuery is executed + // (float). + String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; + + // BooleanQuery + assertQ( + req( + CommonParams.Q, + "{!knn f=vector topK=4 seed='id:(1 4 7 8 9)'}" + vectorToSearch, + "fl", + "id", + "debugQuery", + "true"), + "//result[@numFound='4']", + "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=id:1 id:4 id:7 id:8 id:9, seedWeight=null, delegate=KnnFloatVectorQuery:vector[1.0,...][4]})']"); + + // SolrRangeQuery + assertQ( + req( + CommonParams.Q, + "{!knn f=vector topK=4 seed='id:[1 TO 5]'}" + vectorToSearch, + "fl", + "id", + "debugQuery", + "true"), + "//result[@numFound='4']", + "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=id:[1 TO 5], seedWeight=null, delegate=KnnFloatVectorQuery:vector[1.0,...][4]})']"); + + // Term Query + assertQ( + req( + CommonParams.Q, + "{!knn f=vector topK=4 seed='string_field:test'}" + vectorToSearch, + "fl", + "id", + "debugQuery", + "true"), + "//result[@numFound='4']", + "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=string_field:test, seedWeight=null, delegate=KnnFloatVectorQuery:vector[1.0,...][4]})']"); + } + + @Test + public void byteKnnQueryWithSeedQuery_shouldPerformSeededKnnVectorQuery() { + // Test to verify that when the seed parameter is provided, the SeededKnnVectorQuery is executed + // (byte). + + String vectorToSearch = "[2, 2, 1, 3]"; + + // BooleanQuery + assertQ( + req( + CommonParams.Q, + "{!knn f=vector_byte_encoding topK=4 seed='id:(1 4 7 8 9)'}" + vectorToSearch, + "fl", + "id", + "debugQuery", + "true"), + "//result[@numFound='4']", + "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=id:1 id:4 id:7 id:8 id:9, seedWeight=null, delegate=KnnByteVectorQuery:vector_byte_encoding[2,...][4]})']"); + } + + @Test + public void knnQueryWithBlackSeed_shouldPerformKnnFloatVectorQuery() { + // Test to verify that when the seed parameter is provided but blank, it is treated as null, and + // no additional seed logic is applied. + String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; + + assertQ( + req( + CommonParams.Q, + "{!knn f=vector topK=4 seed=''}" + vectorToSearch, + "fl", + "id", + "debugQuery", + "true"), + "//result[@numFound='4']", + "//result/doc[1]/str[@name='id'][.='1']", + "//result/doc[2]/str[@name='id'][.='4']", + "//result/doc[3]/str[@name='id'][.='2']", + "//result/doc[4]/str[@name='id'][.='10']", + "//str[@name='parsedquery'][.='KnnFloatVectorQuery(KnnFloatVectorQuery:vector[1.0,...][4])']"); + } + + @Test + public void knnQueryWithInvalidSeedQuery_shouldThrowException() { + // Test to verify that when the seed parameter is provided with an invalid value, Solr throws a + // BAD_REQUEST exception. + String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; + + assertQEx( + "Invalid seed query should throw Exception", + "Cannot parse 'id:'", + req(CommonParams.Q, "{!knn f=vector topK=4 seed='id:'}" + vectorToSearch), + SolrException.ErrorCode.BAD_REQUEST); + } } diff --git a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc index e66501faae3..b3fadd4c52a 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc @@ -384,6 +384,24 @@ Here's an example of a simple `knn` search: The search results retrieved are the k=10 nearest documents to the vector in input `[1.0, 2.0, 3.0, 4.0]`, ranked by the `similarityFunction` configured at indexing time. +`seed`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +A query seed to initiate the vector search, i.e. entry points in the HNSW graph exploration. Solr relies on Lucene’s implementation of https://lucene.apache.org/core/10_2_0/core/org/apache/lucene/search/SeededKnnVectorQuery.html[org.apache.lucene.search.SeededKnnVectorQuery]. ++ +The seed can be any valid Solr query, letting traditional query logic guide the vector search in a hybrid-like way. + + +Here is an example of a `knn` search using a `seed` query: + +[source,text] +?q={!knn f=vector topK=10 seed='id:(1 4 10)'}[1.0, 2.0, 3.0, 4.0] + +The search results retrieved are the k=10 nearest documents to the vector in input `[1.0, 2.0, 3.0, 4.0]`. Documents matching the query `id:(1 4 10)` are used as entry points for the ANN search. If no documents match the seed, Solr falls back to a regular knn search without seeding, starting instead from random entry points. === knn_text_to_vector Query Parser From dc2a433be1ea82031ce6da7f769e351d551294e3 Mon Sep 17 00:00:00 2001 From: ilariapet Date: Fri, 26 Sep 2025 11:54:34 +0200 Subject: [PATCH 03/11] Adjustment after the introduction of PatienceKnnQuery --- .../apache/solr/schema/DenseVectorField.java | 105 +++++++++++------- .../apache/solr/search/neural/KnnQParser.java | 18 ++- .../solr/search/neural/KnnQParserTest.java | 44 +++----- .../pages/dense-vector-search.adoc | 10 +- 4 files changed, 104 insertions(+), 73 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java index a342f301cce..7b35df52b62 100644 --- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java @@ -374,52 +374,43 @@ public ValueSource getValueSource(SchemaField field, QParser parser) { } public Query getKnnVectorQuery( - String fieldName, String vectorToSearch, int topK, Query filterQuery) { - return getKnnVectorQuery(fieldName, vectorToSearch, topK, filterQuery, null, null); - } - - public Query getKnnVectorQuery( - String fieldName, String vectorToSearch, int topK, Query filterQuery, EarlyTerminationParams earlyTermination, Query seedQuery) { + String fieldName, + String vectorToSearch, + int topK, + Query filterQuery, + Query seedQuery, + EarlyTerminationParams earlyTermination) { DenseVectorParser vectorBuilder = getVectorBuilder(vectorToSearch, DenseVectorParser.BuilderPhase.QUERY); - switch (vectorEncoding) { - case FLOAT32: - KnnFloatVectorQuery knnFloatVectorQuery = - new KnnFloatVectorQuery(fieldName, vectorBuilder.getFloatVector(), topK, filterQuery); - if (earlyTermination.isEnabled()) { - return (earlyTermination.getSaturationThreshold() != null - && earlyTermination.getPatience() != null) - ? PatienceKnnVectorQuery.fromFloatQuery( - knnFloatVectorQuery, - earlyTermination.getSaturationThreshold(), - earlyTermination.getPatience()) - : PatienceKnnVectorQuery.fromFloatQuery(knnFloatVectorQuery); - } - else if (seedQuery != null) - return SeededKnnVectorQuery.fromFloatQuery(knnFloatVectorQuery, seedQuery); - return knnFloatVectorQuery; - case BYTE: - KnnByteVectorQuery knnByteVectorQuery = - new KnnByteVectorQuery(fieldName, vectorBuilder.getByteVector(), topK, filterQuery); - if (earlyTermination.isEnabled()) { - return (earlyTermination.getSaturationThreshold() != null - && earlyTermination.getPatience() != null) - ? PatienceKnnVectorQuery.fromByteQuery( - knnByteVectorQuery, - earlyTermination.getSaturationThreshold(), - earlyTermination.getPatience()) - : PatienceKnnVectorQuery.fromByteQuery(knnByteVectorQuery); - } - else if (seedQuery != null) - return SeededKnnVectorQuery.fromByteQuery(knnByteVectorQuery, seedQuery); - return knnByteVectorQuery; - default: - throw new SolrException( - SolrException.ErrorCode.SERVER_ERROR, - "Unexpected state. Vector Encoding: " + vectorEncoding); + Query knnQuery = + switch (vectorEncoding) { + case FLOAT32 -> new KnnFloatVectorQuery( + fieldName, vectorBuilder.getFloatVector(), topK, filterQuery); + case BYTE -> new KnnByteVectorQuery( + fieldName, vectorBuilder.getByteVector(), topK, filterQuery); + }; + + // NOTE: Currently seed and earlyTermination parameters cannot be used together due to + // https://github.com/apache/lucene/pull/14688 + // PatienceKnnVectorQuery does not rewrite its SeededKnnVectorQuery delegate, leaving seedWeight + // uninitialized and triggering a NullPointerException. + // Solr must be upgraded to a Lucene version that includes this patch. + if (seedQuery != null && earlyTermination.isEnabled()) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + "Seeded queries and early termination cannot be used together. " + + "This limitation is due to Lucene issue #14688, which is not yet included in the current version."); + } + + if (seedQuery != null) { + return applySeeded(knnQuery, seedQuery); } + if (earlyTermination.isEnabled()) { + return applyEarlyTermination(knnQuery, earlyTermination); + } + return knnQuery; } /** @@ -452,4 +443,36 @@ public SortField getSortField(SchemaField field, boolean top) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Cannot sort on a Dense Vector field"); } + + private static Query applySeeded(Query knnQuery, Query seed) { + return switch (knnQuery) { + case KnnFloatVectorQuery knnFloatQuery -> SeededKnnVectorQuery.fromFloatQuery( + knnFloatQuery, seed); + case KnnByteVectorQuery knnByteQuery -> SeededKnnVectorQuery.fromByteQuery( + knnByteQuery, seed); + default -> knnQuery; + }; + } + + private static Query applyEarlyTermination( + Query knnQuery, EarlyTerminationParams earlyTermination) { + final boolean useExplicitParams = + (earlyTermination.getSaturationThreshold() != null + && earlyTermination.getPatience() != null); + return switch (knnQuery) { + case KnnFloatVectorQuery knnFloatQuery -> useExplicitParams + ? PatienceKnnVectorQuery.fromFloatQuery( + knnFloatQuery, + earlyTermination.getSaturationThreshold(), + earlyTermination.getPatience()) + : PatienceKnnVectorQuery.fromFloatQuery(knnFloatQuery); + case KnnByteVectorQuery knnByteQuery -> useExplicitParams + ? PatienceKnnVectorQuery.fromByteQuery( + knnByteQuery, + earlyTermination.getSaturationThreshold(), + earlyTermination.getPatience()) + : PatienceKnnVectorQuery.fromByteQuery(knnByteQuery); + default -> knnQuery; + }; + } } diff --git a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java index 3ee5dbae8f9..ce63088e2ad 100644 --- a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java +++ b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java @@ -16,6 +16,7 @@ */ package org.apache.solr.search.neural; +import java.lang.invoke.MethodHandles; import java.util.Optional; import org.apache.lucene.search.Query; import org.apache.solr.common.SolrException; @@ -25,6 +26,8 @@ import org.apache.solr.schema.SchemaField; import org.apache.solr.search.QParser; import org.apache.solr.search.SyntaxError; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class KnnQParser extends AbstractVectorQParserBase { @@ -40,6 +43,7 @@ public class KnnQParser extends AbstractVectorQParserBase { protected static final boolean DEFAULT_EARLY_TERMINATION = false; protected static final String SATURATION_THRESHOLD = "saturationThreshold"; protected static final String PATIENCE = "patience"; + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); public KnnQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { super(qstr, localParams, params, req); @@ -92,8 +96,11 @@ public EarlyTerminationParams getEarlyTerminationParams() { protected Query getSeedQuery() throws SolrException, SyntaxError { String seed = localParams.get(SEED); - if (seed == null || seed.isBlank()) return null; - + if (seed == null) return null; + if (seed.isBlank()) { + log.warn("Seed query is blank, defaulting to null"); + return null; + } final QParser seedParser = subQuery(seed, null); return seedParser.getQuery(); } @@ -106,6 +113,11 @@ public Query parse() throws SyntaxError { final int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K); return denseVectorType.getKnnVectorQuery( - schemaField.getName(), vectorToSearch, topK, getFilterQuery(), getEarlyTerminationParams(), getSeedQuery()); + schemaField.getName(), + vectorToSearch, + topK, + getFilterQuery(), + getSeedQuery(), + getEarlyTerminationParams()); } } diff --git a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java index 5847d0009b8..c07c5e0d1f0 100644 --- a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java +++ b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java @@ -1205,7 +1205,6 @@ public void knnQueryWithSeedQuery_shouldPerformSeededKnnVectorQuery() { // (float). String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; - // BooleanQuery assertQ( req( CommonParams.Q, @@ -1216,30 +1215,6 @@ public void knnQueryWithSeedQuery_shouldPerformSeededKnnVectorQuery() { "true"), "//result[@numFound='4']", "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=id:1 id:4 id:7 id:8 id:9, seedWeight=null, delegate=KnnFloatVectorQuery:vector[1.0,...][4]})']"); - - // SolrRangeQuery - assertQ( - req( - CommonParams.Q, - "{!knn f=vector topK=4 seed='id:[1 TO 5]'}" + vectorToSearch, - "fl", - "id", - "debugQuery", - "true"), - "//result[@numFound='4']", - "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=id:[1 TO 5], seedWeight=null, delegate=KnnFloatVectorQuery:vector[1.0,...][4]})']"); - - // Term Query - assertQ( - req( - CommonParams.Q, - "{!knn f=vector topK=4 seed='string_field:test'}" + vectorToSearch, - "fl", - "id", - "debugQuery", - "true"), - "//result[@numFound='4']", - "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=string_field:test, seedWeight=null, delegate=KnnFloatVectorQuery:vector[1.0,...][4]})']"); } @Test @@ -1263,7 +1238,7 @@ public void byteKnnQueryWithSeedQuery_shouldPerformSeededKnnVectorQuery() { } @Test - public void knnQueryWithBlackSeed_shouldPerformKnnFloatVectorQuery() { + public void knnQueryWithBlankSeed_shouldPerformKnnFloatVectorQuery() { // Test to verify that when the seed parameter is provided but blank, it is treated as null, and // no additional seed logic is applied. String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; @@ -1296,4 +1271,21 @@ public void knnQueryWithInvalidSeedQuery_shouldThrowException() { req(CommonParams.Q, "{!knn f=vector topK=4 seed='id:'}" + vectorToSearch), SolrException.ErrorCode.BAD_REQUEST); } + + // NOTE: This test will need to be updated once Solr upgrades to a Lucene version that includes + // the fix for issue #14688 + @Test + public void knnQueryWithBothSeedAndEarlyTermination_shouldThrowException() { + // Test to verify that when both the seed and the early termination parameters are provided, + // Solr throws a BAD_REQUEST exception. + String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; + + assertQEx( + "Currently seed and earlyTermination parameters cannot be used together", + "Seeded queries and early termination cannot be used together. This limitation is due to Lucene issue #14688, which is not yet included in the current version.", + req( + CommonParams.Q, + "{!knn f=vector topK=4 seed='id:(1 4 7 8 9)' earlyTermination=true}" + vectorToSearch), + SolrException.ErrorCode.BAD_REQUEST); + } } diff --git a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc index d0c3eb4bd32..795d38c7860 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc @@ -47,7 +47,7 @@ The strategy implemented in Apache Lucene and used by Apache Solr is based on Na It provides efficient approximate nearest neighbor search for high dimensional vectors. -See https://doi.org/10.1016/j.is.2013.10.006[Approximate nearest neighbor algorithm based on navigable small world graphs [2014]] and https://arxiv.org/abs/1603.09320[Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs [2018]] for details. +See https://doi.org/10.1016/j.is.2013.10.006[Approximate nearest neighbor algorithm based on navigable small world graphs (2014)] and https://arxiv.org/abs/1603.09320[Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs (2018)] for details. == Index Time @@ -391,7 +391,7 @@ The search results retrieved are the k=10 nearest documents to the vector in inp |Optional |Default: `false` |=== + -Early termination is an HNSW optimization. Solr relies on the Lucene’s implementation of early termination for kNN queries, based on https://cs.uwaterloo.ca/~jimmylin/publications/Teofili_Lin_ECIR2025.pdf[Patience in Proximity: A Simple Early Termination Strategy for HNSW Graph Traversal in Approximate k-Nearest Neighbor Search]. +Early termination is an HNSW optimization. Solr relies on the Lucene’s implementation of early termination for kNN queries, based on https://cs.uwaterloo.ca/~jimmylin/publications/Teofili_Lin_ECIR2025.pdf[Patience in Proximity: A Simple Early Termination Strategy for HNSW Graph Traversal in Approximate k-Nearest Neighbor Search (2025)]. + When enabled (true), the search may exit early when the HNSW candidate queue remains saturated over a threshold (saturationThreshold) for more than a given number of iterations (patience). Refer to the two parameters below for more details. + @@ -439,7 +439,7 @@ Here's an example of a `knn` search using the early termination with input param |Optional |Default: none |=== + -A query seed to initiate the vector search, i.e. entry points in the HNSW graph exploration. Solr relies on Lucene’s implementation of https://lucene.apache.org/core/10_2_0/core/org/apache/lucene/search/SeededKnnVectorQuery.html[org.apache.lucene.search.SeededKnnVectorQuery]. +A query seed to initiate the vector search, i.e. entry points in the HNSW graph exploration. Solr relies on the Lucene’s implementation of SeededKnnVectorQuery based on https://arxiv.org/pdf/2307.16779[Lexically-Accelerated Dense Retrieval (2023)]. + The seed can be any valid Solr query, letting traditional query logic guide the vector search in a hybrid-like way. @@ -451,6 +451,10 @@ Here is an example of a `knn` search using a `seed` query: The search results retrieved are the k=10 nearest documents to the vector in input `[1.0, 2.0, 3.0, 4.0]`. Documents matching the query `id:(1 4 10)` are used as entry points for the ANN search. If no documents match the seed, Solr falls back to a regular knn search without seeding, starting instead from random entry points. +[NOTE] +Currently seed and earlyTermination parameters (including saturationThreshold and patience) cannot be used together. +Solr must be upgraded to a Lucene version that includes this patch https://github.com/apache/lucene/pull/14688[Lucene #14688]. + === knn_text_to_vector Query Parser The `knn_text_to_vector` query parser encode a textual query to a vector using a dedicated Large Language Model(fine tuned for the task of encoding text to vector for sentence similarity) and matches k-nearest neighbours documents to such query vector. From f0bdb83666aed75de9ef2594d91c45e2aa5b9a1c Mon Sep 17 00:00:00 2001 From: ilariapet Date: Fri, 3 Oct 2025 14:24:06 +0200 Subject: [PATCH 04/11] Addressed comments after the review with Alessandro Benedetti --- .../apache/solr/search/neural/KnnQParser.java | 16 ++--- .../solr/search/neural/KnnQParserTest.java | 67 ++++++++++++------- .../pages/dense-vector-search.adoc | 9 ++- 3 files changed, 52 insertions(+), 40 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java index ce63088e2ad..664e6f341c9 100644 --- a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java +++ b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java @@ -16,7 +16,6 @@ */ package org.apache.solr.search.neural; -import java.lang.invoke.MethodHandles; import java.util.Optional; import org.apache.lucene.search.Query; import org.apache.solr.common.SolrException; @@ -26,24 +25,20 @@ import org.apache.solr.schema.SchemaField; import org.apache.solr.search.QParser; import org.apache.solr.search.SyntaxError; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; public class KnnQParser extends AbstractVectorQParserBase { // retrieve the top K results based on the distance similarity function protected static final String TOP_K = "topK"; protected static final int DEFAULT_TOP_K = 10; - protected static final String SEED = "seed"; + protected static final String SEED_QUERY = "seedQuery"; // parameters for PatienceKnnVectorQuery, a version of knn vector query that exits early when HNSW - // queue - // saturates over a {@code #saturationThreshold} for more than {@code #patience} times. + // queue saturates over a {@code #saturationThreshold} for more than {@code #patience} times. protected static final String EARLY_TERMINATION = "earlyTermination"; protected static final boolean DEFAULT_EARLY_TERMINATION = false; protected static final String SATURATION_THRESHOLD = "saturationThreshold"; protected static final String PATIENCE = "patience"; - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); public KnnQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { super(qstr, localParams, params, req); @@ -95,11 +90,12 @@ public EarlyTerminationParams getEarlyTerminationParams() { } protected Query getSeedQuery() throws SolrException, SyntaxError { - String seed = localParams.get(SEED); + String seed = localParams.get(SEED_QUERY); if (seed == null) return null; if (seed.isBlank()) { - log.warn("Seed query is blank, defaulting to null"); - return null; + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + "'seedQuery' parameter is present but is blank: please provide a valid query"); } final QParser seedParser = subQuery(seed, null); return seedParser.getQuery(); diff --git a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java index c07c5e0d1f0..5c4836415b7 100644 --- a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java +++ b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java @@ -1201,14 +1201,15 @@ public void onlyOneInputParam_shouldThrowException() { @Test public void knnQueryWithSeedQuery_shouldPerformSeededKnnVectorQuery() { - // Test to verify that when the seed parameter is provided, the SeededKnnVectorQuery is executed + // Test to verify that when the seedQuery parameter is provided, the SeededKnnVectorQuery is + // executed // (float). String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; assertQ( req( CommonParams.Q, - "{!knn f=vector topK=4 seed='id:(1 4 7 8 9)'}" + vectorToSearch, + "{!knn f=vector topK=4 seedQuery='id:(1 4 7 8 9)'}" + vectorToSearch, "fl", "id", "debugQuery", @@ -1219,7 +1220,8 @@ public void knnQueryWithSeedQuery_shouldPerformSeededKnnVectorQuery() { @Test public void byteKnnQueryWithSeedQuery_shouldPerformSeededKnnVectorQuery() { - // Test to verify that when the seed parameter is provided, the SeededKnnVectorQuery is executed + // Test to verify that when the seedQuery parameter is provided, the SeededKnnVectorQuery is + // executed // (byte). String vectorToSearch = "[2, 2, 1, 3]"; @@ -1228,7 +1230,7 @@ public void byteKnnQueryWithSeedQuery_shouldPerformSeededKnnVectorQuery() { assertQ( req( CommonParams.Q, - "{!knn f=vector_byte_encoding topK=4 seed='id:(1 4 7 8 9)'}" + vectorToSearch, + "{!knn f=vector_byte_encoding topK=4 seedQuery='id:(1 4 7 8 9)'}" + vectorToSearch, "fl", "id", "debugQuery", @@ -1238,45 +1240,59 @@ public void byteKnnQueryWithSeedQuery_shouldPerformSeededKnnVectorQuery() { } @Test - public void knnQueryWithBlankSeed_shouldPerformKnnFloatVectorQuery() { - // Test to verify that when the seed parameter is provided but blank, it is treated as null, and - // no additional seed logic is applied. + public void knnQueryWithBlankSeed_shouldThrowException() { + // Test to verify that when the seedQuery parameter is provided but blank, Solr throws a + // BAD_REQUEST exception. String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; - assertQ( - req( - CommonParams.Q, - "{!knn f=vector topK=4 seed=''}" + vectorToSearch, - "fl", - "id", - "debugQuery", - "true"), - "//result[@numFound='4']", - "//result/doc[1]/str[@name='id'][.='1']", - "//result/doc[2]/str[@name='id'][.='4']", - "//result/doc[3]/str[@name='id'][.='2']", - "//result/doc[4]/str[@name='id'][.='10']", - "//str[@name='parsedquery'][.='KnnFloatVectorQuery(KnnFloatVectorQuery:vector[1.0,...][4])']"); + assertQEx( + "Blank seed query should throw Exception", + "'seedQuery' parameter is present but is blank: please provide a valid query", + req(CommonParams.Q, "{!knn f=vector topK=4 seedQuery=''}" + vectorToSearch), + SolrException.ErrorCode.BAD_REQUEST); } @Test public void knnQueryWithInvalidSeedQuery_shouldThrowException() { - // Test to verify that when the seed parameter is provided with an invalid value, Solr throws a + // Test to verify that when the seedQuery parameter is provided with an invalid value, Solr + // throws a // BAD_REQUEST exception. String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; assertQEx( "Invalid seed query should throw Exception", "Cannot parse 'id:'", - req(CommonParams.Q, "{!knn f=vector topK=4 seed='id:'}" + vectorToSearch), + req(CommonParams.Q, "{!knn f=vector topK=4 seedQuery='id:'}" + vectorToSearch), SolrException.ErrorCode.BAD_REQUEST); } + @Test + public void knnQueryWithKnnSeedQuery_shouldPerformSeededKnnVectorQuery() { + // Test to verify that when the seedQuery parameter itself is a knn query, it is correctly + // parsed and applied as the seed for the main knn query. + String mainVectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; + String seedVectorToSearch = "[0.1, 0.2, 0.3, 0.4]"; + + assertQ( + req( + CommonParams.Q, + "{!knn f=vector topK=4 seedQuery=$seedQuery}" + mainVectorToSearch, + "seedQuery", + "{!knn f=vector topK=4}" + seedVectorToSearch, + "fl", + "id", + "debugQuery", + "true"), + "//result[@numFound='4']", + "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=KnnFloatVectorQuery:vector[0.1,...][4], seedWeight=null, delegate=KnnFloatVectorQuery:vector[1.0,...][4]})']"); + } + // NOTE: This test will need to be updated once Solr upgrades to a Lucene version that includes // the fix for issue #14688 @Test public void knnQueryWithBothSeedAndEarlyTermination_shouldThrowException() { - // Test to verify that when both the seed and the early termination parameters are provided, + // Test to verify that when both the seedQuery and the early termination parameters are + // provided, // Solr throws a BAD_REQUEST exception. String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; @@ -1285,7 +1301,8 @@ public void knnQueryWithBothSeedAndEarlyTermination_shouldThrowException() { "Seeded queries and early termination cannot be used together. This limitation is due to Lucene issue #14688, which is not yet included in the current version.", req( CommonParams.Q, - "{!knn f=vector topK=4 seed='id:(1 4 7 8 9)' earlyTermination=true}" + vectorToSearch), + "{!knn f=vector topK=4 seedQuery='id:(1 4 7 8 9)' earlyTermination=true}" + + vectorToSearch), SolrException.ErrorCode.BAD_REQUEST); } } diff --git a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc index 795d38c7860..f025b48e875 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc @@ -432,7 +432,7 @@ Here's an example of a `knn` search using the early termination with input param [source,text] ?q={!knn f=vector topK=10 earlyTermination=true saturationThreshold=0.989 patience=10}[1.0, 2.0, 3.0, 4.0] -`seed`:: +`seedQuery`:: + [%autowidth,frame=none] |=== @@ -441,13 +441,12 @@ Here's an example of a `knn` search using the early termination with input param + A query seed to initiate the vector search, i.e. entry points in the HNSW graph exploration. Solr relies on the Lucene’s implementation of SeededKnnVectorQuery based on https://arxiv.org/pdf/2307.16779[Lexically-Accelerated Dense Retrieval (2023)]. + -The seed can be any valid Solr query, letting traditional query logic guide the vector search in a hybrid-like way. +The seedQuery is primarily intended to be a lexical query, guiding the vector search in a hybrid-like way through traditional query logic. Although a knn query can also be used as a seed — which might make sense in specific scenarios and has been verified by a dedicated test — this approach is not considered a best practice. - -Here is an example of a `knn` search using a `seed` query: +Here is an example of a `knn` search using a `seedQuery`: [source,text] -?q={!knn f=vector topK=10 seed='id:(1 4 10)'}[1.0, 2.0, 3.0, 4.0] +?q={!knn f=vector topK=10 seedQuery='id:(1 4 10)'}[1.0, 2.0, 3.0, 4.0] The search results retrieved are the k=10 nearest documents to the vector in input `[1.0, 2.0, 3.0, 4.0]`. Documents matching the query `id:(1 4 10)` are used as entry points for the ANN search. If no documents match the seed, Solr falls back to a regular knn search without seeding, starting instead from random entry points. From 07b6427453f6f5bb8f31c53a48196df146bf2d7a Mon Sep 17 00:00:00 2001 From: ilariapet Date: Thu, 16 Oct 2025 11:59:45 +0200 Subject: [PATCH 05/11] Adjustment after the merge of SOLR-17917: Lucene 10.3 upgrade --- .../apache/solr/schema/DenseVectorField.java | 41 ++++++++++--------- .../solr/search/neural/KnnQParserTest.java | 36 +++++++++++----- .../pages/dense-vector-search.adoc | 18 ++++---- 3 files changed, 54 insertions(+), 41 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java index 7b35df52b62..1ef1caca3be 100644 --- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java @@ -384,7 +384,7 @@ public Query getKnnVectorQuery( DenseVectorParser vectorBuilder = getVectorBuilder(vectorToSearch, DenseVectorParser.BuilderPhase.QUERY); - Query knnQuery = + final Query knnQuery = switch (vectorEncoding) { case FLOAT32 -> new KnnFloatVectorQuery( fieldName, vectorBuilder.getFloatVector(), topK, filterQuery); @@ -392,25 +392,22 @@ public Query getKnnVectorQuery( fieldName, vectorBuilder.getByteVector(), topK, filterQuery); }; - // NOTE: Currently seed and earlyTermination parameters cannot be used together due to - // https://github.com/apache/lucene/pull/14688 - // PatienceKnnVectorQuery does not rewrite its SeededKnnVectorQuery delegate, leaving seedWeight - // uninitialized and triggering a NullPointerException. - // Solr must be upgraded to a Lucene version that includes this patch. - if (seedQuery != null && earlyTermination.isEnabled()) { - throw new SolrException( - SolrException.ErrorCode.BAD_REQUEST, - "Seeded queries and early termination cannot be used together. " - + "This limitation is due to Lucene issue #14688, which is not yet included in the current version."); - } - - if (seedQuery != null) { - return applySeeded(knnQuery, seedQuery); - } - if (earlyTermination.isEnabled()) { - return applyEarlyTermination(knnQuery, earlyTermination); - } - return knnQuery; + final boolean seedEnabled = (seedQuery != null); + final boolean earlyTerminationEnabled = + (earlyTermination != null && earlyTermination.isEnabled()); + + return switch ((seedEnabled ? 1 : 0) | (earlyTerminationEnabled ? 2 : 0)) { + // 0: no seed, no early termination -> knnQuery + case 0 -> knnQuery; + // 1: only seed -> Seeded(knnQuery) + case 1 -> applySeeded(knnQuery, seedQuery); + // 2: only early termination -> Patience(knnQuery) + case 2 -> applyEarlyTermination(knnQuery, earlyTermination); + // 3: seed + early termination -> Patience(Seeded(knnQuery)) + case 3 -> applyEarlyTermination(applySeeded(knnQuery, seedQuery), earlyTermination); + default -> throw new IllegalStateException( + "Unexpected combination of seedQuery and early termination parameters"); + }; } /** @@ -472,6 +469,10 @@ private static Query applyEarlyTermination( earlyTermination.getSaturationThreshold(), earlyTermination.getPatience()) : PatienceKnnVectorQuery.fromByteQuery(knnByteQuery); + case SeededKnnVectorQuery seedQuery -> useExplicitParams + ? PatienceKnnVectorQuery.fromSeededQuery( + seedQuery, earlyTermination.getSaturationThreshold(), earlyTermination.getPatience()) + : PatienceKnnVectorQuery.fromSeededQuery(seedQuery); default -> knnQuery; }; } diff --git a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java index 5c4836415b7..7648167881f 100644 --- a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java +++ b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java @@ -1287,22 +1287,36 @@ public void knnQueryWithKnnSeedQuery_shouldPerformSeededKnnVectorQuery() { "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=KnnFloatVectorQuery:vector[0.1,...][4], seedWeight=null, delegate=KnnFloatVectorQuery:vector[1.0,...][4]})']"); } - // NOTE: This test will need to be updated once Solr upgrades to a Lucene version that includes - // the fix for issue #14688 @Test - public void knnQueryWithBothSeedAndEarlyTermination_shouldThrowException() { - // Test to verify that when both the seedQuery and the early termination parameters are - // provided, - // Solr throws a BAD_REQUEST exception. + public void + knnQueryWithBothSeedAndEarlyTermination_shouldPerformPatienceKnnVectorQueryFromSeeded() { + // Test to verify that when both the seed and the early termination parameters are provided, the + // PatienceKnnVectorQuery is executed + // using the SeededKnnVectorQuery. String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; - assertQEx( - "Currently seed and earlyTermination parameters cannot be used together", - "Seeded queries and early termination cannot be used together. This limitation is due to Lucene issue #14688, which is not yet included in the current version.", + assertQ( req( CommonParams.Q, "{!knn f=vector topK=4 seedQuery='id:(1 4 7 8 9)' earlyTermination=true}" - + vectorToSearch), - SolrException.ErrorCode.BAD_REQUEST); + + vectorToSearch, + "fl", + "id", + "debugQuery", + "true"), + // Verify that 4 documents are returned + "//result[@numFound='4']", + // Verify that the parsed query is a nested PatienceKnnVectorQuery wrapping a + // SeededKnnVectorQuery + "//str[@name='parsedquery'][contains(.,'PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=0.995, patience=7, delegate=SeededKnnVectorQuery{')]", + // Verify that the seed query contains the expected document IDs + "//str[@name='parsedquery'][contains(.,'seed=id:1 id:4 id:7 id:8 id:9')]", + // Verify that a seedWeight field is present — its value (BooleanWeight@) includes a + // hash code + // that changes on each run, so it cannot be asserted explicitly + "//str[@name='parsedquery'][contains(.,'seedWeight=')]", + // Verify that the final delegate is a KnnFloatVectorQuery with the expected vector and topK + // value + "//str[@name='parsedquery'][contains(.,'delegate=KnnFloatVectorQuery:vector[1.0,...][4]')]"); } } diff --git a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc index c6a56376a7e..d4dd68ee74f 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc @@ -467,6 +467,8 @@ Here's an example of a `knn` search using the early termination with input param A query seed to initiate the vector search, i.e. entry points in the HNSW graph exploration. Solr relies on the Lucene’s implementation of SeededKnnVectorQuery based on https://arxiv.org/pdf/2307.16779[Lexically-Accelerated Dense Retrieval (2023)]. + The seedQuery is primarily intended to be a lexical query, guiding the vector search in a hybrid-like way through traditional query logic. Although a knn query can also be used as a seed — which might make sense in specific scenarios and has been verified by a dedicated test — this approach is not considered a best practice. ++ +The seedQuery can also be used in combination with earlyTermination. Here is an example of a `knn` search using a `seedQuery`: @@ -475,10 +477,6 @@ Here is an example of a `knn` search using a `seedQuery`: The search results retrieved are the k=10 nearest documents to the vector in input `[1.0, 2.0, 3.0, 4.0]`. Documents matching the query `id:(1 4 10)` are used as entry points for the ANN search. If no documents match the seed, Solr falls back to a regular knn search without seeding, starting instead from random entry points. -[NOTE] -Currently seed and earlyTermination parameters (including saturationThreshold and patience) cannot be used together. -Solr must be upgraded to a Lucene version that includes this patch https://github.com/apache/lucene/pull/14688[Lucene #14688]. - === knn_text_to_vector Query Parser The `knn_text_to_vector` query parser encode a textual query to a vector using a dedicated Large Language Model(fine tuned for the task of encoding text to vector for sentence similarity) and matches k-nearest neighbours documents to such query vector. @@ -846,7 +844,7 @@ cat > cuvs_configset/conf/solrconfig.xml << 'EOF' 10.0.0 ${solr.data.dir:} - + ${solr.ulog.dir:} @@ -875,7 +873,7 @@ cat > cuvs_configset/conf/solrconfig.xml << 'EOF' 10 - + EOF @@ -887,16 +885,16 @@ cat > cuvs_configset/conf/managed-schema << 'EOF' - - + id EOF From a34f17641be5b0110e6838723560410d9675eb04 Mon Sep 17 00:00:00 2001 From: ilariapet Date: Mon, 20 Oct 2025 10:09:55 +0200 Subject: [PATCH 06/11] Updated documentation --- .../modules/query-guide/pages/dense-vector-search.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc index d4dd68ee74f..82ae6648a05 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc @@ -464,7 +464,7 @@ Here's an example of a `knn` search using the early termination with input param |Optional |Default: none |=== + -A query seed to initiate the vector search, i.e. entry points in the HNSW graph exploration. Solr relies on the Lucene’s implementation of SeededKnnVectorQuery based on https://arxiv.org/pdf/2307.16779[Lexically-Accelerated Dense Retrieval (2023)]. +A query seed to initiate the vector search, i.e. entry points in the HNSW graph exploration. Solr relies on Lucene’s implementation of {lucene-javadocs}/core/org/apache/lucene/search/SeededKnnVectorQuery.html[SeededKnnVectorQuery] based on https://arxiv.org/pdf/2307.16779[Lexically-Accelerated Dense Retrieval (2023)]. + The seedQuery is primarily intended to be a lexical query, guiding the vector search in a hybrid-like way through traditional query logic. Although a knn query can also be used as a seed — which might make sense in specific scenarios and has been verified by a dedicated test — this approach is not considered a best practice. + From 77baf8e501a66d39b631f96ff97ba7bc9bb83592 Mon Sep 17 00:00:00 2001 From: ilariapet Date: Mon, 20 Oct 2025 10:54:01 +0200 Subject: [PATCH 07/11] Renaming --- .../java/org/apache/solr/schema/DenseVectorField.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java index 1ef1caca3be..7861f6bfd32 100644 --- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java @@ -400,11 +400,11 @@ public Query getKnnVectorQuery( // 0: no seed, no early termination -> knnQuery case 0 -> knnQuery; // 1: only seed -> Seeded(knnQuery) - case 1 -> applySeeded(knnQuery, seedQuery); + case 1 -> getSeededQuery(knnQuery, seedQuery); // 2: only early termination -> Patience(knnQuery) - case 2 -> applyEarlyTermination(knnQuery, earlyTermination); + case 2 -> getEarlyTerminationQuery(knnQuery, earlyTermination); // 3: seed + early termination -> Patience(Seeded(knnQuery)) - case 3 -> applyEarlyTermination(applySeeded(knnQuery, seedQuery), earlyTermination); + case 3 -> getEarlyTerminationQuery(getSeededQuery(knnQuery, seedQuery), earlyTermination); default -> throw new IllegalStateException( "Unexpected combination of seedQuery and early termination parameters"); }; @@ -441,17 +441,18 @@ public SortField getSortField(SchemaField field, boolean top) { SolrException.ErrorCode.BAD_REQUEST, "Cannot sort on a Dense Vector field"); } - private static Query applySeeded(Query knnQuery, Query seed) { + private static Query getSeededQuery(Query knnQuery, Query seed) { return switch (knnQuery) { case KnnFloatVectorQuery knnFloatQuery -> SeededKnnVectorQuery.fromFloatQuery( knnFloatQuery, seed); case KnnByteVectorQuery knnByteQuery -> SeededKnnVectorQuery.fromByteQuery( knnByteQuery, seed); + default -> knnQuery; }; } - private static Query applyEarlyTermination( + private static Query getEarlyTerminationQuery( Query knnQuery, EarlyTerminationParams earlyTermination) { final boolean useExplicitParams = (earlyTermination.getSaturationThreshold() != null From bd6de7d8b04500519c78684b14ffc1f30ea6000a Mon Sep 17 00:00:00 2001 From: ilariapet Date: Mon, 20 Oct 2025 11:10:02 +0200 Subject: [PATCH 08/11] Refactoring switch case --- .../src/java/org/apache/solr/schema/DenseVectorField.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java index 7861f6bfd32..2bb8bd17317 100644 --- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java @@ -396,17 +396,16 @@ public Query getKnnVectorQuery( final boolean earlyTerminationEnabled = (earlyTermination != null && earlyTermination.isEnabled()); - return switch ((seedEnabled ? 1 : 0) | (earlyTerminationEnabled ? 2 : 0)) { + int caseNumber = (seedEnabled ? 1 : 0) + (earlyTerminationEnabled ? 2 : 0); + return switch (caseNumber) { // 0: no seed, no early termination -> knnQuery - case 0 -> knnQuery; + default -> knnQuery; // 1: only seed -> Seeded(knnQuery) case 1 -> getSeededQuery(knnQuery, seedQuery); // 2: only early termination -> Patience(knnQuery) case 2 -> getEarlyTerminationQuery(knnQuery, earlyTermination); // 3: seed + early termination -> Patience(Seeded(knnQuery)) case 3 -> getEarlyTerminationQuery(getSeededQuery(knnQuery, seedQuery), earlyTermination); - default -> throw new IllegalStateException( - "Unexpected combination of seedQuery and early termination parameters"); }; } From 1fd2f153b118cf9a0ee42e30a55b007916800524 Mon Sep 17 00:00:00 2001 From: ilariapet Date: Mon, 20 Oct 2025 11:35:11 +0200 Subject: [PATCH 09/11] Removed static and throw exception for the default case --- .../org/apache/solr/schema/DenseVectorField.java | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java index 2bb8bd17317..7a24bd9377e 100644 --- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java @@ -440,18 +440,19 @@ public SortField getSortField(SchemaField field, boolean top) { SolrException.ErrorCode.BAD_REQUEST, "Cannot sort on a Dense Vector field"); } - private static Query getSeededQuery(Query knnQuery, Query seed) { + private Query getSeededQuery(Query knnQuery, Query seed) { return switch (knnQuery) { case KnnFloatVectorQuery knnFloatQuery -> SeededKnnVectorQuery.fromFloatQuery( knnFloatQuery, seed); case KnnByteVectorQuery knnByteQuery -> SeededKnnVectorQuery.fromByteQuery( knnByteQuery, seed); - - default -> knnQuery; + default -> throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "Invalid type of knn query"); }; } - private static Query getEarlyTerminationQuery( + private Query getEarlyTerminationQuery( Query knnQuery, EarlyTerminationParams earlyTermination) { final boolean useExplicitParams = (earlyTermination.getSaturationThreshold() != null @@ -473,7 +474,9 @@ private static Query getEarlyTerminationQuery( ? PatienceKnnVectorQuery.fromSeededQuery( seedQuery, earlyTermination.getSaturationThreshold(), earlyTermination.getPatience()) : PatienceKnnVectorQuery.fromSeededQuery(seedQuery); - default -> knnQuery; + default -> throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "Invalid type of knn query"); }; } } From b149cc23006f58851db5445c1f254a22ba1cf84c Mon Sep 17 00:00:00 2001 From: ilariapet Date: Mon, 20 Oct 2025 11:36:39 +0200 Subject: [PATCH 10/11] Tidy --- .../java/org/apache/solr/schema/DenseVectorField.java | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java index 7a24bd9377e..771d11c5635 100644 --- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java @@ -447,13 +447,11 @@ private Query getSeededQuery(Query knnQuery, Query seed) { case KnnByteVectorQuery knnByteQuery -> SeededKnnVectorQuery.fromByteQuery( knnByteQuery, seed); default -> throw new SolrException( - SolrException.ErrorCode.SERVER_ERROR, - "Invalid type of knn query"); + SolrException.ErrorCode.SERVER_ERROR, "Invalid type of knn query"); }; } - private Query getEarlyTerminationQuery( - Query knnQuery, EarlyTerminationParams earlyTermination) { + private Query getEarlyTerminationQuery(Query knnQuery, EarlyTerminationParams earlyTermination) { final boolean useExplicitParams = (earlyTermination.getSaturationThreshold() != null && earlyTermination.getPatience() != null); @@ -475,8 +473,7 @@ private Query getEarlyTerminationQuery( seedQuery, earlyTermination.getSaturationThreshold(), earlyTermination.getPatience()) : PatienceKnnVectorQuery.fromSeededQuery(seedQuery); default -> throw new SolrException( - SolrException.ErrorCode.SERVER_ERROR, - "Invalid type of knn query"); + SolrException.ErrorCode.SERVER_ERROR, "Invalid type of knn query"); }; } } From acc6cf818b0c695bd7b9cdbca108ea7d798ef2c1 Mon Sep 17 00:00:00 2001 From: ilariapet Date: Mon, 20 Oct 2025 11:47:17 +0200 Subject: [PATCH 11/11] Tidy --- .../apache/solr/search/neural/KnnQParserTest.java | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java index 7648167881f..cfa5d91da69 100644 --- a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java +++ b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java @@ -1202,8 +1202,7 @@ public void onlyOneInputParam_shouldThrowException() { @Test public void knnQueryWithSeedQuery_shouldPerformSeededKnnVectorQuery() { // Test to verify that when the seedQuery parameter is provided, the SeededKnnVectorQuery is - // executed - // (float). + // executed (float). String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; assertQ( @@ -1221,8 +1220,7 @@ public void knnQueryWithSeedQuery_shouldPerformSeededKnnVectorQuery() { @Test public void byteKnnQueryWithSeedQuery_shouldPerformSeededKnnVectorQuery() { // Test to verify that when the seedQuery parameter is provided, the SeededKnnVectorQuery is - // executed - // (byte). + // executed (byte). String vectorToSearch = "[2, 2, 1, 3]"; @@ -1255,8 +1253,7 @@ public void knnQueryWithBlankSeed_shouldThrowException() { @Test public void knnQueryWithInvalidSeedQuery_shouldThrowException() { // Test to verify that when the seedQuery parameter is provided with an invalid value, Solr - // throws a - // BAD_REQUEST exception. + // throws a BAD_REQUEST exception. String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; assertQEx( @@ -1291,8 +1288,7 @@ public void knnQueryWithKnnSeedQuery_shouldPerformSeededKnnVectorQuery() { public void knnQueryWithBothSeedAndEarlyTermination_shouldPerformPatienceKnnVectorQueryFromSeeded() { // Test to verify that when both the seed and the early termination parameters are provided, the - // PatienceKnnVectorQuery is executed - // using the SeededKnnVectorQuery. + // PatienceKnnVectorQuery is executed using the SeededKnnVectorQuery. String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; assertQ( @@ -1312,8 +1308,7 @@ public void knnQueryWithKnnSeedQuery_shouldPerformSeededKnnVectorQuery() { // Verify that the seed query contains the expected document IDs "//str[@name='parsedquery'][contains(.,'seed=id:1 id:4 id:7 id:8 id:9')]", // Verify that a seedWeight field is present — its value (BooleanWeight@) includes a - // hash code - // that changes on each run, so it cannot be asserted explicitly + // hash code that changes on each run, so it cannot be asserted explicitly "//str[@name='parsedquery'][contains(.,'seedWeight=')]", // Verify that the final delegate is a KnnFloatVectorQuery with the expected vector and topK // value