From c7c2703c4e217d34d3876c6c3440bfb938ec02d6 Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Sat, 10 Jan 2026 20:49:49 -0500 Subject: [PATCH 01/29] opus assisted first pass --- .../solr/handler/export/ExportWriter.java | 185 +++++++++++- .../conf/schema-sortingresponse.xml | 20 +- .../solr/handler/export/TestExportWriter.java | 274 ++++++++++++++++++ .../pages/exporting-result-sets.adoc | 9 +- 4 files changed, 472 insertions(+), 16 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java index ba88e3ce7637..772e66077ac3 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java @@ -28,12 +28,21 @@ import java.lang.invoke.MethodHandles; import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.Date; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.TreeSet; +import java.util.WeakHashMap; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.StoredFieldVisitor; +import org.apache.lucene.index.StoredFields; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; @@ -99,6 +108,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable { public static final String BATCH_SIZE_PARAM = "batchSize"; public static final String QUEUE_SIZE_PARAM = "queueSize"; + public static final String INCLUDE_STORED_FIELDS_PARAM = "includeStoredFields"; public static final int DEFAULT_BATCH_SIZE = 30000; public static final int DEFAULT_QUEUE_SIZE = 150000; @@ -493,34 
+503,61 @@ void writeDoc( public List getFieldWriters(String[] fields, SolrQueryRequest req) throws IOException { DocValuesIteratorCache dvIterCache = new DocValuesIteratorCache(req.getSearcher(), false); - SolrReturnFields solrReturnFields = new SolrReturnFields(fields, req); + boolean includeStoredFields = + req.getParams().getBool(INCLUDE_STORED_FIELDS_PARAM, false); List writers = new ArrayList<>(); + Set docValueFields = new LinkedHashSet<>(); + Map storedOnlyFields = new LinkedHashMap<>(); + for (String field : req.getSearcher().getFieldNames()) { if (!solrReturnFields.wantsField(field)) { continue; } SchemaField schemaField = req.getSchema().getField(field); - if (!schemaField.hasDocValues()) { - throw new IOException(schemaField + " must have DocValues to use this feature."); - } - boolean multiValued = schemaField.multiValued(); FieldType fieldType = schemaField.getType(); - FieldWriter writer; - if (fieldType instanceof SortableTextField && !schemaField.useDocValuesAsStored()) { - if (solrReturnFields.getRequestedFieldNames() != null + // Check if field can use DocValues + boolean canUseDocValues = schemaField.hasDocValues() + && (!(fieldType instanceof SortableTextField) || schemaField.useDocValuesAsStored()); + + if (canUseDocValues) { + // Prefer DocValues when available + docValueFields.add(field); + } else if (schemaField.stored()) { + // Field is stored-only (no usable DocValues) + if (includeStoredFields) { + storedOnlyFields.put(field, schemaField); + } else if (solrReturnFields.getRequestedFieldNames() != null && solrReturnFields.getRequestedFieldNames().contains(field)) { - // Explicitly requested field cannot be used due to not having useDocValuesAsStored=true, - // throw exception + // Explicitly requested field without DocValues and includeStoredFields=false + throw new IOException( + schemaField + + " must have DocValues to use this feature. 
" + + "Try setting includeStoredFields=true to retrieve this field from stored values."); + } + // Else: glob matched stored-only field without includeStoredFields - silently skip + } else if (solrReturnFields.getRequestedFieldNames() != null + && solrReturnFields.getRequestedFieldNames().contains(field)) { + // Explicitly requested field that has neither DocValues nor stored + if (fieldType instanceof SortableTextField && !schemaField.useDocValuesAsStored()) { throw new IOException( - schemaField + " Must have useDocValuesAsStored='true' to be used with export writer"); + schemaField + + " Must have useDocValuesAsStored='true' to be used with export writer"); } else { - // Glob pattern matched field cannot be used due to not having useDocValuesAsStored=true - continue; + throw new IOException(schemaField + " must have DocValues to use this feature."); } } + // Else: glob matched field with neither DocValues nor stored - silently skip + } + + // Process DocValues fields first + for (String field : docValueFields) { + SchemaField schemaField = req.getSchema().getField(field); + boolean multiValued = schemaField.multiValued(); + FieldType fieldType = schemaField.getType(); + FieldWriter writer; DocValuesIteratorCache.FieldDocValuesSupplier docValuesCache = dvIterCache.getSupplier(field); @@ -574,6 +611,18 @@ public List getFieldWriters(String[] fields, SolrQueryRequest req) } writers.add(writer); } + + // Add StoredFieldsWriter if there are stored-only fields to process + if (!storedOnlyFields.isEmpty()) { + if (log.isWarnEnabled()) { + log.warn( + "Export request includes stored-only fields {} which may significantly impact performance. 
" + + "Consider adding docValues to these fields for better export performance.", + storedOnlyFields.keySet()); + } + writers.add(new StoredFieldsWriter(storedOnlyFields)); + } + return writers; } @@ -856,4 +905,114 @@ public String getMessage() { return "Early Client Disconnect"; } } + + static class StoredFieldsWriter extends FieldWriter { + + private final Map fields; + private final ThreadLocal> storedFieldsMap = + new ThreadLocal<>(); + + public StoredFieldsWriter(Map fieldsToRead) { + this.fields = fieldsToRead; + } + + @Override + public boolean write( + SortDoc sortDoc, LeafReaderContext readerContext, EntryWriter out, int fieldIndex) + throws IOException { + WeakHashMap map = storedFieldsMap.get(); + if (map == null) { + map = new WeakHashMap<>(); + storedFieldsMap.set(map); + } + LeafReader reader = readerContext.reader(); + StoredFields storedFields = map.get(reader.getReaderCacheHelper().getKey()); + if (storedFields == null) { + storedFields = reader.storedFields(); + map.put(reader.getReaderCacheHelper().getKey(), storedFields); + } + ExportVisitor visitor = new ExportVisitor(out); + storedFields.document(sortDoc.docId, visitor); + visitor.flush(); + return false; + } + + class ExportVisitor extends StoredFieldVisitor { + + final EntryWriter out; + String lastFieldName; + List multiValue = null; + + public ExportVisitor(EntryWriter out) { + this.out = out; + } + + @Override + public void stringField(FieldInfo fieldInfo, String value) throws IOException { + var schemaField = fields.get(fieldInfo.name); + var fieldType = schemaField == null ? 
null : schemaField.getType(); + if (fieldType instanceof BoolField) { + // Convert "T"/"F" stored value to boolean true/false + addField(fieldInfo.name, "T".equals(value)); + } else { + addField(fieldInfo.name, value); + } + } + + @Override + public void intField(FieldInfo fieldInfo, int value) throws IOException { + addField(fieldInfo.name, value); + } + + @Override + public void longField(FieldInfo fieldInfo, long value) throws IOException { + var schemaField = fields.get(fieldInfo.name); + var fieldType = schemaField == null ? null : schemaField.getType(); + if (fieldType instanceof DateValueFieldType) { + Date date = new Date(value); + addField(fieldInfo.name, date); + } else { + addField(fieldInfo.name, value); + } + } + + @Override + public void floatField(FieldInfo fieldInfo, float value) throws IOException { + addField(fieldInfo.name, value); + } + + @Override + public void doubleField(FieldInfo fieldInfo, double value) throws IOException { + addField(fieldInfo.name, value); + } + + @Override + public Status needsField(FieldInfo fieldInfo) { + return fields.containsKey(fieldInfo.name) ? 
Status.YES : Status.NO; + } + + private void addField(String fieldName, T value) throws IOException { + if (fields.get(fieldName).multiValued()) { + if (fieldName.equals(lastFieldName)) { + multiValue.add(value); + } else { + if (multiValue != null) { + out.put(lastFieldName, multiValue); + } + multiValue = new ArrayList<>(); + lastFieldName = fieldName; + multiValue.add(value); + } + } else { + out.put(fieldName, value); + } + } + + private void flush() throws IOException { + if (lastFieldName != null && multiValue != null && !multiValue.isEmpty()) { + out.put(lastFieldName, multiValue); + } + } + } + } } diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml b/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml index d821c3935f2b..1827da6b4d18 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml @@ -33,7 +33,7 @@ - + @@ -105,7 +105,7 @@ - + @@ -128,6 +128,22 @@ + + + + + + + + + + + + + + + + id diff --git a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java index f4836a93ce2f..154067184011 100644 --- a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java +++ b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java @@ -1476,4 +1476,278 @@ private void addField(SolrInputDocument doc, String type, String value, boolean doc.addField("number_" + type + (mv ? "s" : "") + "_ni_t", value); doc.addField("number_" + type + (mv ? 
"s" : "") + "_ni_p", value); } + + @Test + public void testIncludeStoredFieldsExplicitRequest() throws Exception { + // Test that stored-only fields are returned when includeStoredFields=true + clearIndex(); + + assertU( + adoc( + "id", "1", + "intdv", "1", + "str_s_stored", "hello", + "num_i_stored", "42", + "num_l_stored", "1234567890123", + "num_f_stored", "3.14", + "num_d_stored", "2.71828", + "date_dt_stored", "2024-01-15T10:30:00Z", + "bool_b_stored", "true")); + assertU(commit()); + + // Request stored-only fields with includeStoredFields=true + String resp = + h.query( + req( + "q", "*:*", + "qt", "/export", + "fl", "id,str_s_stored,num_i_stored,num_l_stored,num_f_stored,num_d_stored,date_dt_stored,bool_b_stored", + "sort", "intdv asc", + "includeStoredFields", "true")); + + assertJsonEquals( + resp, + "{\n" + + " \"responseHeader\":{\"status\":0},\n" + + " \"response\":{\n" + + " \"numFound\":1,\n" + + " \"docs\":[{\n" + + " \"id\":\"1\",\n" + + " \"str_s_stored\":\"hello\",\n" + + " \"num_i_stored\":42,\n" + + " \"num_l_stored\":1234567890123,\n" + + " \"num_f_stored\":3.14,\n" + + " \"num_d_stored\":2.71828,\n" + + " \"date_dt_stored\":\"2024-01-15T10:30:00Z\",\n" + + " \"bool_b_stored\":true}]}}"); + } + + @Test + public void testIncludeStoredFieldsErrorWithoutParam() throws Exception { + // Test that error with hint is thrown when requesting stored-only field without includeStoredFields + clearIndex(); + + assertU(adoc("id", "1", "intdv", "1", "str_s_stored", "hello")); + assertU(commit()); + + // Request stored-only field without includeStoredFields=true should error + String resp = + h.query( + req( + "q", "*:*", + "qt", "/export", + "fl", "id,str_s_stored", + "sort", "intdv asc")); + + assertTrue( + "Expected error message to contain hint about includeStoredFields", + resp.contains("includeStoredFields=true")); + assertTrue( + "Expected error message to mention the field", + resp.contains("str_s_stored")); + } + + @Test + public void 
testIncludeStoredFieldsGlobSkipsWithoutParam() throws Exception { + // Test that glob pattern silently skips stored-only fields when includeStoredFields=false + clearIndex(); + + assertU( + adoc( + "id", "1", + "intdv", "1", + "stringdv", "docvalue_string", + "str_s_stored", "stored_string")); + assertU(commit()); + + // Glob fl=* without includeStoredFields should skip stored-only fields + String resp = + h.query( + req( + "q", "*:*", + "qt", "/export", + "fl", "id,intdv,stringdv,str_s_stored", + "sort", "intdv asc")); + + // Should error because str_s_stored is explicitly requested + assertTrue( + "Expected error for explicitly requested stored-only field", + resp.contains("str_s_stored")); + + // Now test with glob - should silently skip stored-only fields + resp = + h.query( + req( + "q", "*:*", + "qt", "/export", + "fl", "*", + "sort", "intdv asc")); + + // Should succeed and return only DocValues fields (no str_s_stored) + assertTrue("Expected successful response", resp.contains("\"status\":0")); + assertFalse( + "Should not contain stored-only field without includeStoredFields", + resp.contains("str_s_stored")); + assertTrue("Should contain DocValues field", resp.contains("stringdv")); + } + + @Test + public void testIncludeStoredFieldsGlobIncludesWithParam() throws Exception { + // Test that glob pattern includes stored-only fields when includeStoredFields=true + clearIndex(); + + assertU( + adoc( + "id", "1", + "intdv", "1", + "stringdv", "docvalue_string", + "str_s_stored", "stored_string")); + assertU(commit()); + + // Glob fl=* with includeStoredFields=true should include stored-only fields + String resp = + h.query( + req( + "q", "*:*", + "qt", "/export", + "fl", "*", + "sort", "intdv asc", + "includeStoredFields", "true")); + + assertTrue("Expected successful response", resp.contains("\"status\":0")); + assertTrue( + "Should contain stored-only field with includeStoredFields=true", + resp.contains("str_s_stored")); + assertTrue("Should contain 
DocValues field", resp.contains("stringdv")); + } + + @Test + public void testIncludeStoredFieldsPreferDocValues() throws Exception { + // Test that fields with both DocValues and stored use DocValues regardless of param + clearIndex(); + + // field2_i_p has both stored=true and docValues=true + assertU(adoc("id", "1", "field2_i_p", "100")); + assertU(adoc("id", "2", "field2_i_p", "200")); + assertU(commit()); + + // With includeStoredFields=false (default) + String resp1 = + h.query( + req( + "q", "*:*", + "qt", "/export", + "fl", "id,field2_i_p", + "sort", "field2_i_p asc")); + + // With includeStoredFields=true + String resp2 = + h.query( + req( + "q", "*:*", + "qt", "/export", + "fl", "id,field2_i_p", + "sort", "field2_i_p asc", + "includeStoredFields", "true")); + + // Both should return the same result (DocValues preferred in both cases) + assertJsonEquals(resp1, resp2); + + // Verify both have successful status and correct values + assertTrue("Expected successful response", resp1.contains("\"status\":0")); + assertTrue("Should contain field2_i_p", resp1.contains("field2_i_p")); + } + + @Test + public void testIncludeStoredFieldsMultiValued() throws Exception { + // Test that multi-valued stored-only fields work correctly + clearIndex(); + + assertU( + adoc( + "id", "1", + "intdv", "1", + "strs_ss_stored", "value1", + "strs_ss_stored", "value2", + "strs_ss_stored", "value3", + "nums_is_stored", "10", + "nums_is_stored", "20", + "nums_is_stored", "30")); + assertU(commit()); + + String resp = + h.query( + req( + "q", "*:*", + "qt", "/export", + "fl", "id,strs_ss_stored,nums_is_stored", + "sort", "intdv asc", + "includeStoredFields", "true")); + + assertTrue("Expected successful response", resp.contains("\"status\":0")); + // Multi-valued fields should be returned as arrays + assertTrue( + "Multi-valued string field should contain multiple values", + resp.contains("strs_ss_stored")); + assertTrue( + "Multi-valued int field should contain multiple values", + 
resp.contains("nums_is_stored")); + } + + @Test + public void testIncludeStoredFieldsAllTypes() throws Exception { + // Test all supported stored field types including Date + clearIndex(); + + assertU( + adoc( + "id", "1", + "intdv", "1", + "str_s_stored", "test_string", + "num_i_stored", "123", + "num_l_stored", "9876543210", + "num_f_stored", "1.5", + "num_d_stored", "2.5", + "date_dt_stored", "2025-12-25T00:00:00Z", + "bool_b_stored", "false")); + assertU( + adoc( + "id", "2", + "intdv", "2", + "str_s_stored", "another_string", + "num_i_stored", "456", + "num_l_stored", "1234567890", + "num_f_stored", "2.5", + "num_d_stored", "3.5", + "date_dt_stored", "2025-06-15T12:30:00Z", + "bool_b_stored", "true")); + assertU(commit()); + + String resp = + h.query( + req( + "q", "*:*", + "qt", "/export", + "fl", "id,str_s_stored,num_i_stored,num_l_stored,num_f_stored,num_d_stored,date_dt_stored,bool_b_stored", + "sort", "intdv asc", + "includeStoredFields", "true")); + + assertTrue("Expected successful response", resp.contains("\"status\":0")); + assertTrue("Should contain 2 docs", resp.contains("\"numFound\":2")); + + // Verify all field types are present + assertTrue("Should contain string field", resp.contains("str_s_stored")); + assertTrue("Should contain int field", resp.contains("num_i_stored")); + assertTrue("Should contain long field", resp.contains("num_l_stored")); + assertTrue("Should contain float field", resp.contains("num_f_stored")); + assertTrue("Should contain double field", resp.contains("num_d_stored")); + assertTrue("Should contain date field", resp.contains("date_dt_stored")); + assertTrue("Should contain boolean field", resp.contains("bool_b_stored")); + + // Verify specific values + assertTrue("Should contain string value", resp.contains("test_string")); + assertTrue("Should contain int value", resp.contains("123")); + assertTrue("Should contain long value", resp.contains("9876543210")); + assertTrue("Should contain date value", 
resp.contains("2025-12-25")); + } } diff --git a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc index bbd31c7b358f..663426ee3e72 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc @@ -25,7 +25,9 @@ The cases where this functionality may be useful include: session analysis, dist == Field Requirements -All the fields being sorted and exported must have docValues set to `true`. +All the fields being sorted must have docValues set to `true`. +By default, fields in the field list (`fl`) must also have docValues. +However, you can include stored-only fields (fields without docValues) by setting the `includeStoredFields` parameter to `true`. For more information, see the section on xref:indexing-guide:docvalues.adoc[]. == The /export RequestHandler @@ -44,6 +46,11 @@ Filter queries are also supported. An optional parameter `batchSize` determines the size of the internal buffers for partial results. The default value is `30000` but users may want to specify smaller values to limit the memory use (at the cost of degraded performance) or higher values to improve export performance (the relationship is not linear and larger values don't bring proportionally larger performance increases). +An optional parameter `includeStoredFields` (default `false`) enables exporting fields that only have stored values (no docValues). +When set to `true`, fields without docValues but with stored values can be included in the field list (`fl`). +Note that retrieving stored fields may significantly impact export performance compared to docValues fields, as stored fields require additional I/O operations. +Fields that have both docValues and stored values will always use docValues for optimal performance, regardless of this parameter setting. 
+ The supported response writers are `json` and `javabin`. For backward compatibility reasons `wt=xsort` is also supported as input, but `wt=xsort` behaves same as `wt=json`. The default output format is `json`. From 7a41252c5e0fda0dbebd4c1198a6cc6db7713773 Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Mon, 12 Jan 2026 13:27:28 -0500 Subject: [PATCH 02/29] clean up cruft --- .../solr/handler/export/ExportWriter.java | 21 +-- .../solr/handler/export/TestExportWriter.java | 155 ++++++++---------- 2 files changed, 75 insertions(+), 101 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java index 772e66077ac3..296e7c9e0e27 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java @@ -504,8 +504,7 @@ public List getFieldWriters(String[] fields, SolrQueryRequest req) throws IOException { DocValuesIteratorCache dvIterCache = new DocValuesIteratorCache(req.getSearcher(), false); SolrReturnFields solrReturnFields = new SolrReturnFields(fields, req); - boolean includeStoredFields = - req.getParams().getBool(INCLUDE_STORED_FIELDS_PARAM, false); + boolean includeStoredFields = req.getParams().getBool(INCLUDE_STORED_FIELDS_PARAM, false); List writers = new ArrayList<>(); Set docValueFields = new LinkedHashSet<>(); @@ -519,8 +518,9 @@ public List getFieldWriters(String[] fields, SolrQueryRequest req) FieldType fieldType = schemaField.getType(); // Check if field can use DocValues - boolean canUseDocValues = schemaField.hasDocValues() - && (!(fieldType instanceof SortableTextField) || schemaField.useDocValuesAsStored()); + boolean canUseDocValues = + schemaField.hasDocValues() + && (!(fieldType instanceof SortableTextField) || schemaField.useDocValuesAsStored()); if (canUseDocValues) { // Prefer DocValues when available @@ -543,8 +543,7 @@ public List 
getFieldWriters(String[] fields, SolrQueryRequest req) // Explicitly requested field that has neither DocValues nor stored if (fieldType instanceof SortableTextField && !schemaField.useDocValuesAsStored()) { throw new IOException( - schemaField - + " Must have useDocValuesAsStored='true' to be used with export writer"); + schemaField + " Must have useDocValuesAsStored='true' to be used with export writer"); } else { throw new IOException(schemaField + " must have DocValues to use this feature."); } @@ -552,7 +551,6 @@ public List getFieldWriters(String[] fields, SolrQueryRequest req) // Else: glob matched field with neither DocValues nor stored - silently skip } - // Process DocValues fields first for (String field : docValueFields) { SchemaField schemaField = req.getSchema().getField(field); boolean multiValued = schemaField.multiValued(); @@ -612,14 +610,7 @@ public List getFieldWriters(String[] fields, SolrQueryRequest req) writers.add(writer); } - // Add StoredFieldsWriter if there are stored-only fields to process if (!storedOnlyFields.isEmpty()) { - if (log.isWarnEnabled()) { - log.warn( - "Export request includes stored-only fields {} which may significantly impact performance. " - + "Consider adding docValues to these fields for better export performance.", - storedOnlyFields.keySet()); - } writers.add(new StoredFieldsWriter(storedOnlyFields)); } @@ -953,7 +944,7 @@ public void stringField(FieldInfo fieldInfo, String value) throws IOException { var fieldType = schemaField == null ? 
null : schemaField.getType(); if (fieldType instanceof BoolField) { // Convert "T"/"F" stored value to boolean true/false - addField(fieldInfo.name, "T".equals(value)); + addField(fieldInfo.name, Boolean.valueOf(fieldType.indexedToReadable(value))); } else { addField(fieldInfo.name, value); } diff --git a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java index 154067184011..647e23879a94 100644 --- a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java +++ b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java @@ -1495,13 +1495,13 @@ public void testIncludeStoredFieldsExplicitRequest() throws Exception { "bool_b_stored", "true")); assertU(commit()); - // Request stored-only fields with includeStoredFields=true String resp = h.query( req( "q", "*:*", "qt", "/export", - "fl", "id,str_s_stored,num_i_stored,num_l_stored,num_f_stored,num_d_stored,date_dt_stored,bool_b_stored", + "fl", + "id,str_s_stored,num_i_stored,num_l_stored,num_f_stored,num_d_stored,date_dt_stored,bool_b_stored", "sort", "intdv asc", "includeStoredFields", "true")); @@ -1524,7 +1524,8 @@ public void testIncludeStoredFieldsExplicitRequest() throws Exception { @Test public void testIncludeStoredFieldsErrorWithoutParam() throws Exception { - // Test that error with hint is thrown when requesting stored-only field without includeStoredFields + // Test that error with hint is thrown when requesting stored-only field without + // includeStoredFields clearIndex(); assertU(adoc("id", "1", "intdv", "1", "str_s_stored", "hello")); @@ -1542,9 +1543,7 @@ public void testIncludeStoredFieldsErrorWithoutParam() throws Exception { assertTrue( "Expected error message to contain hint about includeStoredFields", resp.contains("includeStoredFields=true")); - assertTrue( - "Expected error message to mention the field", - resp.contains("str_s_stored")); + assertTrue("Expected error message 
to mention the field", resp.contains("str_s_stored")); } @Test @@ -1560,7 +1559,7 @@ public void testIncludeStoredFieldsGlobSkipsWithoutParam() throws Exception { "str_s_stored", "stored_string")); assertU(commit()); - // Glob fl=* without includeStoredFields should skip stored-only fields + // Explicit fl with stored-only field should error String resp = h.query( req( @@ -1571,24 +1570,29 @@ public void testIncludeStoredFieldsGlobSkipsWithoutParam() throws Exception { // Should error because str_s_stored is explicitly requested assertTrue( - "Expected error for explicitly requested stored-only field", - resp.contains("str_s_stored")); + "Expected error for explicitly requested stored-only field", resp.contains("str_s_stored")); + assertTrue( + "Expected hint about includeStoredFields", resp.contains("includeStoredFields=true")); - // Now test with glob - should silently skip stored-only fields + // Now test with glob - should silently skip stored-only fields and succeed resp = h.query( req( "q", "*:*", "qt", "/export", - "fl", "*", + "fl", "id,intdv,stringdv", "sort", "intdv asc")); - // Should succeed and return only DocValues fields (no str_s_stored) - assertTrue("Expected successful response", resp.contains("\"status\":0")); - assertFalse( - "Should not contain stored-only field without includeStoredFields", - resp.contains("str_s_stored")); - assertTrue("Should contain DocValues field", resp.contains("stringdv")); + assertJsonEquals( + resp, + "{\n" + + " \"responseHeader\":{\"status\":0},\n" + + " \"response\":{\n" + + " \"numFound\":1,\n" + + " \"docs\":[{\n" + + " \"id\":\"1\",\n" + + " \"intdv\":1,\n" + + " \"stringdv\":\"docvalue_string\"}]}}"); } @Test @@ -1610,52 +1614,21 @@ public void testIncludeStoredFieldsGlobIncludesWithParam() throws Exception { req( "q", "*:*", "qt", "/export", - "fl", "*", + "fl", "id,intdv,stringdv,str_s_stored", "sort", "intdv asc", "includeStoredFields", "true")); - assertTrue("Expected successful response", 
resp.contains("\"status\":0")); - assertTrue( - "Should contain stored-only field with includeStoredFields=true", - resp.contains("str_s_stored")); - assertTrue("Should contain DocValues field", resp.contains("stringdv")); - } - - @Test - public void testIncludeStoredFieldsPreferDocValues() throws Exception { - // Test that fields with both DocValues and stored use DocValues regardless of param - clearIndex(); - - // field2_i_p has both stored=true and docValues=true - assertU(adoc("id", "1", "field2_i_p", "100")); - assertU(adoc("id", "2", "field2_i_p", "200")); - assertU(commit()); - - // With includeStoredFields=false (default) - String resp1 = - h.query( - req( - "q", "*:*", - "qt", "/export", - "fl", "id,field2_i_p", - "sort", "field2_i_p asc")); - - // With includeStoredFields=true - String resp2 = - h.query( - req( - "q", "*:*", - "qt", "/export", - "fl", "id,field2_i_p", - "sort", "field2_i_p asc", - "includeStoredFields", "true")); - - // Both should return the same result (DocValues preferred in both cases) - assertJsonEquals(resp1, resp2); - - // Verify both have successful status and correct values - assertTrue("Expected successful response", resp1.contains("\"status\":0")); - assertTrue("Should contain field2_i_p", resp1.contains("field2_i_p")); + assertJsonEquals( + resp, + "{\n" + + " \"responseHeader\":{\"status\":0},\n" + + " \"response\":{\n" + + " \"numFound\":1,\n" + + " \"docs\":[{\n" + + " \"id\":\"1\",\n" + + " \"intdv\":1,\n" + + " \"stringdv\":\"docvalue_string\",\n" + + " \"str_s_stored\":\"stored_string\"}]}}"); } @Test @@ -1684,14 +1657,16 @@ public void testIncludeStoredFieldsMultiValued() throws Exception { "sort", "intdv asc", "includeStoredFields", "true")); - assertTrue("Expected successful response", resp.contains("\"status\":0")); - // Multi-valued fields should be returned as arrays - assertTrue( - "Multi-valued string field should contain multiple values", - resp.contains("strs_ss_stored")); - assertTrue( - "Multi-valued int 
field should contain multiple values", - resp.contains("nums_is_stored")); + assertJsonEquals( + resp, + "{\n" + + " \"responseHeader\":{\"status\":0},\n" + + " \"response\":{\n" + + " \"numFound\":1,\n" + + " \"docs\":[{\n" + + " \"id\":\"1\",\n" + + " \"strs_ss_stored\":[\"value1\",\"value2\",\"value3\"],\n" + + " \"nums_is_stored\":[10,20,30]}]}}"); } @Test @@ -1728,26 +1703,34 @@ public void testIncludeStoredFieldsAllTypes() throws Exception { req( "q", "*:*", "qt", "/export", - "fl", "id,str_s_stored,num_i_stored,num_l_stored,num_f_stored,num_d_stored,date_dt_stored,bool_b_stored", + "fl", + "id,str_s_stored,num_i_stored,num_l_stored,num_f_stored,num_d_stored,date_dt_stored,bool_b_stored", "sort", "intdv asc", "includeStoredFields", "true")); - assertTrue("Expected successful response", resp.contains("\"status\":0")); - assertTrue("Should contain 2 docs", resp.contains("\"numFound\":2")); - - // Verify all field types are present - assertTrue("Should contain string field", resp.contains("str_s_stored")); - assertTrue("Should contain int field", resp.contains("num_i_stored")); - assertTrue("Should contain long field", resp.contains("num_l_stored")); - assertTrue("Should contain float field", resp.contains("num_f_stored")); - assertTrue("Should contain double field", resp.contains("num_d_stored")); - assertTrue("Should contain date field", resp.contains("date_dt_stored")); - assertTrue("Should contain boolean field", resp.contains("bool_b_stored")); - - // Verify specific values - assertTrue("Should contain string value", resp.contains("test_string")); - assertTrue("Should contain int value", resp.contains("123")); - assertTrue("Should contain long value", resp.contains("9876543210")); - assertTrue("Should contain date value", resp.contains("2025-12-25")); + assertJsonEquals( + resp, + "{\n" + + " \"responseHeader\":{\"status\":0},\n" + + " \"response\":{\n" + + " \"numFound\":2,\n" + + " \"docs\":[{\n" + + " \"id\":\"1\",\n" + + " 
\"str_s_stored\":\"test_string\",\n" + + " \"num_i_stored\":123,\n" + + " \"num_l_stored\":9876543210,\n" + + " \"num_f_stored\":1.5,\n" + + " \"num_d_stored\":2.5,\n" + + " \"date_dt_stored\":\"2025-12-25T00:00:00Z\",\n" + + " \"bool_b_stored\":false},\n" + + " {\n" + + " \"id\":\"2\",\n" + + " \"str_s_stored\":\"another_string\",\n" + + " \"num_i_stored\":456,\n" + + " \"num_l_stored\":1234567890,\n" + + " \"num_f_stored\":2.5,\n" + + " \"num_d_stored\":3.5,\n" + + " \"date_dt_stored\":\"2025-06-15T12:30:00Z\",\n" + + " \"bool_b_stored\":true}]}}"); } } From a16594d57e04cc0a8ae02f55a718fc2f636ed8bf Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Mon, 12 Jan 2026 14:46:11 -0500 Subject: [PATCH 03/29] change expected exception message and remove duplication --- .../org/apache/solr/handler/export/ExportWriter.java | 10 ++++++---- .../apache/solr/handler/export/TestExportWriter.java | 8 +++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java index 296e7c9e0e27..06b928462d13 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java @@ -521,6 +521,10 @@ public List getFieldWriters(String[] fields, SolrQueryRequest req) boolean canUseDocValues = schemaField.hasDocValues() && (!(fieldType instanceof SortableTextField) || schemaField.useDocValuesAsStored()); + Set requestFieldNames = + solrReturnFields.getRequestedFieldNames() == null + ? 
Set.of() + : solrReturnFields.getRequestedFieldNames(); if (canUseDocValues) { // Prefer DocValues when available @@ -529,8 +533,7 @@ public List getFieldWriters(String[] fields, SolrQueryRequest req) // Field is stored-only (no usable DocValues) if (includeStoredFields) { storedOnlyFields.put(field, schemaField); - } else if (solrReturnFields.getRequestedFieldNames() != null - && solrReturnFields.getRequestedFieldNames().contains(field)) { + } else if (requestFieldNames.contains(field)) { // Explicitly requested field without DocValues and includeStoredFields=false throw new IOException( schemaField @@ -538,8 +541,7 @@ public List getFieldWriters(String[] fields, SolrQueryRequest req) + "Try setting includeStoredFields=true to retrieve this field from stored values."); } // Else: glob matched stored-only field without includeStoredFields - silently skip - } else if (solrReturnFields.getRequestedFieldNames() != null - && solrReturnFields.getRequestedFieldNames().contains(field)) { + } else if (requestFieldNames.contains(field)) { // Explicitly requested field that has neither DocValues nor stored if (fieldType instanceof SortableTextField && !schemaField.useDocValuesAsStored()) { throw new IOException( diff --git a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java index 647e23879a94..55009dc4f45c 100644 --- a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java +++ b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java @@ -952,7 +952,7 @@ private void testSortingOutput() throws Exception { s.contains("\"status\":400}")); assertTrue( "Should have a cause when exporting sortabledv_m, it does not have useDocValuesAsStored='true'", - s.contains("Must have useDocValuesAsStored='true' to be used with export writer")); + s.contains("includeStoredFields=true")); s = h.query( @@ -970,7 +970,7 @@ private void testSortingOutput() throws 
Exception { s.contains("\"status\":400}")); assertTrue( "Should have a cause when exporting sortabledv, it does not have useDocValuesAsStored='true'", - s.contains("Must have useDocValuesAsStored='true' to be used with export writer")); + s.contains("includeStoredFields=true")); } private void assertJsonEquals(String actual, String expected) { @@ -1292,9 +1292,7 @@ public void testExpr() throws Exception { assertTrue("doc doesn't have exception", doc.containsKey(StreamParams.EXCEPTION)); assertTrue( "wrong exception message", - doc.get(StreamParams.EXCEPTION) - .toString() - .contains("Must have useDocValuesAsStored='true'")); + doc.get(StreamParams.EXCEPTION).toString().contains("includeStoredFields=true")); } @Test From ec14338397fa9e43c52304a247b5d6981162b90e Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Wed, 14 Jan 2026 14:02:29 -0500 Subject: [PATCH 04/29] allow arbitrary index shift in field writer --- gradle/wrapper/gradle-wrapper.properties | 2 +- .../handler/export/DoubleFieldWriter.java | 8 ++--- .../solr/handler/export/ExportWriter.java | 35 ++++++++++--------- .../solr/handler/export/FieldWriter.java | 2 +- .../solr/handler/export/FloatFieldWriter.java | 8 ++--- .../solr/handler/export/IntFieldWriter.java | 8 ++--- .../solr/handler/export/LongFieldWriter.java | 8 ++--- .../solr/handler/export/MultiFieldWriter.java | 10 +++--- .../handler/export/StringFieldWriter.java | 8 ++--- 9 files changed, 45 insertions(+), 44 deletions(-) diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 9355b4155759..b1b2f74e8570 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,7 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip -networkTimeout=10000 +networkTimeout=60000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git 
a/solr/core/src/java/org/apache/solr/handler/export/DoubleFieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/DoubleFieldWriter.java index e439560894b4..8b211c6546bc 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/DoubleFieldWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/DoubleFieldWriter.java @@ -34,7 +34,7 @@ public DoubleFieldWriter( } @Override - public boolean write( + public int write( SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) throws IOException { double val; @@ -43,7 +43,7 @@ public boolean write( if (sortValue.isPresent()) { val = (double) sortValue.getCurrentValue(); } else { // empty-value - return false; + return 0; } } else { // field is not part of 'sort' param, but part of 'fl' param @@ -53,10 +53,10 @@ public boolean write( if (vals != null) { val = Double.longBitsToDouble(vals.longValue()); } else { - return false; + return 0; } } ew.put(this.field, val); - return true; + return 1; } } diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java index 06b928462d13..4a918ac6bd4d 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java @@ -115,9 +115,9 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable { private static final FieldWriter EMPTY_FIELD_WRITER = new FieldWriter() { @Override - public boolean write( + public int write( SortDoc sortDoc, LeafReaderContext readerContext, EntryWriter out, int fieldIndex) { - return false; + return 0; } }; @@ -494,9 +494,7 @@ void writeDoc( LeafReaderContext context = leaves.get(ord); int fieldIndex = 0; for (FieldWriter fieldWriter : writers) { - if (fieldWriter.write(sortDoc, context, ew, fieldIndex)) { - ++fieldIndex; - } + fieldIndex += fieldWriter.write(sortDoc, context, ew, fieldIndex); } } @@ -507,8 
+505,8 @@ public List getFieldWriters(String[] fields, SolrQueryRequest req) boolean includeStoredFields = req.getParams().getBool(INCLUDE_STORED_FIELDS_PARAM, false); List writers = new ArrayList<>(); - Set docValueFields = new LinkedHashSet<>(); - Map storedOnlyFields = new LinkedHashMap<>(); + Set docValueFields = new LinkedHashSet<>(); + Map storedFields = new LinkedHashMap<>(); for (String field : req.getSearcher().getFieldNames()) { if (!solrReturnFields.wantsField(field)) { @@ -528,11 +526,11 @@ public List getFieldWriters(String[] fields, SolrQueryRequest req) if (canUseDocValues) { // Prefer DocValues when available - docValueFields.add(field); + docValueFields.add(schemaField); } else if (schemaField.stored()) { // Field is stored-only (no usable DocValues) if (includeStoredFields) { - storedOnlyFields.put(field, schemaField); + storedFields.put(field, schemaField); } else if (requestFieldNames.contains(field)) { // Explicitly requested field without DocValues and includeStoredFields=false throw new IOException( @@ -553,8 +551,8 @@ public List getFieldWriters(String[] fields, SolrQueryRequest req) // Else: glob matched field with neither DocValues nor stored - silently skip } - for (String field : docValueFields) { - SchemaField schemaField = req.getSchema().getField(field); + for (SchemaField schemaField : docValueFields) { + String field = schemaField.getName(); boolean multiValued = schemaField.multiValued(); FieldType fieldType = schemaField.getType(); FieldWriter writer; @@ -612,8 +610,8 @@ public List getFieldWriters(String[] fields, SolrQueryRequest req) writers.add(writer); } - if (!storedOnlyFields.isEmpty()) { - writers.add(new StoredFieldsWriter(storedOnlyFields)); + if (!storedFields.isEmpty()) { + writers.add(new StoredFieldsWriter(storedFields)); } return writers; @@ -910,7 +908,7 @@ public StoredFieldsWriter(Map fieldsToRead) { } @Override - public boolean write( + public int write( SortDoc sortDoc, LeafReaderContext readerContext, 
EntryWriter out, int fieldIndex) throws IOException { WeakHashMap map = storedFieldsMap.get(); @@ -926,8 +924,7 @@ public boolean write( } ExportVisitor visitor = new ExportVisitor(out); storedFields.document(sortDoc.docId, visitor); - visitor.flush(); - return false; + return visitor.flush(); } class ExportVisitor extends StoredFieldVisitor { @@ -935,6 +932,7 @@ class ExportVisitor extends StoredFieldVisitor { final EntryWriter out; String lastFieldName; List multiValue = null; + int fieldsVisited; public ExportVisitor(EntryWriter out) { this.out = out; @@ -995,16 +993,19 @@ private void addField(String fieldName, T value) throws IOException { multiValue = new ArrayList<>(); lastFieldName = fieldName; multiValue.add(value); + fieldsVisited++; } } else { out.put(fieldName, value); + fieldsVisited++; } } - private void flush() throws IOException { + private int flush() throws IOException { if (lastFieldName != null && multiValue != null && !multiValue.isEmpty()) { out.put(lastFieldName, multiValue); } + return fieldsVisited; } } } diff --git a/solr/core/src/java/org/apache/solr/handler/export/FieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/FieldWriter.java index 1923afb410f7..0d386237f538 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/FieldWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/FieldWriter.java @@ -22,7 +22,7 @@ import org.apache.solr.common.MapWriter; abstract class FieldWriter { - public abstract boolean write( + public abstract int write( SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter out, int fieldIndex) throws IOException; } diff --git a/solr/core/src/java/org/apache/solr/handler/export/FloatFieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/FloatFieldWriter.java index a60c14e6b0ad..590d2d620237 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/FloatFieldWriter.java +++ 
b/solr/core/src/java/org/apache/solr/handler/export/FloatFieldWriter.java @@ -34,7 +34,7 @@ public FloatFieldWriter( } @Override - public boolean write( + public int write( SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) throws IOException { float val; @@ -43,7 +43,7 @@ public boolean write( if (sortValue.isPresent()) { val = (float) sortValue.getCurrentValue(); } else { // empty-value - return false; + return 0; } } else { // field is not part of 'sort' param, but part of 'fl' param @@ -53,10 +53,10 @@ public boolean write( if (vals != null) { val = Float.intBitsToFloat((int) vals.longValue()); } else { - return false; + return 0; } } ew.put(this.field, val); - return true; + return 1; } } diff --git a/solr/core/src/java/org/apache/solr/handler/export/IntFieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/IntFieldWriter.java index bf0396d4ab87..0fbc57863beb 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/IntFieldWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/IntFieldWriter.java @@ -34,7 +34,7 @@ public IntFieldWriter( } @Override - public boolean write( + public int write( SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) throws IOException { int val; @@ -43,7 +43,7 @@ public boolean write( if (sortValue.isPresent()) { val = (int) sortValue.getCurrentValue(); } else { // empty-value - return false; + return 0; } } else { // field is not part of 'sort' param, but part of 'fl' param @@ -53,10 +53,10 @@ public boolean write( if (vals != null) { val = (int) vals.longValue(); } else { - return false; + return 0; } } ew.put(this.field, val); - return true; + return 1; } } diff --git a/solr/core/src/java/org/apache/solr/handler/export/LongFieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/LongFieldWriter.java index 7961549477cf..7146e95bfb83 100644 --- 
a/solr/core/src/java/org/apache/solr/handler/export/LongFieldWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/LongFieldWriter.java @@ -35,7 +35,7 @@ public LongFieldWriter( } @Override - public boolean write( + public int write( SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) throws IOException { long val; @@ -44,7 +44,7 @@ public boolean write( if (sortValue.isPresent()) { val = (long) sortValue.getCurrentValue(); } else { // empty-value - return false; + return 0; } } else { // field is not part of 'sort' param, but part of 'fl' param @@ -54,11 +54,11 @@ public boolean write( if (vals != null) { val = vals.longValue(); } else { - return false; + return 0; } } doWrite(ew, val); - return true; + return 1; } protected void doWrite(MapWriter.EntryWriter ew, long val) throws IOException { diff --git a/solr/core/src/java/org/apache/solr/handler/export/MultiFieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/MultiFieldWriter.java index 7f5bdee4899f..39efcdbc8107 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/MultiFieldWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/MultiFieldWriter.java @@ -61,7 +61,7 @@ public MultiFieldWriter( } @Override - public boolean write( + public int write( SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter out, int fieldIndex) throws IOException { if (this.fieldType.isPointField()) { @@ -69,7 +69,7 @@ public boolean write( docValuesCache.getSortedNumericDocValues( sortDoc.docId, readerContext.reader(), readerContext.ord); if (vals == null) { - return false; + return 0; } final SortedNumericDocValues docVals = vals; @@ -82,13 +82,13 @@ public boolean write( w.add(bitsToValue.apply(docVals.nextValue())); } }); - return true; + return 1; } else { SortedSetDocValues vals = docValuesCache.getSortedSetDocValues( sortDoc.docId, readerContext.reader(), readerContext.ord); if (vals == null) { - return false; + return 
0; } final SortedSetDocValues docVals = vals; @@ -105,7 +105,7 @@ public boolean write( else w.add(fieldType.toObject(f)); } }); - return true; + return 1; } } diff --git a/solr/core/src/java/org/apache/solr/handler/export/StringFieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/StringFieldWriter.java index 2f8d0963e3a1..02bdc93f661f 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/StringFieldWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/StringFieldWriter.java @@ -59,7 +59,7 @@ public StringFieldWriter( } @Override - public boolean write( + public int write( SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) throws IOException { StringValue stringValue = (StringValue) sortDoc.getSortValue(this.field); @@ -74,7 +74,7 @@ public boolean write( if (stringValue.currentOrd == -1) { // Null sort value - return false; + return 0; } if (this.lastOrd == stringValue.currentOrd) { @@ -89,7 +89,7 @@ public boolean write( docValuesCache.getSortedDocValues( sortDoc.docId, readerContext.reader(), readerContext.ord); if (vals == null) { - return false; + return 0; } int ord = vals.ordValue(); @@ -102,7 +102,7 @@ public boolean write( } writeBytes(ew, ref, fieldType); - return true; + return 1; } protected void writeBytes(MapWriter.EntryWriter ew, BytesRef ref, FieldType fieldType) From 58e9a5b5b8ec17bae6b3ead1e9514ad7a84258be Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Wed, 14 Jan 2026 15:55:58 -0500 Subject: [PATCH 05/29] add udvas test --- .../solr/handler/export/TestExportWriter.java | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java index 55009dc4f45c..e7f973370594 100644 --- a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java +++ 
b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java @@ -1731,4 +1731,56 @@ public void testIncludeStoredFieldsAllTypes() throws Exception { + " \"date_dt_stored\":\"2025-06-15T12:30:00Z\",\n" + " \"bool_b_stored\":true}]}}"); } + + @Test + public void testIncludeStoredFieldsWithUseDocValuesAsStored() throws Exception { + // Test that fields with stored=true AND useDocValuesAsStored=true work correctly + // with includeStoredFields=true. The sortabledv_udvas and sortabledv_m_udvas fields + // in the schema have stored="true" and useDocValuesAsStored="true" + clearIndex(); + + // Use values that sort lexicographically in the same order to avoid issues with + // DocValues returning values in sorted order + assertU( + adoc( + "id", "1", + "intdv", "1", + "sortabledv_udvas", "single value text", + "sortabledv_m_udvas", "aaa multi value", + "sortabledv_m_udvas", "bbb multi value")); + assertU( + adoc( + "id", "2", + "intdv", "2", + "sortabledv_udvas", "another single value", + "sortabledv_m_udvas", "ccc another multi", + "sortabledv_m_udvas", "ddd another multi", + "sortabledv_m_udvas", "eee another multi")); + assertU(commit()); + + // Request with includeStoredFields=true - should retrieve the stored values + String resp = + h.query( + req( + "q", "*:*", + "qt", "/export", + "fl", "id,sortabledv_udvas,sortabledv_m_udvas", + "sort", "intdv asc", + "includeStoredFields", "true")); + + assertJsonEquals( + resp, + "{\n" + + " \"responseHeader\":{\"status\":0},\n" + + " \"response\":{\n" + + " \"numFound\":2,\n" + + " \"docs\":[{\n" + + " \"id\":\"1\",\n" + + " \"sortabledv_udvas\":\"single value text\",\n" + + " \"sortabledv_m_udvas\":[\"aaa multi value\",\"bbb multi value\"]},\n" + + " {\n" + + " \"id\":\"2\",\n" + + " \"sortabledv_udvas\":\"another single value\",\n" + + " \"sortabledv_m_udvas\":[\"ccc another multi\",\"ddd another multi\",\"eee another multi\"]}]}}"); + } } From 9d25470712fc5ad054c99b7686d8de20868f07e7 Mon Sep 17 00:00:00 2001 
From: lkotzaniewsk Date: Thu, 15 Jan 2026 16:07:58 -0500 Subject: [PATCH 06/29] avoid DV lookup if we're reading StoredFields --- .../solr/handler/export/ExportWriter.java | 6 +++ .../solr/handler/export/TestExportWriter.java | 54 +------------------ 2 files changed, 7 insertions(+), 53 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java index 4a918ac6bd4d..04915655c226 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java @@ -557,6 +557,12 @@ public List getFieldWriters(String[] fields, SolrQueryRequest req) FieldType fieldType = schemaField.getType(); FieldWriter writer; + if (schemaField.stored() && !storedFields.isEmpty()) { + // if we're reading StoredFields *anyway*, then we might as well avoid this extra DV lookup + storedFields.put(field, schemaField); + continue; + } + DocValuesIteratorCache.FieldDocValuesSupplier docValuesCache = dvIterCache.getSupplier(field); if (docValuesCache == null) { diff --git a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java index e7f973370594..7ea7871e584b 100644 --- a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java +++ b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java @@ -1623,9 +1623,9 @@ public void testIncludeStoredFieldsGlobIncludesWithParam() throws Exception { + " \"response\":{\n" + " \"numFound\":1,\n" + " \"docs\":[{\n" - + " \"id\":\"1\",\n" + " \"intdv\":1,\n" + " \"stringdv\":\"docvalue_string\",\n" + + " \"id\":\"1\",\n" + " \"str_s_stored\":\"stored_string\"}]}}"); } @@ -1731,56 +1731,4 @@ public void testIncludeStoredFieldsAllTypes() throws Exception { + " \"date_dt_stored\":\"2025-06-15T12:30:00Z\",\n" + " \"bool_b_stored\":true}]}}"); } - - @Test - public 
void testIncludeStoredFieldsWithUseDocValuesAsStored() throws Exception { - // Test that fields with stored=true AND useDocValuesAsStored=true work correctly - // with includeStoredFields=true. The sortabledv_udvas and sortabledv_m_udvas fields - // in the schema have stored="true" and useDocValuesAsStored="true" - clearIndex(); - - // Use values that sort lexicographically in the same order to avoid issues with - // DocValues returning values in sorted order - assertU( - adoc( - "id", "1", - "intdv", "1", - "sortabledv_udvas", "single value text", - "sortabledv_m_udvas", "aaa multi value", - "sortabledv_m_udvas", "bbb multi value")); - assertU( - adoc( - "id", "2", - "intdv", "2", - "sortabledv_udvas", "another single value", - "sortabledv_m_udvas", "ccc another multi", - "sortabledv_m_udvas", "ddd another multi", - "sortabledv_m_udvas", "eee another multi")); - assertU(commit()); - - // Request with includeStoredFields=true - should retrieve the stored values - String resp = - h.query( - req( - "q", "*:*", - "qt", "/export", - "fl", "id,sortabledv_udvas,sortabledv_m_udvas", - "sort", "intdv asc", - "includeStoredFields", "true")); - - assertJsonEquals( - resp, - "{\n" - + " \"responseHeader\":{\"status\":0},\n" - + " \"response\":{\n" - + " \"numFound\":2,\n" - + " \"docs\":[{\n" - + " \"id\":\"1\",\n" - + " \"sortabledv_udvas\":\"single value text\",\n" - + " \"sortabledv_m_udvas\":[\"aaa multi value\",\"bbb multi value\"]},\n" - + " {\n" - + " \"id\":\"2\",\n" - + " \"sortabledv_udvas\":\"another single value\",\n" - + " \"sortabledv_m_udvas\":[\"ccc another multi\",\"ddd another multi\",\"eee another multi\"]}]}}"); - } } From cad960761a2cf2d3d299883d4c75f0795f3f0406 Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Thu, 15 Jan 2026 16:44:12 -0500 Subject: [PATCH 07/29] revert ... 
overridden by mistake --- gradle/wrapper/gradle-wrapper.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index b1b2f74e8570..9355b4155759 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,7 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip -networkTimeout=60000 +networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists From 8f08d3580ee41f45572b30300f76929776793e13 Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Thu, 15 Jan 2026 17:07:47 -0500 Subject: [PATCH 08/29] clean up comment --- .../test-files/solr/collection1/conf/schema-sortingresponse.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml b/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml index 1827da6b4d18..f7dbe80e7ded 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml @@ -128,7 +128,7 @@ - + From e064f6e2cda8a15fabf546d7604825ae3dafe00b Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Thu, 15 Jan 2026 17:10:07 -0500 Subject: [PATCH 09/29] better comment --- .../src/java/org/apache/solr/handler/export/ExportWriter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java index 04915655c226..4577256a1c07 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java @@ -545,7 +545,7 @@ public List getFieldWriters(String[] 
fields, SolrQueryRequest req) throw new IOException( schemaField + " Must have useDocValuesAsStored='true' to be used with export writer"); } else { - throw new IOException(schemaField + " must have DocValues to use this feature."); + throw new IOException(schemaField + " must have DocValues or be stored to use this feature."); } } // Else: glob matched field with neither DocValues nor stored - silently skip From 815892a3639d449d20649a80cecf57a2213df180 Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Thu, 15 Jan 2026 17:21:36 -0500 Subject: [PATCH 10/29] actually test glob pattern --- .../src/java/org/apache/solr/handler/export/ExportWriter.java | 3 ++- .../test/org/apache/solr/handler/export/TestExportWriter.java | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java index 4577256a1c07..d4176cbd1a8f 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java @@ -545,7 +545,8 @@ public List getFieldWriters(String[] fields, SolrQueryRequest req) throw new IOException( schemaField + " Must have useDocValuesAsStored='true' to be used with export writer"); } else { - throw new IOException(schemaField + " must have DocValues or be stored to use this feature."); + throw new IOException( + schemaField + " must have DocValues or be stored to use this feature."); } } // Else: glob matched field with neither DocValues nor stored - silently skip diff --git a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java index 7ea7871e584b..5c23f4da8880 100644 --- a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java +++ b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java @@ -1578,7 +1578,7 @@ public void 
testIncludeStoredFieldsGlobSkipsWithoutParam() throws Exception { req( "q", "*:*", "qt", "/export", - "fl", "id,intdv,stringdv", + "fl", "intdv,*", "sort", "intdv asc")); assertJsonEquals( @@ -1612,7 +1612,7 @@ public void testIncludeStoredFieldsGlobIncludesWithParam() throws Exception { req( "q", "*:*", "qt", "/export", - "fl", "id,intdv,stringdv,str_s_stored", + "fl", "*", "sort", "intdv asc", "includeStoredFields", "true")); From 093af42241d00ed9e9ab63aa30cde0f9efc4db51 Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Thu, 15 Jan 2026 18:01:06 -0500 Subject: [PATCH 11/29] changelog entry --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f7c1a47aec42..28f330651efe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Added efSearch parameter to knn query, exposed efSearchScaleFactor that is used to calculate efSearch internally #17928 [SOLR-17928](https://issues.apache.org/jira/browse/SOLR-17928) (Puneet Ahuja) (Elia Porciani) - Support indexing primitive float[] values for DenseVectorField via JavaBin [SOLR-17948](https://issues.apache.org/jira/browse/SOLR-17948) (Puneet Ahuja) (Noble Paul) - Enable MergeOnFlushMergePolicy in Solr [SOLR-17984](https://issues.apache.org/jira/browse/SOLR-17984) ([Houston Putman](https://home.apache.org/phonebook.html?uid=houston) @HoustonPutman) +- Add support for stored-only fields in ExportWriter with includeStoredFields=true. 
[SOLR-18071](https://issues.apache.org/jira/browse/SOLR-18071) (Luke Kot-Zaniewski) ### Changed (30 changes) From ac7966add6c49bc9a11519eab217845b4c2f0842 Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Fri, 16 Jan 2026 14:55:38 -0500 Subject: [PATCH 12/29] add unreleased changelog entry --- .../SOLR-18071-support-stored-fields-export-writer.yml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 changelog/unreleased/SOLR-18071-support-stored-fields-export-writer.yml diff --git a/changelog/unreleased/SOLR-18071-support-stored-fields-export-writer.yml b/changelog/unreleased/SOLR-18071-support-stored-fields-export-writer.yml new file mode 100644 index 000000000000..dbd1b8c02371 --- /dev/null +++ b/changelog/unreleased/SOLR-18071-support-stored-fields-export-writer.yml @@ -0,0 +1,8 @@ +# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc +title: Support including stored fields in Export Writer output. +type: added # added, changed, fixed, deprecated, removed, dependency_update, security, other +authors: + - name: Luke Kot-Zaniewski +links: + - name: SOLR-18071 + url: https://issues.apache.org/jira/browse/SOLR-18071 From d510972a3657c77ef2e0fe704abf9ef64b65641c Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Fri, 16 Jan 2026 15:22:29 -0500 Subject: [PATCH 13/29] move StoredFieldsWriter to its own file --- CHANGELOG.md | 2 +- .../solr/handler/export/ExportWriter.java | 119 -------------- .../handler/export/StoredFieldsWriter.java | 148 ++++++++++++++++++ 3 files changed, 149 insertions(+), 120 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/handler/export/StoredFieldsWriter.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 28f330651efe..be82d6e44ecf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,7 +33,7 @@ This file lists Solr's raw release notes with details of every change to Solr. 
M - Added efSearch parameter to knn query, exposed efSearchScaleFactor that is used to calculate efSearch internally #17928 [SOLR-17928](https://issues.apache.org/jira/browse/SOLR-17928) (Puneet Ahuja) (Elia Porciani) - Support indexing primitive float[] values for DenseVectorField via JavaBin [SOLR-17948](https://issues.apache.org/jira/browse/SOLR-17948) (Puneet Ahuja) (Noble Paul) - Enable MergeOnFlushMergePolicy in Solr [SOLR-17984](https://issues.apache.org/jira/browse/SOLR-17984) ([Houston Putman](https://home.apache.org/phonebook.html?uid=houston) @HoustonPutman) -- Add support for stored-only fields in ExportWriter with includeStoredFields=true. [SOLR-18071](https://issues.apache.org/jira/browse/SOLR-18071) (Luke Kot-Zaniewski) +- Add support for stored-only fields in ExportWriter with includeStoredFields=true. The default is false because it can negatively impact performance. [SOLR-18071](https://issues.apache.org/jira/browse/SOLR-18071) (Luke Kot-Zaniewski) ### Changed (30 changes) diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java index d4176cbd1a8f..99b39839f5d6 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java @@ -28,21 +28,15 @@ import java.lang.invoke.MethodHandles; import java.nio.charset.StandardCharsets; import java.util.ArrayList; -import java.util.Date; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; -import java.util.WeakHashMap; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedDocValues; -import org.apache.lucene.index.StoredFieldVisitor; -import 
org.apache.lucene.index.StoredFields; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; @@ -903,117 +897,4 @@ public String getMessage() { return "Early Client Disconnect"; } } - - static class StoredFieldsWriter extends FieldWriter { - - private final Map fields; - private final ThreadLocal> storedFieldsMap = - new ThreadLocal<>(); - - public StoredFieldsWriter(Map fieldsToRead) { - this.fields = fieldsToRead; - } - - @Override - public int write( - SortDoc sortDoc, LeafReaderContext readerContext, EntryWriter out, int fieldIndex) - throws IOException { - WeakHashMap map = storedFieldsMap.get(); - if (map == null) { - map = new WeakHashMap<>(); - storedFieldsMap.set(map); - } - LeafReader reader = readerContext.reader(); - StoredFields storedFields = map.get(reader.getReaderCacheHelper().getKey()); - if (storedFields == null) { - storedFields = reader.storedFields(); - map.put(reader.getReaderCacheHelper().getKey(), storedFields); - } - ExportVisitor visitor = new ExportVisitor(out); - storedFields.document(sortDoc.docId, visitor); - return visitor.flush(); - } - - class ExportVisitor extends StoredFieldVisitor { - - final EntryWriter out; - String lastFieldName; - List multiValue = null; - int fieldsVisited; - - public ExportVisitor(EntryWriter out) { - this.out = out; - } - - @Override - public void stringField(FieldInfo fieldInfo, String value) throws IOException { - var schemaField = fields.get(fieldInfo.name); - var fieldType = schemaField == null ? 
null : schemaField.getType(); - if (fieldType instanceof BoolField) { - // Convert "T"/"F" stored value to boolean true/false - addField(fieldInfo.name, Boolean.valueOf(fieldType.indexedToReadable(value))); - } else { - addField(fieldInfo.name, value); - } - } - - @Override - public void intField(FieldInfo fieldInfo, int value) throws IOException { - addField(fieldInfo.name, value); - } - - @Override - public void longField(FieldInfo fieldInfo, long value) throws IOException { - var schemaField = fields.get(fieldInfo.name); - var fieldType = schemaField == null ? null : schemaField.getType(); - if (fieldType instanceof DateValueFieldType) { - Date date = new Date(value); - addField(fieldInfo.name, date); - } else { - addField(fieldInfo.name, value); - } - } - - @Override - public void floatField(FieldInfo fieldInfo, float value) throws IOException { - addField(fieldInfo.name, value); - } - - @Override - public void doubleField(FieldInfo fieldInfo, double value) throws IOException { - addField(fieldInfo.name, value); - } - - @Override - public Status needsField(FieldInfo fieldInfo) { - return fields.containsKey(fieldInfo.name) ? 
Status.YES : Status.NO; - } - - private void addField(String fieldName, T value) throws IOException { - if (fields.get(fieldName).multiValued()) { - if (fieldName.equals(lastFieldName)) { - multiValue.add(value); - } else { - if (multiValue != null) { - out.put(lastFieldName, multiValue); - } - multiValue = new ArrayList<>(); - lastFieldName = fieldName; - multiValue.add(value); - fieldsVisited++; - } - } else { - out.put(fieldName, value); - fieldsVisited++; - } - } - - private int flush() throws IOException { - if (lastFieldName != null && multiValue != null && !multiValue.isEmpty()) { - out.put(lastFieldName, multiValue); - } - return fieldsVisited; - } - } - } } diff --git a/solr/core/src/java/org/apache/solr/handler/export/StoredFieldsWriter.java b/solr/core/src/java/org/apache/solr/handler/export/StoredFieldsWriter.java new file mode 100644 index 000000000000..0831ce5348ee --- /dev/null +++ b/solr/core/src/java/org/apache/solr/handler/export/StoredFieldsWriter.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.handler.export; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.Map; +import java.util.WeakHashMap; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.StoredFieldVisitor; +import org.apache.lucene.index.StoredFields; +import org.apache.solr.common.MapWriter.EntryWriter; +import org.apache.solr.schema.BoolField; +import org.apache.solr.schema.DateValueFieldType; +import org.apache.solr.schema.SchemaField; + +class StoredFieldsWriter extends FieldWriter { + + private final Map fields; + private final ThreadLocal> storedFieldsMap = + new ThreadLocal<>(); + + public StoredFieldsWriter(Map fieldsToRead) { + this.fields = fieldsToRead; + } + + @Override + public int write( + SortDoc sortDoc, LeafReaderContext readerContext, EntryWriter out, int fieldIndex) + throws IOException { + WeakHashMap map = storedFieldsMap.get(); + if (map == null) { + map = new WeakHashMap<>(); + storedFieldsMap.set(map); + } + LeafReader reader = readerContext.reader(); + StoredFields storedFields = map.get(reader.getReaderCacheHelper().getKey()); + if (storedFields == null) { + storedFields = reader.storedFields(); + map.put(reader.getReaderCacheHelper().getKey(), storedFields); + } + ExportVisitor visitor = new ExportVisitor(out); + storedFields.document(sortDoc.docId, visitor); + return visitor.flush(); + } + + class ExportVisitor extends StoredFieldVisitor { + + final EntryWriter out; + String lastFieldName; + List multiValue = null; + int fieldsVisited; + + public ExportVisitor(EntryWriter out) { + this.out = out; + } + + @Override + public void stringField(FieldInfo fieldInfo, String value) throws IOException { + var schemaField = fields.get(fieldInfo.name); + var fieldType = schemaField == null ? 
null : schemaField.getType(); + if (fieldType instanceof BoolField) { + // Convert "T"/"F" stored value to boolean true/false + addField(fieldInfo.name, Boolean.valueOf(fieldType.indexedToReadable(value))); + } else { + addField(fieldInfo.name, value); + } + } + + @Override + public void intField(FieldInfo fieldInfo, int value) throws IOException { + addField(fieldInfo.name, value); + } + + @Override + public void longField(FieldInfo fieldInfo, long value) throws IOException { + var schemaField = fields.get(fieldInfo.name); + var fieldType = schemaField == null ? null : schemaField.getType(); + if (fieldType instanceof DateValueFieldType) { + Date date = new Date(value); + addField(fieldInfo.name, date); + } else { + addField(fieldInfo.name, value); + } + } + + @Override + public void floatField(FieldInfo fieldInfo, float value) throws IOException { + addField(fieldInfo.name, value); + } + + @Override + public void doubleField(FieldInfo fieldInfo, double value) throws IOException { + addField(fieldInfo.name, value); + } + + @Override + public Status needsField(FieldInfo fieldInfo) { + return fields.containsKey(fieldInfo.name) ? 
Status.YES : Status.NO; + } + + private void addField(String fieldName, T value) throws IOException { + if (fields.get(fieldName).multiValued()) { + if (fieldName.equals(lastFieldName)) { + multiValue.add(value); + } else { + if (multiValue != null) { + out.put(lastFieldName, multiValue); + } + multiValue = new ArrayList<>(); + lastFieldName = fieldName; + multiValue.add(value); + fieldsVisited++; + } + } else { + out.put(fieldName, value); + fieldsVisited++; + } + } + + private int flush() throws IOException { + if (lastFieldName != null && multiValue != null && !multiValue.isEmpty()) { + out.put(lastFieldName, multiValue); + } + return fieldsVisited; + } + } +} From c30ed956854aa0ba83af6433b97386d7341ab459 Mon Sep 17 00:00:00 2001 From: David Smiley Date: Sun, 18 Jan 2026 21:15:09 -0500 Subject: [PATCH 14/29] simplify logic (subjective) --- .../handler/export/StoredFieldsWriter.java | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/export/StoredFieldsWriter.java b/solr/core/src/java/org/apache/solr/handler/export/StoredFieldsWriter.java index 0831ce5348ee..fe496837c4a6 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/StoredFieldsWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/StoredFieldsWriter.java @@ -120,27 +120,28 @@ public Status needsField(FieldInfo fieldInfo) { } private void addField(String fieldName, T value) throws IOException { + if (fieldName.equals(lastFieldName)) { + // assume adding another value to a multi-value field + multiValue.add(value); + return; + } + // new/different field... 
+ flush(); // completes the previous field if there's something to do + fieldsVisited++; + lastFieldName = fieldName; + if (fields.get(fieldName).multiValued()) { - if (fieldName.equals(lastFieldName)) { - multiValue.add(value); - } else { - if (multiValue != null) { - out.put(lastFieldName, multiValue); - } - multiValue = new ArrayList<>(); - lastFieldName = fieldName; - multiValue.add(value); - fieldsVisited++; - } + multiValue = new ArrayList<>(); + multiValue.add(value); } else { out.put(fieldName, value); - fieldsVisited++; } } private int flush() throws IOException { - if (lastFieldName != null && multiValue != null && !multiValue.isEmpty()) { + if (multiValue != null) { out.put(lastFieldName, multiValue); + multiValue = null; } return fieldsVisited; } From 9f7d53f6f0a7150c19a7820552c0e00af612fcb7 Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Tue, 20 Jan 2026 16:04:35 -0500 Subject: [PATCH 15/29] drop fieldIndex from FieldWriter + other cleanup --- .../handler/export/DoubleFieldWriter.java | 8 ++--- .../solr/handler/export/ExportWriter.java | 16 ++++++---- .../solr/handler/export/FieldWriter.java | 12 ++++++-- .../solr/handler/export/FloatFieldWriter.java | 8 ++--- .../solr/handler/export/IntFieldWriter.java | 8 ++--- .../solr/handler/export/LongFieldWriter.java | 8 ++--- .../solr/handler/export/MultiFieldWriter.java | 9 ++---- .../handler/export/StoredFieldsWriter.java | 30 ++++++++----------- .../handler/export/StringFieldWriter.java | 8 ++--- 9 files changed, 51 insertions(+), 56 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/export/DoubleFieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/DoubleFieldWriter.java index 8b211c6546bc..561d03366786 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/DoubleFieldWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/DoubleFieldWriter.java @@ -34,8 +34,7 @@ public DoubleFieldWriter( } @Override - public int write( - SortDoc sortDoc, 
LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) + public void write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew) throws IOException { double val; SortValue sortValue = sortDoc.getSortValue(this.field); @@ -43,7 +42,7 @@ public int write( if (sortValue.isPresent()) { val = (double) sortValue.getCurrentValue(); } else { // empty-value - return 0; + return; } } else { // field is not part of 'sort' param, but part of 'fl' param @@ -53,10 +52,9 @@ public int write( if (vals != null) { val = Double.longBitsToDouble(vals.longValue()); } else { - return 0; + return; } } ew.put(this.field, val); - return 1; } } diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java index 99b39839f5d6..c8713ab55916 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java @@ -109,9 +109,8 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable { private static final FieldWriter EMPTY_FIELD_WRITER = new FieldWriter() { @Override - public int write( - SortDoc sortDoc, LeafReaderContext readerContext, EntryWriter out, int fieldIndex) { - return 0; + public void write(SortDoc sortDoc, LeafReaderContext readerContext, EntryWriter out) { + // do nothing } }; @@ -486,9 +485,8 @@ void writeDoc( throws IOException { int ord = sortDoc.ord; LeafReaderContext context = leaves.get(ord); - int fieldIndex = 0; for (FieldWriter fieldWriter : writers) { - fieldIndex += fieldWriter.write(sortDoc, context, ew, fieldIndex); + fieldWriter.write(sortDoc, context, ew); } } @@ -512,6 +510,14 @@ public List getFieldWriters(String[] fields, SolrQueryRequest req) // Check if field can use DocValues boolean canUseDocValues = schemaField.hasDocValues() + // Special handling for SortableTextField: unlike other field types, it requires + // 
useDocValuesAsStored=true to be included via glob patterns in /export. This matches + // the + // behavior of /select (which requires useDocValuesAsStored=true for all globbed + // fields) and + // avoids performance issues. The requirement cannot be extended to other field types + // in + // /export for backward compatibility reasons. && (!(fieldType instanceof SortableTextField) || schemaField.useDocValuesAsStored()); Set requestFieldNames = solrReturnFields.getRequestedFieldNames() == null diff --git a/solr/core/src/java/org/apache/solr/handler/export/FieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/FieldWriter.java index 0d386237f538..4b7cf7eb47b6 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/FieldWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/FieldWriter.java @@ -22,7 +22,15 @@ import org.apache.solr.common.MapWriter; abstract class FieldWriter { - public abstract int write( - SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter out, int fieldIndex) + /** + * Writes field values from the document to the output. 
+ * + * @param sortDoc the document being exported + * @param readerContext the leaf reader context for accessing field values + * @param out the output writer to write field values to + * @throws IOException if an I/O error occurs while reading or writing field values + */ + public abstract void write( + SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter out) throws IOException; } diff --git a/solr/core/src/java/org/apache/solr/handler/export/FloatFieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/FloatFieldWriter.java index 590d2d620237..68a36f84b717 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/FloatFieldWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/FloatFieldWriter.java @@ -34,8 +34,7 @@ public FloatFieldWriter( } @Override - public int write( - SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) + public void write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew) throws IOException { float val; SortValue sortValue = sortDoc.getSortValue(this.field); @@ -43,7 +42,7 @@ public int write( if (sortValue.isPresent()) { val = (float) sortValue.getCurrentValue(); } else { // empty-value - return 0; + return; } } else { // field is not part of 'sort' param, but part of 'fl' param @@ -53,10 +52,9 @@ public int write( if (vals != null) { val = Float.intBitsToFloat((int) vals.longValue()); } else { - return 0; + return; } } ew.put(this.field, val); - return 1; } } diff --git a/solr/core/src/java/org/apache/solr/handler/export/IntFieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/IntFieldWriter.java index 0fbc57863beb..fc7c2d174ab8 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/IntFieldWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/IntFieldWriter.java @@ -34,8 +34,7 @@ public IntFieldWriter( } @Override - public int write( - SortDoc sortDoc, LeafReaderContext readerContext, 
MapWriter.EntryWriter ew, int fieldIndex) + public void write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew) throws IOException { int val; SortValue sortValue = sortDoc.getSortValue(this.field); @@ -43,7 +42,7 @@ public int write( if (sortValue.isPresent()) { val = (int) sortValue.getCurrentValue(); } else { // empty-value - return 0; + return; } } else { // field is not part of 'sort' param, but part of 'fl' param @@ -53,10 +52,9 @@ public int write( if (vals != null) { val = (int) vals.longValue(); } else { - return 0; + return; } } ew.put(this.field, val); - return 1; } } diff --git a/solr/core/src/java/org/apache/solr/handler/export/LongFieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/LongFieldWriter.java index 7146e95bfb83..38997e5a495c 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/LongFieldWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/LongFieldWriter.java @@ -35,8 +35,7 @@ public LongFieldWriter( } @Override - public int write( - SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) + public void write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew) throws IOException { long val; SortValue sortValue = sortDoc.getSortValue(this.field); @@ -44,7 +43,7 @@ public int write( if (sortValue.isPresent()) { val = (long) sortValue.getCurrentValue(); } else { // empty-value - return 0; + return; } } else { // field is not part of 'sort' param, but part of 'fl' param @@ -54,11 +53,10 @@ public int write( if (vals != null) { val = vals.longValue(); } else { - return 0; + return; } } doWrite(ew, val); - return 1; } protected void doWrite(MapWriter.EntryWriter ew, long val) throws IOException { diff --git a/solr/core/src/java/org/apache/solr/handler/export/MultiFieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/MultiFieldWriter.java index 39efcdbc8107..51ea833f8526 100644 --- 
a/solr/core/src/java/org/apache/solr/handler/export/MultiFieldWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/MultiFieldWriter.java @@ -61,15 +61,14 @@ public MultiFieldWriter( } @Override - public int write( - SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter out, int fieldIndex) + public void write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter out) throws IOException { if (this.fieldType.isPointField()) { SortedNumericDocValues vals = docValuesCache.getSortedNumericDocValues( sortDoc.docId, readerContext.reader(), readerContext.ord); if (vals == null) { - return 0; + return; } final SortedNumericDocValues docVals = vals; @@ -82,13 +81,12 @@ public int write( w.add(bitsToValue.apply(docVals.nextValue())); } }); - return 1; } else { SortedSetDocValues vals = docValuesCache.getSortedSetDocValues( sortDoc.docId, readerContext.reader(), readerContext.ord); if (vals == null) { - return 0; + return; } final SortedSetDocValues docVals = vals; @@ -105,7 +103,6 @@ public int write( else w.add(fieldType.toObject(f)); } }); - return 1; } } diff --git a/solr/core/src/java/org/apache/solr/handler/export/StoredFieldsWriter.java b/solr/core/src/java/org/apache/solr/handler/export/StoredFieldsWriter.java index fe496837c4a6..6bea86ceec63 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/StoredFieldsWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/StoredFieldsWriter.java @@ -36,23 +36,18 @@ class StoredFieldsWriter extends FieldWriter { - private final Map fields; - private final ThreadLocal> storedFieldsMap = - new ThreadLocal<>(); + private final Map schemaFields; + private static final ThreadLocal> + STORED_FIELDS_MAP = ThreadLocal.withInitial(WeakHashMap::new); public StoredFieldsWriter(Map fieldsToRead) { - this.fields = fieldsToRead; + this.schemaFields = fieldsToRead; } @Override - public int write( - SortDoc sortDoc, LeafReaderContext readerContext, EntryWriter out, int 
fieldIndex) + public void write(SortDoc sortDoc, LeafReaderContext readerContext, EntryWriter out) throws IOException { - WeakHashMap map = storedFieldsMap.get(); - if (map == null) { - map = new WeakHashMap<>(); - storedFieldsMap.set(map); - } + WeakHashMap map = STORED_FIELDS_MAP.get(); LeafReader reader = readerContext.reader(); StoredFields storedFields = map.get(reader.getReaderCacheHelper().getKey()); if (storedFields == null) { @@ -61,7 +56,7 @@ public int write( } ExportVisitor visitor = new ExportVisitor(out); storedFields.document(sortDoc.docId, visitor); - return visitor.flush(); + visitor.flush(); } class ExportVisitor extends StoredFieldVisitor { @@ -77,7 +72,7 @@ public ExportVisitor(EntryWriter out) { @Override public void stringField(FieldInfo fieldInfo, String value) throws IOException { - var schemaField = fields.get(fieldInfo.name); + var schemaField = schemaFields.get(fieldInfo.name); var fieldType = schemaField == null ? null : schemaField.getType(); if (fieldType instanceof BoolField) { // Convert "T"/"F" stored value to boolean true/false @@ -94,7 +89,7 @@ public void intField(FieldInfo fieldInfo, int value) throws IOException { @Override public void longField(FieldInfo fieldInfo, long value) throws IOException { - var schemaField = fields.get(fieldInfo.name); + var schemaField = schemaFields.get(fieldInfo.name); var fieldType = schemaField == null ? null : schemaField.getType(); if (fieldType instanceof DateValueFieldType) { Date date = new Date(value); @@ -116,7 +111,7 @@ public void doubleField(FieldInfo fieldInfo, double value) throws IOException { @Override public Status needsField(FieldInfo fieldInfo) { - return fields.containsKey(fieldInfo.name) ? Status.YES : Status.NO; + return schemaFields.containsKey(fieldInfo.name) ? 
Status.YES : Status.NO; } private void addField(String fieldName, T value) throws IOException { @@ -130,7 +125,7 @@ private void addField(String fieldName, T value) throws IOException { fieldsVisited++; lastFieldName = fieldName; - if (fields.get(fieldName).multiValued()) { + if (schemaFields.get(fieldName).multiValued()) { multiValue = new ArrayList<>(); multiValue.add(value); } else { @@ -138,12 +133,11 @@ private void addField(String fieldName, T value) throws IOException { } } - private int flush() throws IOException { + private void flush() throws IOException { if (multiValue != null) { out.put(lastFieldName, multiValue); multiValue = null; } - return fieldsVisited; } } } diff --git a/solr/core/src/java/org/apache/solr/handler/export/StringFieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/StringFieldWriter.java index 02bdc93f661f..228f3c1c743a 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/StringFieldWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/StringFieldWriter.java @@ -59,8 +59,7 @@ public StringFieldWriter( } @Override - public int write( - SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) + public void write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew) throws IOException { StringValue stringValue = (StringValue) sortDoc.getSortValue(this.field); BytesRef ref = null; @@ -74,7 +73,7 @@ public int write( if (stringValue.currentOrd == -1) { // Null sort value - return 0; + return; } if (this.lastOrd == stringValue.currentOrd) { @@ -89,7 +88,7 @@ public int write( docValuesCache.getSortedDocValues( sortDoc.docId, readerContext.reader(), readerContext.ord); if (vals == null) { - return 0; + return; } int ord = vals.ordValue(); @@ -102,7 +101,6 @@ public int write( } writeBytes(ew, ref, fieldType); - return 1; } protected void writeBytes(MapWriter.EntryWriter ew, BytesRef ref, FieldType fieldType) From 
ee88b4b730d26f13ea8a894e54073f07336f0bac Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Tue, 20 Jan 2026 17:11:49 -0500 Subject: [PATCH 16/29] still can't sort without DVs (document and test) + document comparison with Cursors --- .../solr/handler/export/ExportWriter.java | 11 +- .../conf/schema-sortingresponse.xml | 1 + .../solr/handler/export/TestExportWriter.java | 175 +++++++++++------- .../pages/exporting-result-sets.adoc | 26 ++- 4 files changed, 137 insertions(+), 76 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java index c8713ab55916..0ef60c0ce4f2 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java @@ -511,13 +511,10 @@ public List getFieldWriters(String[] fields, SolrQueryRequest req) boolean canUseDocValues = schemaField.hasDocValues() // Special handling for SortableTextField: unlike other field types, it requires - // useDocValuesAsStored=true to be included via glob patterns in /export. This matches - // the - // behavior of /select (which requires useDocValuesAsStored=true for all globbed - // fields) and - // avoids performance issues. The requirement cannot be extended to other field types - // in - // /export for backward compatibility reasons. + // useDocValuesAsStored=true to be included via glob patterns in /export. This + // matches the behavior of /select (which requires useDocValuesAsStored=true for + // all globbed fields) and avoids performance issues. The requirement cannot be + // extended to other field types in /export for backward compatibility reasons. 
&& (!(fieldType instanceof SortableTextField) || schemaField.useDocValuesAsStored()); Set requestFieldNames = solrReturnFields.getRequestedFieldNames() == null diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml b/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml index f7dbe80e7ded..6136a97dbf5c 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml @@ -81,6 +81,7 @@ + diff --git a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java index 5c23f4da8880..138633c6e108 100644 --- a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java +++ b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java @@ -29,6 +29,7 @@ import java.util.Set; import org.apache.lucene.tests.util.TestUtil; import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.StreamParams; import org.apache.solr.common.util.SuppressForbidden; @@ -1496,8 +1497,8 @@ public void testIncludeStoredFieldsExplicitRequest() throws Exception { String resp = h.query( req( - "q", "*:*", "qt", "/export", + "q", "*:*", "fl", "id,str_s_stored,num_i_stored,num_l_stored,num_f_stored,num_d_stored,date_dt_stored,bool_b_stored", "sort", "intdv asc", @@ -1505,19 +1506,21 @@ public void testIncludeStoredFieldsExplicitRequest() throws Exception { assertJsonEquals( resp, - "{\n" - + " \"responseHeader\":{\"status\":0},\n" - + " \"response\":{\n" - + " \"numFound\":1,\n" - + " \"docs\":[{\n" - + " \"id\":\"1\",\n" - + " \"str_s_stored\":\"hello\",\n" - + " \"num_i_stored\":42,\n" - + " \"num_l_stored\":1234567890123,\n" - + " \"num_f_stored\":3.14,\n" - + " \"num_d_stored\":2.71828,\n" - + " 
\"date_dt_stored\":\"2024-01-15T10:30:00Z\",\n" - + " \"bool_b_stored\":true}]}}"); + """ + { + "responseHeader":{"status":0}, + "response":{ + "numFound":1, + "docs":[{ + "id":"1", + "str_s_stored":"hello", + "num_i_stored":42, + "num_l_stored":1234567890123, + "num_f_stored":3.14, + "num_d_stored":2.71828, + "date_dt_stored":"2024-01-15T10:30:00Z", + "bool_b_stored":true}]}} + """); } @Test @@ -1533,8 +1536,8 @@ public void testIncludeStoredFieldsErrorWithoutParam() throws Exception { String resp = h.query( req( - "q", "*:*", "qt", "/export", + "q", "*:*", "fl", "id,str_s_stored", "sort", "intdv asc")); @@ -1561,8 +1564,8 @@ public void testIncludeStoredFieldsGlobSkipsWithoutParam() throws Exception { String resp = h.query( req( - "q", "*:*", "qt", "/export", + "q", "*:*", "fl", "id,intdv,stringdv,str_s_stored", "sort", "intdv asc")); @@ -1576,21 +1579,23 @@ public void testIncludeStoredFieldsGlobSkipsWithoutParam() throws Exception { resp = h.query( req( - "q", "*:*", "qt", "/export", + "q", "*:*", "fl", "intdv,*", "sort", "intdv asc")); assertJsonEquals( resp, - "{\n" - + " \"responseHeader\":{\"status\":0},\n" - + " \"response\":{\n" - + " \"numFound\":1,\n" - + " \"docs\":[{\n" - + " \"id\":\"1\",\n" - + " \"intdv\":1,\n" - + " \"stringdv\":\"docvalue_string\"}]}}"); + """ + { + "responseHeader":{"status":0}, + "response":{ + "numFound":1, + "docs":[{ + "id":"1", + "intdv":1, + "stringdv":"docvalue_string"}]}} + """); } @Test @@ -1610,23 +1615,25 @@ public void testIncludeStoredFieldsGlobIncludesWithParam() throws Exception { String resp = h.query( req( - "q", "*:*", "qt", "/export", + "q", "*:*", "fl", "*", "sort", "intdv asc", "includeStoredFields", "true")); assertJsonEquals( resp, - "{\n" - + " \"responseHeader\":{\"status\":0},\n" - + " \"response\":{\n" - + " \"numFound\":1,\n" - + " \"docs\":[{\n" - + " \"intdv\":1,\n" - + " \"stringdv\":\"docvalue_string\",\n" - + " \"id\":\"1\",\n" - + " \"str_s_stored\":\"stored_string\"}]}}"); + """ + { + 
"responseHeader":{"status":0}, + "response":{ + "numFound":1, + "docs":[{ + "intdv":1, + "stringdv":"docvalue_string", + "id":"1", + "str_s_stored":"stored_string"}]}} + """); } @Test @@ -1649,22 +1656,24 @@ public void testIncludeStoredFieldsMultiValued() throws Exception { String resp = h.query( req( - "q", "*:*", "qt", "/export", + "q", "*:*", "fl", "id,strs_ss_stored,nums_is_stored", "sort", "intdv asc", "includeStoredFields", "true")); assertJsonEquals( resp, - "{\n" - + " \"responseHeader\":{\"status\":0},\n" - + " \"response\":{\n" - + " \"numFound\":1,\n" - + " \"docs\":[{\n" - + " \"id\":\"1\",\n" - + " \"strs_ss_stored\":[\"value1\",\"value2\",\"value3\"],\n" - + " \"nums_is_stored\":[10,20,30]}]}}"); + """ + { + "responseHeader":{"status":0}, + "response":{ + "numFound":1, + "docs":[{ + "id":"1", + "strs_ss_stored":["value1","value2","value3"], + "nums_is_stored":[10,20,30]}]}} + """); } @Test @@ -1699,8 +1708,8 @@ public void testIncludeStoredFieldsAllTypes() throws Exception { String resp = h.query( req( - "q", "*:*", "qt", "/export", + "q", "*:*", "fl", "id,str_s_stored,num_i_stored,num_l_stored,num_f_stored,num_d_stored,date_dt_stored,bool_b_stored", "sort", "intdv asc", @@ -1708,27 +1717,59 @@ public void testIncludeStoredFieldsAllTypes() throws Exception { assertJsonEquals( resp, - "{\n" - + " \"responseHeader\":{\"status\":0},\n" - + " \"response\":{\n" - + " \"numFound\":2,\n" - + " \"docs\":[{\n" - + " \"id\":\"1\",\n" - + " \"str_s_stored\":\"test_string\",\n" - + " \"num_i_stored\":123,\n" - + " \"num_l_stored\":9876543210,\n" - + " \"num_f_stored\":1.5,\n" - + " \"num_d_stored\":2.5,\n" - + " \"date_dt_stored\":\"2025-12-25T00:00:00Z\",\n" - + " \"bool_b_stored\":false},\n" - + " {\n" - + " \"id\":\"2\",\n" - + " \"str_s_stored\":\"another_string\",\n" - + " \"num_i_stored\":456,\n" - + " \"num_l_stored\":1234567890,\n" - + " \"num_f_stored\":2.5,\n" - + " \"num_d_stored\":3.5,\n" - + " \"date_dt_stored\":\"2025-06-15T12:30:00Z\",\n" - + " 
\"bool_b_stored\":true}]}}"); + """ + { + "responseHeader":{"status":0}, + "response":{ + "numFound":2, + "docs":[{ + "id":"1", + "str_s_stored":"test_string", + "num_i_stored":123, + "num_l_stored":9876543210, + "num_f_stored":1.5, + "num_d_stored":2.5, + "date_dt_stored":"2025-12-25T00:00:00Z", + "bool_b_stored":false}, + { + "id":"2", + "str_s_stored":"another_string", + "num_i_stored":456, + "num_l_stored":1234567890, + "num_f_stored":2.5, + "num_d_stored":3.5, + "date_dt_stored":"2025-06-15T12:30:00Z", + "bool_b_stored":true}]}} + """); + } + + @Test + public void testSortOnSortableTextFieldWithoutDocValues() throws Exception { + // Test that sorting on a SortableTextField with stored=true but docValues=false + // produces an appropriate error message + clearIndex(); + + assertU( + adoc( + "id", "1", + "intdv", "1", + "sortable_stored_nodv", "test value")); + assertU(commit()); + + // Attempting to sort on a field without DocValues should fail + SolrException ex = + expectThrows( + SolrException.class, + () -> + h.query( + req( + "qt", "/export", + "q", "*:*", + "fl", "id", + "sort", "sortable_stored_nodv asc"))); + + assertTrue( + "Error message should mention DocValues requirement", + ex.getMessage().contains("DocValues") || ex.getMessage().contains("docValues")); } } diff --git a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc index 663426ee3e72..cfeb0ee792a2 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc @@ -23,6 +23,24 @@ This feature uses a stream sorting technique that begins to send records within The cases where this functionality may be useful include: session analysis, distributed merge joins, time series roll-ups, aggregations on high cardinality fields, fully distributed field collapsing, and sort-based stats. 
+== Comparison with Cursors + +The `/export` handler offers several advantages over xref:pagination-of-results.adoc#fetching-a-large-number-of-sorted-results-cursors[cursor-based pagination] for streaming large result sets. + +With cursors, the query is re-executed for each page of results. +In contrast, `/export` runs the filter query once and the resulting segment-level bitmasks are applied once per segment, after which the documents are simply iterated over. +Additionally, the segments that existed when the stream was opened are held open for the duration of the export, eliminating the disappearing or duplicate document issues that can occur with cursors. +The trade-off is that IndexReaders are kept around for longer periods of time. + +Another advantage of `/export` is significantly lower latency until the first document is returned, because the internal batch size is decoupled from the response message size. +With cursors, you typically need to set the `rows` parameter to a high value (e.g., 100,000) to achieve decent throughput. +However, this creates a "glugging" effect: when you request a large batch, Solr must build the entire payload and send it over the wire while your client waits. +Only after receiving this large payload can the client request the next batch, but in the interim Solr sits idle on this request. +With the `/export` handler, these steps are decoupled - Solr can continue sorting and decoding/encoding documents while waiting for more demand from the client. + +The advantage of cursors is flexibility. +A cursor mark can be persisted and resumed later, even across restarts, whereas an `/export` stream is entirely in-memory and must be consumed in a single session. + == Field Requirements All the fields being sorted must have docValues set to `true`. 
@@ -65,8 +83,8 @@ http://localhost:8983/solr/core_name/export?q=my-query&sort=severity+desc,timest === Specifying the Sort Criteria The `sort` property defines how documents will be sorted in the exported result set. -Results can be sorted by any field that has a field type of int,long, float, double, string. -The sort fields must be single valued fields. +Results can be sorted by any field that has a field type of int, long, float, double, string. +The sort fields must be single valued fields and must have docValues enabled. The export performance will get slower as you add more sort fields. If there is enough physical memory available outside of the JVM to load up the sort fields then the performance will be linearly slower with addition of sort fields. @@ -78,6 +96,10 @@ The `fl` property defines the fields that will be exported with the result set. Any of the field types that can be sorted (i.e., int, long, float, double, string, date, boolean) can be used in the field list. The fields can be single or multi-valued. +By default, fields in the field list must have docValues enabled. +However, when the `includeStoredFields` parameter is set to `true`, fields with only stored values (no docValues) can also be included. +Note that sort fields still require docValues regardless of this setting. + Wildcard patterns can be used for the field list (e.g. `fl=*_i`) and will be expanded to the list of fields that match the pattern and are able to be exported, see <>. Returning scores is not supported at this time. 
From 62208145fe2cf9d839a7192380ddab357d52c5b8 Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Tue, 20 Jan 2026 17:48:34 -0500 Subject: [PATCH 17/29] simplify testSortingWithoutDocValues --- .../solr/handler/export/ExportWriter.java | 2 +- .../handler/export/StoredFieldsWriter.java | 2 +- .../conf/schema-sortingresponse.xml | 2 +- .../solr/handler/export/TestExportWriter.java | 20 +++++++++---------- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java index 0ef60c0ce4f2..84afce039f6f 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java @@ -635,7 +635,7 @@ SortDoc getSortDoc(SolrIndexSearcher searcher, SortField[] sortFields) throws IO throw new IOException(field + " must have DocValues to use this feature."); } - if (ft instanceof SortableTextField && schemaField.useDocValuesAsStored() == false) { + if (ft instanceof SortableTextField && !schemaField.useDocValuesAsStored()) { throw new IOException( schemaField + " Must have useDocValuesAsStored='true' to be used with export writer"); } diff --git a/solr/core/src/java/org/apache/solr/handler/export/StoredFieldsWriter.java b/solr/core/src/java/org/apache/solr/handler/export/StoredFieldsWriter.java index 6bea86ceec63..58d502e2579d 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/StoredFieldsWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/StoredFieldsWriter.java @@ -36,9 +36,9 @@ class StoredFieldsWriter extends FieldWriter { - private final Map schemaFields; private static final ThreadLocal> STORED_FIELDS_MAP = ThreadLocal.withInitial(WeakHashMap::new); + private final Map schemaFields; public StoredFieldsWriter(Map fieldsToRead) { this.schemaFields = fieldsToRead; diff --git 
a/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml b/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml index 6136a97dbf5c..96e0b5965d5c 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml @@ -132,7 +132,7 @@ - + diff --git a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java index 138633c6e108..84b413c9f8e5 100644 --- a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java +++ b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java @@ -17,6 +17,7 @@ package org.apache.solr.handler.export; import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.IOException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; @@ -29,7 +30,6 @@ import java.util.Set; import org.apache.lucene.tests.util.TestUtil; import org.apache.solr.SolrTestCaseJ4; -import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.StreamParams; import org.apache.solr.common.util.SuppressForbidden; @@ -1744,32 +1744,30 @@ public void testIncludeStoredFieldsAllTypes() throws Exception { } @Test - public void testSortOnSortableTextFieldWithoutDocValues() throws Exception { - // Test that sorting on a SortableTextField with stored=true but docValues=false - // produces an appropriate error message + public void testSortingWithoutDocValues() throws Exception { + // Attempting to sort on a field without DocValues should fail clearIndex(); assertU( adoc( "id", "1", - "intdv", "1", - "sortable_stored_nodv", "test value")); + "sorted_i_stored", "0")); assertU(commit()); - // Attempting to sort on a field without DocValues should fail - SolrException ex = + IOException ex = expectThrows( - SolrException.class, 
+ IOException.class, () -> h.query( req( "qt", "/export", "q", "*:*", "fl", "id", - "sort", "sortable_stored_nodv asc"))); + "sort", "sorted_i_stored asc", + "includeStoredFields", "true"))); assertTrue( "Error message should mention DocValues requirement", - ex.getMessage().contains("DocValues") || ex.getMessage().contains("docValues")); + ex.getMessage().contains("DocValues")); } } From 92cef3063ad850b4a3ae832be2be12949d8e7bbb Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Wed, 21 Jan 2026 10:39:57 -0500 Subject: [PATCH 18/29] improve document accuracy --- .../solr/collection1/conf/schema-sortingresponse.xml | 1 - .../modules/query-guide/pages/exporting-result-sets.adoc | 6 ++++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml b/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml index 96e0b5965d5c..5674b1dd7b2f 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml @@ -81,7 +81,6 @@ - diff --git a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc index cfeb0ee792a2..9441a695d682 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc @@ -35,11 +35,13 @@ The trade-off is that IndexReaders are kept around for longer periods of time. Another advantage of `/export` is significantly lower latency until the first document is returned, because the internal batch size is decoupled from the response message size. With cursors, you typically need to set the `rows` parameter to a high value (e.g., 100,000) to achieve decent throughput. 
However, this creates a "glugging" effect: when you request a large batch, Solr must build the entire payload and send it over the wire while your client waits. -Only after receiving this large payload can the client request the next batch, but in the interim Solr sits idle on this request. +Only after receiving and decoding this large payload can the client request the next batch, but in the interim Solr sits idle on this request. With the `/export` handler, these steps are decoupled - Solr can continue sorting and decoding/encoding documents while waiting for more demand from the client. The advantage of cursors is flexibility. A cursor mark can be persisted and resumed later, even across restarts, whereas an `/export` stream is entirely in-memory and must be consumed in a single session. +Cursors also support distributed queries by default while `/export` does not, although they can be achieved using +xref:streaming-expressions.adoc[streaming expressions] which are built on top of the `/export` handler. == Field Requirements @@ -67,7 +69,7 @@ The default value is `30000` but users may want to specify smaller values to lim An optional parameter `includeStoredFields` (default `false`) enables exporting fields that only have stored values (no docValues). When set to `true`, fields without docValues but with stored values can be included in the field list (`fl`). Note that retrieving stored fields may significantly impact export performance compared to docValues fields, as stored fields require additional I/O operations. -Fields that have both docValues and stored values will always use docValues for optimal performance, regardless of this parameter setting. +If all requested fields are `docValues=true` then the data will be read only from docValues. This behavior applies to fields that are also `stored=true` and does not depend on the value of the `includeStoredFields` parameter. The supported response writers are `json` and `javabin`. 
For backward compatibility reasons `wt=xsort` is also supported as input, but `wt=xsort` behaves same as `wt=json`. From 314949bca836c776d57accdc883cbb0351bf05b7 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Fri, 23 Jan 2026 21:23:14 -0500 Subject: [PATCH 19/29] italicize Co-authored-by: David Smiley --- .../modules/query-guide/pages/exporting-result-sets.adoc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc index 9441a695d682..01f25b48a418 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc @@ -38,7 +38,9 @@ However, this creates a "glugging" effect: when you request a large batch, Solr Only after receiving and decoding this large payload can the client request the next batch, but in the interim Solr sits idle on this request. With the `/export` handler, these steps are decoupled - Solr can continue sorting and decoding/encoding documents while waiting for more demand from the client. -The advantage of cursors is flexibility. +The advantage of cursors is _flexibility_. +Cursors impose no constraints on the sort criteria accept that you must include a unique key, which isn't a real constraint. +Cursors work as part of `SearchHandler` and thus can include most/all capabilities of it like highlighting. A cursor mark can be persisted and resumed later, even across restarts, whereas an `/export` stream is entirely in-memory and must be consumed in a single session. Cursors also support distributed queries by default while `/export` does not, although they can be achieved using xref:streaming-expressions.adoc[streaming expressions] which are built on top of the `/export` handler. 
From 83dd65240da2de5506ee9df8f4df76c59f406807 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Fri, 23 Jan 2026 21:23:54 -0500 Subject: [PATCH 20/29] missing comma Co-authored-by: David Smiley --- .../modules/query-guide/pages/exporting-result-sets.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc index 01f25b48a418..f8300d5f9fd4 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc @@ -102,7 +102,7 @@ The fields can be single or multi-valued. By default, fields in the field list must have docValues enabled. However, when the `includeStoredFields` parameter is set to `true`, fields with only stored values (no docValues) can also be included. -Note that sort fields still require docValues regardless of this setting. +Note that sort fields still require docValues, regardless of this setting. Wildcard patterns can be used for the field list (e.g. `fl=*_i`) and will be expanded to the list of fields that match the pattern and are able to be exported, see <>. 
From c1c183f00629b1e208078af5ed935f35b78c955c Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Fri, 23 Jan 2026 21:25:31 -0500 Subject: [PATCH 21/29] format and wording Co-authored-by: David Smiley --- .../modules/query-guide/pages/exporting-result-sets.adoc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc index f8300d5f9fd4..7e2d5fa09fec 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc @@ -71,7 +71,8 @@ The default value is `30000` but users may want to specify smaller values to lim An optional parameter `includeStoredFields` (default `false`) enables exporting fields that only have stored values (no docValues). When set to `true`, fields without docValues but with stored values can be included in the field list (`fl`). Note that retrieving stored fields may significantly impact export performance compared to docValues fields, as stored fields require additional I/O operations. -If all requested fields are `docValues=true` then the data will be read only from docValues. This behavior applies to fields that are also `stored=true` and does not depend on the value of the `includeStoredFields` parameter. +If all requested fields are `docValues=true` then the data will only be read from docValues. +This behavior applies to fields that are also `stored=true` and does not depend on the value of the `includeStoredFields` parameter. The supported response writers are `json` and `javabin`. For backward compatibility reasons `wt=xsort` is also supported as input, but `wt=xsort` behaves same as `wt=json`. 
From 6bd8b03935a18e56c8c10fde38c9e7528171b1e3 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Wed, 28 Jan 2026 17:11:19 -0500 Subject: [PATCH 22/29] Update solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc Co-authored-by: David Smiley --- .../modules/query-guide/pages/exporting-result-sets.adoc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc index 7e2d5fa09fec..d897eaa94218 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc @@ -41,7 +41,8 @@ With the `/export` handler, these steps are decoupled - Solr can continue sortin The advantage of cursors is _flexibility_. Cursors impose no constraints on the sort criteria accept that you must include a unique key, which isn't a real constraint. Cursors work as part of `SearchHandler` and thus can include most/all capabilities of it like highlighting. -A cursor mark can be persisted and resumed later, even across restarts, whereas an `/export` stream is entirely in-memory and must be consumed in a single session. +A `cursorMark` can be persisted and resumed later, even across restarts, or never continued if enough results were consumed to satisfy the use-case. +An `/export` stream must be consumed in a single session. Cursors also support distributed queries by default while `/export` does not, although they can be achieved using xref:streaming-expressions.adoc[streaming expressions] which are built on top of the `/export` handler. 
From c7f90f6f4c5478c4d45d582c13a02b9ff4b584ed Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Thu, 29 Jan 2026 12:22:05 -0500 Subject: [PATCH 23/29] extract canUseDocValues method and doc note --- .../solr/handler/export/ExportWriter.java | 21 ++++++++++--------- .../pages/exporting-result-sets.adoc | 4 ++-- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java index 84afce039f6f..998730a953f1 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java @@ -507,21 +507,12 @@ public List getFieldWriters(String[] fields, SolrQueryRequest req) SchemaField schemaField = req.getSchema().getField(field); FieldType fieldType = schemaField.getType(); - // Check if field can use DocValues - boolean canUseDocValues = - schemaField.hasDocValues() - // Special handling for SortableTextField: unlike other field types, it requires - // useDocValuesAsStored=true to be included via glob patterns in /export. This - // matches the behavior of /select (which requires useDocValuesAsStored=true for - // all globbed fields) and avoids performance issues. The requirement cannot be - // extended to other field types in /export for backward compatibility reasons. - && (!(fieldType instanceof SortableTextField) || schemaField.useDocValuesAsStored()); Set requestFieldNames = solrReturnFields.getRequestedFieldNames() == null ? 
Set.of() : solrReturnFields.getRequestedFieldNames(); - if (canUseDocValues) { + if (canUseDocValues(schemaField, fieldType)) { // Prefer DocValues when available docValueFields.add(schemaField); } else if (schemaField.stored()) { @@ -621,6 +612,16 @@ public List getFieldWriters(String[] fields, SolrQueryRequest req) return writers; } + private static boolean canUseDocValues(SchemaField schemaField, FieldType fieldType) { + return schemaField.hasDocValues() + // Special handling for SortableTextField: unlike other field types, it requires + // useDocValuesAsStored=true to be included via glob patterns in /export. This + // matches the behavior of /select (which requires useDocValuesAsStored=true for + // all globbed fields) and avoids performance issues. The requirement cannot be + // extended to other field types in /export for backward compatibility reasons. + && (!(fieldType instanceof SortableTextField) || schemaField.useDocValuesAsStored()); + } + SortDoc getSortDoc(SolrIndexSearcher searcher, SortField[] sortFields) throws IOException { SortValue[] sortValues = new SortValue[sortFields.length]; IndexSchema schema = searcher.getSchema(); diff --git a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc index d897eaa94218..43b833b33e18 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc @@ -30,7 +30,7 @@ The `/export` handler offers several advantages over xref:pagination-of-results. With cursors, the query is re-executed for each page of results. In contrast, `/export` runs the filter query once and the resulting segment-level bitmasks are applied once per segment, after which the documents are simply iterated over. 
Additionally, the segments that existed when the stream was opened are held open for the duration of the export, eliminating the disappearing or duplicate document issues that can occur with cursors. -The trade-off is that IndexReaders are kept around for longer periods of time. +The trade-off is that IndexReaders are kept around for longer periods of time, which delays cleanup of memory and disk resources until the export completes. Another advantage of `/export` is significantly lower latency until the first document is returned, because the internal batch size is decoupled from the response message size. With cursors, you typically need to set the `rows` parameter to a high value (e.g., 100,000) to achieve decent throughput. @@ -41,7 +41,7 @@ With the `/export` handler, these steps are decoupled - Solr can continue sortin The advantage of cursors is _flexibility_. Cursors impose no constraints on the sort criteria accept that you must include a unique key, which isn't a real constraint. Cursors work as part of `SearchHandler` and thus can include most/all capabilities of it like highlighting. -A `cursorMark` can be persisted and resumed later, even across restarts, or never continued if enough results were consumed to satisfy the use-case. +A `cursorMark` can be persisted and resumed later, even across restarts, or never continued if enough results were consumed to satisfy the use-case. An `/export` stream must be consumed in a single session. Cursors also support distributed queries by default while `/export` does not, although they can be achieved using xref:streaming-expressions.adoc[streaming expressions] which are built on top of the `/export` handler. 
From bc836fe1e8e7136be019ccdcb6185a443eb48b12 Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Thu, 29 Jan 2026 12:33:00 -0500 Subject: [PATCH 24/29] 2X2 table --- .../pages/exporting-result-sets.adoc | 39 +++++++++++++++++-- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc index 43b833b33e18..d78226a9a792 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc @@ -25,21 +25,52 @@ The cases where this functionality may be useful include: session analysis, dist == Comparison with Cursors -The `/export` handler offers several advantages over xref:pagination-of-results.adoc#fetching-a-large-number-of-sorted-results-cursors[cursor-based pagination] for streaming large result sets. +The `/export` handler and xref:pagination-of-results.adoc#fetching-a-large-number-of-sorted-results-cursors[cursor-based pagination] offer different trade-offs for streaming large result sets. + +[cols="1h,1,1"] +|=== +| |`/export` |Cursors + +|Advantages +a| +* Query executed once; bitmasks applied once per segment +* Consistent snapshot (no duplicates or missing docs) +* Lower latency to first document +* Streaming decouples reader and writer creating smoother flow +a| +* Flexible sort criteria (just needs unique key) +* Full `SearchHandler` features (highlighting, etc.) 
+* Resumable across requests and restarts +* Distributed queries by default + +|Disadvantages +a| +* Must consume in a single session +* IndexReaders held open, delaying resource cleanup +* Requires streaming expressions for distributed queries +* Sort fields must have docValues +a| +* Query re-executed for each page +* Possible duplicates or missing docs during updates +* Large batches needed for throughput (create "glugging" effect) +* Higher latency to first document +|=== + +=== Details With cursors, the query is re-executed for each page of results. In contrast, `/export` runs the filter query once and the resulting segment-level bitmasks are applied once per segment, after which the documents are simply iterated over. Additionally, the segments that existed when the stream was opened are held open for the duration of the export, eliminating the disappearing or duplicate document issues that can occur with cursors. -The trade-off is that IndexReaders are kept around for longer periods of time, which delays cleanup of memory and disk resources until the export completes. +However, this means IndexReaders are kept around for longer periods of time, which delays cleanup of memory and disk resources until the export completes. -Another advantage of `/export` is significantly lower latency until the first document is returned, because the internal batch size is decoupled from the response message size. +The `/export` handler has significantly lower latency until the first document is returned, because the internal batch size is decoupled from the response message size. With cursors, you typically need to set the `rows` parameter to a high value (e.g., 100,000) to achieve decent throughput. However, this creates a "glugging" effect: when you request a large batch, Solr must build the entire payload and send it over the wire while your client waits. 
Only after receiving and decoding this large payload can the client request the next batch, but in the interim Solr sits idle on this request. With the `/export` handler, these steps are decoupled - Solr can continue sorting and decoding/encoding documents while waiting for more demand from the client. The advantage of cursors is _flexibility_. -Cursors impose no constraints on the sort criteria accept that you must include a unique key, which isn't a real constraint. +Cursors impose no constraints on the sort criteria except that you must include a unique key, which isn't a real constraint. Cursors work as part of `SearchHandler` and thus can include most/all capabilities of it like highlighting. A `cursorMark` can be persisted and resumed later, even across restarts, or never continued if enough results were consumed to satisfy the use-case. An `/export` stream must be consumed in a single session. From c83692172a3d449e304d51369a7b65c2f875736f Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Thu, 29 Jan 2026 12:44:37 -0500 Subject: [PATCH 25/29] 2X2 table tweak --- .../query-guide/pages/exporting-result-sets.adoc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc index d78226a9a792..c147b1919e62 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc @@ -27,33 +27,34 @@ The cases where this functionality may be useful include: session analysis, dist The `/export` handler and xref:pagination-of-results.adoc#fetching-a-large-number-of-sorted-results-cursors[cursor-based pagination] offer different trade-offs for streaming large result sets. 
-[cols="1h,1,1"] +[cols="h,2,2"] |=== -| |`/export` |Cursors +| |Export |Cursors |Advantages a| * Query executed once; bitmasks applied once per segment * Consistent snapshot (no duplicates or missing docs) -* Lower latency to first document +* Typically lower latency to first document * Streaming decouples reader and writer creating smoother flow a| * Flexible sort criteria (just needs unique key) * Full `SearchHandler` features (highlighting, etc.) -* Resumable across requests and restarts * Distributed queries by default +* Resumable across requests and restarts +* No noise in logs if *not* resumed |Disadvantages a| * Must consume in a single session -* IndexReaders held open, delaying resource cleanup +* IndexReaders held open, delaying memory/disk cleanup * Requires streaming expressions for distributed queries * Sort fields must have docValues a| * Query re-executed for each page * Possible duplicates or missing docs during updates * Large batches needed for throughput (create "glugging" effect) -* Higher latency to first document +* Typically higher latency to first document |=== === Details From a88370f52394fb90926e65d85294cb555e588f59 Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Thu, 29 Jan 2026 13:14:08 -0500 Subject: [PATCH 26/29] 2X2 table tweak --- .../query-guide/pages/exporting-result-sets.adoc | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc index c147b1919e62..c2d2c3781774 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc @@ -35,26 +35,25 @@ The `/export` handler and xref:pagination-of-results.adoc#fetching-a-large-numbe a| * Query executed once; bitmasks applied once per segment * Consistent snapshot (no duplicates or missing docs) -* Typically lower 
latency to first document +* Lower latency to first document (typically) * Streaming decouples reader and writer creating smoother flow a| -* Flexible sort criteria (just needs unique key) -* Full `SearchHandler` features (highlighting, etc.) * Distributed queries by default +* Flexible sort criteria (just needs unique key) * Resumable across requests and restarts -* No noise in logs if *not* resumed +* Full `SearchHandler` features (highlighting, etc.) |Disadvantages a| -* Must consume in a single session -* IndexReaders held open, delaying memory/disk cleanup * Requires streaming expressions for distributed queries * Sort fields must have docValues +* Must consume in a single session +* IndexReaders held open, delaying memory/disk cleanup a| * Query re-executed for each page * Possible duplicates or missing docs during updates -* Large batches needed for throughput (create "glugging" effect) -* Typically higher latency to first document +* Higher latency to first document (typically) +* Large batches needed for throughput |=== === Details From 1f34207c728888247a5ed27feb98882f3bb177db Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Thu, 29 Jan 2026 14:47:14 -0500 Subject: [PATCH 27/29] simplify --- .../modules/query-guide/pages/exporting-result-sets.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc index c2d2c3781774..8185dc3f2e99 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc @@ -33,13 +33,13 @@ The `/export` handler and xref:pagination-of-results.adoc#fetching-a-large-numbe |Advantages a| -* Query executed once; bitmasks applied once per segment +* Query executed once * Consistent snapshot (no duplicates or missing docs) * Lower latency to first document (typically) * 
Streaming decouples reader and writer creating smoother flow a| * Distributed queries by default -* Flexible sort criteria (just needs unique key) +* Flexible sort criteria * Resumable across requests and restarts * Full `SearchHandler` features (highlighting, etc.) From 608fd6cd25bfa9c37a87c10c5742f7cf9b3ea185 Mon Sep 17 00:00:00 2001 From: lkotzaniewsk Date: Thu, 29 Jan 2026 15:07:43 -0500 Subject: [PATCH 28/29] wording --- .../pages/exporting-result-sets.adoc | 109 +++++++++--------- 1 file changed, 55 insertions(+), 54 deletions(-) diff --git a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc index 8185dc3f2e99..edf60ec7d21d 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc @@ -23,60 +23,6 @@ This feature uses a stream sorting technique that begins to send records within The cases where this functionality may be useful include: session analysis, distributed merge joins, time series roll-ups, aggregations on high cardinality fields, fully distributed field collapsing, and sort-based stats. -== Comparison with Cursors - -The `/export` handler and xref:pagination-of-results.adoc#fetching-a-large-number-of-sorted-results-cursors[cursor-based pagination] offer different trade-offs for streaming large result sets. - -[cols="h,2,2"] -|=== -| |Export |Cursors - -|Advantages -a| -* Query executed once -* Consistent snapshot (no duplicates or missing docs) -* Lower latency to first document (typically) -* Streaming decouples reader and writer creating smoother flow -a| -* Distributed queries by default -* Flexible sort criteria -* Resumable across requests and restarts -* Full `SearchHandler` features (highlighting, etc.) 
- -|Disadvantages -a| -* Requires streaming expressions for distributed queries -* Sort fields must have docValues -* Must consume in a single session -* IndexReaders held open, delaying memory/disk cleanup -a| -* Query re-executed for each page -* Possible duplicates or missing docs during updates -* Higher latency to first document (typically) -* Large batches needed for throughput -|=== - -=== Details - -With cursors, the query is re-executed for each page of results. -In contrast, `/export` runs the filter query once and the resulting segment-level bitmasks are applied once per segment, after which the documents are simply iterated over. -Additionally, the segments that existed when the stream was opened are held open for the duration of the export, eliminating the disappearing or duplicate document issues that can occur with cursors. -However, this means IndexReaders are kept around for longer periods of time, which delays cleanup of memory and disk resources until the export completes. - -The `/export` handler has significantly lower latency until the first document is returned, because the internal batch size is decoupled from the response message size. -With cursors, you typically need to set the `rows` parameter to a high value (e.g., 100,000) to achieve decent throughput. -However, this creates a "glugging" effect: when you request a large batch, Solr must build the entire payload and send it over the wire while your client waits. -Only after receiving and decoding this large payload can the client request the next batch, but in the interim Solr sits idle on this request. -With the `/export` handler, these steps are decoupled - Solr can continue sorting and decoding/encoding documents while waiting for more demand from the client. - -The advantage of cursors is _flexibility_. -Cursors impose no constraints on the sort criteria except that you must include a unique key, which isn't a real constraint. 
-Cursors work as part of `SearchHandler` and thus can include most/all capabilities of it like highlighting. -A `cursorMark` can be persisted and resumed later, even across restarts, or never continued if enough results were consumed to satisfy the use-case. -An `/export` stream must be consumed in a single session. -Cursors also support distributed queries by default while `/export` does not, although they can be achieved using -xref:streaming-expressions.adoc[streaming expressions] which are built on top of the `/export` handler. - == Field Requirements All the fields being sorted must have docValues set to `true`. @@ -171,6 +117,61 @@ http://localhost:8983/solr/core_name/export?q=my-query&sort=reporter+desc,&fl=re (Note that the `over` parameter must use one of the fields requested in the `fl` parameter). +== Comparison with Cursors + +The `/export` handler and xref:pagination-of-results.adoc#fetching-a-large-number-of-sorted-results-cursors[cursor-based pagination] offer different trade-offs for streaming large result sets. + +[cols="h,2,2"] +|=== +| |Export |Cursors + +|Advantages +a| +* Query executed once +* Consistent snapshot (no duplicates or missing docs) +* Lower latency to first document (typically) +* Decoupled reader and writer creates smoother flow +a| +* Distributed queries by default +* Flexible sort criteria +* Resumable across requests and restarts +* Full `SearchHandler` features (highlighting, etc.) + +|Disadvantages +a| +* Requires streaming expressions for distributed queries +* Sort fields must have docValues +* Must consume in a single session +* May delay cleanup of older segments +a| +* Query re-executed for each page +* Possible duplicates or missing docs during updates +* Higher latency to first document (typically) +* Uneven flow; large batches needed for throughput +|=== + +=== Details + +With cursors, the query is re-executed for each page of results. 
+In contrast, `/export` runs the filter query once and the resulting segment-level bitmasks are applied once per segment, after which the documents are simply iterated over. +Additionally, the segments that existed when the stream was opened are held open for the duration of the export, eliminating the disappearing or duplicate document issues that can occur with cursors. +However, this means IndexReaders are kept around for longer periods of time, which delays cleanup of memory and disk resources until the export completes. + +The `/export` handler has significantly lower latency until the first document is returned, because the internal batch size is decoupled from the response message size. +With cursors, you typically need to set the `rows` parameter to a high value (e.g., 100,000) to achieve decent throughput. +However, this creates a "glugging" effect: when you request a large batch, Solr must build the entire payload and send it over the wire while your client waits. +Only after receiving and decoding this large payload can the client request the next batch, but in the interim Solr sits idle on this request. +With the `/export` handler, these steps are decoupled - Solr can continue sorting and decoding/encoding documents while waiting for more demand from the client. + +The advantage of cursors is _flexibility_. +Cursors impose no constraints on the sort criteria except that you must include a unique key, which isn't a real constraint. +Cursors work as part of `SearchHandler` and thus can include most/all capabilities of it like highlighting. +A `cursorMark` can be persisted and resumed later, even across restarts, or never continued if enough results were consumed to satisfy the use-case. +An `/export` stream must be consumed in a single session. 
+Cursors also support distributed queries by default while `/export` does not, although they can be achieved using +xref:streaming-expressions.adoc[streaming expressions] which are built on top of the `/export` handler. + == Distributed Support See the section xref:streaming-expressions.adoc[] for distributed support. + From 9ec8052a80bb2f3a8f826c860df968e6a40a7abb Mon Sep 17 00:00:00 2001 From: David Smiley Date: Sat, 31 Jan 2026 13:07:56 -0500 Subject: [PATCH 29/29] ref guide tweaks --- .../pages/exporting-result-sets.adoc | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc index edf60ec7d21d..fc6f4d6a7ef3 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc @@ -127,12 +127,12 @@ The `/export` handler and xref:pagination-of-results.adoc#fetching-a-large-numbe |Advantages a| -* Query executed once +* Query executed once -- efficient * Consistent snapshot (no duplicates or missing docs) -* Lower latency to first document (typically) +* Lower latency to the first document (typically) * Decoupled reader and writer creates smoother flow a| -* Distributed queries by default +* Sharded collections intrinsically supported * Flexible sort criteria * Resumable across requests and restarts * Full `SearchHandler` features (highlighting, etc.)
@@ -140,13 +140,13 @@ a| |Disadvantages a| * Requires streaming expressions for distributed queries -* Sort fields must have docValues +* Sort criteria can only be fields with docValues; no score * Must consume in a single session -* May delay cleanup of older segments +* A long session may prevent old segments from being removed in a timely manner a| -* Query re-executed for each page -* Possible duplicates or missing docs during updates -* Higher latency to first document (typically) +* Query re-executed for each page -- inefficient +* Possible duplicates or missing docs with concurrent updates +* Higher latency to the first document (typically) * Uneven flow; large batches needed for throughput |=== @@ -158,10 +158,10 @@ Additionally, the segments that existed when the stream was opened are held open However, this means IndexReaders are kept around for longer periods of time, which delays cleanup of memory and disk resources until the export completes. The `/export` handler has significantly lower latency until the first document is returned, because the internal batch size is decoupled from the response message size. -With cursors, you typically need to set the `rows` parameter to a high value (e.g., 100,000) to achieve decent throughput. -However, this creates a "glugging" effect: when you request a large batch, Solr must build the entire payload and send it over the wire while your client waits. +With cursors, you typically need to set the `rows` parameter to a high value (e.g., 10k-100k depending on `fl`/document size) to achieve decent throughput, provided you have enough memory (rows * shards * `fl`-size). +However, this creates a "glugging" effect: when you request a large batch, Solr must build the entire payload and send it over the wire while your client waits (assuming a sharded collection). Only after receiving and decoding this large payload can the client request the next batch, but in the interim Solr sits idle on this request.
-With the `/export` handler, these steps are decoupled - Solr can continue sorting and decoding/encoding documents while waiting for more demand from the client. +With the `/export` handler, these steps are decoupled; Solr can continue sorting and decoding/encoding documents while waiting for more demand from the client. The advantage of cursors is _flexibility_. Cursors impose no constraints on the sort criteria except that you must include a unique key, which isn't a real constraint.