15 changes: 15 additions & 0 deletions index-recovery-tests.md
@@ -0,0 +1,15 @@
# Harder-to-Reproduce Index Recovery Performance Results

I tested recovery of 1 shard and of 12 shards at ~20 GB each. That size makes it challenging to package as a neatly reproducible benchmark, although I am sure it can be done.
I am confident this behavior can be reproduced with a comparable amount of data and a comparable cluster setup. I can share the scripts I used to produce these results if that is helpful.

## Results Summary

| Scenario | Shards | Configuration | Result | Time |
|----------|--------|---------------|--------|------|
| HTTP/2 | 1 | default | Fast | ~40s |
| HTTP/1 | 1 | default | Fast | ~50s |
| HTTP/1 | 12 | default | Fast | ~90s |
| HTTP/2 | 12 | default | Slowest | ~320s |
| HTTP/2 | 12 | `maxConcurrentStreams=1`| Slower | ~180s |
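
For reference, the `maxConcurrentStreams=1` scenario maps onto the new `jetty.http2c.*` properties added to `solr/server/etc/jetty-http.xml` in this change. A minimal sketch of driving that configuration, assuming the property is passed to the Solr JVM via `SOLR_OPTS` (any equivalent mechanism works):

```bash
# Sketch: cap each HTTP/2 cleartext (h2c) connection at one concurrent stream,
# matching the maxConcurrentStreams=1 row in the table above.
SOLR_OPTS="-Djetty.http2c.maxConcurrentStreams=1" bin/solr start -c
```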

50 changes: 50 additions & 0 deletions solr/benchmark/build-lib.sh
@@ -0,0 +1,50 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Builds all JARs and copies them to lib/ so jmh.sh can run without invoking gradle.

set -e

SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
REPO_ROOT="$SCRIPT_DIR/../.."
LIB_DIR="$SCRIPT_DIR/lib"

echo "Building jars..."
"$REPO_ROOT/gradlew" -p "$REPO_ROOT" jar

echo "Getting classpath from gradle..."
CLASSPATH=$("$REPO_ROOT/gradlew" -p "$SCRIPT_DIR" -q echoCp)

echo "Copying JARs to lib/..."
rm -rf "$LIB_DIR"
mkdir -p "$LIB_DIR"

# Copy all JAR dependencies
echo "$CLASSPATH" | tr ':' '\n' | while read -r jar; do
if [[ -f "$jar" && "$jar" == *.jar ]]; then
cp "$jar" "$LIB_DIR/"
fi
done

# Copy the benchmark module's own JAR (echoCp outputs classes dir, not the JAR)
BENCHMARK_JAR=$(ls "$SCRIPT_DIR/build/libs"/solr-benchmark-*.jar 2>/dev/null | head -1)
if [ -n "$BENCHMARK_JAR" ]; then
cp "$BENCHMARK_JAR" "$LIB_DIR/"
fi

JAR_COUNT=$(ls -1 "$LIB_DIR"/*.jar 2>/dev/null | wc -l)
echo "Done. Copied $JAR_COUNT JARs to lib/"
echo "You can now run jmh.sh without gradle being invoked."
4 changes: 2 additions & 2 deletions solr/benchmark/jmh.sh
@@ -54,8 +54,8 @@ echo "running JMH with args: $@"
jvmArgs="-jvmArgs -Djmh.shutdownTimeout=5 -jvmArgs -Djmh.shutdownTimeout.step=3 -jvmArgs -Djava.security.egd=file:/dev/./urandom -jvmArgs -XX:+UnlockDiagnosticVMOptions -jvmArgs -XX:+DebugNonSafepoints -jvmArgs --add-opens=java.base/java.lang.reflect=ALL-UNNAMED"
gcArgs="-jvmArgs -XX:+UseG1GC -jvmArgs -XX:+ParallelRefProcEnabled"

-# -jvmArgs -Dlog4j2.debug
-loggingArgs="-jvmArgs -Dlog4jConfigurationFile=./log4j2-bench.xml -jvmArgs -Dlog4j2.is.webapp=false -jvmArgs -Dlog4j2.garbagefreeThreadContextMap=true -jvmArgs -Dlog4j2.enableDirectEncoders=true -jvmArgs -Dlog4j2.enable.threadlocals=true"
# -jvmArgs -Dlog4j2.debug
loggingArgs="-jvmArgs -Dlog4j2.configurationFile=./log4j2-bench.xml -jvmArgs -Dlog4j2.is.webapp=false -jvmArgs -Dlog4j2.garbagefreeThreadContextMap=true -jvmArgs -Dlog4j2.enableDirectEncoders=true -jvmArgs -Dlog4j2.enable.threadlocals=true -jvmArgs -Djava.util.logging.config.file=./logging.properties"

#set -x

10 changes: 9 additions & 1 deletion solr/benchmark/log4j2-bench.xml
@@ -96,10 +96,18 @@
<AsyncLogger name="org.apache.solr.bench.BaseBenchState.RandomCounts" level="info" additivity="false">
<AppenderRef ref="RandomCountsFile"/>
</AsyncLogger>
<!-- <AsyncLogger name="org.eclipse.jetty" level="${sys:solr.jetty.log.level:-DEBUG}"/>-->
<!-- <AsyncLogger name="org.eclipse.jetty.deploy" level="${sys:solr.jetty.log.level:-DEBUG}"/>-->
<!-- <AsyncLogger name="org.eclipse.jetty.ee" level="${sys:solr.jetty.log.level:-DEBUG}"/>-->
<!-- <AsyncLogger name="org.eclipse.jetty.session" level="${sys:solr.jetty.log.level:-DEBUG}"/>-->
<!-- <AsyncLogger name="org.eclipse.jetty.server" level="${sys:solr.jetty.log.level:-DEBUG}"/>-->
<!-- <AsyncLogger name="org.eclipse.jetty.http" level="${sys:solr.jetty.log.level:-DEBUG}"/>-->
<!-- <AsyncLogger name="org.eclipse.jetty.http2" level="DEBUG"/>-->
<AsyncLogger name="org.eclipse.jetty.http2.FlowControlStrategy" level="DEBUG"/>
<AsyncLogger name="org.eclipse.jetty.http2.BufferingFlowControlStrategy" level="DEBUG"/>

<AsyncRoot level="info">
<AppenderRef ref="MainLogFile"/>
<!-- <AppenderRef ref="STDOUT"/> -->
</AsyncRoot>
</Loggers>
</Configuration>
110 changes: 109 additions & 1 deletion solr/benchmark/src/java/org/apache/solr/bench/MiniClusterState.java
@@ -36,6 +36,7 @@
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.jetty.HttpJettySolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
@@ -79,7 +80,7 @@ public static class MiniClusterBenchState {
public String zkHost;

/** The Cluster. */
-MiniSolrCloudCluster cluster;
public MiniSolrCloudCluster cluster;

/** The Client. */
public HttpJettySolrClient client;
@@ -393,6 +394,7 @@ public void index(String collection, Docs docs, int docCount, boolean parallel)
private void indexParallel(String collection, Docs docs, int docCount)
throws InterruptedException {
Meter meter = new Meter();
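// Capture the first failure from any indexing thread so it can be rethrown after shutdown.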
AtomicReference<Exception> indexingException = new AtomicReference<>();
ExecutorService executorService =
Executors.newFixedThreadPool(
Runtime.getRuntime().availableProcessors(),
Expand Down Expand Up @@ -429,6 +431,7 @@ public void run() {
try {
client.requestWithBaseUrl(url, updateRequest, collection);
} catch (Exception e) {
indexingException.compareAndSet(null, e);
throw new RuntimeException(e);
}
}
@@ -444,6 +447,11 @@
}

scheduledExecutor.shutdown();

Exception ex = indexingException.get();
if (ex != null) {
throw new RuntimeException("Indexing failed", ex);
}
}

private void indexBatch(String collection, Docs docs, int docCount, int batchSize)
@@ -471,6 +479,106 @@ private void indexBatch(String collection, Docs docs, int docCount, int batchSize)
log(meter.getCount() + " docs at " + (long) meter.getMeanRate() + " doc/s");
}

/**
* Index documents using multiple threads, each sending batches.
*
* @param collection the collection
* @param docs the docs generator
* @param docCount total number of docs to index
* @param numThreads number of parallel threads
* @param batchSize docs per batch/request
*/
@SuppressForbidden(reason = "This module does not need to deal with logging context")
public void indexParallelBatched(
String collection, Docs docs, int docCount, int numThreads, int batchSize)
throws InterruptedException {
Meter meter = new Meter();
AtomicReference<Exception> indexingException = new AtomicReference<>();

ExecutorService executorService =
Executors.newFixedThreadPool(numThreads, new SolrNamedThreadFactory("SolrJMH Indexer"));

// Progress logging
ScheduledExecutorService scheduledExecutor =
Executors.newSingleThreadScheduledExecutor(
new SolrNamedThreadFactory("SolrJMH Indexer Progress"));
scheduledExecutor.scheduleAtFixedRate(
() -> {
if (meter.getCount() >= docCount) {
scheduledExecutor.shutdown();
} else {
log(
meter.getCount()
+ "/"
+ docCount
+ " docs at "
+ (long) meter.getMeanRate()
+ " doc/s");
}
},
5,
5,
TimeUnit.SECONDS);

// Split work across threads
int docsPerThread = docCount / numThreads;
int remainder = docCount % numThreads;
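// The first (docCount % numThreads) threads take one extra doc so the per-thread counts sum exactly to docCount.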

for (int t = 0; t < numThreads; t++) {
final int threadDocsCount = docsPerThread + (t < remainder ? 1 : 0);

executorService.execute(
() -> {
List<SolrInputDocument> batch = new ArrayList<>(batchSize);

for (int i = 0; i < threadDocsCount; i++) {
batch.add(docs.inputDocument());

if (batch.size() >= batchSize) {
sendBatch(collection, batch, indexingException);
meter.mark(batch.size());
batch.clear();
}
}

// Send remaining docs
if (!batch.isEmpty()) {
sendBatch(collection, batch, indexingException);
meter.mark(batch.size());
}
});
}

executorService.shutdown();
boolean terminated = false;
while (!terminated) {
terminated = executorService.awaitTermination(10, TimeUnit.MINUTES);
}
scheduledExecutor.shutdown();

Exception ex = indexingException.get();
if (ex != null) {
throw new RuntimeException("Indexing failed", ex);
}

log(meter.getCount() + " docs indexed at " + (long) meter.getMeanRate() + " doc/s");
}

private void sendBatch(
String collection,
List<SolrInputDocument> batch,
AtomicReference<Exception> indexingException) {
try {
UpdateRequest updateRequest = new UpdateRequest();
updateRequest.add(batch);
// Use first node - simpler and avoids thread-safety issues with random node selection
client.requestWithBaseUrl(nodes.get(0), updateRequest, collection);
} catch (Exception e) {
indexingException.compareAndSet(null, e);
throw new RuntimeException(e);
}
}

/**
* Wait for merges.
*
@@ -64,7 +64,30 @@ public static class BenchState {
@Param({"false", "true"})
boolean useHttp1;

-private int docs = 1000;
@Param("3")
int nodeCount;

@Param("3")
int numShards;

@Param("1")
int numReplicas;

@Param("1000")
int docCount;

@Param("4")
int indexThreads; // 0 = sequential indexing

@Param("500")
int batchSize;

@Param("1024")
int docSizeBytes; // Target document size in bytes (approximate)

@Param("3")
int numTextFields; // Number of textN_ts fields to generate

private String zkHost;
private ModifiableSolrParams params;
private StreamContext streamContext;
@@ -73,24 +96,70 @@ public static class BenchState {
@Setup(Level.Trial)
public void setup(MiniClusterBenchState miniClusterState) throws Exception {

-miniClusterState.startMiniCluster(3);
-miniClusterState.createCollection(collection, 3, 1);
-Docs docGen =
-    docs()
-        .field("id", integers().incrementing())
-        .field("text2_ts", strings().basicLatinAlphabet().multi(312).ofLengthBetween(30, 64))
-        .field("text3_ts", strings().basicLatinAlphabet().multi(312).ofLengthBetween(30, 64))
-        .field("int1_i_dv", integers().all());
-miniClusterState.index(collection, docGen, docs);
miniClusterState.startMiniCluster(nodeCount);
miniClusterState.createCollection(collection, numShards, numReplicas);

Docs docGen = createDocsWithTargetSize(docSizeBytes, numTextFields);

if (indexThreads > 0) {
miniClusterState.indexParallelBatched(
collection, docGen, docCount, indexThreads, batchSize);
} else {
miniClusterState.index(collection, docGen, docCount, false);
}
miniClusterState.waitForMerges(collection);

zkHost = miniClusterState.zkHost;

// Build field list dynamically based on numTextFields
StringBuilder flBuilder = new StringBuilder("id");
for (int i = 1; i <= numTextFields; i++) {
flBuilder.append(",text").append(i).append("_ts");
}
flBuilder.append(",int1_i_dv");

params = new ModifiableSolrParams();
params.set(CommonParams.Q, "*:*");
params.set(CommonParams.FL, "id,text2_ts,text3_ts,int1_i_dv");
params.set(CommonParams.FL, flBuilder.toString());
params.set(CommonParams.SORT, "id asc,int1_i_dv asc");
-params.set(CommonParams.ROWS, docs);
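// ROWS=docCount returns the entire result set in one response, so each query exercises one large transfer over the selected HTTP version.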
params.set(CommonParams.ROWS, docCount);
}

/**
* Creates a Docs generator that produces documents with approximately the target size.
*
* @param targetSizeBytes target document size in bytes (approximate)
* @param numFields number of textN_ts fields to generate
* @return Docs generator configured for the target size
*/
private Docs createDocsWithTargetSize(int targetSizeBytes, int numFields) {
// Calculate how many characters per field to approximate target size
// Each character is ~1 byte in basic Latin alphabet
// Account for field overhead, id field, and int field
int baseOverhead = 100; // Approximate overhead for id, int field, and field names
int availableBytes = Math.max(100, targetSizeBytes - baseOverhead);
int bytesPerField = availableBytes / Math.max(1, numFields);

// Use multi-value strings: multi(N) creates N strings joined by spaces
// Calculate words and word length to hit target
int wordsPerField = Math.max(1, bytesPerField / 50); // ~50 chars per word avg
int wordLength = Math.min(64, Math.max(10, bytesPerField / Math.max(1, wordsPerField)));
int minWordLength = Math.max(5, wordLength - 10);
int maxWordLength = wordLength + 10;
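
// Worked example with the defaults (targetSizeBytes=1024, numFields=3):
// availableBytes=924, bytesPerField=308, wordsPerField=6, wordLength=51,
// so each field is ~6 words of 41-61 chars (~310 bytes) and a doc lands near 1 KB.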

Docs docGen = docs().field("id", integers().incrementing());

for (int i = 1; i <= numFields; i++) {
docGen.field(
"text" + i + "_ts",
strings()
.basicLatinAlphabet()
.multi(wordsPerField)
.ofLengthBetween(minWordLength, maxWordLength));
}

docGen.field("int1_i_dv", integers().all());
return docGen;
}

@Setup(Level.Iteration)
@@ -43,6 +43,7 @@

<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_s" type="string" indexed="true" stored="false"/>
<dynamicField name="*_ss" type="string" indexed="false" stored="true"/>
<dynamicField name="*_t" type="text" indexed="true" stored="false"/>
<dynamicField name="*_ts" type="text" indexed="true" stored="true"/>
<dynamicField name="*_i" type="int" indexed="true" stored="false"/>
9 changes: 9 additions & 0 deletions solr/server/etc/jetty-http.xml
@@ -34,6 +34,15 @@
<Item>
<New class="org.eclipse.jetty.http2.server.HTTP2CServerConnectionFactory">
<Arg name="config"><Ref refid="httpConfig" /></Arg>
<Set name="maxConcurrentStreams" property="jetty.http2c.maxConcurrentStreams"/>
<Set name="initialStreamRecvWindow" property="jetty.http2c.initialStreamRecvWindow"/>
<Set name="initialSessionRecvWindow" property="jetty.http2c.initialSessionRecvWindow"/>
<Set name="maxSettingsKeys" property="jetty.http2c.maxSettingsKeys"/>
<Set name="rateControlFactory">
<New class="org.eclipse.jetty.http2.WindowRateControl$Factory">
<Arg type="int"><Property name="jetty.http2c.rateControl.maxEventsPerSecond" default="128"/></Arg>
</New>
</Set>
</New>
</Item>
</Array>