diff --git a/index-recovery-tests.md b/index-recovery-tests.md new file mode 100644 index 000000000000..3e6fbcab19ce --- /dev/null +++ b/index-recovery-tests.md @@ -0,0 +1,15 @@ +# Harder-to-Reproduce Index Recovery Performance Results + +I tested recovery of 1 and 12 shards of ~20 GB each. The data size makes this hard to package as a neatly reproducible benchmark, although I am sure it can be done. +I am confident the behavior can be reproduced with a comparable amount of data and a comparable SolrCloud topology. I can share the scripts I used to produce these results if that would help. + +## Results Summary + +| Scenario | Shards | Configuration | Result | Time | +|----------|--------|---------------|--------|------| +| HTTP/2 | 1 | default | Fast | ~40s | +| HTTP/1 | 1 | default | Fast | ~50s | +| HTTP/1 | 12 | default | Fast | ~90s | +| HTTP/2 | 12 | default | Slowest | ~320s | +| HTTP/2 | 12 | `maxConcurrentStreams=1` | Slower | ~180s | + diff --git a/solr/benchmark/build-lib.sh b/solr/benchmark/build-lib.sh new file mode 100644 index 000000000000..a6d4c27b219f --- /dev/null +++ b/solr/benchmark/build-lib.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Builds all JARs and copies them to lib/ so jmh.sh can run without invoking gradle. + +set -e + +SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) +REPO_ROOT="$SCRIPT_DIR/../.." +LIB_DIR="$SCRIPT_DIR/lib" + +echo "Building jars..." +"$REPO_ROOT/gradlew" -p "$REPO_ROOT" jar + +echo "Getting classpath from gradle..." +CLASSPATH=$("$REPO_ROOT/gradlew" -p "$SCRIPT_DIR" -q echoCp) + +echo "Copying JARs to lib/..." +rm -rf "$LIB_DIR" +mkdir -p "$LIB_DIR" + +# Copy all JAR dependencies +echo "$CLASSPATH" | tr ':' '\n' | while read -r jar; do + if [ -f "$jar" ] && [[ "$jar" == *.jar ]]; then + cp "$jar" "$LIB_DIR/" + fi +done + +# Copy the benchmark module's own JAR (echoCp outputs classes dir, not the JAR) +BENCHMARK_JAR=$(ls "$SCRIPT_DIR/build/libs"/solr-benchmark-*.jar 2>/dev/null | head -1) +if [ -n "$BENCHMARK_JAR" ]; then + cp "$BENCHMARK_JAR" "$LIB_DIR/" +fi + +JAR_COUNT=$(ls -1 "$LIB_DIR"/*.jar 2>/dev/null | wc -l) +echo "Done. Copied $JAR_COUNT JARs to lib/" +echo "You can now run jmh.sh without gradle being invoked."
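For context on the `maxConcurrentStreams=1` row in the table above: that run capped how many HTTP/2 streams a single connection on the server connector may multiplex. Below is a minimal, hypothetical embedded-Jetty sketch of that kind of cap; it is not the jetty-http.xml change in this patch, and the class name and port are made up for illustration.

```java
import org.eclipse.jetty.http2.server.HTTP2CServerConnectionFactory;
import org.eclipse.jetty.server.HttpConfiguration;
import org.eclipse.jetty.server.HttpConnectionFactory;
import org.eclipse.jetty.server.Server;
import org.eclipse.jetty.server.ServerConnector;

/** Hypothetical illustration of the "maxConcurrentStreams=1" configuration row. */
public class H2StreamCapSketch {
  public static void main(String[] args) throws Exception {
    Server server = new Server();
    HttpConfiguration config = new HttpConfiguration();

    // HTTP/1.1 plus cleartext HTTP/2 (h2c) on one connector, as Solr's jetty-http.xml does.
    HttpConnectionFactory http1 = new HttpConnectionFactory(config);
    HTTP2CServerConnectionFactory h2c = new HTTP2CServerConnectionFactory(config);

    // Cap how many HTTP/2 streams one connection may multiplex; Jetty's default is much
    // higher, and a cap of 1 effectively serializes responses on the connection.
    h2c.setMaxConcurrentStreams(1);

    ServerConnector connector = new ServerConnector(server, http1, h2c);
    connector.setPort(8983); // arbitrary port for the sketch
    server.addConnector(connector);
    server.start();
    server.join();
  }
}
```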
diff --git a/solr/benchmark/jmh.sh b/solr/benchmark/jmh.sh index 18f9875da192..6ef517f0da0b 100755 --- a/solr/benchmark/jmh.sh +++ b/solr/benchmark/jmh.sh @@ -54,8 +54,8 @@ echo "running JMH with args: $@" jvmArgs="-jvmArgs -Djmh.shutdownTimeout=5 -jvmArgs -Djmh.shutdownTimeout.step=3 -jvmArgs -Djava.security.egd=file:/dev/./urandom -jvmArgs -XX:+UnlockDiagnosticVMOptions -jvmArgs -XX:+DebugNonSafepoints -jvmArgs --add-opens=java.base/java.lang.reflect=ALL-UNNAMED" gcArgs="-jvmArgs -XX:+UseG1GC -jvmArgs -XX:+ParallelRefProcEnabled" -# -jvmArgs -Dlog4j2.debug -loggingArgs="-jvmArgs -Dlog4jConfigurationFile=./log4j2-bench.xml -jvmArgs -Dlog4j2.is.webapp=false -jvmArgs -Dlog4j2.garbagefreeThreadContextMap=true -jvmArgs -Dlog4j2.enableDirectEncoders=true -jvmArgs -Dlog4j2.enable.threadlocals=true" +# -jvmArgs -Dlog4j2.debug +loggingArgs="-jvmArgs -Dlog4j2.configurationFile=./log4j2-bench.xml -jvmArgs -Dlog4j2.is.webapp=false -jvmArgs -Dlog4j2.garbagefreeThreadContextMap=true -jvmArgs -Dlog4j2.enableDirectEncoders=true -jvmArgs -Dlog4j2.enable.threadlocals=true -jvmArgs -Djava.util.logging.config.file=./logging.properties" #set -x diff --git a/solr/benchmark/log4j2-bench.xml b/solr/benchmark/log4j2-bench.xml index c3b81a84ca5e..ce7d42c23435 100644 --- a/solr/benchmark/log4j2-bench.xml +++ b/solr/benchmark/log4j2-bench.xml @@ -96,10 +96,18 @@ + + + + + + + + + - diff --git a/solr/benchmark/src/java/org/apache/solr/bench/MiniClusterState.java b/solr/benchmark/src/java/org/apache/solr/bench/MiniClusterState.java index 4c4946a3ae86..9e5e714bb001 100755 --- a/solr/benchmark/src/java/org/apache/solr/bench/MiniClusterState.java +++ b/solr/benchmark/src/java/org/apache/solr/bench/MiniClusterState.java @@ -36,6 +36,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.jetty.HttpJettySolrClient; import org.apache.solr.client.solrj.request.CollectionAdminRequest; @@ -79,7 +80,7 @@ public static class MiniClusterBenchState { public String zkHost; /** The Cluster. */ - MiniSolrCloudCluster cluster; + public MiniSolrCloudCluster cluster; /** The Client. */ public HttpJettySolrClient client; @@ -393,6 +394,7 @@ public void index(String collection, Docs docs, int docCount, boolean parallel) private void indexParallel(String collection, Docs docs, int docCount) throws InterruptedException { Meter meter = new Meter(); + AtomicReference indexingException = new AtomicReference<>(); ExecutorService executorService = Executors.newFixedThreadPool( Runtime.getRuntime().availableProcessors(), @@ -429,6 +431,7 @@ public void run() { try { client.requestWithBaseUrl(url, updateRequest, collection); } catch (Exception e) { + indexingException.compareAndSet(null, e); throw new RuntimeException(e); } } @@ -444,6 +447,11 @@ public void run() { } scheduledExecutor.shutdown(); + + Exception ex = indexingException.get(); + if (ex != null) { + throw new RuntimeException("Indexing failed", ex); + } } private void indexBatch(String collection, Docs docs, int docCount, int batchSize) @@ -471,6 +479,106 @@ private void indexBatch(String collection, Docs docs, int docCount, int batchSiz log(meter.getCount() + " docs at " + (long) meter.getMeanRate() + " doc/s"); } + /** + * Index documents using multiple threads, each sending batches. 
+ * + * @param collection the collection + * @param docs the docs generator + * @param docCount total number of docs to index + * @param numThreads number of parallel threads + * @param batchSize docs per batch/request + */ + @SuppressForbidden(reason = "This module does not need to deal with logging context") + public void indexParallelBatched( + String collection, Docs docs, int docCount, int numThreads, int batchSize) + throws InterruptedException { + Meter meter = new Meter(); + AtomicReference<Exception> indexingException = new AtomicReference<>(); + + ExecutorService executorService = + Executors.newFixedThreadPool(numThreads, new SolrNamedThreadFactory("SolrJMH Indexer")); + + // Progress logging + ScheduledExecutorService scheduledExecutor = + Executors.newSingleThreadScheduledExecutor( + new SolrNamedThreadFactory("SolrJMH Indexer Progress")); + scheduledExecutor.scheduleAtFixedRate( + () -> { + if (meter.getCount() >= docCount) { + scheduledExecutor.shutdown(); + } else { + log( + meter.getCount() + + "/" + + docCount + + " docs at " + + (long) meter.getMeanRate() + + " doc/s"); + } + }, + 5, + 5, + TimeUnit.SECONDS); + + // Split work across threads + int docsPerThread = docCount / numThreads; + int remainder = docCount % numThreads; + + for (int t = 0; t < numThreads; t++) { + final int threadDocsCount = docsPerThread + (t < remainder ? 1 : 0); + + executorService.execute( + () -> { + List<SolrInputDocument> batch = new ArrayList<>(batchSize); + + for (int i = 0; i < threadDocsCount; i++) { + batch.add(docs.inputDocument()); + + if (batch.size() >= batchSize) { + sendBatch(collection, batch, indexingException); + meter.mark(batch.size()); + batch.clear(); + } + } + + // Send remaining docs + if (!batch.isEmpty()) { + sendBatch(collection, batch, indexingException); + meter.mark(batch.size()); + } + }); + } + + executorService.shutdown(); + boolean terminated = false; + while (!terminated) { + terminated = executorService.awaitTermination(10, TimeUnit.MINUTES); + } + scheduledExecutor.shutdown(); + + Exception ex = indexingException.get(); + if (ex != null) { + throw new RuntimeException("Indexing failed", ex); + } + + log(meter.getCount() + " docs indexed at " + (long) meter.getMeanRate() + " doc/s"); + } + + private void sendBatch( + String collection, + List<SolrInputDocument> batch, + AtomicReference<Exception> indexingException) { + try { + UpdateRequest updateRequest = new UpdateRequest(); + updateRequest.add(batch); + // Use first node - simpler and avoids thread-safety issues with random node selection + client.requestWithBaseUrl(nodes.get(0), updateRequest, collection); + } catch (Exception e) { + indexingException.compareAndSet(null, e); + throw new RuntimeException(e); + } + } + /** * Wait for merges.
* diff --git a/solr/benchmark/src/java/org/apache/solr/bench/search/StreamingSearch.java b/solr/benchmark/src/java/org/apache/solr/bench/search/StreamingSearch.java index a9860763dbe7..ead834232d7d 100644 --- a/solr/benchmark/src/java/org/apache/solr/bench/search/StreamingSearch.java +++ b/solr/benchmark/src/java/org/apache/solr/bench/search/StreamingSearch.java @@ -64,7 +64,30 @@ public static class BenchState { @Param({"false", "true"}) boolean useHttp1; - private int docs = 1000; + @Param("3") + int nodeCount; + + @Param("3") + int numShards; + + @Param("1") + int numReplicas; + + @Param("1000") + int docCount; + + @Param("4") + int indexThreads; // 0 = sequential indexing + + @Param("500") + int batchSize; + + @Param("1024") + int docSizeBytes; // Target document size in bytes (approximate) + + @Param("3") + int numTextFields; // Number of textN_ts fields to generate + private String zkHost; private ModifiableSolrParams params; private StreamContext streamContext; @@ -73,24 +96,70 @@ public static class BenchState { @Setup(Level.Trial) public void setup(MiniClusterBenchState miniClusterState) throws Exception { - miniClusterState.startMiniCluster(3); - miniClusterState.createCollection(collection, 3, 1); - Docs docGen = - docs() - .field("id", integers().incrementing()) - .field("text2_ts", strings().basicLatinAlphabet().multi(312).ofLengthBetween(30, 64)) - .field("text3_ts", strings().basicLatinAlphabet().multi(312).ofLengthBetween(30, 64)) - .field("int1_i_dv", integers().all()); - miniClusterState.index(collection, docGen, docs); + miniClusterState.startMiniCluster(nodeCount); + miniClusterState.createCollection(collection, numShards, numReplicas); + + Docs docGen = createDocsWithTargetSize(docSizeBytes, numTextFields); + + if (indexThreads > 0) { + miniClusterState.indexParallelBatched( + collection, docGen, docCount, indexThreads, batchSize); + } else { + miniClusterState.index(collection, docGen, docCount, false); + } miniClusterState.waitForMerges(collection); zkHost = miniClusterState.zkHost; + // Build field list dynamically based on numTextFields + StringBuilder flBuilder = new StringBuilder("id"); + for (int i = 1; i <= numTextFields; i++) { + flBuilder.append(",text").append(i).append("_ts"); + } + flBuilder.append(",int1_i_dv"); + params = new ModifiableSolrParams(); params.set(CommonParams.Q, "*:*"); - params.set(CommonParams.FL, "id,text2_ts,text3_ts,int1_i_dv"); + params.set(CommonParams.FL, flBuilder.toString()); params.set(CommonParams.SORT, "id asc,int1_i_dv asc"); - params.set(CommonParams.ROWS, docs); + params.set(CommonParams.ROWS, docCount); + } + + /** + * Creates a Docs generator that produces documents with approximately the target size. 
+ * + * @param targetSizeBytes target document size in bytes (approximate) + * @param numFields number of textN_ts fields to generate + * @return Docs generator configured for the target size + */ + private Docs createDocsWithTargetSize(int targetSizeBytes, int numFields) { + // Calculate how many characters per field to approximate target size + // Each character is ~1 byte in basic Latin alphabet + // Account for field overhead, id field, and int field + int baseOverhead = 100; // Approximate overhead for id, int field, and field names + int availableBytes = Math.max(100, targetSizeBytes - baseOverhead); + int bytesPerField = availableBytes / Math.max(1, numFields); + + // Use multi-value strings: multi(N) creates N strings joined by spaces + // Calculate words and word length to hit target + int wordsPerField = Math.max(1, bytesPerField / 50); // ~50 chars per word avg + int wordLength = Math.min(64, Math.max(10, bytesPerField / Math.max(1, wordsPerField))); + int minWordLength = Math.max(5, wordLength - 10); + int maxWordLength = wordLength + 10; + + Docs docGen = docs().field("id", integers().incrementing()); + + for (int i = 1; i <= numFields; i++) { + docGen.field( + "text" + i + "_ts", + strings() + .basicLatinAlphabet() + .multi(wordsPerField) + .ofLengthBetween(minWordLength, maxWordLength)); + } + + docGen.field("int1_i_dv", integers().all()); + return docGen; } @Setup(Level.Iteration) diff --git a/solr/benchmark/src/resources/configs/cloud-minimal/conf/schema.xml b/solr/benchmark/src/resources/configs/cloud-minimal/conf/schema.xml index e517aea59307..09ae5fa43997 100644 --- a/solr/benchmark/src/resources/configs/cloud-minimal/conf/schema.xml +++ b/solr/benchmark/src/resources/configs/cloud-minimal/conf/schema.xml @@ -43,6 +43,7 @@ + diff --git a/solr/server/etc/jetty-http.xml b/solr/server/etc/jetty-http.xml index 02f53991c5c1..cf43b2836c85 100644 --- a/solr/server/etc/jetty-http.xml +++ b/solr/server/etc/jetty-http.xml @@ -34,6 +34,15 @@ + + + + + + + + + diff --git a/solr/server/etc/jetty-https.xml b/solr/server/etc/jetty-https.xml index 4a74eb125063..d5bcae7f14f9 100644 --- a/solr/server/etc/jetty-https.xml +++ b/solr/server/etc/jetty-https.xml @@ -54,6 +54,15 @@ + + + + + + + + + diff --git a/solr/solrj-jetty/src/java/org/apache/solr/client/solrj/jetty/HttpJettySolrClient.java b/solr/solrj-jetty/src/java/org/apache/solr/client/solrj/jetty/HttpJettySolrClient.java index 072a2add953c..9ccafca1a7c3 100644 --- a/solr/solrj-jetty/src/java/org/apache/solr/client/solrj/jetty/HttpJettySolrClient.java +++ b/solr/solrj-jetty/src/java/org/apache/solr/client/solrj/jetty/HttpJettySolrClient.java @@ -81,6 +81,7 @@ import org.eclipse.jetty.http.HttpMethod; import org.eclipse.jetty.http.MimeTypes; import org.eclipse.jetty.http.MultiPart; +import org.eclipse.jetty.http2.SimpleFlowControlStrategy; import org.eclipse.jetty.http2.client.HTTP2Client; import org.eclipse.jetty.http2.client.transport.HttpClientTransportOverHTTP2; import org.eclipse.jetty.io.ClientConnector; @@ -110,6 +111,27 @@ public class HttpJettySolrClient extends HttpSolrClientBase { */ public static final String CLIENT_CUSTOMIZER_SYSPROP = "solr.solrj.http.jetty.customizer"; + /** + * A Java system property to set the initial HTTP/2 session receive window size (in bytes). Only + * applies when using HTTP/2 transport. + */ + public static final String HTTP2_SESSION_RECV_WINDOW_SYSPROP = + "solr.http2.initialSessionRecvWindow"; + + /** + * A Java system property to set the initial HTTP/2 stream receive window size (in bytes). 
Only + * applies when using HTTP/2 transport. + */ + public static final String HTTP2_STREAM_RECV_WINDOW_SYSPROP = + "solr.http2.initialStreamRecvWindow"; + + /** + * A Java system property to enable the simple flow control strategy for HTTP/2. When set to + * "true", uses {@link SimpleFlowControlStrategy} instead of the default buffering strategy. Only + * applies when using HTTP/2 transport. + */ + public static final String HTTP2_SIMPLE_FLOW_CONTROL_SYSPROP = "solr.http2.useSimpleFlowControl"; + public static final String REQ_PRINCIPAL_KEY = "solr-req-principal"; private static final String USER_AGENT = "Solr[" + MethodHandles.lookup().lookupClass().getName() + "] " + SolrVersion.LATEST_STRING; @@ -275,9 +297,9 @@ private HttpClient createHttpClient(Builder builder) { HttpClient httpClient; HttpClientTransport transport; if (builder.shouldUseHttp1_1()) { - if (log.isDebugEnabled()) { - log.debug("Create HttpJettySolrClient with HTTP/1.1 transport"); - } + log.info( + "Create HttpJettySolrClient with HTTP/1.1 transport (solr.http1={})", + System.getProperty("solr.http1")); transport = new HttpClientTransportOverHTTP(clientConnector); httpClient = new HttpClient(transport); @@ -285,11 +307,12 @@ private HttpClient createHttpClient(Builder builder) { httpClient.setMaxConnectionsPerDestination(builder.getMaxConnectionsPerHost()); } } else { - if (log.isDebugEnabled()) { - log.debug("Create HttpJettySolrClient with HTTP/2 transport"); - } + log.info( + "Create HttpJettySolrClient with HTTP/2 transport (solr.http1={})", + System.getProperty("solr.http1")); HTTP2Client http2client = new HTTP2Client(clientConnector); + configureHttp2FlowControl(http2client); transport = new HttpClientTransportOverHTTP2(http2client); httpClient = new HttpClient(transport); httpClient.setMaxConnectionsPerDestination(4); @@ -326,6 +349,31 @@ private HttpClient createHttpClient(Builder builder) { return httpClient; } + /** + * Configures HTTP/2 flow control settings on the HTTP2Client based on system properties. Only + * applies settings when the corresponding system property is explicitly set. 
+ */ + private void configureHttp2FlowControl(HTTP2Client http2client) { + Integer sessionRecvWindow = + EnvUtils.getPropertyAsInteger(HTTP2_SESSION_RECV_WINDOW_SYSPROP, null); + if (sessionRecvWindow != null) { + http2client.setInitialSessionRecvWindow(sessionRecvWindow); + log.info("Set HTTP/2 initial session recv window to {} bytes", sessionRecvWindow); + } + + Integer streamRecvWindow = + EnvUtils.getPropertyAsInteger(HTTP2_STREAM_RECV_WINDOW_SYSPROP, null); + if (streamRecvWindow != null) { + http2client.setInitialStreamRecvWindow(streamRecvWindow); + log.info("Set HTTP/2 initial stream recv window to {} bytes", streamRecvWindow); + } + + if (EnvUtils.getPropertyAsBool(HTTP2_SIMPLE_FLOW_CONTROL_SYSPROP, false)) { + http2client.setFlowControlStrategyFactory(SimpleFlowControlStrategy::new); + log.info("Using simple HTTP/2 flow control strategy"); + } + } + private void setupProxy(Builder builder, HttpClient httpClient) { if (builder.getProxyHost() == null) { return; diff --git a/solr/solrj-jetty/src/test/org/apache/solr/client/solrj/jetty/HttpJettySolrClientCompatibilityTest.java b/solr/solrj-jetty/src/test/org/apache/solr/client/solrj/jetty/HttpJettySolrClientCompatibilityTest.java index 918ef1247f8c..f94a133c31d1 100644 --- a/solr/solrj-jetty/src/test/org/apache/solr/client/solrj/jetty/HttpJettySolrClientCompatibilityTest.java +++ b/solr/solrj-jetty/src/test/org/apache/solr/client/solrj/jetty/HttpJettySolrClientCompatibilityTest.java @@ -29,6 +29,9 @@ import org.apache.solr.util.SolrJettyTestRule; import org.eclipse.jetty.client.transport.HttpClientTransportOverHTTP; import org.eclipse.jetty.ee10.servlet.ServletHolder; +import org.eclipse.jetty.http2.FlowControlStrategy; +import org.eclipse.jetty.http2.SimpleFlowControlStrategy; +import org.eclipse.jetty.http2.client.HTTP2Client; import org.eclipse.jetty.http2.client.transport.HttpClientTransportOverHTTP2; import org.junit.ClassRule; @@ -49,6 +52,73 @@ public void testSystemPropertyFlag() { } } + + public void testHttp2FlowControlSystemProperties() { + // Test with custom session and stream recv window sizes + System.setProperty( + HttpJettySolrClient.HTTP2_SESSION_RECV_WINDOW_SYSPROP, String.valueOf(4 * 1024 * 1024)); + System.setProperty( + HttpJettySolrClient.HTTP2_STREAM_RECV_WINDOW_SYSPROP, String.valueOf(2 * 1024 * 1024)); + System.setProperty(HttpJettySolrClient.HTTP2_SIMPLE_FLOW_CONTROL_SYSPROP, "true"); + + try (var client = new HttpJettySolrClient.Builder().build()) { + var transport = client.getHttpClient().getTransport(); + assertTrue("Expected HTTP/2 transport", transport instanceof HttpClientTransportOverHTTP2); + + HttpClientTransportOverHTTP2 http2Transport = (HttpClientTransportOverHTTP2) transport; + HTTP2Client http2Client = http2Transport.getHTTP2Client(); + + assertEquals( + "Session recv window should be set", + 4 * 1024 * 1024, + http2Client.getInitialSessionRecvWindow()); + assertEquals( + "Stream recv window should be set", + 2 * 1024 * 1024, + http2Client.getInitialStreamRecvWindow()); + + // Verify simple flow control strategy is used + FlowControlStrategy.Factory factory = http2Client.getFlowControlStrategyFactory(); + FlowControlStrategy strategy = factory.newFlowControlStrategy(); + assertTrue( + "Expected SimpleFlowControlStrategy", strategy instanceof SimpleFlowControlStrategy); + } finally { + System.clearProperty(HttpJettySolrClient.HTTP2_SESSION_RECV_WINDOW_SYSPROP); + System.clearProperty(HttpJettySolrClient.HTTP2_STREAM_RECV_WINDOW_SYSPROP); +
System.clearProperty(HttpJettySolrClient.HTTP2_SIMPLE_FLOW_CONTROL_SYSPROP); + } + } + + @SuppressWarnings("try") // HTTP2Client is AutoCloseable but doesn't need closing when not started + public void testHttp2FlowControlDefaultsUnchangedWhenPropertiesNotSet() { + // Ensure no flow control properties are set + System.clearProperty(HttpJettySolrClient.HTTP2_SESSION_RECV_WINDOW_SYSPROP); + System.clearProperty(HttpJettySolrClient.HTTP2_STREAM_RECV_WINDOW_SYSPROP); + System.clearProperty(HttpJettySolrClient.HTTP2_SIMPLE_FLOW_CONTROL_SYSPROP); + + // Get default values from a fresh HTTP2Client for comparison + // Note: HTTP2Client doesn't need to be closed if never started + HTTP2Client defaultHttp2Client = new HTTP2Client(); + int defaultSessionWindow = defaultHttp2Client.getInitialSessionRecvWindow(); + int defaultStreamWindow = defaultHttp2Client.getInitialStreamRecvWindow(); + + try (var client = new HttpJettySolrClient.Builder().build()) { + var transport = client.getHttpClient().getTransport(); + assertTrue("Expected HTTP/2 transport", transport instanceof HttpClientTransportOverHTTP2); + + HttpClientTransportOverHTTP2 http2Transport = (HttpClientTransportOverHTTP2) transport; + HTTP2Client http2Client = http2Transport.getHTTP2Client(); + + assertEquals( + "Session recv window should remain at default", + defaultSessionWindow, + http2Client.getInitialSessionRecvWindow()); + assertEquals( + "Stream recv window should remain at default", + defaultStreamWindow, + http2Client.getInitialStreamRecvWindow()); + } + } + public void testConnectToOldNodesUsingHttp1() throws Exception { JettyConfig jettyConfig = diff --git a/stream-benchmark-results.md b/stream-benchmark-results.md new file mode 100644 index 000000000000..959150bd4ca4 --- /dev/null +++ b/stream-benchmark-results.md @@ -0,0 +1,71 @@ +# Reproducible /Stream Performance Results + +## Scenario: HTTP/2 with 1 shard + +**Result: Slow** + +```bash +./jmh.sh -p useHttp1=false -p nodeCount=2 -p numShards=1 -p numReplicas=2 -p docCount=10000 -p indexThreads=14 -p batchSize=500 -p docSizeBytes=10024 -p numTextFields=25 StreamingSearch +``` + +| Benchmark | batchSize | docCount | docSizeBytes | indexThreads | nodeCount | numReplicas | numShards | numTextFields | useHttp1 | Mode | Cnt | Score | Error | Units | +|-----------|-----------|----------|--------------|--------------|-----------|-------------|-----------|---------------|----------|------|-----|-------|-------|-------| +| StreamingSearch.stream | 500 | 10000 | 10024 | 14 | 2 | 2 | 1 | 25 | false | thrpt | 4 | 0.257 | ± 0.308 | ops/s | + +--- + +## Scenario: HTTP/2 with 12 shards + +**Result: Hangs indefinitely** + +```bash +./jmh.sh -p useHttp1=false -p nodeCount=2 -p numShards=12 -p numReplicas=2 -p docCount=10000 -p indexThreads=14 -p batchSize=500 -p docSizeBytes=10024 -p numTextFields=25 StreamingSearch +``` + +It appears the server's flow control forcibly sets recvWindow to 0 on the offending client. There appear to be too many concurrent streams competing for the same shared session window. I'm attaching flow-control-stall.log, which gives a more detailed view of this response stall.
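For anyone who wants to poke at the stall outside JMH, here is a rough standalone sketch of a comparable streaming request using SolrJ's public streaming API (CloudSolrStream). It assumes a 12-shard collection already exists; the zkHost `localhost:9983`, the collection name `bench12`, and the field list are placeholders, and the commented-out properties are the client-side knobs added in this patch.

```java
import org.apache.solr.client.solrj.io.SolrClientCache;
import org.apache.solr.client.solrj.io.Tuple;
import org.apache.solr.client.solrj.io.stream.CloudSolrStream;
import org.apache.solr.client.solrj.io.stream.StreamContext;
import org.apache.solr.common.params.ModifiableSolrParams;

public class StreamStallSketch {
  public static void main(String[] args) throws Exception {
    // Optional: the client-side flow-control knobs introduced by this patch.
    // System.setProperty("solr.http2.initialStreamRecvWindow", "500000");
    // System.setProperty("solr.http2.initialSessionRecvWindow", "96000000");

    // Same shape of request as StreamingSearch.stream: all docs, sorted by id.
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("q", "*:*");
    params.set("fl", "id,text1_ts,int1_i_dv");
    params.set("sort", "id asc");
    params.set("rows", "10000");

    StreamContext context = new StreamContext();
    SolrClientCache cache = new SolrClientCache();
    context.setSolrClientCache(cache);

    // "localhost:9983" and "bench12" (a 12-shard collection) are placeholders.
    CloudSolrStream stream = new CloudSolrStream("localhost:9983", "bench12", params);
    stream.setStreamContext(context);
    try {
      stream.open();
      long count = 0;
      for (Tuple tuple = stream.read(); !tuple.EOF; tuple = stream.read()) {
        count++;
      }
      System.out.println("read " + count + " tuples");
    } finally {
      stream.close();
      cache.close();
    }
  }
}
```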
+ +--- + +## Scenario: HTTP/2 with 12 shards and LOWER initialStreamRecvWindow + +**Result: Slow** + +```bash +./jmh.sh -p useHttp1=false -p nodeCount=2 -p numShards=12 -p numReplicas=2 -p docCount=10000 -p indexThreads=14 -p batchSize=500 -p docSizeBytes=10024 -p numTextFields=25 -jvmArgs -Dsolr.http2.initialStreamRecvWindow=500000 StreamingSearch +``` + +| Benchmark | batchSize | docCount | docSizeBytes | indexThreads | nodeCount | numReplicas | numShards | numTextFields | useHttp1 | Mode | Cnt | Score | Error | Units | +|-----------|-----------|----------|--------------|--------------|-----------|-------------|-----------|---------------|----------|------|-----|-------|-------|-------| +| StreamingSearch.stream | 500 | 10000 | 10024 | 14 | 2 | 2 | 12 | 25 | false | thrpt | 4 | 0.344 | ± 0.027 | ops/s | + +Setting initialStreamRecvWindow lower helps avoid the stall because each response is confined to a smaller portion of the shared session window, so each shard progresses consistently, albeit slowly. This result is still poor. + +--- + +## Scenario: HTTP/2 with 12 shards and HIGHER initialSessionRecvWindow + +**Result: Slow** + +```bash +./jmh.sh -p useHttp1=false -p nodeCount=2 -p numShards=12 -p numReplicas=2 -p docCount=10000 -p indexThreads=14 -p batchSize=500 -p docSizeBytes=10024 -p numTextFields=25 -jvmArgs -Dsolr.http2.initialStreamRecvWindow=8000000 -jvmArgs -Dsolr.http2.initialSessionRecvWindow=96000000 StreamingSearch +``` + +| Benchmark | batchSize | docCount | docSizeBytes | indexThreads | nodeCount | numReplicas | numShards | numTextFields | useHttp1 | Mode | Cnt | Score | Error | Units | +|-----------|-----------|----------|--------------|--------------|-----------|-------------|-----------|---------------|----------|------|-----|-------|-------|-------| +| StreamingSearch.stream | 500 | 10000 | 10024 | 14 | 2 | 2 | 12 | 25 | false | thrpt | 4 | 0.332 | ± 0.050 | ops/s | + +In other words, increasing the client-side receive windows doesn't help. + +--- + +## Scenario: HTTP/1 with 12 shards + +**Result: Fast** + +```bash +./jmh.sh -p useHttp1=true -p nodeCount=2 -p numShards=12 -p numReplicas=2 -p docCount=10000 -p indexThreads=14 -p batchSize=500 -p docSizeBytes=10024 -p numTextFields=25 -jvmArgs -Dsolr.http1=true StreamingSearch +``` + +| Benchmark | batchSize | docCount | docSizeBytes | indexThreads | nodeCount | numReplicas | numShards | numTextFields | useHttp1 | Mode | Cnt | Score | Error | Units | +|-----------|-----------|----------|--------------|--------------|-----------|-------------|-----------|---------------|----------|------|-----|-------|-------|-------| +| StreamingSearch.stream | 500 | 10000 | 10024 | 14 | 2 | 2 | 12 | 25 | true | thrpt | 4 | 2.301 | ± 0.676 | ops/s |
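For completeness, the fast HTTP/1 run above flips two switches: the benchmark's `useHttp1` param and `-Dsolr.http1=true`. A rough sketch of doing the same programmatically is below; note that `useHttp1_1(true)` is an assumption inferred from `Builder.shouldUseHttp1_1()` in this diff, so treat it as illustrative rather than a verified API.

```java
import org.apache.solr.client.solrj.jetty.HttpJettySolrClient;

public class Http1TransportSketch {
  public static void main(String[] args) throws Exception {
    // Assumption: internal clients consult this property when picking a transport
    // (the new log line in createHttpClient prints it for exactly that reason).
    System.setProperty("solr.http1", "true");

    // Assumed builder method, inferred from shouldUseHttp1_1() in this diff.
    try (HttpJettySolrClient client =
        new HttpJettySolrClient.Builder().useHttp1_1(true).build()) {
      // With the HTTP/1.1 transport each in-flight response gets its own connection,
      // so there is no shared HTTP/2 session window for 12 shard responses to fight over.
      System.out.println("transport: " + client.getHttpClient().getTransport());
    }
  }
}
```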