From d93a1eac95cc8790d55457b2438f482c097929d6 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Sat, 19 Oct 2024 11:08:26 -0400 Subject: [PATCH 01/21] Look at some sections that make editorial comments and rework them a bit to not be "prod embedded zk bad" --- .../deployment-guide/pages/installing-solr.adoc | 9 +++------ .../pages/taking-solr-to-production.adoc | 15 +++++++++++---- .../pages/zookeeper-ensemble.adoc | 6 ++++-- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/installing-solr.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/installing-solr.adoc index 8dceba13dbe5..529e90da3a98 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/installing-solr.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/installing-solr.adoc @@ -59,7 +59,7 @@ A very good blog post that discusses the issues to consider is https://lucidwork One thing to note when planning your installation is that a hard limit exists in Lucene for the number of documents in a single index: approximately 2.14 billion documents (2,147,483,647 to be exact). In practice, it is highly unlikely that such a large number of documents would fit and perform well in a single index, and you will likely need to distribute your index across a cluster before you ever approach this number. -If you know you will exceed this number of documents in total before you've even started indexing, it's best to plan your installation with xref:cluster-types.adoc#solrcloud-mode[SolrCloud] as part of your design from the start. +Fortunately, by default Solr is configured to be deployed in xref:cluster-types.adoc#solrcloud-mode[SolrCloud] mode to let you scale up. 
== Package Installation @@ -84,9 +84,7 @@ This directory includes several important scripts that will make using Solr easi solr and solr.cmd::: This is xref:solr-control-script-reference.adoc[Solr's Control Script], also known as `bin/solr` (*nix) / `bin/solr.cmd` (Windows). This script is the preferred tool to start and stop Solr. -You can also create collections or cores, configure authentication, and work with configuration files when running in SolrCloud mode. - -post::: The xref:indexing-guide:post-tool.adoc[], which provides a simple command line interface for POSTing content to Solr. +You can also create collections or cores, configure authentication, work with configuration files and even index documents into Solr. solr.in.sh and solr.in.cmd::: These are property files for *nix and Windows systems, respectively. @@ -198,8 +196,7 @@ For instance, to launch the "techproducts" example, you would do: bin/solr start --cloud -e techproducts ---- -Currently, the available examples you can run are: techproducts, schemaless, and cloud. -See the section xref:solr-control-script-reference.adoc#running-with-example-configurations[Running with Example Configurations] for details on each example. +See the section xref:solr-control-script-reference.adoc#running-with-example-configurations[Running with Example Configurations] for details on all the examples available. .Going deeper with SolrCloud NOTE: Running the `cloud` example demonstrates running multiple nodes of Solr using xref:cluster-types.adoc#solrcloud-mode[SolrCloud] mode. 
diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/taking-solr-to-production.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/taking-solr-to-production.adoc index 4adb5fd6b574..cfc2c8b3a663 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/taking-solr-to-production.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/taking-solr-to-production.adoc @@ -247,10 +247,15 @@ You can also refer to xref:jvm-settings.adoc[] for tuning your memory and garbag The `bin/solr` script uses the `-XX:+CrashOnOutOfMemoryError` JVM option to crash Solr on `OutOfMemoryError` exceptions. This behavior is recommended. In SolrCloud mode ZooKeeper will be immediately notified that a node has experienced a non-recoverable error. -=== Going to Production with SolrCloud -To run Solr in SolrCloud mode, you need to set the `ZK_HOST` variable in the include file to point to your ZooKeeper ensemble. -Running the embedded ZooKeeper is not supported in production environments. +=== Going to Production with SolrCloud with Embedded ZooKeeper + +Solr runs by default in SolrCloud mode with an embedded ZooKeeper, no additional configuration required. + +=== Going to Production with SolrCloud with External ZooKeeper Ensemble + +To run Solr in SolrCloud mode with an external ZooKeeper ensemble, you need to set the `ZK_HOST` variable in the include file to point to your ZooKeeper ensemble. + For instance, if you have a ZooKeeper ensemble hosted on the following three hosts on the default client port 2181 (zk1, zk2, and zk3), then you would set: [source,bash] @@ -258,7 +263,9 @@ For instance, if you have a ZooKeeper ensemble hosted on the following three hos ZK_HOST=zk1,zk2,zk3 ---- -When the `ZK_HOST` variable is set, Solr will launch in "cloud" mode. +When the `ZK_HOST` variable is set, Solr will launch and connect to the defined ZooKeepers instead of starting an embedded ZooKeeper. 
+ +See xref:zookeeper-ensemble[ZooKeeper Ensemble Configuration] for more on setting up ZooKeeper. ==== ZooKeeper chroot diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/zookeeper-ensemble.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/zookeeper-ensemble.adoc index 7bc232bb5ff0..5990621f97db 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/zookeeper-ensemble.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/zookeeper-ensemble.adoc @@ -17,12 +17,14 @@ // specific language governing permissions and limitations // under the License. -Although Solr comes bundled with http://zookeeper.apache.org[Apache ZooKeeper], you are strongly encouraged to use an external ZooKeeper setup in production. +Although Solr comes bundled with http://zookeeper.apache.org[Apache ZooKeeper], depending on your scaling needs you may need to use a external ZooKeeper setup in production. -While using Solr's embedded ZooKeeper instance is fine for getting started, you shouldn't use this in production because it does not provide any failover: if the Solr instance that hosts ZooKeeper shuts down, ZooKeeper is also shut down. +ERIC: this is editorial content that should be moved. +While using Solr's embedded ZooKeeper instance is fine for smaller setups, you shouldn't use this in production because it does not provide any failover: if the Solr instance that hosts ZooKeeper shuts down, ZooKeeper is also shut down. Any shards or Solr instances that rely on it will not be able to communicate with it or each other. The solution to this problem is to set up an external ZooKeeper _ensemble_, which is a number of servers running ZooKeeper that communicate with each other to coordinate the activities of the cluster. +ERIC: End editorial. == How Many ZooKeeper Nodes? 
From 677d71f329385536bdfe140b185072ee2df31a87 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Sat, 19 Oct 2024 11:08:55 -0400 Subject: [PATCH 02/21] Add Diagramming from text capability --- solr/solr-ref-guide/build.gradle | 13 +++++++++++-- solr/solr-ref-guide/playbook.template.yml | 2 ++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/solr/solr-ref-guide/build.gradle b/solr/solr-ref-guide/build.gradle index 1e455f298497..dbf6f597984a 100644 --- a/solr/solr-ref-guide/build.gradle +++ b/solr/solr-ref-guide/build.gradle @@ -239,12 +239,20 @@ task downloadAntoraLunrExtension(type: NpmTask) { outputs.dir("${project.ext.nodeProjectDir}/node_modules/@antora/lunr-extension") } +task downloadAsciiDoctorKrokiExtension(type: NpmTask) { + group = 'Build Dependency Download' + args = ["install", "@antora/lunr-extension@${project.ext.antoraLunrExtensionVersion}"] + + inputs.property("Antora lunr-extension version", project.ext.antoraLunrExtensionVersion) + outputs.dir("${project.ext.nodeProjectDir}/node_modules/@antora/lunr-extension") +} + task downloadAsciidoctorMathjaxExtension(type: NpmTask) { group = 'Build Dependency Download' - args = ["install", "@djencks/asciidoctor-mathjax@${project.ext.asciidoctorMathjaxVersion}"] + args = ["install", "asciidoctor-kroki"] inputs.property("asciidoctor-mathjax version", project.ext.asciidoctorMathjaxVersion) - outputs.dir("${project.ext.nodeProjectDir}/node_modules/@djencks/asciidoctor-mathjax") + outputs.dir("${project.ext.nodeProjectDir}/node_modules/asciidoctor-kroki") } task downloadAsciidoctorTabsExtension(type: NpmTask) { @@ -262,6 +270,7 @@ task downloadAntora { dependsOn tasks.downloadAntoraCli dependsOn tasks.downloadAntoraSiteGenerator dependsOn tasks.downloadAntoraLunrExtension + dependsOn tasks.downloadAsciiDoctorKrokiExtension dependsOn tasks.downloadAsciidoctorMathjaxExtension dependsOn tasks.downloadAsciidoctorTabsExtension } diff --git a/solr/solr-ref-guide/playbook.template.yml 
b/solr/solr-ref-guide/playbook.template.yml index ad283d03392e..8393fcd8adda 100644 --- a/solr/solr-ref-guide/playbook.template.yml +++ b/solr/solr-ref-guide/playbook.template.yml @@ -47,9 +47,11 @@ output: asciidoc: attributes: stem: + kroki-fetch-diagram: true extensions: - '@djencks/asciidoctor-mathjax' - '@asciidoctor/tabs' + - asciidoctor-kroki runtime: fetch: true From e8a1655248b981f952f3218990c71359d9ad32d2 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Sat, 19 Oct 2024 11:09:31 -0400 Subject: [PATCH 03/21] Add in a single page to talk about deployment strategies. This is going to be a lot of text and diagrams and may become multiple pages. --- .../deployment-guide/deployment-nav.adoc | 1 + .../thinking-about-deployment-strategy.adoc | 55 +++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc diff --git a/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc b/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc index a41206030a78..ac15afead540 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc @@ -20,6 +20,7 @@ * xref:solr-control-script-reference.adoc[] * Installation & Deployment +** xref:thinking-about-deployment-strategy.adoc[] ** xref:system-requirements.adoc[] ** xref:installing-solr.adoc[] ** xref:taking-solr-to-production.adoc[] diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc new file mode 100644 index 000000000000..8c6382ee2172 --- /dev/null +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc @@ -0,0 +1,55 @@ += Thinking About Deployment Strategy +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor 
license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +This section embodies the Solr community's thoughts on best practices for deploying Solr depending on your needs. + +Soemthing about the various directions you can sacle... David Smiley had some good words. +Query load. Index Load. Number of Collections. Densitiy of Data (Vectors). + + +Then, a section about what to thikn about. + +=== Solr from smallest to largest. + +When we start up Solr on our computer, we're already starting Solr with the underpinnings required to let Solr scale in a clustered fashion, the coordination library ZooKeeper. +ZooKeeper is the unifying technology that supports maintaining state from a single node up to many 1000's of nodes. + +If you only need a single Solr node, then it's perfectly reasonable to start Solr with `bin/solr start`. You will have a single Solr node running in SolrCloud mode, with all the nice APIs and features that SolrCloud mode provides. + +Yes, if you Solr goes down, you won't have any diaster recovery or failure, but that is okay. + + + +==== Introducing Fail Over + +[graphviz] +.... +digraph foo { + node [style=rounded] + node1 [shape=box] + node2 [fillcolor=yellow, style="rounded,filled", shape=diamond] + node3 [shape=record, label="{ a | b | c }"] + + node1 -> node2 -> node3 +} +.... 
+ + +=== What about Embedding Solr in my Java Application? + +Yes, there is embedded Solr. YMMV. From ebbe64994b64008f9842af7aebb6fe7339ed8b81 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Sat, 19 Oct 2024 12:31:18 -0400 Subject: [PATCH 04/21] First pass through thinking about scaling up Solr. --- .../deployment-guide/deployment-nav.adoc | 3 +- .../thinking-about-deployment-strategy.adoc | 160 ++++++++++++++++-- 2 files changed, 147 insertions(+), 16 deletions(-) diff --git a/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc b/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc index ac15afead540..754aa383202c 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc @@ -18,9 +18,8 @@ .Deployment Guide * xref:solr-control-script-reference.adoc[] - +* xref:thinking-about-deployment-strategy.adoc[] * Installation & Deployment -** xref:thinking-about-deployment-strategy.adoc[] ** xref:system-requirements.adoc[] ** xref:installing-solr.adoc[] ** xref:taking-solr-to-production.adoc[] diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc index 8c6382ee2172..dc410866f011 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc @@ -18,38 +18,170 @@ This section embodies the Solr community's thoughts on best practices for deploying Solr depending on your needs. -Soemthing about the various directions you can sacle... David Smiley had some good words. +NOTE: Soemthing about the various directions you can sacle... David Smiley had some good words. Query load. Index Load. Number of Collections. Densitiy of Data (Vectors). +== Solr from smallest to largest. 
-Then, a section about what to thikn about. +When we start up Solr on our computer, we're already starting Solr with the underpinnings required to let Solr scale in a smooth fashion, the coordination library ZooKeeper. +ZooKeeper is the unifying technology that supports maintaining state from a single node up to many 1000's of nodes. -=== Solr from smallest to largest. +=== Simplest Setup -When we start up Solr on our computer, we're already starting Solr with the underpinnings required to let Solr scale in a clustered fashion, the coordination library ZooKeeper. -ZooKeeper is the unifying technology that supports maintaining state from a single node up to many 1000's of nodes. +If you only need a single Solr node, then it's perfectly reasonable to start Solr with `bin/solr start`. You will have a single Solr node running in SolrCloud mode, with all the lovely APIs and features that SolrCloud provides. + +[graphviz] +.... +digraph single_node { + node [style=rounded] + node1 [shape=box, fillcolor=yellow] + + node1 +} +.... -If you only need a single Solr node, then it's perfectly reasonable to start Solr with `bin/solr start`. You will have a single Solr node running in SolrCloud mode, with all the nice APIs and features that SolrCloud mode provides. +Use this approach when: -Yes, if you Solr goes down, you won't have any diaster recovery or failure, but that is okay. +* You have minimal load +* You can restart Solr and reindex your data quickly +* You are just playing around +* You aren't worried about HA or Failover +* You want the simplest deployment approach. - -==== Introducing Fail Over +=== Introducing Fail Over + +The next most common setup after a single node is having two seperate nodes running on seperate machines, with one as the xref:cluster-types.adoc#leaders[Leader] and the other as the Follower. + +There are two approaches that you can take, one that uses loosely coupled Solr nodes with embedded ZooKeepers, and one with a shared ZooKeeper. 
Both of these work just fine if you only need a single xref:cluster-types.adoc#shards[Shard] to store your data. If you need multiple Shards for your data volume, skip down below. + +==== Loosely coupled Solr Nodes + +The first is using replication to copy complete Lucene segments over from the Leader to the Followers. +This allows you to run two completely independent Solr nodes and copy the data over. +See the xref:user-managed-index-replication.adoc[User Managed Index Replication] page to learn more about setting this up. + +NOTE: Need to update user-managed-index-replication.adoc to talk about doing this when embedded zk is set up. + +NOTE: Reference https://github.com/apache/solr/pull/1875 [graphviz] .... -digraph foo { +digraph leader_follower_replication { node [style=rounded] + leader [shape=box] + follower [fillcolor=yellow, style="rounded,filled"] + + leader -> follower +} +.... + +You can get even fancier with this, by introducing the concept of Repeater nodes. + +[graphviz] +.... +digraph leader_repeater_follower_replication { + node [style=rounded] + leader [shape=box] + repeater [fillcolor=yellow, style="rounded,filled"] + follower [shape=box] + + leader -> repeater -> follower +} +.... + +And even multiple followers: + +[graphviz] +.... +digraph leader_repeater_followers_replication { + node [style=rounded] + leader [shape=box] + repeater [shape=box] + follower1 [fillcolor=yellow, style="rounded,filled"] + follower2 [fillcolor=yellow, style="rounded,filled"] + follower3 [fillcolor=yellow, style="rounded,filled"] + + leader -> repeater + repeater -> follower1 + repeater -> follower2 + repeater -> follower3 +} +.... + +Use these approaches when: + +* You want each Solr node to be completely independent in state. No shared ZooKeeper for managing interactions. +* You don't need any kind of realtime/near real time updates. +* You potentially have a slow network boundary between your nodes, and want something robust between them. 
+* All your updates can go to the leader node. + +Some con's to this approach are: + +* This is pull based, so the segments are pulled by the bottom node from each node above them, which introduces latency and potential for slightly differnet views of the data in the Leader and the various Followers. +* You need to set up via various API calls all the interactions between the various nodes. + +==== Embedded ZooKeeper Ensemble Setup + +NOTE: This needs Jason's https://github.com/apache/solr/pull/2391 to get to done done! + +The second approach you can take is to use a simple ZooKeeper xref:solr-glossary.adoc#ensemble[Ensemble] setup. You can start a pair of Solr's and have their embedded ZooKeeper join each other to form an Ensemble. And yes, I hear you when you say "this isn't a odd number and ZK quorums should be an odd number to avoid split brain etc". + +NOTE: What is the difference between fail over and high availablity? + +[graphviz] +.... +graph simple_embedded_zk_ensemble { + node [style=rounded] + layout=neato node1 [shape=box] - node2 [fillcolor=yellow, style="rounded,filled", shape=diamond] - node3 [shape=record, label="{ a | b | c }"] + node2 [shape=box] + + node1 -- node2 + node2 -- node1 +} +.... + + +Use this approach when: + +* You have only two Solr nodes and they are close to each other in network terms. +* This appraoch is for when you want fail over, but you aren't worried about high availablity. You have a load balancer in front of the two Solr nodes and it notices one goes away and balances traffic to the other one for querying. +* You will deal with the fall out to indexing if one of the nodes goes away. - node1 -> node2 -> node3 +You can then scale this up to multiple Solr's: + +[graphviz] +.... 
+graph simple_embedded_zk_ensemble { + node [style=rounded] + layout=neato + node1 [shape=box] + node2 [shape=box] + node3 [shape=box] + node4 [shape=box] + node5 [shape=box] + + node1 -- node2 + node2 -- node3 + node3 -- node4 + node4 -- node5 + node5 -- node1 } .... +Use these approaches when: + +* You want to be able to split your logical Collection across multiple Shards. You want to be able to distribute Replicas around the cluster. +* You don't want to go through the effort of deploying a seperate ZK ensemble independently. And honestly, you don't need to either. + + +Some con's to this approach are: + +* Having five ZK's all updating each other is fine, but it starts to break down if you went to 9 or 11 ZooKeeper forming the Quorum. +* + -=== What about Embedding Solr in my Java Application? +== What about Embedding Solr in my Java Application? Yes, there is embedded Solr. YMMV. From b379820ea81c3761e7cb94098881b1aba756ee86 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Sat, 19 Oct 2024 14:13:21 -0400 Subject: [PATCH 05/21] More --- .../thinking-about-deployment-strategy.adoc | 39 ++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc index dc410866f011..77c7a9b128d9 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc @@ -179,8 +179,45 @@ Use these approaches when: Some con's to this approach are: * Having five ZK's all updating each other is fine, but it starts to break down if you went to 9 or 11 ZooKeeper forming the Quorum. -* +* We currently don't have any flexible resizing of the quorum. You kind of just have to pick it. 
+=== Moving Beyond the Basic Cluster + +NOTE: This isn't yet fleshed out how it works! + +Solr has a concept of node xref:deployment-guide:node-roles.adoc#ensemble[Roles] that could be leveraged to establish a set of Solr nodes that run embedded ZooKeeper, and then a larger set of Solr nodes that connect to those ZooKeepers. We currently have the concept of "data" nodes that hosts shards and replicas, we can introduce a "zookeeper" node that also runs the embedded ZooKeeper process. + +This will work well as you grow from six to 12 nodes in your cluster. + +[graphviz] +.... +graph simple_embedded_zk_ensemble { + node [style=rounded] + layout=circo + overlap=false + node1 [shape=box, label="data, zookeeper", fillcolor=yellow, style="rounded,filled"] + node2 [shape=box, label="data, zookeeper", fillcolor=yellow, style="rounded,filled"] + node3 [shape=box, label="data, zookeeper", fillcolor=yellow, style="rounded,filled"] + node4 [shape=box, label="data"] + node5 [shape=box, label="data"] + node6 [shape=box, label="data"] + node7 [shape=box, label="data"] + node8 [shape=box, label="data"] + node9 [shape=box, label="data"] + + + node1 -- node2 + node2 -- node3 + node3 -- node1 + node3 -- node4 + node4 -- node5 + node5 -- node6 + node6 -- node7 + node7 -- node8 + node8 -- node9 + node9 -- node1 +} +.... == What about Embedding Solr in my Java Application? 
From 728c4b21bc1171befc232fd766d408596c5cfb65 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Mon, 21 Oct 2024 15:49:10 -0400 Subject: [PATCH 06/21] Fix up references --- solr/solr-ref-guide/build.gradle | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/solr/solr-ref-guide/build.gradle b/solr/solr-ref-guide/build.gradle index dbf6f597984a..1d9c1a28f666 100644 --- a/solr/solr-ref-guide/build.gradle +++ b/solr/solr-ref-guide/build.gradle @@ -63,6 +63,7 @@ ext { antoraLunrExtensionVersion = "1.0.0-alpha.8" asciidoctorMathjaxVersion = "0.0.9" asciidoctorTabsVersion = "1.0.0-beta.6" + asciidoctorKrokiVersion = "0.18.1" linkCheckerVersion = "1.4.2" gulpCliVersion = "2.3.0" // Most recent commit as of 2022-06-24, this repo does not have tags @@ -239,19 +240,19 @@ task downloadAntoraLunrExtension(type: NpmTask) { outputs.dir("${project.ext.nodeProjectDir}/node_modules/@antora/lunr-extension") } -task downloadAsciiDoctorKrokiExtension(type: NpmTask) { +task downloadAsciidoctorMathjaxExtension(type: NpmTask) { group = 'Build Dependency Download' - args = ["install", "@antora/lunr-extension@${project.ext.antoraLunrExtensionVersion}"] + args = ["install", "@djencks/asciidoctor-mathjax@${project.ext.asciidoctorMathjaxVersion}"] - inputs.property("Antora lunr-extension version", project.ext.antoraLunrExtensionVersion) - outputs.dir("${project.ext.nodeProjectDir}/node_modules/@antora/lunr-extension") + inputs.property("Antora asciidoctor-mathjax version", project.ext.asciidoctorMathjaxVersion) + outputs.dir("${project.ext.nodeProjectDir}/node_modules/@djencks/asciidoctor-mathjax") } -task downloadAsciidoctorMathjaxExtension(type: NpmTask) { +task downloadAsciiDoctorKrokiExtension(type: NpmTask) { group = 'Build Dependency Download' args = ["install", "asciidoctor-kroki"] - inputs.property("asciidoctor-mathjax version", project.ext.asciidoctorMathjaxVersion) + inputs.property("asciidoctor-kroki version", project.ext.asciidoctorKrokiVersion) 
outputs.dir("${project.ext.nodeProjectDir}/node_modules/asciidoctor-kroki") } From 37ceed07fd73f44389ed0cf9792dd63f7d6bfc4f Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Tue, 22 Oct 2024 12:49:55 -0400 Subject: [PATCH 07/21] General framework for thinking about this is in place --- .../thinking-about-deployment-strategy.adoc | 179 +++++++++++++++++- 1 file changed, 176 insertions(+), 3 deletions(-) diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc index 77c7a9b128d9..97104cef8daf 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc @@ -16,12 +16,47 @@ // specific language governing permissions and limitations // under the License. +//// +This page has a number of graphs to help you visualize different Solr deployment strategies. + +The site https://magjac.com/graphviz-visual-editor/ allows you to play with those graphs in real time. +//// + This section embodies the Solr community's thoughts on best practices for deploying Solr depending on your needs. NOTE: Soemthing about the various directions you can sacle... David Smiley had some good words. Query load. Index Load. Number of Collections. Densitiy of Data (Vectors). -== Solr from smallest to largest. + +== Overview +There is a logical progression to scaling Solr. + +[graphviz] +.... +digraph { + node [style=rounded] + //layout=circo + overlap=false + //rankdir=LR; // Set the direction to left to right + node1 [label="1. Single \nNode"] + node2 [label="2. Nodes \nw/ Replication"] + node3 [label="2. Nodes \nw/ Embedded ZK"] + node5 [label="4. roles=data,zookeeper"] + node6 [label="5. External \nZK Ensemble"] + node7 [label="6. 
Solr Operator\n + Kubernetes"] + + node1 -> node2 + node2 -> node5 + node1 -> node3 + node3 -> node5 + node5 -> node6 + node6 -> node7 + +} +.... + + +== Solr from smallest to largest When we start up Solr on our computer, we're already starting Solr with the underpinnings required to let Solr scale in a smooth fashion, the coordination library ZooKeeper. ZooKeeper is the unifying technology that supports maintaining state from a single node up to many 1000's of nodes. @@ -146,7 +181,7 @@ graph simple_embedded_zk_ensemble { Use this approach when: * You have only two Solr nodes and they are close to each other in network terms. -* This appraoch is for when you want fail over, but you aren't worried about high availablity. You have a load balancer in front of the two Solr nodes and it notices one goes away and balances traffic to the other one for querying. +* This approach is for when you want fail over, but you aren't worried about high availablity. You have a load balancer in front of the two Solr nodes and it notices one goes away and balances traffic to the other one for querying. * You will deal with the fall out to indexing if one of the nodes goes away. You can then scale this up to multiple Solr's: @@ -183,7 +218,7 @@ Some con's to this approach are: === Moving Beyond the Basic Cluster -NOTE: This isn't yet fleshed out how it works! +NOTE: This isn't yet fleshed out as to how it works! Solr has a concept of node xref:deployment-guide:node-roles.adoc#ensemble[Roles] that could be leveraged to establish a set of Solr nodes that run embedded ZooKeeper, and then a larger set of Solr nodes that connect to those ZooKeepers. We currently have the concept of "data" nodes that hosts shards and replicas, we can introduce a "zookeeper" node that also runs the embedded ZooKeeper process. @@ -219,6 +254,144 @@ graph simple_embedded_zk_ensemble { } .... 
+=== A dedicated ZooKeeper ensemble + +As your load in the cluster goes up, sharing ZooKeeper workloads with Solr workloads may become a bottleneck. + +NOTE: I wonder if this ever goes away by just having Solr nodes with the role `zookeeper` only? + +[graphviz] +.... +graph dedicate_zk_ensemble { + node [style=rounded] + layout=osage + overlap=false + node1 [shape=box, label=" zookeeper", fillcolor=yellow, style="rounded,filled"] + node2 [shape=box, label=" zookeeper", fillcolor=yellow, style="rounded,filled"] + node3 [shape=box, label=" zookeeper", fillcolor=yellow, style="rounded,filled"] + node4 [shape=box] + node5 [shape=box] + node6 [shape=box] + node7 [shape=box] + node8 [shape=box] + node9 [shape=box] + node10 [shape=box] + node11 [shape=box] + node12 [shape=box] + node13 [shape=box] + node14 [shape=box] + node15 [shape=box] + node16 [shape=box] + node17 [shape=box] + node18 [shape=box] + node19 [shape=box] + node20 [shape=box] + +} +.... + +Use this approach when: + +* You go beyond 12 Solr nodes up to 25 Solr nodes. +* You are leveraging all the features of SolrCloud to support multiple collections and different types of query and load characteritics, especially tuning shard and replica counts. +* You may need to move to five ZooKeepers. + +Some con's to this approach are: + +* You are responsible for configuring the external ZooKeeper ensemble. +* If you have any issues with the ZooKeeper ensemble then you need to define how you will handle failover/HA. + +=== Going massive means going Kubernetes + +NOTE: What would a diagram look like? How to show the operator? + +Beyond 25 nodes, you really need to think about more advanced tooling for managing all your nodes. + +[graphviz] +.... 
+graph kubernetes_setup { + fontname="Helvetica,Arial,sans-serif" + node [fontname="Helvetica,Arial,sans-serif"] + edge [fontname="Helvetica,Arial,sans-serif"] + layout=fdp + pack=1 + + Operator [fillcolor=aqua, style="filled"] + + zk1 [shape=box, label=" zookeeper", fillcolor=yellow, style="rounded,filled"] + zk2 [shape=box, label=" zookeeper", fillcolor=yellow, style="rounded,filled"] + zk3 [shape=box, label=" zookeeper", fillcolor=yellow, style="rounded,filled"] + + subgraph clusterKubernetes { + + Operator; + subgraph clusterSolr { + node1 + node2 + node3 + node4 + node5 + node6 + node7 + node8 + node9 + node10 + node11 + node12 + node13 + node14 + node15 + node16 + node17 + node18 + node19 + node20 + node21 + node22 + node23 + node24 + node25 + node26 + node27 + node28 + node29 + node30 + + } + subgraph clusterZK { + zk1 -- zk2; + zk2 -- zk3; + zk3 -- zk1; + } + } + + clusterSolr -- clusterZK +} +.... + +Use this approaches when: + +* You go beyond 24 Solr nodes. + +Some con's to this approach are: + +* Kubernetes is much like Oregon Trail, take a friend. + +== What about User Managed Solr? + +The User Managed mode is no longer recommended, and historically was mostly used because running embedded ZooKeeper was viewed as difficult. +These days, running embedded ZooKeeper is just fine, and that eliminates the main reason for User Managed. User Managed also doesn't support all the features and APIs that SolrCloud supports. + == What about Embedding Solr in my Java Application? Yes, there is embedded Solr. YMMV. + +== What about Massive Multi-Tenant Use Cases? + +NOTE: Paging David Smiley...? + +Running thousands of collections, each representing it's own tenant? 
+Yeah, there are some things you need to do: + +* Item 1 +* Item 2 From 6aad26b7cfd38cdd9475d5bf276742b8732a4b8e Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Mon, 18 Aug 2025 08:59:41 -0400 Subject: [PATCH 08/21] update to latest build processes --- gradle/libs.versions.toml | 2 ++ solr/solr-ref-guide/build.gradle | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 8f37037ec0e5..db92ddf2c2f8 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -47,6 +47,8 @@ apache-zookeeper = "3.9.3" # @keep for version alignment apiguardian = "1.1.2" aqute-bnd = "6.4.1" +# @keep Asciidoctor kroki version used in ref-guide +asciidoctor-kroki = "0.18.1" # @keep Asciidoctor mathjax version used in ref-guide asciidoctor-mathjax = "0.0.9" # @keep Asciidoctor tabs version used in ref-guide diff --git a/solr/solr-ref-guide/build.gradle b/solr/solr-ref-guide/build.gradle index f63e1009a780..cba526b87924 100644 --- a/solr/solr-ref-guide/build.gradle +++ b/solr/solr-ref-guide/build.gradle @@ -241,7 +241,7 @@ task downloadAsciiDoctorKrokiExtension(type: NpmTask) { group = 'Build Dependency Download' args = ["install", "asciidoctor-kroki"] - inputs.property("asciidoctor-kroki version", project.ext.asciidoctorKrokiVersion) + inputs.property("asciidoctor-kroki version", libs.versions.asciidoctor.kroki.get()) outputs.dir("${project.ext.nodeProjectDir}/node_modules/asciidoctor-kroki") } From 5b193b6bb3feb0bd683a82e04a674849d184d6c5 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Mon, 18 Aug 2025 09:18:34 -0400 Subject: [PATCH 09/21] fix link to glossary --- .../pages/thinking-about-deployment-strategy.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc index 97104cef8daf..a92299456768 100644 --- 
a/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc @@ -160,7 +160,7 @@ Some con's to this approach are: NOTE: This needs Jason's https://github.com/apache/solr/pull/2391 to get to done done! -The second approach you can take is to use a simple ZooKeeper xref:solr-glossary.adoc#ensemble[Ensemble] setup. You can start a pair of Solr's and have their embedded ZooKeeper join each other to form an Ensemble. And yes, I hear you when you say "this isn't a odd number and ZK quorums should be an odd number to avoid split brain etc". +The second approach you can take is to use a simple ZooKeeper xref:getting-started:solr-glossary.adoc#ensemble[Ensemble] setup. You can start a pair of Solr's and have their embedded ZooKeeper join each other to form an Ensemble. And yes, I hear you when you say "this isn't a odd number and ZK quorums should be an odd number to avoid split brain etc". NOTE: What is the difference between fail over and high availablity? From 8524a4da744135823d1d7fe715f78adb1565bec7 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Mon, 18 Aug 2025 11:10:05 -0400 Subject: [PATCH 10/21] Down to one last broken link --- .../pages/thinking-about-deployment-strategy.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc index a92299456768..1f4391ee097c 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc @@ -220,7 +220,7 @@ Some con's to this approach are: NOTE: This isn't yet fleshed out as to how it works! 
-Solr has a concept of node xref:deployment-guide:node-roles.adoc#ensemble[Roles] that could be leveraged to establish a set of Solr nodes that run embedded ZooKeeper, and then a larger set of Solr nodes that connect to those ZooKeepers. We currently have the concept of "data" nodes that hosts shards and replicas, we can introduce a "zookeeper" node that also runs the embedded ZooKeeper process. +Solr has a concept of node xref:deployment-guide:node-roles.adoc#roles[Roles] that could be leveraged to establish a set of Solr nodes that run embedded ZooKeeper, and then a larger set of Solr nodes that connect to those ZooKeepers. We currently have the concept of "data" nodes that hosts shards and replicas, we can introduce a "zookeeper" node that also runs the embedded ZooKeeper process. This will work well as you grow from six to 12 nodes in your cluster. From 570daef49b983dc7628d717581ea2657557b46ea Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Sun, 14 Sep 2025 13:59:06 -0500 Subject: [PATCH 11/21] Start seperating out use case specific suggestions from the overall topology of deployments suggestion. 
--- .../deployment-guide/deployment-nav.adoc | 3 +- ...adoc => deployment-topology-overview.adoc} | 34 +++++++++--------- .../pages/optimize-extreme-use-cases.adoc | 35 +++++++++++++++++++ 3 files changed, 55 insertions(+), 17 deletions(-) rename solr/solr-ref-guide/modules/deployment-guide/pages/{thinking-about-deployment-strategy.adoc => deployment-topology-overview.adoc} (87%) create mode 100644 solr/solr-ref-guide/modules/deployment-guide/pages/optimize-extreme-use-cases.adoc diff --git a/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc b/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc index 4d2bb969c869..85994677ed3d 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc @@ -18,11 +18,12 @@ .Deployment Guide * xref:solr-control-script-reference.adoc[] -* xref:thinking-about-deployment-strategy.adoc[] +* xref:deployment-topology-overview.adoc[] * Installation & Deployment ** xref:system-requirements.adoc[] ** xref:installing-solr.adoc[] ** xref:taking-solr-to-production.adoc[] +** xref:optimize-extreme-use-cases.adoc[] ** xref:jvm-settings.adoc[] ** xref:upgrading-a-solr-cluster.adoc[] ** xref:backup-restore.adoc[] diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc similarity index 87% rename from solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc rename to solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc index 1f4391ee097c..d3ae3d1a6ed2 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/thinking-about-deployment-strategy.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc @@ -1,4 +1,4 @@ -= Thinking About Deployment Strategy += Deployment Topology Overview // Licensed 
to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
@@ -19,13 +19,15 @@
 ////
 This page has a number of graphs to help you visualize different Solr deployment strategies.
 
+The graphs are developed using Graphviz syntax.
+
 The site https://magjac.com/graphviz-visual-editor/ allows you to play with those graphs in real time.
 ////
 
 This section embodies the Solr community's thoughts on best practices for deploying Solr depending on your needs.
 
-NOTE: Soemthing about the various directions you can sacle... David Smiley had some good words.
-Query load. Index Load. Number of Collections. Densitiy of Data (Vectors).
+NOTE: Something about the various directions you can scale... David Smiley had some good words.
+Query load. Index Load. Number of Collections. Density of Data (Vectors).
 
 
 == Overview
@@ -153,16 +155,16 @@ Use these approaches when:
 
 Some con's to this approach are:
 
-* This is pull based, so the segments are pulled by the bottom node from each node above them, which introduces latency and potential for slightly differnet views of the data in the Leader and the various Followers.
+* This is pull based, so the segments are pulled by the bottom node from each node above them, which introduces latency and potential for slightly different views of the data in the Leader and the various Followers.
 * You need to set up via various API calls all the interactions between the various nodes.
 
 ==== Embedded ZooKeeper Ensemble Setup
 
 NOTE: This needs Jason's https://github.com/apache/solr/pull/2391 to get to done done!
 
-The second approach you can take is to use a simple ZooKeeper xref:getting-started:solr-glossary.adoc#ensemble[Ensemble] setup. You can start a pair of Solr's and have their embedded ZooKeeper join each other to form an Ensemble.
And yes, I hear you when you say "this isn't a odd number and ZK quorums should be an odd number to avoid split brain etc". +The second approach you can take is to use a simple ZooKeeper xref:getting-started:solr-glossary.adoc#ensemble[Ensemble] setup. You can start a pair of Solrs and have their embedded ZooKeeper join each other to form an Ensemble. And yes, I hear you when you say "this isn't an odd number and ZK quorums should be an odd number to avoid split brain etc." -NOTE: What is the difference between fail over and high availablity? +NOTE: What is the difference between failover and high availability? [graphviz] .... @@ -181,7 +183,7 @@ graph simple_embedded_zk_ensemble { Use this approach when: * You have only two Solr nodes and they are close to each other in network terms. -* This approach is for when you want fail over, but you aren't worried about high availablity. You have a load balancer in front of the two Solr nodes and it notices one goes away and balances traffic to the other one for querying. +* This approach is for when you want failover, but you aren't worried about high availability. You have a load balancer in front of the two Solr nodes and it notices one goes away and balances traffic to the other one for querying. * You will deal with the fall out to indexing if one of the nodes goes away. You can then scale this up to multiple Solr's: @@ -213,14 +215,14 @@ Use these approaches when: Some con's to this approach are: -* Having five ZK's all updating each other is fine, but it starts to break down if you went to 9 or 11 ZooKeeper forming the Quorum. +* Having five ZKs all updating each other is fine, but it starts to break down if you went to 9 or 11 ZooKeeper forming the Quorum. * We currently don't have any flexible resizing of the quorum. You kind of just have to pick it. === Moving Beyond the Basic Cluster NOTE: This isn't yet fleshed out as to how it works! 
-Solr has a concept of node xref:deployment-guide:node-roles.adoc#roles[Roles] that could be leveraged to establish a set of Solr nodes that run embedded ZooKeeper, and then a larger set of Solr nodes that connect to those ZooKeepers. We currently have the concept of "data" nodes that hosts shards and replicas, we can introduce a "zookeeper" node that also runs the embedded ZooKeeper process.
+Solr has a concept of node xref:deployment-guide:node-roles.adoc#roles[Roles] that could be leveraged to establish a set of Solr nodes that run embedded ZooKeeper, and then a larger set of Solr nodes that connect to those ZooKeepers. We currently have the concept of "data" nodes that host shards and replicas, we can introduce a "zookeeper" node that also runs the embedded ZooKeeper process.
 
 This will work well as you grow from six to 12 nodes in your cluster.
 
@@ -384,14 +386,14 @@ These days, running embedded ZooKeeper is just fine, and that eliminates the mai
 
 == What about Embedding Solr in my Java Application?
 
-Yes, there is embedded Solr. YMMV.
+Embedded Solr is used extensively in Solr's own unit testing strategy.
+It's also been used to build dedicated index create processes using Spark.
+YMMV.
 
-== What about Massive Multi-Tenant Use Cases?
+== What about [YOUR SPECIFIC NEED]
 
-NOTE: Paging David Smiley...?
+There are Solr use cases that require extreme scaling on certain specific axes, whether that is a massive multi-tenant use case, extreme query load, or extreme ingestion performance.
 
-Running thousands of collections, each representing it's own tenant?
-Yeah, there are some things you need to do:
+Each of these requirements will bring its own specific best practices that you will need to embrace, and has its own impact on how you deploy Solr.
 
-* Item 1
-* Item 2
+Learn more on the xref:optimize-extreme-use-cases.adoc[Optimizing for Extreme Use Cases] page.
diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/optimize-extreme-use-cases.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/optimize-extreme-use-cases.adoc new file mode 100644 index 000000000000..b7a36fd23d38 --- /dev/null +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/optimize-extreme-use-cases.adoc @@ -0,0 +1,35 @@ += Meeting Extreme Use Cases +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +This section embodies the Solr community's thoughts on best practices for supporting extreme use use cases. + +NOTE: Something about the various directions you can scale... David Smiley had some good words. +Query load. Index Load. Number of Collections. Density of Data (Vectors). Massive Multi-Tenant. + + + +== What about Massive Multi-Tenant Use Cases? + +NOTE: Paging David Smiley...? + +Running thousands of collections, each representing it's own tenant? +Yeah, there are some things you need to do: + +* Item 1 +* Item 2 From db950002ed91d76e1355dc1ed261de2adb46aca6 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Sun, 14 Sep 2025 16:23:23 -0500 Subject: [PATCH 12/21] Reviewed with Mike Drob, DAvid Smiley, Kevin Risdan, and Jason G. 
--- .../deployment-guide/deployment-nav.adoc | 1 + .../pages/deployment-topology-overview.adoc | 144 +++++------------- 2 files changed, 38 insertions(+), 107 deletions(-) diff --git a/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc b/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc index 85994677ed3d..7fae9c9bfc95 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc @@ -55,6 +55,7 @@ ** User-Managed Clusters *** xref:user-managed-index-replication.adoc[] *** xref:user-managed-distributed-search.adoc[] +** xref:optimize-extreme-use-cases.adoc[] * Monitoring Solr ** xref:configuring-logging.adoc[] diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc index d3ae3d1a6ed2..92b955808c87 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc @@ -26,34 +26,28 @@ The site https://magjac.com/graphviz-visual-editor/ allows you to play with thos This section embodies the Solr community's thoughts on best practices for deploying Solr depending on your needs. -NOTE: Something about the various directions you can scale... David Smiley had some good words. -Query load. Index Load. Number of Collections. Density of Data (Vectors). - - == Overview -There is a logical progression to scaling Solr. +There is a logical progression of toplogies for scaling Solr based on the number of nodes you anticipate needing. [graphviz] .... -digraph { - node [style=rounded] +graph { + node [style=rounded, shape=circle, fixedsize=true, width=2, height=2] //layout=circo overlap=false - //rankdir=LR; // Set the direction to left to right + rankdir=LR; // Set the direction to left to right node1 [label="1. 
Single \nNode"] - node2 [label="2. Nodes \nw/ Replication"] - node3 [label="2. Nodes \nw/ Embedded ZK"] - node5 [label="4. roles=data,zookeeper"] - node6 [label="5. External \nZK Ensemble"] - node7 [label="6. Solr Operator\n + Kubernetes"] - - node1 -> node2 - node2 -> node5 - node1 -> node3 - node3 -> node5 - node5 -> node6 - node6 -> node7 - + node2 [label="2. Two Nodes"] + node3 [label="3. Three or Five Nodes"] + node4 [label="4. Six to Twelve\nNodes"] + node5 [label="5. Twelve to \nTwenty Five\nNodes"] + node6 [label="6. Twenty Six \nand Beyond"] + + node1 -- node2 + node2 -- node3 + node3 -- node4 + node4 -- node5 + node5 -- node6 } .... @@ -79,8 +73,6 @@ digraph single_node { Use this approach when: -* You have minimal load -* You can restart Solr and reindex your data quickly * You are just playing around * You aren't worried about HA or Failover * You want the simplest deployment approach. @@ -90,81 +82,9 @@ Use this approach when: The next most common setup after a single node is having two seperate nodes running on seperate machines, with one as the xref:cluster-types.adoc#leaders[Leader] and the other as the Follower. -There are two approaches that you can take, one that uses loosely coupled Solr nodes with embedded ZooKeepers, and one with a shared ZooKeeper. Both of these work just fine if you only need a single xref:cluster-types.adoc#shards[Shard] to store your data. If you need multiple Shards for your data volume, skip down below. - -==== Loosely coupled Solr Nodes - -The first is using replication to copy complete Lucene segments over from the Leader to the Followers. -This allows you to run two completely independent Solr nodes and copy the data over. -See the xref:user-managed-index-replication.adoc[User Managed Index Replication] page to learn more about setting this up. - -NOTE: Need to update user-managed-index-replication.adoc to talk about doing this when embedded zk is set up. 
- -NOTE: Reference https://github.com/apache/solr/pull/1875 - -[graphviz] -.... -digraph leader_follower_replication { - node [style=rounded] - leader [shape=box] - follower [fillcolor=yellow, style="rounded,filled"] - - leader -> follower -} -.... - -You can get even fancier with this, by introducing the concept of Repeater nodes. - -[graphviz] -.... -digraph leader_repeater_follower_replication { - node [style=rounded] - leader [shape=box] - repeater [fillcolor=yellow, style="rounded,filled"] - follower [shape=box] - - leader -> repeater -> follower -} -.... - -And even multiple followers: - -[graphviz] -.... -digraph leader_repeater_followers_replication { - node [style=rounded] - leader [shape=box] - repeater [shape=box] - follower1 [fillcolor=yellow, style="rounded,filled"] - follower2 [fillcolor=yellow, style="rounded,filled"] - follower3 [fillcolor=yellow, style="rounded,filled"] - - leader -> repeater - repeater -> follower1 - repeater -> follower2 - repeater -> follower3 -} -.... - -Use these approaches when: +NOTE: This needs Jason's https://github.com/apache/solr/pull/2391 to get to done done and be able to recommend this! Or, you start one solr with zk and the other connects. that is today. -* You want each Solr node to be completely independent in state. No shared ZooKeeper for managing interactions. -* You don't need any kind of realtime/near real time updates. -* You potentially have a slow network boundary between your nodes, and want something robust between them. -* All your updates can go to the leader node. - -Some con's to this approach are: - -* This is pull based, so the segments are pulled by the bottom node from each node above them, which introduces latency and potential for slightly different views of the data in the Leader and the various Followers. -* You need to set up via various API calls all the interactions between the various nodes. 
- -==== Embedded ZooKeeper Ensemble Setup - -NOTE: This needs Jason's https://github.com/apache/solr/pull/2391 to get to done done! - -The second approach you can take is to use a simple ZooKeeper xref:getting-started:solr-glossary.adoc#ensemble[Ensemble] setup. You can start a pair of Solrs and have their embedded ZooKeeper join each other to form an Ensemble. And yes, I hear you when you say "this isn't an odd number and ZK quorums should be an odd number to avoid split brain etc." - -NOTE: What is the difference between failover and high availability? +This leverages a simplistic ZooKeeper xref:getting-started:solr-glossary.adoc#ensemble[Ensemble] setup. You can start a pair of Solrs and have their embedded ZooKeeper join each other to form an Ensemble. And yes, I hear you when you say "this isn't an odd number and ZK quorums should be an odd number to avoid split brain etc." [graphviz] .... @@ -186,7 +106,13 @@ Use this approach when: * This approach is for when you want failover, but you aren't worried about high availability. You have a load balancer in front of the two Solr nodes and it notices one goes away and balances traffic to the other one for querying. * You will deal with the fall out to indexing if one of the nodes goes away. -You can then scale this up to multiple Solr's: +=== Moving to High Availability + +NOTE: What is the difference between failover and high availability? Fail over, in this two node situation, means that you can still issue queries and keep your application running, but you no longer can index data because ZooKeeper can't form a quorum. Often known as Split Brain. High Availability means that your service continues un interruped in the event of any node going down. + +You can then scale beyond two nodes by running either three or five Solr's: + +NOTE: We specify three or five Solr nodes because ZooKeeper is running on each node, and requires a odd number of nodes to prevent Split Brain issues. [graphviz] .... 
@@ -229,9 +155,13 @@ This will work well as you grow from six to 12 nodes in your cluster. [graphviz] .... graph simple_embedded_zk_ensemble { + + //size="5,5" node [style=rounded] layout=circo overlap=false + nodesep=0.3 + ratio=fill; node1 [shape=box, label="data, zookeeper", fillcolor=yellow, style="rounded,filled"] node2 [shape=box, label="data, zookeeper", fillcolor=yellow, style="rounded,filled"] node3 [shape=box, label="data, zookeeper", fillcolor=yellow, style="rounded,filled"] @@ -256,7 +186,7 @@ graph simple_embedded_zk_ensemble { } .... -=== A dedicated ZooKeeper ensemble +=== Seperating out ZooKeeper workload As your load in the cluster goes up, sharing ZooKeeper workloads with Solr workloads may become a bottleneck. @@ -305,8 +235,6 @@ Some con's to this approach are: === Going massive means going Kubernetes -NOTE: What would a diagram look like? How to show the operator? - Beyond 25 nodes, you really need to think about more advanced tooling for managing all your nodes. [graphviz] @@ -318,7 +246,7 @@ graph kubernetes_setup { layout=fdp pack=1 - Operator [fillcolor=aqua, style="filled"] + "Solr Operator" [fillcolor=aqua, style="filled"] zk1 [shape=box, label=" zookeeper", fillcolor=yellow, style="rounded,filled"] zk2 [shape=box, label=" zookeeper", fillcolor=yellow, style="rounded,filled"] @@ -326,7 +254,7 @@ graph kubernetes_setup { subgraph clusterKubernetes { - Operator; + "Solr Operator"; subgraph clusterSolr { node1 node2 @@ -373,11 +301,13 @@ graph kubernetes_setup { Use this approaches when: -* You go beyond 24 Solr nodes. +* You go beyond 25 Solr nodes. +* You have the maturity to manage massive data sets. +* You may adopt this earlier if you are already a Kubernetes savvy organization. Some con's to this approach are: -* Kubernetes is much like Oregon Trail, take a friend. +* Kubernetes is much like traveling on the Oregon Trail, take a friend. == What about User Managed Solr? 
@@ -386,8 +316,8 @@ These days, running embedded ZooKeeper is just fine, and that eliminates the mai == What about Embedding Solr in my Java Application? -Embedded Solr is used extensively in Solr's own unit testing strategy. -It's also been used to build dedicated index create processes using Spark. +{solr-javadocs}/core/org/apache/solr/client/solrj/embedded/EmbeddedSolrServer.html[Embedded Solr] is used extensively in Solr's own unit testing strategy. +It's also frequently used to build dedicated indexes in distributed systems like Spark. YMMV. == What about [YOUR SPECIFIC NEED] From 437f8ab3a18d1a80eeb211d99f6fca1737e4db2b Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Thu, 9 Oct 2025 11:15:31 -0400 Subject: [PATCH 13/21] Remove as it's just not ready to move forward with. --- .../deployment-guide/deployment-nav.adoc | 2 -- .../pages/optimize-extreme-use-cases.adoc | 35 ------------------- 2 files changed, 37 deletions(-) delete mode 100644 solr/solr-ref-guide/modules/deployment-guide/pages/optimize-extreme-use-cases.adoc diff --git a/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc b/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc index 7fae9c9bfc95..3f6f525f9650 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc @@ -23,7 +23,6 @@ ** xref:system-requirements.adoc[] ** xref:installing-solr.adoc[] ** xref:taking-solr-to-production.adoc[] -** xref:optimize-extreme-use-cases.adoc[] ** xref:jvm-settings.adoc[] ** xref:upgrading-a-solr-cluster.adoc[] ** xref:backup-restore.adoc[] @@ -55,7 +54,6 @@ ** User-Managed Clusters *** xref:user-managed-index-replication.adoc[] *** xref:user-managed-distributed-search.adoc[] -** xref:optimize-extreme-use-cases.adoc[] * Monitoring Solr ** xref:configuring-logging.adoc[] diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/optimize-extreme-use-cases.adoc 
b/solr/solr-ref-guide/modules/deployment-guide/pages/optimize-extreme-use-cases.adoc deleted file mode 100644 index b7a36fd23d38..000000000000 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/optimize-extreme-use-cases.adoc +++ /dev/null @@ -1,35 +0,0 @@ -= Meeting Extreme Use Cases -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - - -This section embodies the Solr community's thoughts on best practices for supporting extreme use use cases. - -NOTE: Something about the various directions you can scale... David Smiley had some good words. -Query load. Index Load. Number of Collections. Density of Data (Vectors). Massive Multi-Tenant. - - - -== What about Massive Multi-Tenant Use Cases? - -NOTE: Paging David Smiley...? - -Running thousands of collections, each representing it's own tenant? 
-Yeah, there are some things you need to do: - -* Item 1 -* Item 2 From 66917b6005b22b1ede65f40aae69ba66d794e4d7 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Thu, 9 Oct 2025 11:15:40 -0400 Subject: [PATCH 14/21] Text review --- .../pages/deployment-topology-overview.adoc | 56 ++++++++++--------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc index 92b955808c87..6e751760fee9 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc @@ -27,7 +27,7 @@ The site https://magjac.com/graphviz-visual-editor/ allows you to play with thos This section embodies the Solr community's thoughts on best practices for deploying Solr depending on your needs. == Overview -There is a logical progression of toplogies for scaling Solr based on the number of nodes you anticipate needing. +There is a logical progression of topologies for scaling Solr based on the number of nodes you anticipate needing. [graphviz] .... @@ -80,7 +80,7 @@ Use this approach when: === Introducing Fail Over -The next most common setup after a single node is having two seperate nodes running on seperate machines, with one as the xref:cluster-types.adoc#leaders[Leader] and the other as the Follower. +The next most common setup after a single node is having two separate nodes running on separate machines, with one as the xref:cluster-types.adoc#leaders[Leader] and the other as the Follower. NOTE: This needs Jason's https://github.com/apache/solr/pull/2391 to get to done done and be able to recommend this! Or, you start one solr with zk and the other connects. that is today. 
@@ -103,16 +103,16 @@ graph simple_embedded_zk_ensemble { Use this approach when: * You have only two Solr nodes and they are close to each other in network terms. -* This approach is for when you want failover, but you aren't worried about high availability. You have a load balancer in front of the two Solr nodes and it notices one goes away and balances traffic to the other one for querying. -* You will deal with the fall out to indexing if one of the nodes goes away. +* This approach is for when you want failover, but you aren't worried about high availability. You have a load balancer in front of the two Solr nodes that detects when one goes down and redirects traffic to the remaining node for querying. +* You are prepared to handle the consequences for indexing operations if one of the nodes goes down. === Moving to High Availability -NOTE: What is the difference between failover and high availability? Fail over, in this two node situation, means that you can still issue queries and keep your application running, but you no longer can index data because ZooKeeper can't form a quorum. Often known as Split Brain. High Availability means that your service continues un interruped in the event of any node going down. +NOTE: What is the difference between failover and high availability? Failover, in this two-node situation, means that you can still issue queries and keep your application running, but you can no longer index data because ZooKeeper can't form a quorum. This is often referred to as a Split Brain situation. High Availability means that your service continues uninterrupted in the event of any node going down. -You can then scale beyond two nodes by running either three or five Solr's: +You can then scale beyond two nodes by running either three or five Solr nodes: -NOTE: We specify three or five Solr nodes because ZooKeeper is running on each node, and requires a odd number of nodes to prevent Split Brain issues. 
+NOTE: We specify three or five Solr nodes because ZooKeeper is running on each node, and requires an odd number of nodes to prevent Split Brain issues. [graphviz] .... @@ -135,20 +135,20 @@ graph simple_embedded_zk_ensemble { Use these approaches when: -* You want to be able to split your logical Collection across multiple Shards. You want to be able to distribute Replicas around the cluster. -* You don't want to go through the effort of deploying a seperate ZK ensemble independently. And honestly, you don't need to either. +* You want to be able to split your logical Collection across multiple Shards and distribute Replicas around the cluster. +* You don't want to go through the effort of deploying a separate ZK ensemble independently. And honestly, you don't need to either. -Some con's to this approach are: +Some cons to this approach are: -* Having five ZKs all updating each other is fine, but it starts to break down if you went to 9 or 11 ZooKeeper forming the Quorum. -* We currently don't have any flexible resizing of the quorum. You kind of just have to pick it. +* Having five ZooKeeper instances all updating each other is fine, but it starts to break down if you expand to 9 or 11 ZooKeeper instances forming the Quorum. +* We currently don't have any flexible resizing of the quorum. You need to select the appropriate size when setting up your cluster. === Moving Beyond the Basic Cluster NOTE: This isn't yet fleshed out as to how it works! -Solr has a concept of node xref:deployment-guide:node-roles.adoc#roles[Roles] that could be leveraged to establish a set of Solr nodes that run embedded ZooKeeper, and then a larger set of Solr nodes that connect to those ZooKeepers. We currently have the concept of "data" nodes that host shards and replicas, we can introduce a "zookeeper" node that also runs the embedded ZooKeeper process. 
+Solr has a concept of node xref:deployment-guide:node-roles.adoc#roles[Roles] that can be leveraged to establish a set of Solr nodes that run embedded ZooKeeper, and then a larger set of Solr nodes that connect to those ZooKeepers. We currently have the concept of "data" nodes that host shards and replicas, and we can introduce a "zookeeper" node that also runs the embedded ZooKeeper process. This will work well as you grow from six to 12 nodes in your cluster. @@ -186,7 +186,7 @@ graph simple_embedded_zk_ensemble { } .... -=== Seperating out ZooKeeper workload +=== Separating out ZooKeeper workload As your load in the cluster goes up, sharing ZooKeeper workloads with Solr workloads may become a bottleneck. @@ -225,13 +225,13 @@ graph dedicate_zk_ensemble { Use this approach when: * You go beyond 12 Solr nodes up to 25 Solr nodes. -* You are leveraging all the features of SolrCloud to support multiple collections and different types of query and load characteritics, especially tuning shard and replica counts. +* You are leveraging all the features of SolrCloud to support multiple collections and different types of query and load characteristics, especially tuning shard and replica counts. * You may need to move to five ZooKeepers. -Some con's to this approach are: +Some cons to this approach are: -* You are responsible for configuring the external ZooKeeper ensemble. -* If you have any issues with the ZooKeeper ensemble then you need to define how you will handle failover/HA. +* You are responsible for configuring and maintaining the external ZooKeeper ensemble. +* You need to define how you will handle failover/HA for the ZooKeeper ensemble itself. === Going massive means going Kubernetes @@ -299,20 +299,22 @@ graph kubernetes_setup { } .... -Use this approaches when: +Use this approach when: -* You go beyond 25 Solr nodes. -* You have the maturity to manage massive data sets. -* You may adopt this earlier if you are already a Kubernetes savvy organization. 
+* You need to deploy more than 25 Solr nodes. +* You have the operational maturity to manage massive data sets. +* You want a standardized approach to deployment, scaling, and management. +* You may adopt this earlier if you are already a Kubernetes-savvy organization. Some con's to this approach are: -* Kubernetes is much like traveling on the Oregon Trail, take a friend. +* Kubernetes has a steep learning curve; it's advisable to have experienced team members or consultants. +* Managing stateful applications like Solr in Kubernetes requires careful planning for persistence and recovery. == What about User Managed Solr? -The User Managed mode is no longer recommended, and historically was mostly used because running embedded ZooKeeper was viewed as difficult. -These days, running embedded ZooKeeper is just fine, and that eliminates the main reason for User Managed. User Managed also doesn't support all the features and APIs that SolrCloud supports. +The User Managed mode is no longer recommended. Historically, it was primarily used because running embedded ZooKeeper was viewed as difficult. +These days, running embedded ZooKeeper is straightforward, eliminating the main reason for User Managed deployments. Additionally, User Managed mode doesn't support all the features and APIs that SolrCloud provides. == What about Embedding Solr in my Java Application? @@ -322,8 +324,8 @@ YMMV. == What about [YOUR SPECIFIC NEED] -There are Solr use cases that require extreme scaling on certain specific axis, wehter that is a massive multi-tenant use case, extreme query load, or extreme ingestion performance. +There are Solr use cases that require extreme scaling on certain specific axes, whether that is a massive multi-tenant use case, extreme query load, or extreme ingestion performance. -Each of these requirements will bring it's own specific best practices that you will need to embrace, and have their own impact on how you deploy Solr. 
+Each of these requirements will bring its own specific best practices that you will need to embrace, and have their own impact on how you deploy Solr. Learn more on xref:optimize-extreme-use-cases.adoc[Optimizing for Extreme Use Cases] page. From 4ebdc6ee6eff8be4720984a54994799dca261952 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Thu, 9 Oct 2025 12:17:13 -0400 Subject: [PATCH 15/21] Revamps. Down to one specific "TBD" that prevents us from merging. --- .../pages/deployment-topology-overview.adoc | 59 ++++++++++--------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc index 6e751760fee9..43b2ed99ec24 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc @@ -27,7 +27,7 @@ The site https://magjac.com/graphviz-visual-editor/ allows you to play with thos This section embodies the Solr community's thoughts on best practices for deploying Solr depending on your needs. == Overview -There is a logical progression of topologies for scaling Solr based on the number of nodes you anticipate needing. +There is a logical progression of topologies for scaling Solr based on the number of nodes you anticipate needing to meet your indexing needs, querying needs, and storage needs. [graphviz] .... @@ -54,8 +54,8 @@ graph { == Solr from smallest to largest -When we start up Solr on our computer, we're already starting Solr with the underpinnings required to let Solr scale in a smooth fashion, the coordination library ZooKeeper. -ZooKeeper is the unifying technology that supports maintaining state from a single node up to many 1000's of nodes. 
+When we start Solr, we're already starting Solr with the underpinnings required to let Solr scale in a smooth fashion, the coordination library ZooKeeper. +ZooKeeper is the unifying technology that supports maintaining state from a single node up to many 100's of nodes. === Simplest Setup @@ -73,8 +73,8 @@ digraph single_node { Use this approach when: -* You are just playing around -* You aren't worried about HA or Failover +* You are just playing around with Solr. +* You aren't worried about High Availability or Failover. * You want the simplest deployment approach. @@ -82,37 +82,38 @@ Use this approach when: The next most common setup after a single node is having two separate nodes running on separate machines, with one as the xref:cluster-types.adoc#leaders[Leader] and the other as the Follower. -NOTE: This needs Jason's https://github.com/apache/solr/pull/2391 to get to done done and be able to recommend this! Or, you start one solr with zk and the other connects. that is today. - -This leverages a simplistic ZooKeeper xref:getting-started:solr-glossary.adoc#ensemble[Ensemble] setup. You can start a pair of Solrs and have their embedded ZooKeeper join each other to form an Ensemble. And yes, I hear you when you say "this isn't an odd number and ZK quorums should be an odd number to avoid split brain etc." +This leverages a simplistic ZooKeeper xref:getting-started:solr-glossary.adoc#ensemble[Ensemble] setup. +You start the first Solr node with the embedded ZooKeeper, and then the second Solr is set to connect to the ZooKeeper embedded in the first Solr node. [graphviz] .... graph simple_embedded_zk_ensemble { node [style=rounded] layout=neato - node1 [shape=box] - node2 [shape=box] + node1 [shape=record, label="{ solr1 | zk }"] + node2 [shape=box, label="solr2"] node1 -- node2 node2 -- node1 } .... - Use this approach when: -* You have only two Solr nodes and they are close to each other in network terms. 
-* This approach is for when you want failover, but you aren't worried about high availability. You have a load balancer in front of the two Solr nodes that detects when one goes down and redirects traffic to the remaining node for querying. +* You want failover, but you aren't worried about high availability. You have a load balancer in front of the two Solr nodes that detects when one goes down and redirects traffic to the remaining node for querying. * You are prepared to handle the consequences for indexing operations if one of the nodes goes down. === Moving to High Availability -NOTE: What is the difference between failover and high availability? Failover, in this two-node situation, means that you can still issue queries and keep your application running, but you can no longer index data because ZooKeeper can't form a quorum. This is often referred to as a Split Brain situation. High Availability means that your service continues uninterrupted in the event of any node going down. +You can then scale beyond two nodes by running either three or five Solr nodes. +We specify three or five Solr nodes because ZooKeeper is running on every single node, and therefore requires an odd number of nodes in order to prevent Split Brain issues in the cluster. + + +NOTE: This needs Jason's https://github.com/apache/solr/pull/2391 to get to done done and be able to recommend this! Or, you start one solr with zk and the other connects. that is today. + +NOTE: What is the difference between failover and high availability? Failover, in the two-node situation, means that you can still issue queries and keep your application running, but you can no longer index data because ZooKeeper can't form a quorum. This is often referred to as a Split Brain situation. High Availability means that all aspects of your service continues uninterrupted in the event of any node going down. 
-You can then scale beyond two nodes by running either three or five Solr nodes: -NOTE: We specify three or five Solr nodes because ZooKeeper is running on each node, and requires an odd number of nodes to prevent Split Brain issues. [graphviz] .... @@ -137,11 +138,12 @@ Use these approaches when: * You want to be able to split your logical Collection across multiple Shards and distribute Replicas around the cluster. * You don't want to go through the effort of deploying a separate ZK ensemble independently. And honestly, you don't need to either. +* You want true High Availability. With three nodes, you can lose one and continue. With five nodes you can lose two nodes and still continue. Some cons to this approach are: -* Having five ZooKeeper instances all updating each other is fine, but it starts to break down if you expand to 9 or 11 ZooKeeper instances forming the Quorum. +* Having five ZooKeeper instances all updating each other is fine, but it starts to break down if you expand to 7 or 9 ZooKeeper instances forming the Quorum. * We currently don't have any flexible resizing of the quorum. You need to select the appropriate size when setting up your cluster. === Moving Beyond the Basic Cluster @@ -189,8 +191,7 @@ graph simple_embedded_zk_ensemble { === Separating out ZooKeeper workload As your load in the cluster goes up, sharing ZooKeeper workloads with Solr workloads may become a bottleneck. - -NOTE: I wonder if this ever goes away by just having Solr nodes with the role `zookeeper` only? +At this point you may want to run distinct seperate ZooKeeper nodes on their own servers. [graphviz] .... @@ -226,7 +227,7 @@ Use this approach when: * You go beyond 12 Solr nodes up to 25 Solr nodes. * You are leveraging all the features of SolrCloud to support multiple collections and different types of query and load characteristics, especially tuning shard and replica counts. -* You may need to move to five ZooKeepers. 
+* You may need to move to five ZooKeepers in their own setup to support traffic.
 
 Some cons to this approach are:
 
@@ -235,7 +236,11 @@ Some cons to this approach are:
 
 === Going massive means going Kubernetes
 
-Beyond 25 nodes, you really need to think about more advanced tooling for managing all your nodes.
+Beyond 25 nodes, you really need to think about more advanced tooling for managing all your nodes. 
+We discourage rolling your own ZooKeeper orchestration, as there are many pitfalls.
+Instead, use a well-supported container orchestrator with support for Solr and ZooKeeper.
+For Kubernetes, we provide the https://solr.apache.org/operator/[Solr Operator] sub project.
+There are also 3rd party Helm charts available.
 
 [graphviz]
 ....
@@ -302,7 +307,7 @@ graph kubernetes_setup {
 Use this approach when:
 
 * You need to deploy more than 25 Solr nodes.
-* You have the operational maturity to manage massive data sets.
+* You have the operational maturity to manage massive data sets and fleets of Kubernetes pods.
 * You want a standardized approach to deployment, scaling, and management.
 * You may adopt this earlier if you are already a Kubernetes-savvy organization.
 
@@ -313,13 +318,15 @@ Some con's to this approach are:
 
 == What about User Managed Solr?
 
-The User Managed mode is no longer recommended. Historically, it was primarily used because running embedded ZooKeeper was viewed as difficult.
-These days, running embedded ZooKeeper is straightforward, eliminating the main reason for User Managed deployments. Additionally, User Managed mode doesn't support all the features and APIs that SolrCloud provides.
+The User Managed mode is no longer recommended. Historically, it was primarily used because running a separate ZooKeeper cluster was viewed as difficult and expensive.
+These days, running an embedded ZooKeeper inside of your Solr node is straightforward, eliminating the main reason for User Managed deployments.
+Additionally, User Managed mode doesn't support all the features and APIs that SolrCloud provides. == What about Embedding Solr in my Java Application? {solr-javadocs}/core/org/apache/solr/client/solrj/embedded/EmbeddedSolrServer.html[Embedded Solr] is used extensively in Solr's own unit testing strategy. -It's also frequently used to build dedicated indexes in distributed systems like Spark. +It's also frequently used to build dedicated indexes in distributed systems like Spark. +However, it means that your application's dependencies are intertwined with Solr's dependencies, and that the primary focus of the Solr community is to deliver a standalone search engine, not a library. YMMV. == What about [YOUR SPECIFIC NEED] @@ -327,5 +334,3 @@ YMMV. There are Solr use cases that require extreme scaling on certain specific axes, whether that is a massive multi-tenant use case, extreme query load, or extreme ingestion performance. Each of these requirements will bring its own specific best practices that you will need to embrace, and have their own impact on how you deploy Solr. - -Learn more on xref:optimize-extreme-use-cases.adoc[Optimizing for Extreme Use Cases] page. 
From 8aea29499f210a265ff815c046c5736cb15d1d9d Mon Sep 17 00:00:00 2001
From: Eric Pugh
Date: Mon, 13 Oct 2025 18:59:49 -0400
Subject: [PATCH 16/21] Add some internal linking

---
 .../deployment-guide/pages/taking-solr-to-production.adoc         | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/taking-solr-to-production.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/taking-solr-to-production.adoc
index 78676585300f..545285f8668d 100644
--- a/solr/solr-ref-guide/modules/deployment-guide/pages/taking-solr-to-production.adoc
+++ b/solr/solr-ref-guide/modules/deployment-guide/pages/taking-solr-to-production.adoc
@@ -265,7 +265,9 @@ ZK_HOST=zk1,zk2,zk3
 
 When the `ZK_HOST` variable is set, Solr will launch and connect to the defined ZooKeepers instead of starting an embedded ZooKeeper.
 
-See xref:zookeeper-ensemble[ZooKeeper Ensemble Configuration] for more on setting up ZooKeeper.
+See xref:zookeeper-ensemble.adoc[ZooKeeper Ensemble] for more on setting up ZooKeeper.
+
+See xref:deployment-topology-overview.adoc[Deployment Topology Overview] for more details on different approaches for deployment.
 
 ==== ZooKeeper chroot
 
From 314cb99d9172f87b9f2b2d181e024e3f91111901 Mon Sep 17 00:00:00 2001
From: Eric Pugh
Date: Mon, 13 Oct 2025 19:00:04 -0400
Subject: [PATCH 17/21] Remove editorial content.
---
 .../modules/deployment-guide/pages/zookeeper-ensemble.adoc        | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/zookeeper-ensemble.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/zookeeper-ensemble.adoc
index 5990621f97db..54e8d796a3d2 100644
--- a/solr/solr-ref-guide/modules/deployment-guide/pages/zookeeper-ensemble.adoc
+++ b/solr/solr-ref-guide/modules/deployment-guide/pages/zookeeper-ensemble.adoc
@@ -19,12 +19,7 @@
 Although Solr comes bundled with http://zookeeper.apache.org[Apache ZooKeeper], depending on your scaling needs you may need to use a external ZooKeeper setup in production.
 
-ERIC: this is editorial content that should be moved.
-While using Solr's embedded ZooKeeper instance is fine for smaller setups, you shouldn't use this in production because it does not provide any failover: if the Solr instance that hosts ZooKeeper shuts down, ZooKeeper is also shut down.
-Any shards or Solr instances that rely on it will not be able to communicate with it or each other.
-
-The solution to this problem is to set up an external ZooKeeper _ensemble_, which is a number of servers running ZooKeeper that communicate with each other to coordinate the activities of the cluster.
-ERIC: End editorial.
+See xref:deployment-topology-overview.adoc[Deployment Topology Overview] for more information on different approaches.
 
 == How Many ZooKeeper Nodes?
 
From 56f913682b6535b4d7d78a4d44f242d7a9ffe732 Mon Sep 17 00:00:00 2001
From: Eric Pugh
Date: Mon, 13 Oct 2025 19:20:07 -0400
Subject: [PATCH 18/21] Provide specific details for testing out the Failover approach with two Solr nodes.
--- .../pages/deployment-topology-overview.adoc | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc index 43b2ed99ec24..3a142ab937a6 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc @@ -98,18 +98,27 @@ graph simple_embedded_zk_ensemble { } .... +This is basically what you are setting up when you run the `bin/solr start -e cloud` cloud example. + +``` +mkdir -p ./two_nodes/node1/solr +mkdir -p ./two_nodes/node2/solr +bin/solr start -p 8983 --solr-home "./two_nodes/node1/solr" +bin/solr start -p 8984 --solr-home "./two_nodes/node2/solr" -z 127.0.0.1:9983 +``` + Use this approach when: * You want failover, but you aren't worried about high availability. You have a load balancer in front of the two Solr nodes that detects when one goes down and redirects traffic to the remaining node for querying. * You are prepared to handle the consequences for indexing operations if one of the nodes goes down. -=== Moving to High Availability +=== Adding High Availability You can then scale beyond two nodes by running either three or five Solr nodes. We specify three or five Solr nodes because ZooKeeper is running on every single node, and therefore requires an odd number of nodes in order to prevent Split Brain issues in the cluster. -NOTE: This needs Jason's https://github.com/apache/solr/pull/2391 to get to done done and be able to recommend this! Or, you start one solr with zk and the other connects. that is today. +NOTE: This approach requires https://github.com/apache/solr/pull/2391 to be completed! We are leaving it in the Ref Guide as a pointer to the future. NOTE: What is the difference between failover and high availability? 
Failover, in the two-node situation, means that you can still issue queries and keep your application running, but you can no longer index data because ZooKeeper can't form a quorum. This is often referred to as a Split Brain situation. High Availability means that all aspects of your service continues uninterrupted in the event of any node going down. From 5f6090f207852cbb6ebe2f968391d8f58a5f197f Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Wed, 28 Jan 2026 11:24:59 -0500 Subject: [PATCH 19/21] Replace kroki with mermaid js based charts Removes a dependency on an external server for generating the image. --- gradle/libs.versions.toml | 4 +- solr/solr-ref-guide/build.gradle | 10 +- .../pages/deployment-topology-overview.adoc | 330 +++++++++--------- solr/solr-ref-guide/playbook.template.yml | 6 +- 4 files changed, 171 insertions(+), 179 deletions(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index a118b18c7306..be8af1b6ec9e 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -45,8 +45,8 @@ apache-tomcat = "6.0.53" apache-zookeeper = "3.9.4" # @keep for version alignment apiguardian = "1.1.2" -# @keep Asciidoctor kroki version used in ref-guide -asciidoctor-kroki = "0.18.1" +# @keep Antora mermaid extension version used in ref-guide +antora-mermaid-extension = "0.0.8" # @keep Asciidoctor mathjax version used in ref-guide asciidoctor-mathjax = "0.0.9" # @keep Asciidoctor tabs version used in ref-guide diff --git a/solr/solr-ref-guide/build.gradle b/solr/solr-ref-guide/build.gradle index 38e6ffc4252c..1473f79a4c54 100644 --- a/solr/solr-ref-guide/build.gradle +++ b/solr/solr-ref-guide/build.gradle @@ -245,12 +245,12 @@ task downloadAsciidoctorMathjaxExtension(type: NpmTask) { outputs.dir("${project.ext.nodeProjectDir}/node_modules/@djencks/asciidoctor-mathjax") } -task downloadAsciiDoctorKrokiExtension(type: NpmTask) { +task downloadAntoraMermaidExtension(type: NpmTask) { group = 'Build Dependency Download' - args = 
["install", "asciidoctor-kroki"] + args = ["install", "@sntke/antora-mermaid-extension@${libs.versions.antora.mermaid.extension.get()}"] - inputs.property("asciidoctor-kroki version", libs.versions.asciidoctor.kroki.get()) - outputs.dir("${project.ext.nodeProjectDir}/node_modules/asciidoctor-kroki") + inputs.property("antora-mermaid-extension version", libs.versions.antora.mermaid.extension.get()) + outputs.dir("${project.ext.nodeProjectDir}/node_modules/@sntke/antora-mermaid-extension") } task downloadAsciidoctorTabsExtension(type: NpmTask) { @@ -268,7 +268,7 @@ task downloadAntora { dependsOn tasks.downloadAntoraCli dependsOn tasks.downloadAntoraSiteGenerator dependsOn tasks.downloadAntoraLunrExtension - dependsOn tasks.downloadAsciiDoctorKrokiExtension + dependsOn tasks.downloadAntoraMermaidExtension dependsOn tasks.downloadAsciidoctorMathjaxExtension dependsOn tasks.downloadAsciidoctorTabsExtension } diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc index 3a142ab937a6..8b273a22736b 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc @@ -21,7 +21,7 @@ This page has a number of graphs to help you visualize different Solr deployment The graphs are developed using Mermaid syntax. -The site https://magjac.com/graphviz-visual-editor/ allows you to play with those graphs in real time. +The site https://mermaid.live/ allows you to play with those diagrams in real time. //// This section embodies the Solr community's thoughts on best practices for deploying Solr depending on your needs. 
@@ -29,26 +29,24 @@ This section embodies the Solr community's thoughts on best practices for deploy == Overview There is a logical progression of topologies for scaling Solr based on the number of nodes you anticipate needing to meet your indexing needs, querying needs, and storage needs. -[graphviz] +[mermaid,width=100%] .... -graph { - node [style=rounded, shape=circle, fixedsize=true, width=2, height=2] - //layout=circo - overlap=false - rankdir=LR; // Set the direction to left to right - node1 [label="1. Single \nNode"] - node2 [label="2. Two Nodes"] - node3 [label="3. Three or Five Nodes"] - node4 [label="4. Six to Twelve\nNodes"] - node5 [label="5. Twelve to \nTwenty Five\nNodes"] - node6 [label="6. Twenty Six \nand Beyond"] - - node1 -- node2 - node2 -- node3 - node3 -- node4 - node4 -- node5 - node5 -- node6 -} +flowchart LR + node1((1. Single
Node)) + node2((2. Two Nodes)) + node3((3. Three or
Five Nodes)) + node4((4. Six to Twelve
Nodes)) + node5((5. Twelve to
Twenty Five
Nodes)) + node6((6. Twenty Six
and Beyond)) + + node1 --- node2 + node2 --- node3 + node3 --- node4 + node4 --- node5 + node5 --- node6 + + classDef uniform padding:20px,min-width:120px,min-height:120px + class node1,node2,node3,node4,node5,node6 uniform .... @@ -61,14 +59,10 @@ ZooKeeper is the unifying technology that supports maintaining state from a sing If you only need a single Solr node, then it's perfectly reasonable to start Solr with `bin/solr start`. You will have a single Solr node running in SolrCloud mode, with all the lovely APIs and features that SolrCloud provides. -[graphviz] +[mermaid] .... -digraph single_node { - node [style=rounded] - node1 [shape=box, fillcolor=yellow] - - node1 -} +flowchart TD + node1[node1] .... Use this approach when: @@ -85,17 +79,13 @@ The next most common setup after a single node is having two separate nodes runn This leverages a simplistic ZooKeeper xref:getting-started:solr-glossary.adoc#ensemble[Ensemble] setup. You start the first Solr node with the embedded ZooKeeper, and then the second Solr is set to connect to the ZooKeeper embedded in the first Solr node. -[graphviz] +[mermaid] .... -graph simple_embedded_zk_ensemble { - node [style=rounded] - layout=neato - node1 [shape=record, label="{ solr1 | zk }"] - node2 [shape=box, label="solr2"] - - node1 -- node2 - node2 -- node1 -} +flowchart LR + node1["solr1 | zk"] + node2[solr2] + + node1 --- node2 .... This is basically what you are setting up when you run the `bin/solr start -e cloud` cloud example. @@ -124,23 +114,23 @@ NOTE: What is the difference between failover and high availability? Failover, i -[graphviz] +[mermaid,width=100%] .... 
-graph simple_embedded_zk_ensemble { - node [style=rounded] - layout=neato - node1 [shape=box] - node2 [shape=box] - node3 [shape=box] - node4 [shape=box] - node5 [shape=box] - - node1 -- node2 - node2 -- node3 - node3 -- node4 - node4 -- node5 - node5 -- node1 -} +flowchart LR + node1[node1] + node2[node2] + node3[node3] + node4[node4] + node5[node5] + + node1 --- node2 + node2 --- node3 + node3 --- node4 + node4 --- node5 + node5 --- node1 + + classDef uniform padding:20px,min-width:120px,min-height:120px + class node1,node2,node3,node4,node5 uniform .... Use these approaches when: @@ -163,38 +153,36 @@ Solr has a concept of node xref:deployment-guide:node-roles.adoc#roles[Roles] th This will work well as you grow from six to 12 nodes in your cluster. -[graphviz] +[mermaid] .... -graph simple_embedded_zk_ensemble { - - //size="5,5" - node [style=rounded] - layout=circo - overlap=false - nodesep=0.3 - ratio=fill; - node1 [shape=box, label="data, zookeeper", fillcolor=yellow, style="rounded,filled"] - node2 [shape=box, label="data, zookeeper", fillcolor=yellow, style="rounded,filled"] - node3 [shape=box, label="data, zookeeper", fillcolor=yellow, style="rounded,filled"] - node4 [shape=box, label="data"] - node5 [shape=box, label="data"] - node6 [shape=box, label="data"] - node7 [shape=box, label="data"] - node8 [shape=box, label="data"] - node9 [shape=box, label="data"] - - - node1 -- node2 - node2 -- node3 - node3 -- node1 - node3 -- node4 - node4 -- node5 - node5 -- node6 - node6 -- node7 - node7 -- node8 - node8 -- node9 - node9 -- node1 -} +flowchart TB + subgraph zk[ZooKeeper Ensemble] + node1[data, zookeeper] + node2[data, zookeeper] + node3[data, zookeeper] + end + + node4[data] + node5[data] + node6[data] + node7[data] + node8[data] + node9[data] + + node1 --- node2 + node2 --- node3 + node3 --- node1 + node3 --- node4 + node4 --- node5 + node5 --- node6 + node6 --- node7 + node7 --- node8 + node8 --- node9 + node9 --- node1 + + style node1 fill:#ffff00 + 
style node2 fill:#ffff00 + style node3 fill:#ffff00 .... === Separating out ZooKeeper workload @@ -202,34 +190,40 @@ graph simple_embedded_zk_ensemble { As your load in the cluster goes up, sharing ZooKeeper workloads with Solr workloads may become a bottleneck. At this point you may want to run distinct seperate ZooKeeper nodes on their own servers. -[graphviz] +[mermaid] .... -graph dedicate_zk_ensemble { - node [style=rounded] - layout=osage - overlap=false - node1 [shape=box, label=" zookeeper", fillcolor=yellow, style="rounded,filled"] - node2 [shape=box, label=" zookeeper", fillcolor=yellow, style="rounded,filled"] - node3 [shape=box, label=" zookeeper", fillcolor=yellow, style="rounded,filled"] - node4 [shape=box] - node5 [shape=box] - node6 [shape=box] - node7 [shape=box] - node8 [shape=box] - node9 [shape=box] - node10 [shape=box] - node11 [shape=box] - node12 [shape=box] - node13 [shape=box] - node14 [shape=box] - node15 [shape=box] - node16 [shape=box] - node17 [shape=box] - node18 [shape=box] - node19 [shape=box] - node20 [shape=box] - -} +flowchart TB + subgraph zk[ZooKeeper Ensemble] + zk1[zookeeper] + zk2[zookeeper] + zk3[zookeeper] + end + + subgraph solr[Solr Nodes] + node4[node4] + node5[node5] + node6[node6] + node7[node7] + node8[node8] + node9[node9] + node10[node10] + node11[node11] + node12[node12] + node13[node13] + node14[node14] + node15[node15] + node16[node16] + node17[node17] + node18[node18] + node19[node19] + node20[node20] + end + + zk --- solr + + style zk1 fill:#ffff00 + style zk2 fill:#ffff00 + style zk3 fill:#ffff00 .... Use this approach when: @@ -251,66 +245,62 @@ Instead, use a well-supported container orchestrator with support for Solr and Z For Kubernetes, we provide the https://solr.apache.org/operator/[Solr Operator] sub project. There are also 3rd party Helm charts available. -[graphviz] +[mermaid] .... 
-graph kubernetes_setup { - fontname="Helvetica,Arial,sans-serif" - node [fontname="Helvetica,Arial,sans-serif"] - edge [fontname="Helvetica,Arial,sans-serif"] - layout=fdp - pack=1 - - "Solr Operator" [fillcolor=aqua, style="filled"] - - zk1 [shape=box, label=" zookeeper", fillcolor=yellow, style="rounded,filled"] - zk2 [shape=box, label=" zookeeper", fillcolor=yellow, style="rounded,filled"] - zk3 [shape=box, label=" zookeeper", fillcolor=yellow, style="rounded,filled"] - - subgraph clusterKubernetes { - - "Solr Operator"; - subgraph clusterSolr { - node1 - node2 - node3 - node4 - node5 - node6 - node7 - node8 - node9 - node10 - node11 - node12 - node13 - node14 - node15 - node16 - node17 - node18 - node19 - node20 - node21 - node22 - node23 - node24 - node25 - node26 - node27 - node28 - node29 - node30 - - } - subgraph clusterZK { - zk1 -- zk2; - zk2 -- zk3; - zk3 -- zk1; - } - } - - clusterSolr -- clusterZK -} +flowchart TB + subgraph kubernetes[Kubernetes] + operator[Solr Operator] + + subgraph solr[Solr Pods] + node1[node1] + node2[node2] + node3[node3] + node4[node4] + node5[node5] + node6[node6] + node7[node7] + node8[node8] + node9[node9] + node10[node10] + node11[node11] + node12[node12] + node13[node13] + node14[node14] + node15[node15] + node16[node16] + node17[node17] + node18[node18] + node19[node19] + node20[node20] + node21[node21] + node22[node22] + node23[node23] + node24[node24] + node25[node25] + node26[node26] + node27[node27] + node28[node28] + node29[node29] + node30[node30] + end + + subgraph zk[ZooKeeper Ensemble] + zk1[zookeeper] + zk2[zookeeper] + zk3[zookeeper] + zk1 --- zk2 + zk2 --- zk3 + zk3 --- zk1 + end + end + + operator --> solr + solr --- zk + + style operator fill:#00ffff + style zk1 fill:#ffff00 + style zk2 fill:#ffff00 + style zk3 fill:#ffff00 .... 
Use this approach when: diff --git a/solr/solr-ref-guide/playbook.template.yml b/solr/solr-ref-guide/playbook.template.yml index 8393fcd8adda..72cb9e0479ee 100644 --- a/solr/solr-ref-guide/playbook.template.yml +++ b/solr/solr-ref-guide/playbook.template.yml @@ -47,11 +47,9 @@ output: asciidoc: attributes: stem: - kroki-fetch-diagram: true extensions: - '@djencks/asciidoctor-mathjax' - '@asciidoctor/tabs' - - asciidoctor-kroki runtime: fetch: true @@ -59,3 +57,7 @@ antora: extensions: - require: '@antora/lunr-extension' index_latest_only: true + - require: '@sntke/antora-mermaid-extension' + mermaid_library_url: https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.esm.min.mjs + mermaid_initialize_options: + start_on_load: true From 8cebec30810240debddc24a10ba1e3d33028b096 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Wed, 28 Jan 2026 11:32:13 -0500 Subject: [PATCH 20/21] reformatting diagrams to be clearer --- .../pages/deployment-topology-overview.adoc | 71 +++++-------------- 1 file changed, 19 insertions(+), 52 deletions(-) diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc index 8b273a22736b..15e876b9b4b8 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/deployment-topology-overview.adoc @@ -153,9 +153,9 @@ Solr has a concept of node xref:deployment-guide:node-roles.adoc#roles[Roles] th This will work well as you grow from six to 12 nodes in your cluster. -[mermaid] +[mermaid,width=100%] .... -flowchart TB +flowchart LR subgraph zk[ZooKeeper Ensemble] node1[data, zookeeper] node2[data, zookeeper] @@ -190,33 +190,22 @@ flowchart TB As your load in the cluster goes up, sharing ZooKeeper workloads with Solr workloads may become a bottleneck. 
At this point you may want to run distinct seperate ZooKeeper nodes on their own servers. -[mermaid] +[mermaid,width=100%] .... -flowchart TB +flowchart LR subgraph zk[ZooKeeper Ensemble] + direction TB zk1[zookeeper] zk2[zookeeper] zk3[zookeeper] end subgraph solr[Solr Nodes] - node4[node4] - node5[node5] - node6[node6] - node7[node7] - node8[node8] - node9[node9] - node10[node10] - node11[node11] - node12[node12] - node13[node13] - node14[node14] - node15[node15] - node16[node16] - node17[node17] - node18[node18] - node19[node19] - node20[node20] + direction TB + node4[node4] --- node5[node5] --- node6[node6] --- node7[node7] --- node8[node8] + node9[node9] --- node10[node10] --- node11[node11] --- node12[node12] --- node13[node13] + node14[node14] --- node15[node15] --- node16[node16] --- node17[node17] --- node18[node18] + node19[node19] --- node20[node20] end zk --- solr @@ -245,46 +234,24 @@ Instead, use a well-supported container orchestrator with support for Solr and Z For Kubernetes, we provide the https://solr.apache.org/operator/[Solr Operator] sub project. There are also 3rd party Helm charts available. -[mermaid] +[mermaid,width=100%] .... 
flowchart TB subgraph kubernetes[Kubernetes] operator[Solr Operator] subgraph solr[Solr Pods] - node1[node1] - node2[node2] - node3[node3] - node4[node4] - node5[node5] - node6[node6] - node7[node7] - node8[node8] - node9[node9] - node10[node10] - node11[node11] - node12[node12] - node13[node13] - node14[node14] - node15[node15] - node16[node16] - node17[node17] - node18[node18] - node19[node19] - node20[node20] - node21[node21] - node22[node22] - node23[node23] - node24[node24] - node25[node25] - node26[node26] - node27[node27] - node28[node28] - node29[node29] - node30[node30] + direction TB + node1[node1] --- node2[node2] --- node3[node3] --- node4[node4] --- node5[node5] + node6[node6] --- node7[node7] --- node8[node8] --- node9[node9] --- node10[node10] + node11[node11] --- node12[node12] --- node13[node13] --- node14[node14] --- node15[node15] + node16[node16] --- node17[node17] --- node18[node18] --- node19[node19] --- node20[node20] + node21[node21] --- node22[node22] --- node23[node23] --- node24[node24] --- node25[node25] + node26[node26] --- node27[node27] --- node28[node28] --- node29[node29] --- node30[node30] end subgraph zk[ZooKeeper Ensemble] + direction TB zk1[zookeeper] zk2[zookeeper] zk3[zookeeper] From 64f1cc705d36a852aae5ad385b2e53928729f0cb Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Wed, 28 Jan 2026 11:42:29 -0500 Subject: [PATCH 21/21] this is big enough ref guide change to be worth highlighting to users --- changelog/unreleased/SOLR-17492.yml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 changelog/unreleased/SOLR-17492.yml diff --git a/changelog/unreleased/SOLR-17492.yml b/changelog/unreleased/SOLR-17492.yml new file mode 100644 index 000000000000..50ebce085598 --- /dev/null +++ b/changelog/unreleased/SOLR-17492.yml @@ -0,0 +1,8 @@ +# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc +title: Introduce advice on how to scale Solr from 1 node to 100's of nodes in Ref Guide +type: added # added, changed, 
fixed, deprecated, removed, dependency_update, security, other +authors: + - name: Eric Pugh +links: + - name: SOLR-17492 + url: https://issues.apache.org/jira/browse/SOLR-17492