diff --git a/.github/workflows/gradle-extraction-check.yml b/.github/workflows/gradle-extraction-check.yml new file mode 100644 index 00000000000..8a040a077b0 --- /dev/null +++ b/.github/workflows/gradle-extraction-check.yml @@ -0,0 +1,28 @@ +# This test covers TikaServer tests for SolrCell, since crave does not support docker yet. +name: Extraction module tests with Docker + +on: + pull_request: + branches: + - '*' + paths: + - 'solr/modules/extraction/**' + +jobs: + test: + name: extraction module tests with docker + + runs-on: ubuntu-latest + timeout-minutes: 15 + + env: + DEVELOCITY_ACCESS_KEY: ${{ secrets.SOLR_DEVELOCITY_ACCESS_KEY }} + + steps: + - name: Checkout code + uses: actions/checkout@v5 + + - uses: ./.github/actions/prepare-for-build + + - name: Run extraction module tests + run: ./gradlew --no-daemon solr:modules:extraction:check diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 14435a7242f..641de520d6b 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -140,6 +140,8 @@ jersey-containers = "2.39.1" # @keep for version alignment jetbrains-annotations = "26.0.2" # @keep for version alignment +jna = "5.13.0" +# @keep for version alignment joda-time = "2.14.0" junit = "4.13.2" junit-jupiter = "5.13.4" @@ -197,6 +199,7 @@ squareup-okhttp3-okhttp = "4.12.0" stephenc-jcip = "1.0-1" swagger3 = "2.2.22" tdunning-tdigest = "3.3" +testcontainers = "1.20.4" thetaphi-forbiddenapis = "3.10" thisptr-jacksonjq = "0.0.13" threeten-bp = "1.6.8" @@ -427,6 +430,8 @@ jersey-media-jsonjackson = { module = "org.glassfish.jersey.media:jersey-media-j # @keep transitive dependency for version alignment jetbrains-annotations = { module = "org.jetbrains:annotations", version.ref = "jetbrains-annotations" } # @keep transitive dependency for version alignment +jna = { module = "net.java.dev.jna:jna", version.ref = "jna" } +# @keep transitive dependency for version alignment jodatime-jodatime = { module = "joda-time:joda-time", 
version.ref = "joda-time" } junit-junit = { module = "junit:junit", version.ref = "junit" } junit-jupiter = { module = "org.junit.jupiter:junit-jupiter", version.ref = "junit-jupiter" } @@ -516,6 +521,7 @@ stephenc-jcip-annotations = { module = "com.github.stephenc.jcip:jcip-annotation swagger3-annotations-jakarta = { module = "io.swagger.core.v3:swagger-annotations-jakarta", version.ref = "swagger3" } swagger3-jaxrs2-jakarta = { module = "io.swagger.core.v3:swagger-jaxrs2-jakarta", version.ref = "swagger3" } tdunning-tdigest = { module = "com.tdunning:t-digest", version.ref = "tdunning-tdigest" } +testcontainers = { module = "org.testcontainers:testcontainers", version.ref = "testcontainers" } thisptr-jacksonjq = { module = "net.thisptr:jackson-jq", version.ref = "thisptr-jacksonjq" } threeten-bp = { module = "org.threeten:threetenbp", version.ref = "threeten-bp" } xerces-impl = { module = "xerces:xercesImpl", version.ref = "xerces" } diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index e87696e3e8f..dcb2d427577 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -333,6 +333,10 @@ New Features * SOLR-17923: Add fullOuterJoin stream function (Andy Webb) +* SOLR-7632: The Extraction Request Handler, aka Solr Cell, now supports delegating the parsing of rich documents to + an external Tika Server. This allows for a more stable Solr server, and easier to configure and scale parsing + independently. The local in-process Tika parser is now deprecated. (Jan Høydahl, Eric Pugh) + Improvements --------------------- * SOLR-17860: DocBasedVersionConstraintsProcessorFactory now supports PULL replicas. 
(Houston Putman) diff --git a/solr/licenses/docker-java-LICENSE-ASL.txt b/solr/licenses/docker-java-LICENSE-ASL.txt new file mode 100644 index 00000000000..38275f2f4fe --- /dev/null +++ b/solr/licenses/docker-java-LICENSE-ASL.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [2013] [docker-java@googlegroups.com] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/solr/licenses/docker-java-NOTICE.txt b/solr/licenses/docker-java-NOTICE.txt new file mode 100644 index 00000000000..f54dcc4f15a --- /dev/null +++ b/solr/licenses/docker-java-NOTICE.txt @@ -0,0 +1,7 @@ +This product includes software developed by the docker-java project. + +Copyright (c) 2013, docker-java project contributors + +Project: https://github.com/docker-java/docker-java + +Licensed under the Apache License, Version 2.0. 
diff --git a/solr/licenses/docker-java-api-3.4.0.jar.sha1 b/solr/licenses/docker-java-api-3.4.0.jar.sha1 new file mode 100644 index 00000000000..bf5ca0d6db4 --- /dev/null +++ b/solr/licenses/docker-java-api-3.4.0.jar.sha1 @@ -0,0 +1 @@ +9ef23dcc93693f15e69b64632be096c38e31bc44 diff --git a/solr/licenses/docker-java-transport-3.4.0.jar.sha1 b/solr/licenses/docker-java-transport-3.4.0.jar.sha1 new file mode 100644 index 00000000000..c1232d24a6b --- /dev/null +++ b/solr/licenses/docker-java-transport-3.4.0.jar.sha1 @@ -0,0 +1 @@ +c058705684d782effc4b2edfdef1a87544ba4af8 diff --git a/solr/licenses/docker-java-transport-zerodep-3.4.0.jar.sha1 b/solr/licenses/docker-java-transport-zerodep-3.4.0.jar.sha1 new file mode 100644 index 00000000000..b658f8f0810 --- /dev/null +++ b/solr/licenses/docker-java-transport-zerodep-3.4.0.jar.sha1 @@ -0,0 +1 @@ +c4ce6d8695cfdb0027872f99cc20f8f679f8a969 diff --git a/solr/licenses/duct-tape-1.0.8.jar.sha1 b/solr/licenses/duct-tape-1.0.8.jar.sha1 new file mode 100644 index 00000000000..8ccb86d64ea --- /dev/null +++ b/solr/licenses/duct-tape-1.0.8.jar.sha1 @@ -0,0 +1 @@ +92edc22a9ab2f3e17c9bf700aaee377d50e8b530 diff --git a/solr/licenses/duct-tape-LICENSE-MIT.txt b/solr/licenses/duct-tape-LICENSE-MIT.txt new file mode 100644 index 00000000000..2091a63f988 --- /dev/null +++ b/solr/licenses/duct-tape-LICENSE-MIT.txt @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2014 Richard North + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the 
Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/solr/licenses/jna-5.12.1.jar.sha1 b/solr/licenses/jna-5.12.1.jar.sha1 deleted file mode 100644 index 648c9d576db..00000000000 --- a/solr/licenses/jna-5.12.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -b1e93a735caea94f503e95e6fe79bf9cdc1e985d diff --git a/solr/licenses/jna-5.13.0.jar.sha1 b/solr/licenses/jna-5.13.0.jar.sha1 new file mode 100644 index 00000000000..93b456b9293 --- /dev/null +++ b/solr/licenses/jna-5.13.0.jar.sha1 @@ -0,0 +1 @@ +1200e7ebeedbe0d10062093f32925a912020e747 diff --git a/solr/licenses/testcontainers-1.20.4.jar.sha1 b/solr/licenses/testcontainers-1.20.4.jar.sha1 new file mode 100644 index 00000000000..29746a98e88 --- /dev/null +++ b/solr/licenses/testcontainers-1.20.4.jar.sha1 @@ -0,0 +1 @@ +ee2fe3afc9fa6cb2e6a43233998f3633f761692f diff --git a/solr/licenses/testcontainers-LICENSE-MIT.txt b/solr/licenses/testcontainers-LICENSE-MIT.txt new file mode 100644 index 00000000000..9c9e8bc5563 --- /dev/null +++ b/solr/licenses/testcontainers-LICENSE-MIT.txt @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015-2019 Richard North + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + 
+The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/solr/modules/extraction/build.gradle b/solr/modules/extraction/build.gradle index 655fc0360f4..5c52c056587 100644 --- a/solr/modules/extraction/build.gradle +++ b/solr/modules/extraction/build.gradle @@ -19,6 +19,11 @@ apply plugin: 'java-library' description = 'Solr Integration with Tika for extracting content from binary file formats such as Microsoft Word and Adobe PDF' +ext { + // Disable security manager for extraction module tests + useSecurityManager = false +} + dependencies { implementation platform(project(':platform')) implementation project(':solr:core') @@ -27,17 +32,23 @@ dependencies { implementation libs.apache.lucene.core implementation libs.slf4j.api + // For 'local' Tika backend implementation libs.apache.tika.core implementation (libs.apache.tika.parsers, { exclude group: 'org.apache.cxf', module: 'cxf-rt-rs-client' exclude group: 'org.quartz-scheduler', module: 'quartz' exclude group: 'xml-apis', module: 'xml-apis' }) - implementation (libs.xerces.impl, { - exclude group: 'xml-apis', module: 'xml-apis' - }) + + // For 'tikaserver' backend + implementation libs.eclipse.jetty.client + permitUsedUndeclared libs.eclipse.jetty.http + permitUsedUndeclared libs.eclipse.jetty.util + permitUsedUndeclared libs.eclipse.jetty.io testImplementation project(':solr:test-framework') testImplementation libs.apache.lucene.testframework testImplementation 
libs.junit.junit + testImplementation libs.testcontainers + testImplementation libs.carrotsearch.randomizedtesting.runner } diff --git a/solr/modules/extraction/gradle.lockfile b/solr/modules/extraction/gradle.lockfile index 18895208387..fa2dc94f4cf 100644 --- a/solr/modules/extraction/gradle.lockfile +++ b/solr/modules/extraction/gradle.lockfile @@ -15,6 +15,9 @@ com.fasterxml.jackson.module:jackson-module-jakarta-xmlbind-annotations:2.20.0=j com.fasterxml.jackson:jackson-bom:2.20.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath com.fasterxml.woodstox:woodstox-core:7.0.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath com.github.ben-manes.caffeine:caffeine:3.2.2=annotationProcessor,errorprone,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testAnnotationProcessor,testRuntimeClasspath +com.github.docker-java:docker-java-api:3.4.0=jarValidation,testCompileClasspath,testRuntimeClasspath +com.github.docker-java:docker-java-transport-zerodep:3.4.0=jarValidation,testCompileClasspath,testRuntimeClasspath +com.github.docker-java:docker-java-transport:3.4.0=jarValidation,testCompileClasspath,testRuntimeClasspath com.github.jai-imageio:jai-imageio-core:1.4.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath com.github.junrar:junrar:7.5.3=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath com.github.kevinstern:software-and-algorithms:1.0=annotationProcessor,errorprone,testAnnotationProcessor @@ -107,7 +110,7 @@ javax.inject:javax.inject:1=annotationProcessor,errorprone,testAnnotationProcess javax.measure:unit-api:1.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath 
joda-time:joda-time:2.14.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath junit:junit:4.13.2=jarValidation,testCompileClasspath,testRuntimeClasspath -net.java.dev.jna:jna:5.12.1=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +net.java.dev.jna:jna:5.13.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath net.sf.ehcache:ehcache-core:2.6.2=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath org.antlr:antlr4-runtime:4.13.2=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath org.apache.commons:commons-collections4:4.5.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath @@ -195,14 +198,14 @@ org.eclipse.jetty:jetty-alpn-client:12.0.27=compileClasspath,jarValidation,runti org.eclipse.jetty:jetty-alpn-java-client:12.0.27=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath org.eclipse.jetty:jetty-alpn-java-server:12.0.27=jarValidation,testRuntimeClasspath org.eclipse.jetty:jetty-alpn-server:12.0.27=jarValidation,testRuntimeClasspath -org.eclipse.jetty:jetty-client:12.0.27=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath -org.eclipse.jetty:jetty-http:12.0.27=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath -org.eclipse.jetty:jetty-io:12.0.27=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.eclipse.jetty:jetty-client:12.0.27=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath 
+org.eclipse.jetty:jetty-http:12.0.27=compileClasspath,jarValidation,permitUsedUndeclared,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.eclipse.jetty:jetty-io:12.0.27=compileClasspath,jarValidation,permitUsedUndeclared,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath org.eclipse.jetty:jetty-rewrite:12.0.27=jarValidation,testRuntimeClasspath org.eclipse.jetty:jetty-security:12.0.27=jarValidation,testCompileClasspath,testRuntimeClasspath org.eclipse.jetty:jetty-server:12.0.27=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath org.eclipse.jetty:jetty-session:12.0.27=jarValidation,testCompileClasspath,testRuntimeClasspath -org.eclipse.jetty:jetty-util:12.0.27=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.eclipse.jetty:jetty-util:12.0.27=compileClasspath,jarValidation,permitUsedUndeclared,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath org.gagravarr:vorbis-java-core:0.8=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath org.gagravarr:vorbis-java-tika:0.8=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath org.glassfish.hk2.external:aopalliance-repackaged:3.1.1=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath @@ -223,6 +226,7 @@ org.hamcrest:hamcrest:3.0=jarValidation,testCompileClasspath,testRuntimeClasspat org.itadaki:bzip2:0.9.1=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath org.javassist:javassist:3.30.2-GA=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath org.jdom:jdom2:2.0.6.1=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath 
+org.jetbrains:annotations:26.0.2=jarValidation,testCompileClasspath,testRuntimeClasspath org.jspecify:jspecify:1.0.0=annotationProcessor,compileClasspath,errorprone,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testAnnotationProcessor,testCompileClasspath,testRuntimeClasspath org.junit.jupiter:junit-jupiter-api:5.6.2=jarValidation,testRuntimeClasspath org.junit.platform:junit-platform-commons:1.6.2=jarValidation,testRuntimeClasspath @@ -234,15 +238,17 @@ org.ow2.asm:asm-commons:9.8=jarValidation,runtimeClasspath,runtimeLibs,solrPlatf org.ow2.asm:asm-tree:9.8=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath org.ow2.asm:asm:9.8=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath org.pcollections:pcollections:4.0.1=annotationProcessor,errorprone,testAnnotationProcessor +org.rnorth.duct-tape:duct-tape:1.0.8=jarValidation,testCompileClasspath,testRuntimeClasspath org.semver4j:semver4j:6.0.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath org.slf4j:jcl-over-slf4j:2.0.17=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath org.slf4j:jul-to-slf4j:2.0.17=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath -org.slf4j:slf4j-api:2.0.17=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.slf4j:slf4j-api:2.0.17=compileClasspath,jarValidation,permitUsedUndeclared,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath org.tallison.xmp:xmpcore-shaded:6.1.10=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath org.tallison:isoparser:1.9.41.7=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath 
org.tallison:jmatio:1.5=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath org.tallison:metadata-extractor:2.17.1.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +org.testcontainers:testcontainers:1.20.4=jarValidation,testCompileClasspath,testRuntimeClasspath org.tukaani:xz:1.9=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath org.xerial.snappy:snappy-java:1.1.10.8=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath xerces:xercesImpl:2.12.2=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath -empty=apiHelper,apiHelperTest,compileOnlyHelper,compileOnlyHelperTest,missingdoclet,packaging,permitAggregatorUse,permitTestAggregatorUse,permitTestUnusedDeclared,permitTestUsedUndeclared,permitUnusedDeclared,permitUsedUndeclared,signatures +empty=apiHelper,apiHelperTest,compileOnlyHelper,compileOnlyHelperTest,missingdoclet,packaging,permitAggregatorUse,permitTestAggregatorUse,permitTestUnusedDeclared,permitTestUsedUndeclared,permitUnusedDeclared,signatures diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java index 014d56caae4..ba9b72b1863 100644 --- a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java @@ -18,9 +18,10 @@ import java.io.IOException; import java.io.InputStream; -import java.io.StringWriter; import java.lang.invoke.MethodHandles; -import java.util.Locale; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.regex.Pattern; import org.apache.solr.common.SolrException; import 
org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.UpdateParams; @@ -28,43 +29,21 @@ import org.apache.solr.common.util.ContentStreamBase; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; +import org.apache.solr.handler.extraction.fromtika.ToTextContentHandler; +import org.apache.solr.handler.extraction.fromtika.ToXMLContentHandler; import org.apache.solr.handler.loader.ContentStreamLoader; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.update.AddUpdateCommand; import org.apache.solr.update.processor.UpdateRequestProcessor; -import org.apache.tika.config.TikaConfig; -import org.apache.tika.exception.TikaException; -import org.apache.tika.metadata.HttpHeaders; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.metadata.TikaMetadataKeys; -import org.apache.tika.mime.MediaType; -import org.apache.tika.parser.AutoDetectParser; -import org.apache.tika.parser.DefaultParser; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; -import org.apache.tika.parser.PasswordProvider; -import org.apache.tika.parser.html.HtmlMapper; -import org.apache.tika.sax.XHTMLContentHandler; -import org.apache.tika.sax.xpath.Matcher; -import org.apache.tika.sax.xpath.MatchingContentHandler; -import org.apache.tika.sax.xpath.XPathParser; -import org.apache.xml.serialize.BaseMarkupSerializer; -import org.apache.xml.serialize.OutputFormat; -import org.apache.xml.serialize.TextSerializer; -import org.apache.xml.serialize.XMLSerializer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.xml.sax.ContentHandler; -import org.xml.sax.SAXException; +import org.xml.sax.helpers.DefaultHandler; /** - * The class responsible for loading extracted content into Solr. - * - * @deprecated Will be replaced with something similar that calls out to a separate Tika Server - * process running in its own JVM. 
+ * The class responsible for loading extracted content into Solr. It will delegate parsing to a + * {@link ExtractionBackend} and then load the resulting SolrInputDocument into Solr. */ -@Deprecated(since = "9.10.0") public class ExtractingDocumentLoader extends ContentStreamLoader { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @@ -75,40 +54,30 @@ public class ExtractingDocumentLoader extends ContentStreamLoader { /** Extract Only supported format. Default */ public static final String XML_FORMAT = "xml"; - /** XHTML XPath parser. */ - private static final XPathParser PARSER = new XPathParser("xhtml", XHTMLContentHandler.XHTML); - final SolrCore core; - final SolrParams params; final UpdateRequestProcessor processor; final boolean ignoreTikaException; - protected AutoDetectParser autoDetectParser; private final AddUpdateCommand templateAdd; - protected TikaConfig config; - protected ParseContextConfig parseContextConfig; protected SolrContentHandlerFactory factory; + protected ExtractionBackend backend; public ExtractingDocumentLoader( SolrQueryRequest req, UpdateRequestProcessor processor, - TikaConfig config, - ParseContextConfig parseContextConfig, - SolrContentHandlerFactory factory) { - this.params = req.getParams(); + SolrContentHandlerFactory factory, + ExtractionBackend backend) { + SolrParams params = req.getParams(); this.core = req.getCore(); - this.config = config; - this.parseContextConfig = parseContextConfig; this.processor = processor; templateAdd = new AddUpdateCommand(req); templateAdd.overwrite = params.getBool(UpdateParams.OVERWRITE, true); templateAdd.commitWithin = params.getInt(UpdateParams.COMMIT_WITHIN, -1); - // this is lightweight - autoDetectParser = new AutoDetectParser(config); this.factory = factory; + this.backend = backend; ignoreTikaException = params.getBool(ExtractingParams.IGNORE_TIKA_EXCEPTION, false); } @@ -131,169 +100,153 @@ public void load( ContentStream stream, 
UpdateRequestProcessor processor) throws Exception { - Parser parser = null; - String streamType = req.getParams().get(ExtractingParams.STREAM_TYPE, null); - if (streamType != null) { - // Cache? Parsers are lightweight to construct and thread-safe, so I'm told - MediaType mt = MediaType.parse(streamType.trim().toLowerCase(Locale.ROOT)); - parser = new DefaultParser(config.getMediaTypeRegistry()).getParsers().get(mt); - } else { - parser = autoDetectParser; - } - if (parser != null) { - Metadata metadata = new Metadata(); - - // If you specify the resource name (the filename, roughly) with this parameter, - // then Tika can make use of it in guessing the appropriate MIME type: - String resourceName = req.getParams().get(ExtractingParams.RESOURCE_NAME, null); - if (resourceName != null) { - metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, resourceName); - } - // Provide stream's content type as hint for auto detection - if (stream.getContentType() != null) { - metadata.add(HttpHeaders.CONTENT_TYPE, stream.getContentType()); - } - - try (InputStream inputStream = stream.getStream()) { - metadata.add(ExtractingMetadataConstants.STREAM_NAME, stream.getName()); - metadata.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, stream.getSourceInfo()); - metadata.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(stream.getSize())); - metadata.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, stream.getContentType()); - // HtmlParser and TXTParser regard Metadata.CONTENT_ENCODING in metadata - String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType()); - if (charset != null) { - metadata.add(HttpHeaders.CONTENT_ENCODING, charset); + SolrParams params = req.getParams(); + String streamType = params.get(ExtractingParams.STREAM_TYPE, null); + String resourceName = params.get(ExtractingParams.RESOURCE_NAME, null); + + try (InputStream inputStream = stream.getStream()) { + String charset = 
ContentStreamBase.getCharsetFromContentType(stream.getContentType()); + + String xpathExpr = params.get(ExtractingParams.XPATH_EXPRESSION); + boolean extractOnly = params.getBool(ExtractingParams.EXTRACT_ONLY, false); + // Experimental: recursive parsing of embedded documents via /rmeta (TikaServer backend only) + boolean tikaserverRecursive = params.getBool(ExtractingParams.TIKASERVER_RECURSIVE, false); + String extractFormat = + params.get(ExtractingParams.EXTRACT_FORMAT, extractOnly ? XML_FORMAT : TEXT_FORMAT); + + // Parse optional passwords file into a map + LinkedHashMap pwMap = null; + String passwordsFile = params.get(ExtractingParams.PASSWORD_MAP_FILE); + if (passwordsFile != null) { + try (java.io.InputStream is = core.getResourceLoader().openResource(passwordsFile)) { + pwMap = RegexRulesPasswordProvider.parseRulesFile(is); } + } - String xpathExpr = params.get(ExtractingParams.XPATH_EXPRESSION); - boolean extractOnly = params.getBool(ExtractingParams.EXTRACT_ONLY, false); - SolrContentHandler handler = - factory.createSolrContentHandler(metadata, params, req.getSchema()); - ContentHandler parsingHandler = handler; + Integer tikaTimeoutSecs = params.getInt(ExtractingParams.TIKASERVER_TIMEOUT_SECS); + ExtractionRequest extractionRequest = + ExtractionRequest.builder() + .streamType(streamType) + .resourceName(resourceName) + .contentType(stream.getContentType()) + .charset(charset) + .streamName(stream.getName()) + .streamSourceInfo(stream.getSourceInfo()) + .streamSize(stream.getSize()) + .resourcePassword(params.get(ExtractingParams.RESOURCE_PASSWORD, null)) + .passwordsMap(pwMap) + .extractFormat(extractFormat) + .tikaServerRecursive(tikaserverRecursive) + .tikaServerTimeoutSeconds(tikaTimeoutSecs) + .tikaServerRequestHeaders(Collections.emptyMap()) + .build(); + + boolean captureAttr = params.getBool(ExtractingParams.CAPTURE_ATTRIBUTES, false); + String[] captureElems = params.getParams(ExtractingParams.CAPTURE_ELEMENTS); + boolean needsSaxParsing = + extractOnly 
+ || xpathExpr != null + || captureAttr + || (captureElems != null && captureElems.length > 0) + || (params.get(ExtractingParams.RESOURCE_PASSWORD) != null) + || (passwordsFile != null); + + if (extractOnly) { + try { + ExtractionMetadata md = backend.buildMetadataFromRequest(extractionRequest); + String content; + if (ExtractingDocumentLoader.TEXT_FORMAT.equals(extractionRequest.extractFormat) + || xpathExpr != null) { + content = + extractWithHandler( + inputStream, xpathExpr, extractionRequest, md, new ToTextContentHandler()); + } else { // XML format + content = + extractWithHandler( + inputStream, xpathExpr, extractionRequest, md, new ToXMLContentHandler()); + if (!content.startsWith("\n" + content; + } + } - StringWriter writer = null; - BaseMarkupSerializer serializer = null; - if (extractOnly == true) { - String extractFormat = params.get(ExtractingParams.EXTRACT_FORMAT, "xml"); - writer = new StringWriter(); - if (extractFormat.equals(TEXT_FORMAT)) { - serializer = new TextSerializer(); - serializer.setOutputCharStream(writer); - serializer.setOutputFormat(new OutputFormat("Text", "UTF-8", true)); - } else { - serializer = new XMLSerializer(writer, new OutputFormat("XML", "UTF-8", true)); + rsp.add(stream.getName(), content); + NamedList metadataNL = new NamedList<>(); + for (String name : md.keySet()) { + metadataNL.add(name, md.get(name).toArray(new String[0])); } - if (xpathExpr != null) { - Matcher matcher = PARSER.parse(xpathExpr); - serializer - .startDocument(); // The MatchingContentHandler does not invoke startDocument. 
See - // https://lists.apache.org/thread.html/5ec63e104e564a2363e45f74d5aced6520b7d32b4b625762ef56cb86%401226775505%40%3Cdev.tika.apache.org%3E - parsingHandler = new MatchingContentHandler(serializer, matcher); - } else { - parsingHandler = serializer; + rsp.add(stream.getName() + "_metadata", metadataNL); + } catch (Exception e) { + if (ignoreTikaException) { + if (log.isWarnEnabled()) + log.warn("skip extracting text due to {}.", e.getLocalizedMessage(), e); + return; } - } else if (xpathExpr != null) { - Matcher matcher = PARSER.parse(xpathExpr); - parsingHandler = new MatchingContentHandler(handler, matcher); - } // else leave it as is + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); + } + return; + } + if (needsSaxParsing) { + ExtractionMetadata metadata = backend.buildMetadataFromRequest(extractionRequest); + SolrContentHandler handler = + factory.createSolrContentHandler(metadata, params, req.getSchema()); try { - // potentially use a wrapper handler for parsing, but we still need the SolrContentHandler - // for getting the document. 
- ParseContext context = parseContextConfig.create(); - - context.set(Parser.class, parser); - context.set(HtmlMapper.class, MostlyPassthroughHtmlMapper.INSTANCE); - - // Password handling - RegexRulesPasswordProvider epp = new RegexRulesPasswordProvider(); - String pwMapFile = params.get(ExtractingParams.PASSWORD_MAP_FILE); - if (pwMapFile != null && pwMapFile.length() > 0) { - InputStream is = req.getCore().getResourceLoader().openResource(pwMapFile); - if (is != null) { - log.debug("Password file supplied: {}", pwMapFile); - epp.parse(is); - } - } - context.set(PasswordProvider.class, epp); - String resourcePassword = params.get(ExtractingParams.RESOURCE_PASSWORD); - if (resourcePassword != null) { - epp.setExplicitPassword(resourcePassword); - log.debug("Literal password supplied for file {}", resourceName); - } - parser.parse(inputStream, parsingHandler, metadata, context); - } catch (TikaException e) { + backend.extractWithSaxHandler(inputStream, extractionRequest, metadata, handler); + } catch (Exception e) { if (ignoreTikaException) { if (log.isWarnEnabled()) { - log.warn( - "skip extracting text due to {}. 
metadata={}", - e.getLocalizedMessage(), - metadata, - e); + log.warn("skip extracting text due to {}.", e.getLocalizedMessage(), e); } + return; - } else { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); } + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); } - if (extractOnly == false) { - addDoc(handler); - } else { - // serializer is not null, so we need to call endDoc on it if using xpath - if (xpathExpr != null) { - serializer.endDocument(); - } - rsp.add(stream.getName(), writer.toString()); - writer.close(); - String[] names = metadata.names(); - NamedList metadataNL = new NamedList<>(); - for (int i = 0; i < names.length; i++) { - String[] vals = metadata.getValues(names[i]); - metadataNL.add(names[i], vals); - } - rsp.add(stream.getName() + "_metadata", metadataNL); + + addDoc(handler); + return; + } + + ExtractionResult result; + try { + result = backend.extract(inputStream, extractionRequest); + } catch (Exception e) { + if (ignoreTikaException) { + if (log.isWarnEnabled()) + log.warn("skip extracting text due to {}.", e.getLocalizedMessage(), e); + return; } - } catch (SAXException e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); } - } else { - throw new SolrException( - SolrException.ErrorCode.BAD_REQUEST, - "Stream type of " - + streamType - + " didn't match any known parsers. Please supply the " - + ExtractingParams.STREAM_TYPE - + " parameter."); - } - } - public static class MostlyPassthroughHtmlMapper implements HtmlMapper { - public static final HtmlMapper INSTANCE = new MostlyPassthroughHtmlMapper(); + ExtractionMetadata metadata = result.getMetadata(); - /** - * Keep all elements and their content. - * - *

Apparently <SCRIPT> and <STYLE> elements are blocked elsewhere - */ - @Override - public boolean isDiscardElement(String name) { - return false; - } - - /** Lowercases the attribute name */ - @Override - public String mapSafeAttribute(String elementName, String attributeName) { - return attributeName.toLowerCase(Locale.ENGLISH); + SolrContentHandler handler = + factory.createSolrContentHandler(metadata, params, req.getSchema()); + handler.appendToContent(result.getContent()); + addDoc(handler); } + } - /** - * Lowercases the element name, but returns null for <BR>, which suppresses the - * start-element event for lt;BR> tags. This also suppresses the <BODY> tags because - * those are handled internally by Tika's XHTMLContentHandler. - */ - @Override - public String mapSafeElement(String name) { - String lowerName = name.toLowerCase(Locale.ROOT); - return (lowerName.equals("br") || lowerName.equals("body")) ? null : lowerName; + /* + * Extracts content from the given input stream using an optional XPath expression + * and a SAX content handler. The extraction process may filter content based on + * the XPath expression, if provided. 
+ */ + private String extractWithHandler( + InputStream inputStream, + String xpathExpr, + ExtractionRequest extractionRequest, + ExtractionMetadata md, + DefaultHandler ch) + throws Exception { + if (xpathExpr != null) { + org.apache.tika.sax.xpath.XPathParser xparser = + new org.apache.tika.sax.xpath.XPathParser( + "xhtml", org.apache.tika.sax.XHTMLContentHandler.XHTML); + org.apache.tika.sax.xpath.Matcher matcher = xparser.parse(xpathExpr); + ch = new org.apache.tika.sax.xpath.MatchingContentHandler(ch, matcher); } + backend.extractWithSaxHandler(inputStream, extractionRequest, md, ch); + return ch.toString(); } } diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingMetadataConstants.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingMetadataConstants.java index 0a72edca0ec..cecbfdb048b 100644 --- a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingMetadataConstants.java +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingMetadataConstants.java @@ -22,4 +22,7 @@ public interface ExtractingMetadataConstants { String STREAM_SOURCE_INFO = "stream_source_info"; String STREAM_SIZE = "stream_size"; String STREAM_CONTENT_TYPE = "stream_content_type"; + String HTTP_HEADER_CONTENT_TYPE = "Content-Type"; + String HTTP_HEADER_CONTENT_ENCODING = "Content-Encoding"; + String RESOURCE_NAME_KEY = "resourceName"; } diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingParams.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingParams.java index a7d159678f1..ddd31d30f77 100644 --- a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingParams.java +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingParams.java @@ -136,4 +136,26 @@ public interface ExtractingParams { * .*=<defaultmypassword> at the end */ public static final 
String PASSWORD_MAP_FILE = "passwordsFile"; + + /** Backend selection, either `local` or `tikaserver`. */ + public static final String EXTRACTION_BACKEND = "extraction.backend"; + + /** Preferred: Fix/normalize metadata naming for Tika Server compatibility */ + public static final String TIKASERVER_METADATA_COMPATIBILITY = + "tikaserver.metadata.compatibility"; + + /** URL of Tika Server instance. */ + public static final String TIKASERVER_URL = "tikaserver.url"; + + /** Max characters allowed in parsed content */ + public static final String TIKASERVER_MAX_CHARS = "tikaserver.maxChars"; + + /** + * Enable recursive parsing of embedded documents when using TikaServer. This is experimental, + * uses /rmeta endpoint, uses more RAM and is disabled by default. + */ + public static final String TIKASERVER_RECURSIVE = "tikaserver.recursive"; + + /** Default or per-request timeout in seconds for TikaServer HTTP calls. */ + public static final String TIKASERVER_TIMEOUT_SECS = "tikaserver.timeoutSeconds"; } diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java index c9a319bc0bb..a64f7eea819 100644 --- a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java @@ -16,8 +16,8 @@ */ package org.apache.solr.handler.extraction; -import java.io.InputStream; -import java.nio.file.Path; +import java.io.IOException; +import java.lang.invoke.MethodHandles; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.core.SolrCore; @@ -28,26 +28,29 @@ import org.apache.solr.security.PermissionNameProvider; import org.apache.solr.update.processor.UpdateRequestProcessor; import org.apache.solr.util.plugin.SolrCoreAware; -import 
org.apache.tika.config.TikaConfig; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Handler for rich documents like PDF or Word or any other file format that Tika handles that need * the text to be extracted first from the document. - * - * @deprecated Will be replaced with something similar that calls out to a separate Tika Server - * process running in its own JVM. */ -@Deprecated(since = "9.10.0") +@SuppressWarnings("removal") public class ExtractingRequestHandler extends ContentStreamHandlerBase implements SolrCoreAware, PermissionNameProvider { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + public static final String PARSE_CONTEXT_CONFIG = "parseContext.config"; public static final String CONFIG_LOCATION = "tika.config"; - protected TikaConfig config; + protected String tikaConfigLoc; protected ParseContextConfig parseContextConfig; protected SolrContentHandlerFactory factory; + protected String defaultBackendName; + protected LocalTikaExtractionBackend localBackend; + protected TikaServerExtractionBackend tikaServerBackend; // may be null if not configured @Override public PermissionNameProvider.Name getPermissionName(AuthorizationContext request) { @@ -57,22 +60,8 @@ public PermissionNameProvider.Name getPermissionName(AuthorizationContext reques @Override public void inform(SolrCore core) { try { - String tikaConfigLoc = (String) initArgs.get(CONFIG_LOCATION); - if (tikaConfigLoc == null) { // default - ClassLoader classLoader = core.getResourceLoader().getClassLoader(); - try (InputStream is = classLoader.getResourceAsStream("solr-default-tika-config.xml")) { - config = new TikaConfig(is); - } - } else { - Path configFile = Path.of(tikaConfigLoc); - if (configFile.isAbsolute()) { - config = new TikaConfig(configFile); - } else { // in conf/ - try (InputStream is = core.getResourceLoader().openResource(tikaConfigLoc)) { - config = new TikaConfig(is); - } - } - } + // Store tika config 
location (backend-specific) + this.tikaConfigLoc = (String) initArgs.get(CONFIG_LOCATION); String parseContextConfigLoc = (String) initArgs.get(PARSE_CONTEXT_CONFIG); if (parseContextConfigLoc == null) { // default: @@ -81,20 +70,109 @@ public void inform(SolrCore core) { parseContextConfig = new ParseContextConfig(core.getResourceLoader(), parseContextConfigLoc); } + + // Always create local backend + this.localBackend = new LocalTikaExtractionBackend(core, tikaConfigLoc, parseContextConfig); + + // Optionally create Tika Server backend if URL configured + String tikaServerUrl = (String) initArgs.get(ExtractingParams.TIKASERVER_URL); + if (tikaServerUrl != null && !tikaServerUrl.trim().isEmpty()) { + int timeoutSecs = 0; + Object initTimeout = initArgs.get(ExtractingParams.TIKASERVER_TIMEOUT_SECS); + if (initTimeout != null) { + try { + timeoutSecs = Integer.parseInt(String.valueOf(initTimeout)); + } catch (NumberFormatException nfe) { + throw new SolrException( + ErrorCode.SERVER_ERROR, + "Invalid value for '" + + ExtractingParams.TIKASERVER_TIMEOUT_SECS + + "': " + + initTimeout, + nfe); + } + } + Object maxCharsObj = initArgs.get(ExtractingParams.TIKASERVER_MAX_CHARS); + long maxCharsLimit = TikaServerExtractionBackend.DEFAULT_MAXCHARS_LIMIT; + if (maxCharsObj != null) { + try { + maxCharsLimit = Long.parseLong(String.valueOf(maxCharsObj)); + } catch (NumberFormatException nfe) { + throw new SolrException( + ErrorCode.SERVER_ERROR, + "Invalid value for '" + + ExtractingParams.TIKASERVER_MAX_CHARS + + "': " + + maxCharsObj, nfe); + } + } + this.tikaServerBackend = + new TikaServerExtractionBackend(tikaServerUrl, timeoutSecs, initArgs, maxCharsLimit); + } + + // Choose default backend name + String backendName = (String) initArgs.get(ExtractingParams.EXTRACTION_BACKEND); + this.defaultBackendName = + (backendName == null || backendName.trim().isEmpty()) + ? 
LocalTikaExtractionBackend.NAME + : backendName; + + // Validate backend and check configuration + switch (this.defaultBackendName) { + case LocalTikaExtractionBackend.NAME: + break; + case TikaServerExtractionBackend.NAME: + // Tika Server backend requires URL to be configured + if (this.tikaServerBackend == null) { + throw new SolrException( + ErrorCode.INVALID_STATE, "Tika Server backend requested but no URL configured"); + } + break; + default: + throw new SolrException( + ErrorCode.BAD_REQUEST, + "Invalid extraction backend: '" + + this.defaultBackendName + + "'. Must be one of: '" + + LocalTikaExtractionBackend.NAME + + "', '" + + TikaServerExtractionBackend.NAME + + "'"); + } } catch (Exception e) { - throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to load Tika Config", e); + throw new SolrException( + ErrorCode.SERVER_ERROR, "Unable to initialize ExtractingRequestHandler", e); } - factory = createFactory(); - } - - protected SolrContentHandlerFactory createFactory() { - return new SolrContentHandlerFactory(); + factory = new SolrContentHandlerFactory(); } @Override protected ContentStreamLoader newLoader(SolrQueryRequest req, UpdateRequestProcessor processor) { - return new ExtractingDocumentLoader(req, processor, config, parseContextConfig, factory); + // Allow per-request override of backend via request param + String backendParam = req.getParams().get(ExtractingParams.EXTRACTION_BACKEND); + String nameToUse = + (backendParam != null && !backendParam.trim().isEmpty()) + ? 
backendParam + : defaultBackendName; + + ExtractionBackend extractionBackend; + if (LocalTikaExtractionBackend.NAME.equals(nameToUse)) { + extractionBackend = localBackend; + } else if (TikaServerExtractionBackend.NAME.equals(nameToUse)) { + if (tikaServerBackend == null) { + throw new SolrException( + ErrorCode.BAD_REQUEST, + "Tika Server backend requested but '" + + ExtractingParams.TIKASERVER_URL + + "' is not configured"); + } + extractionBackend = tikaServerBackend; + } else { + throw new SolrException(ErrorCode.BAD_REQUEST, "Unknown extraction backend: " + nameToUse); + } + + return new ExtractingDocumentLoader(req, processor, factory, extractionBackend); } // ////////////////////// SolrInfoMBeans methods ////////////////////// @@ -102,4 +180,22 @@ protected ContentStreamLoader newLoader(SolrQueryRequest req, UpdateRequestProce public String getDescription() { return "Add/Update Rich document"; } + + @Override + public void close() throws IOException { + // Close our backends to release any shared resources (e.g., Jetty HttpClient) + try { + if (tikaServerBackend != null) { + tikaServerBackend.close(); + } + } finally { + try { + if (localBackend != null) { + localBackend.close(); + } + } finally { + super.close(); + } + } + } } diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionBackend.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionBackend.java new file mode 100644 index 00000000000..4550b1f8617 --- /dev/null +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionBackend.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.extraction; + +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStream; +import org.xml.sax.helpers.DefaultHandler; + +/** + * Content extraction backends must implement this interface. + * + *

Implementations must be thread-safe as a single instance may be shared across multiple + * concurrent requests. + */ +public interface ExtractionBackend extends Closeable { + /** + * Extract plain text and metadata from the inputStream. Implementations should not close the + * inputStream. + */ + ExtractionResult extract(InputStream inputStream, ExtractionRequest request) throws Exception; + + /** + * Perform extraction of text from inputStream with SAX handler. Examples of SAX handlers are + * SolrContentHandler, ToTextContentHandler, ToXMLContentHandler and MatchingContentHandler. + */ + void extractWithSaxHandler( + InputStream inputStream, + ExtractionRequest request, + ExtractionMetadata md, + DefaultHandler saxContentHandler) + throws Exception; + + /** Build ExtractionMetadata from the request context */ + default ExtractionMetadata buildMetadataFromRequest(ExtractionRequest request) { + ExtractionMetadata md = new ExtractionMetadata(); + md.add(ExtractingMetadataConstants.RESOURCE_NAME_KEY, request.resourceName); + md.add(ExtractingMetadataConstants.HTTP_HEADER_CONTENT_TYPE, request.contentType); + md.add(ExtractingMetadataConstants.STREAM_NAME, request.streamName); + md.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, request.streamSourceInfo); + md.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(request.streamSize)); + md.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, request.contentType); + md.add(ExtractingMetadataConstants.HTTP_HEADER_CONTENT_ENCODING, request.charset); + return md; + } + + /** A short name for debugging/config, e.g., "local" or "tikaserver". 
*/ + String name(); + + @Override + default void close() throws IOException { + // default no-op; specific backends may override to release shared resources + } +} diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionMetadata.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionMetadata.java new file mode 100644 index 00000000000..764a0a9d152 --- /dev/null +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionMetadata.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.extraction; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; + +/** A map of metadata name/value pairs. */ +public class ExtractionMetadata extends LinkedHashMap> { + /** + * Add a metadata value. If the name already exists, the value will be appended to the existing + * list. + */ + public void add(String name, String value) { + if (name == null || value == null) return; + computeIfAbsent(name, k -> new ArrayList<>()).add(value); + } + + /** Add multiple metadata values. 
*/ + public void add(String name, Collection values) { + if (name == null || values == null || values.isEmpty()) return; + computeIfAbsent(name, k -> new ArrayList<>()).addAll(values); + } + + /** Gets all metadata values for the given name. */ + public List get(String name) { + List vals = super.get(name); + return (vals == null) ? Collections.emptyList() : vals; + } + + /** Gets the first metadata value for the given name or null if not set. */ + public String getFirst(String name) { + List vals = super.get(name); + if (vals == null || vals.isEmpty()) return null; + return vals.getFirst(); + } +} diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionPasswordProvider.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionPasswordProvider.java new file mode 100644 index 00000000000..6dbee85b988 --- /dev/null +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionPasswordProvider.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.handler.extraction; + +/** Generic password provider without Tika dependency */ +public interface ExtractionPasswordProvider { + /** Given some metadata, return a password to use for the given document. */ + String getPassword(ExtractionMetadata metadata); +} diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionRequest.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionRequest.java new file mode 100644 index 00000000000..50c6f0c7b7b --- /dev/null +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionRequest.java @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.extraction; + +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.regex.Pattern; + +/** Immutable request info needed by extraction backends. 
*/ +public class ExtractionRequest { + public final String streamType; + public final String resourceName; + public final String contentType; + public final String charset; + public final String streamName; + public final String streamSourceInfo; + public final Long streamSize; + public final String resourcePassword; + public final java.util.LinkedHashMap passwordsMap; + public final String extractFormat; + + // Below variables are only used by TikaServerExtractionBackend + public final boolean tikaServerRecursive; + public final Integer tikaServerTimeoutSeconds; // optional per-request override + public final Map tikaServerRequestHeaders = new HashMap<>(); + + /** + * Constructs an ExtractionRequest object containing metadata and configurations for extraction + * backends. This constructor is private - use {@link #builder()} to create instances. + * + * @param streamType the explicit MIME type of the document (optional) + * @param resourceName the name of the resource, typically a filename hint + * @param contentType the HTTP content-type header value + * @param charset the derived character set of the stream if available + * @param streamName the name of the content stream + * @param streamSourceInfo additional information about the stream source + * @param streamSize the size of the stream in bytes + * @param resourcePassword an optional password used for encrypted documents + * @param passwordsMap an optional map of regex patterns to passwords for encrypted content + * @param extractFormat the desired format for extraction output + * @param tikaServerRecursive a flag indicating whether extraction should be recursive. TikaServer + * only + * @param tikaServerTimeoutSeconds optional per-request timeout override in seconds (TikaServer + * only). If null or ≤ 0, the default timeout will be used + * @param tikaServerRequestHeaders optional headers to be included in requests to the extraction + * service. 
TikaServer only + */ + private ExtractionRequest( + String streamType, + String resourceName, + String contentType, + String charset, + String streamName, + String streamSourceInfo, + Long streamSize, + String resourcePassword, + java.util.LinkedHashMap passwordsMap, + String extractFormat, + boolean tikaServerRecursive, + Integer tikaServerTimeoutSeconds, + Map tikaServerRequestHeaders) { + this.streamType = streamType; + this.resourceName = resourceName; + this.contentType = contentType; + this.charset = charset; + this.streamName = streamName; + this.streamSourceInfo = streamSourceInfo; + this.streamSize = streamSize; + this.resourcePassword = resourcePassword; + this.passwordsMap = passwordsMap; + this.extractFormat = extractFormat; + this.tikaServerRecursive = tikaServerRecursive; + this.tikaServerTimeoutSeconds = tikaServerTimeoutSeconds; + if (tikaServerRequestHeaders != null) { + this.tikaServerRequestHeaders.putAll(tikaServerRequestHeaders); + } + } + + /** Creates a new Builder for constructing ExtractionRequest instances. */ + public static Builder builder() { + return new Builder(); + } + + /** Builder for creating ExtractionRequest instances with improved readability and safety. 
*/ + public static class Builder { + private String streamType; + private String resourceName; + private String contentType; + private String charset; + private String streamName; + private String streamSourceInfo; + private Long streamSize; + private String resourcePassword; + private LinkedHashMap passwordsMap; + private String extractFormat; + private boolean tikaServerRecursive = false; + private Integer tikaServerTimeoutSeconds; + private Map tikaServerRequestHeaders; + + private Builder() {} + + public Builder streamType(String streamType) { + this.streamType = streamType; + return this; + } + + public Builder resourceName(String resourceName) { + this.resourceName = resourceName; + return this; + } + + public Builder contentType(String contentType) { + this.contentType = contentType; + return this; + } + + public Builder charset(String charset) { + this.charset = charset; + return this; + } + + public Builder streamName(String streamName) { + this.streamName = streamName; + return this; + } + + public Builder streamSourceInfo(String streamSourceInfo) { + this.streamSourceInfo = streamSourceInfo; + return this; + } + + public Builder streamSize(Long streamSize) { + this.streamSize = streamSize; + return this; + } + + public Builder resourcePassword(String resourcePassword) { + this.resourcePassword = resourcePassword; + return this; + } + + public Builder passwordsMap(LinkedHashMap passwordsMap) { + this.passwordsMap = passwordsMap; + return this; + } + + public Builder extractFormat(String extractFormat) { + this.extractFormat = extractFormat; + return this; + } + + public Builder tikaServerRecursive(boolean tikaServerRecursive) { + this.tikaServerRecursive = tikaServerRecursive; + return this; + } + + public Builder tikaServerTimeoutSeconds(Integer tikaServerTimeoutSeconds) { + this.tikaServerTimeoutSeconds = tikaServerTimeoutSeconds; + return this; + } + + public Builder tikaServerRequestHeaders(Map tikaServerRequestHeaders) { + 
this.tikaServerRequestHeaders = tikaServerRequestHeaders; + return this; + } + + public ExtractionRequest build() { + return new ExtractionRequest( + streamType, + resourceName, + contentType, + charset, + streamName, + streamSourceInfo, + streamSize, + resourcePassword, + passwordsMap, + extractFormat, + tikaServerRecursive, + tikaServerTimeoutSeconds, + tikaServerRequestHeaders); + } + } +} diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionResult.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionResult.java new file mode 100644 index 00000000000..97767d15367 --- /dev/null +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionResult.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.extraction; + +/** Immutable extraction result with plain text content and neutral metadata. */ +public final class ExtractionResult { + private final String content; + private final ExtractionMetadata metadata; + + public ExtractionResult(String content, ExtractionMetadata metadata) { + this.content = content == null ? 
"" : content; + this.metadata = metadata; + } + + /** Extracted textual content (plain text). */ + public String getContent() { + return content; + } + + /** Extracted metadata in neutral, backend-agnostic form. */ + public ExtractionMetadata getMetadata() { + return metadata; + } +} diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/LocalTikaExtractionBackend.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/LocalTikaExtractionBackend.java new file mode 100644 index 00000000000..1ca7268a31b --- /dev/null +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/LocalTikaExtractionBackend.java @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
package org.apache.solr.handler.extraction;

import java.io.InputStream;
import java.nio.file.Path;
import java.util.List;
import java.util.Locale;
import org.apache.solr.core.SolrCore;
import org.apache.solr.logging.DeprecationLog;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.DefaultParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.parser.html.HtmlMapper;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Extraction backend using local in-process Apache Tika. This encapsulates the previous direct
 * usage of Tika from the loader: parser selection, parse-context setup (HTML mapping, password
 * provider) and conversion between Tika's {@link Metadata} and the backend-neutral
 * {@link ExtractionMetadata}.
 *
 * @deprecated Will be removed soon, please use the 'tikaserver' extraction backend instead.
 */
@Deprecated(since = "9.10.0", forRemoval = true)
public class LocalTikaExtractionBackend implements ExtractionBackend {
  private final TikaConfig tikaConfig;
  private final ParseContextConfig parseContextConfig;
  // Fallback parser used when the request carries no explicit stream type
  private final AutoDetectParser autoDetectParser;

  // Local HtmlMapper moved from ExtractingDocumentLoader.
  // Passes almost all HTML through: discards nothing, lowercases attribute/element names, and
  // drops only <br> and <body> element names (mapped to null).
  private static class MostlyPassthroughHtmlMapper implements HtmlMapper {
    static final HtmlMapper INSTANCE = new MostlyPassthroughHtmlMapper();

    @Override
    public boolean isDiscardElement(String name) {
      return false;
    }

    @Override
    public String mapSafeAttribute(String elementName, String attributeName) {
      return attributeName.toLowerCase(java.util.Locale.ENGLISH);
    }

    @Override
    public String mapSafeElement(String name) {
      String lowerName = name.toLowerCase(java.util.Locale.ROOT);
      return (lowerName.equals("br") || lowerName.equals("body")) ? null : lowerName;
    }
  }

  /** Construct backend from an already-loaded Tika configuration. */
  public LocalTikaExtractionBackend(TikaConfig config, ParseContextConfig parseContextConfig) {
    this.tikaConfig = config;
    this.parseContextConfig = parseContextConfig;
    this.autoDetectParser = new AutoDetectParser(config);
  }

  /**
   * Construct backend by loading TikaConfig based on handler/core configuration without exposing
   * Tika types to the handler.
   *
   * @param core the SolrCore, used for resource loading and path-allowlist checks
   * @param tikaConfigLoc location of the Tika config; null for the bundled default, an absolute
   *     path, or a path resolved relative to the core's conf/ directory
   * @param parseContextConfig factory for per-parse ParseContext instances
   * @throws Exception if the Tika configuration cannot be loaded
   */
  public LocalTikaExtractionBackend(
      SolrCore core, String tikaConfigLoc, ParseContextConfig parseContextConfig) throws Exception {
    TikaConfig cfg;
    if (tikaConfigLoc == null) { // default
      ClassLoader classLoader = core.getResourceLoader().getClassLoader();
      try (InputStream is = classLoader.getResourceAsStream("solr-default-tika-config.xml")) {
        cfg = new TikaConfig(is);
      }
    } else {
      Path configFile = Path.of(tikaConfigLoc);
      // Reject locations outside the configured allow-list before reading anything
      core.getCoreContainer().assertPathAllowed(configFile);
      if (configFile.isAbsolute()) {
        cfg = new TikaConfig(configFile);
      } else { // in conf/
        try (InputStream is = core.getResourceLoader().openResource(tikaConfigLoc)) {
          cfg = new TikaConfig(is);
        }
      }
    }
    this.tikaConfig = cfg;
    this.parseContextConfig = parseContextConfig;
    this.autoDetectParser = new AutoDetectParser(cfg);
    DeprecationLog.log("Local Tika", "The 'local' extraction backend is deprecated");
  }

  public static final String NAME = "local";

  @Override
  public String name() {
    return NAME;
  }

  /**
   * Pick the parser for this request: an explicit stream type selects a concrete parser from the
   * media-type registry (may yield null for unknown types), otherwise auto-detection is used.
   */
  private Parser selectParser(ExtractionRequest request) {
    if (request.streamType != null) {
      MediaType mt = MediaType.parse(request.streamType.trim().toLowerCase(Locale.ROOT));
      return new DefaultParser(tikaConfig.getMediaTypeRegistry()).getParsers().get(mt);
    }
    return autoDetectParser;
  }

  /** Convert the request's neutral metadata into Tika {@link Metadata} for parsing. */
  private Metadata buildMetadata(ExtractionRequest request) {
    ExtractionMetadata extractionMetadata = buildMetadataFromRequest(request);
    Metadata md = new Metadata();
    for (String name : extractionMetadata.keySet()) {
      List vals = extractionMetadata.get(name);
      if (vals != null) for (String v : vals) md.add(name, v);
    }
    return md;
  }

  /**
   * Build the ParseContext: register the parser (so embedded documents are parsed recursively),
   * the permissive HTML mapper, and a password provider derived from the request's explicit
   * password and/or regex password map.
   */
  private ParseContext buildContext(Parser parser, ExtractionRequest request) {
    ParseContext context = parseContextConfig.create();
    context.set(Parser.class, parser);
    context.set(HtmlMapper.class, MostlyPassthroughHtmlMapper.INSTANCE);
    RegexRulesPasswordProvider pwd = new RegexRulesPasswordProvider();
    if (request.resourcePassword != null) {
      pwd.setExplicitPassword(request.resourcePassword);
    }
    if (request.passwordsMap != null) {
      pwd.setPasswordMap(request.passwordsMap);
    }
    // Adapter bridges the Tika-free provider to Tika's PasswordProvider interface
    context.set(PasswordProvider.class, new PasswordProviderAdapter(pwd));
    return context;
  }

  /** Copy all name/value pairs from Tika metadata into a neutral ExtractionMetadata. */
  private static ExtractionMetadata tikaMetadataToExtractionMetadata(Metadata md) {
    ExtractionMetadata out = new ExtractionMetadata();
    for (String name : md.names()) {
      String[] vals = md.getValues(name);
      if (vals != null) for (String v : vals) out.add(name, v);
    }
    return out;
  }

  /**
   * Parse the stream with Tika and return the plain-text body plus metadata.
   *
   * @throws IllegalArgumentException if an explicit stream type has no matching Tika parser
   */
  @Override
  public ExtractionResult extract(InputStream inputStream, ExtractionRequest request)
      throws Exception {
    Parser parser = selectParser(request);
    if (parser == null) {
      throw new IllegalArgumentException("No Tika parser for stream type: " + request.streamType);
    }
    ParseContext context = buildContext(parser, request);
    Metadata md = buildMetadata(request);
    // -1 disables BodyContentHandler's write limit
    BodyContentHandler textHandler = new BodyContentHandler(-1);
    parser.parse(inputStream, textHandler, md, context);
    return new ExtractionResult(textHandler.toString(), tikaMetadataToExtractionMetadata(md));
  }

  /**
   * Parse the stream, feeding SAX events to the caller's handler; metadata produced by the parse
   * is appended into {@code md} after parsing completes.
   *
   * @throws IllegalArgumentException if an explicit stream type has no matching Tika parser
   */
  @Override
  public void extractWithSaxHandler(
      InputStream inputStream,
      ExtractionRequest request,
      ExtractionMetadata md,
      DefaultHandler saxContentHandler)
      throws Exception {
    Parser parser = selectParser(request);
    if (parser == null) {
      throw new IllegalArgumentException("No Tika parser for stream type: " + request.streamType);
    }
    ParseContext context = buildContext(parser, request);
    Metadata tikaMetadata = buildMetadata(request);
    parser.parse(inputStream, saxContentHandler, tikaMetadata, context);
    // Merge what Tika discovered back into the caller-supplied neutral metadata
    for (String name : tikaMetadata.names()) {
      String[] vals = tikaMetadata.getValues(name);
      if (vals != null) for (String v : vals) md.add(name, v);
    }
  }

  /** Adapts the Tika-independent password provider to Tika's {@link PasswordProvider}. */
  private static class PasswordProviderAdapter implements PasswordProvider {
    private final ExtractionPasswordProvider delegate;

    public PasswordProviderAdapter(ExtractionPasswordProvider delegate) {
      this.delegate = delegate;
    }

    @Override
    public String getPassword(Metadata metadata) {
      return delegate.getPassword(tikaMetadataToExtractionMetadata(metadata));
    }
  }
}
*/ -public class RegexRulesPasswordProvider implements PasswordProvider { +public class RegexRulesPasswordProvider implements ExtractionPasswordProvider { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private LinkedHashMap passwordMap = new LinkedHashMap<>(); private String explicitPassword; @Override - public String getPassword(Metadata meta) { + public String getPassword(ExtractionMetadata extractionMetadata) { if (getExplicitPassword() != null) { return getExplicitPassword(); } if (passwordMap.size() > 0) - return lookupPasswordFromMap(meta.get(TikaMetadataKeys.RESOURCE_NAME_KEY)); + return lookupPasswordFromMap( + extractionMetadata.getFirst(ExtractingMetadataConstants.RESOURCE_NAME_KEY)); return null; } diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java index 9edba0e925e..f9d84167127 100644 --- a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java @@ -23,6 +23,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; @@ -30,8 +31,6 @@ import org.apache.solr.common.params.SolrParams; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.metadata.TikaMetadataKeys; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.Attributes; @@ -57,7 +56,7 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara protected final SolrInputDocument document; - protected final Metadata metadata; + protected final ExtractionMetadata metadata; protected final SolrParams params; protected 
final StringBuilder catchAllBuilder = new StringBuilder(2048); protected final IndexSchema schema; @@ -74,7 +73,7 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara private Set literalFieldNames = null; - public SolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) { + public SolrContentHandler(ExtractionMetadata metadata, SolrParams params, IndexSchema schema) { this.document = new SolrInputDocument(); this.metadata = metadata; this.params = params; @@ -152,6 +151,13 @@ protected void addContent() { addField(contentFieldName, catchAllBuilder.toString(), null); } + /** Append pre-extracted plain text content to the catch-all builder. */ + public void appendToContent(String text) { + if (text != null && !text.isEmpty()) { + catchAllBuilder.append(text); + } + } + /** * Add in the literals to the document using the {@link #params} and the {@link #LITERALS_PREFIX}. */ @@ -170,10 +176,10 @@ protected void addLiterals() { /** Add in any metadata using {@link #metadata} as the source. */ protected void addMetadata() { - for (String name : metadata.names()) { + for (String name : metadata.keySet()) { if (literalsOverride && literalFieldNames.contains(name)) continue; - String[] vals = metadata.getValues(name); - addField(name, null, vals); + List vals = metadata.get(name); + addField(name, null, vals.toArray(new String[0])); } } @@ -200,7 +206,7 @@ protected void addField(String fname, String fval, String[] vals) { sf = schema.getFieldOrNull(name); } else if (sf == null && defaultField.length() > 0 - && name.equals(TikaMetadataKeys.RESOURCE_NAME_KEY) + && name.equals(ExtractingMetadataConstants.RESOURCE_NAME_KEY) == false /*let the fall through below handle this*/) { name = defaultField; sf = schema.getFieldOrNull(name); @@ -213,7 +219,7 @@ protected void addField(String fname, String fval, String[] vals) { // you? 
if (sf == null && unknownFieldPrefix.length() == 0 - && Objects.equals(name, TikaMetadataKeys.RESOURCE_NAME_KEY)) { + && Objects.equals(name, ExtractingMetadataConstants.RESOURCE_NAME_KEY)) { return; } diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java index 1070e744d84..b4fe031a068 100644 --- a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java @@ -18,7 +18,6 @@ import org.apache.solr.common.params.SolrParams; import org.apache.solr.schema.IndexSchema; -import org.apache.tika.metadata.Metadata; /** */ public class SolrContentHandlerFactory { @@ -26,7 +25,7 @@ public class SolrContentHandlerFactory { public SolrContentHandlerFactory() {} public SolrContentHandler createSolrContentHandler( - Metadata metadata, SolrParams params, IndexSchema schema) { + ExtractionMetadata metadata, SolrParams params, IndexSchema schema) { return new SolrContentHandler(metadata, params, schema); } } diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/TikaServerExtractionBackend.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/TikaServerExtractionBackend.java new file mode 100644 index 00000000000..b4a5324575b --- /dev/null +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/TikaServerExtractionBackend.java @@ -0,0 +1,450 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
package org.apache.solr.handler.extraction;

import java.io.IOException;
import java.io.InputStream;
import java.net.ConnectException;
import java.net.SocketTimeoutException;
import java.nio.channels.ClosedChannelException;
import java.time.Duration;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SolrNamedThreadFactory;
import org.apache.solr.handler.extraction.fromtika.BodyContentHandler;
import org.apache.solr.util.RefCounted;
import org.eclipse.jetty.client.HttpClient;
import org.eclipse.jetty.client.InputStreamRequestContent;
import org.eclipse.jetty.client.InputStreamResponseListener;
import org.eclipse.jetty.client.Request;
import org.eclipse.jetty.client.Response;
import org.eclipse.jetty.io.EofException;
import org.eclipse.jetty.util.thread.ScheduledExecutorScheduler;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Extraction backend using the Tika Server. It uses a shared Jetty HttpClient (one per JVM,
 * reference-counted so the last backend closed stops the client and its executor). Documents are
 * PUT to the server's {@code /tika} (XML response) or {@code /rmeta} (recursive JSON response)
 * endpoint and the response is parsed back into neutral {@link ExtractionMetadata}.
 */
public class TikaServerExtractionBackend implements ExtractionBackend {
  /**
   * Default maximum response size (100MB) to prevent excessive memory usage from large documents
   */
  public static final long DEFAULT_MAXCHARS_LIMIT = 100 * 1024 * 1024;

  // Guards lazy init and teardown of the shared HttpClient resources
  private static final Object INIT_LOCK = new Object();
  // Normalized base URL of the Tika Server (no trailing slash)
  private final String baseUrl;
  private static final int DEFAULT_TIMEOUT_SECONDS = 3 * 60;
  private final Duration defaultTimeout;
  private final TikaServerParser tikaServerResponseParser = new TikaServerParser();
  // When true, legacy (pre-Dublin-Core) metadata field names are added alongside new ones
  private boolean tikaMetadataCompatibility;
  private HashMap initArgsMap = new HashMap<>();
  private final long maxCharsLimit;

  // Singleton holder for the shared HttpClient/Executor resources (one per JVM)
  private static volatile RefCounted SHARED_RESOURCES;
  // Per-backend handle (same RefCounted instance as SHARED_RESOURCES) that this instance will
  // decref() on close
  private RefCounted acquiredResourcesRef;

  /** Construct with default timeout, no init args and the default response-size limit. */
  public TikaServerExtractionBackend(String baseUrl) {
    this(baseUrl, DEFAULT_TIMEOUT_SECONDS, null, DEFAULT_MAXCHARS_LIMIT);
  }

  /**
   * @param baseUrl Tika Server base URL; must be a valid http/https URL; trailing slash is stripped
   * @param timeoutSeconds default request timeout; values &lt;= 0 fall back to the 3-minute default
   * @param initArgs optional handler init args (e.g. the metadata-compatibility flag)
   * @param maxCharsLimit maximum bytes read from a Tika Server response; &lt;= 0 means unlimited
   * @throws IllegalArgumentException if baseUrl is null, empty, malformed or not http/https
   */
  public TikaServerExtractionBackend(
      String baseUrl, int timeoutSeconds, NamedList initArgs, long maxCharsLimit) {
    // Validate baseUrl
    if (baseUrl == null || baseUrl.trim().isEmpty()) {
      throw new IllegalArgumentException("baseUrl cannot be null or empty");
    }
    // Validate URL format and scheme
    try {
      java.net.URI uri = new java.net.URI(baseUrl);
      String scheme = uri.getScheme();
      if (scheme == null
          || (!scheme.equalsIgnoreCase("http") && !scheme.equalsIgnoreCase("https"))) {
        throw new IllegalArgumentException(
            "baseUrl must use http or https scheme, got: " + baseUrl);
      }
      uri.toURL(); // Additional validation that it's a valid URL
    } catch (java.net.URISyntaxException | java.net.MalformedURLException e) {
      throw new IllegalArgumentException("Invalid baseUrl: " + baseUrl, e);
    }

    this.maxCharsLimit = maxCharsLimit;
    if (initArgs != null) {
      initArgs.toMap(this.initArgsMap);
    }
    Object metaCompatObh = this.initArgsMap.get(ExtractingParams.TIKASERVER_METADATA_COMPATIBILITY);
    if (metaCompatObh != null) {
      this.tikaMetadataCompatibility = Boolean.parseBoolean(metaCompatObh.toString());
    }
    // Normalize a non-positive timeout to the default before building the Duration below
    if (timeoutSeconds <= 0) {
      timeoutSeconds = DEFAULT_TIMEOUT_SECONDS;
    }
    if (baseUrl.endsWith("/")) {
      this.baseUrl = baseUrl.substring(0, baseUrl.length() - 1);
    } else {
      this.baseUrl = baseUrl;
    }
    this.defaultTimeout =
        Duration.ofSeconds(timeoutSeconds > 0 ? timeoutSeconds : DEFAULT_TIMEOUT_SECONDS);

    // Acquire a reference to the shared resources; keep a handle so we can decref() on close
    acquiredResourcesRef = initializeHttpClient().incref();
  }

  public static final String NAME = "tikaserver";

  @Override
  public String name() {
    return NAME;
  }

  /** Parse the stream via the Tika Server and return plain-text content plus metadata. */
  @Override
  public ExtractionResult extract(InputStream inputStream, ExtractionRequest request)
      throws Exception {
    try (InputStream tikaResponse = callTikaServer(inputStream, request)) {
      ExtractionMetadata md = buildMetadataFromRequest(request);
      // -1 disables the handler's write limit; overall size is bounded by LimitingInputStream
      BodyContentHandler bodyContentHandler = new BodyContentHandler(-1);
      if (request.tikaServerRecursive) {
        tikaServerResponseParser.parseRmetaJson(tikaResponse, bodyContentHandler, md);
      } else {
        tikaServerResponseParser.parseXml(tikaResponse, bodyContentHandler, md);
      }
      if (tikaMetadataCompatibility) {
        appendBackCompatTikaMetadata(md);
      }
      return new ExtractionResult(bodyContentHandler.toString(), md);
    }
  }

  /** Parse the stream via the Tika Server, feeding SAX events to the caller's handler. */
  @Override
  public void extractWithSaxHandler(
      InputStream inputStream,
      ExtractionRequest request,
      ExtractionMetadata md,
      DefaultHandler saxContentHandler)
      throws Exception {
    try (InputStream tikaResponse = callTikaServer(inputStream, request)) {
      if (request.tikaServerRecursive) {
        tikaServerResponseParser.parseRmetaJson(tikaResponse, saxContentHandler, md);
      } else {
        tikaServerResponseParser.parseXml(tikaResponse, saxContentHandler, md);
      }
      if (tikaMetadataCompatibility) {
        appendBackCompatTikaMetadata(md);
      }
    }
  }

  /**
   * Call the Tika Server to extract text and metadata. Depending on {@code
   * request.tikaServerRecursive}, the server will either return XML (false) or a JSON array
   * (true). The recursive mode consumes more memory both on the TikaServer side and on the Solr
   * side.
   *
   * @return InputStream of the response body, either XML or JSON depending on {@code
   *     request.tikaServerRecursive}; wrapped so that reading beyond {@code maxCharsLimit} bytes
   *     fails
   * @throws SolrException GATEWAY_TIMEOUT on timeout, SERVICE_UNAVAILABLE on connection-level
   *     failures, SERVER_ERROR on interruption or unexpected errors, or the mapped HTTP error for
   *     non-2xx responses
   */
  InputStream callTikaServer(InputStream inputStream, ExtractionRequest request) throws Exception {
    String url = baseUrl + (request.tikaServerRecursive ? "/rmeta" : "/tika");

    HttpClient client = acquiredResourcesRef.get().client;

    Request req = client.newRequest(url).method("PUT");
    // Per-request timeout override wins when positive; otherwise the backend default applies
    Duration effectiveTimeout =
        (request.tikaServerTimeoutSeconds != null && request.tikaServerTimeoutSeconds > 0)
            ? Duration.ofSeconds(request.tikaServerTimeoutSeconds)
            : defaultTimeout;
    req.timeout(effectiveTimeout.toMillis(), TimeUnit.MILLISECONDS);

    // Headers
    String accept = (request.tikaServerRecursive ? "application/json" : "text/xml");
    req.headers(h -> h.add("Accept", accept));
    // An explicit stream type takes precedence over the incoming HTTP content type
    String contentType = (request.streamType != null) ? request.streamType : request.contentType;
    if (contentType != null) {
      req.headers(h -> h.add("Content-Type", contentType));
    }
    if (!request.tikaServerRequestHeaders.isEmpty()) {
      req.headers(
          h ->
              request.tikaServerRequestHeaders.forEach(
                  (k, v) -> {
                    if (k != null && v != null) h.add(k, v);
                  }));
    }

    // Resolve a document password locally (explicit or regex-matched) and forward it as a header
    ExtractionMetadata md = buildMetadataFromRequest(request);
    if (request.resourcePassword != null || request.passwordsMap != null) {
      RegexRulesPasswordProvider passwordProvider = new RegexRulesPasswordProvider();
      if (request.resourcePassword != null) {
        passwordProvider.setExplicitPassword(request.resourcePassword);
      }
      if (request.passwordsMap != null) {
        passwordProvider.setPasswordMap(request.passwordsMap);
      }
      String pwd = passwordProvider.getPassword(md);
      if (pwd != null) {
        req.headers(h -> h.add("Password", pwd)); // Tika Server expects this header if provided
      }
    }
    if (request.resourceName != null) {
      // Gives Tika Server a filename hint for type detection
      req.headers(
          h ->
              h.add(
                  "Content-Disposition", "attachment; filename=\"" + request.resourceName + "\""));
    }

    if (contentType != null) {
      req.body(new InputStreamRequestContent(contentType, inputStream));
    } else {
      req.body(new InputStreamRequestContent(inputStream));
    }

    // Async send; the listener lets us stream the response body without buffering it all
    InputStreamResponseListener listener = new InputStreamResponseListener();
    req.send(listener);

    final Response response;
    try {
      response = listener.get(effectiveTimeout.toMillis(), TimeUnit.MILLISECONDS);
    } catch (TimeoutException te) {
      throw new SolrException(
          SolrException.ErrorCode.GATEWAY_TIMEOUT,
          "Timeout after "
              + effectiveTimeout.toMillis()
              + " ms while waiting for response from TikaServer "
              + url,
          te);
    } catch (InterruptedException ie) {
      // Preserve the interrupt flag for callers higher up the stack
      Thread.currentThread().interrupt();
      throw new SolrException(
          SolrException.ErrorCode.SERVER_ERROR,
          "Interrupted while waiting for response from TikaServer " + url,
          ie);
    } catch (ExecutionException ee) {
      // Connection-level failures map to 503 so clients know the backend (not Solr) is down
      Throwable cause = ee.getCause();
      if (cause instanceof ConnectException
          || cause instanceof SocketTimeoutException
          || cause instanceof EofException
          || cause instanceof ClosedChannelException) {
        throw new SolrException(
            SolrException.ErrorCode.SERVICE_UNAVAILABLE,
            "Error communicating with TikaServer "
                + url
                + ": "
                + cause.getClass().getSimpleName()
                + ": "
                + cause.getMessage(),
            cause);
      }
      throw new SolrException(
          SolrException.ErrorCode.SERVER_ERROR,
          "Unexpected error while calling TikaServer " + url,
          ee);
    }

    int code = response.getStatus();
    if (code < 200 || code >= 300) {
      SolrException.ErrorCode errorCode = SolrException.ErrorCode.getErrorCode(code);
      String reason = response.getReason();
      String msg =
          "TikaServer "
              + url
              + " returned status "
              + code
              + (reason != null ? " (" + reason + ")" : "");
      throw new SolrException(errorCode, msg);
    }

    InputStream responseStream = listener.getInputStream();
    // Bound the amount of data we read from Tika Server to avoid excessive memory/CPU usage
    return new LimitingInputStream(responseStream, maxCharsLimit);
  }

  /**
   * InputStream wrapper that throws a BAD_REQUEST SolrException once more than {@code max} bytes
   * have been read or skipped. A non-positive {@code max} disables the limit.
   */
  private static class LimitingInputStream extends InputStream {
    private final InputStream in;
    private final long max;
    private long count;

    LimitingInputStream(InputStream in, long max) {
      this.in = in;
      this.max = max;
      this.count = 0L;
    }

    private void checkLimit(long toAdd) {
      if (max <= 0) return; // non-positive means unlimited
      long newCount = count + toAdd;
      if (newCount > max) {
        throw new SolrException(
            SolrException.ErrorCode.BAD_REQUEST,
            "TikaServer response exceeded the configured maximum size of " + max + " bytes");
      }
      count = newCount;
    }

    @Override
    public int read() throws IOException {
      int b = in.read();
      if (b != -1) {
        checkLimit(1);
      }
      return b;
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
      int n = in.read(b, off, len);
      if (n > 0) {
        checkLimit(n);
      }
      return n;
    }

    @Override
    public long skip(long n) throws IOException {
      long skipped = in.skip(n);
      if (skipped > 0) {
        checkLimit(skipped);
      }
      return skipped;
    }

    @Override
    public void close() throws IOException {
      in.close();
    }

    @Override
    public int available() throws IOException {
      return in.available();
    }
  }

  /** Immutable pair of the shared Jetty client and the executor it runs on. */
  private static final class HttpClientResources {
    final HttpClient client;
    final ExecutorService executor;

    HttpClientResources(HttpClient client, ExecutorService executor) {
      this.client = client;
      this.executor = executor;
    }
  }

  /**
   * RefCounted wrapper whose close() (invoked when the last reference is released) stops the
   * client, shuts down the executor and clears the JVM-wide shared reference.
   */
  private static final class ResourcesRef extends RefCounted {
    ResourcesRef(HttpClientResources r) {
      super(r);
    }

    @Override
    protected void close() {
      // stop client and shutdown executor
      try {
        if (resource.client != null) resource.client.stop();
      } catch (Throwable ignore) {
      }
      try {
        if (resource.executor != null) resource.executor.shutdownNow();
      } catch (Throwable ignore) {
      }
      synchronized (INIT_LOCK) {
        // clear the shared reference when closed
        if (SHARED_RESOURCES == this) {
          SHARED_RESOURCES = null;
        }
      }
    }
  }

  /**
   * Lazily create (or return) the JVM-wide shared HttpClient resources. Double-checked locking on
   * INIT_LOCK with the volatile SHARED_RESOURCES field.
   */
  private static RefCounted initializeHttpClient() {
    RefCounted ref = SHARED_RESOURCES;
    if (ref != null) return ref;
    synchronized (INIT_LOCK) {
      if (SHARED_RESOURCES != null) return SHARED_RESOURCES;
      ThreadFactory tf = new SolrNamedThreadFactory("TikaServerHttpClient");
      ExecutorService exec = ExecutorUtil.newMDCAwareCachedThreadPool(tf);
      HttpClient client = new HttpClient();
      client.setExecutor(exec);
      client.setScheduler(new ScheduledExecutorScheduler("TikaServerHttpClient-scheduler", true));
      try {
        client.start();
      } catch (Exception e) {
        // Don't leak the executor if the client failed to start
        try {
          exec.shutdownNow();
        } catch (Throwable ignore) {
        }
        throw new SolrException(
            SolrException.ErrorCode.SERVER_ERROR, "Failed to start shared Jetty HttpClient", e);
      }
      SHARED_RESOURCES = new ResourcesRef(new HttpClientResources(client, exec));
      return SHARED_RESOURCES;
    }
  }

  // Maps modern (mostly Dublin Core) metadata names to the legacy names older Solr Cell
  // configurations expect; insertion order preserved for predictable application
  private final Map fieldMappings = new LinkedHashMap<>();

  // TODO: Improve backward compatibility by adding more mappings
  {
    fieldMappings.put("dc:title", "title");
    fieldMappings.put("dc:creator", "author");
    fieldMappings.put("dc:description", "description");
    fieldMappings.put("dc:subject", "subject");
    fieldMappings.put("dc:language", "language");
    fieldMappings.put("dc:publisher", "publisher");
    fieldMappings.put("dcterms:created", "created");
    fieldMappings.put("dcterms:modified", "modified");
    fieldMappings.put("meta:author", "Author");
    fieldMappings.put("meta:creation-date", "Creation-Date");
    fieldMappings.put("meta:save-date", "Last-Save-Date");
    fieldMappings.put("meta:keyword", "Keywords");
    fieldMappings.put("pdf:docinfo:keywords", "Keywords");
  }

  /*
   * Appends back-compatible metadata into the given {@code ExtractionMetadata} instance by mapping
   * source fields to target fields, provided that backward compatibility is enabled. If a source
   * field exists and the target field is not yet populated, the values from the source field will
   * be added to the target field.
   */
  private void appendBackCompatTikaMetadata(ExtractionMetadata md) {
    for (Map.Entry mapping : fieldMappings.entrySet()) {
      String sourceField = mapping.getKey();
      String targetField = mapping.getValue();
      if (md.getFirst(sourceField) != null && md.getFirst(targetField) == null) {
        md.add(targetField, md.get(sourceField));
      }
    }
  }

  /** Release this backend's reference to the shared HttpClient (idempotent). */
  @Override
  public void close() {
    RefCounted ref;
    synchronized (INIT_LOCK) {
      ref = acquiredResourcesRef;
      acquiredResourcesRef = null;
    }
    if (ref != null) {
      ref.decref();
    }
  }
}
+ */ +package org.apache.solr.handler.extraction; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Map; +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.Utils; +import org.xml.sax.ContentHandler; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.DefaultHandler; + +public class TikaServerParser { + private final SAXParser saxParser; + + public TikaServerParser() { + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + try { + factory.setFeature("http://xml.org/sax/features/external-general-entities", false); + factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); + factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + } catch (Throwable ignore) { + // Some parsers may not support all features; ignore + } + try { + saxParser = factory.newSAXParser(); + } catch (Exception e) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); + } + } + + /** + * Parses response in XML format from Tika Server /tika endpoint. The result is that the metadata + * object is populated and the content handler is called with extracted text. + */ + public void parseXml(InputStream inputStream, ContentHandler handler, ExtractionMetadata metadata) + throws IOException, SAXException { + DefaultHandler xmlHandler = new TikaXmlResponseSaxContentHandler(handler, metadata); + try (Reader reader = + new XmlSanitizingReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) { + saxParser.parse(new InputSource(reader), xmlHandler); + } + } + + /** + * Parses response in JSON format from Tika Server /rmeta endpoint. 
The result is that the + * metadata object is populated, and the content handler is called with extracted text. + * + * @param jsonStream - JSON stream to parse + * @param handler - SAX content handler to call with extracted text + * @param md - metadata object to populate + */ + @SuppressWarnings({"rawtypes", "PatternVariableCanBeUsed"}) + void parseRmetaJson(InputStream jsonStream, DefaultHandler handler, ExtractionMetadata md) + throws IOException, SAXException { + Object parsed = Utils.fromJSON(jsonStream); + if (!(parsed instanceof List)) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, "Unexpected /rmeta response, expected JSON array"); + } + List list = (List) parsed; + for (Object o : list) { + if (!(o instanceof Map)) continue; + Map map = (Map) o; + // Copy metadata + for (Object k : map.keySet()) { + String key = String.valueOf(k); + Object val = map.get(k); + if ("X-TIKA:content".equalsIgnoreCase(key)) { + // handled below + continue; + } + if (val instanceof List) { + for (Object v : (List) val) { + if (v != null) md.add(key, String.valueOf(v)); + } + } else if (val != null) { + md.add(key, String.valueOf(val)); + } + } + Object content = map.get("X-TIKA:content"); + if (content != null) { + String xhtml = String.valueOf(content); + if (!xhtml.isEmpty() && handler != null) { + InputStream inputStream = + new ByteArrayInputStream(xhtml.getBytes(StandardCharsets.UTF_8)); + try (Reader reader = + new XmlSanitizingReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) { + saxParser.parse(new InputSource(reader), handler); + } + } + } + } + } + + /** Custom SAX handler that will extract meta tags from the tika xml and delegate */ + static class TikaXmlResponseSaxContentHandler extends DefaultHandler { + private final ContentHandler delegate; + private final ExtractionMetadata metadata; + private boolean inHead = false; + + public TikaXmlResponseSaxContentHandler(ContentHandler delegate, ExtractionMetadata metadata) { + 
this.delegate = delegate; + this.metadata = metadata; + } + + @Override + public void startDocument() throws SAXException { + if (delegate != null) delegate.startDocument(); + } + + @Override + public void endDocument() throws SAXException { + if (delegate != null) delegate.endDocument(); + } + + @Override + public void startElement( + String uri, String localName, String qName, org.xml.sax.Attributes attributes) + throws SAXException { + String ln = localName != null && !localName.isEmpty() ? localName : qName; + if ("head".equalsIgnoreCase(ln)) { + inHead = true; + } else if (inHead && "meta".equalsIgnoreCase(ln) && attributes != null) { + String name = attributes.getValue("name"); + String content = attributes.getValue("content"); + if (name != null && content != null) { + metadata.add(name, content); + } + } + if (delegate != null) delegate.startElement(uri, localName, qName, attributes); + } + + @Override + public void endElement(String uri, String localName, String qName) throws SAXException { + String ln = localName != null && !localName.isEmpty() ? 
localName : qName; + if ("head".equalsIgnoreCase(ln)) { + inHead = false; + } + if (delegate != null) delegate.endElement(uri, localName, qName); + } + + @Override + public void characters(char[] ch, int start, int length) throws SAXException { + if (delegate != null) delegate.characters(ch, start, length); + } + + @Override + public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { + if (delegate != null) delegate.ignorableWhitespace(ch, start, length); + } + + @Override + public void startPrefixMapping(String prefix, String uri) throws SAXException { + if (delegate != null) delegate.startPrefixMapping(prefix, uri); + } + + @Override + public void endPrefixMapping(String prefix) throws SAXException { + if (delegate != null) delegate.endPrefixMapping(prefix); + } + } +} diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/XmlSanitizingReader.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/XmlSanitizingReader.java new file mode 100644 index 00000000000..78e3ca149fc --- /dev/null +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/XmlSanitizingReader.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.extraction; + +import java.io.FilterReader; +import java.io.IOException; +import java.io.Reader; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Filters out null character entities (�, �, etc.) from XML content. + * + *

Removes numeric character entities that resolve to code point 0, such as or + * . Everything else is passed through unchanged. + */ +final class XmlSanitizingReader extends FilterReader { + private static final Pattern NULL_ENTITY_PATTERN = + Pattern.compile("&#(0+|x0+);", Pattern.CASE_INSENSITIVE); + private static final int BUFFER_SIZE = 8192; + private static final int OVERLAP_SIZE = 16; // Max entity length: � + + private final char[] readBuffer = new char[BUFFER_SIZE + OVERLAP_SIZE]; + private final char[] buffer = new char[BUFFER_SIZE + OVERLAP_SIZE]; + private final StringBuilder sb = new StringBuilder(BUFFER_SIZE + OVERLAP_SIZE); + private final StringBuffer result = new StringBuffer(BUFFER_SIZE + OVERLAP_SIZE); + private int bufferPos = 0; + private int bufferLimit = 0; + private int overlapLen = 0; + private boolean eof = false; + + XmlSanitizingReader(Reader in) { + super(in); + } + + @Override + public int read() throws IOException { + if (bufferPos < bufferLimit) { + return buffer[bufferPos++]; + } + if (fillBuffer() == -1) { + return -1; + } + return buffer[bufferPos++]; + } + + @Override + public int read(char[] cbuf, int off, int len) throws IOException { + if (len == 0) return 0; + int totalRead = 0; + while (totalRead < len) { + int available = bufferLimit - bufferPos; + if (available > 0) { + int toCopy = Math.min(available, len - totalRead); + System.arraycopy(buffer, bufferPos, cbuf, off + totalRead, toCopy); + bufferPos += toCopy; + totalRead += toCopy; + } else { + if (fillBuffer() == -1) { + return totalRead == 0 ? 
-1 : totalRead; + } + } + } + return totalRead; + } + + private int fillBuffer() throws IOException { + if (eof) return -1; + + // Copy overlap from end of previous buffer + if (overlapLen > 0) { + System.arraycopy(buffer, bufferLimit - overlapLen, readBuffer, 0, overlapLen); + } + + // Read new data + int read = in.read(readBuffer, overlapLen, BUFFER_SIZE); + if (read == -1) { + eof = true; + if (overlapLen == 0) return -1; + // Process remaining overlap at EOF + read = 0; + } + + // Sanitize without allocating a String + sb.setLength(0); + sb.append(readBuffer, 0, overlapLen + read); + + result.setLength(0); + Matcher matcher = NULL_ENTITY_PATTERN.matcher(sb); + while (matcher.find()) { + matcher.appendReplacement(result, ""); + } + matcher.appendTail(result); + + result.getChars(0, result.length(), buffer, 0); + bufferLimit = result.length(); + bufferPos = overlapLen; + + // Edge case: if sanitization removed characters from overlap at EOF, + // bufferPos might exceed bufferLimit + if (bufferPos > bufferLimit) { + bufferPos = bufferLimit; + } + + // Keep last OVERLAP_SIZE chars for next iteration (unless EOF) + overlapLen = eof ? 0 : Math.min(OVERLAP_SIZE, bufferLimit); + + return bufferLimit - bufferPos; + } +} diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/fromtika/BodyContentHandler.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/fromtika/BodyContentHandler.java new file mode 100644 index 00000000000..34a3c071808 --- /dev/null +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/fromtika/BodyContentHandler.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file was copied from Apache Tika 1.28.5 (org.apache.tika.sax.BodyContentHandler). + * It still depends on Tika, see imports. + *

+ * TODO: Find a replacement for this class before removing Tika dependency + *

+ */ +package org.apache.solr.handler.extraction.fromtika; + +import java.io.OutputStream; +import java.io.Writer; +import org.apache.tika.sax.WriteOutContentHandler; +import org.apache.tika.sax.xpath.Matcher; +import org.apache.tika.sax.xpath.MatchingContentHandler; +import org.apache.tika.sax.xpath.XPathParser; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + +/** + * Content handler decorator that only passes everything inside the XHTML <body/> tag to the + * underlying handler. Note that the <body/> tag itself is not passed on. + */ +public class BodyContentHandler extends ContentHandlerDecorator { + + /** The XHTML namespace URI (from Apache Tika 1.28.5 XHTMLContentHandler.XHTML) */ + private static final String XHTML = "http://www.w3.org/1999/xhtml"; + + /** XHTML XPath parser. */ + private static final XPathParser PARSER = new XPathParser("xhtml", XHTML); + + /** The XPath matcher used to select the XHTML body contents. */ + private static final Matcher MATCHER = PARSER.parse("/xhtml:html/xhtml:body/descendant::node()"); + + /** + * Creates a content handler that passes all XHTML body events to the given underlying content + * handler. + * + * @param handler content handler + */ + public BodyContentHandler(ContentHandler handler) { + super(new MatchingContentHandler(handler, MATCHER)); + } + + /** + * Creates a content handler that writes XHTML body character events to the given writer. + * + * @param writer writer + */ + public BodyContentHandler(Writer writer) { + this(new WriteOutContentHandler(writer)); + } + + /** + * Creates a content handler that writes XHTML body character events to the given output stream + * using the default encoding. + * + * @param stream output stream + */ + public BodyContentHandler(OutputStream stream) { + this(new WriteOutContentHandler(stream)); + } + + /** + * Creates a content handler that writes XHTML body character events to an internal string buffer. 
+ * The contents of the buffer can be retrieved using the {@link #toString()} method. + * + *

The internal string buffer is bounded at the given number of characters. If this write limit + * is reached, then a {@link SAXException} is thrown. + * + * @since Apache Tika 0.7 + * @param writeLimit maximum number of characters to include in the string, or -1 to disable the + * write limit + */ + public BodyContentHandler(int writeLimit) { + this(new WriteOutContentHandler(writeLimit)); + } + + /** + * Creates a content handler that writes XHTML body character events to an internal string buffer. + * The contents of the buffer can be retrieved using the {@link #toString()} method. + * + *

The internal string buffer is bounded at 100k characters. If this write limit is reached, + * then a {@link SAXException} is thrown. + */ + public BodyContentHandler() { + this(new WriteOutContentHandler()); + } +} diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/fromtika/ContentHandlerDecorator.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/fromtika/ContentHandlerDecorator.java new file mode 100644 index 00000000000..8fda4712697 --- /dev/null +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/fromtika/ContentHandlerDecorator.java @@ -0,0 +1,183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file was originally part of Apache Tika 1.28.5 (org.apache.tika.sax.ContentHandlerDecorator) + * and has been copied into the Solr codebase to eliminate the Tika dependency. + */ +package org.apache.solr.handler.extraction.fromtika; + +import org.xml.sax.Attributes; +import org.xml.sax.ContentHandler; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.DefaultHandler; + +/** + * Decorator base class for the {@link ContentHandler} interface. 
This class simply delegates all + * SAX events calls to an underlying decorated handler instance. Subclasses can provide extra + * decoration by overriding one or more of the SAX event methods. + */ +public class ContentHandlerDecorator extends DefaultHandler { + + /** Decorated SAX event handler. */ + private ContentHandler handler; + + /** + * Creates a decorator for the given SAX event handler. + * + * @param handler SAX event handler to be decorated + */ + public ContentHandlerDecorator(ContentHandler handler) { + assert handler != null; + this.handler = handler; + } + + /** + * Creates a decorator that by default forwards incoming SAX events to a dummy content handler + * that simply ignores all the events. Subclasses should use the {@link + * #setContentHandler(ContentHandler)} method to switch to a more usable underlying content + * handler. + */ + protected ContentHandlerDecorator() { + this(new DefaultHandler()); + } + + /** + * Sets the underlying content handler. All future SAX events will be directed to this handler + * instead of the one that was previously used. 
+ * + * @param handler content handler + */ + protected void setContentHandler(ContentHandler handler) { + assert handler != null; + this.handler = handler; + } + + @Override + public void startPrefixMapping(String prefix, String uri) throws SAXException { + try { + handler.startPrefixMapping(prefix, uri); + } catch (SAXException e) { + handleException(e); + } + } + + @Override + public void endPrefixMapping(String prefix) throws SAXException { + try { + handler.endPrefixMapping(prefix); + } catch (SAXException e) { + handleException(e); + } + } + + @Override + public void processingInstruction(String target, String data) throws SAXException { + try { + handler.processingInstruction(target, data); + } catch (SAXException e) { + handleException(e); + } + } + + @Override + public void setDocumentLocator(Locator locator) { + handler.setDocumentLocator(locator); + } + + @Override + public void startDocument() throws SAXException { + try { + handler.startDocument(); + } catch (SAXException e) { + handleException(e); + } + } + + @Override + public void endDocument() throws SAXException { + try { + handler.endDocument(); + } catch (SAXException e) { + handleException(e); + } + } + + @Override + public void startElement(String uri, String localName, String name, Attributes atts) + throws SAXException { + try { + handler.startElement(uri, localName, name, atts); + } catch (SAXException e) { + handleException(e); + } + } + + @Override + public void endElement(String uri, String localName, String name) throws SAXException { + try { + handler.endElement(uri, localName, name); + } catch (SAXException e) { + handleException(e); + } + } + + @Override + public void characters(char[] ch, int start, int length) throws SAXException { + try { + handler.characters(ch, start, length); + } catch (SAXException e) { + handleException(e); + } + } + + @Override + public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { + try { + handler.ignorableWhitespace(ch, 
start, length); + } catch (SAXException e) { + handleException(e); + } + } + + @Override + public void skippedEntity(String name) throws SAXException { + try { + handler.skippedEntity(name); + } catch (SAXException e) { + handleException(e); + } + } + + @Override + public String toString() { + return handler.toString(); + } + + /** + * Handle any exceptions thrown by methods in this class. This method provides a single place to + * implement custom exception handling. The default behaviour is simply to re-throw the given + * exception, but subclasses can also provide alternative ways of handling the situation. + * + * @param exception the exception that was thrown + * @throws SAXException the exception (if any) thrown to the client + */ + protected void handleException(SAXException exception) throws SAXException { + throw exception; + } +} diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/fromtika/ToTextContentHandler.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/fromtika/ToTextContentHandler.java new file mode 100644 index 00000000000..d4255fa2fb1 --- /dev/null +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/fromtika/ToTextContentHandler.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file was originally part of Apache Tika 1.28.5 (org.apache.tika.sax.ToTextContentHandler) + * and has been copied into the Solr codebase to eliminate the Tika dependency. + */ +package org.apache.solr.handler.extraction.fromtika; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.StringWriter; +import java.io.UnsupportedEncodingException; +import java.io.Writer; +import java.nio.charset.Charset; +import java.util.Locale; +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.DefaultHandler; + +/** + * SAX event handler that writes all character content out to a character stream. No escaping or + * other transformations are made on the character content. + * + *

As of Tika 1.20, this handler ignores content within <script> and <style> tags. + * + * @since Apache Tika 0.10 + */ +public class ToTextContentHandler extends DefaultHandler { + + private static final String STYLE = "STYLE"; + private static final String SCRIPT = "SCRIPT"; + private int styleDepth = 0; + private int scriptDepth = 0; + + /** The character stream. */ + private final Writer writer; + + /** + * Creates a content handler that writes character events to the given writer. + * + * @param writer writer + */ + public ToTextContentHandler(Writer writer) { + this.writer = writer; + } + + /** + * Creates a content handler that writes character events to the given output stream using the + * platform default encoding. + * + * @param stream output stream + */ + public ToTextContentHandler(OutputStream stream) { + this(new OutputStreamWriter(stream, Charset.defaultCharset())); + } + + /** + * Creates a content handler that writes character events to the given output stream using the + * given encoding. + * + * @param stream output stream + * @param encoding output encoding + * @throws UnsupportedEncodingException if the encoding is unsupported + */ + public ToTextContentHandler(OutputStream stream, String encoding) + throws UnsupportedEncodingException { + this(new OutputStreamWriter(stream, encoding)); + } + + /** + * Creates a content handler that writes character events to an internal string buffer. Use the + * {@link #toString()} method to access the collected character content. + */ + public ToTextContentHandler() { + this(new StringWriter()); + } + + /** Writes the given characters to the given character stream. 
*/ + @Override + public void characters(char[] ch, int start, int length) throws SAXException { + + if (styleDepth + scriptDepth != 0) { + return; + } + + try { + writer.write(ch, start, length); + } catch (IOException e) { + throw new SAXException("Error writing: " + new String(ch, start, length), e); + } + } + + /** + * Writes the given ignorable characters to the given character stream. The default implementation + * simply forwards the call to the {@link #characters(char[], int, int)} method. + */ + @Override + public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { + characters(ch, start, length); + } + + /** + * Flushes the character stream so that no characters are forgotten in internal buffers. + * + * @see TIKA-179 + * @throws SAXException if the stream can not be flushed + */ + @Override + public void endDocument() throws SAXException { + try { + writer.flush(); + } catch (IOException e) { + throw new SAXException("Error flushing character output", e); + } + } + + @Override + public void startElement(String uri, String localName, String qName, Attributes atts) + throws SAXException { + String uc = (qName == null) ? "" : qName.toUpperCase(Locale.ENGLISH); + if (uc.equals(STYLE)) { + styleDepth++; + } + if (uc.equals(SCRIPT)) { + scriptDepth++; + } + } + + @Override + public void endElement(String uri, String localName, String qName) throws SAXException { + String uc = (qName == null) ? "" : qName.toUpperCase(Locale.ENGLISH); + if (uc.equals(STYLE)) { + styleDepth--; + } + if (uc.equals(SCRIPT)) { + scriptDepth--; + } + } + + /** + * Returns the contents of the internal string buffer where all the received characters have been + * collected. Only works when this object was constructed using the empty default constructor or + * by passing a {@link StringWriter} to the other constructor. 
+ */ + @Override + public String toString() { + return writer.toString(); + } +} diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/fromtika/ToXMLContentHandler.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/fromtika/ToXMLContentHandler.java new file mode 100644 index 00000000000..5310e234d7b --- /dev/null +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/fromtika/ToXMLContentHandler.java @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file was originally part of Apache Tika 1.28.5 (org.apache.tika.sax.ToXMLContentHandler) + * and has been copied into the Solr codebase to eliminate the Tika dependency. + */ +package org.apache.solr.handler.extraction.fromtika; + +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; + +/** + * SAX event handler that serializes the XML document to a character stream. The incoming SAX events + * are expected to be well-formed (properly nested, etc.) 
and to explicitly include namespace + * declaration attributes and corresponding namespace prefixes in element and attribute names. + * + * @since Apache Tika 0.10 + */ +public class ToXMLContentHandler extends ToTextContentHandler { + + private static class ElementInfo { + + private final ElementInfo parent; + + private final Map namespaces; + + public ElementInfo(ElementInfo parent, Map namespaces) { + this.parent = parent; + if (namespaces.isEmpty()) { + this.namespaces = Collections.emptyMap(); + } else { + this.namespaces = new HashMap(namespaces); + } + } + + public String getPrefix(String uri) throws SAXException { + String prefix = namespaces.get(uri); + if (prefix != null) { + return prefix; + } else if (parent != null) { + return parent.getPrefix(uri); + } else if (uri == null || uri.length() == 0) { + return ""; + } else { + throw new SAXException("Namespace " + uri + " not declared"); + } + } + + public String getQName(String uri, String localName) throws SAXException { + String prefix = getPrefix(uri); + if (prefix.length() > 0) { + return prefix + ":" + localName; + } else { + return localName; + } + } + } + + private final String encoding; + + protected boolean inStartElement = false; + + protected final Map namespaces = new HashMap(); + + private ElementInfo currentElement; + + /** + * Creates an XML serializer that writes to the given byte stream using the given character + * encoding. + * + * @param stream output stream + * @param encoding output encoding + * @throws UnsupportedEncodingException if the encoding is unsupported + */ + public ToXMLContentHandler(OutputStream stream, String encoding) + throws UnsupportedEncodingException { + super(stream, encoding); + this.encoding = encoding; + } + + public ToXMLContentHandler(String encoding) { + super(); + this.encoding = encoding; + } + + public ToXMLContentHandler() { + super(); + this.encoding = null; + } + + /** Writes the XML prefix. 
*/
+  @Override
+  public void startDocument() throws SAXException {
+    if (encoding != null) {
+      write("<?xml version=\"1.0\" encoding=\"" + encoding + "\"?>\n");
+    }
+
+    currentElement = null;
+    namespaces.clear();
+  }
+
+  @Override
+  public void startPrefixMapping(String prefix, String uri) throws SAXException {
+    try {
+      if (currentElement != null && prefix.equals(currentElement.getPrefix(uri))) {
+        return;
+      }
+    } catch (SAXException ignore) {
+    }
+    namespaces.put(uri, prefix);
+  }
+
+  @Override
+  public void startElement(String uri, String localName, String qName, Attributes atts)
+      throws SAXException {
+    lazyCloseStartElement();
+
+    currentElement = new ElementInfo(currentElement, namespaces);
+
+    write('<');
+    write(currentElement.getQName(uri, localName));
+
+    for (int i = 0; i < atts.getLength(); i++) {
+      write(' ');
+      write(currentElement.getQName(atts.getURI(i), atts.getLocalName(i)));
+      write('=');
+      write('"');
+      char[] ch = atts.getValue(i).toCharArray();
+      writeEscaped(ch, 0, ch.length, true);
+      write('"');
+    }
+
+    for (Map.Entry<String, String> entry : namespaces.entrySet()) {
+      write(' ');
+      write("xmlns");
+      String prefix = entry.getValue();
+      if (prefix.length() > 0) {
+        write(':');
+        write(prefix);
+      }
+      write('=');
+      write('"');
+      char[] ch = entry.getKey().toCharArray();
+      writeEscaped(ch, 0, ch.length, true);
+      write('"');
+    }
+    namespaces.clear();
+
+    inStartElement = true;
+  }
+
+  @Override
+  public void endElement(String uri, String localName, String qName) throws SAXException {
+    if (inStartElement) {
+      write(" />");
+      inStartElement = false;
+    } else {
+      write("</");
+      write(qName);
+      write('>');
+    }
+
+    namespaces.clear();
+
+    // Reset the position in the tree, to avoid endless stack overflow
+    // chains (see TIKA-1070)
+    currentElement = currentElement.parent;
+  }
+
+  @Override
+  public void characters(char[] ch, int start, int length) throws SAXException {
+    lazyCloseStartElement();
+    writeEscaped(ch, start, start + length, false);
+  }
+
+  private void lazyCloseStartElement() throws SAXException {
+    if (inStartElement) {
+      write('>');
+ inStartElement = false; + } + } + + /** + * Writes the given character as-is. + * + * @param ch character to be written + * @throws SAXException if the character could not be written + */ + protected void write(char ch) throws SAXException { + super.characters(new char[] {ch}, 0, 1); + } + + /** + * Writes the given string of character as-is. + * + * @param string string of character to be written + * @throws SAXException if the character string could not be written + */ + protected void write(String string) throws SAXException { + super.characters(string.toCharArray(), 0, string.length()); + } + + /** + * Writes the given characters as-is followed by the given entity. + * + * @param ch character array + * @param from start position in the array + * @param to end position in the array + * @param entity entity code + * @return next position in the array, after the characters plus one entity + * @throws SAXException if the characters could not be written + */ + private int writeCharsAndEntity(char[] ch, int from, int to, String entity) throws SAXException { + super.characters(ch, from, to - from); + write('&'); + write(entity); + write(';'); + return to + 1; + } + + /** + * Writes the given characters with XML meta characters escaped. 
+ * + * @param ch character array + * @param from start position in the array + * @param to end position in the array + * @param attribute whether the characters should be escaped as an attribute value or normal + * character content + * @throws SAXException if the characters could not be written + */ + private void writeEscaped(char[] ch, int from, int to, boolean attribute) throws SAXException { + int pos = from; + while (pos < to) { + if (ch[pos] == '<') { + from = pos = writeCharsAndEntity(ch, from, pos, "lt"); + } else if (ch[pos] == '>') { + from = pos = writeCharsAndEntity(ch, from, pos, "gt"); + } else if (ch[pos] == '&') { + from = pos = writeCharsAndEntity(ch, from, pos, "amp"); + } else if (attribute && ch[pos] == '"') { + from = pos = writeCharsAndEntity(ch, from, pos, "quot"); + } else { + pos++; + } + } + super.characters(ch, from, to - from); + } +} diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/fromtika/package-info.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/fromtika/package-info.java new file mode 100644 index 00000000000..ddd0b2d4eb6 --- /dev/null +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/fromtika/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Classes in this package are copied from and (C) the Apache Tika project. */ +package org.apache.solr.handler.extraction.fromtika; diff --git a/solr/modules/extraction/src/test-files/extraction/example.html b/solr/modules/extraction/src/test-files/extraction/example.html index 5732f6214bc..2801c3c97d8 100644 --- a/solr/modules/extraction/src/test-files/extraction/example.html +++ b/solr/modules/extraction/src/test-files/extraction/example.html @@ -6,8 +6,8 @@

Here is some text

-
Here is some text in a div
-
This has a link.
+

a h1 tag

+

This has a link in a paragraph.

News
  • diff --git a/solr/modules/extraction/src/test-files/extraction/simple.html b/solr/modules/extraction/src/test-files/extraction/simple.html index 3c807fb1d98..3ec4d4e0d01 100644 --- a/solr/modules/extraction/src/test-files/extraction/simple.html +++ b/solr/modules/extraction/src/test-files/extraction/simple.html @@ -10,7 +10,7 @@ Here is some text

    distinct
    words

    -
    Here is some text in a div
    +

    Here is some text in a h1

    This has a link.