From 76b0662a8362fc016ecd41b84781772a803ba951 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B8ydahl?= Date: Wed, 15 Oct 2025 23:16:29 +0200 Subject: [PATCH] SOLR-17958 Deprecate TikaLanguageIdentifierUpdateProcessor --- solr/CHANGES.txt | 2 ++ .../processor/TikaLanguageIdentifierUpdateProcessor.java | 2 ++ .../TikaLanguageIdentifierUpdateProcessorFactory.java | 2 ++ .../TikaLanguageIdentifierUpdateProcessorFactoryTest.java | 1 + .../modules/indexing-guide/pages/language-detection.adoc | 5 +++++ .../modules/upgrade-notes/pages/major-changes-in-solr-9.adoc | 2 ++ 6 files changed, 14 insertions(+) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 18272cfe621..e8777ba6eb2 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -381,6 +381,8 @@ Other Changes * SOLR-17956: XLSXResponseWriter has been deprecated and will be removed in a future release. (Jan Høydahl) +* SOLR-17958: The Tika Language Identifier is deprecated. Use one of the other detectors instead. (Jan Høydahl) + * SOLR-17952: Stream decorator test refactoring - use underscore rather than dot in aliases (Andy Webb) ================== 9.9.1 ================== diff --git a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java index 97fb7917fd6..bde2f62437c 100644 --- a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java +++ b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java @@ -34,7 +34,9 @@ * href="https://solr.apache.org/guide/solr/latest/indexing-guide/language-detection.html#configuring-tika-language-detection">https://solr.apache.org/guide/solr/latest/indexing-guide/language-detection.html#configuring-tika-language-detection * * @since 3.5 + * @deprecated Since 9.10, use {@link OpenNLPLangDetectUpdateProcessor} instead. 
*/ +@Deprecated(since = "9.10") public class TikaLanguageIdentifierUpdateProcessor extends LanguageIdentifierUpdateProcessor { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); diff --git a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java index 66cb977ed50..1728390c065 100644 --- a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java +++ b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java @@ -43,7 +43,9 @@ * href="https://solr.apache.org/guide/solr/latest/indexing-guide/language-detection.html#configuring-tika-language-detection">https://solr.apache.org/guide/solr/latest/indexing-guide/language-detection.html#configuring-tika-language-detection * * @since 3.5 + * @deprecated Since 9.10, use {@link OpenNLPLangDetectUpdateProcessorFactory} instead. 
*/ +@Deprecated(since = "9.10") public class TikaLanguageIdentifierUpdateProcessorFactory extends UpdateRequestProcessorFactory implements SolrCoreAware, LangIdParams { diff --git a/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java b/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java index b110b397b05..3c47c2a9de0 100644 --- a/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java +++ b/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java @@ -20,6 +20,7 @@ import org.apache.solr.common.params.ModifiableSolrParams; import org.junit.Test; +@SuppressWarnings("deprecation") public class TikaLanguageIdentifierUpdateProcessorFactoryTest extends LanguageIdentifierUpdateProcessorFactoryTestCase { @Override diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/language-detection.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/language-detection.adoc index f4f5ab158f3..a2ab17638fa 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/pages/language-detection.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/language-detection.adoc @@ -55,6 +55,11 @@ Here is an example of a minimal Tika `langid` configuration in `solrconfig.xml`: ---- +[IMPORTANT] +==== +This detector is deprecated and may be removed in a future version. 
+==== + === Configuring LangDetect Language Detection Here is an example of a minimal LangDetect `langid` configuration in `solrconfig.xml`: diff --git a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc index a54aceb624b..c1d5c3084ba 100644 --- a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc +++ b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc @@ -82,6 +82,8 @@ Java has removed support for the Security Manager starting with Java 24; therefo The `XLSXResponseWriter` is now deprecated. +The Tika Language Identifier is deprecated. Use the LangDetect or OpenNLP language detector instead. + == Solr 9.9 === SolrJ