diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 4ff226cbe784..4680b7677e64 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -389,6 +389,8 @@ Other Changes * SOLR-17956: XLSXResponseWriter has been deprecated and will be removed in a future release. (Jan Høydahl) +* SOLR-17958: The Tika Language Identifier is deprecated. Use one of the other detectors instead. (Jan Høydahl) + * SOLR-17952: Stream decorator test refactoring - use underscore rather than dot in aliases (Andy Webb) ================== 9.9.1 ================== diff --git a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java index 97fb7917fd66..bde2f62437c6 100644 --- a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java +++ b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java @@ -34,7 +34,9 @@ * href="https://solr.apache.org/guide/solr/latest/indexing-guide/language-detection.html#configuring-tika-language-detection">https://solr.apache.org/guide/solr/latest/indexing-guide/language-detection.html#configuring-tika-language-detection * * @since 3.5 + * @deprecated Since 9.10, use {@link OpenNLPLangDetectUpdateProcessor} instead. 
*/ +@Deprecated(since = "9.10") public class TikaLanguageIdentifierUpdateProcessor extends LanguageIdentifierUpdateProcessor { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); diff --git a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java index 66cb977ed50c..1728390c0658 100644 --- a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java +++ b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java @@ -43,7 +43,9 @@ * href="https://solr.apache.org/guide/solr/latest/indexing-guide/language-detection.html#configuring-tika-language-detection">https://solr.apache.org/guide/solr/latest/indexing-guide/language-detection.html#configuring-tika-language-detection * * @since 3.5 + * @deprecated Since 9.10, use {@link OpenNLPLangDetectUpdateProcessorFactory} instead. 
*/ +@Deprecated(since = "9.10") public class TikaLanguageIdentifierUpdateProcessorFactory extends UpdateRequestProcessorFactory implements SolrCoreAware, LangIdParams { diff --git a/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java b/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java index b110b397b05d..3c47c2a9de08 100644 --- a/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java +++ b/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java @@ -20,6 +20,7 @@ import org.apache.solr.common.params.ModifiableSolrParams; import org.junit.Test; +@SuppressWarnings("deprecation") public class TikaLanguageIdentifierUpdateProcessorFactoryTest extends LanguageIdentifierUpdateProcessorFactoryTestCase { @Override diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/language-detection.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/language-detection.adoc index f4f5ab158f33..a2ab17638fa8 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/pages/language-detection.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/language-detection.adoc @@ -55,6 +55,11 @@ Here is an example of a minimal Tika `langid` configuration in `solrconfig.xml`: ---- +[IMPORTANT] +==== +The Tika language detector is deprecated as of Solr 9.10 and may be removed in a future version. Use the LangDetect or OpenNLP detector instead. 
+==== + === Configuring LangDetect Language Detection Here is an example of a minimal LangDetect `langid` configuration in `solrconfig.xml`: diff --git a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc index a99ad2b4e4f8..d07e8d8081d2 100644 --- a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc +++ b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc @@ -82,6 +82,8 @@ Java has removed support for the Security Manager starting with Java 24; therefo The `XLSXResponseWriter` is now deprecated. +The Tika Language Identifier (`TikaLanguageIdentifierUpdateProcessorFactory`) is now deprecated. Use the LangDetect or OpenNLP language detector instead. + The Extraction module can now extract documents using an external Tika Server. The local in-process Tika 1.x extractor backend is deprecated and will go away in 10.0.