From 8d8bdd25c5b420c117af796d984a070059376a56 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Wed, 5 Dec 2018 15:53:41 +0100 Subject: [PATCH 01/16] Move back to BM25 similarity With the recent lucene upgrade we have temporarily adopted the LegacyBM25Similarity which exposes the same scores as BM25Similarity before the k1+1 factor was removed from the numerator of the scoring formula. This commit moves the default Elasticsearch similarity back to the Bm25Similarity and updates the scores that have changed in our docs and tests --- .../index/similarity/SimilarityProviders.java | 6 +++--- .../index/similarity/SimilarityService.java | 4 ++-- .../index/similarity/LegacySimilarityTests.java | 4 ++-- .../index/similarity/SimilarityServiceTests.java | 4 ++-- .../index/similarity/SimilarityTests.java | 11 +++++------ 5 files changed, 14 insertions(+), 15 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java index 04970a38bd99d..0473c6aeaf551 100644 --- a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java +++ b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java @@ -23,6 +23,7 @@ import org.apache.lucene.search.similarities.AfterEffect; import org.apache.lucene.search.similarities.AfterEffectB; import org.apache.lucene.search.similarities.AfterEffectL; +import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.BasicModel; import org.apache.lucene.search.similarities.BasicModelG; import org.apache.lucene.search.similarities.BasicModelIF; @@ -50,7 +51,6 @@ import org.apache.lucene.search.similarities.NormalizationH2; import org.apache.lucene.search.similarities.NormalizationH3; import org.apache.lucene.search.similarities.NormalizationZ; -import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.elasticsearch.Version; import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.settings.Settings; @@ -269,14 +269,14 @@ static void assertSettingsIsSubsetOf(String type, Version version, Settings sett } } - public static LegacyBM25Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) { + public static BM25Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) { assertSettingsIsSubsetOf("BM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS); float k1 = settings.getAsFloat("k1", 1.2f); float b = settings.getAsFloat("b", 0.75f); boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true); - LegacyBM25Similarity similarity = new LegacyBM25Similarity(k1, b); + BM25Similarity similarity = new BM25Similarity(k1, b); similarity.setDiscountOverlaps(discountOverlaps); return similarity; } diff --git a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java index 57cbc961aacc0..44956eec35d9b 100644 --- a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java +++ b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java @@ -25,12 +25,12 @@ import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.TermStatistics; +import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.BooleanSimilarity; import org.apache.lucene.search.similarities.ClassicSimilarity; import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity.SimScorer; -import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; import org.elasticsearch.common.TriFunction; @@ -75,7 +75,7 @@ public final class SimilarityService extends AbstractIndexComponent { } }); defaults.put("BM25", version -> { - final LegacyBM25Similarity similarity = SimilarityProviders.createBM25Similarity(Settings.EMPTY, version); + final BM25Similarity similarity = SimilarityProviders.createBM25Similarity(Settings.EMPTY, version); return () -> similarity; }); defaults.put("boolean", version -> { diff --git a/server/src/test/java/org/elasticsearch/index/similarity/LegacySimilarityTests.java b/server/src/test/java/org/elasticsearch/index/similarity/LegacySimilarityTests.java index 13398d8791437..69ad3bd128fe2 100644 --- a/server/src/test/java/org/elasticsearch/index/similarity/LegacySimilarityTests.java +++ b/server/src/test/java/org/elasticsearch/index/similarity/LegacySimilarityTests.java @@ -19,9 +19,9 @@ package org.elasticsearch.index.similarity; +import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.BooleanSimilarity; import org.apache.lucene.search.similarities.ClassicSimilarity; -import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; @@ -51,7 +51,7 @@ public void testResolveDefaultSimilaritiesOn6xIndex() { assertWarnings("The [classic] similarity is now deprecated in favour of BM25, which is generally " + "accepted as a better alternative. Use the [BM25] similarity or build a custom [scripted] similarity " + "instead."); - assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(LegacyBM25Similarity.class)); + assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class)); assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class)); assertThat(similarityService.getSimilarity("default"), equalTo(null)); } diff --git a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java index 98c2abb3f8219..0eefd737364a5 100644 --- a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java +++ b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java @@ -21,9 +21,9 @@ import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.TermStatistics; +import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.BooleanSimilarity; import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.elasticsearch.Version; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexSettings; @@ -40,7 +40,7 @@ public void testDefaultSimilarity() { Settings settings = Settings.builder().build(); IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings); SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap()); - assertThat(service.getDefaultSimilarity(), instanceOf(LegacyBM25Similarity.class)); + assertThat(service.getDefaultSimilarity(), instanceOf(BM25Similarity.class)); } // Tests #16594 diff --git a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java index fd5a77665fecd..d2244eb8a5a88 100644 --- a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java +++ b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.similarity; import org.apache.lucene.search.similarities.AfterEffectL; +import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.BasicModelG; import org.apache.lucene.search.similarities.BooleanSimilarity; import org.apache.lucene.search.similarities.DFISimilarity; @@ -31,7 +32,6 @@ import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity; import org.apache.lucene.search.similarities.LambdaTTF; import org.apache.lucene.search.similarities.NormalizationH2; -import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.elasticsearch.common.Strings; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.settings.Settings; @@ -60,7 +60,7 @@ protected Collection> getPlugins() { public void testResolveDefaultSimilarities() { SimilarityService similarityService = createIndex("foo").similarityService(); - assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(LegacyBM25Similarity.class)); + assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class)); assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class)); assertThat(similarityService.getSimilarity("default"), equalTo(null)); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, @@ -94,13 +94,12 @@ public void testResolveSimilaritiesFromMapping_bm25() throws IOException { .put("index.similarity.my_similarity.discount_overlaps", false) .build(); MapperService mapperService = createIndex("foo", indexSettings, "type", mapping).mapperService(); - assertThat(mapperService.fullName("field1").similarity().get(), instanceOf(LegacyBM25Similarity.class)); + assertThat(mapperService.fullName("field1").similarity().get(), instanceOf(BM25Similarity.class)); - LegacyBM25Similarity similarity = (LegacyBM25Similarity) mapperService.fullName("field1").similarity().get(); + BM25Similarity similarity = (BM25Similarity) mapperService.fullName("field1").similarity().get(); assertThat(similarity.getK1(), equalTo(2.0f)); assertThat(similarity.getB(), equalTo(0.5f)); - // TODO: re-enable when we switch back to BM25Similarity - // assertThat(similarity.getDiscountOverlaps(), equalTo(false)); + assertThat(similarity.getDiscountOverlaps(), equalTo(false)); } public void testResolveSimilaritiesFromMapping_boolean() throws IOException { From 6d92c111eb2e1bb30c96340a4cb6101ca73957cd Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Wed, 5 Dec 2018 16:42:04 +0100 Subject: [PATCH 02/16] fix tests --- .../basic/TransportTwoNodesSearchIT.java | 24 +++++++++---------- .../search/nested/SimpleNestedIT.java | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/search/basic/TransportTwoNodesSearchIT.java b/server/src/test/java/org/elasticsearch/search/basic/TransportTwoNodesSearchIT.java index 5d3b19697d788..704248c29ada8 100644 --- a/server/src/test/java/org/elasticsearch/search/basic/TransportTwoNodesSearchIT.java +++ b/server/src/test/java/org/elasticsearch/search/basic/TransportTwoNodesSearchIT.java @@ -148,15 +148,15 @@ public void testDfsQueryThenFetch() throws Exception { SearchHit hit = hits[i]; assertThat(hit.getExplanation(), notNullValue()); assertThat(hit.getExplanation().getDetails().length, equalTo(1)); - assertThat(hit.getExplanation().getDetails()[0].getDetails().length, equalTo(3)); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails().length, equalTo(2)); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[0].getDescription(), + assertThat(hit.getExplanation().getDetails()[0].getDetails().length, equalTo(2)); + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails().length, equalTo(2)); + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[0].getDescription(), startsWith("n,")); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[0].getValue(), + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[0].getValue(), equalTo(100L)); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[1].getDescription(), + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[1].getDescription(), startsWith("N,")); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[1].getValue(), + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[1].getValue(), equalTo(100L)); assertThat("id[" + hit.getId() + "] -> " + hit.getExplanation().toString(), hit.getId(), equalTo(Integer.toString(100 - total - i - 1))); @@ -187,15 +187,15 @@ public void testDfsQueryThenFetchWithSort() throws Exception { SearchHit hit = hits[i]; assertThat(hit.getExplanation(), notNullValue()); assertThat(hit.getExplanation().getDetails().length, equalTo(1)); - assertThat(hit.getExplanation().getDetails()[0].getDetails().length, equalTo(3)); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails().length, equalTo(2)); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[0].getDescription(), + assertThat(hit.getExplanation().getDetails()[0].getDetails().length, equalTo(2)); + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails().length, equalTo(2)); + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[0].getDescription(), startsWith("n,")); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[0].getValue(), + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[0].getValue(), equalTo(100L)); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[1].getDescription(), + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[1].getDescription(), startsWith("N,")); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[1].getValue(), + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[1].getValue(), equalTo(100L)); assertThat("id[" + hit.getId() + "]", hit.getId(), equalTo(Integer.toString(total + i))); } diff --git a/server/src/test/java/org/elasticsearch/search/nested/SimpleNestedIT.java b/server/src/test/java/org/elasticsearch/search/nested/SimpleNestedIT.java index 5feb341fd6943..240f9d4672f7f 100644 --- a/server/src/test/java/org/elasticsearch/search/nested/SimpleNestedIT.java +++ b/server/src/test/java/org/elasticsearch/search/nested/SimpleNestedIT.java @@ -326,7 +326,7 @@ public void testExplain() throws Exception { assertThat(searchResponse.getHits().getTotalHits().value, equalTo(1L)); Explanation explanation = searchResponse.getHits().getHits()[0].getExplanation(); assertThat(explanation.getValue(), equalTo(searchResponse.getHits().getHits()[0].getScore())); - assertThat(explanation.toString(), startsWith("0.36464313 = Score based on 2 child docs in range from 0 to 1")); + assertThat(explanation.toString(), startsWith(explanation.getValue() + " = Score based on 2 child docs in range from 0 to 1")); } public void testSimpleNestedSorting() throws Exception { From 1dea27fcb485cd363f6b354a3491c6a6b3df090a Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Wed, 5 Dec 2018 17:18:41 +0100 Subject: [PATCH 03/16] add note about bm25 changes --- docs/reference/release-notes.asciidoc | 2 ++ docs/reference/release-notes/7.0.0-alpha2.asciidoc | 14 ++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 docs/reference/release-notes/7.0.0-alpha2.asciidoc diff --git a/docs/reference/release-notes.asciidoc b/docs/reference/release-notes.asciidoc index ffea569ca21f3..2178d5345a4fa 100644 --- a/docs/reference/release-notes.asciidoc +++ b/docs/reference/release-notes.asciidoc @@ -7,7 +7,9 @@ This section summarizes the changes in each release. * <> +* <> -- include::release-notes/7.0.0-alpha1.asciidoc[] +include::release-notes/7.0.0-alpha2.asciidoc[] diff --git a/docs/reference/release-notes/7.0.0-alpha2.asciidoc b/docs/reference/release-notes/7.0.0-alpha2.asciidoc new file mode 100644 index 0000000000000..13fcac69de5cb --- /dev/null +++ b/docs/reference/release-notes/7.0.0-alpha2.asciidoc @@ -0,0 +1,14 @@ +[[release-notes-7.0.0-alpha2]] +== 7.0.0-alpha2 release notes + +The changes listed below have been released for the first time in Elasticsearch 7.0.0-alpha2. + +[[breaking-7.0.0-alpha2]] +[float] +=== Breaking changes + +Search:: +* Scores computed using the BM25 similarity, the default similarity used by + Elasticsearch, are lower than previously as one constant factor was removed + from the numerator of the scoring formula. Ordering of results is preserved + unless scores are computed from multiple fields using different similarities. From 53dd1f370dcad0c1f68d59c8d4ca9de3d7a2b88a Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Thu, 6 Dec 2018 15:42:29 +0100 Subject: [PATCH 04/16] update scores in docs --- .../metrics/tophits-aggregation.asciidoc | 4 +-- .../pattern-replace-charfilter.asciidoc | 4 +-- .../tokenizers/edgengram-tokenizer.asciidoc | 4 +-- .../how-to/recipes/stemming.asciidoc | 6 ++--- .../mapping/params/normalizer.asciidoc | 6 ++--- .../query-dsl/terms-set-query.asciidoc | 4 +-- docs/reference/search/explain.asciidoc | 5 ---- docs/reference/search/request-body.asciidoc | 4 +-- .../search/request/highlighting.asciidoc | 11 +++++--- .../search/request/inner-hits.asciidoc | 25 +++++++++++++++---- docs/reference/search/uri-request.asciidoc | 4 +-- 11 files changed, 46 insertions(+), 31 deletions(-) diff --git a/docs/reference/aggregations/metrics/tophits-aggregation.asciidoc b/docs/reference/aggregations/metrics/tophits-aggregation.asciidoc index 1d225c91e26d8..b8459ee6e618c 100644 --- a/docs/reference/aggregations/metrics/tophits-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/tophits-aggregation.asciidoc @@ -332,7 +332,7 @@ Top hits response snippet with a nested hit, which resides in the first slot of "value": 1, "relation": "eq" }, - "max_score": 0.3616575, + "max_score": 0.16438977, "hits": [ { "_index": "sales", @@ -342,7 +342,7 @@ Top hits response snippet with a nested hit, which resides in the first slot of "field": "comments", <1> "offset": 0 <2> }, - "_score": 0.3616575, + "_score": 0.16438977, "_source": { "comment": "This car could have better brakes", <3> "username": "baddriver007" diff --git a/docs/reference/analysis/charfilters/pattern-replace-charfilter.asciidoc b/docs/reference/analysis/charfilters/pattern-replace-charfilter.asciidoc index 046f6441c07b9..e273cc52c5ae8 100644 --- a/docs/reference/analysis/charfilters/pattern-replace-charfilter.asciidoc +++ b/docs/reference/analysis/charfilters/pattern-replace-charfilter.asciidoc @@ -245,13 +245,13 @@ The output from the above is: "value": 1, "relation": "eq" }, - "max_score": 0.2876821, + "max_score": 0.13076457, "hits": [ { "_index": "my_index", "_type": "_doc", "_id": "1", - "_score": 0.2876821, + "_score": 0.13076457, "_source": { "text": "The fooBarBaz method" }, diff --git a/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc index a34f5c801939e..90dc16d9e054c 100644 --- a/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc +++ b/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc @@ -304,13 +304,13 @@ GET my_index/_search "value": 1, "relation": "eq" }, - "max_score": 0.5753642, + "max_score": 0.26152915, "hits": [ { "_index": "my_index", "_type": "_doc", "_id": "1", - "_score": 0.5753642, + "_score": 0.26152915, "_source": { "title": "Quick Foxes" } diff --git a/docs/reference/how-to/recipes/stemming.asciidoc b/docs/reference/how-to/recipes/stemming.asciidoc index 83f1379cd32a0..0292bc98c6f7e 100644 --- a/docs/reference/how-to/recipes/stemming.asciidoc +++ b/docs/reference/how-to/recipes/stemming.asciidoc @@ -88,13 +88,13 @@ GET index/_search "value": 2, "relation": "eq" }, - "max_score": 0.18232156, + "max_score": 0.082873434, "hits": [ { "_index": "index", "_type": "_doc", "_id": "1", - "_score": 0.18232156, + "_score": 0.082873434, "_source": { "body": "Ski resort" } @@ -103,7 +103,7 @@ GET index/_search "_index": "index", "_type": "_doc", "_id": "2", - "_score": 0.18232156, + "_score": 0.082873434, "_source": { "body": "A pair of skis" } diff --git a/docs/reference/mapping/params/normalizer.asciidoc b/docs/reference/mapping/params/normalizer.asciidoc index bfd24381753f7..17b90df507e3f 100644 --- a/docs/reference/mapping/params/normalizer.asciidoc +++ b/docs/reference/mapping/params/normalizer.asciidoc @@ -93,13 +93,13 @@ both index and query time. "value": 2, "relation": "eq" }, - "max_score": 0.47000363, + "max_score": 0.21363801, "hits": [ { "_index": "index", "_type": "_doc", "_id": "1", - "_score": 0.47000363, + "_score": 0.21363801, "_source": { "foo": "BÀR" } @@ -108,7 +108,7 @@ both index and query time. "_index": "index", "_type": "_doc", "_id": "2", - "_score": 0.47000363, + "_score": 0.21363801, "_source": { "foo": "bar" } diff --git a/docs/reference/query-dsl/terms-set-query.asciidoc b/docs/reference/query-dsl/terms-set-query.asciidoc index fa879bb068d34..0b5b615c59f6e 100644 --- a/docs/reference/query-dsl/terms-set-query.asciidoc +++ b/docs/reference/query-dsl/terms-set-query.asciidoc @@ -76,13 +76,13 @@ Response: "value": 1, "relation": "eq" }, - "max_score": 0.87546873, + "max_score": 0.39794034, "hits": [ { "_index": "my-index", "_type": "_doc", "_id": "2", - "_score": 0.87546873, + "_score": 0.39794034, "_source": { "codes": ["def", "ghi"], "required_matches": 2 diff --git a/docs/reference/search/explain.asciidoc b/docs/reference/search/explain.asciidoc index 8f8da5af5a593..da2bb8453a995 100644 --- a/docs/reference/search/explain.asciidoc +++ b/docs/reference/search/explain.asciidoc @@ -41,11 +41,6 @@ This will yield the following result: "value":1.6943597, "description":"score(freq=1.0), product of:", "details":[ - { - "value":2.2, - "description":"boost", - "details":[] - }, { "value":1.3862944, "description":"idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:", diff --git a/docs/reference/search/request-body.asciidoc b/docs/reference/search/request-body.asciidoc index 7145b40c43e3d..6079db135c054 100644 --- a/docs/reference/search/request-body.asciidoc +++ b/docs/reference/search/request-body.asciidoc @@ -35,13 +35,13 @@ And here is a sample response: "value": 1, "relation": "eq" }, - "max_score": 1.3862944, + "max_score": 0.6301338, "hits" : [ { "_index" : "twitter", "_type" : "_doc", "_id" : "0", - "_score": 1.3862944, + "_score": 0.6301338, "_source" : { "user" : "kimchy", "message": "trying out Elasticsearch", diff --git a/docs/reference/search/request/highlighting.asciidoc b/docs/reference/search/request/highlighting.asciidoc index e798fcf186906..bad339a96983f 100644 --- a/docs/reference/search/request/highlighting.asciidoc +++ b/docs/reference/search/request/highlighting.asciidoc @@ -860,17 +860,22 @@ Response: { ... "hits": { +<<<<<<< HEAD "total" : { "value": 1, "relation": "eq" }, "max_score": 1.601195, +======= + "total": 1, + "max_score": 0.72781587, +>>>>>>> update scores in docs "hits": [ { "_index": "twitter", "_type": "_doc", "_id": "1", - "_score": 1.601195, + "_score": 0.72781587, "_source": { "user": "test", "message": "some message with the number 1", @@ -923,13 +928,13 @@ Response: "value": 1, "relation": "eq" }, - "max_score": 1.601195, + "max_score": 0.72781587, "hits": [ { "_index": "twitter", "_type": "_doc", "_id": "1", - "_score": 1.601195, + "_score": 0.72781587, "_source": { "user": "test", "message": "some message with the number 1", diff --git a/docs/reference/search/request/inner-hits.asciidoc b/docs/reference/search/request/inner-hits.asciidoc index a1eeeb8f06375..6884e4ff61677 100644 --- a/docs/reference/search/request/inner-hits.asciidoc +++ b/docs/reference/search/request/inner-hits.asciidoc @@ -270,17 +270,22 @@ Response not included in text but tested for completeness sake. { ..., "hits": { +<<<<<<< HEAD "total" : { "value": 1, "relation": "eq" }, "max_score": 1.0444684, +======= + "total": 1, + "max_score": 0.47475836, +>>>>>>> update scores in docs "hits": [ { "_index": "test", "_type": "_doc", "_id": "1", - "_score": 1.0444684, + "_score": 0.47475836, "_source": ..., "inner_hits": { "comments": { <1> @@ -289,7 +294,7 @@ Response not included in text but tested for completeness sake. "value": 1, "relation": "eq" }, - "max_score": 1.0444684, + "max_score": 0.47475836, "hits": [ { "_index": "test", @@ -299,7 +304,7 @@ Response not included in text but tested for completeness sake. "field": "comments", "offset": 1 }, - "_score": 1.0444684, + "_score": 0.47475836, "fields": { "comments.text.keyword": [ "words words words" @@ -391,26 +396,36 @@ Which would look like: { ..., "hits": { +<<<<<<< HEAD "total" : { "value": 1, "relation": "eq" }, "max_score": 0.6931472, +======= + "total": 1, + "max_score": 0.3150669, +>>>>>>> update scores in docs "hits": [ { "_index": "test", "_type": "_doc", "_id": "1", - "_score": 0.6931472, + "_score": 0.3150669, "_source": ..., "inner_hits": { "comments.votes": { <1> "hits": { +<<<<<<< HEAD "total" : { "value": 1, "relation": "eq" }, "max_score": 0.6931472, +======= + "total": 1, + "max_score": 0.3150669, +>>>>>>> update scores in docs "hits": [ { "_index": "test", @@ -424,7 +439,7 @@ Which would look like: "offset": 0 } }, - "_score": 0.6931472, + "_score": 0.3150669, "_source": { "value": 1, "voter": "kimchy" diff --git a/docs/reference/search/uri-request.asciidoc b/docs/reference/search/uri-request.asciidoc index 320e65bf3ee0b..8b955b7dab024 100644 --- a/docs/reference/search/uri-request.asciidoc +++ b/docs/reference/search/uri-request.asciidoc @@ -31,13 +31,13 @@ And here is a sample response: "value": 1, "relation": "eq" }, - "max_score": 1.3862944, + "max_score": 0.6301338, "hits" : [ { "_index" : "twitter", "_type" : "_doc", "_id" : "0", - "_score": 1.3862944, + "_score": 0.6301338, "_source" : { "user" : "kimchy", "date" : "2009-11-15T14:12:12", From af56872fea9a2eb525ec12085250b2cfdafae003 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Fri, 7 Dec 2018 14:47:23 +0100 Subject: [PATCH 05/16] update scores in docs --- docs/reference/how-to/recipes/stemming.asciidoc | 8 ++++---- docs/reference/search/explain.asciidoc | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/reference/how-to/recipes/stemming.asciidoc b/docs/reference/how-to/recipes/stemming.asciidoc index 0292bc98c6f7e..87588c799c17e 100644 --- a/docs/reference/how-to/recipes/stemming.asciidoc +++ b/docs/reference/how-to/recipes/stemming.asciidoc @@ -149,13 +149,13 @@ GET index/_search "value": 1, "relation": "eq" }, - "max_score": 0.8025915, + "max_score": 0.3648143, "hits": [ { "_index": "index", "_type": "_doc", "_id": "1", - "_score": 0.8025915, + "_score": 0.3648143, "_source": { "body": "Ski resort" } @@ -209,13 +209,13 @@ GET index/_search "value": 1, "relation": "eq" }, - "max_score": 0.8025915, + "max_score": 0.3648143, "hits": [ { "_index": "index", "_type": "_doc", "_id": "1", - "_score": 0.8025915, + "_score": 0.3648143, "_source": { "body": "Ski resort" } diff --git a/docs/reference/search/explain.asciidoc b/docs/reference/search/explain.asciidoc index da2bb8453a995..472fb40447a10 100644 --- a/docs/reference/search/explain.asciidoc +++ b/docs/reference/search/explain.asciidoc @@ -34,11 +34,11 @@ This will yield the following result: "_id":"0", "matched":true, "explanation":{ - "value":1.6943597, + "value":0.7701635, "description":"weight(message:elasticsearch in 0) [PerFieldSimilarity], result of:", "details":[ { - "value":1.6943597, + "value":0.7701635, "description":"score(freq=1.0), product of:", "details":[ { From b2244616f0e4a96ebea2900a4d31d41c5b00091d Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Fri, 7 Dec 2018 16:59:07 +0100 Subject: [PATCH 06/16] fix bad merge --- .../search/request/highlighting.asciidoc | 5 ----- docs/reference/search/request/inner-hits.asciidoc | 15 --------------- 2 files changed, 20 deletions(-) diff --git a/docs/reference/search/request/highlighting.asciidoc b/docs/reference/search/request/highlighting.asciidoc index bad339a96983f..80a5ea0a2542c 100644 --- a/docs/reference/search/request/highlighting.asciidoc +++ b/docs/reference/search/request/highlighting.asciidoc @@ -860,16 +860,11 @@ Response: { ... "hits": { -<<<<<<< HEAD "total" : { "value": 1, "relation": "eq" }, - "max_score": 1.601195, -======= - "total": 1, "max_score": 0.72781587, ->>>>>>> update scores in docs "hits": [ { "_index": "twitter", diff --git a/docs/reference/search/request/inner-hits.asciidoc b/docs/reference/search/request/inner-hits.asciidoc index 6884e4ff61677..80af0b822b188 100644 --- a/docs/reference/search/request/inner-hits.asciidoc +++ b/docs/reference/search/request/inner-hits.asciidoc @@ -270,16 +270,11 @@ Response not included in text but tested for completeness sake. { ..., "hits": { -<<<<<<< HEAD "total" : { "value": 1, "relation": "eq" }, - "max_score": 1.0444684, -======= - "total": 1, "max_score": 0.47475836, ->>>>>>> update scores in docs "hits": [ { "_index": "test", @@ -396,16 +391,11 @@ Which would look like: { ..., "hits": { -<<<<<<< HEAD "total" : { "value": 1, "relation": "eq" }, - "max_score": 0.6931472, -======= - "total": 1, "max_score": 0.3150669, ->>>>>>> update scores in docs "hits": [ { "_index": "test", @@ -416,16 +406,11 @@ Which would look like: "inner_hits": { "comments.votes": { <1> "hits": { -<<<<<<< HEAD "total" : { "value": 1, "relation": "eq" }, - "max_score": 0.6931472, -======= - "total": 1, "max_score": 0.3150669, ->>>>>>> update scores in docs "hits": [ { "_index": "test", From f09f5dd6054677f9ef154f1d4a613001f41490ab Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Fri, 7 Dec 2018 22:42:14 +0100 Subject: [PATCH 07/16] fix sql tests --- .../plugin/sql/qa/src/main/resources/docs.csv-spec | 12 ++++++------ .../sql/qa/src/main/resources/fulltext.csv-spec | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec b/x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec index 4b3659c6baad1..120a5c974d771 100644 --- a/x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec +++ b/x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec @@ -713,8 +713,8 @@ SELECT SCORE(), * FROM library WHERE MATCH(name, 'dune') ORDER BY SCORE() DESC; SCORE() | author | name | page_count | release_date ---------------+---------------+-------------------+---------------+-------------------- -2.288635 |Frank Herbert |Dune |604 |1965-06-01T00:00:00Z -1.8893257 |Frank Herbert |Dune Messiah |331 |1969-10-15T00:00:00Z +1.0402887 |Frank Herbert |Dune |604 |1965-06-01T00:00:00Z +0.8587844 |Frank Herbert |Dune Messiah |331 |1969-10-15T00:00:00Z 1.6086555 |Frank Herbert |Children of Dune |408 |1976-04-21T00:00:00Z 1.4005898 |Frank Herbert |God Emperor of Dune|454 |1981-05-28T00:00:00Z @@ -727,10 +727,10 @@ SELECT SCORE(), * FROM library WHERE MATCH(name, 'dune') ORDER BY page_count DES SCORE() | author | name | page_count | release_date ---------------+---------------+-------------------+---------------+-------------------- -2.288635 |Frank Herbert |Dune |604 |1965-06-01T00:00:00Z +1.0402887 |Frank Herbert |Dune |604 |1965-06-01T00:00:00Z 1.4005898 |Frank Herbert |God Emperor of Dune|454 |1981-05-28T00:00:00Z 1.6086555 |Frank Herbert |Children of Dune |408 |1976-04-21T00:00:00Z -1.8893257 |Frank Herbert |Dune Messiah |331 |1969-10-15T00:00:00Z +0.8587844 |Frank Herbert |Dune Messiah |331 |1969-10-15T00:00:00Z // end::orderByScoreWithMatch ; @@ -743,8 +743,8 @@ SELECT SCORE() AS score, name, release_date FROM library WHERE QUERY('dune') ORD ---------------+-------------------+-------------------- 1.4005898 |God Emperor of Dune|1981-05-28T00:00:00Z 1.6086555 |Children of Dune |1976-04-21T00:00:00Z -1.8893257 |Dune Messiah |1969-10-15T00:00:00Z -2.288635 |Dune |1965-06-01T00:00:00Z +0.8587844 |Dune Messiah |1969-10-15T00:00:00Z +1.0402887 |Dune |1965-06-01T00:00:00Z // end::scoreWithMatch ; diff --git a/x-pack/plugin/sql/qa/src/main/resources/fulltext.csv-spec b/x-pack/plugin/sql/qa/src/main/resources/fulltext.csv-spec index 93493ffdc2acb..d0d0d1522f2ad 100644 --- a/x-pack/plugin/sql/qa/src/main/resources/fulltext.csv-spec +++ b/x-pack/plugin/sql/qa/src/main/resources/fulltext.csv-spec @@ -76,12 +76,12 @@ score SELECT emp_no, first_name, SCORE() FROM test_emp WHERE MATCH(first_name, 'Erez') ORDER BY SCORE(); emp_no:i | first_name:s | SCORE():f -10076 |Erez |4.2096553 +10076 |Erez |1.8660883 ; scoreAsSomething SELECT emp_no, first_name, SCORE() as s FROM test_emp WHERE MATCH(first_name, 'Erez') ORDER BY SCORE(); emp_no:i | first_name:s | s:f -10076 |Erez |4.2096553 +10076 |Erez |1.8660883 ; From 90b97d8ee1d22207db82c8a54af8febc9a9467e5 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Fri, 14 Dec 2018 11:19:45 +0100 Subject: [PATCH 08/16] fix remaining failures --- .../plugin/sql/qa/src/main/resources/docs.csv-spec | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec b/x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec index 90ed53967511a..bc6cb0ffbbcea 100644 --- a/x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec +++ b/x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec @@ -726,8 +726,8 @@ SELECT SCORE(), * FROM library WHERE MATCH(name, 'dune') ORDER BY SCORE() DESC; ---------------+---------------+-------------------+---------------+-------------------- 1.0402887 |Frank Herbert |Dune |604 |1965-06-01T00:00:00Z 0.8587844 |Frank Herbert |Dune Messiah |331 |1969-10-15T00:00:00Z -1.6086555 |Frank Herbert |Children of Dune |408 |1976-04-21T00:00:00Z -1.4005898 |Frank Herbert |God Emperor of Dune|454 |1981-05-28T00:00:00Z +0.731207 |Frank Herbert |Children of Dune |408 |1976-04-21T00:00:00Z +0.6366317 |Frank Herbert |God Emperor of Dune|454 |1981-05-28T00:00:00Z // end::orderByScore ; @@ -739,8 +739,8 @@ SELECT SCORE(), * FROM library WHERE MATCH(name, 'dune') ORDER BY page_count DES SCORE() | author | name | page_count | release_date ---------------+---------------+-------------------+---------------+-------------------- 1.0402887 |Frank Herbert |Dune |604 |1965-06-01T00:00:00Z -1.4005898 |Frank Herbert |God Emperor of Dune|454 |1981-05-28T00:00:00Z -1.6086555 |Frank Herbert |Children of Dune |408 |1976-04-21T00:00:00Z +0.6366317 |Frank Herbert |God Emperor of Dune|454 |1981-05-28T00:00:00Z +0.731207 |Frank Herbert |Children of Dune |408 |1976-04-21T00:00:00Z 0.8587844 |Frank Herbert |Dune Messiah |331 |1969-10-15T00:00:00Z // end::orderByScoreWithMatch @@ -752,8 +752,8 @@ SELECT SCORE() AS score, name, release_date FROM library WHERE QUERY('dune') ORD score | name | release_date ---------------+-------------------+-------------------- -1.4005898 |God Emperor of Dune|1981-05-28T00:00:00Z -1.6086555 |Children of Dune |1976-04-21T00:00:00Z +0.6366317 |God Emperor of Dune|1981-05-28T00:00:00Z +0.731207 |Children of Dune |1976-04-21T00:00:00Z 0.8587844 |Dune Messiah |1969-10-15T00:00:00Z 1.0402887 |Dune |1965-06-01T00:00:00Z // end::scoreWithMatch From 95658deec5142ec4fc3d0624d1379dbf95752fa0 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Fri, 14 Dec 2018 11:22:02 +0100 Subject: [PATCH 09/16] revert change sto release-notes.asciidoc --- docs/reference/release-notes.asciidoc | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/reference/release-notes.asciidoc b/docs/reference/release-notes.asciidoc index fdc15861a04bd..1c31e1cb3db47 100644 --- a/docs/reference/release-notes.asciidoc +++ b/docs/reference/release-notes.asciidoc @@ -8,10 +8,8 @@ This section summarizes the changes in each release. * <> * <> -* <> -- include::release-notes/7.0.0-alpha2.asciidoc[] include::release-notes/7.0.0-alpha1.asciidoc[] -include::release-notes/7.0.0-alpha2.asciidoc[] From cfafb9e12f7499775ec2f896a091bc2814b66212 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Mon, 17 Dec 2018 16:33:08 +0100 Subject: [PATCH 10/16] update release notes - mention mixed clusters --- docs/reference/release-notes/7.0.0-alpha2.asciidoc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/reference/release-notes/7.0.0-alpha2.asciidoc b/docs/reference/release-notes/7.0.0-alpha2.asciidoc index ace3c3891e124..752b65fefa34e 100644 --- a/docs/reference/release-notes/7.0.0-alpha2.asciidoc +++ b/docs/reference/release-notes/7.0.0-alpha2.asciidoc @@ -12,3 +12,7 @@ Search:: Elasticsearch, are lower than previously as one constant factor was removed from the numerator of the scoring formula. Ordering of results is preserved unless scores are computed from multiple fields using different similarities. + In a mixed cluster with some nodes on 7.0 and some on 6.x, scoring will + depend on where the shards are located: the older formula will be used for + data located from 6.x nodes, while the updated one will be used for data + located on 7.0 nodes. From e66f48dcb31a8dd85c9e92b29653041b5fe671ae Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Fri, 4 Jan 2019 10:46:44 +0100 Subject: [PATCH 11/16] Add legacy bm25 to default and built-in similarities This allows users to opt-out of the updated bm25 similarities and use the deprecated legacy bm25 similarity. This helps especially cross-cluster search cases across clusters on multiple versions, otherwise sorting by score would lead to very weird results depending on the version of the data node that scores each doc. --- .../index/similarity/SimilarityProvider.java | 4 +-- .../index/similarity/SimilarityProviders.java | 13 ++++++++ .../index/similarity/SimilarityService.java | 7 +++++ .../similarity/SimilarityServiceTests.java | 1 - .../index/similarity/SimilarityTests.java | 31 +++++++++++++++++-- 5 files changed, 50 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProvider.java index f5a870441d43f..b5af32f5c2f17 100644 --- a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProvider.java +++ b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProvider.java @@ -55,7 +55,7 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; SimilarityProvider that = (SimilarityProvider) o; - /** + /* * We check name only because the similarity is * re-created for each new instance and they don't implement equals. * This is not entirely correct though but we only use equality checks @@ -66,7 +66,7 @@ public boolean equals(Object o) { @Override public int hashCode() { - /** + /* * We use name only because the similarity is * re-created for each new instance and they don't implement equals. * This is not entirely correct though but we only use equality checks diff --git a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java index 0473c6aeaf551..a215257cb5cff 100644 --- a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java +++ b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java @@ -51,6 +51,7 @@ import org.apache.lucene.search.similarities.NormalizationH2; import org.apache.lucene.search.similarities.NormalizationH3; import org.apache.lucene.search.similarities.NormalizationZ; +import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.elasticsearch.Version; import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.settings.Settings; @@ -281,6 +282,18 @@ public static BM25Similarity createBM25Similarity(Settings settings, Version ind return similarity; } + public static LegacyBM25Similarity createLegacyBM25Similarity(Settings settings, Version indexCreatedVersion) { + assertSettingsIsSubsetOf("LegacyBM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS); + + float k1 = settings.getAsFloat("k1", 1.2f); + float b = settings.getAsFloat("b", 0.75f); + boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true); + + LegacyBM25Similarity similarity = new LegacyBM25Similarity(k1, b); + similarity.setDiscountOverlaps(discountOverlaps); + return similarity; + } + public static BooleanSimilarity createBooleanSimilarity(Settings settings, Version indexCreatedVersion) { assertSettingsIsSubsetOf("boolean", indexCreatedVersion, settings); return new BooleanSimilarity(); diff --git a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java index 44956eec35d9b..441560151efd7 100644 --- a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java +++ b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java @@ -31,6 +31,7 @@ import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity.SimScorer; +import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; import org.elasticsearch.common.TriFunction; @@ -78,6 +79,10 @@ public final class SimilarityService extends AbstractIndexComponent { final BM25Similarity similarity = SimilarityProviders.createBM25Similarity(Settings.EMPTY, version); return () -> similarity; }); + defaults.put("LegacyBM25", version -> { + final LegacyBM25Similarity similarity = SimilarityProviders.createLegacyBM25Similarity(Settings.EMPTY, version); + return () -> similarity; + }); defaults.put("boolean", version -> { final Similarity similarity = new BooleanSimilarity(); return () -> similarity; @@ -98,6 +103,8 @@ public final class SimilarityService extends AbstractIndexComponent { }); builtIn.put("BM25", (settings, version, scriptService) -> SimilarityProviders.createBM25Similarity(settings, version)); + builtIn.put("LegacyBM25", + (settings, version, scriptService) -> SimilarityProviders.createLegacyBM25Similarity(settings, version)); builtIn.put("boolean", (settings, version, scriptService) -> SimilarityProviders.createBooleanSimilarity(settings, version)); builtIn.put("DFR", diff --git a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java index 0eefd737364a5..d86aa63f92b0b 100644 --- a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java +++ b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java @@ -133,5 +133,4 @@ public float score(float freq, long norm) { () -> SimilarityService.validateSimilarity(Version.V_7_0_0, increasingScoresWithNormSim)); assertThat(e.getMessage(), Matchers.containsString("Similarity scores should not increase when norm increases")); } - } diff --git a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java index d2244eb8a5a88..6591e3a09da95 100644 --- a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java +++ b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java @@ -32,6 +32,7 @@ import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity; import org.apache.lucene.search.similarities.LambdaTTF; import org.apache.lucene.search.similarities.NormalizationH2; +import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.elasticsearch.common.Strings; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.settings.Settings; @@ -50,6 +51,7 @@ import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.instanceOf; +import static org.hamcrest.CoreMatchers.nullValue; public class SimilarityTests extends ESSingleNodeTestCase { @@ -61,15 +63,16 @@ protected Collection> getPlugins() { public void testResolveDefaultSimilarities() { SimilarityService similarityService = createIndex("foo").similarityService(); assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class)); + assertThat(similarityService.getSimilarity("LegacyBM25").get(), instanceOf(LegacyBM25Similarity.class)); assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class)); - assertThat(similarityService.getSimilarity("default"), equalTo(null)); + assertThat(similarityService.getSimilarity("default"), nullValue()); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> similarityService.getSimilarity("classic")); assertEquals("The [classic] similarity may not be used anymore. Please use the [BM25] similarity or build a custom [scripted] " + "similarity instead.", e.getMessage()); } - public void testResolveSimilaritiesFromMapping_classicIsForbidden() throws IOException { + public void testResolveSimilaritiesFromMapping_classicIsForbidden() { Settings indexSettings = Settings.builder() .put("index.similarity.my_similarity.type", "classic") .put("index.similarity.my_similarity.discount_overlaps", false) @@ -102,6 +105,28 @@ public void testResolveSimilaritiesFromMapping_bm25() throws IOException { assertThat(similarity.getDiscountOverlaps(), equalTo(false)); } + public void testResolveSimilaritiesFromMapping_LegacyBM25() throws IOException { + XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties") + .startObject("field1").field("type", "text").field("similarity", "my_similarity").endObject() + .endObject() + .endObject().endObject(); + + Settings indexSettings = Settings.builder() + .put("index.similarity.my_similarity.type", "LegacyBM25") + .put("index.similarity.my_similarity.k1", 2.0f) + .put("index.similarity.my_similarity.b", 0.5f) + .put("index.similarity.my_similarity.discount_overlaps", false) + .build(); + MapperService mapperService = createIndex("foo", indexSettings, "type", mapping).mapperService(); + assertThat(mapperService.fullName("field1").similarity().get(), instanceOf(LegacyBM25Similarity.class)); + + LegacyBM25Similarity similarity = (LegacyBM25Similarity) mapperService.fullName("field1").similarity().get(); + assertThat(similarity.getK1(), equalTo(2.0f)); + assertThat(similarity.getB(), equalTo(0.5f)); + assertThat(similarity.getDiscountOverlaps(), equalTo(false)); + } + public void testResolveSimilaritiesFromMapping_boolean() throws IOException { XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties") @@ -233,7 +258,7 @@ public void testResolveSimilaritiesFromMapping_Unknown() throws IOException { } } - public void testUnknownParameters() throws IOException { + public void testUnknownParameters() { Settings indexSettings = Settings.builder() .put("index.similarity.my_similarity.type", "BM25") .put("index.similarity.my_similarity.z", 2.0f) From bc5845d4ca2e03ada4e890477f2775b14e16fa8f Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Fri, 4 Jan 2019 11:02:58 +0100 Subject: [PATCH 12/16] additional test --- .../index/similarity/SimilarityService.java | 7 +++---- .../index/similarity/SimilarityServiceTests.java | 11 ++++++++++- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java index 441560151efd7..976f5140488d2 100644 --- a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java +++ b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java @@ -158,8 +158,9 @@ public SimilarityService(IndexSettings indexSettings, ScriptService scriptServic providers.put(entry.getKey(), entry.getValue().apply(indexSettings.getIndexVersionCreated())); } this.similarities = providers; - defaultSimilarity = (providers.get("default") != null) ? providers.get("default").get() - : providers.get(SimilarityService.DEFAULT_SIMILARITY).get(); + Supplier defaultSimilarity = providers.get("default"); + this.defaultSimilarity = (defaultSimilarity != null) ? + defaultSimilarity.get() : providers.get(SimilarityService.DEFAULT_SIMILARITY).get(); if (providers.get("base") != null) { deprecationLogger.deprecated("The [base] similarity is ignored since query normalization and coords have been removed"); } @@ -171,7 +172,6 @@ public Similarity similarity(MapperService mapperService) { defaultSimilarity; } - public SimilarityProvider getSimilarity(String name) { Supplier sim = similarities.get(name); if (sim == null) { @@ -280,5 +280,4 @@ private static void fail(Version indexCreatedVersion, String message) { deprecationLogger.deprecated(message); } } - } diff --git a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java index d86aa63f92b0b..a7c3b0f0d9ee6 100644 --- a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java +++ b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java @@ -24,6 +24,7 @@ import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.BooleanSimilarity; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.elasticsearch.Version; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexSettings; @@ -60,7 +61,15 @@ public void testOverrideDefaultSimilarity() { .build(); IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings); SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap()); - assertTrue(service.getDefaultSimilarity() instanceof BooleanSimilarity); + assertThat(service.getDefaultSimilarity(), instanceOf(BooleanSimilarity.class)); + } + + public void testOverrideDefaultSimilarityWithLegacyBM25() { + Settings settings = Settings.builder().put("index.similarity.default.type", "LegacyBM25") + .build(); + IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings); + SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap()); + assertThat(service.getDefaultSimilarity(), instanceOf(LegacyBM25Similarity.class)); } public void testSimilarityValidation() { From 40da194052c29c1309234b7d77ed1955698fcfc2 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Fri, 11 Jan 2019 11:32:44 +0100 Subject: [PATCH 13/16] use legacy bm25 for indices created in 6.x --- .../index/similarity/SimilarityProviders.java | 16 ++++++++++++---- .../index/similarity/SimilarityService.java | 3 +-- .../index/similarity/LegacySimilarityTests.java | 11 +++++------ .../index/similarity/SimilarityServiceTests.java | 10 ++++++++++ 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java index a215257cb5cff..6596f634808b0 100644 --- a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java +++ b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java @@ -51,6 +51,7 @@ import org.apache.lucene.search.similarities.NormalizationH2; import org.apache.lucene.search.similarities.NormalizationH3; import org.apache.lucene.search.similarities.NormalizationZ; +import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.elasticsearch.Version; import org.elasticsearch.common.logging.DeprecationLogger; @@ -270,16 +271,23 @@ static void assertSettingsIsSubsetOf(String type, Version version, Settings sett } } - public static BM25Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) { + public static Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) { assertSettingsIsSubsetOf("BM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS); float k1 = settings.getAsFloat("k1", 1.2f); float b = settings.getAsFloat("b", 0.75f); boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true); - BM25Similarity similarity = new BM25Similarity(k1, b); - similarity.setDiscountOverlaps(discountOverlaps); - return similarity; + if (indexCreatedVersion.before(Version.V_7_0_0)) { + //use legacy bm25 for indices created in 6.x + LegacyBM25Similarity similarity = new LegacyBM25Similarity(k1, b); + similarity.setDiscountOverlaps(discountOverlaps); + return similarity; + } else { + BM25Similarity similarity = new BM25Similarity(k1, b); + similarity.setDiscountOverlaps(discountOverlaps); + return similarity; + } } public static LegacyBM25Similarity createLegacyBM25Similarity(Settings settings, Version indexCreatedVersion) { diff --git a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java index 976f5140488d2..3457530ffe03b 100644 --- a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java +++ b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java @@ -25,7 +25,6 @@ import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.TermStatistics; -import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.BooleanSimilarity; import org.apache.lucene.search.similarities.ClassicSimilarity; import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper; @@ -76,7 +75,7 @@ public final class SimilarityService extends AbstractIndexComponent { } }); defaults.put("BM25", version -> { - final BM25Similarity similarity = SimilarityProviders.createBM25Similarity(Settings.EMPTY, version); + final Similarity similarity = SimilarityProviders.createBM25Similarity(Settings.EMPTY, version); return () -> similarity; }); defaults.put("LegacyBM25", version -> { diff --git a/server/src/test/java/org/elasticsearch/index/similarity/LegacySimilarityTests.java b/server/src/test/java/org/elasticsearch/index/similarity/LegacySimilarityTests.java index 69ad3bd128fe2..9915f7116a662 100644 --- a/server/src/test/java/org/elasticsearch/index/similarity/LegacySimilarityTests.java +++ b/server/src/test/java/org/elasticsearch/index/similarity/LegacySimilarityTests.java @@ -19,9 +19,9 @@ package org.elasticsearch.index.similarity; -import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.BooleanSimilarity; import org.apache.lucene.search.similarities.ClassicSimilarity; +import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; @@ -29,6 +29,7 @@ import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.test.ESSingleNodeTestCase; +import org.elasticsearch.test.VersionUtils; import java.io.IOException; @@ -43,15 +44,14 @@ protected boolean forbidPrivateIndexSettings() { } public void testResolveDefaultSimilaritiesOn6xIndex() { - final Settings indexSettings = Settings.builder() - .put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_6_3_0) // otherwise classic is forbidden - .build(); + Version version = VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, VersionUtils.getPreviousVersion(Version.V_7_0_0)); + final Settings indexSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build(); final SimilarityService similarityService = createIndex("foo", indexSettings).similarityService(); assertThat(similarityService.getSimilarity("classic").get(), instanceOf(ClassicSimilarity.class)); assertWarnings("The [classic] similarity is now deprecated in favour of BM25, which is generally " + "accepted as a better alternative. Use the [BM25] similarity or build a custom [scripted] similarity " + "instead."); - assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class)); + assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(LegacyBM25Similarity.class)); assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class)); assertThat(similarityService.getSimilarity("default"), equalTo(null)); } @@ -89,5 +89,4 @@ public void testResolveSimilaritiesFromMappingClassic() throws IOException { assertThat(similarity.getDiscountOverlaps(), equalTo(false)); } } - } diff --git a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java index a7c3b0f0d9ee6..d6fd74d8cd6b0 100644 --- a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java +++ b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java @@ -26,10 +26,12 @@ import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.elasticsearch.Version; +import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.IndexSettingsModule; +import org.elasticsearch.test.VersionUtils; import org.hamcrest.Matchers; import java.util.Collections; @@ -44,6 +46,14 @@ public void testDefaultSimilarity() { assertThat(service.getDefaultSimilarity(), instanceOf(BM25Similarity.class)); } + public void testDefaultSimilarity6xIndices() { + Version version = VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, VersionUtils.getPreviousVersion(Version.V_7_0_0)); + Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build(); + IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings); + SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap()); + assertThat(service.getDefaultSimilarity(), instanceOf(LegacyBM25Similarity.class)); + } + // Tests #16594 public void testOverrideBuiltInSimilarity() { Settings settings = Settings.builder().put("index.similarity.BM25.type", "classic").build(); From 9ca911fddb96e3b5f39597e5d32646b3e8b738e1 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Fri, 11 Jan 2019 16:04:41 +0100 Subject: [PATCH 14/16] add rolling upgrade test --- .../elasticsearch/upgrades/IndexingIT.java | 8 +- .../org/elasticsearch/upgrades/SearchIT.java | 117 ++++++++++++++++++ 2 files changed, 121 insertions(+), 4 deletions(-) create mode 100644 qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/SearchIT.java diff --git a/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/IndexingIT.java b/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/IndexingIT.java index c80218c50ebe9..7dec5aa7f5733 100644 --- a/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/IndexingIT.java +++ b/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/IndexingIT.java @@ -19,13 +19,13 @@ package org.elasticsearch.upgrades; import org.apache.http.util.EntityUtils; +import org.elasticsearch.Version; +import org.elasticsearch.client.Request; +import org.elasticsearch.client.Response; import org.elasticsearch.common.Booleans; import org.elasticsearch.common.Strings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.rest.action.document.RestBulkAction; -import org.elasticsearch.Version; -import org.elasticsearch.client.Request; -import org.elasticsearch.client.Response; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -145,7 +145,7 @@ public void testIndexing() throws IOException { } } - private void bulk(String index, String valueSuffix, int count) throws IOException { + private static void bulk(String index, String valueSuffix, int count) throws IOException { StringBuilder b = new StringBuilder(); for (int i = 0; i < count; i++) { b.append("{\"index\": {\"_index\": \"").append(index).append("\", \"_type\": \"_doc\"}}\n"); diff --git a/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/SearchIT.java b/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/SearchIT.java new file mode 100644 index 0000000000000..feb15a22c1634 --- /dev/null +++ b/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/SearchIT.java @@ -0,0 +1,117 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.upgrades; + +import org.apache.http.util.EntityUtils; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.client.Request; +import org.elasticsearch.client.Response; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.DeprecationHandler; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.xcontent.json.JsonXContent; +import org.elasticsearch.index.query.MatchQueryBuilder; +import org.elasticsearch.rest.action.document.RestBulkAction; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.builder.SearchSourceBuilder; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public class SearchIT extends AbstractRollingTestCase { + + public void testSingleIndexScoring() throws IOException { + if (CLUSTER_TYPE == ClusterType.OLD) { + bulk("old"); + search("old"); + } + if (CLUSTER_TYPE == ClusterType.MIXED) { + waitForYellow(); + if (indexExists("mixed") == false) { + bulk("mixed"); + } + search("old"); + search("mixed"); + } + if (CLUSTER_TYPE == ClusterType.UPGRADED) { + waitForYellow(); + bulk("upgraded"); + search("old"); + search("mixed"); + search("upgraded"); + } + } + + private static void waitForYellow() throws IOException { + Request waitForYellow = new Request("GET", "/_cluster/health"); + waitForYellow.addParameter("wait_for_nodes", "3"); + waitForYellow.addParameter("wait_for_status", "yellow"); + client().performRequest(waitForYellow); + } + + private static void search(String index) throws IOException { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.query(new MatchQueryBuilder("text", "quick brown fox dog")); + Request request = new Request("GET", "/" + index + "/_search"); + request.addParameter("search_type", "dfs_query_then_fetch"); + request.setJsonEntity(Strings.toString(sourceBuilder)); + Response response = client().performRequest(request); + String responseString = EntityUtils.toString(response.getEntity()); + SearchResponse searchResponse = SearchResponse.fromXContent(JsonXContent.jsonXContent.createParser(NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, responseString)); + assertEquals(5, searchResponse.getHits().getTotalHits().value); + SearchHit[] hits = searchResponse.getHits().getHits(); + assertEquals(5, hits.length); + //System.out.println(Strings.toString(searchResponse.getHits(), true, true)); + for (int i = 0; i < hits.length; i++) { + assertEquals(String.valueOf(i), hits[i].getId()); + } + } + + private static void bulk(String index) throws IOException { + createIndex(index, Settings.builder().put("index.number_of_shards", randomIntBetween(2, 4)) + .put("index.number_of_replicas", randomIntBetween(0, 1)).build()); + String[] values = new String[]{ + "The quick brown fox jumps over the lazy dog", + "The quick brown dog jumps over the lazy cat", + "The brown fox jumps over the lazy dog", + "The quick fox jumps over the lazy cat", + "The fox jumps over the lazy dog", + }; + List bulkItems = new ArrayList<>(values.length); + for (int i = 0; i < values.length; i++) { + bulkItems.add("{\"index\": {\"_index\": \"" + index + "\", \"_type\": \"_doc\", \"_id\":" + i + "}}\n" + + "{\"text\": \"" + values[i] + "\"}\n"); + } + Collections.shuffle(bulkItems, random()); + StringBuilder b = new StringBuilder(); + for (String bulkItem : bulkItems) { + b.append(bulkItem); + } + Request bulk = new Request("POST", "/_bulk"); + bulk.addParameter("refresh", "true"); + bulk.setOptions(expectWarnings(RestBulkAction.TYPES_DEPRECATION_MESSAGE)); + bulk.setJsonEntity(b.toString()); + client().performRequest(bulk); + } +} From 92a7efa60badd68f347a09ece9749335b3863453 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Fri, 11 Jan 2019 20:11:16 +0100 Subject: [PATCH 15/16] add integration test --- .../org/elasticsearch/upgrades/SearchIT.java | 160 +++++++++++++----- 1 file changed, 114 insertions(+), 46 deletions(-) diff --git a/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/SearchIT.java b/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/SearchIT.java index feb15a22c1634..1bbfc95a1dd3f 100644 --- a/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/SearchIT.java +++ b/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/SearchIT.java @@ -24,49 +24,124 @@ import org.elasticsearch.client.Request; import org.elasticsearch.client.Response; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.DeprecationHandler; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.json.JsonXContent; import org.elasticsearch.index.query.MatchQueryBuilder; -import org.elasticsearch.rest.action.document.RestBulkAction; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.junit.BeforeClass; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; +/** + * Test that checks search api behaviour while performing a rolling upgrade + */ public class SearchIT extends AbstractRollingTestCase { + private static final String[] VALUES = new String[]{ + //values are in the expected scoring order when searchging for "quick brown fox dog", docs wil be shuffled + //but the ids get assigned to them based on the order in this array + "The quick brown fox jumps over the lazy dog", + "The quick brown dog jumps over the lazy cat", + "The brown fox jumps over the lazy dog", + "The quick fox jumps over the lazy cat", + "The fox jumps over the lazy dog", + }; + + private static final List> DOCS = new ArrayList<>(VALUES.length); + + @BeforeClass + public static void setupDocs() { + for (int i = 0; i < VALUES.length; i++) { + DOCS.add(Tuple.tuple(String.valueOf(i), "{\"text\": \"" + VALUES[i] + "\"}")); + } + Collections.shuffle(DOCS, random()); + } + + /** + * Check that documents ordering stays the same during the upgrade and after when searching against an index made of multiple shards. + * Tests the bw compatibility layer introduced after removing the k1+1 constant factor from the numerator of the bm25 scoring formula. + */ public void testSingleIndexScoring() throws IOException { if (CLUSTER_TYPE == ClusterType.OLD) { - bulk("old"); - search("old"); + index("single-old"); + search("single-old"); + } + if (CLUSTER_TYPE == ClusterType.MIXED) { + waitForGreen(); + if (indexExists("single-mixed") == false) { + index("single-mixed"); + } + search("single-old"); + search("single-mixed"); + } + if (CLUSTER_TYPE == ClusterType.UPGRADED) { + waitForGreen(); + index("single-upgraded"); + search("single-old"); + search("single-mixed"); + search("single-upgraded"); + } + } + + /** + * Check that documents ordering stays the same during the upgrade and after when searching against multiple indices, and newer + * indices are explcitly configured to use the LegacyBM25 similarity. + */ + public void testMultipleIndicesScoring() throws IOException { + if (CLUSTER_TYPE == ClusterType.OLD) { + createIndex("multi-old", Settings.builder().put("index.number_of_shards", 1) + .put("index.number_of_replicas", 0).build()); + Tuple document = randomFrom(DOCS); + //index 1 of the 5 documents n an index created before the upgrade + index("multi-old", document); } if (CLUSTER_TYPE == ClusterType.MIXED) { - waitForYellow(); - if (indexExists("mixed") == false) { - bulk("mixed"); + waitForGreen(); + if (indexExists("multi-mixed") == false) { + createIndex("multi-mixed", Settings.builder().put("index.number_of_shards", randomBoolean() ? 1 : 2) + .put("index.number_of_replicas", 0).build()); + //index 3 of the 5 documents in an index created during the upgrade + indexNewDocs("multi-mixed", 3); } - search("old"); - search("mixed"); } if (CLUSTER_TYPE == ClusterType.UPGRADED) { - waitForYellow(); - bulk("upgraded"); - search("old"); - search("mixed"); - search("upgraded"); + waitForGreen(); + createIndex("multi-upgraded", Settings.builder().put("index.similarity.default.type", "LegacyBM25").build()); + //index the last document in an index created after the upgrade, which has explicitly set LegacyBM25 + indexNewDocs("multi-upgraded", 1); + //finally test that the order is the same as if the documents were all indexed in the same index + search("multi-old,multi-mixed,multi-upgraded"); } } - private static void waitForYellow() throws IOException { - Request waitForYellow = new Request("GET", "/_cluster/health"); - waitForYellow.addParameter("wait_for_nodes", "3"); - waitForYellow.addParameter("wait_for_status", "yellow"); - client().performRequest(waitForYellow); + private static void indexNewDocs(String index, int numDocs) throws IOException { + for (int i = 0; i < numDocs; i++) { + Tuple document; + do { + document = randomFrom(DOCS); + } while (docExists("multi-old", document.v1()) || docExists("multi-mixed", document.v1())); + index(index, document); + } + } + + private static boolean docExists(String index, String id) throws IOException { + Request existsRequest = new Request("HEAD", "/" + index + "/_doc/" + id); + return client().performRequest(existsRequest).getStatusLine().getStatusCode() == 200; + } + + private static void waitForGreen() throws IOException { + Request waitForGreen = new Request("GET", "/_cluster/health"); + waitForGreen.addParameter("wait_for_nodes", "3"); + waitForGreen.addParameter("wait_for_status", "green"); + waitForGreen.addParameter("timeout", "70s"); + client().performRequest(waitForGreen); } private static void search(String index) throws IOException { @@ -75,43 +150,36 @@ private static void search(String index) throws IOException { Request request = new Request("GET", "/" + index + "/_search"); request.addParameter("search_type", "dfs_query_then_fetch"); request.setJsonEntity(Strings.toString(sourceBuilder)); - Response response = client().performRequest(request); - String responseString = EntityUtils.toString(response.getEntity()); - SearchResponse searchResponse = SearchResponse.fromXContent(JsonXContent.jsonXContent.createParser(NamedXContentRegistry.EMPTY, - DeprecationHandler.THROW_UNSUPPORTED_OPERATION, responseString)); + SearchResponse searchResponse = search(request); assertEquals(5, searchResponse.getHits().getTotalHits().value); SearchHit[] hits = searchResponse.getHits().getHits(); assertEquals(5, hits.length); - //System.out.println(Strings.toString(searchResponse.getHits(), true, true)); for (int i = 0; i < hits.length; i++) { assertEquals(String.valueOf(i), hits[i].getId()); } } - private static void bulk(String index) throws IOException { + private static SearchResponse search(Request request) throws IOException { + Response response = client().performRequest(request); + String responseString = EntityUtils.toString(response.getEntity()); + return SearchResponse.fromXContent(JsonXContent.jsonXContent.createParser(NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, responseString)); + } + + private static void index(String index) throws IOException { createIndex(index, Settings.builder().put("index.number_of_shards", randomIntBetween(2, 4)) - .put("index.number_of_replicas", randomIntBetween(0, 1)).build()); - String[] values = new String[]{ - "The quick brown fox jumps over the lazy dog", - "The quick brown dog jumps over the lazy cat", - "The brown fox jumps over the lazy dog", - "The quick fox jumps over the lazy cat", - "The fox jumps over the lazy dog", - }; - List bulkItems = new ArrayList<>(values.length); - for (int i = 0; i < values.length; i++) { - bulkItems.add("{\"index\": {\"_index\": \"" + index + "\", \"_type\": \"_doc\", \"_id\":" + i + "}}\n" + - "{\"text\": \"" + values[i] + "\"}\n"); + .put("index.number_of_replicas", 0).build()); + for (Tuple document : DOCS) { + index(index, document); } - Collections.shuffle(bulkItems, random()); - StringBuilder b = new StringBuilder(); - for (String bulkItem : bulkItems) { - b.append(bulkItem); - } - Request bulk = new Request("POST", "/_bulk"); - bulk.addParameter("refresh", "true"); - bulk.setOptions(expectWarnings(RestBulkAction.TYPES_DEPRECATION_MESSAGE)); - bulk.setJsonEntity(b.toString()); - client().performRequest(bulk); + } + + private static void index(String index, Tuple doc) throws IOException { + String id = doc.v1(); + String body = doc.v2(); + Request request = new Request("PUT", "/" + index + "/_doc/" + id); + request.addParameter("refresh", "wait_for"); + request.setJsonEntity(body); + assertEquals(201, client().performRequest(request).getStatusLine().getStatusCode()); } } From c32e7701182f125a3b63d504265e6826de2bade3 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Fri, 11 Jan 2019 21:50:01 +0100 Subject: [PATCH 16/16] improve test --- .../java/org/elasticsearch/upgrades/SearchIT.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/SearchIT.java b/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/SearchIT.java index 1bbfc95a1dd3f..d464059a002c3 100644 --- a/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/SearchIT.java +++ b/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/SearchIT.java @@ -74,7 +74,7 @@ public void testSingleIndexScoring() throws IOException { search("single-old"); } if (CLUSTER_TYPE == ClusterType.MIXED) { - waitForGreen(); + waitForGreen("single-old"); if (indexExists("single-mixed") == false) { index("single-mixed"); } @@ -82,7 +82,7 @@ public void testSingleIndexScoring() throws IOException { search("single-mixed"); } if (CLUSTER_TYPE == ClusterType.UPGRADED) { - waitForGreen(); + waitForGreen("single-old,single-mixed"); index("single-upgraded"); search("single-old"); search("single-mixed"); @@ -103,7 +103,7 @@ public void testMultipleIndicesScoring() throws IOException { index("multi-old", document); } if (CLUSTER_TYPE == ClusterType.MIXED) { - waitForGreen(); + waitForGreen("multi-old"); if (indexExists("multi-mixed") == false) { createIndex("multi-mixed", Settings.builder().put("index.number_of_shards", randomBoolean() ? 1 : 2) .put("index.number_of_replicas", 0).build()); @@ -112,7 +112,7 @@ public void testMultipleIndicesScoring() throws IOException { } } if (CLUSTER_TYPE == ClusterType.UPGRADED) { - waitForGreen(); + waitForGreen("multi-old,multi-mixed"); createIndex("multi-upgraded", Settings.builder().put("index.similarity.default.type", "LegacyBM25").build()); //index the last document in an index created after the upgrade, which has explicitly set LegacyBM25 indexNewDocs("multi-upgraded", 1); @@ -136,8 +136,8 @@ private static boolean docExists(String index, String id) throws IOException { return client().performRequest(existsRequest).getStatusLine().getStatusCode() == 200; } - private static void waitForGreen() throws IOException { - Request waitForGreen = new Request("GET", "/_cluster/health"); + private static void waitForGreen(String index) throws IOException { + Request waitForGreen = new Request("GET", "/_cluster/health/" + index); waitForGreen.addParameter("wait_for_nodes", "3"); waitForGreen.addParameter("wait_for_status", "green"); waitForGreen.addParameter("timeout", "70s");