Skip to content

Commit 5b2692f

Browse files
Use dafault BM25Similarity (#17306)
Signed-off-by: Prudhvi Godithi <pgodithi@amazon.com>
1 parent d0a65d3 commit 5b2692f

File tree

9 files changed

+119
-28
lines changed

9 files changed

+119
-28
lines changed

CHANGELOG-3.0.md

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
3636
- Refactor `:server` module `org.apacge.lucene` package to eliminate top level split packages for JPMS support ([#17241](https://github.com/opensearch-project/OpenSearch/pull/17241))
3737
- Stop minimizing automata used for case-insensitive matches ([#17268](https://github.com/opensearch-project/OpenSearch/pull/17268))
3838
- Refactor the `:server` module `org.opensearch.client` to `org.opensearch.transport.client` to eliminate top level split packages for JPMS support ([#17272](https://github.com/opensearch-project/OpenSearch/pull/17272))
39+
- Use Lucene `BM25Similarity` as default since the `LegacyBM25Similarity` is marked as deprecated ([#17306](https://github.com/opensearch-project/OpenSearch/pull/17306))
3940

4041
### Deprecated
4142

modules/parent-join/src/yamlRestTest/resources/rest-api-spec/test/30_inner_hits.yml

+5-5
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,11 @@ teardown:
7979

8080

8181
- match: { hits.total.value: 2 }
82-
- match: { hits.hits.0._id: "3" }
83-
- match: { hits.hits.0.inner_hits.question.hits.total.value: 0}
84-
- match: { hits.hits.1._id: "2" }
85-
- match: { hits.hits.1.inner_hits.question.hits.total.value: 1}
86-
- match: { hits.hits.1.inner_hits.question.hits.hits.0._id: "1"}
82+
- match: { hits.hits.0._id: "2" }
83+
- match: { hits.hits.0.inner_hits.question.hits.total.value: 1 }
84+
- match: { hits.hits.0.inner_hits.question.hits.hits.0._id: "1" }
85+
- match: { hits.hits.1._id: "3" }
86+
- match: { hits.hits.1.inner_hits.question.hits.total.value: 0 }
8787

8888
---
8989
"HasParent disallow expensive queries":
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
---
2+
setup:
3+
- do:
4+
indices.create:
5+
index: legacy_bm25_test
6+
body:
7+
settings:
8+
number_of_shards: 1
9+
number_of_replicas: 0
10+
similarity:
11+
default:
12+
type: LegacyBM25
13+
k1: 1.2
14+
b: 0.75
15+
mappings:
16+
properties:
17+
content:
18+
type: text
19+
- do:
20+
index:
21+
index: legacy_bm25_test
22+
id: "1"
23+
body: { "content": "This is a test document for legacy BM25 scoring" }
24+
- do:
25+
index:
26+
index: legacy_bm25_test
27+
id: "2"
28+
body: { "content": "legacy legacy legacy scoring" }
29+
- do:
30+
indices.refresh:
31+
index: legacy_bm25_test
32+
33+
---
34+
"Legacy BM25 search":
35+
- do:
36+
search:
37+
index: legacy_bm25_test
38+
body:
39+
query:
40+
match:
41+
content: "legacy"
42+
- match: { hits.total.value: 2 }
43+
- match: { hits.hits.0._id: "2" }
44+
- match: { hits.hits.1._id: "1" }
45+
46+
---
47+
teardown:
48+
- do:
49+
indices.delete:
50+
index: legacy_bm25_test

server/src/internalClusterTest/java/org/opensearch/search/basic/TransportTwoNodesSearchIT.java

+13-12
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertNoFailures;
7474
import static org.opensearch.transport.client.Requests.createIndexRequest;
7575
import static org.opensearch.transport.client.Requests.searchRequest;
76+
import static org.hamcrest.Matchers.containsString;
7677
import static org.hamcrest.Matchers.equalTo;
7778
import static org.hamcrest.Matchers.instanceOf;
7879
import static org.hamcrest.Matchers.notNullValue;
@@ -179,12 +180,12 @@ public void testDfsQueryThenFetch() throws Exception {
179180
SearchHit hit = hits[i];
180181
assertThat(hit.getExplanation(), notNullValue());
181182
assertThat(hit.getExplanation().getDetails().length, equalTo(1));
182-
assertThat(hit.getExplanation().getDetails()[0].getDetails().length, equalTo(3));
183-
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails().length, equalTo(2));
184-
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[0].getDescription(), startsWith("n,"));
185-
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[0].getValue(), equalTo(100L));
186-
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[1].getDescription(), startsWith("N,"));
187-
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[1].getValue(), equalTo(100L));
183+
assertThat(hit.getExplanation().getDetails()[0].getDetails().length, equalTo(2));
184+
assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDescription(), startsWith("idf"));
185+
assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails().length, equalTo(2));
186+
assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[0].getValue(), equalTo(100L));
187+
assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[1].getValue(), equalTo(100L));
188+
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDescription(), containsString("freq"));
188189
assertThat(
189190
"id[" + hit.getId() + "] -> " + hit.getExplanation().toString(),
190191
hit.getId(),
@@ -221,12 +222,12 @@ public void testDfsQueryThenFetchWithSort() throws Exception {
221222
SearchHit hit = hits[i];
222223
assertThat(hit.getExplanation(), notNullValue());
223224
assertThat(hit.getExplanation().getDetails().length, equalTo(1));
224-
assertThat(hit.getExplanation().getDetails()[0].getDetails().length, equalTo(3));
225-
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails().length, equalTo(2));
226-
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[0].getDescription(), startsWith("n,"));
227-
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[0].getValue(), equalTo(100L));
228-
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[1].getDescription(), startsWith("N,"));
229-
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[1].getValue(), equalTo(100L));
225+
assertThat(hit.getExplanation().getDetails()[0].getDetails().length, equalTo(2));
226+
assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDescription(), startsWith("idf"));
227+
assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails().length, equalTo(2));
228+
assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[0].getValue(), equalTo(100L));
229+
assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[1].getValue(), equalTo(100L));
230+
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDescription(), containsString("freq"));
230231
assertThat("id[" + hit.getId() + "]", hit.getId(), equalTo(Integer.toString(total + i)));
231232
}
232233
total += hits.length;

server/src/internalClusterTest/java/org/opensearch/search/nested/SimpleNestedIT.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ public void testExplainWithSingleDoc() throws Exception {
496496
assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(1L));
497497
Explanation explanation = searchResponse.getHits().getHits()[0].getExplanation();
498498
assertThat(explanation.getValue(), equalTo(searchResponse.getHits().getHits()[0].getScore()));
499-
assertThat(explanation.toString(), startsWith("0.36464313 = Score based on 2 child docs in range from 0 to 1"));
499+
assertThat(explanation.toString(), startsWith("0.16574687 = Score based on 2 child docs in range from 0 to 1"));
500500
}
501501

502502
public void testSimpleNestedSorting() throws Exception {

server/src/main/java/org/opensearch/index/similarity/SimilarityProviders.java

+11-1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import org.apache.lucene.search.similarities.AfterEffect;
3636
import org.apache.lucene.search.similarities.AfterEffectB;
3737
import org.apache.lucene.search.similarities.AfterEffectL;
38+
import org.apache.lucene.search.similarities.BM25Similarity;
3839
import org.apache.lucene.search.similarities.BasicModel;
3940
import org.apache.lucene.search.similarities.BasicModelG;
4041
import org.apache.lucene.search.similarities.BasicModelIF;
@@ -62,6 +63,7 @@
6263
import org.apache.lucene.search.similarities.NormalizationH2;
6364
import org.apache.lucene.search.similarities.NormalizationH3;
6465
import org.apache.lucene.search.similarities.NormalizationZ;
66+
import org.apache.lucene.search.similarities.Similarity;
6567
import org.opensearch.Version;
6668
import org.opensearch.common.logging.DeprecationLogger;
6769
import org.opensearch.common.settings.Settings;
@@ -271,13 +273,21 @@ static void assertSettingsIsSubsetOf(String type, Version version, Settings sett
271273
}
272274
}
273275

274-
public static LegacyBM25Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) {
276+
public static Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) {
275277
assertSettingsIsSubsetOf("BM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS);
276278

277279
float k1 = settings.getAsFloat("k1", 1.2f);
278280
float b = settings.getAsFloat("b", 0.75f);
279281
boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true);
280282

283+
return new BM25Similarity(k1, b, discountOverlaps);
284+
}
285+
286+
public static Similarity createLegacyBM25Similarity(Settings settings, Version indexCreatedVersion) {
287+
assertSettingsIsSubsetOf("LegacyBM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS);
288+
float k1 = settings.getAsFloat("k1", 1.2f);
289+
float b = settings.getAsFloat("b", 0.75f);
290+
boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true);
281291
return new LegacyBM25Similarity(k1, b, discountOverlaps);
282292
}
283293

server/src/main/java/org/opensearch/index/similarity/SimilarityService.java

+3-2
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.apache.lucene.search.CollectionStatistics;
3838
import org.apache.lucene.search.Explanation;
3939
import org.apache.lucene.search.TermStatistics;
40+
import org.apache.lucene.search.similarities.BM25Similarity;
4041
import org.apache.lucene.search.similarities.BooleanSimilarity;
4142
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
4243
import org.apache.lucene.search.similarities.Similarity;
@@ -52,7 +53,6 @@
5253
import org.opensearch.index.IndexSettings;
5354
import org.opensearch.index.mapper.MappedFieldType;
5455
import org.opensearch.index.mapper.MapperService;
55-
import org.opensearch.lucene.similarity.LegacyBM25Similarity;
5656
import org.opensearch.script.ScriptService;
5757

5858
import java.util.Collections;
@@ -84,7 +84,7 @@ public final class SimilarityService extends AbstractIndexComponent {
8484
};
8585
});
8686
defaults.put("BM25", version -> {
87-
final LegacyBM25Similarity similarity = SimilarityProviders.createBM25Similarity(Settings.EMPTY, version);
87+
final Similarity similarity = new BM25Similarity();
8888
return () -> similarity;
8989
});
9090
defaults.put("boolean", version -> {
@@ -100,6 +100,7 @@ public final class SimilarityService extends AbstractIndexComponent {
100100
);
101101
});
102102
builtIn.put("BM25", (settings, version, scriptService) -> SimilarityProviders.createBM25Similarity(settings, version));
103+
builtIn.put("LegacyBM25", (settings, version, scriptService) -> SimilarityProviders.createLegacyBM25Similarity(settings, version));
103104
builtIn.put("boolean", (settings, version, scriptService) -> SimilarityProviders.createBooleanSimilarity(settings, version));
104105
builtIn.put("DFR", (settings, version, scriptService) -> SimilarityProviders.createDfrSimilarity(settings, version));
105106
builtIn.put("IB", (settings, version, scriptService) -> SimilarityProviders.createIBSimilarity(settings, version));

server/src/test/java/org/opensearch/index/similarity/SimilarityServiceTests.java

+8
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.apache.lucene.index.FieldInvertState;
3535
import org.apache.lucene.search.CollectionStatistics;
3636
import org.apache.lucene.search.TermStatistics;
37+
import org.apache.lucene.search.similarities.BM25Similarity;
3738
import org.apache.lucene.search.similarities.BooleanSimilarity;
3839
import org.apache.lucene.search.similarities.Similarity;
3940
import org.opensearch.Version;
@@ -53,6 +54,13 @@ public void testDefaultSimilarity() {
5354
Settings settings = Settings.builder().build();
5455
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
5556
SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap());
57+
assertThat(service.getDefaultSimilarity(), instanceOf(BM25Similarity.class));
58+
}
59+
60+
public void testLegacySimilarity() {
61+
Settings settings = Settings.builder().put("index.similarity.default.type", "LegacyBM25").build();
62+
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
63+
SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap());
5664
assertThat(service.getDefaultSimilarity(), instanceOf(LegacyBM25Similarity.class));
5765
}
5866

server/src/test/java/org/opensearch/index/similarity/SimilarityTests.java

+27-7
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
package org.opensearch.index.similarity;
3434

3535
import org.apache.lucene.search.similarities.AfterEffectL;
36+
import org.apache.lucene.search.similarities.BM25Similarity;
3637
import org.apache.lucene.search.similarities.BasicModelG;
3738
import org.apache.lucene.search.similarities.BooleanSimilarity;
3839
import org.apache.lucene.search.similarities.DFISimilarity;
@@ -72,7 +73,7 @@ protected Collection<Class<? extends Plugin>> getPlugins() {
7273

7374
public void testResolveDefaultSimilarities() {
7475
SimilarityService similarityService = createIndex("foo").similarityService();
75-
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(LegacyBM25Similarity.class));
76+
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class));
7677
assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class));
7778
assertThat(similarityService.getSimilarity("default"), equalTo(null));
7879
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> similarityService.getSimilarity("classic"));
@@ -83,7 +84,29 @@ public void testResolveDefaultSimilarities() {
8384
);
8485
}
8586

86-
public void testResolveSimilaritiesFromMapping_classicIsForbidden() throws IOException {
87+
public void testResolveLegacySimilarity() throws IOException {
88+
Settings settings = Settings.builder()
89+
.put("index.similarity.my_similarity.type", "LegacyBM25")
90+
.put("index.similarity.my_similarity.k1", 1.2f)
91+
.put("index.similarity.my_similarity.b", 0.75f)
92+
.put("index.similarity.my_similarity.discount_overlaps", false)
93+
.build();
94+
95+
XContentBuilder mapping = XContentFactory.jsonBuilder()
96+
.startObject()
97+
.startObject("properties")
98+
.startObject("dummy")
99+
.field("type", "text")
100+
.field("similarity", "my_similarity")
101+
.endObject()
102+
.endObject()
103+
.endObject();
104+
105+
MapperService mapperService = createIndex("foo", settings, "type", mapping).mapperService();
106+
assertThat(mapperService.fieldType("dummy").getTextSearchInfo().getSimilarity().get(), instanceOf(LegacyBM25Similarity.class));
107+
}
108+
109+
public void testResolveSimilaritiesFromMapping_classicIsForbidden() {
87110
Settings indexSettings = Settings.builder()
88111
.put("index.similarity.my_similarity.type", "classic")
89112
.put("index.similarity.my_similarity.discount_overlaps", false)
@@ -114,12 +137,9 @@ public void testResolveSimilaritiesFromMapping_bm25() throws IOException {
114137
.put("index.similarity.my_similarity.discount_overlaps", false)
115138
.build();
116139
MapperService mapperService = createIndex("foo", indexSettings, "type", mapping).mapperService();
117-
assertThat(mapperService.fieldType("field1").getTextSearchInfo().getSimilarity().get(), instanceOf(LegacyBM25Similarity.class));
140+
assertThat(mapperService.fieldType("field1").getTextSearchInfo().getSimilarity().get(), instanceOf(BM25Similarity.class));
118141

119-
LegacyBM25Similarity similarity = (LegacyBM25Similarity) mapperService.fieldType("field1")
120-
.getTextSearchInfo()
121-
.getSimilarity()
122-
.get();
142+
BM25Similarity similarity = (BM25Similarity) mapperService.fieldType("field1").getTextSearchInfo().getSimilarity().get();
123143
assertThat(similarity.getK1(), equalTo(2.0f));
124144
assertThat(similarity.getB(), equalTo(0.5f));
125145
assertThat(similarity.getDiscountOverlaps(), equalTo(false));

0 commit comments

Comments
 (0)