Skip to content

Commit 993927a

Browse files
[DerivedFields] DerivedFieldScript and query execution logic (#12746) (#12968)
First in a series of commits to support derived fields, a form of schema-on-read. This commit adds: 1. DerivedFieldScript factory: This script factory will be used to execute scripts defined against derived fields of any type. 2. DerivedFieldValueFetcher: The value fetcher contains the logic to execute the script and fetch the value in the form of List<Object>. It expects a DerivedFieldScript.LeafFactory as input and sets the contract with the consumer to call setNextReader() whenever a segment is switched. 3. DerivedFieldQuery: This query will be used by any of the derived fields. It expects an input query and a DerivedFieldValueFetcher. It uses a two-phase iterator approach with the approximation iterator set to match all docs. On a match, it creates a Lucene MemoryIndex for the given doc, fetches the value of the derived field from _source using DerivedFieldValueFetcher, and executes the input query against that memory index. --------- (cherry picked from commit 70711cf) Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 6919ee1 commit 993927a

File tree

7 files changed

+417
-2
lines changed

7 files changed

+417
-2
lines changed

modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/71_context_api.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
- do:
33
scripts_painless_context: {}
44
- match: { contexts.0: aggregation_selector}
5-
- match: { contexts.23: update}
5+
- match: { contexts.24: update}
66
---
77

88
"Action to get all API values for score context":
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.mapper;
10+
11+
import org.apache.lucene.index.LeafReaderContext;
12+
import org.opensearch.script.DerivedFieldScript;
13+
import org.opensearch.search.lookup.SourceLookup;
14+
15+
import java.io.IOException;
16+
import java.util.List;
17+
18+
/**
19+
* The value fetcher contains logic to execute script and fetch the value in form of list of object.
20+
* It expects DerivedFieldScript.LeafFactory as an input and sets the contract with consumer to call
21+
* {@link #setNextReader(LeafReaderContext)} whenever a segment is switched.
22+
*/
23+
public final class DerivedFieldValueFetcher implements ValueFetcher {
24+
private DerivedFieldScript derivedFieldScript;
25+
private final DerivedFieldScript.LeafFactory derivedFieldScriptFactory;
26+
27+
public DerivedFieldValueFetcher(DerivedFieldScript.LeafFactory derivedFieldScriptFactory) {
28+
this.derivedFieldScriptFactory = derivedFieldScriptFactory;
29+
}
30+
31+
@Override
32+
public List<Object> fetchValues(SourceLookup lookup) {
33+
derivedFieldScript.setDocument(lookup.docId());
34+
// TODO: remove List.of() when derivedFieldScript.execute() returns list of objects.
35+
return List.of(derivedFieldScript.execute());
36+
}
37+
38+
public void setNextReader(LeafReaderContext context) {
39+
try {
40+
derivedFieldScript = derivedFieldScriptFactory.newInstance(context);
41+
} catch (IOException e) {
42+
throw new RuntimeException(e);
43+
}
44+
}
45+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.query;
10+
11+
import org.apache.lucene.analysis.Analyzer;
12+
import org.apache.lucene.index.IndexableField;
13+
import org.apache.lucene.index.LeafReaderContext;
14+
import org.apache.lucene.index.memory.MemoryIndex;
15+
import org.apache.lucene.search.ConstantScoreScorer;
16+
import org.apache.lucene.search.ConstantScoreWeight;
17+
import org.apache.lucene.search.DocIdSetIterator;
18+
import org.apache.lucene.search.IndexSearcher;
19+
import org.apache.lucene.search.Query;
20+
import org.apache.lucene.search.QueryVisitor;
21+
import org.apache.lucene.search.ScoreMode;
22+
import org.apache.lucene.search.Scorer;
23+
import org.apache.lucene.search.TwoPhaseIterator;
24+
import org.apache.lucene.search.Weight;
25+
import org.opensearch.index.mapper.DerivedFieldValueFetcher;
26+
import org.opensearch.search.lookup.LeafSearchLookup;
27+
import org.opensearch.search.lookup.SearchLookup;
28+
29+
import java.io.IOException;
30+
import java.util.List;
31+
import java.util.Objects;
32+
import java.util.function.Function;
33+
34+
/**
35+
* DerivedFieldQuery used for querying derived fields. It contains the logic to execute an input lucene query against
36+
* DerivedField. It also accepts DerivedFieldValueFetcher and SearchLookup as an input.
37+
*/
38+
public final class DerivedFieldQuery extends Query {
39+
private final Query query;
40+
private final DerivedFieldValueFetcher valueFetcher;
41+
private final SearchLookup searchLookup;
42+
private final Function<Object, IndexableField> indexableFieldGenerator;
43+
private final Analyzer indexAnalyzer;
44+
45+
/**
46+
* @param query lucene query to be executed against the derived field
47+
* @param valueFetcher DerivedFieldValueFetcher ValueFetcher to fetch the value of a derived field from _source
48+
* using LeafSearchLookup
49+
* @param searchLookup SearchLookup to get the LeafSearchLookup look used by valueFetcher to fetch the _source
50+
* @param indexableFieldGenerator used to generate lucene IndexableField from a given object fetched by valueFetcher
51+
* to be used in lucene memory index.
52+
*/
53+
public DerivedFieldQuery(
54+
Query query,
55+
DerivedFieldValueFetcher valueFetcher,
56+
SearchLookup searchLookup,
57+
Function<Object, IndexableField> indexableFieldGenerator,
58+
Analyzer indexAnalyzer
59+
) {
60+
this.query = query;
61+
this.valueFetcher = valueFetcher;
62+
this.searchLookup = searchLookup;
63+
this.indexableFieldGenerator = indexableFieldGenerator;
64+
this.indexAnalyzer = indexAnalyzer;
65+
}
66+
67+
@Override
68+
public void visit(QueryVisitor visitor) {
69+
query.visit(visitor);
70+
}
71+
72+
@Override
73+
public Query rewrite(IndexSearcher indexSearcher) throws IOException {
74+
Query rewritten = indexSearcher.rewrite(query);
75+
if (rewritten == query) {
76+
return this;
77+
}
78+
return new DerivedFieldQuery(rewritten, valueFetcher, searchLookup, indexableFieldGenerator, indexAnalyzer);
79+
}
80+
81+
@Override
82+
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
83+
84+
return new ConstantScoreWeight(this, boost) {
85+
@Override
86+
public Scorer scorer(LeafReaderContext context) {
87+
DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
88+
valueFetcher.setNextReader(context);
89+
LeafSearchLookup leafSearchLookup = searchLookup.getLeafSearchLookup(context);
90+
TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) {
91+
@Override
92+
public boolean matches() {
93+
leafSearchLookup.source().setSegmentAndDocument(context, approximation.docID());
94+
List<Object> values = valueFetcher.fetchValues(leafSearchLookup.source());
95+
// TODO: in case of errors from script, should it be ignored and treated as missing field
96+
// by using a configurable setting?
97+
MemoryIndex memoryIndex = new MemoryIndex();
98+
for (Object value : values) {
99+
memoryIndex.addField(indexableFieldGenerator.apply(value), indexAnalyzer);
100+
}
101+
float score = memoryIndex.search(query);
102+
return score > 0.0f;
103+
}
104+
105+
@Override
106+
public float matchCost() {
107+
// TODO: how can we compute this?
108+
return 1000f;
109+
}
110+
};
111+
return new ConstantScoreScorer(this, score(), scoreMode, twoPhase);
112+
}
113+
114+
@Override
115+
public boolean isCacheable(LeafReaderContext ctx) {
116+
return false;
117+
}
118+
};
119+
}
120+
121+
@Override
122+
public boolean equals(Object o) {
123+
if (this == o) {
124+
return true;
125+
}
126+
if (sameClassAs(o) == false) {
127+
return false;
128+
}
129+
DerivedFieldQuery other = (DerivedFieldQuery) o;
130+
return Objects.equals(this.query, other.query)
131+
&& Objects.equals(this.valueFetcher, other.valueFetcher)
132+
&& Objects.equals(this.searchLookup, other.searchLookup)
133+
&& Objects.equals(this.indexableFieldGenerator, other.indexableFieldGenerator)
134+
&& Objects.equals(this.indexAnalyzer, other.indexAnalyzer);
135+
}
136+
137+
@Override
138+
public int hashCode() {
139+
return Objects.hash(classHash(), query, valueFetcher, searchLookup, indexableFieldGenerator, indexableFieldGenerator);
140+
}
141+
142+
@Override
143+
public String toString(String f) {
144+
return "DerivedFieldQuery (Query: [ " + query.toString(f) + "])";
145+
}
146+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.script;
10+
11+
import org.apache.lucene.index.LeafReaderContext;
12+
import org.opensearch.common.logging.DeprecationLogger;
13+
import org.opensearch.index.fielddata.ScriptDocValues;
14+
import org.opensearch.search.lookup.LeafSearchLookup;
15+
import org.opensearch.search.lookup.SearchLookup;
16+
import org.opensearch.search.lookup.SourceLookup;
17+
18+
import java.io.IOException;
19+
import java.util.HashMap;
20+
import java.util.Map;
21+
import java.util.function.Function;
22+
23+
/**
24+
* Definition of Script for DerivedField.
25+
* It will be used to execute scripts defined against derived fields of any type
26+
*
27+
* @opensearch.internal
28+
*/
29+
public abstract class DerivedFieldScript {
30+
31+
public static final String[] PARAMETERS = {};
32+
public static final ScriptContext<Factory> CONTEXT = new ScriptContext<>("derived_field", Factory.class);
33+
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(DynamicMap.class);
34+
35+
private static final Map<String, Function<Object, Object>> PARAMS_FUNCTIONS = Map.of(
36+
"doc",
37+
value -> value,
38+
"_source",
39+
value -> ((SourceLookup) value).loadSourceIfNeeded()
40+
);
41+
42+
/**
43+
* The generic runtime parameters for the script.
44+
*/
45+
private final Map<String, Object> params;
46+
47+
/**
48+
* A leaf lookup for the bound segment this script will operate on.
49+
*/
50+
private final LeafSearchLookup leafLookup;
51+
52+
public DerivedFieldScript(Map<String, Object> params, SearchLookup lookup, LeafReaderContext leafContext) {
53+
Map<String, Object> parameters = new HashMap<>(params);
54+
this.leafLookup = lookup.getLeafSearchLookup(leafContext);
55+
parameters.putAll(leafLookup.asMap());
56+
this.params = new DynamicMap(parameters, PARAMS_FUNCTIONS);
57+
}
58+
59+
protected DerivedFieldScript() {
60+
params = null;
61+
leafLookup = null;
62+
}
63+
64+
/**
65+
* Return the parameters for this script.
66+
*/
67+
public Map<String, Object> getParams() {
68+
return params;
69+
}
70+
71+
/**
72+
* The doc lookup for the Lucene segment this script was created for.
73+
*/
74+
public Map<String, ScriptDocValues<?>> getDoc() {
75+
return leafLookup.doc();
76+
}
77+
78+
/**
79+
* Set the current document to run the script on next.
80+
*/
81+
public void setDocument(int docid) {
82+
leafLookup.setDocument(docid);
83+
}
84+
85+
public abstract Object execute();
86+
87+
/**
88+
* A factory to construct {@link DerivedFieldScript} instances.
89+
*
90+
* @opensearch.internal
91+
*/
92+
public interface LeafFactory {
93+
DerivedFieldScript newInstance(LeafReaderContext ctx) throws IOException;
94+
}
95+
96+
/**
97+
* A factory to construct stateful {@link DerivedFieldScript} factories for a specific index.
98+
*
99+
* @opensearch.internal
100+
*/
101+
public interface Factory extends ScriptFactory {
102+
LeafFactory newFactory(Map<String, Object> params, SearchLookup lookup);
103+
}
104+
}

server/src/main/java/org/opensearch/script/ScriptModule.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,8 @@ public class ScriptModule {
7878
ScriptedMetricAggContexts.MapScript.CONTEXT,
7979
ScriptedMetricAggContexts.CombineScript.CONTEXT,
8080
ScriptedMetricAggContexts.ReduceScript.CONTEXT,
81-
IntervalFilterScript.CONTEXT
81+
IntervalFilterScript.CONTEXT,
82+
DerivedFieldScript.CONTEXT
8283
).collect(Collectors.toMap(c -> c.name, Function.identity()));
8384
}
8485

0 commit comments

Comments
 (0)