Skip to content

Commit e8c5daf

Browse files
[Derived Fields] Add support for emitting multiple values in DerivedFieldScripts (opensearch-project#12837)
--------- Signed-off-by: Mohammad Qureshi <qreshi@amazon.com> Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> Co-authored-by: Rishabh Maurya <rishabhmaurya05@gmail.com>
1 parent 434dc61 commit e8c5daf

File tree

9 files changed

+381
-15
lines changed

9 files changed

+381
-15
lines changed

modules/lang-painless/src/main/java/org/opensearch/painless/PainlessModulePlugin.java

+6
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
import org.opensearch.repositories.RepositoriesService;
6161
import org.opensearch.rest.RestController;
6262
import org.opensearch.rest.RestHandler;
63+
import org.opensearch.script.DerivedFieldScript;
6364
import org.opensearch.script.IngestScript;
6465
import org.opensearch.script.ScoreScript;
6566
import org.opensearch.script.ScriptContext;
@@ -108,6 +109,11 @@ public final class PainlessModulePlugin extends Plugin implements ScriptPlugin,
108109
ingest.add(AllowlistLoader.loadFromResourceFiles(Allowlist.class, "org.opensearch.ingest.txt"));
109110
map.put(IngestScript.CONTEXT, ingest);
110111

112+
// Functions available to derived fields
113+
List<Allowlist> derived = new ArrayList<>(Allowlist.BASE_ALLOWLISTS);
114+
derived.add(AllowlistLoader.loadFromResourceFiles(Allowlist.class, "org.opensearch.derived.txt"));
115+
map.put(DerivedFieldScript.CONTEXT, derived);
116+
111117
allowlists = map;
112118
}
113119

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# The OpenSearch Contributors require contributions made to
5+
# this file be licensed under the Apache-2.0 license or a
6+
# compatible open source license.
7+
#
8+
9+
# This file contains an allowlist of the functions available in the derived field context
10+
11+
class org.opensearch.script.DerivedFieldScript @no_import {
12+
}
13+
14+
static_import {
15+
void emit(org.opensearch.script.DerivedFieldScript, Object) bound_to org.opensearch.script.ScriptEmitValues$EmitSingle
16+
void emit(org.opensearch.script.DerivedFieldScript, double, double) bound_to org.opensearch.script.ScriptEmitValues$GeoPoint
17+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.painless;
10+
11+
import org.apache.lucene.index.LeafReaderContext;
12+
import org.apache.lucene.index.memory.MemoryIndex;
13+
import org.opensearch.common.collect.Tuple;
14+
import org.opensearch.common.geo.GeoPoint;
15+
import org.opensearch.common.settings.Settings;
16+
import org.opensearch.index.fielddata.IndexGeoPointFieldData;
17+
import org.opensearch.index.fielddata.IndexNumericFieldData;
18+
import org.opensearch.index.fielddata.LeafGeoPointFieldData;
19+
import org.opensearch.index.fielddata.LeafNumericFieldData;
20+
import org.opensearch.index.fielddata.MultiGeoPointValues;
21+
import org.opensearch.index.fielddata.SortedNumericDoubleValues;
22+
import org.opensearch.index.fielddata.plain.AbstractLeafGeoPointFieldData;
23+
import org.opensearch.index.fielddata.plain.LeafDoubleFieldData;
24+
import org.opensearch.index.mapper.GeoPointFieldMapper.GeoPointFieldType;
25+
import org.opensearch.index.mapper.MapperService;
26+
import org.opensearch.index.mapper.NumberFieldMapper.NumberFieldType;
27+
import org.opensearch.index.mapper.NumberFieldMapper.NumberType;
28+
import org.opensearch.painless.spi.Allowlist;
29+
import org.opensearch.painless.spi.AllowlistLoader;
30+
import org.opensearch.script.DerivedFieldScript;
31+
import org.opensearch.script.ScriptContext;
32+
import org.opensearch.script.ScriptException;
33+
import org.opensearch.search.lookup.LeafSearchLookup;
34+
import org.opensearch.search.lookup.SearchLookup;
35+
36+
import java.io.IOException;
37+
import java.util.ArrayList;
38+
import java.util.Collections;
39+
import java.util.List;
40+
import java.util.Map;
41+
42+
import static org.mockito.ArgumentMatchers.any;
43+
import static org.mockito.ArgumentMatchers.anyInt;
44+
import static org.mockito.Mockito.mock;
45+
import static org.mockito.Mockito.when;
46+
47+
public class DerivedFieldScriptTests extends ScriptTestCase {
48+
49+
private static PainlessScriptEngine SCRIPT_ENGINE;
50+
51+
@Override
52+
public void setUp() throws Exception {
53+
super.setUp();
54+
55+
// Adding derived field script to the contexts for the script engine
56+
Map<ScriptContext<?>, List<Allowlist>> contexts = newDefaultContexts();
57+
List<Allowlist> allowlists = new ArrayList<>(Allowlist.BASE_ALLOWLISTS);
58+
allowlists.add(AllowlistLoader.loadFromResourceFiles(Allowlist.class, "org.opensearch.derived.txt"));
59+
contexts.put(DerivedFieldScript.CONTEXT, allowlists);
60+
61+
SCRIPT_ENGINE = new PainlessScriptEngine(Settings.EMPTY, contexts);
62+
}
63+
64+
@Override
65+
public void tearDown() throws Exception {
66+
super.tearDown();
67+
SCRIPT_ENGINE = null;
68+
}
69+
70+
@Override
71+
protected PainlessScriptEngine getEngine() {
72+
return SCRIPT_ENGINE;
73+
}
74+
75+
private DerivedFieldScript.LeafFactory compile(String expression, SearchLookup lookup) {
76+
DerivedFieldScript.Factory factory = SCRIPT_ENGINE.compile(
77+
"derived_script_test",
78+
expression,
79+
DerivedFieldScript.CONTEXT,
80+
Collections.emptyMap()
81+
);
82+
return factory.newFactory(Collections.emptyMap(), lookup);
83+
}
84+
85+
public void testEmittingDoubleField() throws IOException {
86+
// Mocking field value to be returned
87+
NumberFieldType fieldType = new NumberFieldType("test_double_field", NumberType.DOUBLE);
88+
MapperService mapperService = mock(MapperService.class);
89+
when(mapperService.fieldType("test_double_field")).thenReturn(fieldType);
90+
91+
SortedNumericDoubleValues doubleValues = mock(SortedNumericDoubleValues.class);
92+
when(doubleValues.docValueCount()).thenReturn(1);
93+
when(doubleValues.advanceExact(anyInt())).thenReturn(true);
94+
when(doubleValues.nextValue()).thenReturn(2.718);
95+
96+
LeafNumericFieldData atomicFieldData = mock(LeafDoubleFieldData.class); // SortedNumericDoubleFieldData
97+
when(atomicFieldData.getDoubleValues()).thenReturn(doubleValues);
98+
99+
IndexNumericFieldData fieldData = mock(IndexNumericFieldData.class); // SortedNumericIndexFieldData
100+
when(fieldData.getFieldName()).thenReturn("test_double_field");
101+
when(fieldData.load(any())).thenReturn(atomicFieldData);
102+
103+
SearchLookup lookup = new SearchLookup(mapperService, (ignored, searchLookup) -> fieldData);
104+
105+
// We don't need a real index, just need to construct a LeafReaderContext which cannot be mocked
106+
MemoryIndex index = new MemoryIndex();
107+
LeafReaderContext leafReaderContext = index.createSearcher().getIndexReader().leaves().get(0);
108+
109+
// Execute the script
110+
DerivedFieldScript script = compile("emit(doc['test_double_field'].value)", lookup).newInstance(leafReaderContext);
111+
script.setDocument(1);
112+
script.execute();
113+
114+
List<Object> result = script.getEmittedValues();
115+
assertEquals(List.of(2.718), result);
116+
}
117+
118+
public void testEmittingGeoPoint() throws IOException {
119+
// Mocking field value to be returned
120+
GeoPointFieldType fieldType = new GeoPointFieldType("test_geo_field");
121+
MapperService mapperService = mock(MapperService.class);
122+
when(mapperService.fieldType("test_geo_field")).thenReturn(fieldType);
123+
124+
MultiGeoPointValues geoPointValues = mock(MultiGeoPointValues.class);
125+
when(geoPointValues.docValueCount()).thenReturn(1);
126+
when(geoPointValues.advanceExact(anyInt())).thenReturn(true);
127+
when(geoPointValues.nextValue()).thenReturn(new GeoPoint(5, 8));
128+
129+
LeafGeoPointFieldData atomicFieldData = mock(AbstractLeafGeoPointFieldData.class); // LatLonPointDVLeafFieldData
130+
when(atomicFieldData.getGeoPointValues()).thenReturn(geoPointValues);
131+
132+
IndexGeoPointFieldData fieldData = mock(IndexGeoPointFieldData.class);
133+
when(fieldData.getFieldName()).thenReturn("test_geo_field");
134+
when(fieldData.load(any())).thenReturn(atomicFieldData);
135+
136+
SearchLookup lookup = new SearchLookup(mapperService, (ignored, searchLookup) -> fieldData);
137+
138+
// We don't need a real index, just need to construct a LeafReaderContext which cannot be mocked
139+
MemoryIndex index = new MemoryIndex();
140+
LeafReaderContext leafReaderContext = index.createSearcher().getIndexReader().leaves().get(0);
141+
142+
// Execute the script
143+
DerivedFieldScript script = compile("emit(doc['test_geo_field'].value.getLat(), doc['test_geo_field'].value.getLon())", lookup)
144+
.newInstance(leafReaderContext);
145+
script.setDocument(1);
146+
script.execute();
147+
148+
List<Object> result = script.getEmittedValues();
149+
assertEquals(List.of(new Tuple<>(5.0, 8.0)), result);
150+
}
151+
152+
public void testEmittingMultipleValues() throws IOException {
153+
SearchLookup lookup = mock(SearchLookup.class);
154+
155+
// We don't need a real index, just need to construct a LeafReaderContext which cannot be mocked
156+
MemoryIndex index = new MemoryIndex();
157+
LeafReaderContext leafReaderContext = index.createSearcher().getIndexReader().leaves().get(0);
158+
159+
LeafSearchLookup leafSearchLookup = mock(LeafSearchLookup.class);
160+
when(lookup.getLeafSearchLookup(leafReaderContext)).thenReturn(leafSearchLookup);
161+
162+
// Execute the script
163+
DerivedFieldScript script = compile(
164+
"def l = new ArrayList(); l.add('test'); l.add('multiple'); l.add('values'); for (String x : l) emit(x)",
165+
lookup
166+
).newInstance(leafReaderContext);
167+
script.setDocument(1);
168+
script.execute();
169+
170+
List<Object> result = script.getEmittedValues();
171+
assertEquals(List.of("test", "multiple", "values"), result);
172+
}
173+
174+
public void testExceedingByteSizeLimit() throws IOException {
175+
SearchLookup lookup = mock(SearchLookup.class);
176+
177+
// We don't need a real index, just need to construct a LeafReaderContext which cannot be mocked
178+
MemoryIndex index = new MemoryIndex();
179+
LeafReaderContext leafReaderContext = index.createSearcher().getIndexReader().leaves().get(0);
180+
181+
LeafSearchLookup leafSearchLookup = mock(LeafSearchLookup.class);
182+
when(lookup.getLeafSearchLookup(leafReaderContext)).thenReturn(leafSearchLookup);
183+
184+
// Emitting a large string to exceed the byte size limit
185+
DerivedFieldScript stringScript = compile("for (int i = 0; i < 1024 * 1024; i++) emit('a' + i);", lookup).newInstance(
186+
leafReaderContext
187+
);
188+
expectThrows(ScriptException.class, () -> {
189+
stringScript.setDocument(1);
190+
stringScript.execute();
191+
});
192+
193+
// Emitting an integer to check byte size limit
194+
DerivedFieldScript intScript = compile("for (int i = 0; i < 1024 * 1024; i++) emit(42)", lookup).newInstance(leafReaderContext);
195+
expectThrows(ScriptException.class, "Expected IllegalStateException for exceeding byte size limit", () -> {
196+
intScript.setDocument(1);
197+
intScript.execute();
198+
});
199+
200+
// Emitting a long to check byte size limit
201+
DerivedFieldScript longScript = compile("for (int i = 0; i < 1024 * 1024; i++) emit(1234567890123456789L)", lookup).newInstance(
202+
leafReaderContext
203+
);
204+
expectThrows(ScriptException.class, "Expected IllegalStateException for exceeding byte size limit", () -> {
205+
longScript.setDocument(1);
206+
longScript.execute();
207+
});
208+
209+
// Emitting a double to check byte size limit
210+
DerivedFieldScript doubleScript = compile("for (int i = 0; i < 1024 * 1024; i++) emit(3.14159)", lookup).newInstance(
211+
leafReaderContext
212+
);
213+
expectThrows(ScriptException.class, "Expected IllegalStateException for exceeding byte size limit", () -> {
214+
doubleScript.setDocument(1);
215+
doubleScript.execute();
216+
});
217+
218+
// Emitting a GeoPoint to check byte size limit
219+
DerivedFieldScript geoPointScript = compile("for (int i = 0; i < 1024 * 1024; i++) emit(1.23, 4.56);", lookup).newInstance(
220+
leafReaderContext
221+
);
222+
expectThrows(ScriptException.class, "Expected IllegalStateException for exceeding byte size limit", () -> {
223+
geoPointScript.setDocument(1);
224+
geoPointScript.execute();
225+
});
226+
}
227+
}

server/src/main/java/org/opensearch/index/mapper/DerivedFieldValueFetcher.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ public DerivedFieldValueFetcher(DerivedFieldScript.LeafFactory derivedFieldScrip
3131
@Override
3232
public List<Object> fetchValues(SourceLookup lookup) {
3333
derivedFieldScript.setDocument(lookup.docId());
34-
// TODO: remove List.of() when derivedFieldScript.execute() returns list of objects.
35-
return List.of(derivedFieldScript.execute());
34+
derivedFieldScript.execute();
35+
return derivedFieldScript.getEmittedValues();
3636
}
3737

3838
public void setNextReader(LeafReaderContext context) {

server/src/main/java/org/opensearch/script/DerivedFieldScript.java

+60-7
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,17 @@
99
package org.opensearch.script;
1010

1111
import org.apache.lucene.index.LeafReaderContext;
12-
import org.opensearch.common.logging.DeprecationLogger;
12+
import org.opensearch.common.collect.Tuple;
1313
import org.opensearch.index.fielddata.ScriptDocValues;
1414
import org.opensearch.search.lookup.LeafSearchLookup;
1515
import org.opensearch.search.lookup.SearchLookup;
1616
import org.opensearch.search.lookup.SourceLookup;
1717

1818
import java.io.IOException;
19+
import java.nio.charset.StandardCharsets;
20+
import java.util.ArrayList;
1921
import java.util.HashMap;
22+
import java.util.List;
2023
import java.util.Map;
2124
import java.util.function.Function;
2225

@@ -30,7 +33,7 @@ public abstract class DerivedFieldScript {
3033

3134
public static final String[] PARAMETERS = {};
3235
public static final ScriptContext<Factory> CONTEXT = new ScriptContext<>("derived_field", Factory.class);
33-
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(DynamicMap.class);
36+
private static final int MAX_BYTE_SIZE = 1024 * 1024; // Maximum allowed byte size (1 MB)
3437

3538
private static final Map<String, Function<Object, Object>> PARAMS_FUNCTIONS = Map.of(
3639
"doc",
@@ -49,16 +52,27 @@ public abstract class DerivedFieldScript {
4952
*/
5053
private final LeafSearchLookup leafLookup;
5154

55+
/**
56+
* The field values emitted from the script.
57+
*/
58+
private List<Object> emittedValues;
59+
60+
private int totalByteSize;
61+
5262
public DerivedFieldScript(Map<String, Object> params, SearchLookup lookup, LeafReaderContext leafContext) {
5363
Map<String, Object> parameters = new HashMap<>(params);
5464
this.leafLookup = lookup.getLeafSearchLookup(leafContext);
5565
parameters.putAll(leafLookup.asMap());
5666
this.params = new DynamicMap(parameters, PARAMS_FUNCTIONS);
67+
this.emittedValues = new ArrayList<>();
68+
this.totalByteSize = 0;
5769
}
5870

59-
protected DerivedFieldScript() {
60-
params = null;
61-
leafLookup = null;
71+
public DerivedFieldScript() {
72+
this.params = null;
73+
this.leafLookup = null;
74+
this.emittedValues = new ArrayList<>();
75+
this.totalByteSize = 0;
6276
}
6377

6478
/**
@@ -75,14 +89,54 @@ public Map<String, ScriptDocValues<?>> getDoc() {
7589
return leafLookup.doc();
7690
}
7791

92+
/**
93+
* Return the emitted values from the script execution.
94+
*/
95+
public List<Object> getEmittedValues() {
96+
return emittedValues;
97+
}
98+
7899
/**
79100
* Set the current document to run the script on next.
101+
* Clears the emittedValues as well since they should be scoped per document.
80102
*/
81103
public void setDocument(int docid) {
104+
this.emittedValues = new ArrayList<>();
105+
this.totalByteSize = 0;
82106
leafLookup.setDocument(docid);
83107
}
84108

85-
public abstract Object execute();
109+
public void addEmittedValue(Object o) {
110+
int byteSize = getObjectByteSize(o);
111+
int newTotalByteSize = totalByteSize + byteSize;
112+
if (newTotalByteSize <= MAX_BYTE_SIZE) {
113+
emittedValues.add(o);
114+
totalByteSize = newTotalByteSize;
115+
} else {
116+
throw new IllegalStateException("Exceeded maximum allowed byte size for emitted values");
117+
}
118+
}
119+
120+
private int getObjectByteSize(Object obj) {
121+
if (obj instanceof String) {
122+
return ((String) obj).getBytes(StandardCharsets.UTF_8).length;
123+
} else if (obj instanceof Integer) {
124+
return Integer.BYTES;
125+
} else if (obj instanceof Long) {
126+
return Long.BYTES;
127+
} else if (obj instanceof Double) {
128+
return Double.BYTES;
129+
} else if (obj instanceof Boolean) {
130+
return Byte.BYTES; // Assuming 1 byte for boolean
131+
} else if (obj instanceof Tuple) {
132+
// Assuming each element in the tuple is a double for GeoPoint case
133+
return Double.BYTES * 2;
134+
} else {
135+
throw new IllegalArgumentException("Unsupported object type passed in emit()");
136+
}
137+
}
138+
139+
/**
 * Runs the script for the current document. This base implementation has an empty body and
 * therefore emits nothing; results are read afterwards through {@link #getEmittedValues()}.
 * NOTE(review): presumably the compiled script class overrides this — confirm against the
 * script engine.
 */
public void execute() {}
86140

87141
/**
88142
* A factory to construct {@link DerivedFieldScript} instances.
@@ -95,7 +149,6 @@ public interface LeafFactory {
95149

96150
/**
97151
* A factory to construct stateful {@link DerivedFieldScript} factories for a specific index.
98-
*
99152
* @opensearch.internal
100153
*/
101154
public interface Factory extends ScriptFactory {

0 commit comments

Comments
 (0)