@@ -74,6 +74,7 @@ public class SourceFieldMapper extends MetadataFieldMapper {
74
74
75
75
public static final String CONTENT_TYPE = "_source" ;
76
76
private final Function <Map <String , ?>, Map <String , Object >> filter ;
77
+ private final Function <Map <String , ?>, Map <String , Object >> recoverySourceFilter ;
77
78
78
79
/**
79
80
* Default parameters for source fields
@@ -119,21 +120,75 @@ public static class Builder extends MetadataFieldMapper.Builder {
119
120
Collections .emptyList ()
120
121
);
121
122
123
+ /**
124
+ * A mapping parameter which defines whether the recovery_source should be added or not. Default value is true.
125
+ * <p>
126
+ * Recovery source gets added if source is disabled or there are filters that are applied on _source using
127
+ * {@link #includes}/{@link #excludes}, which has the possibility to change the original document provided by
128
+ * customer. Recovery source is not a permanent field and gets removed during merges. Refer to this merge
129
+ * policy: org.opensearch.index.engine.RecoverySourcePruneMergePolicy
130
+ * <p>
131
+ * The main reason for adding the _recovery_source was to ensure Peer to Peer recovery if segments
132
+ * are not flushed to the disk. If you are disabling the recovery source, then ensure that you are calling
133
+ * flush operation of OpenSearch periodically to ensure that segments are flushed to the disk and if required
134
+ * Peer to Peer recovery can happen using segment files rather than replaying traffic by querying Lucene
135
+ * snapshot.
136
+ *
137
+ * <p>
138
+ * This is an expert mapping parameter.
139
+ *
140
+ */
141
+ private final Parameter <Boolean > recoverySourceEnabled = Parameter .boolParam (
142
+ "recovery_source_enabled" ,
143
+ false ,
144
+ m -> toType (m ).recoverySourceEnabled ,
145
+ Defaults .ENABLED
146
+ );
147
+
148
+ /**
149
+ * Provides capability to add specific fields in the recovery_source.
150
+ * <p>
151
+ * Refer to {@link #recoverySourceEnabled} for more details.
152
+ * This is an expert parameter.
153
+ */
154
+ private final Parameter <List <String >> recoverySourceIncludes = Parameter .stringArrayParam (
155
+ "recovery_source_includes" ,
156
+ false ,
157
+ m -> Arrays .asList (toType (m ).recoverySourceIncludes ),
158
+ Collections .emptyList ()
159
+ );
160
+
161
+ /**
162
+ * Provides capability to remove specific fields in the recovery_source.
163
+ *
164
+ * Refer to {@link #recoverySourceEnabled} for more details.
165
+ * This is an expert parameter.
166
+ */
167
+ private final Parameter <List <String >> recoverySourceExcludes = Parameter .stringArrayParam (
168
+ "recovery_source_excludes" ,
169
+ false ,
170
+ m -> Arrays .asList (toType (m ).recoverySourceExcludes ),
171
+ Collections .emptyList ()
172
+ );
173
+
122
174
public Builder () {
123
175
super (Defaults .NAME );
124
176
}
125
177
126
178
@ Override
127
179
protected List <Parameter <?>> getParameters () {
128
- return Arrays .asList (enabled , includes , excludes );
180
+ return Arrays .asList (enabled , includes , excludes , recoverySourceEnabled , recoverySourceIncludes , recoverySourceExcludes );
129
181
}
130
182
131
183
@ Override
132
184
public SourceFieldMapper build (BuilderContext context ) {
133
185
return new SourceFieldMapper (
134
186
enabled .getValue (),
135
187
includes .getValue ().toArray (new String [0 ]),
136
- excludes .getValue ().toArray (new String [0 ])
188
+ excludes .getValue ().toArray (new String [0 ]),
189
+ recoverySourceEnabled .getValue (),
190
+ recoverySourceIncludes .getValue ().toArray (new String [0 ]),
191
+ recoverySourceExcludes .getValue ().toArray (new String [0 ])
137
192
);
138
193
}
139
194
}
@@ -173,24 +228,44 @@ public Query termQuery(Object value, QueryShardContext context) {
173
228
}
174
229
175
230
private final boolean enabled ;
231
+ private final boolean recoverySourceEnabled ;
176
232
/** indicates whether the source will always exist and be complete, for use by features like the update API */
177
233
private final boolean complete ;
178
234
179
235
private final String [] includes ;
180
236
private final String [] excludes ;
237
+ private final String [] recoverySourceIncludes ;
238
+ private final String [] recoverySourceExcludes ;
181
239
182
240
private SourceFieldMapper () {
183
- this (Defaults .ENABLED , Strings .EMPTY_ARRAY , Strings .EMPTY_ARRAY );
241
+ this (Defaults .ENABLED , Strings .EMPTY_ARRAY , Strings .EMPTY_ARRAY , Defaults . ENABLED , Strings . EMPTY_ARRAY , Strings . EMPTY_ARRAY );
184
242
}
185
243
186
- private SourceFieldMapper (boolean enabled , String [] includes , String [] excludes ) {
244
+ private SourceFieldMapper (
245
+ boolean enabled ,
246
+ String [] includes ,
247
+ String [] excludes ,
248
+ boolean recoverySourceEnabled ,
249
+ String [] recoverySourceIncludes ,
250
+ String [] recoverySourceExcludes
251
+ ) {
187
252
super (new SourceFieldType (enabled ));
188
253
this .enabled = enabled ;
189
254
this .includes = includes ;
190
255
this .excludes = excludes ;
191
256
final boolean filtered = CollectionUtils .isEmpty (includes ) == false || CollectionUtils .isEmpty (excludes ) == false ;
192
257
this .filter = enabled && filtered ? XContentMapValues .filter (includes , excludes ) : null ;
193
258
this .complete = enabled && CollectionUtils .isEmpty (includes ) && CollectionUtils .isEmpty (excludes );
259
+
260
+ // Recovery source settings: the filter is built only when recovery source is enabled and at least one of the includes/excludes arrays is non-empty; otherwise it is left null
261
+ this .recoverySourceEnabled = recoverySourceEnabled ;
262
+ this .recoverySourceIncludes = recoverySourceIncludes ;
263
+ this .recoverySourceExcludes = recoverySourceExcludes ;
264
+ final boolean recoverySourcefiltered = CollectionUtils .isEmpty (recoverySourceIncludes ) == false
265
+ || CollectionUtils .isEmpty (recoverySourceExcludes ) == false ;
266
+ this .recoverySourceFilter = this .recoverySourceEnabled && recoverySourcefiltered
267
+ ? XContentMapValues .filter (recoverySourceIncludes , recoverySourceExcludes )
268
+ : null ;
194
269
}
195
270
196
271
public boolean enabled () {
@@ -212,22 +287,40 @@ public void preParse(ParseContext context) throws IOException {
212
287
context .doc ().add (new StoredField (fieldType ().name (), ref .bytes , ref .offset , ref .length ));
213
288
}
214
289
215
- if (originalSource != null && adaptedSource != originalSource ) {
216
- // if we omitted source or modified it we add the _recovery_source to ensure we have it for ops based recovery
217
- BytesRef ref = originalSource .toBytesRef ();
218
- context .doc ().add (new StoredField (RECOVERY_SOURCE_NAME , ref .bytes , ref .offset , ref .length ));
219
- context .doc ().add (new NumericDocValuesField (RECOVERY_SOURCE_NAME , 1 ));
290
+ if (recoverySourceEnabled ) {
291
+ if (originalSource != null && adaptedSource != originalSource ) {
292
+ final BytesReference adaptedRecoverySource = applyFilters (
293
+ originalSource ,
294
+ contentType ,
295
+ recoverySourceEnabled ,
296
+ recoverySourceFilter
297
+ );
298
+ // if we omitted source or modified it we add the _recovery_source to ensure we have it for ops based recovery
299
+ BytesRef ref = adaptedRecoverySource .toBytesRef ();
300
+ context .doc ().add (new StoredField (RECOVERY_SOURCE_NAME , ref .bytes , ref .offset , ref .length ));
301
+ context .doc ().add (new NumericDocValuesField (RECOVERY_SOURCE_NAME , 1 ));
302
+ }
220
303
}
221
304
}
222
305
223
306
@ Nullable
224
307
public BytesReference applyFilters (@ Nullable BytesReference originalSource , @ Nullable MediaType contentType ) throws IOException {
225
- if (enabled && originalSource != null ) {
308
+ return applyFilters (originalSource , contentType , enabled , filter );
309
+ }
310
+
311
+ @ Nullable
312
+ private BytesReference applyFilters (
313
+ @ Nullable BytesReference originalSource ,
314
+ @ Nullable MediaType contentType ,
315
+ boolean isProvidedSourceEnabled ,
316
+ @ Nullable final Function <Map <String , ?>, Map <String , Object >> filters
317
+ ) throws IOException {
318
+ if (isProvidedSourceEnabled && originalSource != null ) {
226
319
// Percolate and tv APIs may not set the source and that is ok, because these APIs will not index any data
227
- if (filter != null ) {
320
+ if (filters != null ) {
228
321
// we don't update the context source if we filter, we want to keep it as is...
229
322
Tuple <? extends MediaType , Map <String , Object >> mapTuple = XContentHelper .convertToMap (originalSource , true , contentType );
230
- Map <String , Object > filteredSource = filter .apply (mapTuple .v2 ());
323
+ Map <String , Object > filteredSource = filters .apply (mapTuple .v2 ());
231
324
BytesStreamOutput bStream = new BytesStreamOutput ();
232
325
MediaType actualContentType = mapTuple .v1 ();
233
326
XContentBuilder builder = MediaTypeRegistry .contentBuilder (actualContentType , bStream ).map (filteredSource );
0 commit comments