Skip to content

Commit 78a304a

Browse files
authored
[Enhancement] Fetch system index mappings from json file instead of string constants (opensearch-project#3153)
* feat(index mappings): fetch mappings and version from json file instead of string constants Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * refactor: changing exception being thrown Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * chore: remove unused file Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * chore: fix typo in comment Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * chore: adding new line at the end of files Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * feat: add test cases Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * fix: remove test code Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * fix(test): in main the versions were not updated appropriately Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * refactor: move mapping templates under common module Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * refactor: ensure that conversationindexconstants reference mlindex enums rather than use their own mappings Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * refactor: update comment Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * refactor: rename dir from mappings to index-mappings Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * fix: add null checks Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * fix: adding dependencies for testing Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * fix(test): compare json object rather than strings to avoid eol character issue Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * refactor: combine if statements into single check Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * refactoring: null handling + clean code Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * spotless apply Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> --------- Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>
1 parent f9cbf15 commit 78a304a

File tree

22 files changed

+912
-635
lines changed

22 files changed

+912
-635
lines changed

common/build.gradle

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ dependencies {
2626
compileOnly group: 'org.apache.commons', name: 'commons-text', version: '1.10.0'
2727
compileOnly group: 'com.google.code.gson', name: 'gson', version: '2.10.1'
2828
compileOnly group: 'org.json', name: 'json', version: '20231013'
29+
testImplementation group: 'org.json', name: 'json', version: '20231013'
2930
implementation('com.google.guava:guava:32.1.2-jre') {
3031
exclude group: 'com.google.guava', module: 'failureaccess'
3132
exclude group: 'com.google.code.findbugs', module: 'jsr305'

common/src/main/java/org/opensearch/ml/common/CommonValue.java

+10-529
Large diffs are not rendered by default.

ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java common/src/main/java/org/opensearch/ml/common/MLIndex.java

+40-31
Original file line numberDiff line numberDiff line change
@@ -3,58 +3,67 @@
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

6-
package org.opensearch.ml.engine.indices;
6+
package org.opensearch.ml.common;
77

88
import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX;
9-
import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX_MAPPING;
10-
import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX_SCHEMA_VERSION;
9+
import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX_MAPPING_PATH;
1110
import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX;
12-
import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX_MAPPING;
13-
import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX_SCHEMA_VERSION;
11+
import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX_MAPPING_PATH;
1412
import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_INDEX;
15-
import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_INDEX_MAPPING;
16-
import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_SCHEMA_VERSION;
13+
import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_INDEX_MAPPING_PATH;
1714
import static org.opensearch.ml.common.CommonValue.ML_CONTROLLER_INDEX;
18-
import static org.opensearch.ml.common.CommonValue.ML_CONTROLLER_INDEX_MAPPING;
19-
import static org.opensearch.ml.common.CommonValue.ML_CONTROLLER_INDEX_SCHEMA_VERSION;
15+
import static org.opensearch.ml.common.CommonValue.ML_CONTROLLER_INDEX_MAPPING_PATH;
2016
import static org.opensearch.ml.common.CommonValue.ML_MEMORY_MESSAGE_INDEX;
21-
import static org.opensearch.ml.common.CommonValue.ML_MEMORY_MESSAGE_INDEX_MAPPING;
22-
import static org.opensearch.ml.common.CommonValue.ML_MEMORY_MESSAGE_INDEX_SCHEMA_VERSION;
17+
import static org.opensearch.ml.common.CommonValue.ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH;
2318
import static org.opensearch.ml.common.CommonValue.ML_MEMORY_META_INDEX;
24-
import static org.opensearch.ml.common.CommonValue.ML_MEMORY_META_INDEX_MAPPING;
25-
import static org.opensearch.ml.common.CommonValue.ML_MEMORY_META_INDEX_SCHEMA_VERSION;
19+
import static org.opensearch.ml.common.CommonValue.ML_MEMORY_META_INDEX_MAPPING_PATH;
2620
import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX;
27-
import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX_MAPPING;
28-
import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX_SCHEMA_VERSION;
21+
import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX_MAPPING_PATH;
2922
import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX;
30-
import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX_MAPPING;
31-
import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX_SCHEMA_VERSION;
23+
import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX_MAPPING_PATH;
3224
import static org.opensearch.ml.common.CommonValue.ML_TASK_INDEX;
33-
import static org.opensearch.ml.common.CommonValue.ML_TASK_INDEX_MAPPING;
34-
import static org.opensearch.ml.common.CommonValue.ML_TASK_INDEX_SCHEMA_VERSION;
25+
import static org.opensearch.ml.common.CommonValue.ML_TASK_INDEX_MAPPING_PATH;
26+
27+
import java.io.IOException;
28+
import java.io.UncheckedIOException;
29+
30+
import org.opensearch.ml.common.utils.IndexUtils;
3531

3632
public enum MLIndex {
37-
MODEL_GROUP(ML_MODEL_GROUP_INDEX, false, ML_MODEL_GROUP_INDEX_MAPPING, ML_MODEL_GROUP_INDEX_SCHEMA_VERSION),
38-
MODEL(ML_MODEL_INDEX, false, ML_MODEL_INDEX_MAPPING, ML_MODEL_INDEX_SCHEMA_VERSION),
39-
TASK(ML_TASK_INDEX, false, ML_TASK_INDEX_MAPPING, ML_TASK_INDEX_SCHEMA_VERSION),
40-
CONNECTOR(ML_CONNECTOR_INDEX, false, ML_CONNECTOR_INDEX_MAPPING, ML_CONNECTOR_SCHEMA_VERSION),
41-
CONFIG(ML_CONFIG_INDEX, false, ML_CONFIG_INDEX_MAPPING, ML_CONFIG_INDEX_SCHEMA_VERSION),
42-
CONTROLLER(ML_CONTROLLER_INDEX, false, ML_CONTROLLER_INDEX_MAPPING, ML_CONTROLLER_INDEX_SCHEMA_VERSION),
43-
AGENT(ML_AGENT_INDEX, false, ML_AGENT_INDEX_MAPPING, ML_AGENT_INDEX_SCHEMA_VERSION),
44-
MEMORY_META(ML_MEMORY_META_INDEX, false, ML_MEMORY_META_INDEX_MAPPING, ML_MEMORY_META_INDEX_SCHEMA_VERSION),
45-
MEMORY_MESSAGE(ML_MEMORY_MESSAGE_INDEX, false, ML_MEMORY_MESSAGE_INDEX_MAPPING, ML_MEMORY_MESSAGE_INDEX_SCHEMA_VERSION);
33+
MODEL_GROUP(ML_MODEL_GROUP_INDEX, false, ML_MODEL_GROUP_INDEX_MAPPING_PATH),
34+
MODEL(ML_MODEL_INDEX, false, ML_MODEL_INDEX_MAPPING_PATH),
35+
TASK(ML_TASK_INDEX, false, ML_TASK_INDEX_MAPPING_PATH),
36+
CONNECTOR(ML_CONNECTOR_INDEX, false, ML_CONNECTOR_INDEX_MAPPING_PATH),
37+
CONFIG(ML_CONFIG_INDEX, false, ML_CONFIG_INDEX_MAPPING_PATH),
38+
CONTROLLER(ML_CONTROLLER_INDEX, false, ML_CONTROLLER_INDEX_MAPPING_PATH),
39+
AGENT(ML_AGENT_INDEX, false, ML_AGENT_INDEX_MAPPING_PATH),
40+
MEMORY_META(ML_MEMORY_META_INDEX, false, ML_MEMORY_META_INDEX_MAPPING_PATH),
41+
MEMORY_MESSAGE(ML_MEMORY_MESSAGE_INDEX, false, ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH);
4642

4743
private final String indexName;
4844
// whether we use an alias for the index
4945
private final boolean alias;
5046
private final String mapping;
5147
private final Integer version;
5248

53-
MLIndex(String name, boolean alias, String mapping, Integer version) {
49+
MLIndex(String name, boolean alias, String mappingPath) {
5450
this.indexName = name;
5551
this.alias = alias;
56-
this.mapping = mapping;
57-
this.version = version;
52+
this.mapping = getMapping(mappingPath);
53+
this.version = IndexUtils.getVersionFromMapping(this.mapping);
54+
}
55+
56+
private String getMapping(String mappingPath) {
57+
if (mappingPath == null) {
58+
throw new IllegalArgumentException("Mapping path cannot be null");
59+
}
60+
61+
try {
62+
return IndexUtils.getMappingFromFile(mappingPath);
63+
} catch (IOException e) {
64+
// Unchecked exception is thrown since the method is being called within a constructor
65+
throw new UncheckedIOException("Failed to fetch index mapping from file: " + mappingPath, e);
66+
}
5867
}
5968

6069
public String getIndexName() {

common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java

+6-69
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@
1818
package org.opensearch.ml.common.conversation;
1919

2020
import org.opensearch.common.settings.Setting;
21+
import org.opensearch.ml.common.MLIndex;
2122

2223
/**
2324
* Class containing constants that define how the conversational indices are formatted
25+
* TODO: use MLIndex.MEMORY_MESSAGE and MLIndex.MEMORY_META directly for index names and mappings instead of the constants in this class
2426
*/
2527
public class ConversationalIndexConstants {
26-
/** Version of the meta index schema */
27-
public final static Integer META_INDEX_SCHEMA_VERSION = 2;
2828
/** Name of the conversational metadata index */
29-
public final static String META_INDEX_NAME = ".plugins-ml-memory-meta";
29+
public final static String META_INDEX_NAME = MLIndex.MEMORY_META.getIndexName();
3030
/** Name of the metadata field for initial timestamp */
3131
public final static String META_CREATED_TIME_FIELD = "create_time";
3232
/** Name of the metadata field for updated timestamp */
@@ -41,38 +41,10 @@ public class ConversationalIndexConstants {
4141
public final static String META_ADDITIONAL_INFO_FIELD = "additional_info";
4242

4343
/** Mappings for the conversational metadata index */
44-
public final static String META_MAPPING = "{\n"
45-
+ " \"_meta\": {\n"
46-
+ " \"schema_version\": "
47-
+ META_INDEX_SCHEMA_VERSION
48-
+ "\n"
49-
+ " },\n"
50-
+ " \"properties\": {\n"
51-
+ " \""
52-
+ META_NAME_FIELD
53-
+ "\": {\"type\": \"text\"},\n"
54-
+ " \""
55-
+ META_CREATED_TIME_FIELD
56-
+ "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n"
57-
+ " \""
58-
+ META_UPDATED_TIME_FIELD
59-
+ "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n"
60-
+ " \""
61-
+ USER_FIELD
62-
+ "\": {\"type\": \"keyword\"},\n"
63-
+ " \""
64-
+ APPLICATION_TYPE_FIELD
65-
+ "\": {\"type\": \"keyword\"},\n"
66-
+ " \""
67-
+ META_ADDITIONAL_INFO_FIELD
68-
+ "\": {\"type\": \"flat_object\"}\n"
69-
+ " }\n"
70-
+ "}";
44+
public final static String META_MAPPING = MLIndex.MEMORY_META.getMapping();
7145

72-
/** Version of the interactions index schema */
73-
public final static Integer INTERACTIONS_INDEX_SCHEMA_VERSION = 1;
7446
/** Name of the conversational interactions index */
75-
public final static String INTERACTIONS_INDEX_NAME = ".plugins-ml-memory-message";
47+
public final static String INTERACTIONS_INDEX_NAME = MLIndex.MEMORY_MESSAGE.getIndexName();
7648
/** Name of the interaction field for the conversation Id */
7749
public final static String INTERACTIONS_CONVERSATION_ID_FIELD = "memory_id";
7850
/** Name of the interaction field for the human input */
@@ -92,42 +64,7 @@ public class ConversationalIndexConstants {
9264
/** The trace number of an interaction */
9365
public final static String INTERACTIONS_TRACE_NUMBER_FIELD = "trace_number";
9466
/** Mappings for the interactions index */
95-
public final static String INTERACTIONS_MAPPINGS = "{\n"
96-
+ " \"_meta\": {\n"
97-
+ " \"schema_version\": "
98-
+ INTERACTIONS_INDEX_SCHEMA_VERSION
99-
+ "\n"
100-
+ " },\n"
101-
+ " \"properties\": {\n"
102-
+ " \""
103-
+ INTERACTIONS_CONVERSATION_ID_FIELD
104-
+ "\": {\"type\": \"keyword\"},\n"
105-
+ " \""
106-
+ INTERACTIONS_CREATE_TIME_FIELD
107-
+ "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n"
108-
+ " \""
109-
+ INTERACTIONS_INPUT_FIELD
110-
+ "\": {\"type\": \"text\"},\n"
111-
+ " \""
112-
+ INTERACTIONS_PROMPT_TEMPLATE_FIELD
113-
+ "\": {\"type\": \"text\"},\n"
114-
+ " \""
115-
+ INTERACTIONS_RESPONSE_FIELD
116-
+ "\": {\"type\": \"text\"},\n"
117-
+ " \""
118-
+ INTERACTIONS_ORIGIN_FIELD
119-
+ "\": {\"type\": \"keyword\"},\n"
120-
+ " \""
121-
+ INTERACTIONS_ADDITIONAL_INFO_FIELD
122-
+ "\": {\"type\": \"flat_object\"},\n"
123-
+ " \""
124-
+ PARENT_INTERACTIONS_ID_FIELD
125-
+ "\": {\"type\": \"keyword\"},\n"
126-
+ " \""
127-
+ INTERACTIONS_TRACE_NUMBER_FIELD
128-
+ "\": {\"type\": \"long\"}\n"
129-
+ " }\n"
130-
+ "}";
67+
public final static String INTERACTIONS_MAPPINGS = MLIndex.MEMORY_MESSAGE.getMapping();
13168

13269
/** Feature Flag setting for conversational memory */
13370
public static final Setting<Boolean> ML_COMMONS_MEMORY_FEATURE_ENABLED = Setting

common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java

+43
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,15 @@
55

66
package org.opensearch.ml.common.utils;
77

8+
import java.io.IOException;
9+
import java.net.URL;
810
import java.util.Map;
911

12+
import com.google.common.base.Charsets;
13+
import com.google.common.io.Resources;
14+
import com.google.gson.JsonObject;
15+
import com.google.gson.JsonParseException;
16+
1017
import lombok.extern.log4j.Log4j2;
1118

1219
@Log4j2
@@ -32,4 +39,40 @@ public class IndexUtils {
3239
// Note: This does not include static settings like number of shards, which can't be changed after index creation.
3340
public static final Map<String, Object> UPDATED_DEFAULT_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-1");
3441
public static final Map<String, Object> UPDATED_ALL_NODES_REPLICA_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-all");
42+
43+
public static String getMappingFromFile(String path) throws IOException {
44+
URL url = IndexUtils.class.getClassLoader().getResource(path);
45+
if (url == null) {
46+
throw new IOException("Resource not found: " + path);
47+
}
48+
49+
String mapping = Resources.toString(url, Charsets.UTF_8).trim();
50+
if (mapping.isEmpty() || !StringUtils.isJson(mapping)) {
51+
throw new IllegalArgumentException("Invalid or non-JSON mapping at: " + path);
52+
}
53+
54+
return mapping;
55+
}
56+
57+
public static Integer getVersionFromMapping(String mapping) {
58+
if (mapping == null || mapping.isBlank()) {
59+
throw new IllegalArgumentException("Mapping cannot be null or empty");
60+
}
61+
62+
JsonObject mappingJson = StringUtils.getJsonObjectFromString(mapping);
63+
if (mappingJson == null || !mappingJson.has("_meta")) {
64+
throw new JsonParseException("Failed to find \"_meta\" object in mapping: " + mapping);
65+
}
66+
67+
JsonObject metaObject = mappingJson.getAsJsonObject("_meta");
68+
if (metaObject == null || !metaObject.has("schema_version")) {
69+
throw new JsonParseException("Failed to find \"schema_version\" in \"_meta\" object for mapping: " + mapping);
70+
}
71+
72+
try {
73+
return metaObject.get("schema_version").getAsInt();
74+
} catch (NumberFormatException | ClassCastException e) {
75+
throw new JsonParseException("Invalid \"schema_version\" value in mapping: " + mapping, e);
76+
}
77+
}
3578
}

common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java

+20-3
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
import com.google.gson.Gson;
2828
import com.google.gson.JsonElement;
29+
import com.google.gson.JsonObject;
2930
import com.google.gson.JsonParser;
3031
import com.google.gson.JsonSyntaxException;
3132
import com.jayway.jsonpath.JsonPath;
@@ -53,12 +54,16 @@ public class StringUtils {
5354
}
5455
public static final String TO_STRING_FUNCTION_NAME = ".toString()";
5556

56-
public static boolean isValidJsonString(String Json) {
57+
public static boolean isValidJsonString(String json) {
58+
if (json == null || json.isBlank()) {
59+
return false;
60+
}
61+
5762
try {
58-
new JSONObject(Json);
63+
new JSONObject(json);
5964
} catch (JSONException ex) {
6065
try {
61-
new JSONArray(Json);
66+
new JSONArray(json);
6267
} catch (JSONException ex1) {
6368
return false;
6469
}
@@ -67,6 +72,10 @@ public static boolean isValidJsonString(String Json) {
6772
}
6873

6974
public static boolean isJson(String json) {
75+
if (json == null || json.isBlank()) {
76+
return false;
77+
}
78+
7079
try {
7180
if (!isValidJsonString(json)) {
7281
return false;
@@ -319,4 +328,12 @@ public static boolean isValidJSONPath(String input) {
319328
}
320329
}
321330

331+
public static JsonObject getJsonObjectFromString(String jsonString) {
332+
if (jsonString == null || jsonString.isBlank()) {
333+
throw new IllegalArgumentException("Json cannot be null or empty");
334+
}
335+
336+
return JsonParser.parseString(jsonString).getAsJsonObject();
337+
}
338+
322339
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
{
2+
"_meta": {
3+
"schema_version": 2
4+
},
5+
"properties": {
6+
"name": {
7+
"type": "text",
8+
"fields": {
9+
"keyword": {
10+
"type": "keyword",
11+
"ignore_above": 256
12+
}
13+
}
14+
},
15+
"type": {
16+
"type": "keyword"
17+
},
18+
"description": {
19+
"type": "text"
20+
},
21+
"llm": {
22+
"type": "flat_object"
23+
},
24+
"tools": {
25+
"type": "flat_object"
26+
},
27+
"parameters": {
28+
"type": "flat_object"
29+
},
30+
"memory": {
31+
"type": "flat_object"
32+
},
33+
"is_hidden": {
34+
"type": "boolean"
35+
},
36+
"created_time": {
37+
"type": "date",
38+
"format": "strict_date_time||epoch_millis"
39+
},
40+
"last_updated_time": {
41+
"type": "date",
42+
"format": "strict_date_time||epoch_millis"
43+
}
44+
}
45+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{
2+
"_meta": {
3+
"schema_version": 4
4+
},
5+
"properties": {
6+
"master_key": {
7+
"type": "keyword"
8+
},
9+
"config_type": {
10+
"type": "keyword"
11+
},
12+
"ml_configuration": {
13+
"type": "flat_object"
14+
},
15+
"create_time": {
16+
"type": "date",
17+
"format": "strict_date_time||epoch_millis"
18+
},
19+
"last_updated_time": {
20+
"type": "date",
21+
"format": "strict_date_time||epoch_millis"
22+
}
23+
}
24+
}

0 commit comments

Comments
 (0)