Skip to content

Commit 150db2b

Browse files
authored
Throw an exception when memory usage estimation endpoint encounters empty data frame. (#49143) (#49164)
1 parent b9a571e commit 150db2b

File tree

6 files changed

+40
-15
lines changed

6 files changed

+40
-15
lines changed

x-pack/plugin/ml/qa/ml-with-security/build.gradle

+1
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,7 @@ integTest.runner {
9292
'ml/data_frame_analytics_crud/Test put classification given num_top_classes is greater than 1k',
9393
'ml/data_frame_analytics_crud/Test put classification given training_percent is less than one',
9494
'ml/data_frame_analytics_crud/Test put classification given training_percent is greater than hundred',
95+
'ml/data_frame_analytics_memory_usage_estimation/Test memory usage estimation for empty data frame',
9596
'ml/evaluate_data_frame/Test given missing index',
9697
'ml/evaluate_data_frame/Test given index does not exist',
9798
'ml/evaluate_data_frame/Test given missing evaluation',

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java

+7-5
Original file line number | Diff line number | Diff line change
@@ -238,11 +238,13 @@ private void getStartContext(String id, ActionListener<StartContext> finalListen
238238
.collectDataSummaryAsync(ActionListener.wrap(
239239
dataSummary -> {
240240
if (dataSummary.rows == 0) {
241-
finalListener.onFailure(new ElasticsearchStatusException(
242-
"Unable to start {} as there are no analyzable data in source indices [{}].",
243-
RestStatus.BAD_REQUEST,
244-
id,
245-
Strings.arrayToCommaDelimitedString(startContext.config.getSource().getIndex())
241+
finalListener.onFailure(ExceptionsHelper.badRequestException(
242+
"Unable to start {} as no documents in the source indices [{}] contained all the fields "
243+
+ "selected for analysis. If you are relying on automatic field selection then there are "
244+
+ "currently mapped fields that do not exist in any indexed documents, and you will have "
245+
+ "to switch to explicit field selection and include only fields that exist in indexed "
246+
+ "documents.",
247+
id, Strings.arrayToCommaDelimitedString(startContext.config.getSource().getIndex())
246248
));
247249
} else {
248250
finalListener.onResponse(startContext);

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java

+9-2
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import org.apache.logging.log4j.Logger;
1010
import org.apache.logging.log4j.message.ParameterizedMessage;
1111
import org.elasticsearch.action.ActionListener;
12+
import org.elasticsearch.common.Strings;
1213
import org.elasticsearch.common.unit.ByteSizeUnit;
1314
import org.elasticsearch.common.unit.ByteSizeValue;
1415
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
@@ -57,10 +58,16 @@ private MemoryUsageEstimationResult runJob(String jobId,
5758
DataFrameDataExtractorFactory dataExtractorFactory) {
5859
DataFrameDataExtractor dataExtractor = dataExtractorFactory.newExtractor(false);
5960
DataFrameDataExtractor.DataSummary dataSummary = dataExtractor.collectDataSummary();
60-
Set<String> categoricalFields = dataExtractor.getCategoricalFields(config.getAnalysis());
6161
if (dataSummary.rows == 0) {
62-
return new MemoryUsageEstimationResult(ByteSizeValue.ZERO, ByteSizeValue.ZERO);
62+
throw ExceptionsHelper.badRequestException(
63+
"[{}] Unable to estimate memory usage as no documents in the source indices [{}] contained all the fields selected for "
64+
+ "analysis. If you are relying on automatic field selection then there are currently mapped fields that do not exist "
65+
+ "in any indexed documents, and you will have to switch to explicit field selection and include only fields that "
66+
+ "exist in indexed documents.",
67+
jobId,
68+
Strings.arrayToCommaDelimitedString(config.getSource().getIndex()));
6369
}
70+
Set<String> categoricalFields = dataExtractor.getCategoricalFields(config.getAnalysis());
6471
AnalyticsProcessConfig processConfig =
6572
new AnalyticsProcessConfig(
6673
jobId,

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManagerTests.java

+5-5
Original file line number | Diff line number | Diff line change
@@ -42,8 +42,6 @@ public class MemoryUsageEstimationProcessManagerTests extends ESTestCase {
4242
private static final String CONFIG_ID = "dummy";
4343
private static final int NUM_ROWS = 100;
4444
private static final int NUM_COLS = 4;
45-
private static final MemoryUsageEstimationResult PROCESS_RESULT_ZERO =
46-
new MemoryUsageEstimationResult(ByteSizeValue.ZERO, ByteSizeValue.ZERO);
4745
private static final MemoryUsageEstimationResult PROCESS_RESULT =
4846
new MemoryUsageEstimationResult(ByteSizeValue.parseBytesSizeValue("20kB", ""), ByteSizeValue.parseBytesSizeValue("10kB", ""));
4947

@@ -85,9 +83,11 @@ public void testRunJob_EmptyDataFrame() {
8583

8684
processManager.runJobAsync(TASK_ID, dataFrameAnalyticsConfig, dataExtractorFactory, listener);
8785

88-
verify(listener).onResponse(resultCaptor.capture());
89-
MemoryUsageEstimationResult result = resultCaptor.getValue();
90-
assertThat(result, equalTo(PROCESS_RESULT_ZERO));
86+
verify(listener).onFailure(exceptionCaptor.capture());
87+
ElasticsearchException exception = (ElasticsearchException) exceptionCaptor.getValue();
88+
assertThat(exception.status(), equalTo(RestStatus.BAD_REQUEST));
89+
assertThat(exception.getMessage(), containsString(TASK_ID));
90+
assertThat(exception.getMessage(), containsString("Unable to estimate memory usage"));
9191

9292
verifyNoMoreInteractions(process, listener);
9393
}

x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml

+17-2
Original file line number | Diff line number | Diff line change
@@ -14,12 +14,27 @@ setup:
1414
---
1515
"Test memory usage estimation for empty data frame":
1616
- do:
17+
catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
18+
ml.estimate_memory_usage:
19+
body:
20+
source: { index: "index-source" }
21+
analysis: { outlier_detection: {} }
22+
23+
- do:
24+
index:
25+
index: index-source
26+
refresh: true
27+
body: { x: 1 }
28+
- match: { result: "created" }
29+
30+
# Note that the value for "y" is missing and outlier detection analysis does not support missing values.
31+
# Hence, the data frame is still considered empty.
32+
- do:
33+
catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
1734
ml.estimate_memory_usage:
1835
body:
1936
source: { index: "index-source" }
2037
analysis: { outlier_detection: {} }
21-
- match: { expected_memory_without_disk: "0" }
22-
- match: { expected_memory_with_disk: "0" }
2338

2439
---
2540
"Test memory usage estimation for non-empty data frame":

x-pack/plugin/src/test/resources/rest-api-spec/test/ml/start_data_frame_analytics.yml

+1-1
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,7 @@
8686
}
8787
8888
- do:
89-
catch: /Unable to start empty-with-compatible-fields as there are no analyzable data in source indices \[empty-index-with-compatible-fields\]/
89+
catch: /Unable to start empty-with-compatible-fields as no documents in the source indices \[empty-index-with-compatible-fields\] contained all the fields selected for analysis/
9090
ml.start_data_frame_analytics:
9191
id: "empty-with-compatible-fields"
9292
---

0 commit comments

Comments
 (0)