Skip to content

Commit 34b794b

Browse files
Making wlm stats output easier to understand (#16278)
* fix wlm stats output Signed-off-by: Kaushal Kumar <ravi.kaushal97@gmail.com> * rename wlm stats vars Signed-off-by: Kaushal Kumar <ravi.kaushal97@gmail.com> * fix ut failure Signed-off-by: Kaushal Kumar <ravi.kaushal97@gmail.com> --------- Signed-off-by: Kaushal Kumar <ravi.kaushal97@gmail.com>
1 parent 1e7f6df commit 34b794b

9 files changed

+28
-71
lines changed

server/src/main/java/org/opensearch/wlm/QueryGroupService.java

+1-6
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
import org.apache.logging.log4j.LogManager;
1212
import org.apache.logging.log4j.Logger;
1313
import org.opensearch.ResourceNotFoundException;
14-
import org.opensearch.action.search.SearchShardTask;
1514
import org.opensearch.cluster.ClusterChangedEvent;
1615
import org.opensearch.cluster.ClusterStateListener;
1716
import org.opensearch.cluster.metadata.Metadata;
@@ -354,10 +353,6 @@ public void onTaskCompleted(Task task) {
354353
queryGroupId = QueryGroupTask.DEFAULT_QUERY_GROUP_ID_SUPPLIER.get();
355354
}
356355

357-
if (task instanceof SearchShardTask) {
358-
queryGroupsStateAccessor.getQueryGroupState(queryGroupId).shardCompletions.inc();
359-
} else {
360-
queryGroupsStateAccessor.getQueryGroupState(queryGroupId).completions.inc();
361-
}
356+
queryGroupsStateAccessor.getQueryGroupState(queryGroupId).totalCompletions.inc();
362357
}
363358
}

server/src/main/java/org/opensearch/wlm/stats/QueryGroupState.java

+3-16
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,7 @@ public class QueryGroupState {
2121
/**
2222
* co-ordinator level completions at the query group level, this is a cumulative counter since the Opensearch start time
2323
*/
24-
public final CounterMetric completions = new CounterMetric();
25-
26-
/**
27-
* shard level completions at the query group level, this is a cumulative counter since the Opensearch start time
28-
*/
29-
public final CounterMetric shardCompletions = new CounterMetric();
24+
public final CounterMetric totalCompletions = new CounterMetric();
3025

3126
/**
3227
* rejections at the query group level, this is a cumulative counter since the OpenSearch start time
@@ -61,16 +56,8 @@ public QueryGroupState() {
6156
*
6257
* @return co-ordinator completions in the query group
6358
*/
64-
public long getCompletions() {
65-
return completions.count();
66-
}
67-
68-
/**
69-
*
70-
* @return shard completions in the query group
71-
*/
72-
public long getShardCompletions() {
73-
return shardCompletions.count();
59+
public long getTotalCompletions() {
60+
return totalCompletions.count();
7461
}
7562

7663
/**

server/src/main/java/org/opensearch/wlm/stats/QueryGroupStats.java

+16-23
Original file line numberDiff line numberDiff line change
@@ -91,16 +91,14 @@ public Map<String, QueryGroupStatsHolder> getStats() {
9191
* the instance will only be created on demand through stats api
9292
*/
9393
public static class QueryGroupStatsHolder implements ToXContentObject, Writeable {
94-
public static final String COMPLETIONS = "completions";
95-
public static final String REJECTIONS = "rejections";
94+
public static final String COMPLETIONS = "total_completions";
95+
public static final String REJECTIONS = "total_rejections";
9696
public static final String TOTAL_CANCELLATIONS = "total_cancellations";
9797
public static final String FAILURES = "failures";
98-
public static final String SHARD_COMPLETIONS = "shard_completions";
9998
private long completions;
100-
private long shardCompletions;
10199
private long rejections;
102100
private long failures;
103-
private long totalCancellations;
101+
private long cancellations;
104102
private Map<ResourceType, ResourceStats> resourceStats;
105103

106104
// this is needed to support the factory method
@@ -110,24 +108,21 @@ public QueryGroupStatsHolder(
110108
long completions,
111109
long rejections,
112110
long failures,
113-
long totalCancellations,
114-
long shardCompletions,
111+
long cancellations,
115112
Map<ResourceType, ResourceStats> resourceStats
116113
) {
117114
this.completions = completions;
118115
this.rejections = rejections;
119116
this.failures = failures;
120-
this.shardCompletions = shardCompletions;
121-
this.totalCancellations = totalCancellations;
117+
this.cancellations = cancellations;
122118
this.resourceStats = resourceStats;
123119
}
124120

125121
public QueryGroupStatsHolder(StreamInput in) throws IOException {
126122
this.completions = in.readVLong();
127123
this.rejections = in.readVLong();
128124
this.failures = in.readVLong();
129-
this.totalCancellations = in.readVLong();
130-
this.shardCompletions = in.readVLong();
125+
this.cancellations = in.readVLong();
131126
this.resourceStats = in.readMap((i) -> ResourceType.fromName(i.readString()), ResourceStats::new);
132127
}
133128

@@ -145,11 +140,10 @@ public static QueryGroupStatsHolder from(QueryGroupState queryGroupState) {
145140
resourceStatsMap.put(resourceTypeStateEntry.getKey(), ResourceStats.from(resourceTypeStateEntry.getValue()));
146141
}
147142

148-
statsHolder.completions = queryGroupState.getCompletions();
143+
statsHolder.completions = queryGroupState.getTotalCompletions();
149144
statsHolder.rejections = queryGroupState.getTotalRejections();
150145
statsHolder.failures = queryGroupState.getFailures();
151-
statsHolder.totalCancellations = queryGroupState.getTotalCancellations();
152-
statsHolder.shardCompletions = queryGroupState.getShardCompletions();
146+
statsHolder.cancellations = queryGroupState.getTotalCancellations();
153147
statsHolder.resourceStats = resourceStatsMap;
154148
return statsHolder;
155149
}
@@ -164,8 +158,7 @@ public static void writeTo(StreamOutput out, QueryGroupStatsHolder statsHolder)
164158
out.writeVLong(statsHolder.completions);
165159
out.writeVLong(statsHolder.rejections);
166160
out.writeVLong(statsHolder.failures);
167-
out.writeVLong(statsHolder.totalCancellations);
168-
out.writeVLong(statsHolder.shardCompletions);
161+
out.writeVLong(statsHolder.cancellations);
169162
out.writeMap(statsHolder.resourceStats, (o, val) -> o.writeString(val.getName()), ResourceStats::writeTo);
170163
}
171164

@@ -177,10 +170,10 @@ public void writeTo(StreamOutput out) throws IOException {
177170
@Override
178171
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
179172
builder.field(COMPLETIONS, completions);
180-
builder.field(SHARD_COMPLETIONS, shardCompletions);
173+
// builder.field(SHARD_COMPLETIONS, shardCompletions);
181174
builder.field(REJECTIONS, rejections);
182-
builder.field(FAILURES, failures);
183-
builder.field(TOTAL_CANCELLATIONS, totalCancellations);
175+
// builder.field(FAILURES, failures);
176+
builder.field(TOTAL_CANCELLATIONS, cancellations);
184177

185178
for (ResourceType resourceType : ResourceType.getSortedValues()) {
186179
ResourceStats resourceStats1 = resourceStats.get(resourceType);
@@ -199,15 +192,14 @@ public boolean equals(Object o) {
199192
QueryGroupStatsHolder that = (QueryGroupStatsHolder) o;
200193
return completions == that.completions
201194
&& rejections == that.rejections
202-
&& shardCompletions == that.shardCompletions
203195
&& Objects.equals(resourceStats, that.resourceStats)
204196
&& failures == that.failures
205-
&& totalCancellations == that.totalCancellations;
197+
&& cancellations == that.cancellations;
206198
}
207199

208200
@Override
209201
public int hashCode() {
210-
return Objects.hash(completions, shardCompletions, rejections, totalCancellations, failures, resourceStats);
202+
return Objects.hash(completions, rejections, cancellations, failures, resourceStats);
211203
}
212204
}
213205

@@ -217,6 +209,7 @@ public int hashCode() {
217209
public static class ResourceStats implements ToXContentObject, Writeable {
218210
public static final String CURRENT_USAGE = "current_usage";
219211
public static final String CANCELLATIONS = "cancellations";
212+
public static final String REJECTIONS = "rejections";
220213
public static final double PRECISION = 1e-9;
221214
private final double currentUsage;
222215
private final long cancellations;
@@ -268,7 +261,7 @@ public void writeTo(StreamOutput out) throws IOException {
268261
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
269262
builder.field(CURRENT_USAGE, currentUsage);
270263
builder.field(CANCELLATIONS, cancellations);
271-
builder.field(QueryGroupStatsHolder.REJECTIONS, rejections);
264+
builder.field(REJECTIONS, rejections);
272265
return builder;
273266
}
274267

server/src/test/java/org/opensearch/action/admin/cluster/wlm/WlmStatsResponseTests.java

+2-5
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ public class WlmStatsResponseTests extends OpenSearchTestCase {
4646
0,
4747
1,
4848
0,
49-
0,
5049
Map.of(
5150
ResourceType.CPU,
5251
new QueryGroupStats.ResourceStats(0, 0, 0),
@@ -78,10 +77,8 @@ public void testToString() {
7877
+ " \"node-1\" : {\n"
7978
+ " \"query_groups\" : {\n"
8079
+ " \"safjgagnaeekg-3r3fads\" : {\n"
81-
+ " \"completions\" : 0,\n"
82-
+ " \"shard_completions\" : 0,\n"
83-
+ " \"rejections\" : 0,\n"
84-
+ " \"failures\" : 1,\n"
80+
+ " \"total_completions\" : 0,\n"
81+
+ " \"total_rejections\" : 0,\n"
8582
+ " \"total_cancellations\" : 0,\n"
8683
+ " \"cpu\" : {\n"
8784
+ " \"current_usage\" : 0.0,\n"

server/src/test/java/org/opensearch/wlm/QueryGroupServiceTests.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -401,14 +401,14 @@ public void testOnTaskCompleted() {
401401
((QueryGroupTask) task).setQueryGroupId(mockThreadPool.getThreadContext());
402402
queryGroupService.onTaskCompleted(task);
403403

404-
assertEquals(1, queryGroupState.completions.count());
404+
assertEquals(1, queryGroupState.totalCompletions.count());
405405

406406
// test non QueryGroupTask
407407
task = new Task(1, "simple", "test", "mock task", null, null);
408408
queryGroupService.onTaskCompleted(task);
409409

410410
// It should still be 1
411-
assertEquals(1, queryGroupState.completions.count());
411+
assertEquals(1, queryGroupState.totalCompletions.count());
412412

413413
mockThreadPool.shutdown();
414414
}

server/src/test/java/org/opensearch/wlm/listeners/QueryGroupRequestOperationListenerTests.java

-6
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,6 @@ public void testValidQueryGroupRequestFailure() throws IOException {
9595
0,
9696
1,
9797
0,
98-
0,
9998
Map.of(
10099
ResourceType.CPU,
101100
new QueryGroupStats.ResourceStats(0, 0, 0),
@@ -109,7 +108,6 @@ public void testValidQueryGroupRequestFailure() throws IOException {
109108
0,
110109
0,
111110
0,
112-
0,
113111
Map.of(
114112
ResourceType.CPU,
115113
new QueryGroupStats.ResourceStats(0, 0, 0),
@@ -172,7 +170,6 @@ public void testMultiThreadedValidQueryGroupRequestFailures() {
172170
0,
173171
ITERATIONS,
174172
0,
175-
0,
176173
Map.of(
177174
ResourceType.CPU,
178175
new QueryGroupStats.ResourceStats(0, 0, 0),
@@ -186,7 +183,6 @@ public void testMultiThreadedValidQueryGroupRequestFailures() {
186183
0,
187184
0,
188185
0,
189-
0,
190186
Map.of(
191187
ResourceType.CPU,
192188
new QueryGroupStats.ResourceStats(0, 0, 0),
@@ -209,7 +205,6 @@ public void testInvalidQueryGroupFailure() throws IOException {
209205
0,
210206
0,
211207
0,
212-
0,
213208
Map.of(
214209
ResourceType.CPU,
215210
new QueryGroupStats.ResourceStats(0, 0, 0),
@@ -223,7 +218,6 @@ public void testInvalidQueryGroupFailure() throws IOException {
223218
0,
224219
1,
225220
0,
226-
0,
227221
Map.of(
228222
ResourceType.CPU,
229223
new QueryGroupStats.ResourceStats(0, 0, 0),

server/src/test/java/org/opensearch/wlm/stats/QueryGroupStateTests.java

+2-8
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,7 @@ public void testRandomQueryGroupsStateUpdates() {
2323

2424
for (int i = 0; i < 25; i++) {
2525
if (i % 5 == 0) {
26-
updaterThreads.add(new Thread(() -> {
27-
if (randomBoolean()) {
28-
queryGroupState.completions.inc();
29-
} else {
30-
queryGroupState.shardCompletions.inc();
31-
}
32-
}));
26+
updaterThreads.add(new Thread(() -> { queryGroupState.totalCompletions.inc(); }));
3327
} else if (i % 5 == 1) {
3428
updaterThreads.add(new Thread(() -> {
3529
queryGroupState.totalRejections.inc();
@@ -63,7 +57,7 @@ public void testRandomQueryGroupsStateUpdates() {
6357
}
6458
});
6559

66-
assertEquals(5, queryGroupState.getCompletions() + queryGroupState.getShardCompletions());
60+
assertEquals(5, queryGroupState.getTotalCompletions());
6761
assertEquals(5, queryGroupState.getTotalRejections());
6862

6963
final long sumOfRejectionsDueToResourceTypes = queryGroupState.getResourceState().get(ResourceType.CPU).rejections.count()

server/src/test/java/org/opensearch/wlm/stats/QueryGroupStatsTests.java

+1-3
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ public void testToXContent() throws IOException {
3838
13,
3939
2,
4040
0,
41-
1213718,
4241
Map.of(ResourceType.CPU, new QueryGroupStats.ResourceStats(0.3, 13, 2))
4342
)
4443
);
@@ -48,7 +47,7 @@ public void testToXContent() throws IOException {
4847
queryGroupStats.toXContent(builder, ToXContent.EMPTY_PARAMS);
4948
builder.endObject();
5049
assertEquals(
51-
"{\"query_groups\":{\"afakjklaj304041-afaka\":{\"completions\":123456789,\"shard_completions\":1213718,\"rejections\":13,\"failures\":2,\"total_cancellations\":0,\"cpu\":{\"current_usage\":0.3,\"cancellations\":13,\"rejections\":2}}}}",
50+
"{\"query_groups\":{\"afakjklaj304041-afaka\":{\"total_completions\":123456789,\"total_rejections\":13,\"total_cancellations\":0,\"cpu\":{\"current_usage\":0.3,\"cancellations\":13,\"rejections\":2}}}}",
5251
builder.toString()
5352
);
5453
}
@@ -68,7 +67,6 @@ protected QueryGroupStats createTestInstance() {
6867
randomNonNegativeLong(),
6968
randomNonNegativeLong(),
7069
randomNonNegativeLong(),
71-
randomNonNegativeLong(),
7270
Map.of(
7371
ResourceType.CPU,
7472
new QueryGroupStats.ResourceStats(

server/src/test/java/org/opensearch/wlm/stats/WlmStatsTests.java

+1-2
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ public void testToXContent() throws IOException {
3939
13,
4040
2,
4141
0,
42-
1213718,
4342
Map.of(ResourceType.CPU, new QueryGroupStats.ResourceStats(0.3, 13, 2))
4443
)
4544
);
@@ -50,7 +49,7 @@ public void testToXContent() throws IOException {
5049
wlmStats.toXContent(builder, ToXContent.EMPTY_PARAMS);
5150
builder.endObject();
5251
assertEquals(
53-
"{\"query_groups\":{\"afakjklaj304041-afaka\":{\"completions\":123456789,\"shard_completions\":1213718,\"rejections\":13,\"failures\":2,\"total_cancellations\":0,\"cpu\":{\"current_usage\":0.3,\"cancellations\":13,\"rejections\":2}}}}",
52+
"{\"query_groups\":{\"afakjklaj304041-afaka\":{\"total_completions\":123456789,\"total_rejections\":13,\"total_cancellations\":0,\"cpu\":{\"current_usage\":0.3,\"cancellations\":13,\"rejections\":2}}}}",
5453
builder.toString()
5554
);
5655
}

0 commit comments

Comments
 (0)