Skip to content

Commit f2cc3d8

Browse files
authored
Add a counter to node stat (and _cat/shards) api to track shard going from idle to non-idle (opensearch-project#12768)
--------- Signed-off-by: Ruirui Zhang <mariazrr@amazon.com>
1 parent 10fc755 commit f2cc3d8

File tree

10 files changed

+196
-11
lines changed

10 files changed

+196
-11
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
103103

104104
## [Unreleased 2.x]
105105
### Added
106+
- Add a counter to the node stats API (and `_cat/shards`) to track shards going from search-idle to non-idle ([#12768](https://github.com/opensearch-project/OpenSearch/pull/12768))
106107

107108
### Dependencies
108109
- Bump `org.apache.commons:commons-configuration2` from 2.10.0 to 2.10.1 ([#12896](https://github.com/opensearch-project/OpenSearch/pull/12896))

rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml

+97-2
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,108 @@
11
"Help":
22
- skip:
3-
version: " - 2.11.99"
3+
version: " - 2.99.99"
4+
reason: search idle reactivate count total is only added in 3.0.0
5+
features: node_selector
6+
- do:
7+
cat.shards:
8+
help: true
9+
node_selector:
10+
version: "3.0.0 - "
11+
12+
- match:
13+
$body: |
14+
/^ index .+ \n
15+
shard .+ \n
16+
prirep .+ \n
17+
state .+ \n
18+
docs .+ \n
19+
store .+ \n
20+
ip .+ \n
21+
id .+ \n
22+
node .+ \n
23+
sync_id .+ \n
24+
unassigned.reason .+ \n
25+
unassigned.at .+ \n
26+
unassigned.for .+ \n
27+
unassigned.details .+ \n
28+
recoverysource.type .+ \n
29+
completion.size .+ \n
30+
fielddata.memory_size .+ \n
31+
fielddata.evictions .+ \n
32+
query_cache.memory_size .+ \n
33+
query_cache.evictions .+ \n
34+
flush.total .+ \n
35+
flush.total_time .+ \n
36+
get.current .+ \n
37+
get.time .+ \n
38+
get.total .+ \n
39+
get.exists_time .+ \n
40+
get.exists_total .+ \n
41+
get.missing_time .+ \n
42+
get.missing_total .+ \n
43+
indexing.delete_current .+ \n
44+
indexing.delete_time .+ \n
45+
indexing.delete_total .+ \n
46+
indexing.index_current .+ \n
47+
indexing.index_time .+ \n
48+
indexing.index_total .+ \n
49+
indexing.index_failed .+ \n
50+
merges.current .+ \n
51+
merges.current_docs .+ \n
52+
merges.current_size .+ \n
53+
merges.total .+ \n
54+
merges.total_docs .+ \n
55+
merges.total_size .+ \n
56+
merges.total_time .+ \n
57+
refresh.total .+ \n
58+
refresh.time .+ \n
59+
refresh.external_total .+ \n
60+
refresh.external_time .+ \n
61+
refresh.listeners .+ \n
62+
search.fetch_current .+ \n
63+
search.fetch_time .+ \n
64+
search.fetch_total .+ \n
65+
search.open_contexts .+ \n
66+
search.query_current .+ \n
67+
search.query_time .+ \n
68+
search.query_total .+ \n
69+
search.concurrent_query_current .+ \n
70+
search.concurrent_query_time .+ \n
71+
search.concurrent_query_total .+ \n
72+
search.concurrent_avg_slice_count .+ \n
73+
search.scroll_current .+ \n
74+
search.scroll_time .+ \n
75+
search.scroll_total .+ \n
76+
search.point_in_time_current .+ \n
77+
search.point_in_time_time .+ \n
78+
search.point_in_time_total .+ \n
79+
search.search_idle_reactivate_count_total .+ \n
80+
segments.count .+ \n
81+
segments.memory .+ \n
82+
segments.index_writer_memory .+ \n
83+
segments.version_map_memory .+ \n
84+
segments.fixed_bitset_memory .+ \n
85+
seq_no.max .+ \n
86+
seq_no.local_checkpoint .+ \n
87+
seq_no.global_checkpoint .+ \n
88+
warmer.current .+ \n
89+
warmer.total .+ \n
90+
warmer.total_time .+ \n
91+
path.data .+ \n
92+
path.state .+ \n
93+
docs.deleted .+ \n
94+
$/
95+
---
96+
"Help from 2.12.0 to 2.99.99":
97+
- skip:
98+
version: " - 2.11.99 , 3.0.0 - "
499
reason: deleted docs and concurrent search are added in 2.12.0
5100
features: node_selector
6101
- do:
7102
cat.shards:
8103
help: true
9104
node_selector:
10-
version: "2.12.0 - "
105+
version: "2.12.0 - 2.99.99"
11106

12107
- match:
13108
$body: |

server/src/main/java/org/opensearch/index/search/stats/SearchStats.java

+25-1
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,8 @@ public static class Stats implements Writeable, ToXContentFragment {
163163
private long pitTimeInMillis;
164164
private long pitCurrent;
165165

166+
private long searchIdleReactivateCount;
167+
166168
@Nullable
167169
private RequestStatsLongHolder requestStatsLongHolder;
168170

@@ -193,7 +195,8 @@ public Stats(
193195
long pitCurrent,
194196
long suggestCount,
195197
long suggestTimeInMillis,
196-
long suggestCurrent
198+
long suggestCurrent,
199+
long searchIdleReactivateCount
197200
) {
198201
this.requestStatsLongHolder = new RequestStatsLongHolder();
199202
this.queryCount = queryCount;
@@ -220,6 +223,8 @@ public Stats(
220223
this.pitCount = pitCount;
221224
this.pitTimeInMillis = pitTimeInMillis;
222225
this.pitCurrent = pitCurrent;
226+
227+
this.searchIdleReactivateCount = searchIdleReactivateCount;
223228
}
224229

225230
private Stats(StreamInput in) throws IOException {
@@ -255,6 +260,10 @@ private Stats(StreamInput in) throws IOException {
255260
concurrentQueryCurrent = in.readVLong();
256261
queryConcurrency = in.readVLong();
257262
}
263+
264+
if (in.getVersion().onOrAfter(Version.V_3_0_0)) {
265+
searchIdleReactivateCount = in.readVLong();
266+
}
258267
}
259268

260269
public void add(Stats stats) {
@@ -282,6 +291,8 @@ public void add(Stats stats) {
282291
pitCount += stats.pitCount;
283292
pitTimeInMillis += stats.pitTimeInMillis;
284293
pitCurrent += stats.pitCurrent;
294+
295+
searchIdleReactivateCount += stats.searchIdleReactivateCount;
285296
}
286297

287298
public void addForClosingShard(Stats stats) {
@@ -306,6 +317,8 @@ public void addForClosingShard(Stats stats) {
306317
pitTimeInMillis += stats.pitTimeInMillis;
307318
pitCurrent += stats.pitCurrent;
308319
queryConcurrency += stats.queryConcurrency;
320+
321+
searchIdleReactivateCount += stats.searchIdleReactivateCount;
309322
}
310323

311324
public long getQueryCount() {
@@ -412,6 +425,10 @@ public long getSuggestCurrent() {
412425
return suggestCurrent;
413426
}
414427

428+
public long getSearchIdleReactivateCount() {
429+
return searchIdleReactivateCount;
430+
}
431+
415432
public static Stats readStats(StreamInput in) throws IOException {
416433
return new Stats(in);
417434
}
@@ -457,6 +474,10 @@ public void writeTo(StreamOutput out) throws IOException {
457474
out.writeVLong(concurrentQueryCurrent);
458475
out.writeVLong(queryConcurrency);
459476
}
477+
478+
if (out.getVersion().onOrAfter(Version.V_3_0_0)) {
479+
out.writeVLong(searchIdleReactivateCount);
480+
}
460481
}
461482

462483
@Override
@@ -486,6 +507,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
486507
builder.humanReadableField(Fields.SUGGEST_TIME_IN_MILLIS, Fields.SUGGEST_TIME, getSuggestTime());
487508
builder.field(Fields.SUGGEST_CURRENT, suggestCurrent);
488509

510+
builder.field(Fields.SEARCH_IDLE_REACTIVATE_COUNT_TOTAL, searchIdleReactivateCount);
511+
489512
if (requestStatsLongHolder != null) {
490513
builder.startObject(Fields.REQUEST);
491514

@@ -654,6 +677,7 @@ static final class Fields {
654677
static final String TIME = "time";
655678
static final String CURRENT = "current";
656679
static final String TOTAL = "total";
680+
static final String SEARCH_IDLE_REACTIVATE_COUNT_TOTAL = "search_idle_reactivate_count_total";
657681

658682
}
659683

server/src/main/java/org/opensearch/index/search/stats/ShardSearchStats.java

+8-1
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,11 @@ public void onFreePitContext(ReaderContext readerContext) {
213213
totalStats.pitMetric.inc(TimeUnit.NANOSECONDS.toMicros(System.nanoTime() - readerContext.getStartTimeInNano()));
214214
}
215215

216+
@Override
217+
public void onSearchIdleReactivation() {
218+
totalStats.searchIdleMetric.inc();
219+
}
220+
216221
/**
217222
* Holder of statistics values
218223
*
@@ -239,6 +244,7 @@ static final class StatsHolder {
239244
final CounterMetric scrollCurrent = new CounterMetric();
240245
final CounterMetric pitCurrent = new CounterMetric();
241246
final CounterMetric suggestCurrent = new CounterMetric();
247+
final CounterMetric searchIdleMetric = new CounterMetric();
242248

243249
SearchStats.Stats stats() {
244250
return new SearchStats.Stats(
@@ -260,7 +266,8 @@ SearchStats.Stats stats() {
260266
pitCurrent.count(),
261267
suggestMetric.count(),
262268
TimeUnit.NANOSECONDS.toMillis(suggestMetric.sum()),
263-
suggestCurrent.count()
269+
suggestCurrent.count(),
270+
searchIdleMetric.count()
264271
);
265272
}
266273
}

server/src/main/java/org/opensearch/index/shard/IndexShard.java

+5
Original file line numberDiff line numberDiff line change
@@ -4683,9 +4683,14 @@ public void afterRefresh(boolean didRefresh) {
46834683
* <code>true</code> if the listener was registered to wait for a refresh.
46844684
*/
46854685
public final void awaitShardSearchActive(Consumer<Boolean> listener) {
4686+
boolean isSearchIdle = isSearchIdle();
46864687
markSearcherAccessed(); // move the shard into non-search idle
46874688
final Translog.Location location = pendingRefreshLocation.get();
46884689
if (location != null) {
4690+
if (isSearchIdle) {
4691+
SearchOperationListener searchOperationListener = getSearchOperationListener();
4692+
searchOperationListener.onSearchIdleReactivation();
4693+
}
46894694
addRefreshListener(location, (b) -> {
46904695
pendingRefreshLocation.compareAndSet(location, null);
46914696
listener.accept(true);

server/src/main/java/org/opensearch/index/shard/SearchOperationListener.java

+16
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,11 @@ default void onNewPitContext(ReaderContext readerContext) {}
145145
*/
146146
default void onFreePitContext(ReaderContext readerContext) {}
147147

148+
/**
149+
* Executed when a shard goes from idle to non-idle state
150+
*/
151+
default void onSearchIdleReactivation() {}
152+
148153
/**
149154
* A Composite listener that multiplexes calls to each of the listeners methods.
150155
*/
@@ -310,5 +315,16 @@ public void onFreePitContext(ReaderContext readerContext) {
310315
}
311316
}
312317
}
318+
319+
@Override
320+
public void onSearchIdleReactivation() {
321+
for (SearchOperationListener listener : listeners) {
322+
try {
323+
listener.onSearchIdleReactivation();
324+
} catch (Exception e) {
325+
logger.warn(() -> new ParameterizedMessage("onNewSearchIdleReactivation listener [{}] failed", listener), e);
326+
}
327+
}
328+
}
313329
}
314330
}

server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java

+5
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,10 @@ protected Table getTableWithHeader(final RestRequest request) {
253253
"search.point_in_time_total",
254254
"alias:spto,searchPointInTimeTotal;default:false;text-align:right;desc:completed point in time contexts"
255255
);
256+
table.addCell(
257+
"search.search_idle_reactivate_count_total",
258+
"alias:ssirct,searchSearchIdleReactivateCountTotal;default:false;text-align:right;desc:number of times a shard reactivated"
259+
);
256260

257261
table.addCell("segments.count", "alias:sc,segmentsCount;default:false;text-align:right;desc:number of segments");
258262
table.addCell("segments.memory", "alias:sm,segmentsMemory;default:false;text-align:right;desc:memory used by segments");
@@ -427,6 +431,7 @@ Table buildTable(RestRequest request, ClusterStateResponse state, IndicesStatsRe
427431
table.addCell(getOrNull(commonStats, CommonStats::getSearch, i -> i.getTotal().getPitCurrent()));
428432
table.addCell(getOrNull(commonStats, CommonStats::getSearch, i -> i.getTotal().getPitTime()));
429433
table.addCell(getOrNull(commonStats, CommonStats::getSearch, i -> i.getTotal().getPitCount()));
434+
table.addCell(getOrNull(commonStats, CommonStats::getSearch, i -> i.getTotal().getSearchIdleReactivateCount()));
430435

431436
table.addCell(getOrNull(commonStats, CommonStats::getSegments, SegmentsStats::getCount));
432437
table.addCell(getOrNull(commonStats, CommonStats::getSegments, SegmentsStats::getZeroMemory));

server/src/test/java/org/opensearch/index/search/stats/SearchStatsTests.java

+4-3
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,9 @@ public void testShardLevelSearchGroupStats() throws Exception {
5757
// let's create two dummy search stats with groups
5858
Map<String, Stats> groupStats1 = new HashMap<>();
5959
Map<String, Stats> groupStats2 = new HashMap<>();
60-
groupStats2.put("group1", new Stats(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1));
61-
SearchStats searchStats1 = new SearchStats(new Stats(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 0, groupStats1);
62-
SearchStats searchStats2 = new SearchStats(new Stats(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 0, groupStats2);
60+
groupStats2.put("group1", new Stats(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1));
61+
SearchStats searchStats1 = new SearchStats(new Stats(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 0, groupStats1);
62+
SearchStats searchStats2 = new SearchStats(new Stats(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 0, groupStats2);
6363

6464
// adding these two search stats and checking group stats are correct
6565
searchStats1.add(searchStats2);
@@ -128,6 +128,7 @@ private static void assertStats(Stats stats, long equalTo) {
128128
assertEquals(equalTo, stats.getSuggestCount());
129129
assertEquals(equalTo, stats.getSuggestTimeInMillis());
130130
assertEquals(equalTo, stats.getSuggestCurrent());
131+
assertEquals(equalTo, stats.getSearchIdleReactivateCount());
131132
// avg_concurrency is not summed up across stats
132133
assertEquals(1, stats.getConcurrentAvgSliceCount(), 0);
133134
}

0 commit comments

Comments
 (0)