Skip to content

Commit 2d37fcb

Browse files
committed
Fix and extend arbitration related metrics
Fixes accounting of kMetricArbitratorLocalArbitrationCount, which was previously sometimes incremented for global arbitration. Also adds operator-level metrics that keep track of the global and local arbitration attempts initiated by each operator.
1 parent 41bed84 commit 2d37fcb

File tree

4 files changed

+32
-7
lines changed

4 files changed

+32
-7
lines changed

velox/common/memory/Memory.h

+1
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,7 @@ class MemoryManager {
314314
std::vector<std::shared_ptr<MemoryPool>> sharedLeafPools_;
315315

316316
mutable folly::SharedMutex mutex_;
317+
// All root pools allocated from 'this'.
317318
std::unordered_map<std::string, std::weak_ptr<MemoryPool>> pools_;
318319
};
319320

velox/common/memory/SharedArbitrator.cpp

+12-4
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,7 @@ bool SharedArbitrator::ensureCapacity(
348348
if (checkCapacityGrowth(*requestor, targetBytes)) {
349349
return true;
350350
}
351-
const uint64_t reclaimedBytes = reclaim(requestor, targetBytes);
351+
const uint64_t reclaimedBytes = reclaim(requestor, targetBytes, true);
352352
// NOTE: return the reclaimed bytes back to the arbitrator and let the memory
353353
// arbitration process grow the requestor's memory capacity accordingly.
354354
incrementFreeCapacity(reclaimedBytes);
@@ -427,6 +427,8 @@ bool SharedArbitrator::arbitrateMemory(
427427

428428
VELOX_CHECK_LT(freedBytes, growTarget);
429429
RECORD_METRIC_VALUE(kMetricArbitratorGlobalArbitrationCount);
430+
addThreadLocalRuntimeStat(
431+
"globalArbitrationCount", RuntimeCounter(1, RuntimeCounter::Unit::kNone));
430432
freedBytes += reclaimUsedMemoryFromCandidatesBySpill(
431433
requestor, candidates, growTarget - freedBytes);
432434
if (requestor->aborted()) {
@@ -494,7 +496,7 @@ uint64_t SharedArbitrator::reclaimUsedMemoryFromCandidatesBySpill(
494496
const int64_t bytesToReclaim = std::max<int64_t>(
495497
targetBytes - freedBytes, memoryPoolTransferCapacity_);
496498
VELOX_CHECK_GT(bytesToReclaim, 0);
497-
freedBytes += reclaim(candidate.pool, bytesToReclaim);
499+
freedBytes += reclaim(candidate.pool, bytesToReclaim, false);
498500
if ((freedBytes >= targetBytes) ||
499501
(requestor != nullptr && requestor->aborted())) {
500502
break;
@@ -531,7 +533,8 @@ uint64_t SharedArbitrator::reclaimUsedMemoryFromCandidatesByAbort(
531533

532534
uint64_t SharedArbitrator::reclaim(
533535
MemoryPool* pool,
534-
uint64_t targetBytes) noexcept {
536+
uint64_t targetBytes,
537+
bool isLocalArbitration) noexcept {
535538
uint64_t reclaimDurationUs{0};
536539
uint64_t reclaimedBytes{0};
537540
uint64_t freedBytes{0};
@@ -542,7 +545,12 @@ uint64_t SharedArbitrator::reclaim(
542545
try {
543546
freedBytes = pool->shrink(targetBytes);
544547
if (freedBytes < targetBytes) {
545-
RECORD_METRIC_VALUE(kMetricArbitratorLocalArbitrationCount);
548+
if (isLocalArbitration) {
549+
RECORD_METRIC_VALUE(kMetricArbitratorLocalArbitrationCount);
550+
addThreadLocalRuntimeStat(
551+
"localArbitrationCount",
552+
RuntimeCounter(1, RuntimeCounter::Unit::kNone));
553+
}
546554
pool->reclaim(
547555
targetBytes - freedBytes, memoryReclaimWaitMs_, reclaimerStats);
548556
}

velox/common/memory/SharedArbitrator.h

+8-3
Original file line numberDiff line numberDiff line change
@@ -151,9 +151,14 @@ class SharedArbitrator : public memory::MemoryArbitrator {
151151
std::vector<Candidate>& candidates,
152152
uint64_t targetBytes);
153153

154-
// Invoked to reclaim used memory from 'pool' with specified 'targetBytes'.
155-
// The function returns the actually freed capacity.
156-
uint64_t reclaim(MemoryPool* pool, uint64_t targetBytes) noexcept;
154+
// Invoked to reclaim used memory from 'targetPool' with specified
155+
// 'targetBytes'. The function returns the actually freed capacity.
156+
// 'isLocalArbitration' is true when the reclaim attempt is within a local
157+
// arbitration.
158+
uint64_t reclaim(
159+
MemoryPool* targetPool,
160+
uint64_t targetBytes,
161+
bool isLocalArbitration) noexcept;
157162

158163
// Invoked to abort memory 'pool'.
159164
void abort(MemoryPool* pool, const std::exception_ptr& error);

velox/docs/monitoring/stats.rst

+11
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,17 @@ These stats are reported by all operators.
4141
- bytes
4242
- The reclaimed memory bytes of an operator during the memory arbitration.
4343
This stat only applies to spillable operators.
44+
* - globalArbitrationCount
45+
-
46+
- The number of times a request for more memory hit the arbitrator's
47+
capacity limit and initiated a global arbitration attempt where
48+
memory is reclaimed from viable candidates chosen among all running
49+
queries based on a criterion.
50+
* - localArbitrationCount
51+
-
52+
- The number of times a request for more memory hit the query memory
53+
limit and initiated a local arbitration attempt where memory is
54+
reclaimed from the requestor itself.
4455

4556
HashBuild, HashAggregation
4657
--------------------------

0 commit comments

Comments
 (0)