Skip to content

Commit 57b684c

Browse files
Bikramjeet Vigfacebook-github-bot
Bikramjeet Vig
authored andcommitted
Ensure memory reservation before shrinking cache and allocating (facebookincubator#9136)
Summary: Currently, the following allocation APIs in memory allocator, namely allocateNonContiguous, allocateContiguous and growContiguous attempt to shrink data cache (if enough allocated memory is not available) before attempting to increase reservation from caller's respective memory pool and finally doing the actual allocation. Motivation: The cache shrinking process used during allocation attempts is currently lock-free and opportunistic. This means that a memory chunk cleared by one thread can be seized by another thread attempting to allocate. We aim to implement a mechanism that will serialize the release and capture of this chunk for a single thread if multiple previous attempts have been unsuccessful. This will make the process more deterministic for a single thread. However, the current implementation presents a challenge. If a thread holds a lock inside the cache and then attempts a reservation, the reservation can trigger an arbitration. If the arbitration selects a query to terminate but one of its drivers is waiting on the cache lock, it will never go off thread, resulting in a deadlock. To avoid this, we propose attempting reservation before trying to shrink the cache, thereby keeping their locking mechanisms independent. Also, if the reservation fails, it can lead to unnecessary clearing of cached memory. Reviewed By: xiaoxmeng Differential Revision: D55034349
1 parent 8f0adb1 commit 57b684c

6 files changed

+181
-275
lines changed

velox/common/memory/MallocAllocator.cpp

+21-96
Original file line numberDiff line numberDiff line change
@@ -47,65 +47,36 @@ MallocAllocator::~MallocAllocator() {
4747
}
4848

4949
bool MallocAllocator::allocateNonContiguousWithoutRetry(
50-
MachinePageCount numPages,
51-
Allocation& out,
52-
ReservationCallback reservationCB,
53-
MachinePageCount minSizeClass) {
54-
const uint64_t freedBytes = freeNonContiguous(out);
55-
if (numPages == 0) {
56-
if (freedBytes != 0 && reservationCB != nullptr) {
57-
reservationCB(freedBytes, false);
58-
}
50+
const SizeMix& sizeMix,
51+
Allocation& out) {
52+
freeNonContiguous(out);
53+
if (sizeMix.totalPages == 0) {
5954
return true;
6055
}
61-
const SizeMix mix = allocationSize(numPages, minSizeClass);
62-
const auto totalBytes = AllocationTraits::pageBytes(mix.totalPages);
56+
const auto totalBytes = AllocationTraits::pageBytes(sizeMix.totalPages);
6357
if (testingHasInjectedFailure(InjectedFailure::kCap) ||
6458
!incrementUsage(totalBytes)) {
65-
if (freedBytes != 0 && reservationCB != nullptr) {
66-
reservationCB(freedBytes, false);
67-
}
6859
const auto errorMsg = fmt::format(
69-
"Exceeded memory allocator limit when allocating {} new pages for "
70-
"total allocation of {} pages, the memory allocator capacity is {}",
71-
mix.totalPages,
72-
numPages,
60+
"Exceeded memory allocator limit when allocating {} new pages"
61+
", the memory allocator capacity is {}",
62+
sizeMix.totalPages,
7363
succinctBytes(capacity_));
7464
VELOX_MEM_LOG_EVERY_MS(WARNING, 1000) << errorMsg;
7565
setAllocatorFailureMessage(errorMsg);
7666
return false;
7767
}
7868

79-
uint64_t bytesToAllocate = 0;
80-
if (reservationCB != nullptr) {
81-
bytesToAllocate = AllocationTraits::pageBytes(mix.totalPages) - freedBytes;
82-
try {
83-
reservationCB(bytesToAllocate, true);
84-
} catch (std::exception&) {
85-
VELOX_MEM_LOG(WARNING)
86-
<< "Failed to reserve " << succinctBytes(bytesToAllocate)
87-
<< " for non-contiguous allocation of " << numPages
88-
<< " pages, then release " << succinctBytes(freedBytes)
89-
<< " from the old allocation";
90-
// If the new memory reservation fails, we need to release the memory
91-
// reservation of the freed memory of previously allocation.
92-
reservationCB(freedBytes, false);
93-
decrementUsage(totalBytes);
94-
std::rethrow_exception(std::current_exception());
95-
}
96-
}
97-
9869
std::vector<void*> buffers;
99-
buffers.reserve(mix.numSizes);
100-
for (int32_t i = 0; i < mix.numSizes; ++i) {
70+
buffers.reserve(sizeMix.numSizes);
71+
for (int32_t i = 0; i < sizeMix.numSizes; ++i) {
10172
MachinePageCount numSizeClassPages =
102-
mix.sizeCounts[i] * sizeClassSizes_[mix.sizeIndices[i]];
73+
sizeMix.sizeCounts[i] * sizeClassSizes_[sizeMix.sizeIndices[i]];
10374
void* ptr = nullptr;
10475
// Trigger allocation failure by skipping malloc
10576
if (!testingHasInjectedFailure(InjectedFailure::kAllocate)) {
10677
stats_.recordAllocate(
107-
AllocationTraits::pageBytes(sizeClassSizes_[mix.sizeIndices[i]]),
108-
mix.sizeCounts[i],
78+
AllocationTraits::pageBytes(sizeClassSizes_[sizeMix.sizeIndices[i]]),
79+
sizeMix.sizeCounts[i],
10980
[&]() {
11081
ptr = ::malloc(
11182
AllocationTraits::pageBytes(numSizeClassPages)); // NOLINT
@@ -117,7 +88,7 @@ bool MallocAllocator::allocateNonContiguousWithoutRetry(
11788
"Malloc failed to allocate {} of memory while allocating for "
11889
"non-contiguous allocation of {} pages",
11990
succinctBytes(AllocationTraits::pageBytes(numSizeClassPages)),
120-
numPages);
91+
sizeMix.totalPages);
12192
VELOX_MEM_LOG(WARNING) << errorMsg;
12293
setAllocatorFailureMessage(errorMsg);
12394
break;
@@ -126,40 +97,33 @@ bool MallocAllocator::allocateNonContiguousWithoutRetry(
12697
out.append(reinterpret_cast<uint8_t*>(ptr), numSizeClassPages); // NOLINT
12798
}
12899

129-
if (buffers.size() != mix.numSizes) {
100+
if (buffers.size() != sizeMix.numSizes) {
130101
// Failed to allocate memory using malloc. Free any malloced pages and
131102
// return false.
132103
for (auto* buffer : buffers) {
133104
::free(buffer);
134105
}
135106
out.clear();
136-
if (reservationCB != nullptr) {
137-
VELOX_MEM_LOG(WARNING)
138-
<< "Failed to allocate memory for non-contiguous allocation of "
139-
<< numPages << " pages, then release "
140-
<< succinctBytes(bytesToAllocate + freedBytes)
141-
<< " of memory reservation including the old allocation";
142-
reservationCB(bytesToAllocate + freedBytes, false);
143-
}
107+
VELOX_MEM_LOG(WARNING)
108+
<< "Failed to allocate memory for non-contiguous allocation of "
109+
<< sizeMix.totalPages << " pages";
144110
decrementUsage(totalBytes);
145111
return false;
146112
}
147113

148114
// Successfully allocated all pages.
149-
numAllocated_.fetch_add(mix.totalPages);
115+
numAllocated_.fetch_add(sizeMix.totalPages);
150116
return true;
151117
}
152118

153119
bool MallocAllocator::allocateContiguousWithoutRetry(
154120
MachinePageCount numPages,
155121
Allocation* collateral,
156122
ContiguousAllocation& allocation,
157-
ReservationCallback reservationCB,
158123
MachinePageCount maxPages) {
159124
bool result;
160125
stats_.recordAllocate(AllocationTraits::pageBytes(numPages), 1, [&]() {
161-
result = allocateContiguousImpl(
162-
numPages, collateral, allocation, reservationCB, maxPages);
126+
result = allocateContiguousImpl(numPages, collateral, allocation, maxPages);
163127
});
164128
return result;
165129
}
@@ -168,7 +132,6 @@ bool MallocAllocator::allocateContiguousImpl(
168132
MachinePageCount numPages,
169133
Allocation* collateral,
170134
ContiguousAllocation& allocation,
171-
ReservationCallback reservationCB,
172135
MachinePageCount maxPages) {
173136
if (maxPages == 0) {
174137
maxPages = numPages;
@@ -192,23 +155,13 @@ bool MallocAllocator::allocateContiguousImpl(
192155
decrementUsage(AllocationTraits::pageBytes(numContiguousCollateralPages));
193156
allocation.clear();
194157
}
195-
const auto totalCollateralPages =
196-
numCollateralPages + numContiguousCollateralPages;
197-
const auto totalCollateralBytes =
198-
AllocationTraits::pageBytes(totalCollateralPages);
199158
if (numPages == 0) {
200-
if (totalCollateralBytes != 0 && reservationCB != nullptr) {
201-
reservationCB(totalCollateralBytes, false);
202-
}
203159
return true;
204160
}
205161

206162
const auto totalBytes = AllocationTraits::pageBytes(numPages);
207163
if (testingHasInjectedFailure(InjectedFailure::kCap) ||
208164
!incrementUsage(totalBytes)) {
209-
if (totalCollateralBytes != 0 && reservationCB != nullptr) {
210-
reservationCB(totalCollateralBytes, false);
211-
}
212165
const auto errorMsg = fmt::format(
213166
"Exceeded memory allocator limit when allocating {} new pages, the "
214167
"memory allocator capacity is {}",
@@ -218,23 +171,6 @@ bool MallocAllocator::allocateContiguousImpl(
218171
VELOX_MEM_LOG_EVERY_MS(WARNING, 1000) << errorMsg;
219172
return false;
220173
}
221-
const int64_t numNeededPages = numPages - totalCollateralPages;
222-
if (reservationCB != nullptr) {
223-
try {
224-
reservationCB(AllocationTraits::pageBytes(numNeededPages), true);
225-
} catch (std::exception&) {
226-
// If the new memory reservation fails, we need to release the memory
227-
// reservation of the freed contiguous and non-contiguous memory.
228-
VELOX_MEM_LOG(WARNING)
229-
<< "Failed to reserve " << AllocationTraits::pageBytes(numNeededPages)
230-
<< " bytes for contiguous allocation of " << numPages
231-
<< " pages, then release " << succinctBytes(totalCollateralBytes)
232-
<< " from the old allocations";
233-
reservationCB(totalCollateralBytes, false);
234-
decrementUsage(totalBytes);
235-
std::rethrow_exception(std::current_exception());
236-
}
237-
}
238174
numAllocated_.fetch_add(numPages);
239175
numMapped_.fetch_add(numPages);
240176
void* data = ::mmap(
@@ -300,15 +236,7 @@ void MallocAllocator::freeContiguousImpl(ContiguousAllocation& allocation) {
300236

301237
bool MallocAllocator::growContiguousWithoutRetry(
302238
MachinePageCount increment,
303-
ContiguousAllocation& allocation,
304-
ReservationCallback reservationCB) {
305-
VELOX_CHECK_LE(
306-
allocation.size() + increment * AllocationTraits::kPageSize,
307-
allocation.maxSize());
308-
if (reservationCB != nullptr) {
309-
// May throw. If does, there is nothing to revert.
310-
reservationCB(AllocationTraits::pageBytes(increment), true);
311-
}
239+
ContiguousAllocation& allocation) {
312240
if (!incrementUsage(AllocationTraits::pageBytes(increment))) {
313241
const auto errorMsg = fmt::format(
314242
"Exceeded memory allocator limit when allocating {} new pages for "
@@ -318,9 +246,6 @@ bool MallocAllocator::growContiguousWithoutRetry(
318246
succinctBytes(capacity_));
319247
setAllocatorFailureMessage(errorMsg);
320248
VELOX_MEM_LOG_EVERY_MS(WARNING, 1000) << errorMsg;
321-
if (reservationCB != nullptr) {
322-
reservationCB(AllocationTraits::pageBytes(increment), false);
323-
}
324249
return false;
325250
}
326251
numAllocated_ += increment;

velox/common/memory/MallocAllocator.h

+3-8
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,7 @@ class MallocAllocator : public MemoryAllocator {
5555

5656
bool growContiguousWithoutRetry(
5757
MachinePageCount increment,
58-
ContiguousAllocation& allocation,
59-
ReservationCallback reservationCB = nullptr) override;
58+
ContiguousAllocation& allocation) override;
6059

6160
void freeBytes(void* p, uint64_t bytes) noexcept override;
6261

@@ -84,23 +83,19 @@ class MallocAllocator : public MemoryAllocator {
8483

8584
private:
8685
bool allocateNonContiguousWithoutRetry(
87-
MachinePageCount numPages,
88-
Allocation& out,
89-
ReservationCallback reservationCB = nullptr,
90-
MachinePageCount minSizeClass = 0) override;
86+
const SizeMix& sizeMix,
87+
Allocation& out) override;
9188

9289
bool allocateContiguousWithoutRetry(
9390
MachinePageCount numPages,
9491
Allocation* collateral,
9592
ContiguousAllocation& allocation,
96-
ReservationCallback reservationCB = nullptr,
9793
MachinePageCount maxPages = 0) override;
9894

9995
bool allocateContiguousImpl(
10096
MachinePageCount numPages,
10197
Allocation* collateral,
10298
ContiguousAllocation& allocation,
103-
ReservationCallback reservationCB,
10499
MachinePageCount maxPages);
105100

106101
void freeContiguousImpl(ContiguousAllocation& allocation);

0 commit comments

Comments
 (0)