Commit
Implementation of a range that gradually releases memory back to the OS. Allocation pulls memory quickly, but dealloc_range caches the memory locally and uses PAL timers to release it back to the next-level range once sufficient time has passed. TODO: codify that the parent range needs to be concurrency safe.
Showing 2 changed files with 298 additions and 4 deletions.
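As a rough illustration of the design described in the commit message, the sketch below shows how a decaying range might be layered over a parent range. The names MyParentRange, MyPal and MyPagemap are placeholders for whatever the real backend supplies, not the actual snmalloc configuration.

// Hypothetical composition sketch; the template arguments are stand-ins.
using MyRange = snmalloc::DecayRange<MyParentRange, MyPal, MyPagemap>;

MyRange::State range;

// alloc_range first consults the per-epoch local cache, then the parent.
auto chunk = range->alloc_range(snmalloc::MIN_CHUNK_SIZE);

// dealloc_range parks the chunk in the current epoch's stack; a PAL timer
// later flushes anything unused for NUM_EPOCHS - 1 periods back to the parent.
range->dealloc_range(chunk, snmalloc::MIN_CHUNK_SIZE);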
@@ -0,0 +1,290 @@
#pragma once

#include "../ds/ptrwrap.h"
#include "../pal/pal_ds.h"
#include "largebuddyrange.h"

namespace snmalloc
{
  template<typename Rep>
  class PagemapList
  {
    uintptr_t head = 0;

    PagemapList(uintptr_t head) : head(head) {}

  public:
    constexpr PagemapList() = default;

    bool is_empty() const
    {
      return head == 0;
    }

    PagemapList get_next()
    {
      SNMALLOC_ASSERT(!is_empty());
      auto next_field = &(Rep::ref(false, head));
      auto next = Rep::get(next_field);
      return {next};
    }

    capptr::Chunk<void> get_capability()
    {
      return capptr::Chunk<void>(reinterpret_cast<void*>(head));
    }

    PagemapList cons(capptr::Chunk<void> new_head_cap)
    {
      auto new_head = new_head_cap.unsafe_uintptr();
      auto field = &(Rep::ref(false, new_head));
      Rep::set(field, head);
      return {new_head};
    }
  };

  /**
   * Concurrent Stack
   *
   * This stack supports the following clients
   *   (push|pop)* || pop_all* || ... || pop_all*
   *
   * That is, a single thread may push and pop, while any number of other
   * threads may call pop_all. If pop_all returns a value, it returns the
   * entire stack; however, it may return nullptr if it races with either a
   * push or a pop.
   *
   * The primary use case is single-threaded access, where other threads
   * can attempt to steal all the values.
   */
  template<typename Rep>
  class PagemapStack
  {
    static constexpr auto empty = PagemapList<Rep>{};

  private:
    alignas(CACHELINE_SIZE) std::atomic<PagemapList<Rep>> stack{};

    PagemapList<Rep> take()
    {
      if (stack.load(std::memory_order_relaxed).is_empty())
        return empty;
      return stack.exchange(empty, std::memory_order_acquire);
    }

    void replace(PagemapList<Rep> new_head)
    {
      SNMALLOC_ASSERT(stack.load().is_empty());
      stack.store(new_head, std::memory_order_release);
    }

  public:
    constexpr PagemapStack() = default;

    void push(capptr::Chunk<void> new_head_cap)
    {
      auto old_head = take();
      auto new_head = old_head.cons(new_head_cap);
      replace(new_head);
    }

    capptr::Chunk<void> pop()
    {
      auto old_head = take();
      if (old_head.is_empty())
        return nullptr;

      auto next = old_head.get_next();
      auto result = old_head.get_capability();

      replace(next);
      return result;
    }

    PagemapList<Rep> pop_all()
    {
      return take();
    }
  };

  template<typename ParentRange, typename PAL, typename Pagemap>
  class DecayRange
  {
    typename ParentRange::State parent{};

    /**
     * The number of slab sizes that can be provided.
     */
    static constexpr size_t NUM_SLAB_SIZES =
      Pal::address_bits - MIN_CHUNK_BITS;

    /**
     * Number of free stacks per chunk size that each allocator will use.
     * For performance, ideally a power of 2. We will return to the central
     * pool anything that has not been used in the last NUM_EPOCHS - 1 epochs,
     * where each epoch is separated by DecayMemoryTimerObject::PERIOD.
     * I.e. if the period is 500ms and the number of epochs is 4, then we will
     * return to the central pool anything not used for the last 1500-2000ms.
     */
    static constexpr size_t NUM_EPOCHS = 4;
    static_assert(bits::is_pow2(NUM_EPOCHS), "Code assumes power of two.");

    /**
     * Stack of ranges that have been returned for reuse.
     */
    ModArray<
      NUM_SLAB_SIZES,
      ModArray<NUM_EPOCHS, PagemapStack<BuddyChunkRep<Pagemap>>>>
      chunk_stack;

    /**
     * Which is the current epoch to place dealloced chunks, and the
     * first place we look for allocating chunks.
     */
    static inline // alignas(CACHELINE_SIZE)
      std::atomic<size_t> epoch{0};

    /**
     * Flag to ensure one-shot registration with the PAL.
     */
    static inline std::atomic_bool registered_timer{false};

    std::atomic_bool registered_local{false};

    /**
     * All activated DecayRanges.
     */
    static inline std::atomic<DecayRange*> all_local{nullptr};

    DecayRange* next{nullptr};

    /**
     * Flush the oldest epoch of every registered DecayRange back to its
     * parent range, then advance the current epoch. Called from the PAL
     * timer and when allocation from the parent fails.
     */
    static void handle_decay_tick()
    {
      auto new_epoch = (epoch + 1) % NUM_EPOCHS;
      // Flush old index for all threads.
      auto curr = all_local.load(std::memory_order_acquire);
      while (curr != nullptr)
      {
        for (size_t sc = 0; sc < NUM_SLAB_SIZES; sc++)
        {
          // Don't use ChunkRecord, store in pagemap.
          auto old_stack = curr->chunk_stack[sc][new_epoch].pop_all();
          while (!old_stack.is_empty())
          {
            auto next = old_stack.get_next();

            curr->parent->dealloc_range(
              old_stack.get_capability(), MIN_CHUNK_SIZE << sc);

            old_stack = next;
          }
        }
        curr = curr->next;
      }

      // Advance current index
      epoch = new_epoch;
    }

    class DecayMemoryTimerObject : public PalTimerObject
    {
      /**
       * Method for callback object to perform lazy decommit.
       */
      static void process(PalTimerObject*)
      {
        handle_decay_tick();
      }

      // Specify that we notify the ChunkAllocator every 500ms.
      static constexpr size_t PERIOD = 500;

    public:
      constexpr DecayMemoryTimerObject() : PalTimerObject(&process, PERIOD) {}
    };

    static inline DecayMemoryTimerObject timer_object;

  public:
    class State
    {
      DecayRange commit_range{};

    public:
      constexpr State() = default;

      DecayRange* operator->()
      {
        return &commit_range;
      }
    };

    static constexpr bool Aligned = ParentRange::Aligned;

    constexpr DecayRange() = default;

    capptr::Chunk<void> alloc_range(size_t size)
    {
      // Check the local cache first.
      if constexpr (pal_supports<Time, PAL>)
      {
        auto slab_sizeclass = bits::next_pow2_bits(size) - MIN_CHUNK_BITS;
        // Try local cache of chunks first
        for (size_t e = 0; e < NUM_EPOCHS; e++)
        {
          auto p = chunk_stack[slab_sizeclass][(epoch - e) % NUM_EPOCHS].pop();

          if (p != nullptr)
            return p;
        }
      }

      capptr::Chunk<void> result;
      for (auto i = NUM_EPOCHS + 2; i > 0; i--)
      {
        // Nothing in the local cache, so allocate from the parent.
        result = parent->alloc_range(size);
        if (result != nullptr)
        {
          return result;
        }

        // We have run out of memory.
        handle_decay_tick(); // Try to free some memory.
      }

      return result;
    }

    void dealloc_range(capptr::Chunk<void> base, size_t size)
    {
      if constexpr (!pal_supports<Time, PAL>)
      {
        parent->dealloc_range(base, size);
        return;
      }

      if (!registered_timer.exchange(true))
      {
        // Register with the PAL.
        PAL::register_timer(&timer_object);
      }

      // Check that this range is registered in the global list.
      if (!registered_local.exchange(true))
      {
        // Add to the list of local states.
        auto* head = all_local.load();
        do
        {
          next = head;
        } while (!all_local.compare_exchange_strong(head, this));
      }

      auto slab_sizeclass = bits::next_pow2_bits(size) - MIN_CHUNK_BITS;
      // Add to local cache.
      chunk_stack[slab_sizeclass][epoch].push(base);
    }
  };
} // namespace snmalloc
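One hedged reading of the TODO in the commit message: handle_decay_tick runs from the PAL timer callback (and on allocation failure), so parent->dealloc_range may be called concurrently with the owning thread's alloc_range. A compile-time check along the following lines could codify that requirement; the ConcurrencySafe flag is an assumed name, not an existing snmalloc property.

// Hypothetical fragment inside DecayRange; assumes parent ranges advertise a
// static constexpr bool ConcurrencySafe flag (not part of snmalloc today).
static_assert(
  ParentRange::ConcurrencySafe,
  "DecayRange flushes cached memory from the PAL timer, so its parent range "
  "must tolerate concurrent alloc_range/dealloc_range calls.");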