WIP: DecayRange
Implementation of a range that gradually releases memory back to
the OS. It quickly pulls memory, but dealloc_range caches the memory
locally and uses PAL timers to release it back to the next-level range
once sufficient time has passed.

TODO:
  codify that the parent range needs to be concurrency safe (see the
  interface sketch below).
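
The parent-range contract is currently implicit. Below is a minimal sketch of the shape DecayRange relies on, inferred from how the parent is used in decayrange.h; ExampleParentRange is a hypothetical name, not the codified concept the TODO asks for. Note that dealloc_range is reached from the PAL timer callback, so it can run on a different thread from alloc_range, hence the concurrency-safety requirement.

class ExampleParentRange
{
public:
  // Handle type through which child ranges reach this range; DecayRange
  // holds a `typename ParentRange::State` and calls via operator->.
  class State
  {
    ExampleParentRange range{};

  public:
    constexpr State() = default;

    ExampleParentRange* operator->()
    {
      return &range;
    }
  };

  // Whether allocations from this range are naturally aligned to their size.
  static constexpr bool Aligned = true;

  // Called on the allocating thread when the local cache misses.
  capptr::Chunk<void> alloc_range(size_t size);

  // Called from handle_decay_tick via the PAL timer, potentially
  // concurrently with alloc_range, so it must be concurrency safe.
  void dealloc_range(capptr::Chunk<void> base, size_t size);
};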
mjp41 committed Mar 21, 2022
1 parent 6ad7f65 commit f7e897a
Showing 2 changed files with 298 additions and 4 deletions.
12 changes: 8 additions & 4 deletions src/backend/backend.h
@@ -5,6 +5,7 @@
#include "chunkallocator.h"
#include "commitrange.h"
#include "commonconfig.h"
#include "decayrange.h"
#include "empty_range.h"
#include "globalrange.h"
#include "largebuddyrange.h"
@@ -144,9 +145,10 @@ namespace snmalloc
using GlobalR = GlobalRange<StatsR>;

# ifdef SNMALLOC_META_PROTECTED
using CommittedRange =
DecayRange<CommitRange<GlobalR, DefaultPal>, DefaultPal, Pagemap>;
// Source for object allocations
using ObjectRange =
LargeBuddyRange<CommitRange<GlobalR, DefaultPal>, 21, 21, Pagemap>;
using ObjectRange = LargeBuddyRange<CommittedRange, 21, 21, Pagemap>;
// Set up protected range for metadata
using SubR = CommitRange<SubRange<GlobalR, DefaultPal, 6>, DefaultPal>;
using MetaRange =
@@ -155,8 +157,10 @@ namespace snmalloc
# else
// Source for object allocations and metadata
// No separation between the two
using ObjectRange = SmallBuddyRange<
LargeBuddyRange<CommitRange<GlobalR, DefaultPal>, 21, 21, Pagemap>>;
using CommittedRange =
DecayRange<CommitRange<GlobalR, DefaultPal>, DefaultPal, Pagemap>;
using ObjectRange =
SmallBuddyRange<LargeBuddyRange<CommittedRange, 21, 21, Pagemap>>;
using GlobalMetaRange = GlobalRange<ObjectRange>;
# endif
#endif
290 changes: 290 additions & 0 deletions src/backend/decayrange.h
@@ -0,0 +1,290 @@
#pragma once

#include "../ds/ptrwrap.h"
#include "../pal/pal_ds.h"
#include "largebuddyrange.h"

namespace snmalloc
{
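/**
 * Intrusive list of chunks in which the "next" pointer is stored in the
 * pagemap entry for each chunk (via Rep, e.g. BuddyChunkRep) rather than
 * in the chunk's own memory. The head is the address of the first chunk;
 * 0 represents the empty list.
 */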
template<typename Rep>
class PagemapList
{
uintptr_t head = 0;

PagemapList(uintptr_t head) : head(head) {}

public:
constexpr PagemapList() = default;

bool is_empty() const
{
return head == 0;
}

PagemapList get_next()
{
SNMALLOC_ASSERT(!is_empty());
auto next_field = &(Rep::ref(false, head));
auto next = Rep::get(next_field);
return {next};
}

capptr::Chunk<void> get_capability()
{
return capptr::Chunk<void>(reinterpret_cast<void*>(head));
}

PagemapList cons(capptr::Chunk<void> new_head_cap)
{
auto new_head = new_head_cap.unsafe_uintptr();
auto field = &(Rep::ref(false, new_head));
Rep::set(field, head);
return {new_head};
}
};

/**
* Concurrent Stack
*
* This stack supports the following clients
* (push|pop)* || pop_all* || ... || pop_all*
*
* That is, a single thread may push and pop, while other threads may
* call pop_all. If pop_all returns a value, it returns the entire
* stack; however, it may return an empty list if it races with either
* a push or a pop.
*
* The primary use case is single-threaded access, where other threads
* can attempt to steal all the values.
*/
template<typename Rep>
class PagemapStack
{
static constexpr auto empty = PagemapList<Rep>{};

private:
alignas(CACHELINE_SIZE) std::atomic<PagemapList<Rep>> stack{};

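// Detach the whole list atomically; returns the empty list if the stack
// is (or becomes) empty.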
PagemapList<Rep> take()
{
if (stack.load(std::memory_order_relaxed).is_empty())
return empty;
return stack.exchange(empty, std::memory_order_acquire);
}

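// Put a list back after take(); only the owning thread calls this, so
// the stack is expected to still be empty.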
void replace(PagemapList<Rep> new_head)
{
SNMALLOC_ASSERT(stack.load().is_empty());
stack.store(new_head, std::memory_order_release);
}

public:
constexpr PagemapStack() = default;

void push(capptr::Chunk<void> new_head_cap)
{
auto old_head = take();
auto new_head = old_head.cons(new_head_cap);
replace(new_head);
}

capptr::Chunk<void> pop()
{
auto old_head = take();
if (old_head.is_empty())
return nullptr;

auto next = old_head.get_next();
auto result = old_head.get_capability();

replace(next);
return result;
}

PagemapList<Rep> pop_all()
{
return take();
}
};

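/**
 * Range that locally caches deallocated chunks and gradually returns
 * them to the parent range. Chunks are binned by slab size class and by
 * the epoch in which they were deallocated; a PAL timer periodically
 * advances the epoch and flushes the oldest bin back to the parent. If
 * the PAL does not support timers, deallocations pass straight through
 * to the parent.
 */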
template<typename ParentRange, typename PAL, typename Pagemap>
class DecayRange
{
typename ParentRange::State parent{};

/**
* The number of slab sizes that can be provided.
*/
static constexpr size_t NUM_SLAB_SIZES = Pal::address_bits - MIN_CHUNK_BITS;

/**
* Number of free stacks per chunk size that each allocator will use.
* For performance this should ideally be a power of two. Anything that
* has not been used in the last NUM_EPOCHS - 1 epochs is returned to the
* parent range, where each epoch is separated by
* DecayMemoryTimerObject::PERIOD. I.e. if the period is 500ms and the
* number of epochs is 4, anything not used for the last 1500-2000ms is
* returned to the parent range.
*/
static constexpr size_t NUM_EPOCHS = 4;
static_assert(bits::is_pow2(NUM_EPOCHS), "Code assumes power of two.");

/**
* Stack of ranges that have been returned for reuse.
*/
ModArray<
NUM_SLAB_SIZES,
ModArray<NUM_EPOCHS, PagemapStack<BuddyChunkRep<Pagemap>>>>
chunk_stack;

/**
* The current epoch in which deallocated chunks are placed, and the
* first epoch we check when allocating chunks.
*/
static inline // alignas(CACHELINE_SIZE)
std::atomic<size_t>
epoch{0};

/**
* Flag to ensure one-shot registration with the PAL.
*/
static inline std::atomic_bool registered_timer{false};

std::atomic_bool registered_local{false};

/**
* All activated DecayRanges.
*/
static inline std::atomic<DecayRange*> all_local{nullptr};

DecayRange* next{nullptr};

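/**
 * Timer tick: for every registered DecayRange, return all chunks cached
 * in the oldest epoch's bins to the parent range, then advance the
 * current epoch so that bin is reused for new deallocations.
 */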
static void handle_decay_tick()
{
auto new_epoch = (epoch + 1) % NUM_EPOCHS;
// Flush old index for all threads.
auto curr = all_local.load(std::memory_order_acquire);
while (curr != nullptr)
{
for (size_t sc = 0; sc < NUM_SLAB_SIZES; sc++)
{
// Don't use ChunkRecord, store in pagemap.
auto old_stack = curr->chunk_stack[sc][new_epoch].pop_all();
while (!old_stack.is_empty())
{
auto next = old_stack.get_next();

curr->parent->dealloc_range(
old_stack.get_capability(), MIN_CHUNK_SIZE << sc);

old_stack = next;
}
}
curr = curr->next;
}

// Advance current index
epoch = new_epoch;
}

class DecayMemoryTimerObject : public PalTimerObject
{
/**
* Method for callback object to perform lazy decommit.
*/
static void process(PalTimerObject*)
{
handle_decay_tick();
}

// Specify that the decay tick runs every 500ms.
static constexpr size_t PERIOD = 500;

public:
constexpr DecayMemoryTimerObject() : PalTimerObject(&process, PERIOD) {}
};

static inline DecayMemoryTimerObject timer_object;

public:
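/**
 * Handle used to compose this range into the next layer: provides
 * pointer-like access to a statically-initialisable DecayRange.
 */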
class State
{
DecayRange commit_range{};

public:
constexpr State() = default;

DecayRange* operator->()
{
return &commit_range;
}
};

static constexpr bool Aligned = ParentRange::Aligned;

constexpr DecayRange() = default;

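/**
 * Allocate a range, preferring chunks cached locally in recent epochs.
 * On a miss, fall back to the parent; if the parent is exhausted, run
 * decay ticks to flush cached chunks back to it and retry.
 */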
capptr::Chunk<void> alloc_range(size_t size)
{
// Check local cache

if constexpr (pal_supports<Time, PAL>)
{
auto slab_sizeclass = bits::next_pow2_bits(size) - MIN_CHUNK_BITS;
// Try local cache of chunks first
for (size_t e = 0; e < NUM_EPOCHS; e++)
{
auto p = chunk_stack[slab_sizeclass][(epoch - e) % NUM_EPOCHS].pop();

if (p != nullptr)
return p;
}
}

capptr::Chunk<void> result;
for (auto i = NUM_EPOCHS + 2; i > 0; i--)
{
// Nothing in the local cache, so allocate from the parent.
result = parent->alloc_range(size);
if (result != nullptr)
{
return result;
}

// We have run out of memory.
handle_decay_tick(); // Try to free some memory.
}

return result;
}

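/**
 * Return a range. If the PAL has no timer support, pass straight to the
 * parent; otherwise register the timer and this DecayRange on first use,
 * and push the chunk onto the local stack for the current epoch.
 */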
void dealloc_range(capptr::Chunk<void> base, size_t size)
{
if constexpr (!pal_supports<Time, PAL>)
{
parent->dealloc_range(base, size);
return;
}

if (!registered_timer.exchange(true))
{
// Register with the PAL.
PAL::register_timer(&timer_object);
}

// Check we have registered
if (!registered_local.exchange(true))
{
// Add to the list of local states.
auto* head = all_local.load();
do
{
next = head;
} while (!all_local.compare_exchange_strong(head, this));
}

auto slab_sizeclass = bits::next_pow2_bits(size) - MIN_CHUNK_BITS;
// Add to local cache.
chunk_stack[slab_sizeclass][epoch].push(base);
}
};
} // namespace snmalloc
