Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow hash table to dynamically shrink #107

Merged
merged 1 commit into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 97 additions & 75 deletions libCacheSim/dataStructure/hashtable/chainedHashTableV2.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
extern "C" {
#endif

#include "chainedHashTableV2.h"

#include <assert.h>
#include <errno.h>
#include <stdio.h>
Expand All @@ -34,20 +36,21 @@ extern "C" {
#include "../../include/libCacheSim/macro.h"
#include "../../utils/include/mymath.h"
#include "../hash/hash.h"
#include "chainedHashTableV2.h"

#define OBJ_EMPTY(cache_obj) ((cache_obj)->obj_size == 0)
#define NEXT_OBJ(cur_obj) (((cache_obj_t *)(cur_obj))->hash_next)


static void _copy_entries(hashtable_t *new_table, cache_obj_t **old_table, uint64_t old_size);
static void _chained_hashtable_shrink_v2(hashtable_t *hashtable);
static void _chained_hashtable_expand_v2(hashtable_t *hashtable);
static void print_hashbucket_item_distribution(const hashtable_t *hashtable);

/************************ helper func ************************/
/**
* get the last object in the hash bucket
*/
static inline cache_obj_t *_last_obj_in_bucket(const hashtable_t *hashtable,
const uint64_t hv) {
static inline cache_obj_t *_last_obj_in_bucket(const hashtable_t *hashtable, const uint64_t hv) {
cache_obj_t *cur_obj_in_bucket = hashtable->ptr_table[hv];
while (cur_obj_in_bucket->hash_next) {
cur_obj_in_bucket = cur_obj_in_bucket->hash_next;
Expand All @@ -56,10 +59,8 @@ static inline cache_obj_t *_last_obj_in_bucket(const hashtable_t *hashtable,
}

/* add an object to the hashtable */
static inline void add_to_bucket(hashtable_t *hashtable,
cache_obj_t *cache_obj) {
uint64_t hv = get_hash_value_int_64(&cache_obj->obj_id) &
hashmask(hashtable->hashpower);
static inline void add_to_table(hashtable_t *hashtable, cache_obj_t *cache_obj) {
uint64_t hv = get_hash_value_int_64(&cache_obj->obj_id) & hashmask(hashtable->hashpower);
if (hashtable->ptr_table[hv] == NULL) {
hashtable->ptr_table[hv] = cache_obj;
return;
Expand Down Expand Up @@ -92,9 +93,8 @@ hashtable_t *create_chained_hashtable_v2(const uint16_t hashpower) {
size_t size = sizeof(cache_obj_t *) * hashsize(hashtable->hashpower);
hashtable->ptr_table = my_malloc_n(cache_obj_t *, hashsize(hashpower));
if (hashtable->ptr_table == NULL) {
ERROR("allocate hash table %zu entry * %lu B = %ld MiB failed\n",
sizeof(cache_obj_t *), (unsigned long)(hashsize(hashpower)),
(long)(sizeof(cache_obj_t *) * hashsize(hashpower) / 1024 / 1024));
ERROR("allocate hash table %zu entry * %lu B = %ld MiB failed\n", sizeof(cache_obj_t *),
(unsigned long)(hashsize(hashpower)), (long)(sizeof(cache_obj_t *) * hashsize(hashpower) / 1024 / 1024));
exit(1);
}
memset(hashtable->ptr_table, 0, size);
Expand All @@ -108,8 +108,7 @@ hashtable_t *create_chained_hashtable_v2(const uint16_t hashpower) {
return hashtable;
}

cache_obj_t *chained_hashtable_find_obj_id_v2(const hashtable_t *hashtable,
const obj_id_t obj_id) {
cache_obj_t *chained_hashtable_find_obj_id_v2(const hashtable_t *hashtable, const obj_id_t obj_id) {
cache_obj_t *cache_obj = NULL;
uint64_t hv = get_hash_value_int_64(&obj_id);
hv = hv & hashmask(hashtable->hashpower);
Expand All @@ -124,49 +123,41 @@ cache_obj_t *chained_hashtable_find_obj_id_v2(const hashtable_t *hashtable,
return cache_obj;
}

cache_obj_t *chained_hashtable_find_v2(const hashtable_t *hashtable,
const request_t *req) {
cache_obj_t *chained_hashtable_find_v2(const hashtable_t *hashtable, const request_t *req) {
return chained_hashtable_find_obj_id_v2(hashtable, req->obj_id);
}

cache_obj_t *chained_hashtable_find_obj_v2(const hashtable_t *hashtable,
const cache_obj_t *obj_to_find) {
cache_obj_t *chained_hashtable_find_obj_v2(const hashtable_t *hashtable, const cache_obj_t *obj_to_find) {
return chained_hashtable_find_obj_id_v2(hashtable, obj_to_find->obj_id);
}

/* the user needs to make sure the added object is not in the hash table */
cache_obj_t *chained_hashtable_insert_v2(hashtable_t *hashtable,
const request_t *req) {
if (hashtable->n_obj > (uint64_t)(hashsize(hashtable->hashpower) *
CHAINED_HASHTABLE_EXPAND_THRESHOLD)) {
cache_obj_t *chained_hashtable_insert_v2(hashtable_t *hashtable, const request_t *req) {
if (hashtable->n_obj > (uint64_t)(hashsize(hashtable->hashpower) * CHAINED_HASHTABLE_EXPAND_THRESHOLD)) {
_chained_hashtable_expand_v2(hashtable);
}

cache_obj_t *new_cache_obj = create_cache_obj_from_request(req);
add_to_bucket(hashtable, new_cache_obj);
add_to_table(hashtable, new_cache_obj);
hashtable->n_obj += 1;
return new_cache_obj;
}

/* the user needs to make sure the added object is not in the hash table */
cache_obj_t *chained_hashtable_insert_obj_v2(hashtable_t *hashtable,
cache_obj_t *cache_obj) {
cache_obj_t *chained_hashtable_insert_obj_v2(hashtable_t *hashtable, cache_obj_t *cache_obj) {
DEBUG_ASSERT(hashtable->external_obj);
if (hashtable->n_obj > (uint64_t)(hashsize(hashtable->hashpower) *
CHAINED_HASHTABLE_EXPAND_THRESHOLD))
if (hashtable->n_obj > (uint64_t)(hashsize(hashtable->hashpower) * CHAINED_HASHTABLE_EXPAND_THRESHOLD))
_chained_hashtable_expand_v2(hashtable);

add_to_bucket(hashtable, cache_obj);
add_to_table(hashtable, cache_obj);
hashtable->n_obj += 1;
return cache_obj;
}

/* you need to free the extra_metadata before deleting from hash table */
void chained_hashtable_delete_v2(hashtable_t *hashtable,
cache_obj_t *cache_obj) {
void chained_hashtable_delete_v2(hashtable_t *hashtable, cache_obj_t *cache_obj) {
hashtable->n_obj -= 1;
uint64_t hv = get_hash_value_int_64(&cache_obj->obj_id) &
hashmask(hashtable->hashpower);
uint64_t hv = get_hash_value_int_64(&cache_obj->obj_id) & hashmask(hashtable->hashpower);
if (hashtable->ptr_table[hv] == cache_obj) {
hashtable->ptr_table[hv] = cache_obj->hash_next;
if (!hashtable->external_obj) free_cache_obj(cache_obj);
Expand All @@ -183,9 +174,8 @@ void chained_hashtable_delete_v2(hashtable_t *hashtable,

if (chain_len > max_chain_len) {
max_chain_len = chain_len;
WARN("hashtable remove %lu chain len %d, hashtable load %ld/%ld %lf\n",
(unsigned long)cache_obj->obj_id, max_chain_len,
(long)hashtable->n_obj, (long)hashsize(hashtable->hashpower),
WARN("hashtable remove %lu chain len %d, hashtable load %ld/%ld %lf\n", (unsigned long)cache_obj->obj_id,
max_chain_len, (long)hashtable->n_obj, (long)hashsize(hashtable->hashpower),
(double)hashtable->n_obj / hashsize(hashtable->hashpower));

print_hashbucket_item_distribution(hashtable);
Expand All @@ -199,12 +189,10 @@ void chained_hashtable_delete_v2(hashtable_t *hashtable,
}
}

bool chained_hashtable_try_delete_v2(hashtable_t *hashtable,
cache_obj_t *cache_obj) {
bool chained_hashtable_try_delete_v2(hashtable_t *hashtable, cache_obj_t *cache_obj) {
static int max_chain_len = 1;

uint64_t hv = get_hash_value_int_64(&cache_obj->obj_id) &
hashmask(hashtable->hashpower);
uint64_t hv = get_hash_value_int_64(&cache_obj->obj_id) & hashmask(hashtable->hashpower);
if (hashtable->ptr_table[hv] == cache_obj) {
hashtable->ptr_table[hv] = cache_obj->hash_next;
hashtable->n_obj -= 1;
Expand Down Expand Up @@ -261,8 +249,7 @@ bool chained_hashtable_try_delete_v2(hashtable_t *hashtable,
* @param obj_id [The object id to remove]
* @return [true or false]
*/
bool chained_hashtable_delete_obj_id_v2(hashtable_t *hashtable,
const obj_id_t obj_id) {
bool chained_hashtable_delete_obj_id_v2(hashtable_t *hashtable, const obj_id_t obj_id) {
uint64_t hv = get_hash_value_int_64(&obj_id) & hashmask(hashtable->hashpower);
cache_obj_t *cur_obj = hashtable->ptr_table[hv];
// the hash bucket is empty
Expand Down Expand Up @@ -294,15 +281,34 @@ bool chained_hashtable_delete_obj_id_v2(hashtable_t *hashtable,
return false;
}

cache_obj_t *chained_hashtable_rand_obj_v2(const hashtable_t *hashtable) {
cache_obj_t *chained_hashtable_rand_obj_v2(hashtable_t *hashtable) {
uint64_t pos = next_rand() & hashmask(hashtable->hashpower);
while (hashtable->ptr_table[pos] == NULL)
int n_tries = 0;
while (hashtable->ptr_table[pos] == NULL) {
n_tries += 1;
if (n_tries > 32) {
DEBUG("shrink hash table size from 2**%d to 2**%d\n", hashtable->hashpower, hashtable->hashpower - 1);
_chained_hashtable_shrink_v2(hashtable);
}
pos = next_rand() & hashmask(hashtable->hashpower);
return hashtable->ptr_table[pos];
}

int n_obj_in_bucket = 1;
cache_obj_t *cur_obj = hashtable->ptr_table[pos];
while (cur_obj->hash_next) {
cur_obj = cur_obj->hash_next;
n_obj_in_bucket += 1;
}
int rand_pos = next_rand() % n_obj_in_bucket;
cur_obj = hashtable->ptr_table[pos];
for (int i = 0; i < rand_pos; i++) {
cur_obj = cur_obj->hash_next;
}

return cur_obj;
}

void chained_hashtable_foreach_v2(hashtable_t *hashtable,
hashtable_iter iter_func, void *user_data) {
void chained_hashtable_foreach_v2(hashtable_t *hashtable, hashtable_iter iter_func, void *user_data) {
cache_obj_t *cur_obj, *next_obj;
for (uint64_t i = 0; i < hashsize(hashtable->hashpower); i++) {
cur_obj = hashtable->ptr_table[i];
Expand All @@ -315,44 +321,62 @@ void chained_hashtable_foreach_v2(hashtable_t *hashtable,
}

void free_chained_hashtable_v2(hashtable_t *hashtable) {
if (!hashtable->external_obj)
chained_hashtable_foreach_v2(hashtable, foreach_free_obj, NULL);
my_free(sizeof(cache_obj_t *) * hashsize(hashtable->hashpower),
hashtable->ptr_table);
if (!hashtable->external_obj) chained_hashtable_foreach_v2(hashtable, foreach_free_obj, NULL);
my_free(sizeof(cache_obj_t *) * hashsize(hashtable->hashpower), hashtable->ptr_table);
my_free(sizeof(hashtable_t), hashtable);
}

/* grows the hashtable to the next power of 2. */
static void _chained_hashtable_expand_v2(hashtable_t *hashtable) {
cache_obj_t **old_table = hashtable->ptr_table;
hashtable->ptr_table =
my_malloc_n(cache_obj_t *, hashsize(++hashtable->hashpower));
#ifdef USE_HUGEPAGE
madvise(hashtable->table,
sizeof(cache_obj_t *) * hashsize(hashtable->hashpower),
MADV_HUGEPAGE);
#endif
memset(hashtable->ptr_table, 0,
hashsize(hashtable->hashpower) * sizeof(cache_obj_t *));
ASSERT_NOT_NULL(hashtable->ptr_table,
"unable to grow hashtable to size %llu\n",
hashsizeULL(hashtable->hashpower));

VERBOSE("hashtable resized from %llu to %llu\n",
hashsizeULL((uint16_t)(hashtable->hashpower - 1)),
hashsizeULL(hashtable->hashpower));

/**
* @brief copy the entries from the old_table to the new_table
*
* @param new_table
* @param old_table
* @param old_size
* @return * void
*/
static void _copy_entries(hashtable_t *new_table, cache_obj_t **old_table, uint64_t old_size) {
cache_obj_t *cur_obj, *next_obj;
for (uint64_t i = 0; i < hashsize((uint16_t)(hashtable->hashpower - 1));
i++) {
for (uint64_t i = 0; i < old_size; i++) {
cur_obj = old_table[i];
while (cur_obj != NULL) {
next_obj = cur_obj->hash_next;
cur_obj->hash_next = NULL;
add_to_bucket(hashtable, cur_obj);
add_to_table(new_table, cur_obj);
cur_obj = next_obj;
}
}
}

static void _chained_hashtable_shrink_v2(hashtable_t *hashtable) {
cache_obj_t **old_table = hashtable->ptr_table;
hashtable->ptr_table = my_malloc_n(cache_obj_t *, hashsize(--hashtable->hashpower));
#ifdef USE_HUGEPAGE
madvise(hashtable->table, sizeof(cache_obj_t *) * hashsize(hashtable->hashpower), MADV_HUGEPAGE);
#endif
memset(hashtable->ptr_table, 0, hashsize(hashtable->hashpower) * sizeof(cache_obj_t *));
ASSERT_NOT_NULL(hashtable->ptr_table, "unable to shrink hashtable to size %llu\n", hashsizeULL(hashtable->hashpower));

VERBOSE("hashtable resized from %llu to %llu\n", hashsizeULL((uint16_t)(hashtable->hashpower + 1)),
hashsizeULL(hashtable->hashpower));

_copy_entries(hashtable, old_table, hashsize(hashtable->hashpower + 1));
my_free(sizeof(cache_obj_t) * hashsize(hashtable->hashpower + 1), old_table);
}

/* grows the hashtable to the next power of 2. */
static void _chained_hashtable_expand_v2(hashtable_t *hashtable) {
cache_obj_t **old_table = hashtable->ptr_table;
hashtable->ptr_table = my_malloc_n(cache_obj_t *, hashsize(++hashtable->hashpower));
#ifdef USE_HUGEPAGE
madvise(hashtable->table, sizeof(cache_obj_t *) * hashsize(hashtable->hashpower), MADV_HUGEPAGE);
#endif
memset(hashtable->ptr_table, 0, hashsize(hashtable->hashpower) * sizeof(cache_obj_t *));
ASSERT_NOT_NULL(hashtable->ptr_table, "unable to grow hashtable to size %llu\n", hashsizeULL(hashtable->hashpower));

DEBUG("expand hashtable from %llu to %llu entries\n", hashsizeULL((uint16_t)(hashtable->hashpower - 1)),
hashsizeULL(hashtable->hashpower));

_copy_entries(hashtable, old_table, hashsize(hashtable->hashpower - 1));
my_free(sizeof(cache_obj_t) * hashsize(hashtable->hashpower), old_table);
}

Expand All @@ -362,8 +386,7 @@ void check_hashtable_integrity_v2(const hashtable_t *hashtable) {
cur_obj = hashtable->ptr_table[i];
while (cur_obj != NULL) {
next_obj = cur_obj->hash_next;
assert(i == (get_hash_value_int_64(&cur_obj->obj_id) &
hashmask(hashtable->hashpower)));
assert(i == (get_hash_value_int_64(&cur_obj->obj_id) & hashmask(hashtable->hashpower)));
cur_obj = next_obj;
}
}
Expand All @@ -376,8 +399,7 @@ static int count_n_obj_in_bucket(cache_obj_t *curr_obj) {
obj_id_arr[chain_len] = curr_obj->obj_id;
for (int i = 0; i < chain_len; i++) {
if (obj_id_arr[i] == curr_obj->obj_id) {
ERROR("obj_id %lu is duplicated in hashtable\n",
(unsigned long)curr_obj->obj_id);
ERROR("obj_id %lu is duplicated in hashtable\n", (unsigned long)curr_obj->obj_id);
abort();
}
}
Expand Down
2 changes: 1 addition & 1 deletion libCacheSim/dataStructure/hashtable/chainedHashTableV2.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ bool chained_hashtable_try_delete_v2(hashtable_t *hashtable,
void chained_hashtable_delete_v2(hashtable_t *hashtable,
cache_obj_t *cache_obj);

cache_obj_t *chained_hashtable_rand_obj_v2(const hashtable_t *hashtable);
cache_obj_t *chained_hashtable_rand_obj_v2(hashtable_t *hashtable);

void chained_hashtable_foreach_v2(hashtable_t *hashtable,
hashtable_iter iter_func, void *user_data);
Expand Down
1 change: 1 addition & 0 deletions libCacheSim/dataStructure/hashtable/hashtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ extern "C" {
#define hashtable_rand_obj(hashtable) chained_hashtable_rand_obj_v2(hashtable)
#define hashtable_foreach(hashtable, iter_func, user_data) \
chained_hashtable_foreach_v2(hashtable, iter_func, user_data)

#define free_hashtable(hashtable) free_chained_hashtable_v2(hashtable)
#define hashtable_add_ptr_to_monitoring(hashtable, ptr)
#define HASHTABLE_VER 2
Expand Down
2 changes: 1 addition & 1 deletion libCacheSim/include/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ extern "C" {
#endif

#ifndef CHAINED_HASHTABLE_EXPAND_THRESHOLD
#define CHAINED_HASHTABLE_EXPAND_THRESHOLD 1
#define CHAINED_HASHTABLE_EXPAND_THRESHOLD 2
#endif

#include <sys/mman.h>
Expand Down
Loading