Skip to content

Commit

Permalink
allow hash table to dynamically shrink
Browse files Browse the repository at this point in the history
  • Loading branch information
1a1a11a committed Nov 25, 2024
1 parent 8ee4481 commit 62c82db
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 77 deletions.
172 changes: 97 additions & 75 deletions libCacheSim/dataStructure/hashtable/chainedHashTableV2.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
extern "C" {
#endif

#include "chainedHashTableV2.h"

#include <assert.h>
#include <errno.h>
#include <stdio.h>
Expand All @@ -34,20 +36,21 @@ extern "C" {
#include "../../include/libCacheSim/macro.h"
#include "../../utils/include/mymath.h"
#include "../hash/hash.h"
#include "chainedHashTableV2.h"

#define OBJ_EMPTY(cache_obj) ((cache_obj)->obj_size == 0)
#define NEXT_OBJ(cur_obj) (((cache_obj_t *)(cur_obj))->hash_next)


static void _copy_entries(hashtable_t *new_table, cache_obj_t **old_table, uint64_t old_size);
static void _chained_hashtable_shrink_v2(hashtable_t *hashtable);
static void _chained_hashtable_expand_v2(hashtable_t *hashtable);
static void print_hashbucket_item_distribution(const hashtable_t *hashtable);

/************************ helper func ************************/
/**
* get the last object in the hash bucket
*/
static inline cache_obj_t *_last_obj_in_bucket(const hashtable_t *hashtable,
const uint64_t hv) {
static inline cache_obj_t *_last_obj_in_bucket(const hashtable_t *hashtable, const uint64_t hv) {
cache_obj_t *cur_obj_in_bucket = hashtable->ptr_table[hv];
while (cur_obj_in_bucket->hash_next) {
cur_obj_in_bucket = cur_obj_in_bucket->hash_next;
Expand All @@ -56,10 +59,8 @@ static inline cache_obj_t *_last_obj_in_bucket(const hashtable_t *hashtable,
}

/* add an object to the hashtable */
static inline void add_to_bucket(hashtable_t *hashtable,
cache_obj_t *cache_obj) {
uint64_t hv = get_hash_value_int_64(&cache_obj->obj_id) &
hashmask(hashtable->hashpower);
static inline void add_to_table(hashtable_t *hashtable, cache_obj_t *cache_obj) {
uint64_t hv = get_hash_value_int_64(&cache_obj->obj_id) & hashmask(hashtable->hashpower);
if (hashtable->ptr_table[hv] == NULL) {
hashtable->ptr_table[hv] = cache_obj;
return;
Expand Down Expand Up @@ -92,9 +93,8 @@ hashtable_t *create_chained_hashtable_v2(const uint16_t hashpower) {
size_t size = sizeof(cache_obj_t *) * hashsize(hashtable->hashpower);
hashtable->ptr_table = my_malloc_n(cache_obj_t *, hashsize(hashpower));
if (hashtable->ptr_table == NULL) {
ERROR("allocate hash table %zu entry * %lu B = %ld MiB failed\n",
sizeof(cache_obj_t *), (unsigned long)(hashsize(hashpower)),
(long)(sizeof(cache_obj_t *) * hashsize(hashpower) / 1024 / 1024));
ERROR("allocate hash table %zu entry * %lu B = %ld MiB failed\n", sizeof(cache_obj_t *),
(unsigned long)(hashsize(hashpower)), (long)(sizeof(cache_obj_t *) * hashsize(hashpower) / 1024 / 1024));
exit(1);
}
memset(hashtable->ptr_table, 0, size);
Expand All @@ -108,8 +108,7 @@ hashtable_t *create_chained_hashtable_v2(const uint16_t hashpower) {
return hashtable;
}

cache_obj_t *chained_hashtable_find_obj_id_v2(const hashtable_t *hashtable,
const obj_id_t obj_id) {
cache_obj_t *chained_hashtable_find_obj_id_v2(const hashtable_t *hashtable, const obj_id_t obj_id) {
cache_obj_t *cache_obj = NULL;
uint64_t hv = get_hash_value_int_64(&obj_id);
hv = hv & hashmask(hashtable->hashpower);
Expand All @@ -124,49 +123,41 @@ cache_obj_t *chained_hashtable_find_obj_id_v2(const hashtable_t *hashtable,
return cache_obj;
}

cache_obj_t *chained_hashtable_find_v2(const hashtable_t *hashtable,
const request_t *req) {
cache_obj_t *chained_hashtable_find_v2(const hashtable_t *hashtable, const request_t *req) {
return chained_hashtable_find_obj_id_v2(hashtable, req->obj_id);
}

cache_obj_t *chained_hashtable_find_obj_v2(const hashtable_t *hashtable,
const cache_obj_t *obj_to_find) {
cache_obj_t *chained_hashtable_find_obj_v2(const hashtable_t *hashtable, const cache_obj_t *obj_to_find) {
return chained_hashtable_find_obj_id_v2(hashtable, obj_to_find->obj_id);
}

/* the user needs to make sure the added object is not in the hash table */
cache_obj_t *chained_hashtable_insert_v2(hashtable_t *hashtable,
const request_t *req) {
if (hashtable->n_obj > (uint64_t)(hashsize(hashtable->hashpower) *
CHAINED_HASHTABLE_EXPAND_THRESHOLD)) {
cache_obj_t *chained_hashtable_insert_v2(hashtable_t *hashtable, const request_t *req) {
if (hashtable->n_obj > (uint64_t)(hashsize(hashtable->hashpower) * CHAINED_HASHTABLE_EXPAND_THRESHOLD)) {
_chained_hashtable_expand_v2(hashtable);
}

cache_obj_t *new_cache_obj = create_cache_obj_from_request(req);
add_to_bucket(hashtable, new_cache_obj);
add_to_table(hashtable, new_cache_obj);
hashtable->n_obj += 1;
return new_cache_obj;
}

/* the user needs to make sure the added object is not in the hash table */
cache_obj_t *chained_hashtable_insert_obj_v2(hashtable_t *hashtable,
cache_obj_t *cache_obj) {
cache_obj_t *chained_hashtable_insert_obj_v2(hashtable_t *hashtable, cache_obj_t *cache_obj) {
DEBUG_ASSERT(hashtable->external_obj);
if (hashtable->n_obj > (uint64_t)(hashsize(hashtable->hashpower) *
CHAINED_HASHTABLE_EXPAND_THRESHOLD))
if (hashtable->n_obj > (uint64_t)(hashsize(hashtable->hashpower) * CHAINED_HASHTABLE_EXPAND_THRESHOLD))
_chained_hashtable_expand_v2(hashtable);

add_to_bucket(hashtable, cache_obj);
add_to_table(hashtable, cache_obj);
hashtable->n_obj += 1;
return cache_obj;
}

/* you need to free the extra_metadata before deleting from hash table */
void chained_hashtable_delete_v2(hashtable_t *hashtable,
cache_obj_t *cache_obj) {
void chained_hashtable_delete_v2(hashtable_t *hashtable, cache_obj_t *cache_obj) {
hashtable->n_obj -= 1;
uint64_t hv = get_hash_value_int_64(&cache_obj->obj_id) &
hashmask(hashtable->hashpower);
uint64_t hv = get_hash_value_int_64(&cache_obj->obj_id) & hashmask(hashtable->hashpower);
if (hashtable->ptr_table[hv] == cache_obj) {
hashtable->ptr_table[hv] = cache_obj->hash_next;
if (!hashtable->external_obj) free_cache_obj(cache_obj);
Expand All @@ -183,9 +174,8 @@ void chained_hashtable_delete_v2(hashtable_t *hashtable,

if (chain_len > max_chain_len) {
max_chain_len = chain_len;
WARN("hashtable remove %lu chain len %d, hashtable load %ld/%ld %lf\n",
(unsigned long)cache_obj->obj_id, max_chain_len,
(long)hashtable->n_obj, (long)hashsize(hashtable->hashpower),
WARN("hashtable remove %lu chain len %d, hashtable load %ld/%ld %lf\n", (unsigned long)cache_obj->obj_id,
max_chain_len, (long)hashtable->n_obj, (long)hashsize(hashtable->hashpower),
(double)hashtable->n_obj / hashsize(hashtable->hashpower));

print_hashbucket_item_distribution(hashtable);
Expand All @@ -199,12 +189,10 @@ void chained_hashtable_delete_v2(hashtable_t *hashtable,
}
}

bool chained_hashtable_try_delete_v2(hashtable_t *hashtable,
cache_obj_t *cache_obj) {
bool chained_hashtable_try_delete_v2(hashtable_t *hashtable, cache_obj_t *cache_obj) {
static int max_chain_len = 1;

uint64_t hv = get_hash_value_int_64(&cache_obj->obj_id) &
hashmask(hashtable->hashpower);
uint64_t hv = get_hash_value_int_64(&cache_obj->obj_id) & hashmask(hashtable->hashpower);
if (hashtable->ptr_table[hv] == cache_obj) {
hashtable->ptr_table[hv] = cache_obj->hash_next;
hashtable->n_obj -= 1;
Expand Down Expand Up @@ -261,8 +249,7 @@ bool chained_hashtable_try_delete_v2(hashtable_t *hashtable,
* @param obj_id [The object id to remove]
* @return [true or false]
*/
bool chained_hashtable_delete_obj_id_v2(hashtable_t *hashtable,
const obj_id_t obj_id) {
bool chained_hashtable_delete_obj_id_v2(hashtable_t *hashtable, const obj_id_t obj_id) {
uint64_t hv = get_hash_value_int_64(&obj_id) & hashmask(hashtable->hashpower);
cache_obj_t *cur_obj = hashtable->ptr_table[hv];
// the hash bucket is empty
Expand Down Expand Up @@ -294,15 +281,34 @@ bool chained_hashtable_delete_obj_id_v2(hashtable_t *hashtable,
return false;
}

cache_obj_t *chained_hashtable_rand_obj_v2(const hashtable_t *hashtable) {
cache_obj_t *chained_hashtable_rand_obj_v2(hashtable_t *hashtable) {
uint64_t pos = next_rand() & hashmask(hashtable->hashpower);
while (hashtable->ptr_table[pos] == NULL)
int n_tries = 0;
while (hashtable->ptr_table[pos] == NULL) {
n_tries += 1;
if (n_tries > 32) {
DEBUG("shrink hash table size from 2**%d to 2**%d\n", hashtable->hashpower, hashtable->hashpower - 1);
_chained_hashtable_shrink_v2(hashtable);
}
pos = next_rand() & hashmask(hashtable->hashpower);
return hashtable->ptr_table[pos];
}

int n_obj_in_bucket = 1;
cache_obj_t *cur_obj = hashtable->ptr_table[pos];
while (cur_obj->hash_next) {
cur_obj = cur_obj->hash_next;
n_obj_in_bucket += 1;
}
int rand_pos = next_rand() % n_obj_in_bucket;
cur_obj = hashtable->ptr_table[pos];
for (int i = 0; i < rand_pos; i++) {
cur_obj = cur_obj->hash_next;
}

return cur_obj;
}

void chained_hashtable_foreach_v2(hashtable_t *hashtable,
hashtable_iter iter_func, void *user_data) {
void chained_hashtable_foreach_v2(hashtable_t *hashtable, hashtable_iter iter_func, void *user_data) {
cache_obj_t *cur_obj, *next_obj;
for (uint64_t i = 0; i < hashsize(hashtable->hashpower); i++) {
cur_obj = hashtable->ptr_table[i];
Expand All @@ -315,44 +321,62 @@ void chained_hashtable_foreach_v2(hashtable_t *hashtable,
}

void free_chained_hashtable_v2(hashtable_t *hashtable) {
if (!hashtable->external_obj)
chained_hashtable_foreach_v2(hashtable, foreach_free_obj, NULL);
my_free(sizeof(cache_obj_t *) * hashsize(hashtable->hashpower),
hashtable->ptr_table);
if (!hashtable->external_obj) chained_hashtable_foreach_v2(hashtable, foreach_free_obj, NULL);
my_free(sizeof(cache_obj_t *) * hashsize(hashtable->hashpower), hashtable->ptr_table);
my_free(sizeof(hashtable_t), hashtable);
}

/* grows the hashtable to the next power of 2. */
static void _chained_hashtable_expand_v2(hashtable_t *hashtable) {
cache_obj_t **old_table = hashtable->ptr_table;
hashtable->ptr_table =
my_malloc_n(cache_obj_t *, hashsize(++hashtable->hashpower));
#ifdef USE_HUGEPAGE
madvise(hashtable->table,
sizeof(cache_obj_t *) * hashsize(hashtable->hashpower),
MADV_HUGEPAGE);
#endif
memset(hashtable->ptr_table, 0,
hashsize(hashtable->hashpower) * sizeof(cache_obj_t *));
ASSERT_NOT_NULL(hashtable->ptr_table,
"unable to grow hashtable to size %llu\n",
hashsizeULL(hashtable->hashpower));

VERBOSE("hashtable resized from %llu to %llu\n",
hashsizeULL((uint16_t)(hashtable->hashpower - 1)),
hashsizeULL(hashtable->hashpower));

/**
* @brief copy the entries from the old_table to the new_table
*
* @param new_table
* @param old_table
* @param old_size
* @return * void
*/
static void _copy_entries(hashtable_t *new_table, cache_obj_t **old_table, uint64_t old_size) {
cache_obj_t *cur_obj, *next_obj;
for (uint64_t i = 0; i < hashsize((uint16_t)(hashtable->hashpower - 1));
i++) {
for (uint64_t i = 0; i < old_size; i++) {
cur_obj = old_table[i];
while (cur_obj != NULL) {
next_obj = cur_obj->hash_next;
cur_obj->hash_next = NULL;
add_to_bucket(hashtable, cur_obj);
add_to_table(new_table, cur_obj);
cur_obj = next_obj;
}
}
}

static void _chained_hashtable_shrink_v2(hashtable_t *hashtable) {
cache_obj_t **old_table = hashtable->ptr_table;
hashtable->ptr_table = my_malloc_n(cache_obj_t *, hashsize(--hashtable->hashpower));
#ifdef USE_HUGEPAGE
madvise(hashtable->table, sizeof(cache_obj_t *) * hashsize(hashtable->hashpower), MADV_HUGEPAGE);
#endif
memset(hashtable->ptr_table, 0, hashsize(hashtable->hashpower) * sizeof(cache_obj_t *));
ASSERT_NOT_NULL(hashtable->ptr_table, "unable to shrink hashtable to size %llu\n", hashsizeULL(hashtable->hashpower));

VERBOSE("hashtable resized from %llu to %llu\n", hashsizeULL((uint16_t)(hashtable->hashpower + 1)),
hashsizeULL(hashtable->hashpower));

_copy_entries(hashtable, old_table, hashsize(hashtable->hashpower + 1));
my_free(sizeof(cache_obj_t) * hashsize(hashtable->hashpower + 1), old_table);
}

/* grows the hashtable to the next power of 2. */
static void _chained_hashtable_expand_v2(hashtable_t *hashtable) {
cache_obj_t **old_table = hashtable->ptr_table;
hashtable->ptr_table = my_malloc_n(cache_obj_t *, hashsize(++hashtable->hashpower));
#ifdef USE_HUGEPAGE
madvise(hashtable->table, sizeof(cache_obj_t *) * hashsize(hashtable->hashpower), MADV_HUGEPAGE);
#endif
memset(hashtable->ptr_table, 0, hashsize(hashtable->hashpower) * sizeof(cache_obj_t *));
ASSERT_NOT_NULL(hashtable->ptr_table, "unable to grow hashtable to size %llu\n", hashsizeULL(hashtable->hashpower));

DEBUG("expand hashtable from %llu to %llu entries\n", hashsizeULL((uint16_t)(hashtable->hashpower - 1)),
hashsizeULL(hashtable->hashpower));

_copy_entries(hashtable, old_table, hashsize(hashtable->hashpower - 1));
my_free(sizeof(cache_obj_t) * hashsize(hashtable->hashpower), old_table);
}

Expand All @@ -362,8 +386,7 @@ void check_hashtable_integrity_v2(const hashtable_t *hashtable) {
cur_obj = hashtable->ptr_table[i];
while (cur_obj != NULL) {
next_obj = cur_obj->hash_next;
assert(i == (get_hash_value_int_64(&cur_obj->obj_id) &
hashmask(hashtable->hashpower)));
assert(i == (get_hash_value_int_64(&cur_obj->obj_id) & hashmask(hashtable->hashpower)));
cur_obj = next_obj;
}
}
Expand All @@ -376,8 +399,7 @@ static int count_n_obj_in_bucket(cache_obj_t *curr_obj) {
obj_id_arr[chain_len] = curr_obj->obj_id;
for (int i = 0; i < chain_len; i++) {
if (obj_id_arr[i] == curr_obj->obj_id) {
ERROR("obj_id %lu is duplicated in hashtable\n",
(unsigned long)curr_obj->obj_id);
ERROR("obj_id %lu is duplicated in hashtable\n", (unsigned long)curr_obj->obj_id);
abort();
}
}
Expand Down
2 changes: 1 addition & 1 deletion libCacheSim/dataStructure/hashtable/chainedHashTableV2.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ bool chained_hashtable_try_delete_v2(hashtable_t *hashtable,
void chained_hashtable_delete_v2(hashtable_t *hashtable,
cache_obj_t *cache_obj);

cache_obj_t *chained_hashtable_rand_obj_v2(const hashtable_t *hashtable);
cache_obj_t *chained_hashtable_rand_obj_v2(hashtable_t *hashtable);

void chained_hashtable_foreach_v2(hashtable_t *hashtable,
hashtable_iter iter_func, void *user_data);
Expand Down
1 change: 1 addition & 0 deletions libCacheSim/dataStructure/hashtable/hashtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ extern "C" {
#define hashtable_rand_obj(hashtable) chained_hashtable_rand_obj_v2(hashtable)
#define hashtable_foreach(hashtable, iter_func, user_data) \
chained_hashtable_foreach_v2(hashtable, iter_func, user_data)

#define free_hashtable(hashtable) free_chained_hashtable_v2(hashtable)
#define hashtable_add_ptr_to_monitoring(hashtable, ptr)
#define HASHTABLE_VER 2
Expand Down
2 changes: 1 addition & 1 deletion libCacheSim/include/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ extern "C" {
#endif

#ifndef CHAINED_HASHTABLE_EXPAND_THRESHOLD
#define CHAINED_HASHTABLE_EXPAND_THRESHOLD 1
#define CHAINED_HASHTABLE_EXPAND_THRESHOLD 2
#endif

#include <sys/mman.h>
Expand Down

0 comments on commit 62c82db

Please sign in to comment.