From f99ecd690149daa276badaef169a5f47a556f1d6 Mon Sep 17 00:00:00 2001 From: Jake Date: Sat, 20 Jul 2024 21:51:04 -0700 Subject: [PATCH 01/11] Add hashmap start --- src/util/hashmap.c | 40 ++++++++++++++++++++++++++++++++++++++++ src/util/hashmap.h | 24 ++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 src/util/hashmap.c create mode 100644 src/util/hashmap.h diff --git a/src/util/hashmap.c b/src/util/hashmap.c new file mode 100644 index 0000000..f5ff16f --- /dev/null +++ b/src/util/hashmap.c @@ -0,0 +1,40 @@ +#include "hashmap.h" +#include + +unsigned fnva1(char* value) { + unsigned long long h = 14695981039346656037; + long int prime = 1099511628211; + + while (value) { + h ^= prime; + (*value)++; + } + +} + +struct Hashmap *create_hashmap(int capacity) { + struct Hashmap *h = malloc(sizeof(struct Hashmap)); + + h->buckets = calloc(capacity, sizeof(struct BucketNode *)); + + h->size = 0; + h->cap = capacity; + + return h; +} + +struct BucketNode *create_bucket(void *key, void *value) { + struct BucketNode *b = malloc(sizeof(struct BucketNode)); + + b->key = key; + b->value = value; + b->next = NULL; + + return b; +} + +struct BucketNode *hm_get(void *key); + +void hm_set(void *key, void *value); + +void double_cap(); diff --git a/src/util/hashmap.h b/src/util/hashmap.h new file mode 100644 index 0000000..8e1070e --- /dev/null +++ b/src/util/hashmap.h @@ -0,0 +1,24 @@ +struct BucketNode { + void *key; + void *value; + struct BucketNode *next; +}; + +struct Hashmap { + struct BucketNode **buckets; + int size; + int cap; + + unsigned (*hash)(void *); + unsigned (*equals)(void *); +}; + +struct BucketNode *create_bucket(void *key, void *value); + +// Get a value with a key +struct BucketNode *hm_get(void *key); +// Set a value with a key +void hm_set(void *key, void *value); +// Double the capacity of the hashmap (happens automatically when size > +// capacity) +void double_cap(); From 53410bb9b30a42d053c842c15870ca260099e0b5 Mon Sep 17 00:00:00 2001 From: Jake Date: Tue, 30 Jul 2024 23:04:41 -0700 Subject: [PATCH 02/11] Finish prime --- src/util/hashmap.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/util/hashmap.c b/src/util/hashmap.c index f5ff16f..7822e07 100644 --- a/src/util/hashmap.c +++ b/src/util/hashmap.c @@ -1,15 +1,16 @@ #include "hashmap.h" #include -unsigned fnva1(char* value) { - unsigned long long h = 14695981039346656037; - long int prime = 1099511628211; +unsigned fnva1(char *value) { + unsigned long long h = 14695981039346656037; + long int prime = 1099511628211; - while (value) { - h ^= prime; - (*value)++; - } + while (value) { + h ^= prime; + (*value)++; + } + return h; } struct Hashmap *create_hashmap(int capacity) { From 169490d721e467b20d9d8fdb6e2ee7f1a73dea89 Mon Sep 17 00:00:00 2001 From: Jake Date: Tue, 30 Jul 2024 23:48:49 -0700 Subject: [PATCH 03/11] Add start of get and set --- src/util/hashmap.c | 37 ++++++++++++++++++++++++++++++++++--- src/util/hashmap.h | 8 ++++---- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/src/util/hashmap.c b/src/util/hashmap.c index 7822e07..6ef3989 100644 --- a/src/util/hashmap.c +++ b/src/util/hashmap.c @@ -34,8 +34,39 @@ struct BucketNode *create_bucket(void *key, void *value) { return b; } -struct BucketNode *hm_get(void *key); +struct BucketNode *hm_get(struct Hashmap *h, void *key) { + unsigned a = h->hash(key) % h->cap; -void hm_set(void *key, void *value); + struct BucketNode *b = h->buckets[a]; + if (b != NULL) { -void double_cap(); + // Check if key is the same, because the hash might have collided + while (!h->equals(key, b->key)) { + b = b->next; + } + return b; + } + + return NULL; +} + +void hm_set(struct Hashmap *h, void *key, void *value) { + unsigned a = h->hash(key) % h->cap; + + struct BucketNode *b = h->buckets[a]; + if (b == NULL) { + h->buckets[a] = malloc(sizeof(struct BucketNode *)); + h->buckets[a]->key = key; + h->buckets[a]->value = value; + h->buckets[a]->next = NULL; + } else { + // Handle chaining + } +} + +void double_cap(struct Hashmap *h) { + h->buckets = realloc(h->buckets, h->cap * 2 * sizeof(struct BucketNode *)); + + h->size = 0; + h->cap = h->cap * 2; +} diff --git a/src/util/hashmap.h b/src/util/hashmap.h index 8e1070e..cf63996 100644 --- a/src/util/hashmap.h +++ b/src/util/hashmap.h @@ -10,15 +10,15 @@ struct Hashmap { int cap; unsigned (*hash)(void *); - unsigned (*equals)(void *); + unsigned (*equals)(void *, void *); }; struct BucketNode *create_bucket(void *key, void *value); // Get a value with a key -struct BucketNode *hm_get(void *key); +struct BucketNode *hm_get(struct Hashmap *h, void *key); // Set a value with a key -void hm_set(void *key, void *value); +void hm_set(struct Hashmap *h, void *key, void *value); // Double the capacity of the hashmap (happens automatically when size > // capacity) -void double_cap(); +void double_cap(struct Hashmap *h); From 43348482ccf085a3278433402e3a49393af54ab1 Mon Sep 17 00:00:00 2001 From: Jake Date: Wed, 31 Jul 2024 10:50:20 -0700 Subject: [PATCH 04/11] Add create hashmap to header --- src/util/hashmap.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/util/hashmap.h b/src/util/hashmap.h index cf63996..077d12b 100644 --- a/src/util/hashmap.h +++ b/src/util/hashmap.h @@ -13,6 +13,7 @@ struct Hashmap { unsigned (*equals)(void *, void *); }; +struct Hashmap *create_hashmap(int capacity); struct BucketNode *create_bucket(void *key, void *value); // Get a value with a key From e9b47e1f98d7a5d363f97e6c92a51e6be4ab18f9 Mon Sep 17 00:00:00 2001 From: Jake Date: Wed, 31 Jul 2024 10:52:02 -0700 Subject: [PATCH 05/11] Add destroy hashmap --- src/util/hashmap.c | 5 +++++ src/util/hashmap.h | 2 ++ 2 files changed, 7 insertions(+) diff --git a/src/util/hashmap.c b/src/util/hashmap.c index 6ef3989..bc3b320 100644 --- a/src/util/hashmap.c +++ b/src/util/hashmap.c @@ -24,6 +24,11 @@ struct Hashmap *create_hashmap(int capacity) { return h; } +void destroy_hashmap(struct Hashmap *h) { + free(h->buckets); + free(h); +} + struct BucketNode *create_bucket(void *key, void *value) { struct BucketNode *b = malloc(sizeof(struct BucketNode)); diff --git a/src/util/hashmap.h b/src/util/hashmap.h index 077d12b..09a7545 100644 --- a/src/util/hashmap.h +++ b/src/util/hashmap.h @@ -14,6 +14,8 @@ struct Hashmap { }; struct Hashmap *create_hashmap(int capacity); +void destroy_hashmap(struct Hashmap *h); + struct BucketNode *create_bucket(void *key, void *value); // Get a value with a key From 855fcc9db2023eaeb81d03dacaa959e4f8c30a83 Mon Sep 17 00:00:00 2001 From: Jake Date: Wed, 31 Jul 2024 11:17:43 -0700 Subject: [PATCH 06/11] Add old tests for hashmap --- src/util/hashmap.c | 54 +++++++++++++++++++++++++++++++++++++++++++++- src/util/hashmap.h | 6 +++++- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/src/util/hashmap.c b/src/util/hashmap.c index bc3b320..2957c6c 100644 --- a/src/util/hashmap.c +++ b/src/util/hashmap.c @@ -1,5 +1,9 @@ #include "hashmap.h" +#include #include +#include +#include +#include unsigned fnva1(char *value) { unsigned long long h = 14695981039346656037; @@ -55,7 +59,7 @@ struct BucketNode *hm_get(struct Hashmap *h, void *key) { return NULL; } -void hm_set(struct Hashmap *h, void *key, void *value) { +int hm_set(struct Hashmap *h, void *key, void *value) { unsigned a = h->hash(key) % h->cap; struct BucketNode *b = h->buckets[a]; @@ -64,8 +68,11 @@ void hm_set(struct Hashmap *h, void *key, void *value) { h->buckets[a]->key = key; h->buckets[a]->value = value; h->buckets[a]->next = NULL; + + return 0; } else { // Handle chaining + return -1; } } @@ -75,3 +82,48 @@ void double_cap(struct Hashmap *h) { h->size = 0; h->cap = h->cap * 2; } + +int test_hash_init() { + testing_func_setup(); + struct Hashmap *h = create_hashmap(100); + + tassert(h->size == 0); + tassert(h->cap == 100); +} + +int test_hash_init_and_store() { + testing_func_setup(); + struct Hashmap *h = create_hashmap(100); + + tassert(h->size == 0); + tassert(h->cap == 100); + + char name[100] = "jake"; + + char key[10] = "test"; + int ret = hm_set(h, key, name); + tassert(ret != -1); + + uint64_t ind = h->hash(key) % h->cap; + struct BucketNode *b = h->buckets[ind]; + tassert(strcmp(b->key, key) == 0); + + tassert(h->size == 1); + tassert(h->cap == 100); +} + +int test_hash_set_and_get() { + testing_func_setup(); + struct Hashmap *h = create_hashmap(100); + + char name[100] = "jake"; + char key[10] = "test"; + + int ret = hm_set(h, key, name); + tassert(ret != -1); + + struct BucketNode *got = hm_get(h, "test"); + tassert(strcmp(got->value, "jake") == 0); + + return 0; +} diff --git a/src/util/hashmap.h b/src/util/hashmap.h index 09a7545..56be856 100644 --- a/src/util/hashmap.h +++ b/src/util/hashmap.h @@ -21,7 +21,11 @@ struct BucketNode *create_bucket(void *key, void *value); // Get a value with a key struct BucketNode *hm_get(struct Hashmap *h, void *key); // Set a value with a key -void hm_set(struct Hashmap *h, void *key, void *value); +int hm_set(struct Hashmap *h, void *key, void *value); // Double the capacity of the hashmap (happens automatically when size > // capacity) void double_cap(struct Hashmap *h); + +int test_hash_init(); +int test_hash_init_and_store(); +int test_hash_set_and_get(); From 2899e2dc2475a3d277bffbd3de601868b08851e2 Mon Sep 17 00:00:00 2001 From: Jake Date: Wed, 31 Jul 2024 11:17:58 -0700 Subject: [PATCH 07/11] Add util test files --- src/util/test_util.c | 13 +++++++++++++ src/util/test_util.h | 5 +++++ 2 files changed, 18 insertions(+) create mode 100644 src/util/test_util.c create mode 100644 src/util/test_util.h diff --git a/src/util/test_util.c b/src/util/test_util.c new file mode 100644 index 0000000..83ea4c1 --- /dev/null +++ b/src/util/test_util.c @@ -0,0 +1,13 @@ +#include "hashmap.h" +#include + +int test_util() { + testing_module_setup(); + + test_hash_init(); + test_hash_init_and_store(); + test_hash_set_and_get(); + + testing_module_cleanup(); + return 0; +} diff --git a/src/util/test_util.h b/src/util/test_util.h new file mode 100644 index 0000000..ef0a526 --- /dev/null +++ b/src/util/test_util.h @@ -0,0 +1,5 @@ +#pragma once + +#include "hashmap.h" + +int test_util(); From efbf8e0b5eb2bc01af132a47697182be4854e4c9 Mon Sep 17 00:00:00 2001 From: Jake Date: Wed, 31 Jul 2024 11:18:38 -0700 Subject: [PATCH 08/11] Add test --- src/testing/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/testing/main.c b/src/testing/main.c index 24c148b..7c6027b 100644 --- a/src/testing/main.c +++ b/src/testing/main.c @@ -1,11 +1,13 @@ #include "codegen/x86/test_x86.h" #include "lexer/test_lexer.h" #include +#include int main() { test_lexer(); test_x86(); test_list(); + test_util(); return 0; } From 0640b2ac57b53f15477d2d71f167f67cdb43b465 Mon Sep 17 00:00:00 2001 From: Jake Date: Sat, 3 Aug 2024 20:01:00 -0700 Subject: [PATCH 09/11] Fix function signatures --- src/util/hashmap.c | 2 ++ src/util/hashmap.h | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/util/hashmap.c b/src/util/hashmap.c index 2957c6c..3d3eecf 100644 --- a/src/util/hashmap.c +++ b/src/util/hashmap.c @@ -25,6 +25,8 @@ struct Hashmap *create_hashmap(int capacity) { h->size = 0; h->cap = capacity; + h->hash = fnva1; + return h; } diff --git a/src/util/hashmap.h b/src/util/hashmap.h index 56be856..6bbaf79 100644 --- a/src/util/hashmap.h +++ b/src/util/hashmap.h @@ -1,5 +1,5 @@ struct BucketNode { - void *key; + char *key; void *value; struct BucketNode *next; }; @@ -9,8 +9,8 @@ struct Hashmap { int size; int cap; - unsigned (*hash)(void *); - unsigned (*equals)(void *, void *); + unsigned (*hash)(char *); + unsigned (*equals)(char *, char *); }; struct Hashmap *create_hashmap(int capacity); From 3a45451b482af5550c1c9a46297a2ab7b4eac0db Mon Sep 17 00:00:00 2001 From: Jake Date: Sat, 3 Aug 2024 20:26:37 -0700 Subject: [PATCH 10/11] Fix broken hash --- src/util/hashmap.c | 48 ++++++++++++++++++++++++++++++---------------- src/util/hashmap.h | 6 +++--- 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/src/util/hashmap.c b/src/util/hashmap.c index 3d3eecf..4b43521 100644 --- a/src/util/hashmap.c +++ b/src/util/hashmap.c @@ -1,22 +1,26 @@ #include "hashmap.h" #include +#include #include #include #include #include unsigned fnva1(char *value) { - unsigned long long h = 14695981039346656037; - long int prime = 1099511628211; + unsigned long h = 16777619; + long int prime = 2166136261; - while (value) { - h ^= prime; - (*value)++; + while (*value != '\0') { + h ^= *value; + h *= prime; + ++value; } return h; } +unsigned equal_key(char *a, char *b) { return strcmp(a, b) == 0; } + struct Hashmap *create_hashmap(int capacity) { struct Hashmap *h = malloc(sizeof(struct Hashmap)); @@ -26,6 +30,7 @@ struct Hashmap *create_hashmap(int capacity) { h->cap = capacity; h->hash = fnva1; + h->equals = equal_key; return h; } @@ -35,7 +40,7 @@ void destroy_hashmap(struct Hashmap *h) { free(h); } -struct BucketNode *create_bucket(void *key, void *value) { +struct BucketNode *create_bucket(char *key, void *value) { struct BucketNode *b = malloc(sizeof(struct BucketNode)); b->key = key; @@ -45,27 +50,36 @@ struct BucketNode *create_bucket(void *key, void *value) { return b; } -struct BucketNode *hm_get(struct Hashmap *h, void *key) { +struct BucketNode *hm_get(struct Hashmap *h, char *key) { unsigned a = h->hash(key) % h->cap; struct BucketNode *b = h->buckets[a]; - if (b != NULL) { - // Check if key is the same, because the hash might have collided - while (!h->equals(key, b->key)) { - b = b->next; - } + if (b == NULL) { + return NULL; + } + + if (h->equals(b->key, key)) { return b; } + // check for linear probing + return NULL; } -int hm_set(struct Hashmap *h, void *key, void *value) { +int hm_set(struct Hashmap *h, char *key, void *value) { unsigned a = h->hash(key) % h->cap; struct BucketNode *b = h->buckets[a]; + if (b == NULL) { + if (h->size == h->cap) { + double_cap(h); + } + + h->size++; + h->buckets[a] = malloc(sizeof(struct BucketNode *)); h->buckets[a]->key = key; h->buckets[a]->value = value; @@ -73,12 +87,14 @@ int hm_set(struct Hashmap *h, void *key, void *value) { return 0; } else { - // Handle chaining + // Handle linear probing return -1; } } void double_cap(struct Hashmap *h) { + // TODO: rehash all the old elements + // They will be in the wrong spot after this h->buckets = realloc(h->buckets, h->cap * 2 * sizeof(struct BucketNode *)); h->size = 0; @@ -100,9 +116,9 @@ int test_hash_init_and_store() { tassert(h->size == 0); tassert(h->cap == 100); - char name[100] = "jake"; + char name[5] = "jake"; - char key[10] = "test"; + char key[5] = "test"; int ret = hm_set(h, key, name); tassert(ret != -1); diff --git a/src/util/hashmap.h b/src/util/hashmap.h index 6bbaf79..ef57a3b 100644 --- a/src/util/hashmap.h +++ b/src/util/hashmap.h @@ -16,12 +16,12 @@ struct Hashmap { struct Hashmap *create_hashmap(int capacity); void destroy_hashmap(struct Hashmap *h); -struct BucketNode *create_bucket(void *key, void *value); +struct BucketNode *create_bucket(char *key, void *value); // Get a value with a key -struct BucketNode *hm_get(struct Hashmap *h, void *key); +struct BucketNode *hm_get(struct Hashmap *h, char *key); // Set a value with a key -int hm_set(struct Hashmap *h, void *key, void *value); +int hm_set(struct Hashmap *h, char *key, void *value); // Double the capacity of the hashmap (happens automatically when size > // capacity) void double_cap(struct Hashmap *h); From 4223904b183eae4e444e8af4d8eb5c9d1f937c71 Mon Sep 17 00:00:00 2001 From: Jake Date: Sat, 3 Aug 2024 20:51:48 -0700 Subject: [PATCH 11/11] Add rehash after double --- src/util/hashmap.c | 35 +++++++++++++++++++++++++++++++---- src/util/hashmap.h | 1 + src/util/test_util.c | 1 + 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/src/util/hashmap.c b/src/util/hashmap.c index 4b43521..1cb3e9a 100644 --- a/src/util/hashmap.c +++ b/src/util/hashmap.c @@ -93,11 +93,20 @@ int hm_set(struct Hashmap *h, char *key, void *value) { } void double_cap(struct Hashmap *h) { - // TODO: rehash all the old elements - // They will be in the wrong spot after this - h->buckets = realloc(h->buckets, h->cap * 2 * sizeof(struct BucketNode *)); + struct BucketNode **new_buckets = + calloc(h->cap * 2, sizeof(struct BucketNode *)); + + for (int i = 0; i < h->cap; i++) { + + if (h->buckets[i] != NULL) { + struct BucketNode *b = h->buckets[i]; + unsigned a = h->hash(b->key) % h->cap; + new_buckets[a] = b; + } + } + + h->buckets = new_buckets; - h->size = 0; h->cap = h->cap * 2; } @@ -145,3 +154,21 @@ int test_hash_set_and_get() { return 0; } + +int test_hash_set_and_double_get() { + testing_func_setup(); + struct Hashmap *h = create_hashmap(100); + + char name[100] = "jake"; + char key[10] = "test"; + + int ret = hm_set(h, key, name); + tassert(ret != -1); + + double_cap(h); + + struct BucketNode *got = hm_get(h, "test"); + tassert(strcmp(got->value, "jake") == 0); + + return 0; +} diff --git a/src/util/hashmap.h b/src/util/hashmap.h index ef57a3b..80c9b03 100644 --- a/src/util/hashmap.h +++ b/src/util/hashmap.h @@ -29,3 +29,4 @@ void double_cap(struct Hashmap *h); int test_hash_init(); int test_hash_init_and_store(); int test_hash_set_and_get(); +int test_hash_set_and_double_get(); diff --git a/src/util/test_util.c b/src/util/test_util.c index 83ea4c1..413a5f3 100644 --- a/src/util/test_util.c +++ b/src/util/test_util.c @@ -7,6 +7,7 @@ int test_util() { test_hash_init(); test_hash_init_and_store(); test_hash_set_and_get(); + test_hash_set_and_double_get(); testing_module_cleanup(); return 0;