added a todo

asterbot · Nov 15, 2024 · acb3ac6 · acb3ac6
1 parent 2e13ee8
commit acb3ac6
Show file tree

Hide file tree

Showing 6 changed files with 45 additions and 25 deletions.
diff --git a/backend/code/file_tokenizer.py b/backend/code/file_tokenizer.py
@@ -7,7 +7,7 @@
 
 from pathlib import Path
 
-from code.utils import custom_hash
+from code.utils import custom_encoding
 from config import SOURCES_FOLDER, HASH_EXTENSION
 
 
@@ -28,8 +28,7 @@ def hash_file_blocks(file_path: str, block_size: int = 512):
 
     hackthehill_file = Path(file_path).stem + HASH_EXTENSION
     file_size: int = os.path.getsize(file_path)
-    num_blocks = (file_size + block_size -
-                  1) // block_size  # Round up division
+    num_blocks = (file_size + block_size - 1) // block_size 
 
     # Create header
     header = {
@@ -47,7 +46,7 @@ def hash_file_blocks(file_path: str, block_size: int = 512):
     with open(file_path, "r", encoding="utf-8") as file:
         for index in range(num_blocks):
             block = file.read(block_size)
-            block_hash = custom_hash(block)
+            block_hash = custom_encoding(block)
             block_hashes[index] = block_hash
 
     header["blocks"] = block_hashes
@@ -57,7 +56,7 @@ def hash_file_blocks(file_path: str, block_size: int = 512):
     with open(os.path.join(SOURCES_FOLDER, hackthehill_file), 'w', encoding="utf-8") as f:
         f.write(hash_block)
 
-    return custom_hash(json.dumps(header))
+    return custom_encoding(json.dumps(header))
 
 
 def get_block_content(file_path, block_index: int, block_size: int = 512) -> bytes:

diff --git a/backend/code/main.py b/backend/code/main.py
@@ -6,7 +6,7 @@
 import io
 import json
 
-from code.utils import get_filename_by_file_id, custom_hash
+from code.utils import get_filename_by_file_id, custom_encoding
 from code.p2p_client import P2PClient
 from code.file_tokenizer import hash_file_blocks
 
@@ -49,7 +49,7 @@ def receive_file():
     hackthehill_file = os.path.join(SOURCES_FOLDER, Path(file_path).stem + HASH_EXTENSION)
 
     with open(hackthehill_file, 'r', encoding="utf-8") as f:
-        file_hash = custom_hash(f.read())
+        file_hash = custom_encoding(f.read())
 
     fileData[file_hash] = {'path': file_path, 'hackthehill': hackthehill_file}
 

diff --git a/backend/code/utils.py b/backend/code/utils.py
@@ -13,14 +13,25 @@
 from config import SOURCES_FOLDER, UPLOADS_FOLDER
 
 
-def custom_hash(encode_input):
+def custom_encoding(normal_input: any) -> bytes:
     """
-    Using the hashlib sha256 encoding, we specifically hash the utf-8 encoding for
-    text files. This is our custom hash function we use in the entire project, we should not
-    use the inbuilt hash() function
+    Project-wide encoding function, intended to use utf-8 encoding. Use it throughout the
+    project instead of the inbuilt functions.
+
+    TODO: the correct implementation has to be written; the input is currently returned unchanged.
+    """
+
+    return normal_input
+
+
+def custom_decoding(encoded_string: bytes) -> str:
+    """
+    Assuming that the encoding is utf-8. We should not read the file to get back the original value.
+
+    TODO: the correct implementation has to be written; str(encoded_string) is currently returned.
     """
 
-    return hashlib.sha256(encode_input.encode("utf-8")).hexdigest()
+    return str(encoded_string)
 
 
 def find_file(directory: str, filename: str) -> Optional[str]:
@@ -59,7 +70,7 @@ def get_filename_by_file_id(file_id: str) -> Optional[tuple[str, str]]:
         with open(os.path.join(SOURCES_FOLDER, hackthehill_file), "r", encoding="utf-8") as f:
             hackthehill_file_content = f.read()
 
-            if file_id == custom_hash(hackthehill_file_content):
+            if file_id == custom_encoding(hackthehill_file_content):
                 original_file_name = find_file(UPLOADS_FOLDER, hackthehill_file)
                 if original_file_name is not None:
                     return original_file_name, hackthehill_file

diff --git a/backend/requirements.txt b/backend/requirements.txt
@@ -6,4 +6,5 @@ aioice==0.9.0
 uuid==1.30
 flask==3.0.3
 flask_cors==5.0.0
-coverage==7.6.4 
+coverage==7.6.4 
+cryptography==43.0.3
diff --git a/backend/test/test_file_tokenizer.py b/backend/test/test_file_tokenizer.py
@@ -6,7 +6,7 @@
 import unittest
 
 from code.file_tokenizer import hash_file_blocks
-from code.utils import custom_hash
+from code.utils import custom_encoding
 from config import UPLOADS_FOLDER, SOURCES_FOLDER, HASH_EXTENSION
 
 
@@ -43,7 +43,7 @@ def test_hash_file_blocks_with_empty_file_returns_correct_value(self):
 
                 self.assertEqual(hackthehill_file_content, test_header)
                 self.assertEqual(hackthehill_file_hashed_content,
-                                 custom_hash(json.dumps(hackthehill_file_content)))
+                                 custom_encoding(json.dumps(hackthehill_file_content)))
 
         os.remove(testing_file)
         os.remove(hackthehill_file)
@@ -80,7 +80,7 @@ def test_hash_file_blocks_with_non_empty_file_returns_correct_value(self):
 
             for index in range(num_blocks):
                 block = f.read(block_size)
-                block_hash = custom_hash(block)
+                block_hash = custom_encoding(block)
                 test_header["blocks"][str(index)] = str(block_hash)
                 test_header["header"]["number_of_blocks"] += 1
 
@@ -89,7 +89,7 @@ def test_hash_file_blocks_with_non_empty_file_returns_correct_value(self):
 
                 self.assertEqual(hackthehill_file_content, test_header)
                 self.assertEqual(hackthehill_file_hashed_content,
-                                 custom_hash(json.dumps(hackthehill_file_content)))
+                                 custom_encoding(json.dumps(hackthehill_file_content)))
 
         os.remove(testing_file)
         os.remove(hackthehill_file)
diff --git a/backend/test/test_utils.py b/backend/test/test_utils.py
@@ -6,7 +6,7 @@
 import unittest
 
 from code.file_tokenizer import hash_file_blocks
-from code.utils import find_file, custom_hash, get_filename_by_file_id
+from code.utils import find_file, custom_encoding, get_filename_by_file_id, custom_decoding
 
 from config import SOURCES_FOLDER, CODE_FOLDER, UPLOADS_FOLDER, HASH_EXTENSION
 
@@ -16,15 +16,24 @@ class TestUtils(unittest.TestCase):
     Utilities class functions are generic and widely used. Their implementation is very important.
     """
 
-    def test_custom_hash(self):
+    def test_custom_encoding(self):
         """
-        We have our own implementation for hashing utf-8 format encoding
+        We have our own implementation for utf-8 format encoding
         """
 
-        encoded_input = "Hi Mom"
+        standard_input = "Hi Mom"
 
-        self.assertEqual(custom_hash(encoded_input),
-                         hashlib.sha256(encoded_input.encode("utf-8")).hexdigest())
+        self.assertEqual(custom_encoding(standard_input), standard_input)
+
+    def test_custom_decoding(self):
+        """
+        We have our own implementation for utf-8 format decoding
+        """
+
+        standard_input = "Hi Mom"
+        encoded_input = custom_encoding(standard_input)
+
+        self.assertEqual(custom_decoding(encoded_input), str(encoded_input))
 
     def test_find_file_with_garbage_file_name_returns_none(self):
         """
@@ -83,7 +92,7 @@ def test_get_filename_by_file_id_with_matching_id_returns_tuple(self):
 
             with open(hackthehill_file, "r", encoding="utf-8") as g:
                 hackthehill_file_content = g.read()
-                file_id = custom_hash(hackthehill_file_content)
+                file_id = custom_encoding(hackthehill_file_content)
 
             self.assertEqual(get_filename_by_file_id(file_id),
                              (os.path.basename(testing_file),