Skip to content

Commit

Permalink
added a todo
Browse files (browse the repository at this point in the history)
  • Loading branch information
TheFJcurve committed Nov 15, 2024
1 parent 2e13ee8 commit acb3ac6
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 25 deletions.
9 changes: 4 additions & 5 deletions backend/code/file_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from pathlib import Path

from code.utils import custom_hash
from code.utils import custom_encoding
from config import SOURCES_FOLDER, HASH_EXTENSION


Expand All @@ -28,8 +28,7 @@ def hash_file_blocks(file_path: str, block_size: int = 512):

hackthehill_file = Path(file_path).stem + HASH_EXTENSION
file_size: int = os.path.getsize(file_path)
num_blocks = (file_size + block_size -
1) // block_size # Round up division
num_blocks = (file_size + block_size - 1) // block_size

# Create header
header = {
Expand All @@ -47,7 +46,7 @@ def hash_file_blocks(file_path: str, block_size: int = 512):
with open(file_path, "r", encoding="utf-8") as file:
for index in range(num_blocks):
block = file.read(block_size)
block_hash = custom_hash(block)
block_hash = custom_encoding(block)
block_hashes[index] = block_hash

header["blocks"] = block_hashes
Expand All @@ -57,7 +56,7 @@ def hash_file_blocks(file_path: str, block_size: int = 512):
with open(os.path.join(SOURCES_FOLDER, hackthehill_file), 'w', encoding="utf-8") as f:
f.write(hash_block)

return custom_hash(json.dumps(header))
return custom_encoding(json.dumps(header))


def get_block_content(file_path, block_index: int, block_size: int = 512) -> bytes:
Expand Down
4 changes: 2 additions & 2 deletions backend/code/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import io
import json

from code.utils import get_filename_by_file_id, custom_hash
from code.utils import get_filename_by_file_id, custom_encoding
from code.p2p_client import P2PClient
from code.file_tokenizer import hash_file_blocks

Expand Down Expand Up @@ -49,7 +49,7 @@ def receive_file():
hackthehill_file = os.path.join(SOURCES_FOLDER, Path(file_path).stem + HASH_EXTENSION)

with open(hackthehill_file, 'r', encoding="utf-8") as f:
file_hash = custom_hash(f.read())
file_hash = custom_encoding(f.read())

fileData[file_hash] = {'path': file_path, 'hackthehill': hackthehill_file}

Expand Down
23 changes: 17 additions & 6 deletions backend/code/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,25 @@
from config import SOURCES_FOLDER, UPLOADS_FOLDER


def custom_hash(encode_input):
def custom_encoding(normal_input: any) -> bytes:
"""
Using the hashlib sha256 encoding, we specifically hash the utf-8 encoding for
text files. This is our custom hash function we use in the entire project, we should not
use the inbuilt hash() function
Using utf-8 encoding. This is the custom encoding function used across the entire project;
the inbuilt functions should not be used instead.
TODO: the correct implementation still has to be written.
"""

return normal_input


def custom_decoding(encoded_string: bytes) -> str:
"""
Assuming that the encoding is utf-8. We should not need to read the file to get back the original value.
TODO: the correct implementation still has to be written.
"""

return hashlib.sha256(encode_input.encode("utf-8")).hexdigest()
return str(encoded_string)


def find_file(directory: str, filename: str) -> Optional[str]:
Expand Down Expand Up @@ -59,7 +70,7 @@ def get_filename_by_file_id(file_id: str) -> Optional[tuple[str, str]]:
with open(os.path.join(SOURCES_FOLDER, hackthehill_file), "r", encoding="utf-8") as f:
hackthehill_file_content = f.read()

if file_id == custom_hash(hackthehill_file_content):
if file_id == custom_encoding(hackthehill_file_content):
original_file_name = find_file(UPLOADS_FOLDER, hackthehill_file)
if original_file_name is not None:
return original_file_name, hackthehill_file
Expand Down
3 changes: 2 additions & 1 deletion backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ aioice==0.9.0
uuid==1.30
flask==3.0.3
flask_cors==5.0.0
coverage==7.6.4
coverage==7.6.4
cryptography==43.0.3
8 changes: 4 additions & 4 deletions backend/test/test_file_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import unittest

from code.file_tokenizer import hash_file_blocks
from code.utils import custom_hash
from code.utils import custom_encoding
from config import UPLOADS_FOLDER, SOURCES_FOLDER, HASH_EXTENSION


Expand Down Expand Up @@ -43,7 +43,7 @@ def test_hash_file_blocks_with_empty_file_returns_correct_value(self):

self.assertEqual(hackthehill_file_content, test_header)
self.assertEqual(hackthehill_file_hashed_content,
custom_hash(json.dumps(hackthehill_file_content)))
custom_encoding(json.dumps(hackthehill_file_content)))

os.remove(testing_file)
os.remove(hackthehill_file)
Expand Down Expand Up @@ -80,7 +80,7 @@ def test_hash_file_blocks_with_non_empty_file_returns_correct_value(self):

for index in range(num_blocks):
block = f.read(block_size)
block_hash = custom_hash(block)
block_hash = custom_encoding(block)
test_header["blocks"][str(index)] = str(block_hash)
test_header["header"]["number_of_blocks"] += 1

Expand All @@ -89,7 +89,7 @@ def test_hash_file_blocks_with_non_empty_file_returns_correct_value(self):

self.assertEqual(hackthehill_file_content, test_header)
self.assertEqual(hackthehill_file_hashed_content,
custom_hash(json.dumps(hackthehill_file_content)))
custom_encoding(json.dumps(hackthehill_file_content)))

os.remove(testing_file)
os.remove(hackthehill_file)
23 changes: 16 additions & 7 deletions backend/test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import unittest

from code.file_tokenizer import hash_file_blocks
from code.utils import find_file, custom_hash, get_filename_by_file_id
from code.utils import find_file, custom_encoding, get_filename_by_file_id, custom_decoding

from config import SOURCES_FOLDER, CODE_FOLDER, UPLOADS_FOLDER, HASH_EXTENSION

Expand All @@ -16,15 +16,24 @@ class TestUtils(unittest.TestCase):
Utilities class functions are generic and widely used. Their implementation is very important.
"""

def test_custom_hash(self):
def test_custom_encoding(self):
"""
We have our own implementation for hashing utf-8 format encoding
We have our own implementation for utf-8 format encoding
"""

encoded_input = "Hi Mom"
standard_input = "Hi Mom"

self.assertEqual(custom_hash(encoded_input),
hashlib.sha256(encoded_input.encode("utf-8")).hexdigest())
self.assertEqual(custom_encoding(standard_input), standard_input)

def test_custom_decoding(self):
"""
We have our own implementation for utf-8 format decoding
"""

standard_input = "Hi Mom"
encoded_input = custom_encoding(standard_input)

self.assertEqual(custom_decoding(encoded_input), str(encoded_input))

def test_find_file_with_garbage_file_name_returns_none(self):
"""
Expand Down Expand Up @@ -83,7 +92,7 @@ def test_get_filename_by_file_id_with_matching_id_returns_tuple(self):

with open(hackthehill_file, "r", encoding="utf-8") as g:
hackthehill_file_content = g.read()
file_id = custom_hash(hackthehill_file_content)
file_id = custom_encoding(hackthehill_file_content)

self.assertEqual(get_filename_by_file_id(file_id),
(os.path.basename(testing_file),
Expand Down

0 comments on commit acb3ac6

Please sign in to comment.