Skip to content

Commit

Permalink
coverage 100%
Browse files Browse the repository at this point in the history
  • Loading branch information
TheFJcurve committed Nov 12, 2024
1 parent 3db1f46 commit 02cdd4e
Show file tree
Hide file tree
Showing 12 changed files with 123 additions and 42 deletions.
9 changes: 7 additions & 2 deletions backend/.gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
venv
sources
uploads

sources/
!sources/sample.hackthehill

uploads/
!uploads/sample.txt

__pycache__
.coverage
.idea
11 changes: 0 additions & 11 deletions backend/code/config.py

This file was deleted.

6 changes: 4 additions & 2 deletions backend/code/file_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

from pathlib import Path

from config import SOURCES_FOLDER


class SenderTokenizer:
"""
Expand All @@ -31,6 +33,7 @@ def hash_file_blocks(self, block_size=512):
be used to hide the nature of the file in communication
"""

hackthehill_file = Path(self.file_path).stem + ".hackthehill"
file_size: int = os.path.getsize(self.file_path)
num_blocks = (file_size + block_size -
1) // block_size # Round up division
Expand Down Expand Up @@ -58,8 +61,7 @@ def hash_file_blocks(self, block_size=512):

hash_block = json.dumps(header, indent=2)

with open("./sources/" + Path(self.file_path).stem + ".hackthehill",
'w', encoding="utf-8") as f:
with open(os.path.join(SOURCES_FOLDER, hackthehill_file), 'w', encoding="utf-8") as f:
f.write(hash_block)

return hashlib.sha256(json.dumps(header).encode('utf-8')).hexdigest()
Expand Down
6 changes: 3 additions & 3 deletions backend/code/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import io
import json

from code.config import UPLOAD_FOLDER
from config import UPLOADS_FOLDER
from code.utils import get_filename_by_file_id, custom_hash
from code.p2p_client import P2PClient
from code.file_tokenizer import SenderTokenizer
Expand All @@ -20,8 +20,8 @@
CORS(app)

# Ensure the upload directory exists
if not os.path.exists(UPLOAD_FOLDER):
os.makedirs(UPLOAD_FOLDER)
if not os.path.exists(UPLOADS_FOLDER):
os.makedirs(UPLOADS_FOLDER)

fileData = {}

Expand Down
2 changes: 1 addition & 1 deletion backend/code/p2p_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from code.utils import get_filename_by_file_id
from code.file_tokenizer import ReceiverTokenizer
from code.config import DISCOVERY_PORT, CHAT_PORT, MAX_UDP_PACKET, DISCOVERY_ADDRESS
from config import DISCOVERY_PORT, CHAT_PORT, MAX_UDP_PACKET, DISCOVERY_ADDRESS


class P2PClient:
Expand Down
35 changes: 21 additions & 14 deletions backend/code/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@
from pathlib import Path
from typing import Optional

from config import SOURCES_FOLDER, UPLOADS_FOLDER


def custom_hash(encode_input):
"""
Using the hashlib sha256 encoding, we specifically hash the utf-8 encoding for
Using the hashlib sha256 encoding, we specifically hash the utf-8 encoding for
text files. This is our custom hash function we use in the entire project, we should not
use the inbuilt hash() function
"""
Expand All @@ -24,11 +26,11 @@ def custom_hash(encode_input):
def find_file(directory: str, filename: str) -> Optional[str]:
"""
Helper function for get_filename_by_file_id
Given a directory and a filename, check if the directory exists, and if the file
exists inside the given directory. If yes, return the file name, otherwise return
None
:param directory: str
:param filename: str
:returns: Either string or None
Expand All @@ -37,23 +39,28 @@ def find_file(directory: str, filename: str) -> Optional[str]:
if os.path.exists(directory):
for file in os.listdir(directory):
name, _ = os.path.splitext(file)
if name == filename:
if name == Path(filename).stem:
return file
return None


def get_filename_by_file_id(file_id):
def get_filename_by_file_id(file_id: str) -> Optional[tuple[str, str]]:
"""
TODO
Given a unique file id, check whether the original file associated with that
file id exists. Returns None if no .hackthehill file hashes to the given id, or if the
matching original file cannot be found. By convention, .hackthehill files live in
backend/sources and the original files live in backend/uploads.
:param file_id: str
:returns: tuple[str, str] | None
"""

for fingerprint_file_name in os.listdir("sources"):
with open(os.path.join("sources", fingerprint_file_name), "r", encoding="utf-8") as f:
file_fingerprint_content = f.read()
for hackthehill_file in os.listdir(SOURCES_FOLDER):
with open(os.path.join(SOURCES_FOLDER, hackthehill_file), "r", encoding="utf-8") as f:
hackthehill_file_content = f.read()

if file_id == custom_hash(file_fingerprint_content):
file_name = find_file(
"uploads", Path(fingerprint_file_name).stem)
if file_name is not None:
return [file_name, fingerprint_file_name]
if file_id == custom_hash(hackthehill_file_content):
original_file_name = find_file(UPLOADS_FOLDER, hackthehill_file)
if original_file_name is not None:
return original_file_name, hackthehill_file
return None
19 changes: 19 additions & 0 deletions backend/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""
Global variables for the project
Since the file contains actual directory names, it must be at the root of backend
"""

import os.path

# --- Network settings (imported by the P2P client) ---------------------------
DISCOVERY_PORT, CHAT_PORT = 5000, 5001
# presumably the largest UDP payload over IPv4 (65535 - 8 - 20) -- TODO confirm
MAX_UDP_PACKET = 65507
# NOTE(review): looks like a subnet broadcast address tied to one LAN -- confirm
DISCOVERY_ADDRESS = "192.168.181.255"

# --- Filesystem layout --------------------------------------------------------
# Absolute path of the directory that contains this file. Every folder below is
# resolved against it, so the paths do not depend on the current working
# directory of the process.
BACKEND_FOLDER = os.path.dirname(os.path.abspath(__file__))

# Each project folder is a direct child of BACKEND_FOLDER.
UPLOADS_FOLDER, SOURCES_FOLDER, CODE_FOLDER, TEST_FOLDER = (
    os.path.join(BACKEND_FOLDER, folder_name)
    for folder_name in ("uploads", "sources", "code", "test")
)
1 change: 1 addition & 0 deletions backend/sources/sample.hackthehill
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Adding the file just so that sources folder is recognized via github
9 changes: 9 additions & 0 deletions backend/sourcestest.hackthehill
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"header": {
"file_name": "test.txt",
"file_size": 0,
"number_of_blocks": 0,
"block_size": 512
},
"blocks": {}
}
64 changes: 55 additions & 9 deletions backend/test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,27 @@
Testing the Utilities class
"""
import hashlib
import os.path
import pathlib
import unittest
from code.utils import find_file, custom_hash

from code.file_tokenizer import SenderTokenizer
from config import SOURCES_FOLDER, CODE_FOLDER, UPLOADS_FOLDER
from code.utils import find_file, custom_hash, get_filename_by_file_id


class TestUtils(unittest.TestCase):
"""
Utilities class functions are generic and widely used. Their implementation is very important.
"""

def test_custom_hash(self):
"""
We have our own implementation for hashing utf-8 format encoding
"""

encoded_input = "Hi Mom"

self.assertEqual(custom_hash(encoded_input), hashlib.sha256(encoded_input.encode("utf-8")).hexdigest())

def test_find_file_with_garbage_file_name_returns_none(self):
Expand All @@ -33,13 +38,54 @@ def test_find_file_with_garbage_directory_name_returns_none(self):
"""

self.assertEqual(find_file("random_directory", "uploads.txt"), None)

def test_find_file_with_proper_director_and_file_returns_filename(self):
"""
If a file and directory exists, and file exists inside the directory, return the
filename
"""

self.assertEqual(find_file("code", "utils"), "utils.py")



self.assertEqual(find_file(CODE_FOLDER, "utils.py"), "utils.py")

def test_get_filename_by_file_id_with_no_matching_id_returns_none(self):
    """
    If none of the hashed ids match with the file id, we should return None
    """

    testing_file = os.path.join(
        UPLOADS_FOLDER,
        "test_get_filename_by_file_id_with_no_matching_id_returns_none.txt")
    message = "test_get_filename_by_file_id_with_no_matching_id_returns_none"

    # "x" mode refuses to overwrite an existing file, so a fixture left behind
    # by an earlier run would make this line raise FileExistsError.
    with open(testing_file, "x", encoding="utf-8") as f:
        f.write(message)
    # Register the removal immediately after creation: cleanups run even when
    # the assertion below fails, so the fixture can never leak into later runs.
    self.addCleanup(os.remove, testing_file)

    file_id = "random-file-id"

    self.assertIsNone(get_filename_by_file_id(file_id))

def test_get_filename_by_file_id_with_matching_id_returns_tuple(self):
    """
    If the hashed id is the same as the hash of .hackthehill file, we should return the
    tuple of filenames
    """

    testing_file = os.path.join(
        UPLOADS_FOLDER,
        "test_get_filename_by_file_id_with_matching_id_returns_tuple.txt")
    hackthehill_file = os.path.join(
        SOURCES_FOLDER,
        "test_get_filename_by_file_id_with_matching_id_returns_tuple.hackthehill")

    hashed_file = SenderTokenizer(testing_file)
    message = "test_get_filename_by_file_id_with_matching_id_returns_tuple"

    # "x" mode refuses to overwrite an existing file, so a fixture left behind
    # by an earlier run would make this line raise FileExistsError.
    with open(testing_file, "x", encoding="utf-8") as f:
        f.write(message)
    # Register the removal immediately after creation: cleanups run even when
    # a later step or assertion fails, so the fixture can never leak.
    self.addCleanup(os.remove, testing_file)

    # hash_file_blocks writes "<stem>.hackthehill" into SOURCES_FOLDER, which
    # is the path held in hackthehill_file; schedule its removal as well.
    hashed_file.hash_file_blocks()
    self.addCleanup(os.remove, hackthehill_file)

    with open(hackthehill_file, "r", encoding="utf-8") as f:
        hackthehill_file_content = f.read()
    file_id = custom_hash(hackthehill_file_content)

    expected = (os.path.basename(testing_file), os.path.basename(hackthehill_file))
    self.assertEqual(get_filename_by_file_id(file_id), expected)
1 change: 1 addition & 0 deletions backend/uploads/sample.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Adding the file just so that uploads folder is recognized via github
2 changes: 2 additions & 0 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,7 @@ cd "$backendDir"
python3.9 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
# -p: sources/ and uploads/ contain tracked sample files, so they normally
# already exist after a clone; plain mkdir would fail with "File exists".
mkdir -p sources uploads

cd "$currentDir"

0 comments on commit 02cdd4e

Please sign in to comment.