Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unit test for File Tokenizer #4

Merged
merged 12 commits
Nov 21, 2024
38 changes: 22 additions & 16 deletions .github/workflows/pylint.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
name: Pylint

on: [push]
on:
push:
branches:
- "**"
pull_request:
branches:
- "**"

jobs:
build:
Expand All @@ -9,18 +15,18 @@ jobs:
matrix:
python-version: ["3.9"]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
cd backend
pip install -r requirements.txt
pip install pylint
- name: Analysing the code with pylint
run: |
cd backend
pylint --fail-under=7.0 $(git ls-files '*.py')
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
cd backend
pip install -r requirements.txt
pip install pylint
- name: Analysing the code with pylint
run: |
cd backend
pylint --fail-under=7.0 $(git ls-files '*.py')
47 changes: 24 additions & 23 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,35 +4,36 @@ name: Python application

on:
  push:
    branches:
      - "**"
  pull_request:
    branches:
      - "**"

permissions:
  contents: read

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.9
        uses: actions/setup-python@v3
        with:
          python-version: "3.9"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          cd backend
          pip install -r requirements.txt
          # "View Coverage" below runs the `coverage` CLI, but nothing
          # installed it — add it here so that step does not fail on a
          # clean runner (unless requirements.txt already pins it).
          pip install coverage
      - name: Test
        run: |
          cd backend
          python -m unittest discover .
      - name: View Coverage
        run: |
          cd backend
          coverage run --source=code -m unittest discover -s .
          coverage report -m
2 changes: 1 addition & 1 deletion backend/code/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""
TODO
CODE folder is the main python package that has the logic of the system's backend
"""
123 changes: 53 additions & 70 deletions backend/code/file_tokenizer.py
Original file line number Diff line number Diff line change
@@ -1,101 +1,84 @@
"""
TODO
This file helps in creating and interpreting a .hackthehill file
"""

import hashlib
import os
import json

from pathlib import Path

from config import SOURCES_FOLDER
from code.utils import custom_encoding, custom_decoding
from config import SOURCES_FOLDER, HASH_EXTENSION


class SenderTokenizer:
def hash_file_blocks(file_path: str, block_size: int = 512):
"""
TODO
Takes the original file, creates a .hackthehill file containing a header and a dictionary
of keys with hashes. The header contains information regarding the original file and the
hashed contents. Information in the header is as follows: name of the file, size of the
file, number of hashed blocks present inside the .hackthehill file, size of each block
(information that they contain) in bytes.

:param file_path: str. The path to the original file.
:param block_size: Integer. The default competition block size was 512 bytes, change this
to change how large one block can be.
:return: A hash of the .hackthehill file created, this hashed name of the .hackthehill file
be used to hide the nature of the file in communication.
"""

def __init__(self, file_path):
self.file_path = file_path
hackthehill_file = Path(file_path).stem + HASH_EXTENSION
file_size: int = os.path.getsize(file_path)
num_blocks = (file_size + block_size - 1) // block_size

def hash_file_blocks(self, block_size=512):
"""
Takes the original file, creates a .hackthehill file containing a header and a dictionary
of keys with hashes. The header contains information regarding the original file and the
hashed contents. Information in the header is as follows: name of the file, size of the
file, number of hashed blocks present inside the .hackthehill file, size of each block
(information that they contain) in bytes.
# Create header
header = {
"header": {
"file_name": os.path.basename(file_path),
"file_size": file_size,
"number_of_blocks": num_blocks,
"block_size": block_size,
},
"blocks": {}
}

:param block_size: Integer. The default competition block size was 512 bytes, change this
to change how large one block can be.
:return: A hash of the .hackthehill file created, this hashed name of the .hackthehill file
be used to hide the nature of the file in communication
"""
block_hashes = {}

hackthehill_file = Path(self.file_path).stem + ".hackthehill"
file_size: int = os.path.getsize(self.file_path)
num_blocks = (file_size + block_size -
1) // block_size # Round up division
with open(file_path, "r", encoding="utf-8") as file:
for index in range(num_blocks):
block = file.read(block_size)
block_hash = custom_encoding(block)
block_hashes[index] = block_hash

# Create header
header = {
"header": {
"file_name": os.path.basename(self.file_path),
"file_size": file_size,
"number_of_blocks": num_blocks,
"block_size": block_size,
},
"blocks": {}
}
header["blocks"] = block_hashes

block_hashes = {}
hash_block = json.dumps(header, indent=2)

with open(self.file_path, 'rb') as file:
for index in range(num_blocks):
block = file.read(block_size)
block_hash = hashlib.sha256(block).hexdigest()
block_hashes[index] = block_hash
with open(os.path.join(SOURCES_FOLDER, hackthehill_file), 'w', encoding="utf-8") as f:
f.write(hash_block)

header["blocks"] = block_hashes
return custom_encoding(json.dumps(header))

hash_block = json.dumps(header, indent=2)

with open(os.path.join(SOURCES_FOLDER, hackthehill_file), 'w', encoding="utf-8") as f:
f.write(hash_block)

return hashlib.sha256(json.dumps(header).encode('utf-8')).hexdigest()


def get_block_content(hackthehill_file: str, block_index: int) -> str:
    """
    Decode and return one block of the original file from a .hackthehill
    descriptor.

    :param hackthehill_file: str. The path to the .hackthehill file.
    :param block_index: Integer. The dictionary index of the block with
        respect to other blocks in the sequence, from the .hackthehill file.
    :raises ValueError: If ``block_index`` is outside
        ``[0, number_of_blocks)`` as recorded in the descriptor header.
    :return: The block content, decoded with ``custom_decoding``.
    """
    with open(hackthehill_file, "r", encoding="utf-8") as f:
        file_content = json.loads(f.read())

    num_blocks = file_content["header"]["number_of_blocks"]

    if block_index < 0 or block_index >= num_blocks:
        raise ValueError(
            f"Block index out of range. Valid range: 0 to {num_blocks - 1}")

    # JSON object keys are strings, so the integer index must be stringified.
    encoded_content = file_content["blocks"][str(block_index)]
    block_content = custom_decoding(encoded_content)

    return block_content
25 changes: 12 additions & 13 deletions backend/code/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@
import io
import json

from code.utils import get_filename_by_file_id, custom_hash
from code.utils import get_filename_by_file_id, custom_encoding
from code.p2p_client import P2PClient
from code.file_tokenizer import SenderTokenizer
from code.file_tokenizer import hash_file_blocks

from pathlib import Path
from flask_cors import CORS
from flask import Flask, request, jsonify, send_file

from config import UPLOADS_FOLDER
from config import UPLOADS_FOLDER, HASH_EXTENSION, SOURCES_FOLDER, WEBSITE_DATA

app = Flask(__name__)
CORS(app)
Expand Down Expand Up @@ -44,17 +44,16 @@ def receive_file():
file_path = os.path.join(app.config['UPLOAD_FOLDER'], file.filename)
file.save(file_path)

tokenized_file = SenderTokenizer(file_path)
tokenized_file.hash_file_blocks()
hash_file_blocks(file_path)

with open("./sources/" + Path(file_path).stem + ".hackthehill", 'r', encoding="utf-8") as f:
file_hash = custom_hash(f.read())
hackthehill_file = os.path.join(SOURCES_FOLDER, Path(file_path).stem + HASH_EXTENSION)

fileData[file_hash] = {'path': file_path, 'hackthehill': "./sources/" +
Path(file_path).stem + ".hackthehill"}
# print(fileData)
with open(hackthehill_file, 'r', encoding="utf-8") as f:
file_hash = custom_encoding(f.read())

with open("website_data.json", "w", encoding="utf-8") as f:
fileData[file_hash] = {'path': file_path, 'hackthehill': hackthehill_file}

with open(WEBSITE_DATA, "w", encoding="utf-8") as f:
f.write(json.dumps(fileData, indent=2))

return jsonify({"status": "File uploaded", "file_path": file_path, "data": fileData}), 200
Expand All @@ -77,12 +76,12 @@ def receive_token():
print("Could not find the fucking file with file id " + file_hash)
return jsonify({"error": "Can't find file hash"}), 400

file_path = os.path.join('sources', files[1])
file_path = os.path.join(SOURCES_FOLDER, files[1])

with open(file_path, 'r', encoding="utf-8") as f:
file_with_extension = json.loads(f.read())['header']['file_name']

with open(os.path.join('uploads', file_with_extension), 'rb') as f:
with open(os.path.join(UPLOADS_FOLDER, file_with_extension), 'rb') as f:
file_data = f.read()

file_blob = io.BytesIO(file_data)
Expand Down
Loading
Loading