From 897a77664c4d29437eb8df1462f423a22ec0b41d Mon Sep 17 00:00:00 2001 From: Pratiksha Sankhe Date: Mon, 28 Oct 2024 23:53:34 +0530 Subject: [PATCH 1/9] added upload endpoint Signed-off-by: Pratiksha Sankhe --- .../api/elixircloud/csh/controllers.py | 126 +++++++++++++++++- cloud_storage_handler/api/specs/specs.yaml | 68 ++++++++++ deployment/config.yaml | 2 + poetry.lock | 119 ++++++++--------- pyproject.toml | 1 + 5 files changed, 251 insertions(+), 65 deletions(-) diff --git a/cloud_storage_handler/api/elixircloud/csh/controllers.py b/cloud_storage_handler/api/elixircloud/csh/controllers.py index fb23a34..301999f 100644 --- a/cloud_storage_handler/api/elixircloud/csh/controllers.py +++ b/cloud_storage_handler/api/elixircloud/csh/controllers.py @@ -1,9 +1,13 @@ """ELIXIR's Cloud Storage Handler controllers.""" +import hashlib import logging +import os +import uuid from http import HTTPStatus -from flask import jsonify +from flask import Flask, current_app, jsonify, request +from minio import S3Error logger = logging.getLogger(__name__) @@ -13,3 +17,123 @@ def home(): return jsonify( {"message": "Welcome to the Cloud Storage Handler server!"} ), HTTPStatus.OK + + +app = Flask(__name__) + +app.config["TUS_UPLOAD_DIR"] = "/tmp/tus_uploads" +app.config["TUS_CHUNK_SIZE"] = 8 * 1024 + + +def get_chunks(object, chunk_size): + """Generator to yield chunks from object.""" + while True: + chunk = object.read(chunk_size) + if not chunk: + break + yield chunk + + +def compute_file_hash(file_path): + """Compute MD5 hash of the file.""" + hash_md5 = hashlib.md5() + with open(file_path, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_md5.update(chunk) + return hash_md5.hexdigest() + + +def initiate_upload(): + """Initiate TUS upload, creates object and returns object_id.""" + object_id = str(uuid.uuid4()) + object_path = os.path.join(app.config["TUS_UPLOAD_DIR"], f"{object_id}.temp") + os.makedirs(app.config["TUS_UPLOAD_DIR"], exist_ok=True) + + open(object_path, "wb").close() + + return jsonify({"object_id": object_id}), HTTPStatus.CREATED + + +def upload_chunk(object_id): + """Upload a object chunk based on object_id and content-range.""" + if request.method == "OPTIONS": + response = jsonify({"status": "CORS preflight check"}) + response.headers.add("Access-Control-Allow-Origin", "*") + response.headers.add( + "Access-Control-Allow-Methods", "GET, POST, PATCH, PUT, DELETE, OPTIONS" + ) + response.headers.add( + "Access-Control-Allow-Headers", "Content-Type,Authorization" + ) + + object_path = os.path.join(app.config["TUS_UPLOAD_DIR"], f"{object_id}.temp") + if not os.path.exists(object_path): + return jsonify({"error": "object not found"}), HTTPStatus.NOT_FOUND + + try: + content_range = request.headers.get("Content-Range") + start_byte = int(content_range.split(" ")[1].split("-")[0]) + + with open(object_path, "r+b") as f: + f.seek(start_byte) + f.write(request.data) + + return jsonify( + {"message": "Chunk uploaded successfully"} + ), HTTPStatus.NO_CONTENT + except Exception as e: + return jsonify({"error": str(e)}), HTTPStatus.INTERNAL_SERVER_ERROR + + +def complete_upload(object_id): + """Complete upload by transferring the object to MinIO after TUS upload.""" + minio_config = current_app.config.foca.custom.minio + bucket_name = minio_config.bucket_name + minio_client = current_app.config.foca.custom.minio.client.client + object_path = os.path.join(app.config["TUS_UPLOAD_DIR"], f"{object_id}.temp") + + if not os.path.exists(object_path): + return jsonify({"error": "object not found"}), HTTPStatus.NOT_FOUND + + try: + # Compute the file's hash + file_hash = compute_file_hash(object_path) + + # Check if an object with the same hash exists in MinIO + found_duplicate = False + for obj in minio_client.list_objects(bucket_name): + obj_info = minio_client.stat_object(bucket_name, obj.object_name) + if ( + "file-hash" in obj_info.metadata + and obj_info.metadata["file-hash"] == file_hash + ): + found_duplicate = True + break + + if found_duplicate: + os.remove(object_path) + return jsonify( + {"message": "Duplicate object detected. Upload skipped."} + ), HTTPStatus.CONFLICT + + minio_client.fput_object( + bucket_name=bucket_name, + object_name=object_id, + file_path=object_path, + content_type="application/octet-stream", + ) + + os.remove(object_path) + + return jsonify( + {"message": "Upload complete and object stored in MinIO"} + ), HTTPStatus.OK + + except S3Error as e: + return jsonify( + {"error": f"Failed to upload to MinIO: {str(e)}"} + ), HTTPStatus.INTERNAL_SERVER_ERROR + except Exception as e: + return jsonify( + {"error": f"An unexpected error occurred: {str(e)}"} + ), HTTPStatus.INTERNAL_SERVER_ERROR diff --git a/cloud_storage_handler/api/specs/specs.yaml b/cloud_storage_handler/api/specs/specs.yaml index 2c61d7f..bc4c121 100644 --- a/cloud_storage_handler/api/specs/specs.yaml +++ b/cloud_storage_handler/api/specs/specs.yaml @@ -35,4 +35,72 @@ paths: description: The request is malformed. '500': description: An unexpected error occurred. + /upload/initiate: + post: + summary: Initiate TUS upload session + operationId: initiate_upload + tags: + - Upload + responses: + "201": + description: TUS upload session initiated + content: + application/json: + schema: + type: object + properties: + object_id: + type: string + description: Unique identifier for the upload session + /upload/{object_id}/chunk: + patch: + summary: Upload a file chunk + operationId: upload_chunk + tags: + - Upload + parameters: + - in: path + name: object_id + required: true + schema: + type: string + description: Unique identifier for the upload session + requestBody: + required: true + content: + application/octet-stream: + schema: + type: string + format: binary + responses: + "204": + description: Chunk uploaded successfully + "404": + description: File not found + /upload/complete/{object_id}: + post: + summary: Complete upload and store in MinIO + operationId: complete_upload + tags: + - Upload + parameters: + - in: path + name: object_id + required: true + schema: + type: string + description: Unique identifier for the upload session + responses: + "200": + description: Upload complete and file stored in MinIO + content: + application/json: + schema: + type: object + properties: + message: + type: string + description: Confirmation message + "404": + description: File not found ... diff --git a/deployment/config.yaml b/deployment/config.yaml index 0fec9c0..31e387c 100644 --- a/deployment/config.yaml +++ b/deployment/config.yaml @@ -21,6 +21,8 @@ security: - userinfo - public_key validation_checks: all + cors: + enabled: True api: specs: diff --git a/poetry.lock b/poetry.lock index 562b3fa..c844900 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1772,70 +1772,61 @@ tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] [[package]] name = "pymongo" -version = "4.10.1" +version = "4.8.0" description = "Python driver for MongoDB " optional = false python-versions = ">=3.8" files = [ - {file = "pymongo-4.10.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e699aa68c4a7dea2ab5a27067f7d3e08555f8d2c0dc6a0c8c60cfd9ff2e6a4b1"}, - {file = "pymongo-4.10.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:70645abc714f06b4ad6b72d5bf73792eaad14e3a2cfe29c62a9c81ada69d9e4b"}, - {file = "pymongo-4.10.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae2fd94c9fe048c94838badcc6e992d033cb9473eb31e5710b3707cba5e8aee2"}, - {file = "pymongo-4.10.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ded27a4a5374dae03a92e084a60cdbcecd595306555bda553b833baf3fc4868"}, - {file = "pymongo-4.10.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1ecc2455e3974a6c429687b395a0bc59636f2d6aedf5785098cf4e1f180f1c71"}, - {file = "pymongo-4.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a920fee41f7d0259f5f72c1f1eb331bc26ffbdc952846f9bd8c3b119013bb52c"}, - {file = "pymongo-4.10.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e0a15665b2d6cf364f4cd114d62452ce01d71abfbd9c564ba8c74dcd7bbd6822"}, - {file = "pymongo-4.10.1-cp310-cp310-win32.whl", hash = "sha256:29e1c323c28a4584b7095378ff046815e39ff82cdb8dc4cc6dfe3acf6f9ad1f8"}, - {file = "pymongo-4.10.1-cp310-cp310-win_amd64.whl", hash = "sha256:88dc4aa45f8744ccfb45164aedb9a4179c93567bbd98a33109d7dc400b00eb08"}, - {file = "pymongo-4.10.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:57ee6becae534e6d47848c97f6a6dff69e3cce7c70648d6049bd586764febe59"}, - {file = "pymongo-4.10.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6f437a612f4d4f7aca1812311b1e84477145e950fdafe3285b687ab8c52541f3"}, - {file = "pymongo-4.10.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a970fd3117ab40a4001c3dad333bbf3c43687d90f35287a6237149b5ccae61d"}, - {file = "pymongo-4.10.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7c4d0e7cd08ef9f8fbf2d15ba281ed55604368a32752e476250724c3ce36c72e"}, - {file = "pymongo-4.10.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ca6f700cff6833de4872a4e738f43123db34400173558b558ae079b5535857a4"}, - {file = "pymongo-4.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cec237c305fcbeef75c0bcbe9d223d1e22a6e3ba1b53b2f0b79d3d29c742b45b"}, - {file = "pymongo-4.10.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3337804ea0394a06e916add4e5fac1c89902f1b6f33936074a12505cab4ff05"}, - {file = "pymongo-4.10.1-cp311-cp311-win32.whl", hash = "sha256:778ac646ce6ac1e469664062dfe9ae1f5c9961f7790682809f5ec3b8fda29d65"}, - {file = "pymongo-4.10.1-cp311-cp311-win_amd64.whl", hash = "sha256:9df4ab5594fdd208dcba81be815fa8a8a5d8dedaf3b346cbf8b61c7296246a7a"}, - {file = "pymongo-4.10.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fbedc4617faa0edf423621bb0b3b8707836687161210d470e69a4184be9ca011"}, - {file = "pymongo-4.10.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7bd26b2aec8ceeb95a5d948d5cc0f62b0eb6d66f3f4230705c1e3d3d2c04ec76"}, - {file = "pymongo-4.10.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb104c3c2a78d9d85571c8ac90ec4f95bca9b297c6eee5ada71fabf1129e1674"}, - {file = "pymongo-4.10.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4924355245a9c79f77b5cda2db36e0f75ece5faf9f84d16014c0a297f6d66786"}, - {file = "pymongo-4.10.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:11280809e5dacaef4971113f0b4ff4696ee94cfdb720019ff4fa4f9635138252"}, - {file = "pymongo-4.10.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5d55f2a82e5eb23795f724991cac2bffbb1c0f219c0ba3bf73a835f97f1bb2e"}, - {file = "pymongo-4.10.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e974ab16a60be71a8dfad4e5afccf8dd05d41c758060f5d5bda9a758605d9a5d"}, - {file = "pymongo-4.10.1-cp312-cp312-win32.whl", hash = "sha256:544890085d9641f271d4f7a47684450ed4a7344d6b72d5968bfae32203b1bb7c"}, - {file = "pymongo-4.10.1-cp312-cp312-win_amd64.whl", hash = "sha256:dcc07b1277e8b4bf4d7382ca133850e323b7ab048b8353af496d050671c7ac52"}, - {file = "pymongo-4.10.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:90bc6912948dfc8c363f4ead54d54a02a15a7fee6cfafb36dc450fc8962d2cb7"}, - {file = "pymongo-4.10.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:594dd721b81f301f33e843453638e02d92f63c198358e5a0fa8b8d0b1218dabc"}, - {file = "pymongo-4.10.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0783e0c8e95397c84e9cf8ab092ab1e5dd7c769aec0ef3a5838ae7173b98dea0"}, - {file = "pymongo-4.10.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6fb6a72e88df46d1c1040fd32cd2d2c5e58722e5d3e31060a0393f04ad3283de"}, - {file = "pymongo-4.10.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e3a593333e20c87415420a4fb76c00b7aae49b6361d2e2205b6fece0563bf40"}, - {file = "pymongo-4.10.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72e2ace7456167c71cfeca7dcb47bd5dceda7db2231265b80fc625c5e8073186"}, - {file = "pymongo-4.10.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ad05eb9c97e4f589ed9e74a00fcaac0d443ccd14f38d1258eb4c39a35dd722b"}, - {file = "pymongo-4.10.1-cp313-cp313-win32.whl", hash = "sha256:ee4c86d8e6872a61f7888fc96577b0ea165eb3bdb0d841962b444fa36001e2bb"}, - {file = "pymongo-4.10.1-cp313-cp313-win_amd64.whl", hash = "sha256:45ee87a4e12337353242bc758accc7fb47a2f2d9ecc0382a61e64c8f01e86708"}, - {file = "pymongo-4.10.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:442ca247f53ad24870a01e80a71cd81b3f2318655fd9d66748ee2bd1b1569d9e"}, - {file = "pymongo-4.10.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:23e1d62df5592518204943b507be7b457fb8a4ad95a349440406fd42db5d0923"}, - {file = "pymongo-4.10.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6131bc6568b26e7495a9f3ef2b1700566b76bbecd919f4472bfe90038a61f425"}, - {file = "pymongo-4.10.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fdeba88c540c9ed0338c0b2062d9f81af42b18d6646b3e6dda05cf6edd46ada9"}, - {file = "pymongo-4.10.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:15a624d752dd3c89d10deb0ef6431559b6d074703cab90a70bb849ece02adc6b"}, - {file = "pymongo-4.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba164e73fdade9b4614a2497321c5b7512ddf749ed508950bdecc28d8d76a2d9"}, - {file = "pymongo-4.10.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9235fa319993405ae5505bf1333366388add2e06848db7b3deee8f990b69808e"}, - {file = "pymongo-4.10.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e4a65567bd17d19f03157c7ec992c6530eafd8191a4e5ede25566792c4fe3fa2"}, - {file = "pymongo-4.10.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:f1945d48fb9b8a87d515da07f37e5b2c35b364a435f534c122e92747881f4a7c"}, - {file = "pymongo-4.10.1-cp38-cp38-win32.whl", hash = "sha256:345f8d340802ebce509f49d5833cc913da40c82f2e0daf9f60149cacc9ca680f"}, - {file = "pymongo-4.10.1-cp38-cp38-win_amd64.whl", hash = "sha256:3a70d5efdc0387ac8cd50f9a5f379648ecfc322d14ec9e1ba8ec957e5d08c372"}, - {file = "pymongo-4.10.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:15b1492cc5c7cd260229590be7218261e81684b8da6d6de2660cf743445500ce"}, - {file = "pymongo-4.10.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:95207503c41b97e7ecc7e596d84a61f441b4935f11aa8332828a754e7ada8c82"}, - {file = "pymongo-4.10.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb99f003c720c6d83be02c8f1a7787c22384a8ca9a4181e406174db47a048619"}, - {file = "pymongo-4.10.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f2bc1ee4b1ca2c4e7e6b7a5e892126335ec8d9215bcd3ac2fe075870fefc3358"}, - {file = "pymongo-4.10.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:93a0833c10a967effcd823b4e7445ec491f0bf6da5de0ca33629c0528f42b748"}, - {file = "pymongo-4.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f56707497323150bd2ed5d63067f4ffce940d0549d4ea2dfae180deec7f9363"}, - {file = "pymongo-4.10.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:409ab7d6c4223e5c85881697f365239dd3ed1b58f28e4124b846d9d488c86880"}, - {file = "pymongo-4.10.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dac78a650dc0637d610905fd06b5fa6419ae9028cf4d04d6a2657bc18a66bbce"}, - {file = "pymongo-4.10.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1ec3fa88b541e0481aff3c35194c9fac96e4d57ec5d1c122376000eb28c01431"}, - {file = "pymongo-4.10.1-cp39-cp39-win32.whl", hash = "sha256:e0e961923a7b8a1c801c43552dcb8153e45afa41749d9efbd3a6d33f45489f7a"}, - {file = "pymongo-4.10.1-cp39-cp39-win_amd64.whl", hash = "sha256:dabe8bf1ad644e6b93f3acf90ff18536d94538ca4d27e583c6db49889e98e48f"}, - {file = "pymongo-4.10.1.tar.gz", hash = "sha256:a9de02be53b6bb98efe0b9eda84ffa1ec027fcb23a2de62c4f941d9a2f2f3330"}, + {file = "pymongo-4.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f2b7bec27e047e84947fbd41c782f07c54c30c76d14f3b8bf0c89f7413fac67a"}, + {file = "pymongo-4.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3c68fe128a171493018ca5c8020fc08675be130d012b7ab3efe9e22698c612a1"}, + {file = "pymongo-4.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:920d4f8f157a71b3cb3f39bc09ce070693d6e9648fb0e30d00e2657d1dca4e49"}, + {file = "pymongo-4.8.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52b4108ac9469febba18cea50db972605cc43978bedaa9fea413378877560ef8"}, + {file = "pymongo-4.8.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:180d5eb1dc28b62853e2f88017775c4500b07548ed28c0bd9c005c3d7bc52526"}, + {file = "pymongo-4.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aec2b9088cdbceb87e6ca9c639d0ff9b9d083594dda5ca5d3c4f6774f4c81b33"}, + {file = "pymongo-4.8.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d0cf61450feadca81deb1a1489cb1a3ae1e4266efd51adafecec0e503a8dcd84"}, + {file = "pymongo-4.8.0-cp310-cp310-win32.whl", hash = "sha256:8b18c8324809539c79bd6544d00e0607e98ff833ca21953df001510ca25915d1"}, + {file = "pymongo-4.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:e5df28f74002e37bcbdfdc5109799f670e4dfef0fb527c391ff84f078050e7b5"}, + {file = "pymongo-4.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6b50040d9767197b77ed420ada29b3bf18a638f9552d80f2da817b7c4a4c9c68"}, + {file = "pymongo-4.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:417369ce39af2b7c2a9c7152c1ed2393edfd1cbaf2a356ba31eb8bcbd5c98dd7"}, + {file = "pymongo-4.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf821bd3befb993a6db17229a2c60c1550e957de02a6ff4dd0af9476637b2e4d"}, + {file = "pymongo-4.8.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9365166aa801c63dff1a3cb96e650be270da06e3464ab106727223123405510f"}, + {file = "pymongo-4.8.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc8b8582f4209c2459b04b049ac03c72c618e011d3caa5391ff86d1bda0cc486"}, + {file = "pymongo-4.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16e5019f75f6827bb5354b6fef8dfc9d6c7446894a27346e03134d290eb9e758"}, + {file = "pymongo-4.8.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3b5802151fc2b51cd45492c80ed22b441d20090fb76d1fd53cd7760b340ff554"}, + {file = "pymongo-4.8.0-cp311-cp311-win32.whl", hash = "sha256:4bf58e6825b93da63e499d1a58de7de563c31e575908d4e24876234ccb910eba"}, + {file = "pymongo-4.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:b747c0e257b9d3e6495a018309b9e0c93b7f0d65271d1d62e572747f4ffafc88"}, + {file = "pymongo-4.8.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e6a720a3d22b54183352dc65f08cd1547204d263e0651b213a0a2e577e838526"}, + {file = "pymongo-4.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:31e4d21201bdf15064cf47ce7b74722d3e1aea2597c6785882244a3bb58c7eab"}, + {file = "pymongo-4.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6b804bb4f2d9dc389cc9e827d579fa327272cdb0629a99bfe5b83cb3e269ebf"}, + {file = "pymongo-4.8.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f2fbdb87fe5075c8beb17a5c16348a1ea3c8b282a5cb72d173330be2fecf22f5"}, + {file = "pymongo-4.8.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd39455b7ee70aabee46f7399b32ab38b86b236c069ae559e22be6b46b2bbfc4"}, + {file = "pymongo-4.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:940d456774b17814bac5ea7fc28188c7a1338d4a233efbb6ba01de957bded2e8"}, + {file = "pymongo-4.8.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:236bbd7d0aef62e64caf4b24ca200f8c8670d1a6f5ea828c39eccdae423bc2b2"}, + {file = "pymongo-4.8.0-cp312-cp312-win32.whl", hash = "sha256:47ec8c3f0a7b2212dbc9be08d3bf17bc89abd211901093e3ef3f2adea7de7a69"}, + {file = "pymongo-4.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:e84bc7707492f06fbc37a9f215374d2977d21b72e10a67f1b31893ec5a140ad8"}, + {file = "pymongo-4.8.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:519d1bab2b5e5218c64340b57d555d89c3f6c9d717cecbf826fb9d42415e7750"}, + {file = "pymongo-4.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:87075a1feb1e602e539bdb1ef8f4324a3427eb0d64208c3182e677d2c0718b6f"}, + {file = "pymongo-4.8.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f53429515d2b3e86dcc83dadecf7ff881e538c168d575f3688698a8707b80a"}, + {file = "pymongo-4.8.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fdc20cd1e1141b04696ffcdb7c71e8a4a665db31fe72e51ec706b3bdd2d09f36"}, + {file = "pymongo-4.8.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:284d0717d1a7707744018b0b6ee7801b1b1ff044c42f7be7a01bb013de639470"}, + {file = "pymongo-4.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5bf0eb8b6ef40fa22479f09375468c33bebb7fe49d14d9c96c8fd50355188b0"}, + {file = "pymongo-4.8.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ecd71b9226bd1d49416dc9f999772038e56f415a713be51bf18d8676a0841c8"}, + {file = "pymongo-4.8.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e0061af6e8c5e68b13f1ec9ad5251247726653c5af3c0bbdfbca6cf931e99216"}, + {file = "pymongo-4.8.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:658d0170f27984e0d89c09fe5c42296613b711a3ffd847eb373b0dbb5b648d5f"}, + {file = "pymongo-4.8.0-cp38-cp38-win32.whl", hash = "sha256:3ed1c316718a2836f7efc3d75b4b0ffdd47894090bc697de8385acd13c513a70"}, + {file = "pymongo-4.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:7148419eedfea9ecb940961cfe465efaba90595568a1fb97585fb535ea63fe2b"}, + {file = "pymongo-4.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e8400587d594761e5136a3423111f499574be5fd53cf0aefa0d0f05b180710b0"}, + {file = "pymongo-4.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:af3e98dd9702b73e4e6fd780f6925352237f5dce8d99405ff1543f3771201704"}, + {file = "pymongo-4.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de3a860f037bb51f968de320baef85090ff0bbb42ec4f28ec6a5ddf88be61871"}, + {file = "pymongo-4.8.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0fc18b3a093f3db008c5fea0e980dbd3b743449eee29b5718bc2dc15ab5088bb"}, + {file = "pymongo-4.8.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18c9d8f975dd7194c37193583fd7d1eb9aea0c21ee58955ecf35362239ff31ac"}, + {file = "pymongo-4.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:408b2f8fdbeca3c19e4156f28fff1ab11c3efb0407b60687162d49f68075e63c"}, + {file = "pymongo-4.8.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b6564780cafd6abeea49759fe661792bd5a67e4f51bca62b88faab497ab5fe89"}, + {file = "pymongo-4.8.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d18d86bc9e103f4d3d4f18b85a0471c0e13ce5b79194e4a0389a224bb70edd53"}, + {file = "pymongo-4.8.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:9097c331577cecf8034422956daaba7ec74c26f7b255d718c584faddd7fa2e3c"}, + {file = "pymongo-4.8.0-cp39-cp39-win32.whl", hash = "sha256:d5428dbcd43d02f6306e1c3c95f692f68b284e6ee5390292242f509004c9e3a8"}, + {file = "pymongo-4.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:ef7225755ed27bfdb18730c68f6cb023d06c28f2b734597480fb4c0e500feb6f"}, + {file = "pymongo-4.8.0.tar.gz", hash = "sha256:454f2295875744dc70f1881e4b2eb99cdad008a33574bc8aaf120530f66c0cde"}, ] [package.dependencies] @@ -1843,12 +1834,12 @@ dnspython = ">=1.16.0,<3.0.0" [package.extras] aws = ["pymongo-auth-aws (>=1.1.0,<2.0.0)"] -docs = ["furo (==2023.9.10)", "readthedocs-sphinx-search (>=0.3,<1.0)", "sphinx (>=5.3,<8)", "sphinx-autobuild (>=2020.9.1)", "sphinx-rtd-theme (>=2,<3)", "sphinxcontrib-shellcheck (>=1,<2)"] -encryption = ["certifi", "pymongo-auth-aws (>=1.1.0,<2.0.0)", "pymongocrypt (>=1.10.0,<2.0.0)"] +docs = ["furo (==2023.9.10)", "readthedocs-sphinx-search (>=0.3,<1.0)", "sphinx (>=5.3,<8)", "sphinx-rtd-theme (>=2,<3)", "sphinxcontrib-shellcheck (>=1,<2)"] +encryption = ["certifi", "pymongo-auth-aws (>=1.1.0,<2.0.0)", "pymongocrypt (>=1.6.0,<2.0.0)"] gssapi = ["pykerberos", "winkerberos (>=0.5.0)"] ocsp = ["certifi", "cryptography (>=2.5)", "pyopenssl (>=17.2.0)", "requests (<3.0.0)", "service-identity (>=18.1.0)"] snappy = ["python-snappy"] -test = ["pytest (>=8.2)", "pytest-asyncio (>=0.24.0)"] +test = ["pytest (>=7)"] zstd = ["zstandard"] [[package]] @@ -2789,4 +2780,4 @@ watchdog = ["watchdog (>=2.3)"] [metadata] lock-version = "2.0" python-versions = ">=3.11,<4.0" -content-hash = "ffb925f124704f7c25777ccddf33db730125530c7df7c7bc609607ef8c3edec9" +content-hash = "5b1eb113eb006bb3da74cec10c255df7f066428849c677f7dffa280ce0210800" diff --git a/pyproject.toml b/pyproject.toml index 8377d03..83f7494 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,7 @@ version = "0.1.0" flask = "2.2.3" foca = "^0.13.0" minio = "^7.2.9" +pymongo = "4.8.0" python = ">=3.11,<4.0" sphinx = "^8.0.2" From 366e272a7f2779ac722b22456cc6d667e77217ee Mon Sep 17 00:00:00 2001 From: Pratiksha Sankhe Date: Mon, 28 Oct 2024 23:56:44 +0530 Subject: [PATCH 2/9] removed unused function Signed-off-by: Pratiksha Sankhe --- .../api/elixircloud/csh/controllers.py | 24 ++++--------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/cloud_storage_handler/api/elixircloud/csh/controllers.py b/cloud_storage_handler/api/elixircloud/csh/controllers.py index 301999f..2463050 100644 --- a/cloud_storage_handler/api/elixircloud/csh/controllers.py +++ b/cloud_storage_handler/api/elixircloud/csh/controllers.py @@ -18,21 +18,7 @@ def home(): {"message": "Welcome to the Cloud Storage Handler server!"} ), HTTPStatus.OK - -app = Flask(__name__) - -app.config["TUS_UPLOAD_DIR"] = "/tmp/tus_uploads" -app.config["TUS_CHUNK_SIZE"] = 8 * 1024 - - -def get_chunks(object, chunk_size): - """Generator to yield chunks from object.""" - while True: - chunk = object.read(chunk_size) - if not chunk: - break - yield chunk - +TUS_UPLOAD_DIR = "/tmp/tus_uploads" def compute_file_hash(file_path): """Compute MD5 hash of the file.""" @@ -46,8 +32,8 @@ def compute_file_hash(file_path): def initiate_upload(): """Initiate TUS upload, creates object and returns object_id.""" object_id = str(uuid.uuid4()) - object_path = os.path.join(app.config["TUS_UPLOAD_DIR"], f"{object_id}.temp") - os.makedirs(app.config["TUS_UPLOAD_DIR"], exist_ok=True) + object_path = os.path.join(TUS_UPLOAD_DIR, f"{object_id}.temp") + os.makedirs(TUS_UPLOAD_DIR, exist_ok=True) open(object_path, "wb").close() @@ -66,7 +52,7 @@ def upload_chunk(object_id): "Access-Control-Allow-Headers", "Content-Type,Authorization" ) - object_path = os.path.join(app.config["TUS_UPLOAD_DIR"], f"{object_id}.temp") + object_path = os.path.join(TUS_UPLOAD_DIR, f"{object_id}.temp") if not os.path.exists(object_path): return jsonify({"error": "object not found"}), HTTPStatus.NOT_FOUND @@ -90,7 +76,7 @@ def complete_upload(object_id): minio_config = current_app.config.foca.custom.minio bucket_name = minio_config.bucket_name minio_client = current_app.config.foca.custom.minio.client.client - object_path = os.path.join(app.config["TUS_UPLOAD_DIR"], f"{object_id}.temp") + object_path = os.path.join(TUS_UPLOAD_DIR, f"{object_id}.temp") if not os.path.exists(object_path): return jsonify({"error": "object not found"}), HTTPStatus.NOT_FOUND From eb42ffedb78a3cf151108fd9566153889aa5766f Mon Sep 17 00:00:00 2001 From: Pratiksha Sankhe Date: Mon, 28 Oct 2024 23:58:16 +0530 Subject: [PATCH 3/9] formatted Signed-off-by: Pratiksha Sankhe --- cloud_storage_handler/api/elixircloud/csh/controllers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cloud_storage_handler/api/elixircloud/csh/controllers.py b/cloud_storage_handler/api/elixircloud/csh/controllers.py index 2463050..79ba02b 100644 --- a/cloud_storage_handler/api/elixircloud/csh/controllers.py +++ b/cloud_storage_handler/api/elixircloud/csh/controllers.py @@ -6,7 +6,7 @@ import uuid from http import HTTPStatus -from flask import Flask, current_app, jsonify, request +from flask import current_app, jsonify, request from minio import S3Error logger = logging.getLogger(__name__) @@ -18,8 +18,10 @@ def home(): {"message": "Welcome to the Cloud Storage Handler server!"} ), HTTPStatus.OK + TUS_UPLOAD_DIR = "/tmp/tus_uploads" + def compute_file_hash(file_path): """Compute MD5 hash of the file.""" hash_md5 = hashlib.md5() From 144dbd6af0961e2a926014e5451778b29a062126 Mon Sep 17 00:00:00 2001 From: Pratiksha Sankhe Date: Tue, 29 Oct 2024 00:00:40 +0530 Subject: [PATCH 4/9] fixing vulnerability Signed-off-by: Pratiksha Sankhe --- .../api/elixircloud/csh/controllers.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cloud_storage_handler/api/elixircloud/csh/controllers.py b/cloud_storage_handler/api/elixircloud/csh/controllers.py index 79ba02b..7750d81 100644 --- a/cloud_storage_handler/api/elixircloud/csh/controllers.py +++ b/cloud_storage_handler/api/elixircloud/csh/controllers.py @@ -3,6 +3,7 @@ import hashlib import logging import os +import tempfile import uuid from http import HTTPStatus @@ -11,6 +12,8 @@ logger = logging.getLogger(__name__) +TUS_UPLOAD_DIR = tempfile.gettempdir() + def home(): """Endpoint to return a welcome message.""" @@ -19,16 +22,13 @@ def home(): ), HTTPStatus.OK -TUS_UPLOAD_DIR = "/tmp/tus_uploads" - - def compute_file_hash(file_path): - """Compute MD5 hash of the file.""" - hash_md5 = hashlib.md5() + """Compute SHA-256 hash of the file.""" + hash_sha256 = hashlib.sha256() with open(file_path, "rb") as f: for chunk in iter(lambda: f.read(4096), b""): - hash_md5.update(chunk) - return hash_md5.hexdigest() + hash_sha256.update(chunk) + return hash_sha256.hexdigest() def initiate_upload(): From d45e707fe144c260caaafe82536eb491db9acf42 Mon Sep 17 00:00:00 2001 From: Pratiksha Sankhe Date: Thu, 31 Oct 2024 01:28:44 +0530 Subject: [PATCH 5/9] added new tus logic Signed-off-by: Pratiksha Sankhe --- .../api/elixircloud/csh/controllers.py | 143 ++----- .../api/elixircloud/csh/tus/__init__.py | 1 + .../api/elixircloud/csh/tus/tus.py | 378 ++++++++++++++++++ cloud_storage_handler/api/specs/specs.yaml | 125 +++--- ...ud_storage_handler.api.elixircloud.csh.rst | 8 + ...torage_handler.api.elixircloud.csh.tus.rst | 21 + 6 files changed, 498 insertions(+), 178 deletions(-) create mode 100644 cloud_storage_handler/api/elixircloud/csh/tus/__init__.py create mode 100644 cloud_storage_handler/api/elixircloud/csh/tus/tus.py create mode 100644 docs/source/pages/cloud_storage_handler.api.elixircloud.csh.tus.rst diff --git a/cloud_storage_handler/api/elixircloud/csh/controllers.py b/cloud_storage_handler/api/elixircloud/csh/controllers.py index 7750d81..188b1ec 100644 --- a/cloud_storage_handler/api/elixircloud/csh/controllers.py +++ b/cloud_storage_handler/api/elixircloud/csh/controllers.py @@ -1,127 +1,50 @@ """ELIXIR's Cloud Storage Handler controllers.""" -import hashlib import logging -import os -import tempfile -import uuid -from http import HTTPStatus -from flask import current_app, jsonify, request -from minio import S3Error +from flask import current_app +from foca.utils.logging import log_traffic # type: ignore -logger = logging.getLogger(__name__) - -TUS_UPLOAD_DIR = tempfile.gettempdir() - - -def home(): - """Endpoint to return a welcome message.""" - return jsonify( - {"message": "Welcome to the Cloud Storage Handler server!"} - ), HTTPStatus.OK - - -def compute_file_hash(file_path): - """Compute SHA-256 hash of the file.""" - hash_sha256 = hashlib.sha256() - with open(file_path, "rb") as f: - for chunk in iter(lambda: f.read(4096), b""): - hash_sha256.update(chunk) - return hash_sha256.hexdigest() - - -def initiate_upload(): - """Initiate TUS upload, creates object and returns object_id.""" - object_id = str(uuid.uuid4()) - object_path = os.path.join(TUS_UPLOAD_DIR, f"{object_id}.temp") - os.makedirs(TUS_UPLOAD_DIR, exist_ok=True) - - open(object_path, "wb").close() - - return jsonify({"object_id": object_id}), HTTPStatus.CREATED - - -def upload_chunk(object_id): - """Upload a object chunk based on object_id and content-range.""" - if request.method == "OPTIONS": - response = jsonify({"status": "CORS preflight check"}) - response.headers.add("Access-Control-Allow-Origin", "*") - response.headers.add( - "Access-Control-Allow-Methods", "GET, POST, PATCH, PUT, DELETE, OPTIONS" - ) - response.headers.add( - "Access-Control-Allow-Headers", "Content-Type,Authorization" - ) - - object_path = os.path.join(TUS_UPLOAD_DIR, f"{object_id}.temp") - if not os.path.exists(object_path): - return jsonify({"error": "object not found"}), HTTPStatus.NOT_FOUND - - try: - content_range = request.headers.get("Content-Range") - start_byte = int(content_range.split(" ")[1].split("-")[0]) +from cloud_storage_handler.api.elixircloud.csh.tus.tus import TusController - with open(object_path, "r+b") as f: - f.seek(start_byte) - f.write(request.data) - - return jsonify( - {"message": "Chunk uploaded successfully"} - ), HTTPStatus.NO_CONTENT - except Exception as e: - return jsonify({"error": str(e)}), HTTPStatus.INTERNAL_SERVER_ERROR +logger = logging.getLogger(__name__) -def complete_upload(object_id): - """Complete upload by transferring the object to MinIO after TUS upload.""" +@log_traffic +def upload_object(): + """Upload an object to the storage.""" minio_config = current_app.config.foca.custom.minio bucket_name = minio_config.bucket_name minio_client = current_app.config.foca.custom.minio.client.client - object_path = os.path.join(TUS_UPLOAD_DIR, f"{object_id}.temp") - - if not os.path.exists(object_path): - return jsonify({"error": "object not found"}), HTTPStatus.NOT_FOUND - - try: - # Compute the file's hash - file_hash = compute_file_hash(object_path) + tus = TusController(minio_client, bucket_name) + return tus.tus_object_upload() - # Check if an object with the same hash exists in MinIO - found_duplicate = False - for obj in minio_client.list_objects(bucket_name): - obj_info = minio_client.stat_object(bucket_name, obj.object_name) - if ( - "file-hash" in obj_info.metadata - and obj_info.metadata["file-hash"] == file_hash - ): - found_duplicate = True - break - if found_duplicate: - os.remove(object_path) - return jsonify( - {"message": "Duplicate object detected. Upload skipped."} - ), HTTPStatus.CONFLICT +@log_traffic +def object_upload_chunk_head(resourceId): + """Handle HEAD request for chunk upload.""" + minio_config = current_app.config.foca.custom.minio + bucket_name = minio_config.bucket_name + minio_client = current_app.config.foca.custom.minio.client.client + tus = TusController(minio_client, bucket_name) + return tus.tus_object_upload_chunk(resourceId) - minio_client.fput_object( - bucket_name=bucket_name, - object_name=object_id, - file_path=object_path, - content_type="application/octet-stream", - ) - os.remove(object_path) +@log_traffic +def object_upload_chunk_patch(resourceId): + """Handle PATCH request for chunk upload.""" + minio_config = current_app.config.foca.custom.minio + bucket_name = minio_config.bucket_name + minio_client = current_app.config.foca.custom.minio.client.client + tus = TusController(minio_client, bucket_name) + return tus.tus_object_upload_chunk(resourceId) - return jsonify( - {"message": "Upload complete and object stored in MinIO"} - ), HTTPStatus.OK - except S3Error as e: - return jsonify( - {"error": f"Failed to upload to MinIO: {str(e)}"} - ), HTTPStatus.INTERNAL_SERVER_ERROR - except Exception as e: - return jsonify( - {"error": f"An unexpected error occurred: {str(e)}"} - ), HTTPStatus.INTERNAL_SERVER_ERROR +@log_traffic +def object_upload_chunk_delete(resourceId): + """Handle DELETE request for chunk upload.""" + minio_config = current_app.config.foca.custom.minio + bucket_name = minio_config.bucket_name + minio_client = current_app.config.foca.custom.minio.client.client + tus = TusController(minio_client, bucket_name) + return tus.tus_object_upload_chunk(resourceId) diff --git a/cloud_storage_handler/api/elixircloud/csh/tus/__init__.py b/cloud_storage_handler/api/elixircloud/csh/tus/__init__.py new file mode 100644 index 0000000..3765c5c --- /dev/null +++ b/cloud_storage_handler/api/elixircloud/csh/tus/__init__.py @@ -0,0 +1 @@ +"""ELIXIR Cloud Storage Handler TUS Server Implementation.""" diff --git a/cloud_storage_handler/api/elixircloud/csh/tus/tus.py b/cloud_storage_handler/api/elixircloud/csh/tus/tus.py new file mode 100644 index 0000000..ef6a7ea --- /dev/null +++ b/cloud_storage_handler/api/elixircloud/csh/tus/tus.py @@ -0,0 +1,378 @@ +"""TUS-based file upload controller with MinIO.""" + +import base64 +import hashlib +import io +import json +import logging +import uuid + +from flask import current_app, make_response, request +from minio.error import S3Error + +logger = logging.getLogger(__name__) + + +class TusController: + """Controller for handling TUS-based file uploads using MinIO as backend storage.""" + + def __init__(self, minio_client, bucket_name): + """Initialize TusController with MinIO client and bucket name. + + Args: + minio_client: MinIO client for interacting with the storage. + bucket_name: Name of the bucket in MinIO to store files. + """ + self.minio_client = minio_client + self.bucket_name = bucket_name + self.tus_api_version = "1.0.0" + self.tus_api_version_supported = "1.0.0" + self.tus_api_extensions = ["creation", "expiration", "termination"] + self.tus_max_object_size = 50 * 1024 * 1024 * 1024 # 50 GB + self.object_overwrite = False + self.upload_url = "elixircloud/csh/v1/object" + + def compute_sha256(self, data): + """Compute the SHA-256 checksum of the provided data. + + Args: + data: Byte data to calculate the checksum for. + + Returns: + str: SHA-256 hash of the data as a hexadecimal string. + """ + sha256_hash = hashlib.sha256() + sha256_hash.update(data) + return sha256_hash.hexdigest() + + # def tus_object_upload(self): + # """Handle TUS protocol for object upload operations. + + # Supports various HTTP methods: + # - HEAD: Checks object status or retrieves upload offset. + # - GET: Fetches metadata or checks object existence. + # - OPTIONS: Provides TUS protocol options. + # - POST/PUT: Handles new object upload. + + # Returns: + # Flask response: Response indicating success or error. + # """ + # response = make_response("", 200) + + # if request.method == "HEAD": + # return self.handle_head_request(response) + + # elif request.method == "GET": + # return self.handle_get_request(response) + + # elif ( + # request.method == "OPTIONS" + # and request.headers.get("Access-Control-Request-Method", None) is not None + # ): + # return response + + # # Handle general TUS upload methods and responses + # if request.headers.get("Tus-Resumable") is not None: + # response.headers["Tus-Resumable"] = self.tus_api_version + # response.headers["Tus-Version"] = self.tus_api_version_supported + + # if request.method == "OPTIONS": + # response.headers["Tus-Extension"] = ",".join(self.tus_api_extensions) + # response.headers["Tus-Max-Size"] = self.tus_max_object_size + # response.status_code = 204 + # return response + + # metadata = {} + # upload_metadata = request.headers.get("Upload-Metadata", None) + # if upload_metadata: + # for kv in upload_metadata.split(","): + # key, value = kv.split(" ") + # metadata[key] = base64.b64decode(value).decode("utf-8") + + # if not self.object_overwrite: + # try: + # incoming_sha256 = self.compute_sha256(request.data) + + # for obj in self.minio_client.list_objects( + # self.bucket_name, recursive=True + # ): + # existing_data = self.minio_client.get_object( + # self.bucket_name, obj.object_name + # ) + # existing_sha256 = self.compute_sha256(existing_data.read()) + # existing_data.close() + + # if existing_sha256 == incoming_sha256: + # response.status_code = 409 + # response.data = json.dumps( + # { + # "message": ( + # "Object with the same content already exists." + # ), + # "objectname": obj.object_name, + # } + # ) + # response.headers["Content-Type"] = "application/json" + # return response + + # except S3Error as e: + # if e.code != "NoSuchKey": + # response.status_code = 500 + # return response + + # resource_id = str(uuid.uuid4()) + + # try: + # data_stream = io.BytesIO(request.data) + # self.minio_client.put_object( + # self.bucket_name, resource_id, data_stream, len(request.data) + # ) + # except S3Error as e: + # current_app.logger.error(f"Unable to upload object to MinIO: {e}") + # response.status_code = 500 + # return response + + # response.status_code = 201 + # response.headers["Location"] = ( + # f"{request.url_root}/{self.upload_url}/{resource_id}" + # ) + # response.headers["Tus-Temp-Objectname"] = resource_id + # response.autocorrect_location_header = False + + # else: + # current_app.logger.warning( + # "Received object upload for unsupported object transfer protocol" + # ) + # response.data = ( + # "Received object upload for unsupported object transfer protocol" + # ) + # response.status_code = 500 + + # return response + + def tus_object_upload(self): + """Handle TUS protocol for object upload operations.""" + response = make_response("", 200) + + if request.method == "HEAD": + return self.handle_head_request(response) + + if request.method == "GET": + return self.handle_get_request(response) + + if request.method == "OPTIONS" and request.headers.get( + "Access-Control-Request-Method" + ): + response.headers["Tus-Resumable"] = self.tus_api_version + response.headers["Tus-Version"] = self.tus_api_version_supported + response.headers["Tus-Extension"] = ",".join(self.tus_api_extensions) + response.headers["Tus-Max-Size"] = self.tus_max_object_size + response.status_code = 204 + return response + + if request.headers.get("Tus-Resumable"): + response.headers["Tus-Resumable"] = self.tus_api_version + response.headers["Tus-Version"] = self.tus_api_version_supported + + if request.method in ["POST", "PUT"]: + return self.handle_upload(response) + + current_app.logger.warning( + "Received unsupported protocol or method for object upload" + ) + response.data = "Unsupported protocol or method" + response.status_code = 500 + return response + + def handle_upload(self, response): + """Handle file upload for POST/PUT requests.""" + metadata = {} + upload_metadata = request.headers.get("Upload-Metadata", None) + if upload_metadata: + for kv in upload_metadata.split(","): + key, value = kv.split(" ") + metadata[key] = base64.b64decode(value).decode("utf-8") + + if not self.object_overwrite: + try: + incoming_sha256 = self.compute_sha256(request.data) + + for obj in self.minio_client.list_objects( + self.bucket_name, recursive=True + ): + existing_data = self.minio_client.get_object( + self.bucket_name, obj.object_name + ) + existing_sha256 = self.compute_sha256(existing_data.read()) + existing_data.close() + + if existing_sha256 == incoming_sha256: + response.status_code = 409 + response.data = json.dumps( + { + "message": ( + "Object with the same content already exists." + ), + "objectname": obj.object_name, + } + ) + response.headers["Content-Type"] = "application/json" + return response + + except S3Error as e: + if e.code != "NoSuchKey": + response.status_code = 500 + return response + + resource_id = str(uuid.uuid4()) + + try: + data_stream = io.BytesIO(request.data) + self.minio_client.put_object( + self.bucket_name, resource_id, data_stream, len(request.data) + ) + except S3Error as e: + current_app.logger.error(f"Unable to upload object to MinIO: {e}") + response.status_code = 500 + return response + + response.status_code = 201 + response.headers["Location"] = ( + f"{request.url_root}/{self.upload_url}/{resource_id}" + ) + response.headers["Tus-Temp-Objectname"] = resource_id + response.autocorrect_location_header = False + + return response + + def handle_head_request(self, response): + """Handle the HEAD request for retrieving object status or upload offset. + + Args: + response (Response): The response object to be returned. + + Returns: + Response: The modified response object with object status or offset. + """ + resource_id = request.headers.get("Resource-ID") + if not resource_id: + response.status_code = 400 + response.data = "Resource-ID header missing" + return response + + try: + obj_stat = self.minio_client.stat_object(self.bucket_name, resource_id) + response.headers["Upload-Offset"] = obj_stat.size + response.headers["Tus-Resumable"] = self.tus_api_version + response.status_code = 200 + except S3Error as e: + if e.code == "NoSuchKey": + response.status_code = 404 + else: + response.status_code = 500 + return response + + def handle_get_request(self, response): + """Handle the GET request for fetching metadata or object existence status. + + Args: + response (Response): The response object to be returned. + + Returns: + Response: The modified response object with metadata or object status. + """ + metadata = {} + upload_metadata = request.headers.get("Upload-Metadata", None) + if upload_metadata: + for kv in upload_metadata.split(","): + key, value = kv.split(" ") + metadata[key] = base64.b64decode(value).decode("utf-8") + + if metadata.get("objectname") is None: + return make_response("Metadata objectname is not set", 404) + + try: + self.minio_client.get_object(self.bucket_name, metadata["objectname"]) + response.headers["Tus-Object-Name"] = metadata["objectname"] + response.headers["Tus-Object-Exists"] = True + except S3Error as e: + if e.code == "NoSuchKey": + response.headers["Tus-Object-Exists"] = False + else: + response.status_code = 500 + return response + return response + + def tus_object_upload_chunk(self, resource_id): + """Handle TUS protocol chunk upload operations for a given resource. + + Supports HTTP methods: + - HEAD: Get the current upload offset. + - DELETE: Delete the uploaded object. + - PATCH: Append a chunk of data to the existing object. + + Args: + resource_id (str): Unique identifier for the resource being uploaded. + + Returns: + Flask response: Response indicating success or error. + """ + response = make_response("", 204) + response.headers["Tus-Resumable"] = self.tus_api_version + response.headers["Tus-Version"] = self.tus_api_version_supported + + try: + if request.method == "HEAD": + # Handle HEAD request for chunk offset + existing_data = self.minio_client.get_object( + self.bucket_name, resource_id + ) + object_info = existing_data.read() + response.headers["Upload-Offset"] = len(object_info) + existing_data.close() + response.status_code = 200 + response.headers["Cache-Control"] = "no-store" + return response + + if request.method == "DELETE": + # Handle DELETE request to remove object + self.minio_client.remove_object(self.bucket_name, resource_id) + response.status_code = 204 + return response + + if request.method == "PATCH": + # Handle PATCH request to upload a chunk + new_data = request.data + current_size = 0 + + try: + existing_data = self.minio_client.get_object( + self.bucket_name, resource_id + ) + existing_data_bytes = existing_data.read() + current_size = len(existing_data_bytes) + new_data = existing_data_bytes + new_data + except S3Error as e: + if e.code != "NoSuchKey": + raise + + data_stream = io.BytesIO(new_data) + self.minio_client.put_object( + self.bucket_name, resource_id, data_stream, len(new_data) + ) + + new_offset = current_size + len(request.data) + response.headers["Upload-Offset"] = new_offset + response.status_code = 204 + response.headers["Cache-Control"] = "no-store" + return response + + except S3Error as e: + if e.code == "NoSuchKey": + response.status_code = 404 + logging.error(f"Object not found: {resource_id}") + else: + response.status_code = 500 + logging.error(f"Error: {e}") + + return response diff --git a/cloud_storage_handler/api/specs/specs.yaml b/cloud_storage_handler/api/specs/specs.yaml index bc4c121..0c9a0fc 100644 --- a/cloud_storage_handler/api/specs/specs.yaml +++ b/cloud_storage_handler/api/specs/specs.yaml @@ -3,7 +3,7 @@ openapi: 3.0.2 info: version: 1.0.0 title: Cloud Storage Handler - description: API for handling files on a cloud storage service + description: API for handling objects on a cloud storage service contact: name: ELIXIR Cloud & AAI email: cloud-service@elixir-europe.org @@ -13,94 +13,83 @@ info: https://github.com/elixir-cloud-aai/ cloud-storage-handler/blob/main/LICENSE servers: - - url: /elixircoud/csh/v1 + - url: /elixircloud/csh/v1 paths: - /: - get: - description: | - Returns a welcome message - operationId: home + /object: + post: + operationId: upload_object + summary: Upload a object + requestBody: + content: + application/octet-stream: + schema: + type: string + format: binary responses: - '200': - description: basic response - content: - application/json: - schema: - type: object - properties: - message: - type: string - example: "Welcome to Cloud Storage Handler API" + '201': + description: object uploaded successfully '400': - description: The request is malformed. + description: Bad request + '409': + description: Conflict - object already exists '500': - description: An unexpected error occurred. - /upload/initiate: - post: - summary: Initiate TUS upload session - operationId: initiate_upload - tags: - - Upload + description: Internal server error + /object/{resourceId}: + head: + operationId: object_upload_chunk_head + summary: Check upload status of a object + parameters: + - name: resourceId + in: path + required: true + description: ID of the resource + schema: + type: string responses: - "201": - description: TUS upload session initiated - content: - application/json: - schema: - type: object - properties: - object_id: - type: string - description: Unique identifier for the upload session - /upload/{object_id}/chunk: + '200': + description: object exists and offset returned + '404': + description: object not found + '500': + description: Internal server error patch: - summary: Upload a file chunk - operationId: upload_chunk - tags: - - Upload + operationId: object_upload_chunk_patch + summary: Upload a chunk of a object parameters: - - in: path - name: object_id + - name: resourceId + in: path required: true + description: ID of the resource schema: type: string - description: Unique identifier for the upload session requestBody: - required: true content: application/octet-stream: schema: type: string format: binary responses: - "204": + '204': description: Chunk uploaded successfully - "404": - description: File not found - /upload/complete/{object_id}: - post: - summary: Complete upload and store in MinIO - operationId: complete_upload - tags: - - Upload + '404': + description: object not found + '500': + description: Internal server error + delete: + operationId: object_upload_chunk_delete + summary: Delete a object parameters: - - in: path - name: object_id + - name: resourceId + in: path required: true + description: ID of the resource schema: type: string - description: Unique identifier for the upload session responses: - "200": - description: Upload complete and file stored in MinIO - content: - application/json: - schema: - type: object - properties: - message: - type: string - description: Confirmation message - "404": - description: File not found + '204': + description: object deleted successfully + '404': + description: object not found + '500': + description: Internal server error ... diff --git a/docs/source/pages/cloud_storage_handler.api.elixircloud.csh.rst b/docs/source/pages/cloud_storage_handler.api.elixircloud.csh.rst index 1b786f5..ea3f516 100644 --- a/docs/source/pages/cloud_storage_handler.api.elixircloud.csh.rst +++ b/docs/source/pages/cloud_storage_handler.api.elixircloud.csh.rst @@ -1,6 +1,14 @@ cloud\_storage\_handler.api.elixircloud.csh package =================================================== +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + cloud_storage_handler.api.elixircloud.csh.tus + Submodules ---------- diff --git a/docs/source/pages/cloud_storage_handler.api.elixircloud.csh.tus.rst b/docs/source/pages/cloud_storage_handler.api.elixircloud.csh.tus.rst new file mode 100644 index 0000000..ae2d636 --- /dev/null +++ b/docs/source/pages/cloud_storage_handler.api.elixircloud.csh.tus.rst @@ -0,0 +1,21 @@ +cloud\_storage\_handler.api.elixircloud.csh.tus package +======================================================= + +Submodules +---------- + +cloud\_storage\_handler.api.elixircloud.csh.tus.tus module +---------------------------------------------------------- + +.. automodule:: cloud_storage_handler.api.elixircloud.csh.tus.tus + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: cloud_storage_handler.api.elixircloud.csh.tus + :members: + :undoc-members: + :show-inheritance: From 33ee2e4b1f46a7f2e41249e3db1a33a73bef64e5 Mon Sep 17 00:00:00 2001 From: Pratiksha Sankhe Date: Thu, 31 Oct 2024 11:16:23 +0530 Subject: [PATCH 6/9] added docstrings Signed-off-by: Pratiksha Sankhe --- .../api/elixircloud/csh/controllers.py | 54 ++++- .../api/elixircloud/csh/tus/tus.py | 189 +++++------------- 2 files changed, 94 insertions(+), 149 deletions(-) diff --git a/cloud_storage_handler/api/elixircloud/csh/controllers.py b/cloud_storage_handler/api/elixircloud/csh/controllers.py index 188b1ec..b5633c9 100644 --- a/cloud_storage_handler/api/elixircloud/csh/controllers.py +++ b/cloud_storage_handler/api/elixircloud/csh/controllers.py @@ -1,18 +1,23 @@ """ELIXIR's Cloud Storage Handler controllers.""" import logging - -from flask import current_app +from flask import current_app, Response from foca.utils.logging import log_traffic # type: ignore from cloud_storage_handler.api.elixircloud.csh.tus.tus import TusController logger = logging.getLogger(__name__) - @log_traffic -def upload_object(): - """Upload an object to the storage.""" +def upload_object() -> Response: + """Upload an object to the storage. + + This function handles the upload of an object to the specified + storage bucket using the Tus protocol. + + Returns: + Response: The Flask response object containing the result of the upload. + """ minio_config = current_app.config.foca.custom.minio bucket_name = minio_config.bucket_name minio_client = current_app.config.foca.custom.minio.client.client @@ -21,8 +26,17 @@ def upload_object(): @log_traffic -def object_upload_chunk_head(resourceId): - """Handle HEAD request for chunk upload.""" +def object_upload_chunk_head(resourceId: str) -> Response: + """Handle HEAD request for chunk upload. + + This function processes HEAD requests for chunk uploads. + + Args: + resourceId (str): The unique identifier for the resource being uploaded. + + Returns: + Response: The Flask response object containing the status of the upload. + """ minio_config = current_app.config.foca.custom.minio bucket_name = minio_config.bucket_name minio_client = current_app.config.foca.custom.minio.client.client @@ -31,8 +45,17 @@ def object_upload_chunk_head(resourceId): @log_traffic -def object_upload_chunk_patch(resourceId): - """Handle PATCH request for chunk upload.""" +def object_upload_chunk_patch(resourceId: str) -> Response: + """Handle PATCH request for chunk upload. + + This function processes PATCH requests to upload chunks of an object. + + Args: + resourceId (str): The unique identifier for the resource being uploaded. + + Returns: + Response: The Flask response object containing the status of the upload. + """ minio_config = current_app.config.foca.custom.minio bucket_name = minio_config.bucket_name minio_client = current_app.config.foca.custom.minio.client.client @@ -41,8 +64,17 @@ def object_upload_chunk_patch(resourceId): @log_traffic -def object_upload_chunk_delete(resourceId): - """Handle DELETE request for chunk upload.""" +def object_upload_chunk_delete(resourceId: str) -> Response: + """Handle DELETE request for chunk upload. + + This function processes DELETE requests for uploaded chunks. + + Args: + resourceId (str): The unique identifier for the resource being uploaded. + + Returns: + Response: The Flask response object confirming the deletion. + """ minio_config = current_app.config.foca.custom.minio bucket_name = minio_config.bucket_name minio_client = current_app.config.foca.custom.minio.client.client diff --git a/cloud_storage_handler/api/elixircloud/csh/tus/tus.py b/cloud_storage_handler/api/elixircloud/csh/tus/tus.py index ef6a7ea..9f1bd5c 100644 --- a/cloud_storage_handler/api/elixircloud/csh/tus/tus.py +++ b/cloud_storage_handler/api/elixircloud/csh/tus/tus.py @@ -6,8 +6,9 @@ import json import logging import uuid +from typing import Any, Dict, Optional -from flask import current_app, make_response, request +from flask import Response, current_app, make_response, request from minio.error import S3Error logger = logging.getLogger(__name__) @@ -16,12 +17,12 @@ class TusController: """Controller for handling TUS-based file uploads using MinIO as backend storage.""" - def __init__(self, minio_client, bucket_name): + def __init__(self, minio_client: Any, bucket_name: str) -> None: """Initialize TusController with MinIO client and bucket name. Args: - minio_client: MinIO client for interacting with the storage. - bucket_name: Name of the bucket in MinIO to store files. + minio_client (Any): MinIO client for interacting with the storage. + bucket_name (str): Name of the bucket in MinIO to store files. """ self.minio_client = minio_client self.bucket_name = bucket_name @@ -32,11 +33,11 @@ def __init__(self, minio_client, bucket_name): self.object_overwrite = False self.upload_url = "elixircloud/csh/v1/object" - def compute_sha256(self, data): + def compute_sha256(self, data: bytes) -> str: """Compute the SHA-256 checksum of the provided data. Args: - data: Byte data to calculate the checksum for. + data (bytes): Byte data to calculate the checksum for. Returns: str: SHA-256 hash of the data as a hexadecimal string. @@ -45,113 +46,16 @@ def compute_sha256(self, data): sha256_hash.update(data) return sha256_hash.hexdigest() - # def tus_object_upload(self): - # """Handle TUS protocol for object upload operations. - - # Supports various HTTP methods: - # - HEAD: Checks object status or retrieves upload offset. - # - GET: Fetches metadata or checks object existence. - # - OPTIONS: Provides TUS protocol options. - # - POST/PUT: Handles new object upload. - - # Returns: - # Flask response: Response indicating success or error. - # """ - # response = make_response("", 200) - - # if request.method == "HEAD": - # return self.handle_head_request(response) - - # elif request.method == "GET": - # return self.handle_get_request(response) - - # elif ( - # request.method == "OPTIONS" - # and request.headers.get("Access-Control-Request-Method", None) is not None - # ): - # return response - - # # Handle general TUS upload methods and responses - # if request.headers.get("Tus-Resumable") is not None: - # response.headers["Tus-Resumable"] = self.tus_api_version - # response.headers["Tus-Version"] = self.tus_api_version_supported - - # if request.method == "OPTIONS": - # response.headers["Tus-Extension"] = ",".join(self.tus_api_extensions) - # response.headers["Tus-Max-Size"] = self.tus_max_object_size - # response.status_code = 204 - # return response - - # metadata = {} - # upload_metadata = request.headers.get("Upload-Metadata", None) - # if upload_metadata: - # for kv in upload_metadata.split(","): - # key, value = kv.split(" ") - # metadata[key] = base64.b64decode(value).decode("utf-8") - - # if not self.object_overwrite: - # try: - # incoming_sha256 = self.compute_sha256(request.data) - - # for obj in self.minio_client.list_objects( - # self.bucket_name, recursive=True - # ): - # existing_data = self.minio_client.get_object( - # self.bucket_name, obj.object_name - # ) - # existing_sha256 = self.compute_sha256(existing_data.read()) - # existing_data.close() - - # if existing_sha256 == incoming_sha256: - # response.status_code = 409 - # response.data = json.dumps( - # { - # "message": ( - # "Object with the same content already exists." - # ), - # "objectname": obj.object_name, - # } - # ) - # response.headers["Content-Type"] = "application/json" - # return response - - # except S3Error as e: - # if e.code != "NoSuchKey": - # response.status_code = 500 - # return response - - # resource_id = str(uuid.uuid4()) - - # try: - # data_stream = io.BytesIO(request.data) - # self.minio_client.put_object( - # self.bucket_name, resource_id, data_stream, len(request.data) - # ) - # except S3Error as e: - # current_app.logger.error(f"Unable to upload object to MinIO: {e}") - # response.status_code = 500 - # return response - - # response.status_code = 201 - # response.headers["Location"] = ( - # f"{request.url_root}/{self.upload_url}/{resource_id}" - # ) - # response.headers["Tus-Temp-Objectname"] = resource_id - # response.autocorrect_location_header = False - - # else: - # current_app.logger.warning( - # "Received object upload for unsupported object transfer protocol" - # ) - # response.data = ( - # "Received object upload for unsupported object transfer protocol" - # ) - # response.status_code = 500 - - # return response - - def tus_object_upload(self): - """Handle TUS protocol for object upload operations.""" + def tus_object_upload(self) -> Response: + """Handle TUS protocol for object upload operations. + + Returns: + Response: The response object with the appropriate headers and status code. + + Raises: + None directly, but calls to `handle_upload` or other helper methods + may raise various errors depending on protocol or server issues. + """ response = make_response("", 200) if request.method == "HEAD": @@ -166,7 +70,7 @@ def tus_object_upload(self): response.headers["Tus-Resumable"] = self.tus_api_version response.headers["Tus-Version"] = self.tus_api_version_supported response.headers["Tus-Extension"] = ",".join(self.tus_api_extensions) - response.headers["Tus-Max-Size"] = self.tus_max_object_size + response.headers["Tus-Max-Size"] = str(self.tus_max_object_size) response.status_code = 204 return response @@ -184,10 +88,22 @@ def tus_object_upload(self): response.status_code = 500 return response - def handle_upload(self, response): - """Handle file upload for POST/PUT requests.""" - metadata = {} - upload_metadata = request.headers.get("Upload-Metadata", None) + def handle_upload(self, response: Response) -> Response: + """Handle file upload for POST/PUT requests. + + Args: + response (Response): The response object to be modified. + + Returns: + Response: The modified response object with the upload status. + + Raises: + ValueError: If there's an error with metadata encoding. + S3Error: If there is an issue interacting with MinIO. + """ + metadata: Dict[str, str] = {} + upload_metadata: Optional[str] = request.headers.get("Upload-Metadata") + if upload_metadata: for kv in upload_metadata.split(","): key, value = kv.split(" ") @@ -245,7 +161,7 @@ def handle_upload(self, response): return response - def handle_head_request(self, response): + def handle_head_request(self, response: Response) -> Response: """Handle the HEAD request for retrieving object status or upload offset. Args: @@ -272,7 +188,7 @@ def handle_head_request(self, response): response.status_code = 500 return response - def handle_get_request(self, response): + def handle_get_request(self, response: Response) -> Response: """Handle the GET request for fetching metadata or object existence status. Args: @@ -281,8 +197,8 @@ def handle_get_request(self, response): Returns: Response: The modified response object with metadata or object status. """ - metadata = {} - upload_metadata = request.headers.get("Upload-Metadata", None) + metadata: Dict[str, str] = {} + upload_metadata: Optional[str] = request.headers.get("Upload-Metadata", None) if upload_metadata: for kv in upload_metadata.split(","): key, value = kv.split(" ") @@ -294,16 +210,16 @@ def handle_get_request(self, response): try: self.minio_client.get_object(self.bucket_name, metadata["objectname"]) response.headers["Tus-Object-Name"] = metadata["objectname"] - response.headers["Tus-Object-Exists"] = True + response.headers["Tus-Object-Exists"] = "true" if True else "false" except S3Error as e: if e.code == "NoSuchKey": - response.headers["Tus-Object-Exists"] = False + response.headers["Tus-Object-Exists"] = "false" if False else "true" else: response.status_code = 500 return response return response - def tus_object_upload_chunk(self, resource_id): + def tus_object_upload_chunk(self, resource_id: str) -> Response: """Handle TUS protocol chunk upload operations for a given resource. Supports HTTP methods: @@ -315,7 +231,7 @@ def tus_object_upload_chunk(self, resource_id): resource_id (str): Unique identifier for the resource being uploaded. Returns: - Flask response: Response indicating success or error. + Response: Response indicating success or error. """ response = make_response("", 204) response.headers["Tus-Resumable"] = self.tus_api_version @@ -328,7 +244,7 @@ def tus_object_upload_chunk(self, resource_id): self.bucket_name, resource_id ) object_info = existing_data.read() - response.headers["Upload-Offset"] = len(object_info) + response.headers["Upload-Offset"] = str(len(object_info)) existing_data.close() response.status_code = 200 response.headers["Cache-Control"] = "no-store" @@ -360,19 +276,16 @@ def tus_object_upload_chunk(self, resource_id): self.minio_client.put_object( self.bucket_name, resource_id, data_stream, len(new_data) ) - - new_offset = current_size + len(request.data) - response.headers["Upload-Offset"] = new_offset + response.headers["Upload-Offset"] = str( + current_size + len(request.data) + ) response.status_code = 204 - response.headers["Cache-Control"] = "no-store" return response - except S3Error as e: - if e.code == "NoSuchKey": - response.status_code = 404 - logging.error(f"Object not found: {resource_id}") - else: - response.status_code = 500 - logging.error(f"Error: {e}") + logger.error(f"Error during TUS chunk upload: {e}") + response.status_code = 500 + return response + logger.warning("Received unsupported method for TUS chunk upload") + response.status_code = 405 return response From eeb278cc267e3c3fe1bd19ffba315f1b7c3737f8 Mon Sep 17 00:00:00 2001 From: Pratiksha Sankhe Date: Thu, 31 Oct 2024 11:19:02 +0530 Subject: [PATCH 7/9] formatted Signed-off-by: Pratiksha Sankhe --- cloud_storage_handler/api/elixircloud/csh/controllers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cloud_storage_handler/api/elixircloud/csh/controllers.py b/cloud_storage_handler/api/elixircloud/csh/controllers.py index b5633c9..0b6a5e8 100644 --- a/cloud_storage_handler/api/elixircloud/csh/controllers.py +++ b/cloud_storage_handler/api/elixircloud/csh/controllers.py @@ -1,13 +1,15 @@ """ELIXIR's Cloud Storage Handler controllers.""" import logging -from flask import current_app, Response + +from flask import Response, current_app from foca.utils.logging import log_traffic # type: ignore from cloud_storage_handler.api.elixircloud.csh.tus.tus import TusController logger = logging.getLogger(__name__) + @log_traffic def upload_object() -> Response: """Upload an object to the storage. From 6719565b9b282d5b24e6fbafac724565a31d4ac3 Mon Sep 17 00:00:00 2001 From: Pratiksha Sankhe Date: Thu, 31 Oct 2024 11:23:36 +0530 Subject: [PATCH 8/9] fix workflow Signed-off-by: Pratiksha Sankhe --- .../api/elixircloud/csh/controllers.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/cloud_storage_handler/api/elixircloud/csh/controllers.py b/cloud_storage_handler/api/elixircloud/csh/controllers.py index 0b6a5e8..786479a 100644 --- a/cloud_storage_handler/api/elixircloud/csh/controllers.py +++ b/cloud_storage_handler/api/elixircloud/csh/controllers.py @@ -1,8 +1,7 @@ """ELIXIR's Cloud Storage Handler controllers.""" import logging - -from flask import Response, current_app +from flask import current_app, Response from foca.utils.logging import log_traffic # type: ignore from cloud_storage_handler.api.elixircloud.csh.tus.tus import TusController @@ -20,9 +19,9 @@ def upload_object() -> Response: Returns: Response: The Flask response object containing the result of the upload. """ - minio_config = current_app.config.foca.custom.minio + minio_config = current_app.config.foca.custom.minio # type: ignore bucket_name = minio_config.bucket_name - minio_client = current_app.config.foca.custom.minio.client.client + minio_client = current_app.config.foca.custom.minio.client.client # type: ignore tus = TusController(minio_client, bucket_name) return tus.tus_object_upload() @@ -39,9 +38,9 @@ def object_upload_chunk_head(resourceId: str) -> Response: Returns: Response: The Flask response object containing the status of the upload. """ - minio_config = current_app.config.foca.custom.minio + minio_config = current_app.config.foca.custom.minio # type: ignore bucket_name = minio_config.bucket_name - minio_client = current_app.config.foca.custom.minio.client.client + minio_client = current_app.config.foca.custom.minio.client.client # type: ignore tus = TusController(minio_client, bucket_name) return tus.tus_object_upload_chunk(resourceId) @@ -58,9 +57,9 @@ def object_upload_chunk_patch(resourceId: str) -> Response: Returns: Response: The Flask response object containing the status of the upload. """ - minio_config = current_app.config.foca.custom.minio + minio_config = current_app.config.foca.custom.minio # type: ignore bucket_name = minio_config.bucket_name - minio_client = current_app.config.foca.custom.minio.client.client + minio_client = current_app.config.foca.custom.minio.client.client # type: ignore tus = TusController(minio_client, bucket_name) return tus.tus_object_upload_chunk(resourceId) @@ -77,8 +76,8 @@ def object_upload_chunk_delete(resourceId: str) -> Response: Returns: Response: The Flask response object confirming the deletion. """ - minio_config = current_app.config.foca.custom.minio + minio_config = current_app.config.foca.custom.minio # type: ignore bucket_name = minio_config.bucket_name - minio_client = current_app.config.foca.custom.minio.client.client + minio_client = current_app.config.foca.custom.minio.client.client # type: ignore tus = TusController(minio_client, bucket_name) return tus.tus_object_upload_chunk(resourceId) From c067f0a0be50f67a3baa2c0f5fb126ca0bed6565 Mon Sep 17 00:00:00 2001 From: Pratiksha Sankhe Date: Thu, 31 Oct 2024 11:24:53 +0530 Subject: [PATCH 9/9] formatted Signed-off-by: Pratiksha Sankhe --- cloud_storage_handler/api/elixircloud/csh/controllers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cloud_storage_handler/api/elixircloud/csh/controllers.py b/cloud_storage_handler/api/elixircloud/csh/controllers.py index 786479a..4f11a11 100644 --- a/cloud_storage_handler/api/elixircloud/csh/controllers.py +++ b/cloud_storage_handler/api/elixircloud/csh/controllers.py @@ -1,7 +1,8 @@ """ELIXIR's Cloud Storage Handler controllers.""" import logging -from flask import current_app, Response + +from flask import Response, current_app from foca.utils.logging import log_traffic # type: ignore from cloud_storage_handler.api.elixircloud.csh.tus.tus import TusController