diff --git a/BUILD b/BUILD
index c00c174..3488241 100644
--- a/BUILD
+++ b/BUILD
@@ -1,6 +1,8 @@
 load("@bazel_gazelle//:def.bzl", "gazelle")
 load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
 load("@aspect_bazel_lib//lib:jq.bzl", "jq")
+load("@rules_python//python:defs.bzl", "py_binary")
+load("@base_pip3//:requirements.bzl", "requirement")
 
 # gazelle:prefix github.com/aptly-dev/aptly
 gazelle(name = "gazelle")
@@ -45,3 +47,72 @@ jq(
     """,
     visibility = ["//visibility:public"],
 )
+
+jq(
+    name = "deb_checksum_downloads",
+    srcs = [":envoy_versions"],
+    out = "deb_checksum_downloads.txt",
+    filter = """
+    reduce .latest_releases[].releases[] as $item ({};
+        .[$item] = {"signature": "foo@bar.com"})
+    | with_entries(
+        {"key": "https://github.com/envoyproxy/envoy/releases/download/\\(.key)/checksums.txt.asc",
+         "value": .value})
+    """,
+    args = ["-r"],
+    visibility = ["//visibility:public"],
+)
+
+py_binary(
+    name = "fetcher",
+    srcs = ["fetcher.py"],
+    deps = [
+        requirement("aio.core"),
+        requirement("aio.run.runner"),
+        requirement("envoy.base.utils"),
+        requirement("aiohttp"),
+    ],
+)
+
+genrule(
+    name = "published_checksums",
+    srcs = [":deb_checksum_downloads"],
+    outs = ["published_checksums.txt"],
+    cmd = """
+    $(location :fetcher) --downloads=$(location :deb_checksum_downloads) --output=json > $@
+    """,
+    tools = [":fetcher"],
+)
+
+jq(
+    name = "debs_downloads",
+    srcs = [":published_checksums"],
+    out = "debs_downloads.json",
+    filter = """
+    with_entries(
+        .key as $key
+        | .value as $value
+        | ($key | capture("v(?<version>[0-9.]+)") | .version) as $version
+        | {key: ("https://github.com/envoyproxy/envoy/releases/download/v\\($version)/debs.tar.gz"),
+           value: {
+               "path": $version,
+               "checksum": (
+                   $value
+                   | split("\\n")
+                   | map(select(endswith("debs.tar.gz")))
+                   | first
+                   | split(" ")
+                   | .[0])}})
+    """,
+    visibility = ["//visibility:public"],
+)
+
+genrule(
+    name = "debs",
+    srcs = [":debs_downloads"],
+    outs = ["debs.tar.gz"],
+    cmd = """
+    $(location :fetcher) --downloads=$(location :debs_downloads) --extract-downloads --output-path=$@
+    """,
+    tools = [":fetcher"],
+)
diff --git a/fetcher.py b/fetcher.py
new file mode 100644
index 0000000..9333b32
--- /dev/null
+++ b/fetcher.py
@@ -0,0 +1,125 @@
+import asyncio
+import hashlib
+import json
+import pathlib
+import sys
+from functools import cached_property
+from urllib.parse import urlsplit
+
+import aiohttp
+
+from aio.core.tasks import concurrent
+from aio.run import runner
+from envoy.base.utils import extract, pack
+
+
+class FetchingRunner(runner.Runner):
+
+    @cached_property
+    def downloads(self):
+        return json.load(pathlib.Path(self.args.downloads).open())
+
+    @cached_property
+    def downloads_path(self):
+        return pathlib.Path(self.tempdir.name).joinpath("downloads")
+
+    @cached_property
+    def session(self):
+        return aiohttp.ClientSession()
+
+    def add_arguments(self, parser) -> None:
+        super().add_arguments(parser)
+        parser.add_argument(
+            "--downloads",
+            help="JSON k/v of downloads/checksums (optional)")
+        parser.add_argument(
+            "--extract-downloads",
+            action="store_true",
+            default=False,
+            help="Extract downloaded files")
+        parser.add_argument("--output", help="Output format")
+        parser.add_argument("--output-path", help="Output path")
+
+    def download_path(self, url):
+        if "path" not in self.downloads[url]:
+            return None
+        return self.downloads_path.joinpath(
+            self.downloads[url]["path"],
+            self.filename(url))
+
+    async def fetch_bytes(self, url, path=None):
+        # Stream to disk when a path is configured, otherwise keep in memory.
+        async with self.session.get(url) as response:
+            if not path:
+                content = await response.read()
+                if "checksum" in self.downloads[url]:
+                    await self.validate_checksum(url, content)
+                return url, content
+            with path.open("wb") as f:
+                async for chunk in response.content.iter_chunked(1024):
+                    f.write(chunk)
+        if "checksum" in self.downloads[url]:
+            await self.validate_checksum(url)
+        if self.args.extract_downloads:
+            await asyncio.to_thread(extract, path.parent, path)
+            path.unlink()
+        return url, None
+
+    async def fetch_artefacts(self, url):
+        download_path = None
+        if "path" in self.downloads[url]:
+            download_path = self.download_path(url)
+            download_path.parent.mkdir(parents=True, exist_ok=True)
+        return await self.fetch_bytes(url, path=download_path)
+
+    def filename(self, url):
+        parsed_url = urlsplit(url)
+        path_parts = parsed_url.path.split("/")
+        return path_parts[-1]
+
+    def hashed(self, content):
+        hash_object = hashlib.sha256()
+        hash_object.update(content)
+        return hash_object.hexdigest()
+
+    @runner.cleansup
+    async def run(self):
+        result = {}
+        # Fetch at most 3 artefacts at a time.
+        downloads = concurrent(
+            (self.fetch_artefacts(url) for url in self.downloads),
+            limit=3)
+        async for (url, response) in downloads:
+            if self.args.output == "json":
+                result[url] = response.decode()
+        await self.session.close()
+        if self.args.output == "json":
+            print(json.dumps(result))
+            return
+        if self.args.output_path:
+            await asyncio.to_thread(
+                pack,
+                self.downloads_path,
+                self.args.output_path)
+
+    async def validate_checksum(self, url, content=None):
+        # Hash the in-memory bytes, or the file that was streamed to disk.
+        hashed = await asyncio.to_thread(
+            self.hashed,
+            (content
+             if content is not None
+             else self.download_path(url).read_bytes()))
+        expected = self.downloads[url]["checksum"]
+        if hashed != expected:
+            raise Exception(
+                f"Checksums do not match ({url}):\n"
+                f"  expected: {expected}\n"
+                f"  received: {hashed}")
+
+
+def main(*args):
+    return FetchingRunner(*args)()
+
+
+if __name__ == "__main__":
+    sys.exit(main(*sys.argv[1:]))
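
For reference, a minimal sketch of driving the fetcher outside Bazel. The release version, URL, and downloads.json contents below are illustrative only, and the snippet assumes the aio.core, aio.run and envoy.base.utils distributions are installed locally:

import json
import pathlib

from fetcher import main

# Hypothetical downloads spec mirroring :deb_checksum_downloads: the entry has
# no "path" key, so the fetched bytes stay in memory and are emitted as a
# url -> content JSON map on stdout.
downloads = {
    "https://github.com/envoyproxy/envoy/releases/download/v1.30.1/checksums.txt.asc": {
        "signature": "foo@bar.com"},
}
pathlib.Path("downloads.json").write_text(json.dumps(downloads))

# Equivalent to what the :published_checksums genrule invokes.
main("--downloads=downloads.json", "--output=json")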