From 8f726a8b3b08bc7013ad982cd0e025e47a392194 Mon Sep 17 00:00:00 2001 From: psakiev Date: Thu, 2 May 2024 11:39:09 -0600 Subject: [PATCH 01/12] Split out S3 specific stuff to enable FS case --- images/ci-prune-buildcache/buildcache.py | 107 ----------------- .../ci_buildcache_prune.py | 4 +- images/ci-prune-buildcache/s3_buildcache.py | 108 ++++++++++++++++++ 3 files changed, 110 insertions(+), 109 deletions(-) create mode 100644 images/ci-prune-buildcache/s3_buildcache.py diff --git a/images/ci-prune-buildcache/buildcache.py b/images/ci-prune-buildcache/buildcache.py index c065e4df1..140768dc5 100644 --- a/images/ci-prune-buildcache/buildcache.py +++ b/images/ci-prune-buildcache/buildcache.py @@ -1,22 +1,7 @@ -import boto3 -from botocore.config import Config from urllib.parse import urlparse from datetime import datetime import helper from io import StringIO -import math -import multiprocessing.pool as pool -import time - - -def s3_resource(): - config = Config( - retries={ - "mode": "adaptive", - "max_attempts": 10, - } - ) - return boto3.resource("s3", config=config) class Object: @@ -38,23 +23,6 @@ def endswith(self, exts): return self.key.endswith(exts) -class S3Object(Object): - def delete(self): - print(f"Deleting s3://{self.bucket_name}/{self.key}") - # s3 = s3_resource() - # obj = s3.Object(self.bucket_name, self.key) - # response = obj.delete() - # return response["DeleteMarker"] - return False - - def get(self): - s3 = s3_resource() - bucket = s3.Bucket(self.bucket_name) - s3obj = bucket.Object(self.key) - response = s3obj.get() - return response["Body"] - - class BuildCache: def __init__(self, url: str): self.url = urlparse(url) @@ -117,78 +85,3 @@ def get_index(self): raise e return index, obj - - -class S3BuildCache(BuildCache): - def object_type(self): - return S3Object - - def delete(self, keys : list = [], processes: int = 1, per_page: int = 1000): - """Delete the listed keys from the buildcache, by default this will - delete all of the keys that exist in the buildcache. 
- - Arguments: - keys (list(str), optional): list of keys to delete (default: all keys) - processes (int, optional): number of processes to use when calling delete - (default: 1, max: ) - per_page (int, optional): The max number of items to delete at a time (default: 1000, max: 1000) - """ - - if not keys: - keys = [obj.key for obj in self.list()] - - # Get the keys to delete that exists in this buildcache - prefix = self.url.path.lstrip("/") - delete_keys = [{"Key": k} for k in keys if prefix in k] - - # Nothing to delete - if not delete_keys: - return [], [] - - max_del = 1000 - per_page = min(max_del, per_page) - nkeys = len(delete_keys) - stride = math.ceil(nkeys / per_page) - - # Auto detecte number of threads for per_page - if processes < 1: - processes = stride - - # Only spawn as many processes as needed - processes = min(stride, processes) - - s3 = s3_resource() - bucket = s3.Bucket(self.url.netloc) - - def delete_keys_f(i: int): - # time.sleep(1) - return bucket.delete_objects(Delete={ - "Objects": delete_keys[i:nkeys:stride], - "Quiet": True, - } - ) - - failures = [] - errors = [] - if processes > 1: - with pool.ThreadPool(processes) as tp: - for response in tp.imap_unordered(helper.star(delete_keys_f), [(i,) for i in range(stride)]): - failures.extend([obj for obj in response.get("Deleted", []) if not obj["DeleteMarker"]]) - errors.extend(response.get("Errors", [])) - else: - for i in range(stride): - response = delete_keys_f(i) - failures.extend([obj for obj in response.get("Deleted", []) if not obj["DeleteMarker"]]) - errors.extend(response.get("Errors", [])) - - return errors, failures - - def _list(self): - s3 = s3_resource() - bucket = s3.Bucket(self.url.netloc) - for obj in bucket.objects.filter(Prefix=self.url.path.lstrip("/")): - yield S3Object( - obj.bucket_name, - obj.key, - obj.last_modified, - ) diff --git a/images/ci-prune-buildcache/ci_buildcache_prune.py b/images/ci-prune-buildcache/ci_buildcache_prune.py index e82dbcf15..1fbc3be20 100644 --- a/images/ci-prune-buildcache/ci_buildcache_prune.py +++ b/images/ci-prune-buildcache/ci_buildcache_prune.py @@ -10,7 +10,7 @@ from io import StringIO from urllib.parse import urlparse -import buildcache +from s3_buildcache import S3BuildCache import helper from pruner import DirectPruner, IndexPruner, OrphanPruner @@ -371,7 +371,7 @@ def configure_parser(): else: url = f"{BUILDCACHE_URL}/{stack}/build_cache/" - bc = buildcache.S3BuildCache(url) + bc = S3BuildCache(url) snapshot_key = f"s3-snapshot-{stack}" if cache.exists(snapshot_key): diff --git a/images/ci-prune-buildcache/s3_buildcache.py b/images/ci-prune-buildcache/s3_buildcache.py new file mode 100644 index 000000000..d2799ab9d --- /dev/null +++ b/images/ci-prune-buildcache/s3_buildcache.py @@ -0,0 +1,108 @@ +import boto3 +import helper +import multiprocessing.pool as pool +import math +import time +from botocore.config import Config +from buildcache import Object, BuildCache + +def s3_resource(): + config = Config( + retries={ + "mode": "adaptive", + "max_attempts": 10, + } + ) + return boto3.resource("s3", config=config) + + +class S3Object(Object): + def delete(self): + print(f"Deleting s3://{self.bucket_name}/{self.key}") + # s3 = s3_resource() + # obj = s3.Object(self.bucket_name, self.key) + # response = obj.delete() + # return response["DeleteMarker"] + return False + + def get(self): + s3 = s3_resource() + bucket = s3.Bucket(self.bucket_name) + s3obj = bucket.Object(self.key) + response = s3obj.get() + return response["Body"] + + +class 
S3BuildCache(BuildCache): + def object_type(self): + return S3Object + + def delete(self, keys : list = [], processes: int = 1, per_page: int = 1000): + """Delete the listed keys from the buildcache, by default this will + delete all of the keys that exist in the buildcache. + + Arguments: + keys (list(str), optional): list of keys to delete (default: all keys) + processes (int, optional): number of processes to use when calling delete + (default: 1, max: ) + per_page (int, optional): The max number of items to delete at a time (default: 1000, max: 1000) + """ + + if not keys: + keys = [obj.key for obj in self.list()] + + # Get the keys to delete that exists in this buildcache + prefix = self.url.path.lstrip("/") + delete_keys = [{"Key": k} for k in keys if prefix in k] + + # Nothing to delete + if not delete_keys: + return [], [] + + max_del = 1000 + per_page = min(max_del, per_page) + nkeys = len(delete_keys) + stride = math.ceil(nkeys / per_page) + + # Auto detecte number of threads for per_page + if processes < 1: + processes = stride + + # Only spawn as many processes as needed + processes = min(stride, processes) + + s3 = s3_resource() + bucket = s3.Bucket(self.url.netloc) + + def delete_keys_f(i: int): + # time.sleep(1) + return bucket.delete_objects(Delete={ + "Objects": delete_keys[i:nkeys:stride], + "Quiet": True, + } + ) + + failures = [] + errors = [] + if processes > 1: + with pool.ThreadPool(processes) as tp: + for response in tp.imap_unordered(helper.star(delete_keys_f), [(i,) for i in range(stride)]): + failures.extend([obj for obj in response.get("Deleted", []) if not obj["DeleteMarker"]]) + errors.extend(response.get("Errors", [])) + else: + for i in range(stride): + response = delete_keys_f(i) + failures.extend([obj for obj in response.get("Deleted", []) if not obj["DeleteMarker"]]) + errors.extend(response.get("Errors", [])) + + return errors, failures + + def _list(self): + s3 = s3_resource() + bucket = s3.Bucket(self.url.netloc) + for obj in bucket.objects.filter(Prefix=self.url.path.lstrip("/")): + yield S3Object( + obj.bucket_name, + obj.key, + obj.last_modified, + ) From a893ca8a3989af2b82616e87c64a133f617bda17 Mon Sep 17 00:00:00 2001 From: psakiev Date: Thu, 2 May 2024 14:07:39 -0600 Subject: [PATCH 02/12] Add a filesystem abstraction --- images/ci-prune-buildcache/fs_buildcache.py | 94 +++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 images/ci-prune-buildcache/fs_buildcache.py diff --git a/images/ci-prune-buildcache/fs_buildcache.py b/images/ci-prune-buildcache/fs_buildcache.py new file mode 100644 index 000000000..fb0e34608 --- /dev/null +++ b/images/ci-prune-buildcache/fs_buildcache.py @@ -0,0 +1,94 @@ +import helper +import math +import multiprocessing.pool as pool +import os + +from datetime.datetime import fromtimestamp +from buildcache import Object, BuildCache + + + +class FileSystemObject(Object): + def __init__(self, entry: os.DirEntry): + lm = fromtimestamp(entry.stat_info.st_mtime) + super().__init__(bucket_name=None, key=entry.path, last_modified = lm) + if entry.is_file(): + self._get_method = self._get_file + elif entry.is_dir(): + self._get_method = self._get_dir + + def _get_file(self): + return open(self.key, "r") + + def _get_dir(self): + # not sure if os.scandir would be better + return os.listdir(self.key) + + def get(self): + return self._get_method() + + def delete(self): + print(f"Deleting {self.key}") + return False + + +class FileSystemBuildCache: + def object_type(self): + return FileSystemObject + + def 
delete(self, keys : list = [], processes: int = 1, per_page: int = 1000): + """Delete the listed keys from the buildcache, by default this will + delete all of the keys that exist in the buildcache. + + Arguments: + keys (list(str), optional): list of keys to delete (default: all keys) + processes (int, optional): number of processes to use when calling delete + (default: 1, max: ) + per_page (int, optional): The max number of items to delete at a time (default: 1000, max: 1000) + """ + + if not keys: + keys = [obj.key for obj in self.list()] + + # Get the keys to delete that exists in this buildcache + prefix = self.url.path.lstrip("/") + delete_keys = [{"Key": k} for k in keys if prefix in k] + + # Nothing to delete + if not delete_keys: + return [], [] + + max_del = 1000 + per_page = min(max_del, per_page) + nkeys = len(delete_keys) + stride = math.ceil(nkeys / per_page) + + # Auto detecte number of threads for per_page + if processes < 1: + processes = stride + + # Only spawn as many processes as needed + processes = min(stride, processes) + + def delete_keys_f(i: int): + # TODO need to implement + return { "Deleted": [key for key in delete_keys[i:nkeys:stride]} + + failures = [] + errors = [] + if processes > 1: + with pool.ThreadPool(processes) as tp: + for response in tp.imap_unordered(helper.star(delete_keys_f), [(i,) for i in range(stride)]): + failures.extend([obj for obj in response.get("Deleted", []) if not obj["DeleteMarker"]]) + errors.extend(response.get("Errors", [])) + else: + for i in range(stride): + response = delete_keys_f(i) + failures.extend([obj for obj in response.get("Deleted", []) if not obj["DeleteMarker"]]) + errors.extend(response.get("Errors", [])) + + return errors, failures + + def _list(self): + for dir_obj in os.scandir(self.url.path.lstrip("/")): + yield FileSystemObject(dir_obj) From 91461d25ac78e3c61ee063bbb1e27d2057746931 Mon Sep 17 00:00:00 2001 From: Philip Sakievich Date: Thu, 2 May 2024 14:20:16 -0600 Subject: [PATCH 03/12] Fix some typos --- images/ci-prune-buildcache/fs_buildcache.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/images/ci-prune-buildcache/fs_buildcache.py b/images/ci-prune-buildcache/fs_buildcache.py index fb0e34608..38825b54d 100644 --- a/images/ci-prune-buildcache/fs_buildcache.py +++ b/images/ci-prune-buildcache/fs_buildcache.py @@ -3,14 +3,14 @@ import multiprocessing.pool as pool import os -from datetime.datetime import fromtimestamp +from datetime import datetime from buildcache import Object, BuildCache class FileSystemObject(Object): def __init__(self, entry: os.DirEntry): - lm = fromtimestamp(entry.stat_info.st_mtime) + lm = datetime.fromtimestamp(entry.stat_info.st_mtime) super().__init__(bucket_name=None, key=entry.path, last_modified = lm) if entry.is_file(): self._get_method = self._get_file @@ -32,7 +32,7 @@ def delete(self): return False -class FileSystemBuildCache: +class FileSystemBuildCache(BuildCache): def object_type(self): return FileSystemObject @@ -72,7 +72,7 @@ def delete(self, keys : list = [], processes: int = 1, per_page: int = 1000): def delete_keys_f(i: int): # TODO need to implement - return { "Deleted": [key for key in delete_keys[i:nkeys:stride]} + return { "Deleted": [key for key in delete_keys[i:nkeys:stride]]} failures = [] errors = [] From 70610d6e1452d8102f18adb86e5c5911e6a3acf8 Mon Sep 17 00:00:00 2001 From: Philip Sakievich Date: Thu, 2 May 2024 16:35:05 -0600 Subject: [PATCH 04/12] Updates to get FSCache.list() to work --- 
images/ci-prune-buildcache/buildcache.py | 7 ++----- images/ci-prune-buildcache/fs_buildcache.py | 7 +++++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/images/ci-prune-buildcache/buildcache.py b/images/ci-prune-buildcache/buildcache.py index 140768dc5..f12dab9fb 100644 --- a/images/ci-prune-buildcache/buildcache.py +++ b/images/ci-prune-buildcache/buildcache.py @@ -1,5 +1,6 @@ from urllib.parse import urlparse from datetime import datetime +import copy import helper from io import StringIO @@ -31,11 +32,7 @@ def __init__(self, url: str): def snapshot(self): self._listed = [] for obj in self._list(): - self._listed.append(self.object_type()( - obj.bucket_name, - obj.key, - obj.last_modified, - )) + self._listed.append(copy.deepcopy(obj)) return self._listed diff --git a/images/ci-prune-buildcache/fs_buildcache.py b/images/ci-prune-buildcache/fs_buildcache.py index 38825b54d..46d6e2840 100644 --- a/images/ci-prune-buildcache/fs_buildcache.py +++ b/images/ci-prune-buildcache/fs_buildcache.py @@ -10,7 +10,7 @@ class FileSystemObject(Object): def __init__(self, entry: os.DirEntry): - lm = datetime.fromtimestamp(entry.stat_info.st_mtime) + lm = datetime.fromtimestamp(entry.stat().st_mtime) super().__init__(bucket_name=None, key=entry.path, last_modified = lm) if entry.is_file(): self._get_method = self._get_file @@ -36,6 +36,9 @@ class FileSystemBuildCache(BuildCache): def object_type(self): return FileSystemObject + def load(self, snapshot_data: list): + raise Exception("Not implemented") + def delete(self, keys : list = [], processes: int = 1, per_page: int = 1000): """Delete the listed keys from the buildcache, by default this will delete all of the keys that exist in the buildcache. @@ -90,5 +93,5 @@ def delete_keys_f(i: int): return errors, failures def _list(self): - for dir_obj in os.scandir(self.url.path.lstrip("/")): + for dir_obj in os.scandir(self.url.path): yield FileSystemObject(dir_obj) From a7e70f8286870ad2e70736c02ed2fd2643734b84 Mon Sep 17 00:00:00 2001 From: psakiev Date: Thu, 2 May 2024 17:39:29 -0600 Subject: [PATCH 05/12] Specific index getter --- images/ci-prune-buildcache/buildcache.py | 12 +----------- images/ci-prune-buildcache/fs_buildcache.py | 13 +++++++++++++ images/ci-prune-buildcache/s3_buildcache.py | 13 +++++++++++++ 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/images/ci-prune-buildcache/buildcache.py b/images/ci-prune-buildcache/buildcache.py index f12dab9fb..0858c3df0 100644 --- a/images/ci-prune-buildcache/buildcache.py +++ b/images/ci-prune-buildcache/buildcache.py @@ -71,14 +71,4 @@ def object_type(self): return Object def get_index(self): - key = f"{self.url.path}index.json".lstrip("/") - obj = next(self.list(key=key)) - print("Fetching: ", key, obj) - try: - response = obj.get() - index = helper.load_json(response) - except Exception as e: - print("Could not fetch index: ", key) - raise e - - return index, obj + raise Exception("Must implement per class") diff --git a/images/ci-prune-buildcache/fs_buildcache.py b/images/ci-prune-buildcache/fs_buildcache.py index 46d6e2840..fac94d0c8 100644 --- a/images/ci-prune-buildcache/fs_buildcache.py +++ b/images/ci-prune-buildcache/fs_buildcache.py @@ -95,3 +95,16 @@ def delete_keys_f(i: int): def _list(self): for dir_obj in os.scandir(self.url.path): yield FileSystemObject(dir_obj) + + def get_index(self): + key = f"{self.url.path}index.json" + obj = next(self.list(key=key)) + print("Fetching: ", key, obj) + try: + response = obj.get() + index = helper.load_json(response) + 
except Exception as e:
+            print("Could not fetch index: ", key)
+            raise e
+
+        return index, obj
diff --git a/images/ci-prune-buildcache/s3_buildcache.py b/images/ci-prune-buildcache/s3_buildcache.py
index d2799ab9d..e1a02c4be 100644
--- a/images/ci-prune-buildcache/s3_buildcache.py
+++ b/images/ci-prune-buildcache/s3_buildcache.py
@@ -106,3 +106,16 @@ def _list(self):
                 obj.key,
                 obj.last_modified,
             )
+
+    def get_index(self):
+        key = f"{self.url.path}index.json".lstrip("/")
+        obj = next(self.list(key=key))
+        print("Fetching: ", key, obj)
+        try:
+            response = obj.get()
+            index = helper.load_json(response)
+        except Exception as e:
+            print("Could not fetch index: ", key)
+            raise e
+
+        return index, obj
From a64b822d4e8f17b9feb6c95308acd7891ce9a5f8 Mon Sep 17 00:00:00 2001
From: Philip Sakievich
Date: Thu, 16 May 2024 14:02:59 -0600
Subject: [PATCH 06/12] More updates

---
 images/ci-prune-buildcache/fs_buildcache.py   |   2 +-
 .../fs_buildcache_prune.py                    | 114 ++++++++++++++++++
 images/ci-prune-buildcache/pruner.py          |  18 +++
 3 files changed, 133 insertions(+), 1 deletion(-)
 create mode 100644 images/ci-prune-buildcache/fs_buildcache_prune.py

diff --git a/images/ci-prune-buildcache/fs_buildcache.py b/images/ci-prune-buildcache/fs_buildcache.py
index fac94d0c8..c5c5d27cb 100644
--- a/images/ci-prune-buildcache/fs_buildcache.py
+++ b/images/ci-prune-buildcache/fs_buildcache.py
@@ -99,7 +99,7 @@ def _list(self):
     def get_index(self):
         key = f"{self.url.path}index.json"
         obj = next(self.list(key=key))
-        print("Fetching: ", key, obj)
+        print("Fetching: ", key)
         try:
             response = obj.get()
             index = helper.load_json(response)
diff --git a/images/ci-prune-buildcache/fs_buildcache_prune.py b/images/ci-prune-buildcache/fs_buildcache_prune.py
new file mode 100644
index 000000000..f6150209d
--- /dev/null
+++ b/images/ci-prune-buildcache/fs_buildcache_prune.py
@@ -0,0 +1,114 @@
+import argparse
+import helper
+import os
+
+from datetime import datetime, timedelta, timezone
+from fs_buildcache import FileSystemBuildCache
+from pruner import pruner_factory
+
+
+def configure_parser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "path",
+        help="location of the buildcache",
+    )
+    parser.add_argument(
+        "--start-date",
+        help="Starting date for pruning window",
+        default=datetime.now(timezone.utc).isoformat(),
+    )
+    parser.add_argument(
+        "--since-days",
+        help="Number of days before the start date to include in the pruning window",
+        type=int,
+        default=30
+    )
+    parser.add_argument(
+        "-j", "--nprocs",
+        help="Number of processes to use",
+        type=int,
+        metavar="N",
+        default=1
+    )
+    parser.add_argument(
+        "--prune-hashes",
+        help="json file with hash list to prune",
+        type=argparse.FileType("r"),
+        metavar="prune.json",
+    )
+    parser.add_argument(
+        "--keep-hashes",
+        help="json file with hash list to keep",
+        type=argparse.FileType("r"),
+        metavar="keep.json",
+    )
+    parser.add_argument(
+        "--snapshot-dir",
+        help="Directory containing snapshots of mirrors."
+ "If it exists they will be loaded, if it does not they will be written", + metavar="DIR", + ) + parser.add_argument( + "-o", "--output-dir", + help="output directory", + ) + parser.add_argument( + "-S", "--suffix", + help="logging file suffix", + ) + parser.add_argument( + "-D", "--delete", + help="Dry run", + action="store_true", + ) + + pruner_group = parser.add_mutually_exclusive_group(required=True) + pruner_group.add_argument( + "--direct", + help="use the buildcache index to check for buildcache hashes", + action="store_true", + ) + pruner_group.add_argument( + "--orphaned", + help="Enable orphan pruning", + action="store_true", + ) + pruner_group.add_argument( + "--check-index", + help="use the buildcache index to check for buildcache hashes", + action="store_true", + ) + parser.add_argument( + "--delete-only", + help="use the buildcache index to check for buildcache hashes", + action="store_true", + ) + + return parser + + +if __name__=="__main__": + + args = configure_parser().parse_args() + + cache = FileSystemBuildCache(args.path) + + now = datetime.fromisoformat(args.start_date) + time_window = now - timedelta(days=args.since_days) + + pruner = pruner_factory(cache, args, since=time_window) + + print("-- Computing prunable hashes") + prunable_hashes = pruner.determine_prunable_hashes() + with open(f"log.json", "w") as fd: + helper.write_json(fd, prunable_hashes) + + pruned = [] + if prunable_hashes: + print("-- Finding prunable files") + pruned.extend(pruner.prune(prunable_hashes)) + print(f"-- Found prunable {len(pruned)} files in buildcache") + else: + print("-- Nothing to prune") + diff --git a/images/ci-prune-buildcache/pruner.py b/images/ci-prune-buildcache/pruner.py index 51915c907..23ccf36c4 100644 --- a/images/ci-prune-buildcache/pruner.py +++ b/images/ci-prune-buildcache/pruner.py @@ -257,3 +257,21 @@ def a_not_in_b(a, b): # print("\n".join(self.unmatched_binaries)) return self.prunable_hashes + + +def pruner_factory(cache, args, keep_hashes=[], since=None): + """ Factory with variable args a kwargs """ + # make sure only one type was supplied + type_sum = int(args.direct) + int(args.orphaned) + int(args.check_index) + assert type_sum == 1 + + if args.direct: + return DirectPruner(cache, keep_hashes, args) + elif args.orphaned: + return OrphanPruner(cache, since, args) + elif args.check_index: + return IndexPruner(cache, keep_hashes, args) + else: + raise Exception("Pruner type not implemented") + + From 13482ce4a878acae1ad101e46cdd3828fbda1204 Mon Sep 17 00:00:00 2001 From: Philip Sakievich Date: Thu, 16 May 2024 16:40:54 -0600 Subject: [PATCH 07/12] More tweaks, direct pruner not working, others not checking dates --- images/ci-prune-buildcache/fs_buildcache_prune.py | 5 ++++- images/ci-prune-buildcache/pruner.py | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) mode change 100644 => 100755 images/ci-prune-buildcache/fs_buildcache_prune.py diff --git a/images/ci-prune-buildcache/fs_buildcache_prune.py b/images/ci-prune-buildcache/fs_buildcache_prune.py old mode 100644 new mode 100755 index f6150209d..b7079c299 --- a/images/ci-prune-buildcache/fs_buildcache_prune.py +++ b/images/ci-prune-buildcache/fs_buildcache_prune.py @@ -91,13 +91,16 @@ def configure_parser(): if __name__=="__main__": args = configure_parser().parse_args() + keep_hashes=[] + if args.keep_hashes: + keep_hashes = helper.load_json(args.keep_hashes) cache = FileSystemBuildCache(args.path) now = datetime.fromisoformat(args.start_date) time_window = now - timedelta(days=args.since_days) - 
pruner = pruner_factory(cache, args, since=time_window) + pruner = pruner_factory(cache, args, keep_hashes, since=time_window) print("-- Computing prunable hashes") prunable_hashes = pruner.determine_prunable_hashes() diff --git a/images/ci-prune-buildcache/pruner.py b/images/ci-prune-buildcache/pruner.py index 23ccf36c4..7f0a8aff6 100644 --- a/images/ci-prune-buildcache/pruner.py +++ b/images/ci-prune-buildcache/pruner.py @@ -52,11 +52,13 @@ def determine_prunable_hashes(self): hashes.append(h) self.prunable_hashes.update(hashes) + print(self.prunable_hashes) return self.prunable_hashes def _prune_buildcache(self, obj: Object): """ Apply pruning to buildcache object """ + breakpoint() prunit = self._is_prunable(obj) return obj, prunit @@ -67,6 +69,7 @@ def _list(self, ext=None, wrapped=True): ext : extension(s) to filter by """ for obj in self.buildcache.list(ignore=lambda o: ext and not o.endswith(ext)): + breakpoint() if wrapped: yield (obj,) else: @@ -135,12 +138,13 @@ def _is_prunable(self, obj: Object): # keep_hashes list if obj.key.endswith(self.tarball_ext): if obj.last_modified.timestamp() > self.start_date.timestamp(): - # print(f"{obj.key} is too new") + print(f"{obj.key} is too new") return False return BasePruner._is_prunable(self, obj) def determine_prunable_hashes(self): + breakpoint() if not self.prunable_hashes: # Direct pruning requires filtering tarballs first due to one day buffer hashes: list = [] From 2b83126998a96285d2eb21981a99c9def756edc5 Mon Sep 17 00:00:00 2001 From: Philip Sakievich Date: Thu, 16 May 2024 21:41:55 -0600 Subject: [PATCH 08/12] Dive on directories --- images/ci-prune-buildcache/fs_buildcache.py | 20 +++++++++++++++---- .../fs_buildcache_prune.py | 3 +++ images/ci-prune-buildcache/pruner.py | 4 ---- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/images/ci-prune-buildcache/fs_buildcache.py b/images/ci-prune-buildcache/fs_buildcache.py index c5c5d27cb..23eda4d56 100644 --- a/images/ci-prune-buildcache/fs_buildcache.py +++ b/images/ci-prune-buildcache/fs_buildcache.py @@ -74,8 +74,11 @@ def delete(self, keys : list = [], processes: int = 1, per_page: int = 1000): processes = min(stride, processes) def delete_keys_f(i: int): - # TODO need to implement - return { "Deleted": [key for key in delete_keys[i:nkeys:stride]]} + deleted = [] + for key in delete_keys[i:nkeys:stride]: + shutil.rmtree(key) + deleted.append(key) + return { "Deleted": deleted} failures = [] errors = [] @@ -93,8 +96,17 @@ def delete_keys_f(i: int): return errors, failures def _list(self): - for dir_obj in os.scandir(self.url.path): - yield FileSystemObject(dir_obj) + def traverse_directory(directory): + for entry in os.scandir(directory): + if entry.is_file(): + yield entry + elif entry.is_dir(): + yield from traverse_directory(entry.path) + + for file_obj in traverse_directory(self.url.path): + yield FileSystemObject(file_obj) + + def get_index(self): key = f"{self.url.path}index.json" diff --git a/images/ci-prune-buildcache/fs_buildcache_prune.py b/images/ci-prune-buildcache/fs_buildcache_prune.py index b7079c299..5a543d030 100755 --- a/images/ci-prune-buildcache/fs_buildcache_prune.py +++ b/images/ci-prune-buildcache/fs_buildcache_prune.py @@ -111,7 +111,10 @@ def configure_parser(): if prunable_hashes: print("-- Finding prunable files") pruned.extend(pruner.prune(prunable_hashes)) + pruned_keys = [ obj.key for obj in pruned ] print(f"-- Found prunable {len(pruned)} files in buildcache") + with open(f"files_to_prune.json", "w") as fd: + helper.write_json(fd, 
pruned_keys) else: print("-- Nothing to prune") diff --git a/images/ci-prune-buildcache/pruner.py b/images/ci-prune-buildcache/pruner.py index 7f0a8aff6..710758e31 100644 --- a/images/ci-prune-buildcache/pruner.py +++ b/images/ci-prune-buildcache/pruner.py @@ -52,13 +52,11 @@ def determine_prunable_hashes(self): hashes.append(h) self.prunable_hashes.update(hashes) - print(self.prunable_hashes) return self.prunable_hashes def _prune_buildcache(self, obj: Object): """ Apply pruning to buildcache object """ - breakpoint() prunit = self._is_prunable(obj) return obj, prunit @@ -69,7 +67,6 @@ def _list(self, ext=None, wrapped=True): ext : extension(s) to filter by """ for obj in self.buildcache.list(ignore=lambda o: ext and not o.endswith(ext)): - breakpoint() if wrapped: yield (obj,) else: @@ -144,7 +141,6 @@ def _is_prunable(self, obj: Object): return BasePruner._is_prunable(self, obj) def determine_prunable_hashes(self): - breakpoint() if not self.prunable_hashes: # Direct pruning requires filtering tarballs first due to one day buffer hashes: list = [] From 08c9c72ce7f16cc9eec388e7353b8925ed2dc8fb Mon Sep 17 00:00:00 2001 From: Philip Sakievich Date: Fri, 17 May 2024 14:17:27 -0600 Subject: [PATCH 09/12] Add logic for trimming empty directories --- images/ci-prune-buildcache/fs_buildcache.py | 24 ++++++-- .../fs_buildcache_prune.py | 59 +++++++++++++++---- images/ci-prune-buildcache/pruner.py | 2 +- 3 files changed, 66 insertions(+), 19 deletions(-) diff --git a/images/ci-prune-buildcache/fs_buildcache.py b/images/ci-prune-buildcache/fs_buildcache.py index 23eda4d56..d6bb1c6e9 100644 --- a/images/ci-prune-buildcache/fs_buildcache.py +++ b/images/ci-prune-buildcache/fs_buildcache.py @@ -74,24 +74,38 @@ def delete(self, keys : list = [], processes: int = 1, per_page: int = 1000): processes = min(stride, processes) def delete_keys_f(i: int): + """ + Delete keys/files and parent directory if it is empty + after the key has been removed + """ deleted = [] + errors = [] + failures = [] for key in delete_keys[i:nkeys:stride]: - shutil.rmtree(key) - deleted.append(key) - return { "Deleted": deleted} + parent_directory = os.path.dirname(key) + try: + os.remove(key) + if not os.listdir(parent_directory): + os.rmdir(parent_directory) + deleted.append(key) + except PermissionError: + failures.append((key, "permissions")) + except FileNotFoundError: + errors.append((key, "file not found")) + return { "Deleted": deleted, "Errors": errors, "Failures": failures} failures = [] errors = [] if processes > 1: with pool.ThreadPool(processes) as tp: for response in tp.imap_unordered(helper.star(delete_keys_f), [(i,) for i in range(stride)]): - failures.extend([obj for obj in response.get("Deleted", []) if not obj["DeleteMarker"]]) errors.extend(response.get("Errors", [])) + failures.extend(response.get("Failures", [])) else: for i in range(stride): response = delete_keys_f(i) - failures.extend([obj for obj in response.get("Deleted", []) if not obj["DeleteMarker"]]) errors.extend(response.get("Errors", [])) + failures.extend(response.get("Failures", [])) return errors, failures diff --git a/images/ci-prune-buildcache/fs_buildcache_prune.py b/images/ci-prune-buildcache/fs_buildcache_prune.py index 5a543d030..b3c5530e0 100755 --- a/images/ci-prune-buildcache/fs_buildcache_prune.py +++ b/images/ci-prune-buildcache/fs_buildcache_prune.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 import argparse import helper import os @@ -43,14 +44,9 @@ def configure_parser(): type=argparse.FileType("r"), metavar="keep.json", ) - 
parser.add_argument(
-        "--snapshot-dir",
-        help="Directory containing snapshots of mirrors."
-             "If it exists they will be loaded, if it does not they will be written",
-        metavar="DIR",
-    )
     parser.add_argument(
         "-o", "--output-dir",
+        default=os.getcwd(),
         help="output directory",
     )
     parser.add_argument(
         "-S", "--suffix",
         help="logging file suffix",
     )
     parser.add_argument(
         "-D", "--delete",
-        help="Dry run",
+        help="attempt to delete the files",
         action="store_true",
     )
 
@@ -91,6 +87,24 @@ def configure_parser():
 if __name__=="__main__":
 
     args = configure_parser().parse_args()
+
+    os.makedirs(args.output_dir, exist_ok=True)
+
+    if args.check_index:
+        prune_method = "Index Based"
+    elif args.orphaned:
+        prune_method = "Orphaned"
+    elif args.direct:
+        prune_method = "Direct"
+    else:
+        prune_method = "Direct"
+
+    prune_method_safe = "_".join(prune_method.split()).lower()
+    if not args.suffix:
+        log_suffix = "_" + prune_method_safe
+    else:
+        log_suffix = args.suffix
+
     keep_hashes=[]
     if args.keep_hashes:
         keep_hashes = helper.load_json(args.keep_hashes)
@@ -104,17 +118,36 @@ def configure_parser():
 
     print("-- Computing prunable hashes")
     prunable_hashes = pruner.determine_prunable_hashes()
-    with open(f"log.json", "w") as fd:
-        helper.write_json(fd, prunable_hashes)
+    prune_hash_file = f"{args.output_dir}/prunable-hashes-{log_suffix}.txt"
+    with open(f"{prune_hash_file}", "w") as fd:
+        fd.writelines("\n".join(prunable_hashes))
 
-    pruned = []
     if prunable_hashes:
         print("-- Finding prunable files")
-        pruned.extend(pruner.prune(prunable_hashes))
+
+        pruned = pruner.prune(prunable_hashes)
         pruned_keys = [ obj.key for obj in pruned ]
+
         print(f"-- Found prunable {len(pruned)} files in buildcache")
+
+        prune_list_file = f"{args.output_dir}/prunable-files-{log_suffix}.txt"
+        with open(f"{prune_list_file}", "w") as fd:
+            fd.writelines("\n".join(pruned_keys))
     else:
         print("-- Nothing to prune")
 
+    if args.delete:
+        print("-- Pruning build cache")
+        err, fail = cache.delete(pruned_keys, processes=args.nprocs)
+        fname_template = f"{args.output_dir}/delete-{{0}}-{log_suffix}.json"
+        if err:
+            print("errors found")
+            with open(fname_template.format("errors"), "w") as fd:
+                helper.write_json(fd, err)
+
+        if fail:
+            print("failures found")
+            with open(fname_template.format("failures"), "w") as fd:
+                helper.write_json(fd, fail)
+
+
diff --git a/images/ci-prune-buildcache/pruner.py b/images/ci-prune-buildcache/pruner.py
index 710758e31..23ccf36c4 100644
--- a/images/ci-prune-buildcache/pruner.py
+++ b/images/ci-prune-buildcache/pruner.py
@@ -135,7 +135,7 @@ def _is_prunable(self, obj: Object):
         # keep_hashes list
         if obj.key.endswith(self.tarball_ext):
             if obj.last_modified.timestamp() > self.start_date.timestamp():
-                print(f"{obj.key} is too new")
+                # print(f"{obj.key} is too new")
                 return False
 
         return BasePruner._is_prunable(self, obj)

From 252157eccc22d50590b9ac9b9cf42277c0194e84 Mon Sep 17 00:00:00 2001
From: Philip Sakievich
Date: Mon, 20 May 2024 17:19:27 -0600
Subject: [PATCH 10/12] Make supplied prune hashes work

---
 images/ci-prune-buildcache/fs_buildcache_prune.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/images/ci-prune-buildcache/fs_buildcache_prune.py b/images/ci-prune-buildcache/fs_buildcache_prune.py
index b3c5530e0..ab3ed0630 100755
--- a/images/ci-prune-buildcache/fs_buildcache_prune.py
+++ b/images/ci-prune-buildcache/fs_buildcache_prune.py
@@ -117,7 +117,12 @@
     pruner 
= pruner_factory(cache, args, keep_hashes, since=time_window) print("-- Computing prunable hashes") - prunable_hashes = pruner.determine_prunable_hashes() + prunable_hashes = [] + if args.prune_hashes: + prunable_hashes.extend( helper.load_json(args.prune_hashes)) + else: + prunable_hashes.extend(pruner.determine_prunable_hashes()) + prune_hash_file = f"{args.output_dir}/prunable-hashes-{log_suffix}.txt" with open(f"{prune_hash_file}", "w") as fd: fd.writelines("\n".join(prunable_hashes)) From 69a356af7604d65c4b9b46dae68ca9d05577f08d Mon Sep 17 00:00:00 2001 From: Philip Sakievich Date: Thu, 23 May 2024 09:42:14 -0600 Subject: [PATCH 11/12] Fix errors during deletion --- images/ci-prune-buildcache/fs_buildcache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/images/ci-prune-buildcache/fs_buildcache.py b/images/ci-prune-buildcache/fs_buildcache.py index d6bb1c6e9..78ca9b13b 100644 --- a/images/ci-prune-buildcache/fs_buildcache.py +++ b/images/ci-prune-buildcache/fs_buildcache.py @@ -54,8 +54,8 @@ def delete(self, keys : list = [], processes: int = 1, per_page: int = 1000): keys = [obj.key for obj in self.list()] # Get the keys to delete that exists in this buildcache - prefix = self.url.path.lstrip("/") - delete_keys = [{"Key": k} for k in keys if prefix in k] + prefix = self.url.path + delete_keys = [k for k in keys if prefix in k] # Nothing to delete if not delete_keys: From 795198d344e5122a513d6c5e6fbb6c3accea84f4 Mon Sep 17 00:00:00 2001 From: Philip Sakievich Date: Thu, 23 May 2024 14:06:31 -0600 Subject: [PATCH 12/12] Add a single entry point python file for fs (depends on spack-python) --- images/ci-prune-buildcache/buildcache.py | 3 +- .../ci-prune-buildcache/buildcache_query.py | 35 ++++ images/ci-prune-buildcache/cache-prune.py | 165 ++++++++++++++++++ images/ci-prune-buildcache/fs_buildcache.py | 5 +- images/ci-prune-buildcache/pruner.py | 60 +++++-- 5 files changed, 249 insertions(+), 19 deletions(-) create mode 100755 images/ci-prune-buildcache/buildcache_query.py create mode 100755 images/ci-prune-buildcache/cache-prune.py diff --git a/images/ci-prune-buildcache/buildcache.py b/images/ci-prune-buildcache/buildcache.py index 0858c3df0..550bb0536 100644 --- a/images/ci-prune-buildcache/buildcache.py +++ b/images/ci-prune-buildcache/buildcache.py @@ -6,9 +6,10 @@ class Object: - def __init__(self, bucket_name: str, key: str, last_modified): + def __init__(self, bucket_name: str, key: str, last_modified, size = 0): self.bucket_name = bucket_name self.key = key + self.size = size if isinstance(last_modified, datetime): self.last_modified = last_modified else: diff --git a/images/ci-prune-buildcache/buildcache_query.py b/images/ci-prune-buildcache/buildcache_query.py new file mode 100755 index 000000000..84526499e --- /dev/null +++ b/images/ci-prune-buildcache/buildcache_query.py @@ -0,0 +1,35 @@ +#!/usr/bin/env spack-python + +# copy of https://github.com/sandialabs/spack-manager/blob/main/manager/manager_cmds/cache_query.py +# as a stand alone script +# query the buildcache like `spack find` + +import argparse + +import spack.binary_distribution as bindist +import spack.cmd as cmd +import spack.cmd.find + + +parser = argparse.ArgumentParser() +spack.cmd.find.setup_parser(parser) + +def cache_search(self, **kwargs): + qspecs = spack.cmd.parse_specs(self.values) + search_engine = bindist.BinaryCacheQuery(True) + results = {} + for q in qspecs: + hits = search_engine(str(q), **kwargs) + for hit in hits: + results[hit.dag_hash()] = hit + return 
sorted(results.values())
+
+spack.cmd.common.arguments.ConstraintAction._specs = cache_search
+
+def find(parser, args):
+    spack.cmd.find.find(parser, args)
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+    find(parser, args)
+
diff --git a/images/ci-prune-buildcache/cache-prune.py b/images/ci-prune-buildcache/cache-prune.py
new file mode 100755
index 000000000..e8242a475
--- /dev/null
+++ b/images/ci-prune-buildcache/cache-prune.py
@@ -0,0 +1,165 @@
+#!/usr/bin/env python3
+
+import argparse
+import helper
+import math
+import os
+import subprocess
+
+from datetime import datetime, timedelta, timezone
+from fs_buildcache import FileSystemBuildCache
+from pruner import pruner_factory, PRUNER_TYPES
+
+def convert_size(size_bytes):
+    if size_bytes == 0:
+        return "0B"
+    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
+    i = int(math.floor(math.log(size_bytes, 1024)))
+    p = math.pow(1024, i)
+    size = round(size_bytes / p, 2)
+    return f"{size} {size_name[i]}"
+
+def configure_parser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "path",
+        help="location of the buildcache",
+    )
+    parser.add_argument(
+        "--start-date",
+        help="Starting date for pruning window",
+        default=datetime.now(timezone.utc).isoformat(),
+    )
+    parser.add_argument(
+        "--since-days",
+        help="Number of days before the start date to include in the pruning window",
+        type=int,
+        default=0
+    )
+    parser.add_argument(
+        "-j", "--nprocs",
+        help="Number of processes to use",
+        type=int,
+        metavar="N",
+        default=1
+    )
+    parser.add_argument(
+        "--prune-hashes",
+        help="json file with hash list to prune",
+        type=argparse.FileType("r"),
+        metavar="prune.json",
+    )
+    parser.add_argument(
+        "--keep-hashes",
+        help="json file with hash list to keep",
+        type=argparse.FileType("r"),
+        metavar="keep.json",
+    )
+    parser.add_argument(
+        "--keep-specs",
+        help="specs to preserve in the cache (includes dependencies)",
+        nargs="+",
+    )
+    parser.add_argument(
+        "-o", "--output-dir",
+        default=os.getcwd(),
+        help="output directory",
+    )
+    parser.add_argument(
+        "-S", "--suffix",
+        help="logging file suffix",
+    )
+    parser.add_argument(
+        "-D", "--delete",
+        help="attempt to delete the files",
+        action="store_true",
+    )
+    parser.add_argument(
+        "-m", "--method",
+        help="pruning method to use on the cache",
+        choices = list(PRUNER_TYPES.keys()),
+        default = "direct",
+    )
+
+    return parser
+
+
+def get_cache_hashes_from_specs(*args, **kwargs):
+    command = ['spack-python', 'buildcache_query.py', '--format', '{hash}']
+    command.extend([*args])
+    result = subprocess.check_output(command, universal_newlines=True).strip().split()
+    return result
+
+def get_keep_hashes(args: argparse.Namespace):
+    keep_hashes=[]
+    if args.keep_hashes:
+        keep_hashes.extend(helper.load_json(args.keep_hashes))
+    if args.keep_specs:
+        keep_hashes.extend(get_cache_hashes_from_specs("--deps", *args.keep_specs))
+    return keep_hashes
+
+if __name__=="__main__":
+    args = configure_parser().parse_args()
+
+    os.makedirs(args.output_dir, exist_ok=True)
+
+
+    if not args.suffix:
+        log_suffix = "_" + args.method
+    else:
+        log_suffix = args.suffix
+
+    keep_hashes=get_keep_hashes(args)
+
+    cache = FileSystemBuildCache(args.path)
+
+    now = datetime.fromisoformat(args.start_date)
+    time_window = now - timedelta(days=args.since_days)
+
+    # combine start date and delta for passing to pruners
+    args.start_date = time_window
+
+    pruner = pruner_factory(cache, args.method, args, keep_hashes, since=time_window)
+
+    print("-- Computing prunable hashes")
+    prunable_hashes = []
+    if 
args.prune_hashes:
+        prunable_hashes.extend( helper.load_json(args.prune_hashes))
+    else:
+        prunable_hashes.extend(pruner.determine_prunable_hashes())
+
+    prune_hash_file = f"{args.output_dir}/prunable-hashes-{log_suffix}.txt"
+    with open(f"{prune_hash_file}", "w") as fd:
+        fd.writelines("\n".join(prunable_hashes))
+
+    if prunable_hashes:
+        print("-- Finding prunable files")
+
+        pruned = pruner.prune(prunable_hashes)
+
+        pruned_keys = [ obj.key for obj in pruned ]
+
+        print(f"-- Found prunable {len(pruned)} files in buildcache")
+        total_size_human = convert_size(sum(obj.size for obj in pruned))
+        print(f"-- Total size of prunable files is {total_size_human}")
+
+        prune_list_file = f"{args.output_dir}/prunable-files-{log_suffix}.txt"
+        with open(f"{prune_list_file}", "w") as fd:
+            fd.writelines("\n".join(pruned_keys))
+    else:
+        print("-- Nothing to prune")
+
+    if args.delete:
+        print("-- Pruning build cache")
+        err, fail = cache.delete(pruned_keys, processes=args.nprocs)
+        fname_template = f"{args.output_dir}/delete-{{0}}-{log_suffix}.json"
+        if err:
+            print("errors found")
+            with open(fname_template.format("errors"), "w") as fd:
+                helper.write_json(fd, err)
+
+        if fail:
+            print("failures found")
+            with open(fname_template.format("failures"), "w") as fd:
+                helper.write_json(fd, fail)
+
diff --git a/images/ci-prune-buildcache/fs_buildcache.py b/images/ci-prune-buildcache/fs_buildcache.py
index 78ca9b13b..ab4046973 100644
--- a/images/ci-prune-buildcache/fs_buildcache.py
+++ b/images/ci-prune-buildcache/fs_buildcache.py
@@ -10,8 +10,9 @@
 
 class FileSystemObject(Object):
     def __init__(self, entry: os.DirEntry):
-        lm = datetime.fromtimestamp(entry.stat().st_mtime)
-        super().__init__(bucket_name=None, key=entry.path, last_modified = lm)
+        stat = entry.stat()
+        lm = datetime.fromtimestamp(stat.st_mtime)
+        super().__init__(bucket_name=None, key=entry.path, last_modified = lm, size = stat.st_size)
         if entry.is_file():
             self._get_method = self._get_file
         elif entry.is_dir():
diff --git a/images/ci-prune-buildcache/pruner.py b/images/ci-prune-buildcache/pruner.py
index 23ccf36c4..57aab66b9 100644
--- a/images/ci-prune-buildcache/pruner.py
+++ b/images/ci-prune-buildcache/pruner.py
@@ -1,9 +1,19 @@
 from buildcache import BuildCache, Object
 from datetime import datetime, timedelta, timezone
 
+import argparse
 import helper
 import multiprocessing.pool as pool
 
+CLI_ARGS_DICT = {
+    "start_date": None,
+    "delete": False,
+    "nprocs": 1,
+}
+
+DEFAULT_CLI_ARGS = argparse.Namespace()
+DEFAULT_CLI_ARGS.__dict__.update(CLI_ARGS_DICT)
+
 class PrunedObject(Object):
     def __init__(self, obj: Object, method: str):
         self.__dict__.update(obj.__dict__)
@@ -14,7 +24,7 @@ class BasePruner:
     spec_ext = (".spec.json", ".spec.yaml", ".spec.json.sig")
     tarball_ext = (".spack", ".tar.gz")
 
-    def __init__(self, buildcache: BuildCache, keep_hash_list, cli_args):
+    def __init__(self, buildcache: BuildCache, keep_hash_list, cli_args=DEFAULT_CLI_ARGS):
         self.buildcache = buildcache
         self.keep_hashes = keep_hash_list
         self.cli_args = cli_args
@@ -22,7 +32,10 @@ def __init__(self, buildcache: BuildCache, keep_hash_list, cli_args):
         self.prunable_hashes = set()
 
         self.prune_ext = self.spec_ext + self.tarball_ext
-        self.start_date = datetime.fromisoformat(cli_args.start_date)
+        if isinstance(cli_args.start_date, datetime):
+            self.start_date = cli_args.start_date
+        else:
+            self.start_date = datetime.fromisoformat(cli_args.start_date)
 
         self.enable_delete = self.cli_args.delete
@@ -72,7 +85,7 @@ def _list(self, ext=None, wrapped=True):
             else:
                 yield obj
 
-    def 
prune(self, prunable_hashes=None):
+    def prune(self, prunable_hashes=None, compute_size=False):
         """ Prune the buildcache
         """
         # Get the list of prunable hashes
@@ -191,7 +204,7 @@ class OrphanPruner(BasePruner):
     """Pruning Strategy that looks for .spack binaries with no matching
     spec.json buildcache
     """
-    def __init__(self, buildcache: BuildCache, date_cutoff: datetime, cli_args):
+    def __init__(self, buildcache: BuildCache, date_cutoff: datetime, cli_args=DEFAULT_CLI_ARGS):
         BasePruner.__init__(self, buildcache, None, cli_args)
 
         self.date_cutoff = datetime.fromisoformat(cli_args.start_date)
@@ -258,20 +271,35 @@ def a_not_in_b(a, b):
 
         return self.prunable_hashes
 
+PRUNER_TYPES = {
+        "direct": DirectPruner,
+        "orphan": OrphanPruner,
+        "index": IndexPruner,
+        }
 
-def pruner_factory(cache, args, keep_hashes=[], since=None):
+def pruner_factory(cache, method, args=DEFAULT_CLI_ARGS, keep_hashes=[], since=None):
     """ Factory with variable args a kwargs """
-    # make sure only one type was supplied
-    type_sum = int(args.direct) + int(args.orphaned) + int(args.check_index)
-    assert type_sum == 1
-
-    if args.direct:
-        return DirectPruner(cache, keep_hashes, args)
-    elif args.orphaned:
-        return OrphanPruner(cache, since, args)
-    elif args.check_index:
-        return IndexPruner(cache, keep_hashes, args)
+    obj = PRUNER_TYPES.get(method, None)
+
+    # checks that the arguments passed meet the needs of the pruner objects
+    new_args = argparse.Namespace()
+    new_args.__dict__.update(CLI_ARGS_DICT)
+    # Update the first namespace with values from the second namespace
+    for key, value in args.__dict__.items():
+        if key in new_args.__dict__ and key in args.__dict__:
+            new_args.__dict__[key] = value
+        elif key not in new_args.__dict__:
+            continue
+        else:
+            raise Exception(f"Missing {key} in the arguments passed to the pruner factory")
+
+
+
+    if method=="direct" or method=="index":
+        return obj(cache, keep_hashes, new_args)
+    elif method=="orphan":
+        return obj(cache, since, new_args)
     else:
-        raise Exception("Pruner type not implemented")
+        raise Exception(f"Pruner {method} type not implemented")
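
A note on the delete() fan-out used throughout the series (S3BuildCache.delete and FileSystemBuildCache.delete share it): with stride = math.ceil(nkeys / per_page), worker i is handed the slice delete_keys[i:nkeys:stride], so the slices are disjoint, cover every key exactly once, and each holds at most per_page items. A minimal, self-contained sketch of the partitioning (the key names are made up for illustration):

    import math

    keys = [f"blob-{n}" for n in range(8)]    # hypothetical keys
    per_page = 3
    stride = math.ceil(len(keys) / per_page)  # ceil(8 / 3) = 3 slices

    # Worker i takes every stride-th key starting at offset i, so no
    # slice exceeds per_page items and no key appears in two slices.
    for i in range(stride):
        print(i, keys[i:len(keys):stride])
    # 0 ['blob-0', 'blob-3', 'blob-6']
    # 1 ['blob-1', 'blob-4', 'blob-7']
    # 2 ['blob-2', 'blob-5']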
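
For orientation, the sketch below shows how the pieces land after PATCH 12: a FileSystemBuildCache wrapped by a pruner from pruner_factory, with deletion kept opt-in. It assumes the scripts' directory is on the import path, and the mirror path is hypothetical; it is an illustration of the final API, not part of the series:

    from argparse import Namespace
    from datetime import datetime, timedelta, timezone

    from fs_buildcache import FileSystemBuildCache
    from pruner import pruner_factory

    # Hypothetical local mirror laid out as <path>/build_cache/
    cache = FileSystemBuildCache("/tmp/mirror/build_cache/")

    # pruner_factory only forwards the keys named in CLI_ARGS_DICT
    # (start_date, delete, nprocs); after PATCH 12 start_date may be
    # a datetime rather than an ISO string.
    window_start = datetime.now(timezone.utc) - timedelta(days=30)
    args = Namespace(start_date=window_start, delete=False, nprocs=4)

    pruner = pruner_factory(cache, "direct", args, keep_hashes=[], since=window_start)
    pruned = pruner.prune(pruner.determine_prunable_hashes())
    print(f"{len(pruned)} objects would be pruned")

    # Deletion stays opt-in, mirroring the -D/--delete flag:
    # errors, failures = cache.delete([obj.key for obj in pruned], processes=args.nprocs)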