From 5df2cd38ba681dcce7328b40b448689acb7ee385 Mon Sep 17 00:00:00 2001 From: Sietse Snel Date: Fri, 16 Feb 2024 16:23:35 +0100 Subject: [PATCH] YDA-5600 create group lifecycle management report --- README.md | 17 +++ setup.py | 1 + yclienttools/reportgrouplifecycle.py | 160 +++++++++++++++++++++++++++ 3 files changed, 178 insertions(+) create mode 100644 yclienttools/reportgrouplifecycle.py diff --git a/README.md b/README.md index d6dc47e..dd6477d 100644 --- a/README.md +++ b/README.md @@ -328,6 +328,23 @@ List of columns if --by-extension is enabled: 4. Extension 5. Name of collection +### yreport\_grouplifecycle + +``` +usage: yreport_grouplifecycle [-h] [-q] [-y {1.7,1.8,1.9}] + +Generates a list of groups, along with their creation date, expiration date +(if available), as well as whether the research and vault compartment contain +any collections or data objects. + +optional arguments: + -h, --help show this help message and exit + -q, --quasi-xml Enable Quasi-XML parser in order to be able to parse + characters not supported by regular XML parser + -y {1.7,1.8,1.9}, --yoda-version {1.7,1.8,1.9} + Override Yoda version on the server +``` + ### yreport\_intake Prints an intake collection report.
This report is only relevant for environments
diff --git a/setup.py b/setup.py
index 26fea6e..341724c 100644
--- a/setup.py
+++ b/setup.py
@@ -20,6 +20,7 @@
         'console_scripts': [
             'yreport_dataobjectspercollection= yclienttools.reportdoc:entry',
             'yreport_collectionsize= yclienttools.reportsize:entry',
+            'yreport_grouplifecycle= yclienttools.reportgrouplifecycle:entry',
             'yreport_intake= yclienttools.reportintake:entry',
             'yreport_linecount= yclienttools.reportlinecount:entry',
             'ycleanup_files= yclienttools.cleanupfiles:entry',
diff --git a/yclienttools/reportgrouplifecycle.py b/yclienttools/reportgrouplifecycle.py
new file mode 100644
index 0000000..ae3933a
--- /dev/null
+++ b/yclienttools/reportgrouplifecycle.py
@@ -0,0 +1,194 @@
+'''Generates a list of groups, along with their creation date, expiration date (if available),
+   as well as whether the research and vault compartment contain any collections or data objects.'''
+
+import argparse
+import csv
+import sys
+
+from irods.column import Like
+from irods.message import (XML_Parser_Type, ET)
+from irods.models import Collection, DataObject, User
+from yclienttools import common_args, common_config
+from yclienttools import session as s
+
+
+def entry():
+    '''Entry point'''
+    try:
+        args = _get_args()
+        yoda_version = args.yoda_version if args.yoda_version is not None else common_config.get_default_yoda_version()
+        session = s.setup_session(yoda_version)
+        try:
+            if args.quasi_xml:
+                ET(XML_Parser_Type.QUASI_XML, session.server_version)
+            report_groups_lifecycle(args, session)
+        finally:
+            # Always clean up the session, also when the report fails halfway
+            # or is interrupted by the user.
+            session.cleanup()
+
+    except KeyboardInterrupt:
+        print("Script interrupted by user.\n", file=sys.stderr)
+
+
+def _get_args():
+    '''Parse command line arguments'''
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("-q", "--quasi-xml", default=False, action='store_true',
+                        help='Enable Quasi-XML parser in order to be able to parse characters not supported by regular XML parser')
+    common_args.add_default_args(parser)
+    return parser.parse_args()
+
+
+def _get_group_attributes(session, group_name):
+    """Retrieves a dictionary of attribute-values of group metadata.
+
+    The single-valued attributes (category, subcategory, expiration_date)
+    are only present if they are set on the group. This assumes attribute
+    names are unique. The multi-valued "manager" attribute is always present
+    and maps to a (possibly empty) list, so that callers can index it even
+    if the group has no managers or could not be found.
+    """
+    relevant_single_attributes = {"category", "subcategory", "expiration_date"}
+    relevant_multiple_attributes = {"manager"}
+    # Start with an empty list for every multi-valued attribute.
+    result = {attribute: [] for attribute in relevant_multiple_attributes}
+    group_objects = list(session.query(User).filter(
+        User.name == group_name).filter(
+        User.type == "rodsgroup").get_results())
+
+    for group_object in group_objects:
+        obj = session.users.get(group_object[User.name])
+        for avu in obj.metadata.items():
+            if avu.name in relevant_single_attributes:
+                result[avu.name] = avu.value
+            elif avu.name in relevant_multiple_attributes:
+                result[avu.name].append(avu.value)
+
+    return result
+
+
+def _group_research_has_data(session, group_name):
+    """Returns boolean that indicates whether the research compartment of
+    the group has any data (i.e. data objects or subcollections).
+    If the group has no research compartment, None is returned.
+    """
+    research_collection = f"/{session.zone}/home/{group_name}"
+    return _collection_has_data(session, research_collection)
+
+
+def _group_vault_has_data(session, group_name):
+    """Returns boolean that indicates whether the vault compartment of
+    the group has any data (i.e. data objects or subcollections).
+    If the group has no vault compartment, None is returned.
+    """
+    # Derive the vault group name from the group name only. Replacing on the
+    # full path would rewrite the zone name rather than the group name if the
+    # zone name happens to contain "research-".
+    vault_group = group_name.replace("research-", "vault-", 1)
+    vault_collection = f"/{session.zone}/home/{vault_group}"
+    return _collection_has_data(session, vault_collection)
+
+
+def _collection_has_data(session, coll_name):
+    """Returns boolean that indicates whether the collection contains any
+    data objects or subcollections. If the collection does not exist,
+    None is returned.
+    """
+    if not session.collections.exists(coll_name):
+        return None
+
+    # Only existence matters, so fetch at most one row per query rather than
+    # listing the full contents of the collection tree. Data objects in
+    # subcollections need no separate query: they imply that a subcollection
+    # exists.
+    root_data_objects = session.query(Collection.name, DataObject.name).filter(
+        Collection.name == coll_name)
+    sub_collections = session.query(Collection.name).filter(
+        Like(Collection.name, coll_name + "/%"))
+    return (root_data_objects.first() is not None
+            or sub_collections.first() is not None)
+
+
+def _get_group_creation_date(session, group_name):
+    """Returns the creation time of the group, or None if the group
+    could not be found.
+    """
+    group = session.query(User.create_time).filter(
+        User.name == group_name).first()
+    return group[User.create_time] if group is not None else None
+
+
+def _get_research_groups_list(session):
+    """Returns the names of all groups that start with "research-"."""
+    groups = session.query(User).filter(User.type == 'rodsgroup').get_results()
+    return [x[User.name]
+            for x in groups if x[User.name].startswith("research-")]
+
+
+def _get_regular_members(session, group_name, attributes):
+    """Returns the names of the group members that are not group managers.
+    A member counts as manager if "<name>#<zone of the session>" is listed
+    in the manager attribute.
+    """
+    managers = set(attributes.get("manager", []))
+    members_and_managers = session.user_groups.getmembers(group_name)
+    return [
+        member.name for member in members_and_managers if member.name + "#" + session.zone not in managers]
+
+
+def _get_readonly_members(session, group_name, attributes):
+    """Returns the names of the members of the read-only ("read-") group
+    that corresponds to the research group. The attributes parameter is unused.
+    """
+    readonly_group = group_name.replace("research-", "read-", 1)
+    return [u.name for u in session.user_groups.getmembers(readonly_group)]
+
+
+def _get_group_managers(session, group_name, attributes):
+    """Returns the names (without zone) of the group managers listed in the
+    manager attribute. The session and group_name parameters are unused.
+    """
+    return [manager.split("#")[0] for manager in attributes.get("manager", [])]
+
+
+def report_groups_lifecycle(args, session):
+    """Prints a CSV report to standard output: a header row, followed by
+    one row per research group, sorted by group name. The args parameter
+    is currently unused.
+    """
+    output = csv.writer(sys.stdout, delimiter=',')
+    output.writerow(["Group name", "Category", "Subcategory",
+                     "Group managers", "Regular members", "Read-only members",
+                     "Creation date", "Expiration date", "Has research data", "Has vault data"])
+
+    def _has_data_to_string(value):
+        if value is None:
+            return "N/A"
+        else:
+            return "yes" if value else "no"
+
+    for group in sorted(_get_research_groups_list(session)):
+        attributes = _get_group_attributes(session, group)
+        category = attributes.get("category", "no category")
+        subcategory = attributes.get("subcategory", "no subcategory")
+        group_managers = ";".join(
+            _get_group_managers(
+                session, group, attributes))
+        regular_members = ";".join(
+            _get_regular_members(
+                session, group, attributes))
+        readonly_members = ";".join(
+            _get_readonly_members(
+                session, group, attributes))
+        creation_date = _get_group_creation_date(session, group)
+        creation_date_str = creation_date.strftime(
+            "%Y-%m-%d") if creation_date is not None else "N/A"
+        expiration_date = attributes.get("expiration_date", "N/A")
+        research_has_data = _has_data_to_string(
+            _group_research_has_data(session, group))
+        vault_has_data = _has_data_to_string(
+            _group_vault_has_data(session, group))
+        output.writerow([group, category, subcategory,
+                         group_managers, regular_members, readonly_members,
+                         creation_date_str, expiration_date, research_has_data, vault_has_data])