Skip to content

Commit

Permalink
YDA-5600 create group lifecycle management report
Browse files Browse the repository at this point in the history
  • Loading branch information
stsnel committed Feb 16, 2024
1 parent e9cbbbd commit 5df2cd3
Show file tree
Hide file tree
Showing 3 changed files with 178 additions and 0 deletions.
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,23 @@ List of columns if --by-extension is enabled:
4. Extension
5. Name of collection
### yreport\_grouplifecycle
```
usage: yreport_grouplifecycle [-h] [-q] [-y {1.7,1.8,1.9}]

Generates a list of groups, along with their creation date, expiration date
(if available), as well as whether the research and vault compartment contain
any collections or data objects .

optional arguments:
-h, --help show this help message and exit
-q, --quasi-xml Enable Quasi-XML parser in order to be able to parse
characters not supported by regular XML parser
-y {1.7,1.8,1.9}, --yoda-version {1.7,1.8,1.9}
Override Yoda version on the server
```
### yreport\_intake
Prints an intake collection report. This report is only relevant for environments
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
'console_scripts': [
'yreport_dataobjectspercollection= yclienttools.reportdoc:entry',
'yreport_collectionsize= yclienttools.reportsize:entry',
'yreport_grouplifecycle= yclienttools.reportgrouplifecycle:entry',
'yreport_intake= yclienttools.reportintake:entry',
'yreport_linecount= yclienttools.reportlinecount:entry',
'ycleanup_files= yclienttools.cleanupfiles:entry',
Expand Down
160 changes: 160 additions & 0 deletions yclienttools/reportgrouplifecycle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
'''Generates a list of groups, along with their creation date, expiration date (if available),
as well as whether the research and vault compartment contain any collections or data objects .'''

import argparse
import csv
import itertools
import sys

from irods.column import Like
from irods.message import (XML_Parser_Type, ET)
from irods.models import Collection, DataObject, User
from yclienttools import common_args, common_config
from yclienttools import session as s


def entry():
'''Entry point'''
try:
args = _get_args()
yoda_version = args.yoda_version if args.yoda_version is not None else common_config.get_default_yoda_version()
session = s.setup_session(yoda_version)
if args.quasi_xml:
ET(XML_Parser_Type.QUASI_XML, session.server_version)
report_groups_lifecycle(args, session)
session.cleanup()

except KeyboardInterrupt:
print("Script interrupted by user.\n", file=sys.stderr)


def _get_args():
'''Parse command line arguments'''
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("-q", "--quasi-xml", default=False, action='store_true',
help='Enable Quasi-XML parser in order to be able to parse characters not supported by regular XML parser')
common_args.add_default_args(parser)
return parser.parse_args()


def _get_group_attributes(session, group_name):
"""Retrieves a dictionary of attribute-values of group metadata.
This assumes attribute names are unique.
"""
relevant_single_attributes = {"category", "subcategory", "expiration_date"}
relevant_multiple_attributes = {"manager"}
result = dict()
group_objects = list(session.query(User).filter(
User.name == group_name).filter(
User.type == "rodsgroup").get_results())

if len(group_objects) > 0:
for attribute in relevant_multiple_attributes:
result[attribute] = []
for group_object in group_objects:
obj = session.users.get(group_object[User.name])
avus = obj.metadata.items()
for avu in avus:
if avu.name in relevant_single_attributes:
result[avu.name] = avu.value
elif avu.name in relevant_multiple_attributes:
result[avu.name].append(avu.value)

return result


def _group_research_has_data(session, group_name):
"""Returns boolean that indicates whether the research compartment of
the group has any data (i.e. data objects or subcollections).
if the group has no research department, None is returned.
"""
research_collection = f"/{session.zone}/home/{group_name}"
return _collection_has_data(session, research_collection)


def _group_vault_has_data(session, group_name):
"""Returns boolean that indicates whether the vault compartment of
the group has any data (i.e. data objects or subcollections).
If the group has no vault compartment, None is returned.
"""
vault_collection = f"/{session.zone}/home/{group_name}".replace(
"research-", "vault-", 1)
return _collection_has_data(session, vault_collection)


def _collection_has_data(session, coll_name):
root_data_objects = session.query(Collection.name, DataObject.name).filter(
Collection.name == coll_name).get_results()
sub_data_objects = session.query(Collection.name, DataObject.name).filter(
Like(Collection.name, coll_name + "/%")).get_results()
sub_data_collections = session.query(Collection.name).filter(
Like(Collection.name, coll_name + "/%")).get_results()
return len(list(itertools.chain(root_data_objects,
sub_data_objects,
sub_data_collections)))


def _get_group_creation_date(session, group_name):
create_times = list(session.query(
User.create_time).filter(
User.name == group_name).get_results())
return create_times[0][User.create_time] if len(create_times) else None


def _get_research_groups_list(session):
groups = session.query(User).filter(User.type == 'rodsgroup').get_results()
return [x[User.name]
for x in groups if x[User.name].startswith("research-")]


def _get_regular_members(session, group_name, attributes):
members_and_managers = session.user_groups.getmembers(group_name)
return [
member.name for member in members_and_managers if member.name + "#" + session.zone not in attributes["manager"]]


def _get_readonly_members(session, group_name, attributes):
readonly_group = group_name.replace("research-", "read-", 1)
return [u.name for u in session.user_groups.getmembers(readonly_group)]


def _get_group_managers(session, group_name, attributes):
return [manager.split("#")[0] for manager in attributes["manager"]]


def report_groups_lifecycle(args, session):
output = csv.writer(sys.stdout, delimiter=',')
output.writerow(["Group name", "Category", "Subcategory",
"Group managers", "Regular members", "Read-only members",
"Creation date", "Expiration date", "Has research data", "Has vault data"])

def _has_data_to_string(value):
if value is None:
return "N/A"
else:
return "yes" if value else "no"

for group in sorted(_get_research_groups_list(session)):
attributes = _get_group_attributes(session, group)
category = attributes.get("category", "no category")
subcategory = attributes.get("subcategory", "no subcategory")
group_managers = ";".join(
_get_group_managers(
session, group, attributes))
regular_members = ";".join(
_get_regular_members(
session, group, attributes))
readonly_members = ";".join(
_get_readonly_members(
session, group, attributes))
creation_date = _get_group_creation_date(session, group)
creation_date_str = creation_date.strftime(
"%Y-%m-%d") if creation_date is not None else "N/A"
expiration_date = attributes.get("expiration_date", "N/A")
research_has_data = _has_data_to_string(
_group_research_has_data(session, group))
vault_has_data = _has_data_to_string(
_group_vault_has_data(session, group))
output.writerow([group, category, subcategory,
group_managers, regular_members, readonly_members,
creation_date_str, expiration_date, research_has_data, vault_has_data])

0 comments on commit 5df2cd3

Please sign in to comment.