Skip to content

Commit

Permalink
YDA-5600 create group lifecycle management report
Browse files Browse the repository at this point in the history
  • Loading branch information
stsnel committed Feb 16, 2024
1 parent e9cbbbd commit 88497b4
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 0 deletions.
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,23 @@ List of columns if --by-extension is enabled:
4. Extension
5. Name of collection
### yreport\_grouplifecycle
```
usage: yreport_grouplifecycle [-h] [-q] [-y {1.7,1.8,1.9}]

Generates a list of groups, along with their creation date, expiration date
(if available), as well as whether the research and vault compartment contain
any collections or data objects.

optional arguments:
-h, --help show this help message and exit
-q, --quasi-xml Enable Quasi-XML parser in order to be able to parse
characters not supported by regular XML parser
-y {1.7,1.8,1.9}, --yoda-version {1.7,1.8,1.9}
Override Yoda version on the server
```
### yreport\_intake
Prints an intake collection report. This report is only relevant for environments
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
'console_scripts': [
'yreport_dataobjectspercollection= yclienttools.reportdoc:entry',
'yreport_collectionsize= yclienttools.reportsize:entry',
'yreport_grouplifecycle= yclienttools.reportgrouplifecycle:entry',
'yreport_intake= yclienttools.reportintake:entry',
'yreport_linecount= yclienttools.reportlinecount:entry',
'ycleanup_files= yclienttools.cleanupfiles:entry',
Expand Down
129 changes: 129 additions & 0 deletions yclienttools/reportgrouplifecycle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
'''Generates a list of groups, along with their creation date, expiration date (if available),
as well as whether the research and vault compartment contain any collections or data objects .'''

import argparse
import csv
import itertools
import sys

from irods.column import Like
from irods.message import (XML_Parser_Type, ET)
from irods.models import Collection, DataObject, User
from yclienttools import common_args, common_config
from yclienttools import session as s


def entry():
    '''Entry point: parse arguments, set up a session and print the report.

    Once the session has been set up it is always cleaned up, also when
    generating the report fails or the user interrupts the script.
    '''
    try:
        args = _get_args()
        yoda_version = args.yoda_version if args.yoda_version is not None else common_config.get_default_yoda_version()
        session = s.setup_session(yoda_version)
        try:
            if args.quasi_xml:
                # Select the Quasi-XML parser before any query is run.
                ET(XML_Parser_Type.QUASI_XML, session.server_version)
            report_groups_lifecycle(args, session)
        finally:
            # Release the session even if the report raised or was interrupted.
            session.cleanup()

    except KeyboardInterrupt:
        print("Script interrupted by user.\n", file=sys.stderr)


def _get_args():
    '''Build the argument parser for this script and return the parsed arguments.

    The module docstring is used as the description in the help output.
    '''
    quasi_xml_help = 'Enable Quasi-XML parser in order to be able to parse characters not supported by regular XML parser'
    arg_parser = argparse.ArgumentParser(description=__doc__)
    arg_parser.add_argument(
        "-q", "--quasi-xml",
        action='store_true',
        default=False,
        help=quasi_xml_help)
    # Options shared between the client tools are registered by common_args.
    common_args.add_default_args(arg_parser)
    parsed_args = arg_parser.parse_args()
    return parsed_args


def _get_group_attributes(session, group_name):
    """Collects the relevant metadata of a group in a dictionary.

    Only the "category", "subcategory" and "expiration_date" attributes are
    included. Attribute names are assumed to be unique; if a name occurs more
    than once, the value that is read last is kept. An empty dictionary is
    returned if no group with this name is found.
    """
    wanted = {"category", "subcategory", "expiration_date"}
    matching_groups = list(
        session.query(User)
        .filter(User.name == group_name)
        .filter(User.type == "rodsgroup")
        .get_results())

    attributes = {}
    for row in matching_groups:
        group = session.users.get(row[User.name])
        attributes.update(
            (avu.name, avu.value)
            for avu in group.metadata.items()
            if avu.name in wanted)

    return attributes


def _group_research_has_data(session, group_name):
    """Checks whether the research compartment of a group holds any data.

    The research compartment is the collection /<zone>/home/<group name>.
    The outcome of _collection_has_data for that collection is returned
    unchanged.
    """
    compartment_path = "/{}/home/{}".format(session.zone, group_name)
    has_data = _collection_has_data(session, compartment_path)
    return has_data


def _group_vault_has_data(session, group_name):
    """Checks whether the vault compartment of a group holds any data.

    The vault compartment is the collection /<zone>/home/<vault group name>,
    where the vault group name is the research group name with its first
    "research-" replaced by "vault-". The outcome of _collection_has_data for
    that collection is returned unchanged.
    """
    # Rewrite the group name only. Doing the replacement on the full path
    # would change the zone name instead if it happens to contain "research-".
    vault_group_name = group_name.replace("research-", "vault-", 1)
    vault_collection = f"/{session.zone}/home/{vault_group_name}"
    return _collection_has_data(session, vault_collection)


def _collection_has_data(session, coll_name):
    """Tells whether a collection contains any data objects or subcollections.

    Returns None if the collection does not exist, so that callers can tell
    a missing compartment apart from an empty one. Otherwise returns True if
    the collection has at least one data object or subcollection, and False
    if it is empty.
    """
    if not session.collections.exists(coll_name):
        return None

    # A data object below the collection root always lives in a subcollection,
    # so probing for root data objects and for subcollections covers all data.
    probes = (
        session.query(Collection.name, DataObject.name).filter(
            Collection.name == coll_name),
        session.query(Collection.name).filter(
            Like(Collection.name, coll_name + "/%")),
    )
    # first() fetches at most one row, so large collections are not listed
    # in full just to find out that they are not empty.
    return any(probe.first() is not None for probe in probes)


def _get_group_creation_date(session, group_name):
    """Looks up the creation time of the entry named group_name.

    Returns the create time of the first matching row, or None if the
    query yields no rows.
    """
    query = session.query(User.create_time).filter(User.name == group_name)
    rows = list(query.get_results())
    if not rows:
        return None
    return rows[0][User.create_time]


def _get_research_groups_list(session):
    """Returns the names of all groups whose name starts with "research-".

    The names are returned in the order in which the query yields them.
    """
    research_groups = []
    all_groups = session.query(User).filter(User.type == 'rodsgroup').get_results()
    for row in all_groups:
        name = row[User.name]
        if name.startswith("research-"):
            research_groups.append(name)
    return research_groups


def report_groups_lifecycle(args, session):
    """Writes the group lifecycle report as CSV to standard output.

    The report has a header row followed by one row per research group,
    sorted by group name. Each row has the group name, category, subcategory,
    creation date (YYYY-MM-DD), expiration date, and whether the research
    and vault compartments have data. The args parameter is currently not
    used by this function.
    """
    def _has_data_to_string(value):
        # None means "not applicable"; anything else is judged by truthiness.
        if value is None:
            return "N/A"
        if value:
            return "yes"
        return "no"

    header = ["Group name", "Category", "Subcategory", "Creation date", "Expiration date",
              "Has research data", "Has vault data"]
    writer = csv.writer(sys.stdout, delimiter=',')
    writer.writerow(header)

    group_names = sorted(_get_research_groups_list(session))
    for group_name in group_names:
        metadata = _get_group_attributes(session, group_name)

        created = _get_group_creation_date(session, group_name)
        if created is not None:
            created_text = created.strftime("%Y-%m-%d")
        else:
            created_text = "N/A"

        research_text = _has_data_to_string(
            _group_research_has_data(session, group_name))
        vault_text = _has_data_to_string(
            _group_vault_has_data(session, group_name))

        writer.writerow([
            group_name,
            metadata.get("category", "no category"),
            metadata.get("subcategory", "no subcategory"),
            created_text,
            metadata.get("expiration_date", "N/A"),
            research_text,
            vault_text,
        ])

0 comments on commit 88497b4

Please sign in to comment.