From ddd0c83b6be96df40bcb8980bb8f2b8c6e44f94c Mon Sep 17 00:00:00 2001 From: Lukas Juhrich Date: Wed, 22 Dec 2021 16:28:34 +0100 Subject: [PATCH 01/12] Add rudimentary archival test --- tests/factories/user.py | 1 + tests/lib/user/test_deletion.py | 89 +++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 tests/lib/user/test_deletion.py diff --git a/tests/factories/user.py b/tests/factories/user.py index bdfffcc76..b8d509ff2 100644 --- a/tests/factories/user.py +++ b/tests/factories/user.py @@ -32,6 +32,7 @@ class Meta: room = factory.SubFactory(RoomFactory) address = factory.SelfAttribute('room.address') unix_account = None + swdd_person_id = None @classmethod def _create(cls, model_class, *args, **kwargs): diff --git a/tests/lib/user/test_deletion.py b/tests/lib/user/test_deletion.py new file mode 100644 index 000000000..a44017c53 --- /dev/null +++ b/tests/lib/user/test_deletion.py @@ -0,0 +1,89 @@ +# Copyright (c) 2021. The Pycroft Authors. See the AUTHORS file. +# This file is part of the Pycroft project and licensed under the terms of +# the Apache License, Version 2.0. See the LICENSE file for details +from datetime import datetime, date + +import pytest + +from pycroft.helpers.interval import closed, closedopen +from tests.factories import UserFactory, ConfigFactory, MembershipFactory, PropertyGroupFactory, \ + HostFactory + +from pycroft.lib.user_deletion import get_archivable_members + + +@pytest.fixture(scope='module') +def config(module_session): + return ConfigFactory() + + +def test_no_archivable_users(session): + assert get_archivable_members(session) == [] + + +def test_users_without_membership_not_in_list(session): + UserFactory.create_batch(5) + assert get_archivable_members(session) == [] + + +def assert_archivable_members(members, expected_user, expected_end_date): + match members: + case [(user, mem_id, mem_end)]: + assert user == expected_user + assert mem_id is not None + assert mem_end.date() == expected_end_date + case _: + pytest.fail() + + +class TestUserDeletion: + @pytest.fixture(scope='class') + def do_not_archive_group(self, class_session): + return PropertyGroupFactory(granted={'do-not-archive'}) + + @pytest.fixture(scope='class') + def old_user(self, class_session, config, do_not_archive_group): + user = UserFactory.create( + registered_at=datetime(2000, 1, 1), + with_membership=True, + membership__active_during=closed(datetime(2020, 1, 1), datetime(2020, 3, 1)), + membership__group=config.member_group, + ) + MembershipFactory.create( + user=user, group=do_not_archive_group, + active_during=closed(datetime(2000, 1, 1), datetime(2010, 1, 1)) + ) + return user + + @pytest.fixture(scope='class', autouse=True) + def other_users(self, class_session): + return UserFactory.create_batch(5) + + @pytest.fixture + def do_not_archive_membership(self, session, old_user, do_not_archive_group): + return MembershipFactory( + user=old_user, group=do_not_archive_group, + active_during=closedopen(datetime(2020, 1, 1), None), + ) + + def test_old_users_in_deletion_list(self, session, old_user): + members = get_archivable_members(session) + assert_archivable_members(members, old_user, date(2020, 3, 1)) + + def test_user_with_do_not_archive_not_in_list(self, session, old_user, + do_not_archive_membership): + assert get_archivable_members(session) == [] + + @pytest.mark.parametrize('num_hosts', [0, 2]) + def test_user_with_host_in_list(self, session, old_user, num_hosts): + if num_hosts: + HostFactory.create_batch(num_hosts, owner=old_user) + members = get_archivable_members(session) + assert_archivable_members(members, old_user, date(2020, 3, 1)) + + def test_user_with_room_in_list(self, session, old_user): + with session.begin_nested(): + old_user.room = None + session.add(old_user) + members = get_archivable_members(session) + assert_archivable_members(members, old_user, date(2020, 3, 1)) From 8da8f9f5001080e23fc460eabd3f5d1691cce67a Mon Sep 17 00:00:00 2001 From: Lukas Juhrich Date: Mon, 21 Feb 2022 19:28:15 +0100 Subject: [PATCH 02/12] Extract timedelta from archivable membership filter --- pycroft/lib/user_deletion.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pycroft/lib/user_deletion.py b/pycroft/lib/user_deletion.py index bf831f9dd..6de00ba72 100644 --- a/pycroft/lib/user_deletion.py +++ b/pycroft/lib/user_deletion.py @@ -24,12 +24,16 @@ class ArchivableMemberInfo(Protocol): mem_end: datetime -def get_archivable_members(session: Session) -> Sequence[ArchivableMemberInfo]: +def get_archivable_members(session: Session, delta: timedelta = timedelta(days=14)) \ + -> Sequence[ArchivableMemberInfo]: """Return all the users that qualify for being archived right now. Selected are those users - whose last membership in the member_group ended two weeks in the past, - excluding users who currently have the `do-not-archive` property. + + :param session: + :param delta: how far back the end of membership has to lie (positive timedelta). """ # see FunctionElement.over mem_ends_at = func.upper(Membership.active_during) @@ -71,7 +75,7 @@ def get_archivable_members(session: Session) -> Sequence[ArchivableMemberInfo]: # …and use that to filter out the `do-not-archive` occurrences. .filter(CurrentProperty.property_name.is_(None)) .join(User, User.id == last_mem.c.user_id) - .filter(last_mem.c.mem_end < current_timestamp() - timedelta(days=14)) # type: ignore[no-untyped-call] + .filter(last_mem.c.mem_end < current_timestamp() - delta) # type: ignore[no-untyped-call] .order_by(last_mem.c.mem_end) .options(joinedload(User.hosts), # joinedload(User.current_memberships), joinedload(User.account, innerjoin=True), joinedload(User.room), From c0848aced377c5a5d05807618d75462deffbd498 Mon Sep 17 00:00:00 2001 From: Lukas Juhrich Date: Mon, 21 Feb 2022 19:30:45 +0100 Subject: [PATCH 03/12] reintroduce `cast` to silence warning --- pycroft/lib/user_deletion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pycroft/lib/user_deletion.py b/pycroft/lib/user_deletion.py index 6de00ba72..d822220d0 100644 --- a/pycroft/lib/user_deletion.py +++ b/pycroft/lib/user_deletion.py @@ -6,7 +6,7 @@ """ from __future__ import annotations from datetime import timedelta, datetime -from typing import Protocol, Sequence +from typing import Protocol, Sequence, cast from sqlalchemy import func, nulls_last, and_, not_ from sqlalchemy.future import select @@ -82,7 +82,7 @@ def get_archivable_members(session: Session, delta: timedelta = timedelta(days=1 joinedload(User.current_properties_maybe_denied)) ) - return session.execute(stmt).unique().all() + return cast(list[ArchivableMemberInfo], session.execute(stmt).unique().all()) def get_invalidated_archive_memberships() -> list[Membership]: From 74c074f85ac4bdc3981279084e137fdbcb930fc8 Mon Sep 17 00:00:00 2001 From: Lukas Juhrich Date: Mon, 21 Feb 2022 19:43:03 +0100 Subject: [PATCH 04/12] Add test for longer delta in archivable query --- tests/lib/user/test_deletion.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/lib/user/test_deletion.py b/tests/lib/user/test_deletion.py index a44017c53..a23f5645d 100644 --- a/tests/lib/user/test_deletion.py +++ b/tests/lib/user/test_deletion.py @@ -6,10 +6,10 @@ import pytest from pycroft.helpers.interval import closed, closedopen -from tests.factories import UserFactory, ConfigFactory, MembershipFactory, PropertyGroupFactory, \ - HostFactory - from pycroft.lib.user_deletion import get_archivable_members +from tests.factories import UserFactory, ConfigFactory, MembershipFactory, \ + PropertyGroupFactory, \ + HostFactory @pytest.fixture(scope='module') @@ -70,6 +70,10 @@ def test_old_users_in_deletion_list(self, session, old_user): members = get_archivable_members(session) assert_archivable_members(members, old_user, date(2020, 3, 1)) + def test_old_user_not_in_list_with_long_delta(self, session, old_user): + delta = date.today() - date(2020, 1, 1) # before 2020-03-01 + assert get_archivable_members(session, delta) == [] + def test_user_with_do_not_archive_not_in_list(self, session, old_user, do_not_archive_membership): assert get_archivable_members(session) == [] From 014464685e7be706ea3e66623955f8ce5a360703 Mon Sep 17 00:00:00 2001 From: Lukas Juhrich Date: Mon, 21 Feb 2022 21:33:25 +0100 Subject: [PATCH 05/12] Add `archive_users` stub and test --- pycroft/lib/user_deletion.py | 12 ++++++------ tests/factories/log.py | 2 ++ tests/factories/task.py | 9 +++++++-- tests/factories/user.py | 14 ++++++++++++++ tests/lib/user/test_deletion.py | 30 +++++++++++++++++++++++++++++- 5 files changed, 58 insertions(+), 9 deletions(-) diff --git a/pycroft/lib/user_deletion.py b/pycroft/lib/user_deletion.py index d822220d0..937d191f8 100644 --- a/pycroft/lib/user_deletion.py +++ b/pycroft/lib/user_deletion.py @@ -85,10 +85,10 @@ def get_archivable_members(session: Session, delta: timedelta = timedelta(days=1 return cast(list[ArchivableMemberInfo], session.execute(stmt).unique().all()) -def get_invalidated_archive_memberships() -> list[Membership]: - """Get all memberships in `to_be_archived` of users who have an active `do-not-archive` property. - - This can happen if archivability is detected, and later the user becomes a member again, - or if for some reason the user shall not be archived. - """ +def archive_users(session: Session, user_ids: Sequence[int]) -> None: + # todo remove hosts + # todo remove tasks + # todo remove log entries + # todo insert these users into an archival log + # todo add membership in archival group pass diff --git a/tests/factories/log.py b/tests/factories/log.py index 2e2aac955..4b98461ad 100644 --- a/tests/factories/log.py +++ b/tests/factories/log.py @@ -13,6 +13,8 @@ class Meta: message = factory.Faker('paragraph') author = factory.SubFactory(UserFactory) user = factory.SubFactory(UserFactory) + created_at = None + class RoomLogEntryFactory(BaseFactory): class Meta: diff --git a/tests/factories/task.py b/tests/factories/task.py index 7b4ced796..a59e0cbe2 100644 --- a/tests/factories/task.py +++ b/tests/factories/task.py @@ -1,6 +1,6 @@ import datetime -import factory +import factory.fuzzy from pycroft.model.task import UserTask, TaskType, Task, TaskStatus from tests.factories.base import BaseFactory @@ -14,7 +14,7 @@ class TaskFactory(BaseFactory): class Meta: model = Task - type: TaskType = None + type: TaskType = factory.fuzzy.FuzzyChoice(TaskType) due = None parameters_json = None created = None @@ -34,3 +34,8 @@ class Meta: model = UserTask user = factory.SubFactory('tests.factories.UserFactory') + + class Params: + self_created = factory.Trait( + creator=factory.SelfAttribute('user') + ) diff --git a/tests/factories/user.py b/tests/factories/user.py index b8d509ff2..4a15b0750 100644 --- a/tests/factories/user.py +++ b/tests/factories/user.py @@ -65,6 +65,20 @@ class Params: room=None, address=factory.SubFactory('tests.factories.address.AddressFactory'), ) + with_creation_log_entry = factory.Trait( + log_entry=factory.RelatedFactory( + 'tests.factories.log.UserLogEntryFactory', 'user', + created_at=factory.SelfAttribute('..registered_at'), + message="User created", + ) + ) + with_random_task = factory.Trait( + task=factory.RelatedFactory( + 'tests.factories.task.UserTaskFactory', 'user', + self_created=True, + due_yesterday=True, + ) + ) @factory.post_generation def room_history_entries(self, create, extracted, **kwargs): diff --git a/tests/lib/user/test_deletion.py b/tests/lib/user/test_deletion.py index a23f5645d..30ae7e647 100644 --- a/tests/lib/user/test_deletion.py +++ b/tests/lib/user/test_deletion.py @@ -6,7 +6,7 @@ import pytest from pycroft.helpers.interval import closed, closedopen -from pycroft.lib.user_deletion import get_archivable_members +from pycroft.lib.user_deletion import get_archivable_members, archive_users from tests.factories import UserFactory, ConfigFactory, MembershipFactory, \ PropertyGroupFactory, \ HostFactory @@ -91,3 +91,31 @@ def test_user_with_room_in_list(self, session, old_user): session.add(old_user) members = get_archivable_members(session) assert_archivable_members(members, old_user, date(2020, 3, 1)) + + +class TestUserArchival: + @pytest.fixture(scope='class') + def archivable_users(self, class_session, config): + return UserFactory.create_batch( + 3, + with_membership=True, + membership__active_during=closed(datetime(2020, 1, 1), datetime(2020, 3, 1)), + membership__group=config.member_group, + with_host=True, patched=True, + with_creation_log_entry=True, + with_random_task=True, + ) + + @pytest.fixture(scope='class') + def archived_users(self, class_session, archivable_users): + archive_users(class_session, [u.id for u in archivable_users]) + return archivable_users + + @pytest.mark.parametrize('index', [0, 1, 2]) + def test_user_archival(self, archived_users, index): + user = archived_users[index] + assert user.tasks == [], "archival did not delete tasks" + assert [le for le in user.log_entries + if le.created_at == user.registered_at] == [], "archival did not delete logs" + assert user.hosts == [], "archival did not delete hosts" + assert 'archived' in user.current_properties From 2e99189eaaa6decb0d567df1f5c835202de5dafb Mon Sep 17 00:00:00 2001 From: Lukas Juhrich Date: Thu, 4 Aug 2022 02:16:52 +0200 Subject: [PATCH 06/12] Extract last membership select from archive query Introduces `select_users_and_last_mem` returning the `select` --- pycroft/lib/membership.py | 32 +++++++++++++++++++++++++++++++- pycroft/lib/user_deletion.py | 30 ++++-------------------------- 2 files changed, 35 insertions(+), 27 deletions(-) diff --git a/pycroft/lib/membership.py b/pycroft/lib/membership.py index 47402dd63..28669977c 100644 --- a/pycroft/lib/membership.py +++ b/pycroft/lib/membership.py @@ -9,12 +9,15 @@ management. """ +from __future__ import annotations import typing as t -from sqlalchemy import and_, func, distinct, Result +from sqlalchemy import and_, func, distinct, Result, nulls_last from sqlalchemy.future import select from sqlalchemy.orm import aliased +from sqlalchemy.sql import Select, ClauseElement +from pycroft import Config from pycroft.helpers.i18n import deferred_gettext from pycroft.helpers.interval import UnboundedInterval, IntervalSet, Interval from pycroft.helpers.utc import DateTimeTz @@ -203,3 +206,30 @@ def user_memberships_query( .group_by(Membership.id) ) return session.session.execute(memberships) + + +def select_user_and_last_mem() -> Select: # Select[Tuple[int, int, str]] + """Select users with their last membership of a user in the ``member`` group. + + :returns: a select statement with columns ``user_id``, ``mem_id``, ``mem_end``. + """ + mem_ends_at = func.upper(Membership.active_during) + window_args: dict[str, ClauseElement | t.Sequence[ClauseElement | str] | None] = { + "partition_by": User.id, + "order_by": nulls_last(mem_ends_at), + } + return ( + select( + User.id.label("user_id"), + func.last_value(Membership.id) + .over(**window_args, rows=(None, None)) # type: ignore[no-untyped-call] + .label("mem_id"), + func.last_value(mem_ends_at) + .over(**window_args, rows=(None, None)) # type: ignore[no-untyped-call] + .label("mem_end"), + ) + .select_from(User) + .distinct() + .join(Membership) + .join(Config, Config.member_group_id == Membership.group_id) + ) # mypy: ignore[no-untyped-call] diff --git a/pycroft/lib/user_deletion.py b/pycroft/lib/user_deletion.py index 937d191f8..7d421165a 100644 --- a/pycroft/lib/user_deletion.py +++ b/pycroft/lib/user_deletion.py @@ -8,14 +8,14 @@ from datetime import timedelta, datetime from typing import Protocol, Sequence, cast -from sqlalchemy import func, nulls_last, and_, not_ +from sqlalchemy import func, and_, not_ from sqlalchemy.future import select from sqlalchemy.orm import joinedload, Session from sqlalchemy.sql.functions import current_timestamp -from pycroft import Config from pycroft.model.property import CurrentProperty -from pycroft.model.user import User, Membership +from pycroft.model.user import User +from pycroft.lib.membership import select_user_and_last_mem class ArchivableMemberInfo(Protocol): @@ -36,29 +36,7 @@ def get_archivable_members(session: Session, delta: timedelta = timedelta(days=1 :param delta: how far back the end of membership has to lie (positive timedelta). """ # see FunctionElement.over - mem_ends_at = func.upper(Membership.active_during) - window_args = { - 'partition_by': User.id, - 'order_by': nulls_last(mem_ends_at), - } - # mypy: ignore[no-untyped-call] - last_mem = ( - select( - User.id.label('user_id'), - func.last_value(Membership.id) - .over(**window_args, rows=(None, None)) # type: ignore[no-untyped-call] - .label("mem_id"), - func.last_value(mem_ends_at) - .over(**window_args, rows=(None, None)) # type: ignore[no-untyped-call] - .label("mem_end"), - ) - .select_from(User) - .distinct() - .join(Membership) - .join(Config, Config.member_group_id == Membership.group_id) - ).cte( - "last_mem" - ) # mypy: ignore[no-untyped-call] + last_mem = select_user_and_last_mem().cte("last_mem") stmt = ( select( User, From c0f103663c3d3a0b503f8e63afd6eb3969553239 Mon Sep 17 00:00:00 2001 From: Lukas Juhrich Date: Thu, 4 Aug 2022 02:34:09 +0200 Subject: [PATCH 07/12] =?UTF-8?q?Switch=20membership=20selection=20to=20`f?= =?UTF-8?q?rom=20=E2=80=A6=20select`=20style?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This suits the relational algebra pipeline better, as the entities we select from actually correspond to a projection done after all the joins and filtering. This change does not alter the compilation result of this select statement. --- pycroft/lib/membership.py | 12 +++++++----- pycroft/lib/user_deletion.py | 1 - 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pycroft/lib/membership.py b/pycroft/lib/membership.py index 28669977c..5b9d62ae5 100644 --- a/pycroft/lib/membership.py +++ b/pycroft/lib/membership.py @@ -214,12 +214,18 @@ def select_user_and_last_mem() -> Select: # Select[Tuple[int, int, str]] :returns: a select statement with columns ``user_id``, ``mem_id``, ``mem_end``. """ mem_ends_at = func.upper(Membership.active_during) + # see FunctionElement.over for documentation on `partition_by`, `order_by` window_args: dict[str, ClauseElement | t.Sequence[ClauseElement | str] | None] = { "partition_by": User.id, "order_by": nulls_last(mem_ends_at), } return ( - select( + select() + .select_from(User) + .distinct() + .join(Membership) + .join(Config, Config.member_group_id == Membership.group_id) + .add_columns( User.id.label("user_id"), func.last_value(Membership.id) .over(**window_args, rows=(None, None)) # type: ignore[no-untyped-call] @@ -228,8 +234,4 @@ def select_user_and_last_mem() -> Select: # Select[Tuple[int, int, str]] .over(**window_args, rows=(None, None)) # type: ignore[no-untyped-call] .label("mem_end"), ) - .select_from(User) - .distinct() - .join(Membership) - .join(Config, Config.member_group_id == Membership.group_id) ) # mypy: ignore[no-untyped-call] diff --git a/pycroft/lib/user_deletion.py b/pycroft/lib/user_deletion.py index 7d421165a..4e5eb6179 100644 --- a/pycroft/lib/user_deletion.py +++ b/pycroft/lib/user_deletion.py @@ -35,7 +35,6 @@ def get_archivable_members(session: Session, delta: timedelta = timedelta(days=1 :param session: :param delta: how far back the end of membership has to lie (positive timedelta). """ - # see FunctionElement.over last_mem = select_user_and_last_mem().cte("last_mem") stmt = ( select( From e509d767654b0ed7005f3dfcef47324c2a2056a2 Mon Sep 17 00:00:00 2001 From: Lukas Juhrich Date: Thu, 4 Aug 2022 02:48:33 +0200 Subject: [PATCH 08/12] Decouple archivable users select statement from execution --- pycroft/lib/user_deletion.py | 53 +++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/pycroft/lib/user_deletion.py b/pycroft/lib/user_deletion.py index 4e5eb6179..4b9d807d9 100644 --- a/pycroft/lib/user_deletion.py +++ b/pycroft/lib/user_deletion.py @@ -11,6 +11,7 @@ from sqlalchemy import func, and_, not_ from sqlalchemy.future import select from sqlalchemy.orm import joinedload, Session +from sqlalchemy.sql import Select from sqlalchemy.sql.functions import current_timestamp from pycroft.model.property import CurrentProperty @@ -24,19 +25,10 @@ class ArchivableMemberInfo(Protocol): mem_end: datetime -def get_archivable_members(session: Session, delta: timedelta = timedelta(days=14)) \ - -> Sequence[ArchivableMemberInfo]: - """Return all the users that qualify for being archived right now. - - Selected are those users - - whose last membership in the member_group ended two weeks in the past, - - excluding users who currently have the `do-not-archive` property. - - :param session: - :param delta: how far back the end of membership has to lie (positive timedelta). - """ +def select_archivable_members(delta: timedelta) -> Select: # Select[Tuple[User, int, datetime]] + # last_mem: (user_id, mem_id, mem_end) last_mem = select_user_and_last_mem().cte("last_mem") - stmt = ( + return ( select( User, last_mem.c.mem_id, @@ -54,12 +46,41 @@ def get_archivable_members(session: Session, delta: timedelta = timedelta(days=1 .join(User, User.id == last_mem.c.user_id) .filter(last_mem.c.mem_end < current_timestamp() - delta) # type: ignore[no-untyped-call] .order_by(last_mem.c.mem_end) - .options(joinedload(User.hosts), # joinedload(User.current_memberships), - joinedload(User.account, innerjoin=True), joinedload(User.room), - joinedload(User.current_properties_maybe_denied)) + ) - return cast(list[ArchivableMemberInfo], session.execute(stmt).unique().all()) + +def get_archivable_members( + session: Session, delta: timedelta = timedelta(days=14) +) -> Sequence[ArchivableMemberInfo]: + """Return all the users that qualify for being archived right now. + + Selected are those users + - whose last membership in the member_group ended two weeks in the past, + - excluding users who currently have the `do-not-archive` property. + + We joined load the following information: + - hosts + - account + - room + - current_properties_maybe_denied + + :param session: + :param delta: how far back the end of membership has to lie (positive timedelta). + """ + return cast( + list[ArchivableMemberInfo], + session.execute( + select_archivable_members(delta) + .options( + joinedload(User.hosts), + # joinedload(User.current_memberships), + joinedload(User.account, innerjoin=True), + joinedload(User.room), + joinedload(User.current_properties_maybe_denied), + ) + ).unique().all(), + ) def archive_users(session: Session, user_ids: Sequence[int]) -> None: From cee036d5a17c40fc29fcdfcffe055810e2d88804 Mon Sep 17 00:00:00 2001 From: Lukas Juhrich Date: Thu, 4 Aug 2022 02:56:37 +0200 Subject: [PATCH 09/12] =?UTF-8?q?Switch=20archival=20query=20to=20`from=20?= =?UTF-8?q?=E2=80=A6=20select`=20style?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pycroft/lib/user_deletion.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pycroft/lib/user_deletion.py b/pycroft/lib/user_deletion.py index 4b9d807d9..4d319281b 100644 --- a/pycroft/lib/user_deletion.py +++ b/pycroft/lib/user_deletion.py @@ -29,11 +29,7 @@ def select_archivable_members(delta: timedelta) -> Select: # Select[Tuple[User, # last_mem: (user_id, mem_id, mem_end) last_mem = select_user_and_last_mem().cte("last_mem") return ( - select( - User, - last_mem.c.mem_id, - last_mem.c.mem_end, - ) + select() .select_from(last_mem) # Join the granted `do-not-archive` property, if existent .join(CurrentProperty, @@ -46,7 +42,11 @@ def select_archivable_members(delta: timedelta) -> Select: # Select[Tuple[User, .join(User, User.id == last_mem.c.user_id) .filter(last_mem.c.mem_end < current_timestamp() - delta) # type: ignore[no-untyped-call] .order_by(last_mem.c.mem_end) - + .add_columns( + User, + last_mem.c.mem_id, + last_mem.c.mem_end, + ) ) From e39606b1172d9c732009c898b0f6c289c435a3af Mon Sep 17 00:00:00 2001 From: Lukas Juhrich Date: Thu, 4 Aug 2022 20:36:42 +0200 Subject: [PATCH 10/12] Add comment explaining window args --- pycroft/lib/membership.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pycroft/lib/membership.py b/pycroft/lib/membership.py index 5b9d62ae5..908c85e60 100644 --- a/pycroft/lib/membership.py +++ b/pycroft/lib/membership.py @@ -215,6 +215,8 @@ def select_user_and_last_mem() -> Select: # Select[Tuple[int, int, str]] """ mem_ends_at = func.upper(Membership.active_during) # see FunctionElement.over for documentation on `partition_by`, `order_by` + # ideally, sqlalchemy would support named windows; + # instead, we have to re-use the arguments. window_args: dict[str, ClauseElement | t.Sequence[ClauseElement | str] | None] = { "partition_by": User.id, "order_by": nulls_last(mem_ends_at), From 9c9a2e835644997acc023a3a3ef399dfcaf7b0ae Mon Sep 17 00:00:00 2001 From: Lukas Juhrich Date: Fri, 5 Aug 2022 01:08:28 +0200 Subject: [PATCH 11/12] Switch archivable member selection from delta to year-based cutoff Fixes an oversight in the previous implementation of #470 Also, expand on the test suite substantially while retaining a decent runtime. Refs #67 --- pycroft/lib/user_deletion.py | 23 ++++++--- tests/lib/user/test_deletion.py | 88 +++++++++++++++++++++++---------- 2 files changed, 77 insertions(+), 34 deletions(-) diff --git a/pycroft/lib/user_deletion.py b/pycroft/lib/user_deletion.py index 4d319281b..c976b1109 100644 --- a/pycroft/lib/user_deletion.py +++ b/pycroft/lib/user_deletion.py @@ -5,14 +5,13 @@ This module contains methods concerning user archival and deletion. """ from __future__ import annotations -from datetime import timedelta, datetime +from datetime import datetime from typing import Protocol, Sequence, cast from sqlalchemy import func, and_, not_ from sqlalchemy.future import select from sqlalchemy.orm import joinedload, Session from sqlalchemy.sql import Select -from sqlalchemy.sql.functions import current_timestamp from pycroft.model.property import CurrentProperty from pycroft.model.user import User @@ -25,7 +24,9 @@ class ArchivableMemberInfo(Protocol): mem_end: datetime -def select_archivable_members(delta: timedelta) -> Select: # Select[Tuple[User, int, datetime]] +def select_archivable_members( + current_year: int, +) -> Select: # Select[Tuple[User, int, datetime]] # last_mem: (user_id, mem_id, mem_end) last_mem = select_user_and_last_mem().cte("last_mem") return ( @@ -40,7 +41,7 @@ def select_archivable_members(delta: timedelta) -> Select: # Select[Tuple[User, # …and use that to filter out the `do-not-archive` occurrences. .filter(CurrentProperty.property_name.is_(None)) .join(User, User.id == last_mem.c.user_id) - .filter(last_mem.c.mem_end < current_timestamp() - delta) # type: ignore[no-untyped-call] + .filter(func.extract("year", last_mem.c.mem_end) + 2 <= current_year) # type: ignore[no-untyped-call] .order_by(last_mem.c.mem_end) .add_columns( User, @@ -51,7 +52,8 @@ def select_archivable_members(delta: timedelta) -> Select: # Select[Tuple[User, def get_archivable_members( - session: Session, delta: timedelta = timedelta(days=14) + session: Session, + current_year: int | None = None, ) -> Sequence[ArchivableMemberInfo]: """Return all the users that qualify for being archived right now. @@ -66,13 +68,18 @@ def get_archivable_members( - current_properties_maybe_denied :param session: - :param delta: how far back the end of membership has to lie (positive timedelta). + :param current_year: dependency injection of the current year. + defaults to the current year. """ return cast( list[ArchivableMemberInfo], session.execute( - select_archivable_members(delta) - .options( + select_archivable_members( + # I know we're sloppy with time zones, + # but ±2h around new year's eve don't matter. + current_year=current_year + or datetime.now().year + ).options( joinedload(User.hosts), # joinedload(User.current_memberships), joinedload(User.account, innerjoin=True), diff --git a/tests/lib/user/test_deletion.py b/tests/lib/user/test_deletion.py index 30ae7e647..dae88b8d1 100644 --- a/tests/lib/user/test_deletion.py +++ b/tests/lib/user/test_deletion.py @@ -2,11 +2,18 @@ # This file is part of the Pycroft project and licensed under the terms of # the Apache License, Version 2.0. See the LICENSE file for details from datetime import datetime, date +from typing import Sequence import pytest from pycroft.helpers.interval import closed, closedopen -from pycroft.lib.user_deletion import get_archivable_members, archive_users +from pycroft.helpers.utc import with_min_time +from pycroft.lib.user_deletion import ( + get_archivable_members, + archive_users, + ArchivableMemberInfo, +) +from pycroft.model.user import User from tests.factories import UserFactory, ConfigFactory, MembershipFactory, \ PropertyGroupFactory, \ HostFactory @@ -26,27 +33,49 @@ def test_users_without_membership_not_in_list(session): assert get_archivable_members(session) == [] -def assert_archivable_members(members, expected_user, expected_end_date): - match members: - case [(user, mem_id, mem_end)]: - assert user == expected_user - assert mem_id is not None - assert mem_end.date() == expected_end_date - case _: - pytest.fail() +def filter_members(members, user): + return [(u, id, end) for u, id, end in members if u == user] -class TestUserDeletion: +def assert_member_present( + members: Sequence[ArchivableMemberInfo], + expected_user: User, + expected_end_date: date, +): + relevant_members = filter_members(members, expected_user) + assert len(relevant_members) == 1 + [(_, mem_id, mem_end)] = relevant_members + assert mem_id is not None + assert mem_end.date() == expected_end_date + + +def assert_member_absent( + members: Sequence[ArchivableMemberInfo], + expected_absent_user: User, +): + assert not filter_members(members, expected_absent_user) + + +class TestArchivableUserSelection: + @pytest.fixture( + scope="class", + params=[date(2020, 3, 1), date(2020, 1, 1), date(2020, 12, 15)], + ) + def end_date(self, request): + return request.param + @pytest.fixture(scope='class') def do_not_archive_group(self, class_session): return PropertyGroupFactory(granted={'do-not-archive'}) - @pytest.fixture(scope='class') - def old_user(self, class_session, config, do_not_archive_group): + @pytest.fixture(scope="class") + def old_user(self, class_session, config, do_not_archive_group, end_date) -> User: user = UserFactory.create( registered_at=datetime(2000, 1, 1), with_membership=True, - membership__active_during=closed(datetime(2020, 1, 1), datetime(2020, 3, 1)), + membership__active_during=closed( + with_min_time(date(2020, 1, 1)), with_min_time(end_date) + ), membership__group=config.member_group, ) MembershipFactory.create( @@ -66,31 +95,38 @@ def do_not_archive_membership(self, session, old_user, do_not_archive_group): active_during=closedopen(datetime(2020, 1, 1), None), ) - def test_old_users_in_deletion_list(self, session, old_user): - members = get_archivable_members(session) - assert_archivable_members(members, old_user, date(2020, 3, 1)) + @pytest.mark.parametrize("year", [2022, 2023, 2024]) + def test_old_users_in_deletion_list_after(self, session, old_user, year, end_date): + members = get_archivable_members(session, current_year=year) + assert_member_present(members, old_user, end_date) - def test_old_user_not_in_list_with_long_delta(self, session, old_user): - delta = date.today() - date(2020, 1, 1) # before 2020-03-01 - assert get_archivable_members(session, delta) == [] + @pytest.mark.parametrize("year", [2019, 2020, 2021]) + def test_old_user_not_in_list_before(self, session, old_user, year): + assert_member_absent( + get_archivable_members(session, current_year=year), old_user + ) - def test_user_with_do_not_archive_not_in_list(self, session, old_user, - do_not_archive_membership): - assert get_archivable_members(session) == [] + @pytest.mark.parametrize("year", list(range(2018, 2023))) + def test_user_with_do_not_archive_not_in_list( + self, session, old_user, do_not_archive_membership, year + ): + assert_member_absent( + get_archivable_members(session, current_year=year), old_user + ) @pytest.mark.parametrize('num_hosts', [0, 2]) - def test_user_with_host_in_list(self, session, old_user, num_hosts): + def test_user_with_host_in_list(self, session, old_user, num_hosts, end_date): if num_hosts: HostFactory.create_batch(num_hosts, owner=old_user) members = get_archivable_members(session) - assert_archivable_members(members, old_user, date(2020, 3, 1)) + assert_member_present(members, old_user, end_date) - def test_user_with_room_in_list(self, session, old_user): + def test_user_with_room_in_list(self, session, old_user, end_date): with session.begin_nested(): old_user.room = None session.add(old_user) members = get_archivable_members(session) - assert_archivable_members(members, old_user, date(2020, 3, 1)) + assert_member_present(members, old_user, end_date) class TestUserArchival: From f715e29d3e4fa8738db7f67986f1cd68ad5ea4b8 Mon Sep 17 00:00:00 2001 From: Lukas Juhrich Date: Fri, 5 Aug 2022 01:13:54 +0200 Subject: [PATCH 12/12] Improve archivability test performance 1. We restrict the ranges to a smaller subset which is still meaningful. 2. We use the `select` statement itself rather than the wrapper method adding a ton of `joinedload`s. --- tests/lib/user/test_deletion.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/lib/user/test_deletion.py b/tests/lib/user/test_deletion.py index dae88b8d1..413f0703c 100644 --- a/tests/lib/user/test_deletion.py +++ b/tests/lib/user/test_deletion.py @@ -9,7 +9,7 @@ from pycroft.helpers.interval import closed, closedopen from pycroft.helpers.utc import with_min_time from pycroft.lib.user_deletion import ( - get_archivable_members, + select_archivable_members, archive_users, ArchivableMemberInfo, ) @@ -19,6 +19,11 @@ HostFactory +def get_archivable_members(session, current_year=2022): + """Like `get_archivable_members`, just without all the joinedloads.""" + return session.execute(select_archivable_members(current_year)).all() + + @pytest.fixture(scope='module') def config(module_session): return ConfigFactory() @@ -95,7 +100,7 @@ def do_not_archive_membership(self, session, old_user, do_not_archive_group): active_during=closedopen(datetime(2020, 1, 1), None), ) - @pytest.mark.parametrize("year", [2022, 2023, 2024]) + @pytest.mark.parametrize("year", [2022, 2023]) def test_old_users_in_deletion_list_after(self, session, old_user, year, end_date): members = get_archivable_members(session, current_year=year) assert_member_present(members, old_user, end_date) @@ -106,7 +111,7 @@ def test_old_user_not_in_list_before(self, session, old_user, year): get_archivable_members(session, current_year=year), old_user ) - @pytest.mark.parametrize("year", list(range(2018, 2023))) + @pytest.mark.parametrize("year", list(range(2019, 2023))) def test_user_with_do_not_archive_not_in_list( self, session, old_user, do_not_archive_membership, year ):