Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[24.2] Better cleanup of sharing roles on user purge #19096

Merged
merged 16 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
VENV?=.venv
# Source virtualenv to execute command (darker, sphinx, twine, etc...)
IN_VENV=if [ -f "$(VENV)/bin/activate" ]; then . "$(VENV)/bin/activate"; fi;
RELEASE_CURR:=24.2
RELEASE_CURR:=25.0
RELEASE_UPSTREAM:=upstream
CONFIG_MANAGE=$(IN_VENV) python lib/galaxy/config/config_manage.py
PROJECT_URL?=https://github.com/galaxyproject/galaxy
Expand Down
2 changes: 1 addition & 1 deletion client/src/api/schema/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4804,7 +4804,7 @@ export interface paths {
};
/**
* Get User Roles
* @description Return a collection of roles associated with this user. Only admins can see user roles.
* @description Return a list of roles associated with this user. Only admins can see user roles.
*/
get: operations["get_user_roles_api_users__user_id__roles_get"];
put?: never;
Expand Down
15 changes: 7 additions & 8 deletions lib/galaxy/datatypes/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
FILENAME_VALID_CHARS,
inflector,
iter_start_of_line,
to_content_disposition,
unicodify,
UNKNOWN,
)
Expand Down Expand Up @@ -437,7 +438,7 @@ def _serve_raw(
element_identifier=kwd.get("element_identifier"),
filename_pattern=kwd.get("filename_pattern"),
)
headers["Content-Disposition"] = f'attachment; filename="{filename}"'
headers["Content-Disposition"] = to_content_disposition(filename)
return open(dataset.get_file_name(), mode="rb"), headers

def to_archive(self, dataset: DatasetProtocol, name: str = "") -> Iterable:
Expand Down Expand Up @@ -483,7 +484,7 @@ def _serve_file_download(self, headers, data, trans, to_ext, file_size, **kwd):
headers["content-type"] = (
"application/octet-stream" # force octet-stream so Safari doesn't append mime extensions to filename
)
headers["Content-Disposition"] = f'attachment; filename="{filename}"'
headers["Content-Disposition"] = to_content_disposition(filename)
return open(data.get_file_name(), "rb"), headers

def _serve_binary_file_contents_as_text(self, trans, data, headers, file_size, max_peek_size):
Expand Down Expand Up @@ -660,16 +661,13 @@ def _download_filename(
element_identifier: Optional[str] = None,
filename_pattern: Optional[str] = None,
) -> str:
def escape(raw_identifier):
return "".join(c in FILENAME_VALID_CHARS and c or "_" for c in raw_identifier)[0:150]

if not to_ext or to_ext == "data":
# If a client requests to_ext with the extension 'data', they are
# deferring to the server, set it based on datatype.
to_ext = dataset.extension

template_values = {
"name": escape(dataset.name),
"name": dataset.name,
"ext": to_ext,
"hid": dataset.hid,
}
Expand All @@ -682,8 +680,9 @@ def escape(raw_identifier):

if hdca is not None:
# Use collection context to build up filename.
template_values["element_identifier"] = element_identifier
template_values["hdca_name"] = escape(hdca.name)
if element_identifier is not None:
template_values["element_identifier"] = element_identifier
template_values["hdca_name"] = hdca.name
template_values["hdca_hid"] = hdca.hid

return string.Template(filename_pattern).substitute(**template_values)
Expand Down
6 changes: 2 additions & 4 deletions lib/galaxy/managers/users.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
)
from galaxy.model.base import transaction
from galaxy.model.db.user import (
_cleanup_nonprivate_user_roles,
get_user_by_email,
get_user_by_username,
)
Expand Down Expand Up @@ -214,10 +215,7 @@ def purge(self, user, flush=True):
# Delete UserGroupAssociations
for uga in user.groups:
self.session().delete(uga)
# Delete UserRoleAssociations EXCEPT FOR THE PRIVATE ROLE
for ura in user.roles:
if ura.role_id != private_role.id:
self.session().delete(ura)
_cleanup_nonprivate_user_roles(self.session(), user, private_role.id)
# Delete UserAddresses
for address in user.addresses:
self.session().delete(address)
Expand Down
28 changes: 27 additions & 1 deletion lib/galaxy/model/db/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@
true,
)

from galaxy.model import User
from galaxy.model import (
Role,
User,
)
from galaxy.model.scoped_session import galaxy_scoped_session


Expand Down Expand Up @@ -64,3 +67,26 @@ def get_users_for_index(
else:
stmt = stmt.where(User.deleted == false())
return session.scalars(stmt).all()


def _cleanup_nonprivate_user_roles(session, user, private_role_id):
"""
Delete UserRoleAssociations EXCEPT FOR THE PRIVATE ROLE;
Delete sharing roles that are associated with this user only;
Remove user email from sharing role names associated with multiple users.

Note: this method updates the session without flushing or committing.
"""
user_roles = [ura for ura in user.roles if ura.role_id != private_role_id]
for user_role_assoc in user_roles:
role = user_role_assoc.role
if role.type == Role.types.SHARING:
if len(role.users) == 1:
# This role is associated with this user only, so we can delete it
session.delete(role)
elif user.email in role.name:
# Remove user email from sharing role's name
role.name = role.name.replace(user.email, "[USER PURGED]")
session.add(role)
# Delete user role association
session.delete(user_role_assoc)
9 changes: 9 additions & 0 deletions lib/galaxy/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
Union,
)
from urllib.parse import (
quote,
urlencode,
urlparse,
urlsplit,
Expand Down Expand Up @@ -2006,3 +2007,11 @@ def lowercase_alphanum_to_hex(lowercase_alphanum: str) -> str:
import numpy as np

return np.base_repr(int(lowercase_alphanum, 36), 16).lower()


def to_content_disposition(target: str) -> str:
filename, ext = os.path.splitext(target)
character_limit = 255 - len(ext)
sanitized_filename = "".join(c in FILENAME_VALID_CHARS and c or "_" for c in filename)[0:character_limit] + ext
utf8_encoded_filename = quote(re.sub(r'[\/\\\?%*:|"<>]', "_", filename), safe="")[0:character_limit] + ext
return f"attachment; filename=\"{sanitized_filename}\"; filename*=UTF-8''{utf8_encoded_filename}"
4 changes: 2 additions & 2 deletions lib/galaxy/util/zipstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import zipstream

from galaxy.util import to_content_disposition
from .path import safe_walk

CRC32_MIN = 1444
Expand Down Expand Up @@ -41,8 +42,7 @@ def response(self) -> Iterator[bytes]:
def get_headers(self) -> Dict[str, str]:
headers = {}
if self.archive_name:
archive_name = self.archive_name.encode("latin-1", "replace").decode("latin-1")
headers["Content-Disposition"] = f'attachment; filename="{archive_name}.zip"'
headers["Content-Disposition"] = to_content_disposition(f"{self.archive_name}.zip")
if self.upstream_mod_zip:
headers["X-Archive-Files"] = "zip"
else:
Expand Down
4 changes: 2 additions & 2 deletions lib/galaxy/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
VERSION_MAJOR = "24.2"
VERSION_MINOR = "rc1"
VERSION_MAJOR = "25.0"
VERSION_MINOR = "dev0"
VERSION = VERSION_MAJOR + (f".{VERSION_MINOR}" if VERSION_MINOR else "")
2 changes: 1 addition & 1 deletion lib/galaxy/webapps/galaxy/api/users.py
Original file line number Diff line number Diff line change
Expand Up @@ -734,7 +734,7 @@ def send_activation_email(
@router.get(
"/api/users/{user_id}/roles",
name="get user roles",
description="Return a collection of roles associated with this user. Only admins can see user roles.",
description="Return a list of roles associated with this user. Only admins can see user roles.",
require_admin=True,
)
def get_user_roles(
Expand Down
2 changes: 2 additions & 0 deletions lib/galaxy_test/api/test_dataset_collections.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import zipfile
from io import BytesIO
from typing import List
from urllib.parse import quote

from galaxy.util.unittest_utils import skip_if_github_down
from galaxy_test.base.api_asserts import assert_object_id_error
Expand Down Expand Up @@ -189,6 +190,7 @@ def test_download_non_english_characters(self):
hdca_id = self.dataset_populator.fetch(payload, wait=True).json()["outputs"][0]["id"]
create_response = self._download_dataset_collection(history_id=history_id, hdca_id=hdca_id)
self._assert_status_code_is(create_response, 200)
assert quote(name, safe="") in create_response.headers["Content-Disposition"]

@requires_new_user
def test_hda_security(self):
Expand Down
8 changes: 8 additions & 0 deletions lib/galaxy_test/api/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
Dict,
List,
)
from urllib.parse import quote

from galaxy.model.unittest_utils.store_fixtures import (
deferred_hda_model_store_dict,
Expand Down Expand Up @@ -897,3 +898,10 @@ def test_cannot_update_datatype_on_immutable_history(self, history_id):
response = self._put(f"histories/{history_id}/contents/{hda_id}", data={"datatype": "tabular"}, json=True)
self._assert_status_code_is(response, 403)
assert response.json()["err_msg"] == "History is immutable"

def test_download_non_english_characters(self, history_id):
name = "دیتاست"
hda = self.dataset_populator.new_dataset(history_id=history_id, name=name, content="data", wait=True)
response = self._get(f"histories/{history_id}/contents/{hda['id']}/display?to_ext=json")
self._assert_status_code_is(response, 200)
assert quote(name, safe="") in response.headers["Content-Disposition"]
81 changes: 81 additions & 0 deletions test/unit/data/model/db/test_user.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
import pytest
from sqlalchemy import select
from sqlalchemy.exc import IntegrityError

from galaxy.model import (
Role,
UserRoleAssociation,
)
from galaxy.model.db.user import (
_cleanup_nonprivate_user_roles,
get_user_by_email,
get_user_by_username,
get_users_by_ids,
Expand Down Expand Up @@ -80,3 +86,78 @@ def test_username_is_unique(make_user):
make_user(username="a")
with pytest.raises(IntegrityError):
make_user(username="a")


def test_cleanup_nonprivate_user_roles(session, make_user_and_role, make_role, make_user_role_association):
# Create 3 users with private roles
user1, role_private1 = make_user_and_role(email="[email protected]")
user2, role_private2 = make_user_and_role(email="[email protected]")
user3, role_private3 = make_user_and_role(email="[email protected]")

# Create role_sharing1 and associated it with user1 and user2
role_sharing1 = make_role(type=Role.types.SHARING, name="sharing role for [email protected], [email protected]")
make_user_role_association(user1, role_sharing1)
make_user_role_association(user2, role_sharing1)

# Create role_sharing2 and associated it with user3
role_sharing2 = make_role(type=Role.types.SHARING, name="sharing role for [email protected]")
make_user_role_association(user3, role_sharing2)

# Create another role and associate it with all users
role6 = make_role()
make_user_role_association(user1, role6)
make_user_role_association(user2, role6)
make_user_role_association(user3, role6)

# verify number of all user role associations
associations = session.scalars(select(UserRoleAssociation)).all()
assert len(associations) == 9 # 3 private, 2 + 1 sharing, 3 with role6

# verify user1 roles
assert len(user1.roles) == 3
assert role_private1 in [ura.role for ura in user1.roles]
assert role_sharing1 in [ura.role for ura in user1.roles]
assert role6 in [ura.role for ura in user1.roles]

# run cleanup on user user1
_cleanup_nonprivate_user_roles(session, user1, role_private1.id)
session.commit() # must commit since method does not commit

# private role not deleted, associations with role6 and with sharing role deleted, sharing role name updated
assert len(user1.roles) == 1
assert user1.roles[0].id == role_private1.id
assert role_sharing1.name == "sharing role for [USER PURGED], [email protected]"

# verify user2 roles
assert len(user2.roles) == 3
assert role_private2 in [ura.role for ura in user2.roles]
assert role_sharing1 in [ura.role for ura in user2.roles]
assert role6 in [ura.role for ura in user2.roles]

# run cleanup on user user2
_cleanup_nonprivate_user_roles(session, user2, role_private2.id)
session.commit()

# private role not deleted, association with sharing role deleted
assert len(user2.roles) == 1
assert user2.roles[0].id == role_private2.id
# sharing role1 deleted since it has no more users associated with it
roles = session.scalars(select(Role)).all()
assert len(roles) == 5 # 3 private, role6, sharing2
assert role_sharing1.id not in [role.id for role in roles]

# verify user3 roles
assert len(user3.roles) == 3
assert role_private3 in [ura.role for ura in user3.roles]
assert role_sharing2 in [ura.role for ura in user3.roles]
assert role6 in [ura.role for ura in user3.roles]

# run cleanup on user user3
_cleanup_nonprivate_user_roles(session, user3, role_private3.id)
session.commit()

# remaining: 3 private roles + 3 associations, role6
roles = session.scalars(select(Role)).all()
assert len(roles) == 4
associations = session.scalars(select(UserRoleAssociation)).all()
assert len(associations) == 3
Loading