Skip to content

Commit

Permalink
Refactor filename handling in data download to use a dedicated toContentDisposition function for improved UTF-8 support
Browse files Browse the repository at this point in the history
  • Loading branch information
arash77 committed Nov 20, 2024
1 parent 4bc21f8 commit 18a0b92
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 25 deletions.
32 changes: 12 additions & 20 deletions lib/galaxy/datatypes/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
TYPE_CHECKING,
Union,
)
from urllib.parse import quote

from markupsafe import escape
from typing_extensions import Literal
Expand Down Expand Up @@ -51,6 +50,7 @@
FILENAME_VALID_CHARS,
inflector,
iter_start_of_line,
toContentDisposition,
unicodify,
UNKNOWN,
)
Expand Down Expand Up @@ -431,16 +431,14 @@ def _serve_raw(
headers["content-type"] = (
"application/octet-stream" # force octet-stream so Safari doesn't append mime extensions to filename
)
filename, utf8_encoded_filename = self._download_filename(
filename = self._download_filename(
dataset,
to_ext,
hdca=kwd.get("hdca"),
element_identifier=kwd.get("element_identifier"),
filename_pattern=kwd.get("filename_pattern"),
)
headers["Content-Disposition"] = (
f"attachment; filename=\"{filename}\"; filename*=UTF-8''{utf8_encoded_filename}"
)
headers["Content-Disposition"] = toContentDisposition(filename)
return open(dataset.get_file_name(), mode="rb"), headers

def to_archive(self, dataset: DatasetProtocol, name: str = "") -> Iterable:
Expand Down Expand Up @@ -476,7 +474,7 @@ def _serve_file_download(self, headers, data, trans, to_ext, file_size, **kwd):
return self._archive_composite_dataset(trans, data, headers, do_action=kwd.get("do_action", "zip"))
else:
headers["Content-Length"] = str(file_size)
filename, utf8_encoded_filename = self._download_filename(
filename = self._download_filename(
data,
to_ext,
hdca=kwd.get("hdca"),
Expand All @@ -486,9 +484,7 @@ def _serve_file_download(self, headers, data, trans, to_ext, file_size, **kwd):
headers["content-type"] = (
"application/octet-stream" # force octet-stream so Safari doesn't append mime extensions to filename
)
headers["Content-Disposition"] = (
f"attachment; filename=\"{filename}\"; filename*=UTF-8''{utf8_encoded_filename}"
)
headers["Content-Disposition"] = toContentDisposition(filename)
return open(data.get_file_name(), "rb"), headers

def _serve_binary_file_contents_as_text(self, trans, data, headers, file_size, max_peek_size):
Expand Down Expand Up @@ -664,17 +660,14 @@ def _download_filename(
hdca: Optional[DatasetHasHidProtocol] = None,
element_identifier: Optional[str] = None,
filename_pattern: Optional[str] = None,
) -> Tuple[str, str]:
def escape(raw_identifier):
return "".join(c in FILENAME_VALID_CHARS and c or "_" for c in raw_identifier)[0:150]

) -> str:
if not to_ext or to_ext == "data":
# If a client requests to_ext with the extension 'data', they are
# deferring to the server, set it based on datatype.
to_ext = dataset.extension

template_values = {
"name": escape(dataset.name),
"name": dataset.name,
"ext": to_ext,
"hid": dataset.hid,
}
Expand All @@ -687,13 +680,12 @@ def escape(raw_identifier):

if hdca is not None:
# Use collection context to build up filename.
template_values["element_identifier"] = element_identifier
template_values["hdca_name"] = escape(hdca.name)
if element_identifier is not None:
template_values["element_identifier"] = element_identifier
template_values["hdca_name"] = hdca.name
template_values["hdca_hid"] = hdca.hid
filename = string.Template(filename_pattern).substitute(**template_values)
template_values["name"] = quote(dataset.name, safe="")
utf8_encoded_filename = string.Template(filename_pattern).substitute(**template_values)
return filename, utf8_encoded_filename

return string.Template(filename_pattern).substitute(**template_values)

def display_name(self, dataset: HasName) -> str:
"""Returns formatted html of dataset name"""
Expand Down
6 changes: 6 additions & 0 deletions lib/galaxy/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
Union,
)
from urllib.parse import (
quote,
urlencode,
urlparse,
urlsplit,
Expand Down Expand Up @@ -2006,3 +2007,8 @@ def lowercase_alphanum_to_hex(lowercase_alphanum: str) -> str:
import numpy as np

return np.base_repr(int(lowercase_alphanum, 36), 16).lower()


def toContentDisposition(filename: str) -> str:
    """Build an ``attachment`` Content-Disposition header value for ``filename``.

    Two filename parameters are emitted:

    * ``filename="..."`` - a plain ASCII fallback for clients that do not
      understand the extended form. Characters outside printable ASCII, and
      the characters ``"``, ``\\``, ``%`` and ``/``, are replaced with ``_``.
    * ``filename*=UTF-8''...`` - the full name, percent-encoded as UTF-8.

    :param filename: the desired download filename; may contain any Unicode.
    :returns: the header value, starting with ``attachment;``.
    """
    # The fallback must not carry percent-escapes: clients disagree on whether
    # to decode them inside the plain ``filename`` parameter, so an encoded
    # value can end up on disk as a literal ``my%20file.txt``. Quotes and
    # backslashes would break the quoted-string, and control characters
    # (including CR/LF) must never reach a header value.
    ascii_fallback = "".join(
        char if " " <= char <= "~" and char not in '"\\%/' else "_" for char in filename
    )
    # safe="" so that "/" is percent-encoded too.
    utf8_encoded_filename = quote(filename, safe="")
    return f"attachment; filename=\"{ascii_fallback}\"; filename*=UTF-8''{utf8_encoded_filename}"
7 changes: 2 additions & 5 deletions lib/galaxy/util/zipstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import zipstream

from galaxy.util import toContentDisposition
from .path import safe_walk

CRC32_MIN = 1444
Expand Down Expand Up @@ -41,11 +42,7 @@ def response(self) -> Iterator[bytes]:
def get_headers(self) -> Dict[str, str]:
headers = {}
if self.archive_name:
archive_name = self.archive_name.encode("latin-1", "replace").decode("latin-1")
utf8_encoded_filename = quote(self.archive_name, safe="")
headers["Content-Disposition"] = (
f"attachment; filename=\"{archive_name}.zip\"; filename*=UTF-8''{utf8_encoded_filename}.zip"
)
headers["Content-Disposition"] = toContentDisposition(f"{self.archive_name}.zip")
if self.upstream_mod_zip:
headers["X-Archive-Files"] = "zip"
else:
Expand Down

0 comments on commit 18a0b92

Please sign in to comment.