diff --git a/.gitignore b/.gitignore index 321dab6ab4a0..3e429a0c991a 100644 --- a/.gitignore +++ b/.gitignore @@ -77,6 +77,7 @@ workflow_schedulers_conf.xml config/* config/plugins/interactive_environments/**/*.ini config/plugins/**/.cache +config/plugins/**/.parcel-cache !config/plugins static/welcome.html.* static/welcome.html @@ -179,6 +180,7 @@ config/plugins/**/static/*.map # viz-specific build artifacts to ignore (until these are removed from codebase) config/plugins/visualizations/annotate_image/static/jquery.contextMenu.css config/plugins/visualizations/nvd3/nvd3_bar/static/nvd3.js +config/plugins/visualizations/h5web/static/script.css # CWL conformance tests lib/galaxy_test/api/cwl/test_cwl_conformance_v1_?.py diff --git a/client/gulpfile.js b/client/gulpfile.js index 1e70394b9868..c0f64b9bd675 100644 --- a/client/gulpfile.js +++ b/client/gulpfile.js @@ -17,6 +17,7 @@ const STATIC_PLUGIN_BUILD_IDS = [ "drawrna", "editor", "example", + "h5web", "heatmap/heatmap_default", "hyphyvision", "jqplot/jqplot_bar", @@ -156,7 +157,11 @@ function buildPlugins(callback, forceRebuild) { }; // if node version is >16, set NODE_OPTIONS to use legacy openssl provider if (process.versions.node.split(".")[0] > "16") { - opts.env = { ...process.env, NODE_OPTIONS: "--openssl-legacy-provider" }; + opts.env = { + ...process.env, + PARCEL_WORKER_BACKEND: "process", + NODE_OPTIONS: "--openssl-legacy-provider", + }; } if (child_process.spawnSync("yarn", ["build"], opts).status === 0) { console.log(`Successfully built, saving build state to ${hashFilePath}`); diff --git a/client/src/components/ToolRecommendation.vue b/client/src/components/ToolRecommendation.vue index 5dac75553787..7d8fba52cc67 100644 --- a/client/src/components/ToolRecommendation.vue +++ b/client/src/components/ToolRecommendation.vue @@ -86,32 +86,59 @@ export default { const duration = 750; const maxTextLength = 20; const svg = d3.select("#tool-recommendation").append("svg").attr("class", "tree-size").append("g"); - const gElem = svg[0][0]; - const svgElem = gElem.parentNode; + const svgElem = svg.node().parentElement; const clientH = svgElem.clientHeight; const clientW = svgElem.clientWidth; const translateX = parseInt(clientW * 0.15); - svgElem.setAttribute("viewBox", -translateX + " 0 " + 0.5 * clientW + " " + clientH); svgElem.setAttribute("preserveAspectRatio", "xMidYMid meet"); - - const tree = d3.tree().size([clientH, clientW]); - const diagonal = d3.svg.diagonal().projection((d) => { - return [d.y, d.x]; + const d3Tree = d3.tree().size([clientH, clientW]); + root = d3.hierarchy(predictedTools, (d) => { + return d.children; }); + root.x0 = parseInt(clientH / 2); + root.y0 = 0; + const collapse = (d) => { + if (d.children) { + d._children = d.children; + d._children.forEach(collapse); + d.children = null; + } + }; + root.children.forEach(collapse); + const diagonal = (s, d) => { + const path = `M ${s.y} ${s.x} + C ${(s.y + d.y) / 2} ${s.x}, + ${(s.y + d.y) / 2} ${d.x}, + ${d.y} ${d.x}`; + return path; + }; + const click = (e, d) => { + if (d.children) { + d._children = d.children; + d.children = null; + } else { + d.children = d._children; + d._children = null; + } + if (d.parent == null) { + update(d); + } + const tId = d.data.id; + if (tId !== undefined && tId !== "undefined" && tId !== null && tId !== "") { + document.location.href = `${getAppRoot()}tool_runner?tool_id=${tId}`; + } + }; const update = (source) => { - // Compute the new tree layout. 
- const nodes = tree.nodes(root).reverse(); - const links = tree.links(nodes); - // Normalize for fixed-depth. + const predictedTools = d3Tree(root); + const nodes = predictedTools.descendants(); + const links = predictedTools.descendants().slice(1); nodes.forEach((d) => { d.y = d.depth * (clientW / 10); }); - // Update the nodes const node = svg.selectAll("g.node").data(nodes, (d) => { return d.id || (d.id = ++i); }); - // Enter any new nodes at the parent's previous position. const nodeEnter = node .enter() .append("g") @@ -120,97 +147,75 @@ export default { return "translate(" + source.y0 + "," + source.x0 + ")"; }) .on("click", click); - nodeEnter.append("circle").attr("r", 1e-6); + nodeEnter.append("circle").attr("class", "node").attr("r", 1e-6); nodeEnter .append("text") + .attr("dy", ".35em") .attr("x", (d) => { return d.children || d._children ? -10 : 10; }) - .attr("dy", ".35em") .attr("text-anchor", (d) => { return d.children || d._children ? "end" : "start"; }) .text((d) => { - const tName = d.name; + const tName = d.data.name; if (tName.length > maxTextLength) { return tName.slice(0, maxTextLength) + "..."; } - return d.name; + return d.data.name; }); nodeEnter.append("title").text((d) => { - return d.children || d._children ? d.name : "Open tool - " + d.name; + return d.children ? d.data.name : "Open tool - " + d.data.name; }); - // Transition nodes to their new position. - const nodeUpdate = node + const nodeUpdate = nodeEnter.merge(node); + nodeUpdate .transition() .duration(duration) .attr("transform", (d) => { return "translate(" + d.y + "," + d.x + ")"; }); - nodeUpdate.select("circle").attr("r", 2.5); - // Transition exiting nodes to the parent's new position. - node.exit() + nodeUpdate.select("circle.node").attr("r", 2.5); + const nodeExit = node + .exit() .transition() .duration(duration) .attr("transform", (d) => { return "translate(" + source.y + "," + source.x + ")"; }) .remove(); - // Update the links + nodeExit.select("circle").attr("r", 1e-6); const link = svg.selectAll("path.link").data(links, (d) => { - return d.target.id; + return d.data.id; }); - // Enter any new links at the parent's previous position. - link.enter() + const linkEnter = link + .enter() .insert("path", "g") .attr("class", "link") .attr("d", (d) => { const o = { x: source.x0, y: source.y0 }; - return diagonal({ source: o, target: o }); + return diagonal(o, o); + }); + const linkUpdate = linkEnter.merge(link); + linkUpdate + .transition() + .duration(duration) + .attr("d", (d) => { + return diagonal(d, d.parent); }); - // Transition links to their new position. link.transition().duration(duration).attr("d", diagonal); - // Transition exiting nodes to the parent's new position. link.exit() .transition() .duration(duration) .attr("d", (d) => { const o = { x: source.x, y: source.y }; - return diagonal({ source: o, target: o }); + return diagonal(o, o); }) .remove(); - // Stash the old positions for transition. nodes.forEach((d) => { d.x0 = d.x; d.y0 = d.y; }); }; - // Toggle children on click. 
- const click = (d) => { - if (d.children) { - d._children = d.children; - d.children = null; - } else { - d.children = d._children; - d._children = null; - } - update(d); - const tId = d.id; - if (tId !== undefined && tId !== "undefined" && tId !== null && tId !== "") { - document.location.href = `${getAppRoot()}tool_runner?tool_id=${tId}`; - } - }; - const collapse = (d) => { - if (d.children) { - d._children = d.children; - d._children.forEach(collapse); - d.children = null; - } - }; - root = predictedTools; - root.x0 = parseInt(clientH / 2); - root.y0 = 0; - root.children.forEach(collapse); update(root); }, }, diff --git a/client/src/mvc/upload/upload-ftp.js b/client/src/mvc/upload/upload-ftp.js index 6acc2e3ba8e1..4bba7e0fa6d6 100644 --- a/client/src/mvc/upload/upload-ftp.js +++ b/client/src/mvc/upload/upload-ftp.js @@ -16,7 +16,7 @@ export default Backbone.View.extend({ help_enabled: true, oidc_text: `
If you are signed-in to Galaxy using a third-party identity and you do not have a Galaxy password please use the reset password option in the login form with your email to create a password for your account.`, help_text: `This Galaxy server allows you to upload files via FTP. To upload some files, log in to the FTP server at ${options.ftp_upload_site} using your Galaxy credentials. - For help visit the tutorial.`, + For help visit the documentation.`, collection: null, onchange: function () {}, onadd: function () {}, diff --git a/client/src/schema/schema.ts b/client/src/schema/schema.ts index 8f1f59a66415..0e26f999d37d 100644 --- a/client/src/schema/schema.ts +++ b/client/src/schema/schema.ts @@ -112,6 +112,10 @@ export interface paths { */ get: operations["show_api_datasets__dataset_id__get"]; }; + "/api/datasets/{dataset_id}/content/{content_type}": { + /** Retrieve information about the content of a dataset. */ + get: operations["get_structured_content_api_datasets__dataset_id__content__content_type__get"]; + }; "/api/datasets/{dataset_id}/converted": { /** * Return a a map with all the existing converted datasets associated with this instance. @@ -2339,6 +2343,12 @@ export interface components { * @description Represents a collection of elements contained in the dataset collection. */ DatasetCollectionContentElements: components["schemas"]["DCESummary"][]; + /** + * DatasetContentType + * @description For retrieving content from a structured dataset (e.g. HDF5) + * @enum {string} + */ + DatasetContentType: "meta" | "attr" | "stats" | "data"; /** * DatasetErrorMessage * @description Base model definition with common configuration used by all derived models. @@ -7957,6 +7967,34 @@ export interface operations { }; }; }; + get_structured_content_api_datasets__dataset_id__content__content_type__get: { + /** Retrieve information about the content of a dataset. */ + parameters: { + /** @description The user ID that will be used to effectively make this API call. Only admins and designated users can make API calls on behalf of other users. */ + header?: { + "run-as"?: string; + }; + /** @description The encoded database identifier of the dataset. */ + path: { + dataset_id: string; + content_type: components["schemas"]["DatasetContentType"]; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + content: { + "application/json": Record; + }; + }; + /** @description Validation Error */ + 422: { + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; converted_api_datasets__dataset_id__converted_get: { /** * Return a a map with all the existing converted datasets associated with this instance. 
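
As a usage illustration (not part of the patch): the `/api/datasets/{dataset_id}/content/{content_type}` route registered in the schema above can be exercised from any HTTP client once this change is deployed. The sketch below assumes a running Galaxy server with this patch applied; the server URL, API key, encoded dataset id, and HDF5 node path are placeholders, and `content_type` must be one of the `DatasetContentType` values ("meta", "attr", "stats", "data").

```python
# Minimal client sketch for the new /api/datasets/{dataset_id}/content/{content_type}
# route. GALAXY_URL, API_KEY, DATASET_ID and the HDF5 path below are placeholders.
import requests

GALAXY_URL = "https://galaxy.example.org"   # hypothetical server with this patch
API_KEY = "your-api-key"
DATASET_ID = "f2db41e1fa331b3e"             # encoded id of an HDF5 (h5) dataset


def get_structured_content(content_type, **params):
    response = requests.get(
        f"{GALAXY_URL}/api/datasets/{DATASET_ID}/content/{content_type}",
        headers={"x-api-key": API_KEY},
        params=params,
        timeout=30,
    )
    response.raise_for_status()
    return response.json()


# Dataset-level metadata, then statistics for a single HDF5 node, mirroring the
# h5grove query parameters (path, selection, ...) accepted by the backend.
print(get_structured_content("meta"))
print(get_structured_content("stats", path="/entry/data/data"))
```
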
diff --git a/config/plugins/visualizations/h5web/config/h5web.xml b/config/plugins/visualizations/h5web/config/h5web.xml new file mode 100644 index 000000000000..792d223dcd9f --- /dev/null +++ b/config/plugins/visualizations/h5web/config/h5web.xml @@ -0,0 +1,22 @@ + + + + HDF5 data visualization and exploration + + + HistoryDatasetAssociation + binary.H5 + + + + dataset_id + + + + + explorer + false + boolean + + + diff --git a/config/plugins/visualizations/h5web/package.json b/config/plugins/visualizations/h5web/package.json new file mode 100644 index 000000000000..145aa121d70a --- /dev/null +++ b/config/plugins/visualizations/h5web/package.json @@ -0,0 +1,27 @@ +{ + "name": "visualization", + "version": "0.1.0", + "keywords": [ + "galaxy", + "visualization" + ], + "license": "AFL-3.0", + "dependencies": { + "@galaxyproject/charts": "^0.0.6", + "@h5web/app": "^7.1.0", + "backbone": "^1.3.3", + "normalize.css": "^8.0.1", + "react": "17.0.2", + "react-dom": "17.0.2" + }, + "scripts": { + "build": "parcel build src/script.js --dist-dir static" + }, + "devDependencies": { + "@types/react": "^17.0.52", + "@types/react-dom": "^17.0.18", + "buffer": "^5.7.1", + "parcel": "^2.0.0", + "typescript": "4.9.3" + } +} diff --git a/config/plugins/visualizations/h5web/src/script.js b/config/plugins/visualizations/h5web/src/script.js new file mode 100644 index 000000000000..db416ab2e35a --- /dev/null +++ b/config/plugins/visualizations/h5web/src/script.js @@ -0,0 +1,47 @@ +/** + * Visualizer interface for h5web (https://github.com/silx-kit/h5web) + * + * This relies on Galaxy being able to serve files using the + * h5grove protocol (https://silx-kit.github.io/h5grove/). + * This provides efficient access to the contents of the + * HDF5 file and avoids having to read the whole file at any + * point. 
+ */ + +import './styles.css'; +import React, { StrictMode } from 'react' +import {render as reactRender} from 'react-dom' +import {App, H5GroveProvider} from '@h5web/app' + +function MyApp(props) { + return ( + + + + ); +} + +window.bundleEntries = window.bundleEntries || {}; +window.bundleEntries.load = function (options) { + var dataset = options.dataset; + var settings = options.chart.settings; + var explorer = settings.get('explorer'); + var url = window.location.origin + "/api/datasets/" + dataset.id + "/content"; + reactRender( + , + document.getElementById(options.target) + ) + options.chart.state('ok', 'Chart drawn.'); + options.process.resolve(); +}; + +export default MyApp; diff --git a/config/plugins/visualizations/h5web/src/styles.css b/config/plugins/visualizations/h5web/src/styles.css new file mode 100644 index 000000000000..36ccace70eb5 --- /dev/null +++ b/config/plugins/visualizations/h5web/src/styles.css @@ -0,0 +1,43 @@ +@import 'npm:normalize.css'; +@import 'npm:@h5web/app/dist/styles.css'; /* global app styles */ + +*, +*::before, +*::after { + box-sizing: inherit; +} + +body { + box-sizing: border-box; + min-width: 42em; + font-family: var(--sans-serif); + line-height: 1.2; + color: #020402; +} + +body { + --primary: #c0da74; + --primary-lighter: #eaf0cd; + --primary-light: #d4e09b; + --primary-dark: #9aae5d; + --primary-bg: #f5fbef; + --primary-light-bg: #fafdf7; + --primary-dark-bg: #dde2d7; + --secondary: #8cdfc7; + --secondary-light: #b7fcf3; + --secondary-lighter: #dbfef9; + --secondary-dark: #1b998b; + --secondary-dark-15: #1b998b26; + --secondary-darker: #0e5846; + --secondary-bg: #d9f4ec; + --secondary-light-bg: #ecfaf6; + --monospace: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, Courier, + monospace; + --sans-serif: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', + 'Oxygen', 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', + sans-serif; +} + +#root { + height: 100vh; +} \ No newline at end of file diff --git a/config/plugins/visualizations/h5web/static/logo.png b/config/plugins/visualizations/h5web/static/logo.png new file mode 100644 index 000000000000..f95e21a9f6ad Binary files /dev/null and b/config/plugins/visualizations/h5web/static/logo.png differ diff --git a/config/plugins/webhooks/news/config.yml b/config/plugins/webhooks/news/config.yml index 9c35b671c8c9..8f69784c4b80 100644 --- a/config/plugins/webhooks/news/config.yml +++ b/config/plugins/webhooks/news/config.yml @@ -1,7 +1,7 @@ id: news type: - masthead -activate: true +activate: false icon: fa-bell tooltip: See the Galaxy Release Notes diff --git a/lib/galaxy/datatypes/binary.py b/lib/galaxy/datatypes/binary.py index cc6702cd589f..0c2cd6ad1777 100644 --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -30,6 +30,12 @@ TWOBIT_MAGIC_NUMBER, TWOBIT_MAGIC_NUMBER_SWAP, ) +from h5grove.content import ( + DatasetContent, + get_content_from_file, + ResolvedEntityContent, +) +from h5grove.encoders import encode from galaxy import util from galaxy.datatypes import metadata @@ -122,6 +128,9 @@ def get_mime(self) -> str: """Returns the mime type of the datatype""" return "application/octet-stream" + def get_structured_content(self, dataset, content_type, **kwargs): + raise Exception("get_structured_content is not implemented for this datatype.") + class Ab1(Binary): """Class describing an ab1 binary sequence file""" @@ -1095,6 +1104,47 @@ def sniff(self, filename: str) -> bool: return False +class HIC(Binary): + """ + Class describing an Juicer 
hic file + + >>> from galaxy.datatypes.sniff import get_test_fname + >>> fname = get_test_fname('SRR1791297_30.hic') + >>> HIC().sniff(fname) + True + """ + file_ext = "hic" + # edam_format = "format_3590" # Don't know what to set here or if it is necessary at all. Copied from h5(binary) class. + + def __init__(self, **kwd): + Binary.__init__(self, **kwd) + self._magic = b'HIC' + + def sniff(self, filename): + try: + header = open(filename, 'rb') + header_magic = struct.unpack('<3s', header.read(3))[0] + if header_magic == self._magic: + return True + return False + except Exception: + return False + + def set_peek(self, dataset, is_multi_byte=False): + if not dataset.dataset.purged: + dataset.peek = "HIC file for storing genomic interaction data." + dataset.blurb = nice_size(dataset.get_size()) + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + + def display_peek(self, dataset): + try: + return dataset.peek + except Exception: + return "HIC file (%s)" % (nice_size(dataset.get_size())) + + class H5(Binary): """ Class describing an HDF5 file @@ -1139,6 +1189,40 @@ def display_peek(self, dataset: "DatasetInstance") -> str: except Exception: return f"Binary HDF5 file ({nice_size(dataset.get_size())})" + def get_structured_content( + self, + dataset, + content_type=None, + path="/", + dtype="origin", + format="json", + flatten=False, + selection=None, + **kwargs, + ): + """ + Implements h5grove protocol (https://silx-kit.github.io/h5grove/). + This allows the h5web visualization tool (https://github.com/silx-kit/h5web) + to be used directly with Galaxy datasets. + """ + with get_content_from_file(dataset.file_name, path, self._create_error) as content: + if content_type == "attr": + assert isinstance(content, ResolvedEntityContent) + resp = encode(content.attributes(), "json") + elif content_type == "meta": + resp = encode(content.metadata(), "json") + elif content_type == "stats": + assert isinstance(content, DatasetContent) + resp = encode(content.data_stats(selection), "json") + else: # default 'data' + assert isinstance(content, DatasetContent) + resp = encode(content.data(selection, flatten, dtype), format) + + return resp.content, resp.headers + + def _create_error(self, status_code, message): + return Exception(status_code, message) + class Loom(H5): """ @@ -1906,6 +1990,62 @@ def display_peek(self, dataset: "DatasetInstance") -> str: except Exception: return f"MCool (HDF5) file ({nice_size(dataset.get_size())})." 
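
Since `H5.get_structured_content` above is a thin wrapper around h5grove, the same calls can be tried against a plain HDF5 file outside Galaxy. This is a sketch only, mirroring the h5grove usage shown in the hunk above; `example.h5` and the node path are placeholders.

```python
# Standalone sketch of the h5grove calls wrapped by H5.get_structured_content,
# run directly against a local HDF5 file. "example.h5" and the node path
# "/entry/data/data" are placeholders.
from h5grove.content import DatasetContent, get_content_from_file
from h5grove.encoders import encode


def create_error(status_code, message):
    # Same role as H5._create_error in the hunk above.
    return Exception(status_code, message)


with get_content_from_file("example.h5", "/entry/data/data", create_error) as content:
    # Metadata is available for any resolved entity ...
    meta = encode(content.metadata(), "json")
    print(meta.content, meta.headers)
    # ... while stats/data only make sense for dataset nodes.
    if isinstance(content, DatasetContent):
        stats = encode(content.data_stats(None), "json")
        print(stats.content)
```
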
+ +class SCool(H5): + """ + Class describing the single-cell cool format (https://github.com/mirnylab/cooler) + """ + + file_ext = "scool" + + def sniff(self, filename): + """ + >>> from galaxy.datatypes.sniff import get_test_fname + >>> fname = get_test_fname('matrix.scool') + >>> SCool().sniff(fname) + True + >>> fname = get_test_fname('matrix.cool') + >>> SCool().sniff(fname) + False + >>> fname = get_test_fname('test.mz5') + >>> SCool().sniff(fname) + False + >>> fname = get_test_fname('wiggle.wig') + >>> SCool().sniff(fname) + False + >>> fname = get_test_fname('biom2_sparse_otu_table_hdf5.biom2') + >>> SCool().sniff(fname) + False + """ + + MAGIC = "HDF5::Cooler" + URL = "https://github.com/mirnylab/cooler" + + if super(SCool, self).sniff(filename): + with h5py.File(filename, 'r') as handle: + keys = ['chroms', 'bins', 'pixels', 'indexes'] + for matrix in handle.keys(): + fmt = util.unicodify(handle[matrix].attrs.get('format')) + url = util.unicodify(handle[matrix].attrs.get('format-url')) + if fmt == MAGIC or url == URL: + if not all(name in handle[matrix].keys() for name in keys): + return False + return True + return False + + def set_peek(self, dataset, is_multi_byte=False): + if not dataset.dataset.purged: + dataset.peek = "Single-cell Cool (HDF5) file for storing genomic interaction data." + dataset.blurb = nice_size(dataset.get_size()) + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + + def display_peek(self, dataset): + try: + return dataset.peek + except Exception: + return "SCool (HDF5) file (%s)." % (nice_size(dataset.get_size())) class H5MLM(H5): """ diff --git a/lib/galaxy/datatypes/display_applications/configs/biom/biom_simple.xml b/lib/galaxy/datatypes/display_applications/configs/biom/biom_simple.xml index 787a4b3ef4dc..670d4a91de55 100644 --- a/lib/galaxy/datatypes/display_applications/configs/biom/biom_simple.xml +++ b/lib/galaxy/datatypes/display_applications/configs/biom/biom_simple.xml @@ -1,7 +1,13 @@ + + + https://usegalaxy.eu/phinch/index.html?biomURL=${biom_file.url} + + diff --git a/lib/galaxy/datatypes/test/SRR1791297_30.hic b/lib/galaxy/datatypes/test/SRR1791297_30.hic new file mode 100644 index 000000000000..f7a03874e0b2 Binary files /dev/null and b/lib/galaxy/datatypes/test/SRR1791297_30.hic differ diff --git a/lib/galaxy/datatypes/test/test_matrix.scool b/lib/galaxy/datatypes/test/test_matrix.scool new file mode 100644 index 000000000000..059dd05238fc Binary files /dev/null and b/lib/galaxy/datatypes/test/test_matrix.scool differ diff --git a/lib/galaxy/dependencies/pinned-requirements.txt b/lib/galaxy/dependencies/pinned-requirements.txt index 2defe34a2a08..67e08ab208b2 100644 --- a/lib/galaxy/dependencies/pinned-requirements.txt +++ b/lib/galaxy/dependencies/pinned-requirements.txt @@ -75,6 +75,7 @@ greenlet==2.0.1 ; python_version >= "3.7" and (platform_machine == "aarch64" or gunicorn==20.1.0 ; python_version >= "3.7" and python_version < "3.12" gxformat2==0.18.0 ; python_version >= "3.7" and python_version < "3.12" h11==0.14.0 ; python_version >= "3.7" and python_version < "3.12" +h5grove==1.2.1 ; python_version >= "3.7" and python_version < "3.12" h5py==3.8.0 ; python_version >= "3.7" and python_version < "3.12" humanfriendly==10.0 ; python_version >= "3.7" and python_version < "3.12" idna==3.4 ; python_version >= "3.7" and python_version < "3.12" diff --git a/lib/galaxy/files/sources/gitlab_arc.py b/lib/galaxy/files/sources/gitlab_arc.py new file mode 100644 index 000000000000..cc9c70b65d53 --- /dev/null 
+++ b/lib/galaxy/files/sources/gitlab_arc.py @@ -0,0 +1,19 @@ +try: + from gitlab_arc_fs.gitlab_fs import GitlabFS +except ImportError: + GitlabFS = None +from ._pyfilesystem2 import PyFilesystem2FilesSource # NOQA + + +class GitlabFSFilesSource(PyFilesystem2FilesSource): + plugin_type = "gitlabfs" + required_module = GitlabFS + required_package = "gitlab_fs" + + def _open_fs(self, user_context): + props = self._serialization_props(user_context) + handle = GitlabFS(**props) + return handle + + +__all__ = (GitlabFSFilesSource,) diff --git a/lib/galaxy/job_execution/container_monitor.py b/lib/galaxy/job_execution/container_monitor.py index ebc9be73677e..fedd67e3eeea 100644 --- a/lib/galaxy/job_execution/container_monitor.py +++ b/lib/galaxy/job_execution/container_monitor.py @@ -74,7 +74,11 @@ def main(): if ports[key]["host"] == "0.0.0.0": ports[key]["host"] = host_ip if callback_url: - requests.post(callback_url, json={"container_runtime": ports}, timeout=DEFAULT_SOCKET_TIMEOUT) + for retry in range(10): + res = requests.post(callback_url, json={"container_runtime": ports}, timeout=DEFAULT_SOCKET_TIMEOUT) + if res.status_code == 200: + break + time.sleep(retry * 2) else: with open("container_runtime.json", "w") as f: json.dump(ports, f) diff --git a/lib/galaxy/model/custom_types.py b/lib/galaxy/model/custom_types.py index 8f292a792a43..52c9214dc115 100644 --- a/lib/galaxy/model/custom_types.py +++ b/lib/galaxy/model/custom_types.py @@ -100,7 +100,10 @@ def process_bind_param(self, value, dialect): def process_result_value(self, value, dialect): if value is not None: - value = json_decoder.decode(unicodify(_sniffnfix_pg9_hex(value))) + try: + value = json_decoder.decode(unicodify(_sniffnfix_pg9_hex(value))) + except ValueError: + value = unicodify(_sniffnfix_pg9_hex(value)) return value def load_dialect_impl(self, dialect): diff --git a/lib/galaxy/tools/recommendations.py b/lib/galaxy/tools/recommendations.py index 6443ea2f987b..101ac450dad9 100644 --- a/lib/galaxy/tools/recommendations.py +++ b/lib/galaxy/tools/recommendations.py @@ -60,7 +60,7 @@ def create_transformer_model(self, vocab_size): class TransformerBlock(Layer): def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1): - super().__init__() + super(TransformerBlock, self).__init__() self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim, dropout=rate) self.ffn = Sequential([Dense(ff_dim, activation="relu"), Dense(embed_dim)]) self.layernorm1 = LayerNormalization(epsilon=1e-6) @@ -80,7 +80,7 @@ def call(self, inputs, training): class TokenAndPositionEmbedding(Layer): def __init__(self, maxlen, vocab_size, embed_dim): - super().__init__() + super(TokenAndPositionEmbedding, self).__init__() self.token_emb = Embedding(input_dim=vocab_size, output_dim=embed_dim, mask_zero=True) self.pos_emb = Embedding(input_dim=maxlen, output_dim=embed_dim, mask_zero=True) @@ -134,7 +134,7 @@ def __set_model(self, trans, remote_model_url): self.loaded_model = self.create_transformer_model(len(self.reverse_dictionary) + 1) self.loaded_model.load_weights(self.tool_recommendation_model_path) - self.model_data_dictionary = {v: k for k, v in self.reverse_dictionary.items()} + self.model_data_dictionary = dict((v, k) for k, v in self.reverse_dictionary.items()) # set the list of compatible tools self.compatible_tools = json.loads(model_file["compatible_tools"][()].decode("utf-8")) tool_weights = json.loads(model_file["class_weights"][()].decode("utf-8")) @@ -210,14 +210,6 @@ def __filter_tool_predictions(self, trans, prediction_data, 
tool_ids, tool_score Filter tool predictions based on datatype compatibility and tool connections. Add admin preferences to recommendations. """ - last_compatible_tools = list() - if last_tool_name in self.model_data_dictionary: - last_tool_name_id = self.model_data_dictionary[last_tool_name] - if last_tool_name_id in self.compatible_tools: - last_compatible_tools = [ - self.reverse_dictionary[t_id] for t_id in self.compatible_tools[last_tool_name_id] - ] - prediction_data["is_deprecated"] = False # get the list of datatype extensions of the last tool of the tool sequence _, last_output_extensions = self.__get_tool_extensions(trans, self.all_tools[last_tool_name][0]) @@ -231,13 +223,14 @@ def __filter_tool_predictions(self, trans, prediction_data, tool_ids, tool_score if ( t_id == child and score >= 0.0 - and t_id in last_compatible_tools and child not in self.deprecated_tools + and child != last_tool_name ): full_tool_id = self.all_tools[t_id][0] pred_input_extensions, _ = self.__get_tool_extensions(trans, full_tool_id) c_dict["name"] = self.all_tools[t_id][1] c_dict["tool_id"] = full_tool_id + c_dict["tool_score"] = score c_dict["i_extensions"] = list(set(pred_input_extensions)) prediction_data["children"].append(c_dict) break @@ -267,18 +260,39 @@ def __filter_tool_predictions(self, trans, prediction_data, tool_ids, tool_score break return prediction_data - def __get_predicted_tools(self, base_tools, predictions, topk): + def __get_predicted_tools(self, pub_tools, predictions, last_tool_name, topk): """ Get predicted tools. If predicted tools are less in number, combine them with published tools """ - t_intersect = list(set(predictions).intersection(set(base_tools))) - t_diff = list(set(predictions).difference(set(base_tools))) + last_compatible_tools = list() + if last_tool_name in self.model_data_dictionary: + last_tool_name_id = self.model_data_dictionary[last_tool_name] + if last_tool_name_id in self.compatible_tools: + last_compatible_tools = [ + self.reverse_dictionary[t_id] for t_id in self.compatible_tools[last_tool_name_id] + ] + t_intersect = list(set(predictions).intersection(set(pub_tools))) + t_diff = list(set(predictions).difference(set(pub_tools))) t_intersect, u_intersect = self.__sort_by_usage( t_intersect, self.tool_weights_sorted, self.model_data_dictionary ) - t_diff, u_diff = self.__sort_by_usage(t_diff, self.tool_weights_sorted, self.model_data_dictionary) - t_intersect.extend(t_diff) - u_intersect.extend(u_diff) + t_diff, u_diff = self.__sort_by_usage( + t_diff, self.tool_weights_sorted, self.model_data_dictionary + ) + t_intersect_compat = list(set(last_compatible_tools).intersection(set(t_diff))) + # filter against rare bad predictions for any tool + if len(t_intersect_compat) > 0: + t_compat, u_compat = self.__sort_by_usage( + t_intersect_compat, self.tool_weights_sorted, self.model_data_dictionary + ) + else: + t_compat, u_compat = self.__sort_by_usage( + last_compatible_tools, self.tool_weights_sorted, self.model_data_dictionary + ) + t_intersect.extend(t_compat) + u_intersect.extend(u_compat) + t_intersect = t_intersect[:topk] + u_intersect = u_intersect[:topk] return t_intersect, u_intersect def __sort_by_usage(self, t_list, class_weights, d_dict): @@ -297,17 +311,25 @@ def __separate_predictions(self, base_tools, predictions, last_tool_name, weight Get predictions from published and normal workflows """ last_base_tools = list() + weight_values = list(self.tool_weights_sorted.values()) + wt_predictions = predictions * weight_values prediction_pos = 
np.argsort(predictions, axis=-1) - topk_prediction_pos = prediction_pos[-topk:] + wt_prediction_pos = np.argsort(wt_predictions, axis=-1) + topk_prediction_pos = list(prediction_pos[-topk:]) + wt_topk_prediction_pos = list(wt_prediction_pos[-topk:]) # get tool ids + wt_pred_tool_names = [self.reverse_dictionary[str(tool_pos)] for tool_pos in wt_topk_prediction_pos] pred_tool_names = [self.reverse_dictionary[str(tool_pos)] for tool_pos in topk_prediction_pos] + # exclude same tool as the last tool + pred_tool_names.extend(wt_pred_tool_names) + pred_tool_names = [item for item in pred_tool_names if item != last_tool_name] if last_tool_name in base_tools: last_base_tools = base_tools[last_tool_name] if type(last_base_tools).__name__ == "str": # get published or compatible tools for the last tool in a sequence of tools last_base_tools = last_base_tools.split(",") # get predicted tools - sorted_c_t, sorted_c_v = self.__get_predicted_tools(last_base_tools, pred_tool_names, topk) + sorted_c_t, sorted_c_v = self.__get_predicted_tools(last_base_tools, pred_tool_names, last_tool_name, topk) return sorted_c_t, sorted_c_v def __compute_tool_prediction(self, trans, tool_sequence): @@ -353,8 +375,5 @@ def __compute_tool_prediction(self, trans, tool_sequence): pub_t, pub_v = self.__separate_predictions( self.standard_connections, prediction, last_tool_name, weight_values, topk ) - # remove duplicates if any - pub_t = list(dict.fromkeys(pub_t)) - pub_v = list(dict.fromkeys(pub_v)) prediction_data = self.__filter_tool_predictions(trans, prediction_data, pub_t, pub_v, last_tool_name) return prediction_data diff --git a/lib/galaxy/webapps/galaxy/api/datasets.py b/lib/galaxy/webapps/galaxy/api/datasets.py index 90fdad813564..2f488e7b705b 100644 --- a/lib/galaxy/webapps/galaxy/api/datasets.py +++ b/lib/galaxy/webapps/galaxy/api/datasets.py @@ -21,7 +21,10 @@ Query, Request, ) -from starlette.responses import StreamingResponse +from starlette.responses import ( + Response, + StreamingResponse, +) from galaxy.schema import ( FilterQueryParams, @@ -52,6 +55,7 @@ from galaxy.webapps.galaxy.services.datasets import ( ComputeDatasetHashPayload, ConvertedDatasetsMap, + DatasetContentType, DatasetInheritanceChain, DatasetsService, DatasetStorageDetails, @@ -388,6 +392,20 @@ def show( return self.service.show(trans, dataset_id, hda_ldda, serialization_params, data_type, **extra_params) + @router.get( + "/api/datasets/{dataset_id}/content/{content_type}", + summary="Retrieve information about the content of a dataset.", + ) + def get_structured_content( + self, + request: Request, + trans=DependsOnTrans, + dataset_id: DecodedDatabaseIdField = DatasetIDPathParam, + content_type: DatasetContentType = DatasetContentType.data, + ): + content, headers = self.service.get_structured_content(trans, dataset_id, content_type, **request.query_params) + return Response(content=content, headers=headers) + @router.delete( "/api/datasets", summary="Deletes or purges a batch of datasets.", diff --git a/lib/galaxy/webapps/galaxy/services/datasets.py b/lib/galaxy/webapps/galaxy/services/datasets.py index f3aadcb1d736..308ae0019de3 100644 --- a/lib/galaxy/webapps/galaxy/services/datasets.py +++ b/lib/galaxy/webapps/galaxy/services/datasets.py @@ -23,6 +23,7 @@ web, ) from galaxy.celery.tasks import compute_dataset_hash +from galaxy.datatypes.binary import Binary from galaxy.datatypes.dataproviders.exceptions import NoProviderAvailable from galaxy.managers.base import ModelSerializer from galaxy.managers.context import 
ProvidesHistoryContext @@ -98,6 +99,15 @@ class RequestDataType(str, Enum): in_use_state = "in_use_state" +class DatasetContentType(str, Enum): + """For retrieving content from a structured dataset (e.g. HDF5)""" + + meta = "meta" + attr = "attr" + stats = "stats" + data = "data" + + class DatasetStorageDetails(Model): object_store_id: Optional[str] = Field( description="The identifier of the destination ObjectStore for this dataset.", @@ -672,6 +682,30 @@ def delete_batch( trans.sa_session.flush() return DeleteDatasetBatchResult.construct(success_count=success_count, errors=errors) + def get_structured_content( + self, + trans: ProvidesHistoryContext, + dataset_id: DecodedDatabaseIdField, + content_type: DatasetContentType, + **params, + ): + """ + Retrieves contents of a dataset. It is left to the datatype to decide how + to interpret the content types. + """ + headers = {} + content: Any = "" + dataset = self.hda_manager.get_accessible(dataset_id, trans.user) + if not isinstance(dataset.datatype, Binary): + raise galaxy_exceptions.InvalidFileFormatError("Only available for structured datatypes") + try: + content, headers = dataset.datatype.get_structured_content(dataset, content_type, **params) + except galaxy_exceptions.MessageException: + raise + except Exception as e: + raise galaxy_exceptions.InternalServerError(f"Could not get content for dataset: {util.unicodify(e)}") + return content, headers + def _get_or_create_converted(self, trans, original: model.DatasetInstance, target_ext: str): try: original.get_converted_dataset(trans, target_ext) diff --git a/packages/data/setup.cfg b/packages/data/setup.cfg index 151c44a24796..8e43540bbb64 100644 --- a/packages/data/setup.cfg +++ b/packages/data/setup.cfg @@ -40,6 +40,7 @@ install_requires = bx-python dnspython galaxy-sequence-utils + h5grove h5py isa-rwval MarkupSafe diff --git a/pyproject.toml b/pyproject.toml index 0b58ff0055e0..bf5d6cde0f59 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,7 @@ galaxy_sequence_utils = "*" gravity = ">=1.0" gunicorn = "*" gxformat2 = "*" +h5grove = ">=1.2.1" h5py = "*" importlib-metadata = "<5" # Work around https://github.com/celery/kombu/issues/1600 importlib-resources = "*" diff --git a/static/favicon.ico b/static/favicon.ico index cf52fdcad290..afc41f1370a5 100644 Binary files a/static/favicon.ico and b/static/favicon.ico differ diff --git a/test-data/chopper.h5 b/test-data/chopper.h5 new file mode 100644 index 000000000000..470e89249f68 Binary files /dev/null and b/test-data/chopper.h5 differ diff --git a/test/integration/test_structured_dataset.py b/test/integration/test_structured_dataset.py new file mode 100644 index 000000000000..d0215ee37c23 --- /dev/null +++ b/test/integration/test_structured_dataset.py @@ -0,0 +1,71 @@ +"""Integration tests for structured datasets (currently only HDF5 supported). + +This file checks the ability to access datasets using the get_structured_content +API and services. 
+""" + +import os + +from galaxy_test.base.populators import DatasetPopulator +from galaxy_test.driver import integration_util + +SCRIPT_DIR = os.path.normpath(os.path.dirname(__file__)) +TEST_DATA_DIRECTORY = os.path.join(SCRIPT_DIR, os.pardir, os.pardir, "test-data") + + +class TestStructuredDataset(integration_util.IntegrationTestCase): + require_admin_user = True + dataset_populator: DatasetPopulator + + def setUp(self): + super().setUp() + self.dataset_populator = DatasetPopulator(self.galaxy_interactor) + self.history_id = self.dataset_populator.new_history() + + def test_fail_on_nonbinary(self): + dataset = self.dataset_populator.new_dataset( + self.history_id, "file://%s/random-file" % TEST_DATA_DIRECTORY, file_type="txt", wait=True + ) + dataset_id = dataset["dataset_id"] + response = self._get(f"datasets/{dataset_id}/content/meta") + self._assert_status_code_is(response, 500) + + def test_api_meta(self): + dataset = self.dataset_populator.new_dataset( + self.history_id, "file://%s/chopper.h5" % TEST_DATA_DIRECTORY, file_type="h5", wait=True + ) + dataset_id = dataset["dataset_id"] + response = self._get(f"datasets/{dataset_id}/content/meta") + self._assert_status_code_is(response, 200) + hvals = response.json() + self._assert_has_keys(hvals, "attributes", "name", "type") + + def test_api_attr(self): + dataset = self.dataset_populator.new_dataset( + self.history_id, "file://%s/chopper.h5" % TEST_DATA_DIRECTORY, file_type="h5", wait=True + ) + dataset_id = dataset["dataset_id"] + response = self._get(f"datasets/{dataset_id}/content/attr") + self._assert_status_code_is(response, 200) + hvals = response.json() + self._assert_has_keys(hvals, "HDF5_Version", "NeXus_version", "default", "file_name", "file_time") + + def test_api_stats(self): + dataset = self.dataset_populator.new_dataset( + self.history_id, "file://%s/chopper.h5" % TEST_DATA_DIRECTORY, file_type="h5", wait=True + ) + dataset_id = dataset["dataset_id"] + response = self._get(f"datasets/{dataset_id}/content/stats?path=%2Fentry%2Fdata%2Fdata") + self._assert_status_code_is(response, 200) + hvals = response.json() + self._assert_has_keys(hvals, "strict_positive_min", "positive_min", "min", "max", "mean", "std") + + def test_api_data(self): + dataset = self.dataset_populator.new_dataset( + self.history_id, "file://%s/chopper.h5" % TEST_DATA_DIRECTORY, file_type="h5", wait=True + ) + dataset_id = dataset["dataset_id"] + response = self._get(f"datasets/{dataset_id}/content/data?path=%2Fentry%2Fdata%2Fdata") + self._assert_status_code_is(response, 200) + hvals = response.json() + assert len(hvals) == 148 diff --git a/tools/data_source/omicsdi.xml b/tools/data_source/omicsdi.xml new file mode 100644 index 000000000000..6a3ac6561acf --- /dev/null +++ b/tools/data_source/omicsdi.xml @@ -0,0 +1,29 @@ + + + Sync dataset's files from OmicsDI + + + + + go to OmicsDI server $GALAXY_URL + + + + + + + + + + + + + + diff --git a/tools/data_source/omicsdi_importer.py b/tools/data_source/omicsdi_importer.py new file mode 100644 index 000000000000..e7be5ba155d8 --- /dev/null +++ b/tools/data_source/omicsdi_importer.py @@ -0,0 +1,325 @@ +import argparse +import json +import os +import sys +import uuid +import urllib + +import galaxy +from galaxy.datatypes import sniff +from galaxy.datatypes.registry import Registry +from galaxy.util.bunch import Bunch + + +# Mappings for known genomespace formats to galaxy formats +GENOMESPACE_EXT_TO_GALAXY_EXT = { + "rifles": "rifles", + "lifes": "lifes", + "cn": "cn", + "gtf": "gtf", + "res": "res", + 
"xcn": "xcn", + "lowercasetxt": "lowercasetxt", + "bed": "bed", + "cbs": "cbs", + "genomicatab": "genomicatab", + "gxp": "gxp", + "reversedtxt": "reversedtxt", + "nowhitespace": "nowhitespace", + "unknown": "unknown", + "txt": "txt", + "uppercasetxt": "uppercasetxt", + "gistic": "gistic", + "gff": "gff", + "gmt": "gmt", + "gct": "gct", +} + + +def _prepare_json_list(param_list): + """ + JSON serialization Support functions for exec_before_job hook + """ + rval = [] + for value in param_list: + if isinstance(value, dict): + rval.append(_prepare_json_param_dict(value)) + elif isinstance(value, list): + rval.append(_prepare_json_list(value)) + else: + rval.append(str(value)) + return rval + + +def _prepare_json_param_dict(param_dict): + """ + JSON serialization Support functions for exec_before_job hook + """ + rval = {} + for key, value in param_dict.items(): + if isinstance(value, dict): + rval[key] = _prepare_json_param_dict(value) + elif isinstance(value, list): + rval[key] = _prepare_json_list(value) + else: + rval[key] = str(value) + return rval + + +def exec_before_job(app, inp_data, out_data, param_dict=None, tool=None): + """ + Galaxy override hook + See: https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccode.3E_tag_set + Since only tools with tool_type="data_source" provides functionality for having a JSON param file such as this: + https://wiki.galaxyproject.org/Admin/Tools/DataManagers/DataManagerJSONSyntax#Example_JSON_input_to_tool, + this hook is used to manually create a similar JSON file. + However, this hook does not provide access to GALAXY_DATATYPES_CONF_FILE and GALAXY_ROOT_DIR + properties, so these must be passed in as commandline params. + """ + if param_dict is None: + param_dict = {} + json_params = {} + json_params["param_dict"] = _prepare_json_param_dict(param_dict) + json_params["output_data"] = [] + json_params["job_config"] = dict( + GALAXY_DATATYPES_CONF_FILE=param_dict.get("GALAXY_DATATYPES_CONF_FILE"), + GALAXY_ROOT_DIR=param_dict.get("GALAXY_ROOT_DIR"), + TOOL_PROVIDED_JOB_METADATA_FILE=galaxy.jobs.TOOL_PROVIDED_JOB_METADATA_FILE, + ) + json_filename = None + for i, (out_name, data) in enumerate(out_data.items()): + file_name = data.get_file_name() + data_dict = dict( + out_data_name=out_name, + ext=data.ext, + dataset_id=data.dataset.id, + hda_id=data.id, + file_name=file_name, + ) + json_params["output_data"].append(data_dict) + if json_filename is None: + json_filename = file_name + with open(json_filename, "w") as out: + out.write(json.dumps(json_params)) + + +# def get_galaxy_ext_from_genomespace_format(format): +# return GENOMESPACE_EXT_TO_GALAXY_EXT.get(format, None) + + +def get_galaxy_ext_from_file_ext(filename): + if not filename: + return None + filename = filename.lower() + # ext = filename.rsplit(".", 1)[-1] + return "txt" # get_galaxy_ext_from_genomespace_format(ext) + + +def sniff_and_handle_data_type(json_params, output_file): + """ + The sniff.handle_uploaded_dataset_file() method in Galaxy performs dual + functions: it sniffs the filetype and if it's a compressed archive for + a non compressed datatype such as fasta, it will be unpacked. 
+ """ + try: + datatypes_registry = Registry() + datatypes_registry.load_datatypes( + root_dir=json_params["job_config"]["GALAXY_ROOT_DIR"], + config=json_params["job_config"]["GALAXY_DATATYPES_CONF_FILE"], + ) + file_type = sniff.handle_uploaded_dataset_file(output_file, datatypes_registry) + return file_type + except Exception: + return None + + +def determine_output_filename(input_url, metadata, json_params, primary_dataset): + """ + Determines the output file name. If only a single output file, the dataset name + is used. If multiple files are being downloaded, each file is given a unique dataset + name + """ + return os.path.join( + os.getcwd(), "outputs", "%s.%s" % (metadata.name, metadata.data_format) + ) + output_filename = json_params["output_data"][0]["file_name"] + + if not primary_dataset or not output_filename: + hda_id = json_params["output_data"][0]["hda_id"] + output_filename = "primary_%i_%s_visible_%s" % ( + hda_id, + metadata.name, + uuid.uuid4(), + ) + + return os.path.join(os.getcwd(), output_filename) + + +def determine_file_type( + input_url, output_filename, metadata, json_params, sniffed_type +): + """ + Determine the Galaxy data format for this file. + """ + # Use genomespace metadata to map type + # file_format = metadata.data_format_name if metadata.data_format else None + file_type = "txt" # get_galaxy_ext_from_genomespace_format(file_format) + + # If genomespace metadata has no identifiable format, attempt to sniff type + if not file_type: + file_type = sniffed_type + + # Still no type? Attempt to use filename extension to determine a type + if not file_type: + file_type = get_galaxy_ext_from_file_ext(metadata.name) + + # Nothing works, use default + if not file_type: + file_type = "data" + + return file_type + + +def save_result_metadata( + output_filename, file_type, metadata, json_params, primary_dataset=False +): + """ + Generates a new job metadata file (typically galaxy.json) with details of + all downloaded files, which Galaxy can read and use to display history items + and associated metadata + """ + dataset_id = json_params["output_data"][0]["dataset_id"] + with open( + json_params["job_config"]["TOOL_PROVIDED_JOB_METADATA_FILE"], "ab" + ) as metadata_parameter_file: + if primary_dataset: + metadata_parameter_file.write( + "%s\n" + % json.dumps( + dict( + type="dataset", + dataset_id=dataset_id, + ext=file_type, + name="GenomeSpace importer on %s" % (metadata.name), + ) + ) + ) + else: + metadata_parameter_file.write( + "%s\n" + % json.dumps( + dict( + type="new_primary_dataset", + base_dataset_id=dataset_id, + ext=file_type, + filename=output_filename, + name="GenomeSpace importer on %s" % (metadata.name), + ) + ) + ) + + +def download_single_file(input_url, json_params, primary_dataset=False): + # 1. Get file metadata + # metadata = gs_client.get_metadata(input_url) + + metadata = Bunch() + metadata.name = os.path.split(input_url)[-1] + metadata.data_format_name = "FASTA" + metadata.data_format = "fasta" + metadata.url = input_url + + # 2. Determine output file name + output_filename = determine_output_filename( + input_url, metadata, json_params, primary_dataset + ) + print("output_filename is: %s", output_filename) + # 3. Download file + # gs_client.copy(input_url, output_filename) + os.system("wget --quiet -O %s %s " % (output_filename, input_url)) + + # 4. Decompress file if compressed and sniff type + sniffed_type = sniff_and_handle_data_type(json_params, output_filename) + + # 5. 
Determine file type from available metadata + file_type = determine_file_type( + input_url, output_filename, metadata, json_params, sniffed_type + ) + + # 6. Write job output metadata + save_result_metadata( + output_filename, + file_type, + metadata, + json_params, + primary_dataset=primary_dataset, + ) + print(json_params["job_config"]["TOOL_PROVIDED_JOB_METADATA_FILE"]) + print(open(json_params["job_config"]["TOOL_PROVIDED_JOB_METADATA_FILE"]).read()) + + +def download_from_omicsdi_importer(json_parameter_file, root, data_conf): + with open(json_parameter_file, "r") as param_file: + json_params = json.load(param_file) + + print("###########") + print(json_params) + print("###########") + # Add in missing job config properties that could not be set in the exec_before_job hook + json_params["job_config"]["GALAXY_ROOT_DIR"] = root + json_params["job_config"]["GALAXY_DATATYPES_CONF_FILE"] = data_conf + + # Extract input_urls and token (format is input_urls^token). If a custom_token is + # provided, use that instead. + url = json_params.get("param_dict", {}).get("URL", "") + print(url) + response = urllib.urlopen(url) + data = json.loads(response.read()) + print(data) + # if data: + # input_urls = url_with_token.split('^')[0] + # token = custom_token + # else: + # input_urls, token = url_with_token.split('^') + input_url_list = data + + # gs_client = GenomeSpaceClient(token=token) + + for idx, input_url in enumerate(input_url_list): + print(input_url) + download_single_file(input_url, json_params, primary_dataset=(idx == 0)) + + +def process_args(args): + parser = argparse.ArgumentParser() + parser.add_argument( + "-p", + "--json_parameter_file", + type=str, + help="JSON parameter file", + required=True, + ) + parser.add_argument( + "-r", "--galaxy_root", type=str, help="Galaxy root dir", required=True + ) + parser.add_argument( + "-c", + "--data_conf", + type=str, + help="Galaxy data types conf file for mapping file types", + required=True, + ) + + args = parser.parse_args(args[1:]) + return args + + +def main(): + args = process_args(sys.argv) + download_from_omicsdi_importer( + args.json_parameter_file, args.galaxy_root, args.data_conf + ) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/data_source/seek.xml b/tools/data_source/seek.xml new file mode 100644 index 000000000000..041cea749599 --- /dev/null +++ b/tools/data_source/seek.xml @@ -0,0 +1,20 @@ + + + Import files from SEEK + + data_source.py '$output' $__app__.config.output_size_limit + + + go to Sync Datasource $GALAXY_URL + + + + + + + + + + + + diff --git a/tools/interactive/interactivetool_audiolabeler.xml b/tools/interactive/interactivetool_audiolabeler.xml new file mode 100644 index 000000000000..29078a56c58a --- /dev/null +++ b/tools/interactive/interactivetool_audiolabeler.xml @@ -0,0 +1,69 @@ + + Nature+Energy Audio Labeller audio data annotation tool + + ylebras/neal-docker:latest + + + + 3838 + /sample-apps/audio/ + + + > /var/log/shiny-server.log 2>&1 + + ]]> + + + + + + + + + + + + + + + + + + + + + +`_ is an open-source interactive audio data annotation tool based on R Shiny. 
+ +label file format: + +"date_time","file_name","start_time","end_time","start_freq","end_freq","class_label","call_type","confidence","notes","labeler","id" +"2022-05-21 21:19:54","1_SMU05115_20211104_064902_start_39_16.wav",4.9825037303551,5.829261426746,0.53573887350619,4.3165341640833,"Meadow Pipit",NA,1,NA,"Anthony",4 +"2022-05-21 22:25:56","1_SMU05115_20211104_064902_start_39_16.wav",5.4647543215773,6.6376032643079,6.2302700519063,8.6107707904178,"Weather Noise",NA,1,NA,"Anthony",9 +"2022-05-21 22:26:36","2_RICHFIELDMET_20211218_111120.wav",2.6209644325282,4.3674997777827,4.3165341640833,6.4636524772505,"Eurasian Magpie",NA,1,NA,"Anthony",4 +]]> + + + @misc{githubsurvey2022, + author = {ANTHONY THOMAS GIBBONS}, + title = {{NEAL (Nature+Energy Audio Labeller) R Shiny app}}, + publisher = {Github}, + url = {https://github.com/gibbona1/neal} + } + } + + diff --git a/tools/interactive/interactivetool_blobtoolkit.xml b/tools/interactive/interactivetool_blobtoolkit.xml new file mode 100644 index 000000000000..0ff46b2b817c --- /dev/null +++ b/tools/interactive/interactivetool_blobtoolkit.xml @@ -0,0 +1,53 @@ + + genome assembly QC viewer + + 4.1.0 + 1 + + + quay.io/galaxy/blobtoolkit-server:4.1.0 + + + + 80 + view/all + + + + + + + + + + + + + + 10.1534/g3.119.400908 + + diff --git a/tools/interactive/interactivetool_cellxgene_1.1.1.xml b/tools/interactive/interactivetool_cellxgene_1.1.1.xml new file mode 100644 index 000000000000..08edd9273b4d --- /dev/null +++ b/tools/interactive/interactivetool_cellxgene_1.1.1.xml @@ -0,0 +1,98 @@ + + + quay.io/biocontainers/cellxgene:1.1.1--pyhdfd78af_0 + + + + 80 + + + + + + + + + + + + + + + + + + + + + + +`_ . +]]> + diff --git a/tools/interactive/interactivetool_divand.xml b/tools/interactive/interactivetool_divand.xml new file mode 100644 index 000000000000..8b60a2f6a6f2 --- /dev/null +++ b/tools/interactive/interactivetool_divand.xml @@ -0,0 +1,135 @@ + + + quay.io/galaxy/divand-jupyterhub:galaxy-2023-04-25t0904-bag + + + + 8888 + ipython/lab + + + + $__history_id__ + $__galaxy_url__ + 8080 + $__galaxy_url__ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + `_'s package. + +Why such notebooks? +------------------- + +DIVAnd (Data-Interpolating Variational Analysis in n dimensions) performs an n-dimensional variational analysis/gridding of arbitrarily located observations. +Observations will be interpolated/analyzed on a curvilinear grid in 1, 2, 3 or more dimensions. In this sense it is a generalization of the original +two-dimensional DIVA version (still available `here `_ but not further developed anymore). + +The method bears some similarities and equivalences with Optimal Interpolation or Krigging in that it allows to create a smooth and continous field from a +collection of observations, observations which can be affected by errors. The analysis method is however different in practise, allowing to take into account +topological features, physical constraints etc in a natural way. The method was initially developped with ocean data in mind, but it can be applied to any +field where localized observations have to be used to produce gridded fields which are "smooth". + +DIVAndrun is the core analysis function in n dimensions. It does not know anything about the physical parameters or units you work with. +Coordinates can also be very general. The only constraint is that the metrics (pm,pn,po,...) when multiplied by the corresponding length scales +len lead to non-dimensional parameters. 
Furthermore the coordinates of the output grid (xi,yi,zi,...) need to have the same units as the observation +coordinates (x,y,z,...). + +DIVAndfun is a version with a minimal set of parameters (the coordinates and values of observations, i.e. (x,f), the remaining parameters being optional) +and provides an interpolation function rather than an already gridded field. + +diva3D is a higher-level function specifically designed for climatological analysis of data on Earth, using longitude/latitude/depth/time coordinates +and correlations length in meters. It makes the necessary preparation of metrics, parameter optimizations etc you normally would program yourself before +calling the analysis function DIVAndrun. + +DIVAnd_heatmap can be used for additive data and produces Kernel Density Estimations. + +DIVAndgo is only needed for very large problems when a call to DIVAndrun leads to memory or CPU time problems. This function tries to decide which +solver (direct or iterative) to use and how to make an automatic domain decomposition. Not all options from DIVAndrun are available. + +If you want to try out multivariate approaches, you can look at DIVAnd_multivarEOF and DIVAnd_multivarJAC + +There are example Notebooks written in Julia. They are now organised into 5 categories, separeted in folders: + +* Intro +* Preprocessing +* Analysis +* Postprocessing +* AdvancedTopics + +If you want more informations about the functions and parameters see also the `documentations here `_. + ]]> + + + diff --git a/tools/interactive/interactivetool_hdfview.xml b/tools/interactive/interactivetool_hdfview.xml new file mode 100644 index 000000000000..6169710361f3 --- /dev/null +++ b/tools/interactive/interactivetool_hdfview.xml @@ -0,0 +1,33 @@ + + Desktop application to display netcdf, hdf4 or hdf5 files + + 3.3.0 + + + quay.io/galaxy/hdfview-desktop-vnc:@VERSION@ + + + + 80 + + + + + + + + + + + + + + + + + diff --git a/tools/interactive/interactivetool_metashark.xml b/tools/interactive/interactivetool_metashark.xml index a188809b67ba..402db029b29a 100644 --- a/tools/interactive/interactivetool_metashark.xml +++ b/tools/interactive/interactivetool_metashark.xml @@ -1,7 +1,7 @@ - + Metadata Shiny Automated Resource and Knowledge - eliearnaud/metashark:1.7.0 + ylebras/metashark:1.7.3 @@ -9,16 +9,15 @@ / - - + @@ -28,18 +27,7 @@ `_ is a R Shiny App to automate metadata creation in Ecological Metadata Language (E> - -Example input file (csv ot tabular):: - -"ID" "x" "y" "test" -01 -60.291838 46.328137 2 -02 -114.58927 35.022485 3 -03 -93.37406 30.00586 4 -04 -79.336288 43.682218 5 -05 -109.156024 31.904185 2 -06 -71.098031 42.297408 9 -07 -110.927215 32.18203 12 +`MetaShARK `_ is an work in progress innovative R Shiny App to automate metadata creation in Ecological Metadata Language. Based on EML Assembly Line R package, MetaShARK allows you to produce EML metadata only from text files as csv or tsv, and allows you to provide metadata on data attributes, geographic, taxonomic and temporal coverages, authors, summary, keywords and methods. Futhermore, the 1.7.3 version alows you to add EML semantic annotation related to attributes names and/or keywords, based on terminological resources from Bioportal. This version of MetaShARK is provided "as is" for testing, as bugs can appear. 
]]> diff --git a/tools/interactive/interactivetool_metashrimps.xml b/tools/interactive/interactivetool_metashrimps.xml new file mode 100644 index 000000000000..cbb3d467a1c3 --- /dev/null +++ b/tools/interactive/interactivetool_metashrimps.xml @@ -0,0 +1,45 @@ + + Metadata Shiny for Retroactive Information Management and Production of data paper Sketches + + ylebras/ms_metashrimps:latest + + + + 3838 + /sample-apps/META/App_metadig/ + + + > /var/log/shiny-server.log 2>&1 + ]]> + + + + + + + + + + +`_ is a R Shiny App to automate production of FAIR metrics and data paper sketche from Ecological Metadata Language based metadata documents + +]]> + + + @misc{githubsurvey2023, + author = {Tanguy Genthon}, + title = {{Metadata Shiny for Retroactive Information Management and Production of data paper Sketches}}, + publisher = {Github}, + url = {https://github.com/TanguyGen/metaCure}, + year = {2023} + } + } + + diff --git a/tools/interactive/interactivetool_odv.xml b/tools/interactive/interactivetool_odv.xml new file mode 100644 index 000000000000..359084ebfe99 --- /dev/null +++ b/tools/interactive/interactivetool_odv.xml @@ -0,0 +1,47 @@ + + interactive plotting tool for geo-referenced data + + 5.6.5 + + + quay.io/mariejo/docker_odv:@VERSION@ + + + + 80 + + + + + + + + + + + + + + + + + + `_ plots geo-referenced and other arrays from netCDF, HDF, GRIB, and other datasets. + ]]> + + + + @Manual{, + title = {Ocean Data View}, + author = {Schlitzer Reiner}, + year = {2023}, + note = {https://odv.awi.de} + + + diff --git a/tools/interactive/interactivetool_pampa.xml b/tools/interactive/interactivetool_pampa.xml new file mode 100644 index 000000000000..c834f84fc795 --- /dev/null +++ b/tools/interactive/interactivetool_pampa.xml @@ -0,0 +1,57 @@ + + occurences data analysis + + ylebras/pampa:latest + + + + 3838 + /sample-apps/PAMPA/ + + + > /var/log/shiny-server.log 2>&1 + ]]> + + + + + + + + + + + + + + + + + + +`_ is a R Shiny App to facilitate production of biodiversity metrics from observation files related to species + +To have outputs on your Galaxy history, once data tretment finished, one need to specify `/srv/shiny-server/sample-apps/PAMPA/` as "working directory" in the "Load data" tab of the PAMPA interface. + +]]> + + + @misc{githubsurvey2023, + author = {Mélanie Madelin, Yves Recht, Yvan Le Bras, Dominique Pelletier}, + title = {{Performance Indicators of Marine Protected Areas for the management of coastal ecosystems, resources and their uses}}, + publisher = {Github}, + url = {https://github.com/madelinm/Package_PAMPA}, + year = {2023} + } + } + + diff --git a/tools/interactive/interactivetool_pangeo_ml_notebook.xml b/tools/interactive/interactivetool_pangeo_ml_notebook.xml new file mode 100644 index 000000000000..387cc65a9a5d --- /dev/null +++ b/tools/interactive/interactivetool_pangeo_ml_notebook.xml @@ -0,0 +1,102 @@ + + + quay.io/nordicesmhub/docker-pangeo-ml-notebook:2023.02.27 + + + + 8888 + ipython/lab + + + + $__history_id__ + $__galaxy_url__ + 8080 + $__galaxy_url__ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The Pangeo Machine Learning Notebook is based on Pangeo ml-notebook. Pangeo is an open and inclusive community platform for Big Data geoscience. + + Galaxy offers you to use Pangeo Notebooks directly in Galaxy accessing and interacting with Galaxy datasets as you like. A very common use-case is to + do the heavy lifting and data reduction steps in Galaxy and the plotting and more `interactive` part on smaller datasets in Jupyter. 
+ + You can start with a new Jupyter notebook from scratch or load an already existing one, e.g. from your collegue and execute it on your dataset. + If you have a defined input dataset you can even execute a Jupyter notebook in a workflow, given that the notebook is writing the output back to the history. + + You can import data into the notebook via a predefined `get()` function and write results back to Galaxy with a `put()` function. + + The Pangeo Machine Learning notebook is developed and maintained by the [Pangeo](https://pangeo.io/) community. + A new version of Galaxy Pangeo Machine Learning JupyterLab is deployed when a new Pangeo ml-notebook docker image is made available by the Pangeo community. + The list of packages is available at [Pangeo docker images](https://github.com/pangeo-data/pangeo-docker-images/tree/master/ml-notebook). + + + diff --git a/tools/interactive/interactivetool_pangeo_notebook.xml b/tools/interactive/interactivetool_pangeo_notebook.xml index b3c521d1c8cb..831a224514fd 100644 --- a/tools/interactive/interactivetool_pangeo_notebook.xml +++ b/tools/interactive/interactivetool_pangeo_notebook.xml @@ -1,6 +1,6 @@ - quay.io/nordicesmhub/docker-pangeo-notebook:1c0f66b + quay.io/nordicesmhub/docker-pangeo-notebook:2023.02.27 diff --git a/tools/interactive/interactivetool_scoop3.xml b/tools/interactive/interactivetool_scoop3.xml new file mode 100644 index 000000000000..7bd3d9db8f7a --- /dev/null +++ b/tools/interactive/interactivetool_scoop3.xml @@ -0,0 +1,38 @@ + + Interactive visual quality control of Argo netCDF files + + 1.40 + + + quay.io/galaxy/scoop3-ubuntu-desktop-lxde-vnc:@VERSION@ + + + + 80 + + + + + + + + + + + + + + + + + 10.17882/48531 + + diff --git a/tools/sr_assembly/velveth.xml b/tools/sr_assembly/velveth.xml index 369b54ac05e4..5b025b6dc645 100644 --- a/tools/sr_assembly/velveth.xml +++ b/tools/sr_assembly/velveth.xml @@ -24,6 +24,29 @@ + + + + + + + + + + + + + + + + + + + + + + +