From 9bb5e3b4e4c54836c9cc39f80ea07af53b3672c3 Mon Sep 17 00:00:00 2001 From: Colin Date: Sat, 9 Dec 2023 08:33:31 -0500 Subject: [PATCH 1/9] Use ruff --- jbrowse_jupyter/util.py | 58 +++++++++++++++++++++++++++++++++++++++++ ruff.toml | 4 +++ 2 files changed, 62 insertions(+) create mode 100644 ruff.toml diff --git a/jbrowse_jupyter/util.py b/jbrowse_jupyter/util.py index 599d54c..572c330 100644 --- a/jbrowse_jupyter/util.py +++ b/jbrowse_jupyter/util.py @@ -8,6 +8,64 @@ from urllib.parse import urlparse +hg38_asm = { + "name": "GRCh38", + "sequence": { + "type": "ReferenceSequenceTrack", + "trackId": "GRCh38-ReferenceSequenceTrack", + "adapter": { + "type": "BgzipFastaAdapter", + "fastaLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz" + }, + "faiLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai" + }, + "gziLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi" + }, + }, + }, + "aliases": ["hg38"], + "refNameAliases": { + "adapter": { + "type": "RefNameAliasAdapter", + "location": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/hg38_aliases.txt" + }, + } + }, +} +hg19_asm = { + "name": "hg19", + "aliases": ["GRCh37"], + "sequence": { + "type": "ReferenceSequenceTrack", + "trackId": "hg19-ReferenceSequenceTrack", + "adapter": { + "type": "BgzipFastaAdapter", + "fastaLocation": { + "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz" + }, + "faiLocation": { + "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.fai" + }, + "gziLocation": { + "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.gzi" + }, + }, + }, + "refNameAliases": { + "adapter": { + "type": "RefNameAliasAdapter", + "location": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt" + }, + } + }, +} + + def is_url(filePath): """ Checks whether or not the file path diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000..96068e0 --- /dev/null +++ b/ruff.toml @@ -0,0 +1,4 @@ + +# Same as Black. +line-length = 88 +indent-width = 2 From f22e2fd6b75626bff86632b27802b3c8dac994b3 Mon Sep 17 00:00:00 2001 From: Colin Date: Sat, 9 Dec 2023 08:38:30 -0500 Subject: [PATCH 2/9] Format --- jbrowse_jupyter/dev_server.py | 256 ++--- jbrowse_jupyter/jbrowse_config.py | 1534 ++++++++++++++--------------- jbrowse_jupyter/tracks.py | 962 +++++++++--------- jbrowse_jupyter/util.py | 444 ++++----- 4 files changed, 1594 insertions(+), 1602 deletions(-) diff --git a/jbrowse_jupyter/dev_server.py b/jbrowse_jupyter/dev_server.py index 24881d0..550744f 100644 --- a/jbrowse_jupyter/dev_server.py +++ b/jbrowse_jupyter/dev_server.py @@ -18,148 +18,148 @@ def copy_byte_range(infile, outfile, start=None, stop=None, bufsize=16 * 1024): - """Like shutil.copyfileobj, but only copy a range of the streams. - Both start and stop are inclusive. - """ - if start is not None: - infile.seek(start) - while 1: - to_read = min(bufsize, stop + 1 - infile.tell() if stop else bufsize) - buf = infile.read(to_read) - if not buf: - break - outfile.write(buf) + """Like shutil.copyfileobj, but only copy a range of the streams. + Both start and stop are inclusive. + """ + if start is not None: + infile.seek(start) + while 1: + to_read = min(bufsize, stop + 1 - infile.tell() if stop else bufsize) + buf = infile.read(to_read) + if not buf: + break + outfile.write(buf) BYTE_RANGE_RE = re.compile(r"bytes=(\d+)-(\d+)?$") def parse_byte_range(byte_range): - """ - Returns the two numbers in 'bytes=123-456' or throws ValueError. - The last number or both numbers may be None. - """ - if byte_range.strip() == "": - return None, None + """ + Returns the two numbers in 'bytes=123-456' or throws ValueError. + The last number or both numbers may be None. + """ + if byte_range.strip() == "": + return None, None - m = BYTE_RANGE_RE.match(byte_range) - if not m: - raise ValueError("Invalid byte range %s" % byte_range) + m = BYTE_RANGE_RE.match(byte_range) + if not m: + raise ValueError("Invalid byte range %s" % byte_range) - first, last = [x and int(x) for x in m.groups()] - if last and last < first: - raise ValueError("Invalid byte range %s" % byte_range) - return first, last + first, last = [x and int(x) for x in m.groups()] + if last and last < first: + raise ValueError("Invalid byte range %s" % byte_range) + return first, last class CustomRequestHandler(SimpleHTTPRequestHandler): - """ - Creating a small HTTP request server - """ - - def send_head(self): - if "Range" not in self.headers: - self.range = None - return SimpleHTTPRequestHandler.send_head(self) - try: - self.range = parse_byte_range(self.headers["Range"]) - except ValueError: - self.send_error(400, "Invalid byte range") - return None - first, last = self.range - - # Mirroring SimpleHTTPServer.py here - path = self.translate_path(self.path) - f = None - ctype = self.guess_type(path) - try: - f = open(path, "rb") - except IOError: - self.send_error(404, "File not found") - return None - - fs = os.fstat(f.fileno()) - file_len = fs[6] - if first >= file_len: - self.send_error(416, "Requested Range Not Satisfiable") - return None - - self.send_response(206) - self.send_header("Content-type", ctype) - - if last is None or last >= file_len: - last = file_len - 1 - response_length = last - first + 1 - - self.send_header("Content-Range", "bytes %s-%s/%s" % (first, last, file_len)) - self.send_header("Content-Length", str(response_length)) - self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) - self.end_headers() - return f - - def copyfile(self, source, outputfile): - if not self.range: - return SimpleHTTPRequestHandler.copyfile(self, source, outputfile) - - # SimpleHTTPRequestHandler uses shutil.copyfileobj, which doesn't let - # you stop the copying before the end of the file. - start, stop = self.range # set in send_head() - copy_byte_range(source, outputfile, start, stop) - - def end_headers(self): - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header("Access-Control-Allow-Methods", "GET, OPTIONS") - self.send_header("Access-Control-Expose-Headers", "*") - self.send_header("Accept-Ranges", "bytes") - self.send_header("Content-Type", "application/octet-stream") - SimpleHTTPRequestHandler.end_headers(self) - - def translate_path(self, path): - path = SimpleHTTPRequestHandler.translate_path(self, path) - relpath = os.path.relpath(path, os.getcwd()) - fullpath = os.path.join(self.server.base_path, relpath) - return fullpath + """ + Creating a small HTTP request server + """ + + def send_head(self): + if "Range" not in self.headers: + self.range = None + return SimpleHTTPRequestHandler.send_head(self) + try: + self.range = parse_byte_range(self.headers["Range"]) + except ValueError: + self.send_error(400, "Invalid byte range") + return None + first, last = self.range + + # Mirroring SimpleHTTPServer.py here + path = self.translate_path(self.path) + f = None + ctype = self.guess_type(path) + try: + f = open(path, "rb") + except IOError: + self.send_error(404, "File not found") + return None + + fs = os.fstat(f.fileno()) + file_len = fs[6] + if first >= file_len: + self.send_error(416, "Requested Range Not Satisfiable") + return None + + self.send_response(206) + self.send_header("Content-type", ctype) + + if last is None or last >= file_len: + last = file_len - 1 + response_length = last - first + 1 + + self.send_header("Content-Range", "bytes %s-%s/%s" % (first, last, file_len)) + self.send_header("Content-Length", str(response_length)) + self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) + self.end_headers() + return f + + def copyfile(self, source, outputfile): + if not self.range: + return SimpleHTTPRequestHandler.copyfile(self, source, outputfile) + + # SimpleHTTPRequestHandler uses shutil.copyfileobj, which doesn't let + # you stop the copying before the end of the file. + start, stop = self.range # set in send_head() + copy_byte_range(source, outputfile, start, stop) + + def end_headers(self): + self.send_header("Access-Control-Allow-Origin", "*") + self.send_header("Access-Control-Allow-Methods", "GET, OPTIONS") + self.send_header("Access-Control-Expose-Headers", "*") + self.send_header("Accept-Ranges", "bytes") + self.send_header("Content-Type", "application/octet-stream") + SimpleHTTPRequestHandler.end_headers(self) + + def translate_path(self, path): + path = SimpleHTTPRequestHandler.translate_path(self, path) + relpath = os.path.relpath(path, os.getcwd()) + fullpath = os.path.join(self.server.base_path, relpath) + return fullpath class DevServer(HTTPServer): - def __init__( - self, base_path, server_address, RequestHandlerClass=CustomRequestHandler - ): - self.base_path = base_path - HTTPServer.__init__(self, server_address, RequestHandlerClass) + def __init__( + self, base_path, server_address, RequestHandlerClass=CustomRequestHandler + ): + self.base_path = base_path + HTTPServer.__init__(self, server_address, RequestHandlerClass) def serve(data_path, **kwargs): - """ - Launches a development http server. It is not recommended - for production. - - e.g - serve('./path/to/data', port=8080, host='localhost') - - :param str data_path: path to file directory to serve - defaults to the current working dir - :param int port: (optional) port to utilize when running - the dev server, defaults to 8080 - :param str host: (optional) host to utilize when running - the dev server, default to localhost - """ - print("=============================================") - print( - "Warning: \n" - "This is a development environment.\n" - "This is not recommended for production." - ) - port = kwargs.get("port", 8080) - host = kwargs.get("host", "localhost") - # data_path = kwargs.get('path', ".") - # print('data', data_path) - # dir_path = os.path.join(os.path.dirname(__file__), data_path) - # print('dir path', dir_path) - # print('relative ', os.path.relpath(data_path, os.getcwd())) - # print('join', os.path.join(os.getcwd(), data_path)) - httpd = DevServer(data_path, (host, port)) - server = f"http://{host}:{port}" - print("=============================================") - print(f'Server is now running at \n "{server}"') - httpd.serve_forever() + """ + Launches a development http server. It is not recommended + for production. + + e.g + serve('./path/to/data', port=8080, host='localhost') + + :param str data_path: path to file directory to serve + defaults to the current working dir + :param int port: (optional) port to utilize when running + the dev server, defaults to 8080 + :param str host: (optional) host to utilize when running + the dev server, default to localhost + """ + print("=============================================") + print( + "Warning: \n" + "This is a development environment.\n" + "This is not recommended for production." + ) + port = kwargs.get("port", 8080) + host = kwargs.get("host", "localhost") + # data_path = kwargs.get('path', ".") + # print('data', data_path) + # dir_path = os.path.join(os.path.dirname(__file__), data_path) + # print('dir path', dir_path) + # print('relative ', os.path.relpath(data_path, os.getcwd())) + # print('join', os.path.join(os.getcwd(), data_path)) + httpd = DevServer(data_path, (host, port)) + server = f"http://{host}:{port}" + print("=============================================") + print(f'Server is now running at \n "{server}"') + httpd.serve_forever() diff --git a/jbrowse_jupyter/jbrowse_config.py b/jbrowse_jupyter/jbrowse_config.py index a1bc2d6..390d3a2 100644 --- a/jbrowse_jupyter/jbrowse_config.py +++ b/jbrowse_jupyter/jbrowse_config.py @@ -1,791 +1,791 @@ import IPython from jbrowse_jupyter.util import ( - is_url, - get_default, - guess_file_name, - get_name, + is_url, + get_default, + guess_file_name, + get_name, ) from jbrowse_jupyter.tracks import ( - guess_adapter_type, - guess_track_type, - check_track_data, - get_from_config_adapter, - guess_display_type, - make_url_colab_jupyter, + guess_adapter_type, + guess_track_type, + check_track_data, + get_from_config_adapter, + guess_display_type, + make_url_colab_jupyter, ) def create(view_type="LGV", **kwargs): - """ - Creates a JBrowseConfig given a view type. - - - create(): creates empty LGV JBrowseConfig - - create("CGV"):creates empty CGV JBrowseConfig - - create("LGV", genome="hg19"):creates LGV JBrowseConfig w/ default genome - - create("CGV", conf={"k": "v"}):creates CGV JBrowseConfig w/ a conf obj - - :param str view_type: the type of view ('LGV' or 'CGV'), defaults to LGV - :param str genome: genome ('hg19' or 'hg38') - :return: JBrowseConfig - :rtype: JBrowseConfig instance - :raises TypeError: if genome passed is not hg19 or hg38 - :raises TypeError: if view type is not `LGV` or `CGV` - """ - available_genomes = {"hg19", "hg38"} - conf = kwargs.get("conf", {}) - genome = kwargs.get("genome", "empty") - view = view_type - # view type (LGV or CGV) - # make it backwards compatible - if view_type == "view" or view_type == "conf": - view = "LGV" - if view != "LGV" and view != "CGV": - raise TypeError(f"Currently not supporting view_type: {view}.") - # configuration - # 1) genomes available - # 2) with own conf obj OR - # 3) empty default config to customize) - no_configuration = genome != "empty" and not conf - # Check passed genome is available - message1 = "is not a valid default genome to view" - message2 = "Choose from hg19 or hg38 or pass your own conf" - if genome not in available_genomes and no_configuration: - raise TypeError(f'"{genome}" {message1}.{message2}.') - # genome - if genome in available_genomes: - conf = get_default(genome, view) - # start from empty JBrowseConfig - elif not conf: - return JBrowseConfig(view=view) - # get customized JBrowseConfig - return JBrowseConfig(view=view, conf=conf) + """ + Creates a JBrowseConfig given a view type. + + - create(): creates empty LGV JBrowseConfig + - create("CGV"):creates empty CGV JBrowseConfig + - create("LGV", genome="hg19"):creates LGV JBrowseConfig w/ default genome + - create("CGV", conf={"k": "v"}):creates CGV JBrowseConfig w/ a conf obj + + :param str view_type: the type of view ('LGV' or 'CGV'), defaults to LGV + :param str genome: genome ('hg19' or 'hg38') + :return: JBrowseConfig + :rtype: JBrowseConfig instance + :raises TypeError: if genome passed is not hg19 or hg38 + :raises TypeError: if view type is not `LGV` or `CGV` + """ + available_genomes = {"hg19", "hg38"} + conf = kwargs.get("conf", {}) + genome = kwargs.get("genome", "empty") + view = view_type + # view type (LGV or CGV) + # make it backwards compatible + if view_type == "view" or view_type == "conf": + view = "LGV" + if view != "LGV" and view != "CGV": + raise TypeError(f"Currently not supporting view_type: {view}.") + # configuration + # 1) genomes available + # 2) with own conf obj OR + # 3) empty default config to customize) + no_configuration = genome != "empty" and not conf + # Check passed genome is available + message1 = "is not a valid default genome to view" + message2 = "Choose from hg19 or hg38 or pass your own conf" + if genome not in available_genomes and no_configuration: + raise TypeError(f'"{genome}" {message1}.{message2}.') + # genome + if genome in available_genomes: + conf = get_default(genome, view) + # start from empty JBrowseConfig + elif not conf: + return JBrowseConfig(view=view) + # get customized JBrowseConfig + return JBrowseConfig(view=view, conf=conf) class JBrowseConfig: + """ + Creates JBrowse configuration objects. + Currently supporting configuration objects for the + React JBrowse Linear Genome View and React JBrowse + Circular Genome View + https://jbrowse.org/storybook/lgv/main + https://jbrowse.org/storybook/cgv/main/ + + """ + + def __init__(self, view="LGV", conf=None): """ - Creates JBrowse configuration objects. - Currently supporting configuration objects for the - React JBrowse Linear Genome View and React JBrowse - Circular Genome View - https://jbrowse.org/storybook/lgv/main - https://jbrowse.org/storybook/cgv/main/ + Initializes class. + :param str view: LGV or CGV + defaults to LGV + :param obj conf: optional conf obj """ + in_colab_notebook = False + in_jupyter_notebook = False + try: + import google.colab.output # noqa + + in_colab_notebook = True + except: # noqa + in_colab_notebook = False + try: + shell = IPython.get_ipython().__class__.__name__ # noqa + if shell == "ZMQInteractiveShell": # noqa + in_jupyter_notebook = True + else: + in_jupyter_notebook = False + except: # noqa + in_jupyter_notebook = False + # ===================== + view_default = { + "id": "linearGenomeView", + "type": "LinearGenomeView", + "tracks": [], + } + if view != "LGV" and view == "CGV": + view_default = {"id": "circularView", "type": "CircularView", "tracks": []} + default = { + "assembly": {}, + "tracks": [], + "defaultSession": {"name": "default-session", "view": view_default}, + "aggregateTextSearchAdapters": [], + "location": "", + "configuration": {"theme": {}}, + } + if conf is not None: + for r in default.keys(): + if r not in conf: + conf[r] = default[r] + self.config = default if conf is None else conf + if conf is not None: + ids = {x["trackId"]: x for x in conf["tracks"]} + self.tracks_ids_map = ids + self.tracks_ids_map = {} + self.view = view + # environment + self.nb_port = 8888 + self.nb_host = "localhost" + self.colab = in_colab_notebook + self.jupyter = not in_colab_notebook and in_jupyter_notebook + + def get_config(self): + """ + Returns the configuration object of the JBrowseConfig + instance. This object can then be passed to launch or + create_component to launch or create a Dash JBrowse + component - def __init__(self, view="LGV", conf=None): - """ - Initializes class. + e.g: create("LGV", genome="hg19").get_config() - :param str view: LGV or CGV - defaults to LGV - :param obj conf: optional conf obj - """ - in_colab_notebook = False - in_jupyter_notebook = False - try: - import google.colab.output # noqa - - in_colab_notebook = True - except: # noqa - in_colab_notebook = False - try: - shell = IPython.get_ipython().__class__.__name__ # noqa - if shell == "ZMQInteractiveShell": # noqa - in_jupyter_notebook = True - else: - in_jupyter_notebook = False - except: # noqa - in_jupyter_notebook = False - # ===================== - view_default = { - "id": "linearGenomeView", - "type": "LinearGenomeView", - "tracks": [], + :return: returns configuration object + :rtype: obj + """ + return self.config + + def get_colab(self): + return self.colab + + def get_jupyter(self): + return self.jupyter + + def get_env(self): + print("notebook port: ", self.nb_port) + print("notebook host: ", self.nb_host) + return self.nb_host, self.nb_port + + def set_env(self, notebook_host="localhost", notebook_port=8888): + """ + Changes the port and the host for creating links to files + found within the file tree of jupyter. + + We want to be able to use paths to local files that can be + accessed within the file tree of jupyter notebook and jupyter + lab. The port and host should match those configured in your + jupyter config. + + You can set_env after creating your view. + browser = create("LGV") + browser.set_env("localhost", 8989) + + :param str notebook_host: host used in jupyter config for + for using paths to local files. (Defaults to "localhost") + :param str notebook_port: port used in jupyter config for + for using paths to local files. (Defaults to 8888) + """ + self.nb_port = notebook_port + self.nb_host = notebook_host + + # ========== Assembly =========== + def get_assembly(self): + # Returns the JBrowseConfig assembly subconfiguration object + return self.config["assembly"] + + def get_assembly_name(self): + # Returns the assembly name. + assembly_error = ( + "Can not get assembly name. " "Please configure the assembly first." + ) + if self.get_assembly(): + return self.get_assembly()["name"] + else: + raise Exception(assembly_error) + + def set_assembly(self, assembly_data, **kwargs): + """ + Sets the assembly. + + Assumes assembly_data.fai exists for fasta assembly data + that is not bgzipped. + + Assumes assembly_data.fai and assembly_data.gzi exist for + bgzipped assembly data. + + e.g set_assembly("url/assembly.fasta.gz", overwrite=True) + assumes + "url/assembly.fasta.gz.fai" and + "url/assembly.fasta.gz.gzi" also exist + + For configuring assemblies check out our config docs + https://jbrowse.org/jb2/docs/config_guide/#assembly-config + + :param str assembly_data: url/path to the sequence data + :param str name: (optional) name for the assembly, + defaults to name generated from assembly_data file name + :param list aliases: (optional) list of aliases for the assembly + :param obj refname_aliases: (optional) config for refname aliases. + :param str overwrite: flag whether or not to overwrite + existing assembly, default to False. + :raises TypeError: Paths are only supported in jupyter. + :raises TypeError: adapter used for file type is not supported or + recognized + """ + overwrite = kwargs.get("overwrite", False) + indx = kwargs.get("index", "defaultIndex") + err = "assembly is already set, set overwrite to True to overwrite" + if self.get_assembly() and not overwrite: + raise TypeError(err) + aliases = kwargs.get("aliases", []) + refname_aliases = kwargs.get("refname_aliases", {}) + if is_url(assembly_data): + if indx != "defaultIndex": + if not is_url(indx) and not self.jupyter: + raise TypeError( + f"Path for {assembly_data} " + "is used in an unsupported environment." + "Paths are supported in Jupyter" + " notebooks and Jupyter lab." + "Please use a url for your assembly " + "data. You can check out our local " + "file support docs for more information" + ) + assembly_adapter = guess_adapter_type(assembly_data, "uri", indx, **kwargs) + name = kwargs.get("name", get_name(assembly_data)) + if assembly_adapter["type"] == "UNKNOWN": + raise TypeError("Adapter type is not recognized") + if assembly_adapter["type"] == "UNSUPPORTED": + raise TypeError("Adapter type is not supported") + assembly_config = { + "name": name, + "sequence": { + "type": "ReferenceSequenceTrack", + "trackId": f"{name}-ReferenceSequenceTrack", + "adapter": assembly_adapter, + }, + "aliases": aliases, + "refNameAliases": refname_aliases, + } + self.config["assembly"] = assembly_config + else: + if not self.jupyter: + raise TypeError( + f"Path {assembly_data} for assembly data " + "is used in an unsupported environment." + "Paths are supported in Jupyter notebooks" + " and Jupyter lab.Please use a url for " + "your assembly data. You can check out " + "our local file support docs for more " + "information" + ) + if indx != "defaultIndex" and not is_url(indx): + if not self.jupyter: + raise TypeError( + "Paths are used in an " + "unsupported environment." + "Paths are supported in Jupyter" + " notebooks and Jupyter lab." + "Please use a urls for your assembly" + " and index data. You can check out " + "our local file support docs for more" + " information" + ) + assembly_adapter = guess_adapter_type( + assembly_data, + "localPath", + indx, + colab=self.colab, + nb_port=self.nb_port, + nb_host=self.nb_host, + ) + name = kwargs.get("name", get_name(assembly_data)) + if assembly_adapter["type"] == "UNKNOWN": + raise TypeError("Adapter type is not recognized") + if assembly_adapter["type"] == "UNSUPPORTED": + raise TypeError("Adapter type is not supported") + assembly_config = { + "name": name, + "sequence": { + "type": "ReferenceSequenceTrack", + "trackId": f"{name}-ReferenceSequenceTrack", + "adapter": assembly_adapter, + }, + "aliases": aliases, + "refNameAliases": refname_aliases, + } + self.config["assembly"] = assembly_config + + # ============ Tracks ============= + + def get_reference_track(self): + # Returns the reference track for the configured assembly. + assembly_name = self.get_assembly_name() + configuration = f"{assembly_name}-ReferenceSequenceTrack" + conf_str = f"{configuration}-LinearReferenceSequenceDisplay" + return { + "type": "ReferenceSequenceTrack", + "configuration": configuration, + "displays": [ + { + "type": "LinearReferenceSequenceDisplay", + "configuration": conf_str, } - if view != "LGV" and view == "CGV": - view_default = {"id": "circularView", "type": "CircularView", "tracks": []} - default = { - "assembly": {}, - "tracks": [], - "defaultSession": {"name": "default-session", "view": view_default}, - "aggregateTextSearchAdapters": [], - "location": "", - "configuration": {"theme": {}}, + ], + } + + def get_track_display(self, track): + # Returns the track display subconfiguration. + track_type = track["type"] + track_id = track["trackId"] + display_type = guess_display_type(track_type, self.view) + return { + "type": track_type, + "configuration": track_id, + "displays": [ + {"type": display_type, "configuration": f"{track_id}-{display_type}"} + ], + } + + def get_track(self, track_name): + # Return the list of track configurations with that name + tracks = [track for track in self.get_tracks() if track["name"] == track_name] + return tracks + + def get_tracks(self): + # Returns list of tracks in the configuration. + return self.config["tracks"] + + def add_df_track(self, track_data, name, **kwargs): + """ + Adds track from a pandas DataFrame. If the score column + is present, it will create a Quantitative track else it + will create a Feature track. + + Requires DataFrame to have columns named 'refName', + 'start', 'end', and 'name' + + Requires refName and name columns to be of type str and + start, end, and score to be int + + e.g: + add_df_track(df, "track_name") + + :param track_data: panda DataFrame with the track data. + :param str name: name for the track. + :param str track_id: (optional) trackId for the track + :param str overwrite: flag whether or not to overwrite existing track. + :raises Exception: if assembly has not been configured. + :raises TypeError: if track data is invalid + :raises TypeError: if view is not LGV + :raises TypeError: if track with that trackId already exists + list of tracks + """ + if not self.get_assembly(): + raise Exception("Please set the assembly before adding a track.") + if self.view != "LGV": + raise TypeError("Can not add a data frame track to a CGV conf.") + check_track_data(track_data) + + overwrite = kwargs.get("overwrite", False) + assembly_name = self.get_assembly_name() + track_id = kwargs.get("track_id", f"{assembly_name}-{name}") + current_tracks = self.config["tracks"] + # if score column is present => QuantitativeTrack, else FeatureTrack + track_type = "FeatureTrack" + if "score" in track_data: + track_type = "QuantitativeTrack" + + adapter = get_from_config_adapter(track_data) + df_track_config = { + "type": track_type, + "trackId": track_id, + "name": name, + "assemblyNames": [assembly_name], + "adapter": adapter, + } + err = ( + f'track with trackId: "{track_id}" already exists in config.', + "Set overwrite to True if you want to overwrite it.", + ) + if track_id in self.tracks_ids_map.keys() and not overwrite: + raise TypeError(err) + if track_id in self.tracks_ids_map.keys() and overwrite: + # delete track and overwrite it + current_tracks = [t for t in current_tracks if t["trackId"] != track_id] + + current_tracks.append(df_track_config) + self.config["tracks"] = current_tracks + self.tracks_ids_map[track_id] = df_track_config + + def add_track(self, data, **kwargs): + """ + Adds a track subconfiguration to the list of tracks + in the config. + + if an index is not provided, it will assume an index file + with the same name can be found in the directory of the + track data + + e.g: + add_track("url.bam") + assumes "url.bam.bai" also exists + + :param str data: track file url/path + :param str name: (optional) name for the track + (defaults to data filename) + :param str track_id: (optional) trackId for the track + :param str index: (optional) file url/path for the track + :param str track_type: (optional) track type + :param boolean overwrite: (optional) defaults to False + :raises Exception: if assembly has not been configured + :raises TypeError: if track data is not provided + :raises TypeError: if track type is not supported + :raises TypeError: Paths are only supported in jupyter. + """ + if not data: + raise TypeError("Track data is required. None was provided.") + if not self.get_assembly(): + raise Exception("Please set the assembly before adding a track.") + + assembly_names = [self.get_assembly_name()] + name = kwargs.get("name", guess_file_name(data)) + index = kwargs.get("index", "defaultIndex") + overwrite = kwargs.get("overwrite", False) + current_tracks = self.get_tracks() + if is_url(data): + # default to uri protocol until local files enabled + if not is_url(index) and index != "defaultIndex": + if not self.jupyter: + raise TypeError( + f"Path {index} for index is used in an " + "unsupported environment. Paths are " + "supported in Jupyter notebooks and Jupy" + "ter lab.Please use a url for your " + "assembly data. You can check out " + "our local file support docs for more " + "information" + ) + else: + adapter = guess_adapter_type( + data, + "localPath", + index, + colab=self.colab, + nb_port=self.nb_port, + nb_host=self.nb_host, + ) + else: + adapter = guess_adapter_type(data, "uri", index) + # adapter = guess_adapter_type(data, 'uri', index) + if adapter["type"] == "UNKNOWN": + raise TypeError("Adapter type is not recognized") + if adapter["type"] == "UNSUPPORTED": + raise TypeError("Adapter type is not supported") + # get sequence adapter for cram adapter track + if adapter["type"] == "CramAdapter": + extra_config = self.get_assembly()["sequence"]["adapter"] + adapter["sequenceAdapter"] = extra_config + t_type = kwargs.get("track_type", guess_track_type(adapter["type"])) + supported_track_types = set( + { + "AlignmentsTrack", + "QuantitativeTrack", + "VariantTrack", + "FeatureTrack", + "ReferenceSequenceTrack", } - if conf is not None: - for r in default.keys(): - if r not in conf: - conf[r] = default[r] - self.config = default if conf is None else conf - if conf is not None: - ids = {x["trackId"]: x for x in conf["tracks"]} - self.tracks_ids_map = ids - self.tracks_ids_map = {} - self.view = view - # environment - self.nb_port = 8888 - self.nb_host = "localhost" - self.colab = in_colab_notebook - self.jupyter = not in_colab_notebook and in_jupyter_notebook - - def get_config(self): - """ - Returns the configuration object of the JBrowseConfig - instance. This object can then be passed to launch or - create_component to launch or create a Dash JBrowse - component - - e.g: create("LGV", genome="hg19").get_config() - - :return: returns configuration object - :rtype: obj - """ - return self.config - - def get_colab(self): - return self.colab - - def get_jupyter(self): - return self.jupyter - - def get_env(self): - print("notebook port: ", self.nb_port) - print("notebook host: ", self.nb_host) - return self.nb_host, self.nb_port - - def set_env(self, notebook_host="localhost", notebook_port=8888): - """ - Changes the port and the host for creating links to files - found within the file tree of jupyter. - - We want to be able to use paths to local files that can be - accessed within the file tree of jupyter notebook and jupyter - lab. The port and host should match those configured in your - jupyter config. - - You can set_env after creating your view. - browser = create("LGV") - browser.set_env("localhost", 8989) - - :param str notebook_host: host used in jupyter config for - for using paths to local files. (Defaults to "localhost") - :param str notebook_port: port used in jupyter config for - for using paths to local files. (Defaults to 8888) - """ - self.nb_port = notebook_port - self.nb_host = notebook_host - - # ========== Assembly =========== - def get_assembly(self): - # Returns the JBrowseConfig assembly subconfiguration object - return self.config["assembly"] - - def get_assembly_name(self): - # Returns the assembly name. - assembly_error = ( - "Can not get assembly name. " "Please configure the assembly first." + ) + if t_type not in supported_track_types: + raise TypeError(f'Track type: "{t_type}" is not supported.') + default_track_id = f"{self.get_assembly_name()}-{name}" + track_id = kwargs.get("track_id", default_track_id) + track_config = { + "type": t_type, + "trackId": track_id, + "name": name, + "assemblyNames": assembly_names, + "adapter": adapter, + } + if track_id in self.tracks_ids_map.keys() and not overwrite: + raise TypeError( + ( + f'track with trackId: "{track_id}" already exists in' + f"config. Set overwrite to True to overwrite it." + ) ) - if self.get_assembly(): - return self.get_assembly()["name"] - else: - raise Exception(assembly_error) - - def set_assembly(self, assembly_data, **kwargs): - """ - Sets the assembly. - - Assumes assembly_data.fai exists for fasta assembly data - that is not bgzipped. - - Assumes assembly_data.fai and assembly_data.gzi exist for - bgzipped assembly data. - - e.g set_assembly("url/assembly.fasta.gz", overwrite=True) - assumes - "url/assembly.fasta.gz.fai" and - "url/assembly.fasta.gz.gzi" also exist - - For configuring assemblies check out our config docs - https://jbrowse.org/jb2/docs/config_guide/#assembly-config - - :param str assembly_data: url/path to the sequence data - :param str name: (optional) name for the assembly, - defaults to name generated from assembly_data file name - :param list aliases: (optional) list of aliases for the assembly - :param obj refname_aliases: (optional) config for refname aliases. - :param str overwrite: flag whether or not to overwrite - existing assembly, default to False. - :raises TypeError: Paths are only supported in jupyter. - :raises TypeError: adapter used for file type is not supported or - recognized - """ - overwrite = kwargs.get("overwrite", False) - indx = kwargs.get("index", "defaultIndex") - err = "assembly is already set, set overwrite to True to overwrite" - if self.get_assembly() and not overwrite: - raise TypeError(err) - aliases = kwargs.get("aliases", []) - refname_aliases = kwargs.get("refname_aliases", {}) - if is_url(assembly_data): - if indx != "defaultIndex": - if not is_url(indx) and not self.jupyter: - raise TypeError( - f"Path for {assembly_data} " - "is used in an unsupported environment." - "Paths are supported in Jupyter" - " notebooks and Jupyter lab." - "Please use a url for your assembly " - "data. You can check out our local " - "file support docs for more information" - ) - assembly_adapter = guess_adapter_type(assembly_data, "uri", indx, **kwargs) - name = kwargs.get("name", get_name(assembly_data)) - if assembly_adapter["type"] == "UNKNOWN": - raise TypeError("Adapter type is not recognized") - if assembly_adapter["type"] == "UNSUPPORTED": - raise TypeError("Adapter type is not supported") - assembly_config = { - "name": name, - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": f"{name}-ReferenceSequenceTrack", - "adapter": assembly_adapter, - }, - "aliases": aliases, - "refNameAliases": refname_aliases, - } - self.config["assembly"] = assembly_config - else: - if not self.jupyter: - raise TypeError( - f"Path {assembly_data} for assembly data " - "is used in an unsupported environment." - "Paths are supported in Jupyter notebooks" - " and Jupyter lab.Please use a url for " - "your assembly data. You can check out " - "our local file support docs for more " - "information" - ) - if indx != "defaultIndex" and not is_url(indx): - if not self.jupyter: - raise TypeError( - "Paths are used in an " - "unsupported environment." - "Paths are supported in Jupyter" - " notebooks and Jupyter lab." - "Please use a urls for your assembly" - " and index data. You can check out " - "our local file support docs for more" - " information" - ) - assembly_adapter = guess_adapter_type( - assembly_data, - "localPath", - indx, - colab=self.colab, - nb_port=self.nb_port, - nb_host=self.nb_host, - ) - name = kwargs.get("name", get_name(assembly_data)) - if assembly_adapter["type"] == "UNKNOWN": - raise TypeError("Adapter type is not recognized") - if assembly_adapter["type"] == "UNSUPPORTED": - raise TypeError("Adapter type is not supported") - assembly_config = { - "name": name, - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": f"{name}-ReferenceSequenceTrack", - "adapter": assembly_adapter, - }, - "aliases": aliases, - "refNameAliases": refname_aliases, - } - self.config["assembly"] = assembly_config - - # ============ Tracks ============= - - def get_reference_track(self): - # Returns the reference track for the configured assembly. - assembly_name = self.get_assembly_name() - configuration = f"{assembly_name}-ReferenceSequenceTrack" - conf_str = f"{configuration}-LinearReferenceSequenceDisplay" - return { - "type": "ReferenceSequenceTrack", - "configuration": configuration, - "displays": [ - { - "type": "LinearReferenceSequenceDisplay", - "configuration": conf_str, - } - ], + if track_id in self.tracks_ids_map.keys() and overwrite: + current_tracks = [t for t in current_tracks if t["trackId"] != track_id] + + current_tracks.append(track_config) + self.config["tracks"] = current_tracks + self.tracks_ids_map[track_id] = track_config + else: + if not self.jupyter: + raise TypeError( + f"Path {data} for track data " + "is used in an unsupported environment." + "Paths are supported in Jupyter notebooks" + " and Jupyter lab.Please use a url for " + "your assembly data. You can check out " + "our local file support docs for more " + "information" + ) + if not is_url(index) and index != "defaultIndex": + if not self.jupyter: + raise TypeError( + f"Path {index} for index is used in an " + "unsupported environment.Paths are " + "supported in Jupyter notebooks and Jupyte" + "r lab.Please use a url for your assembly " + "data. You can check out our local file " + "support docs for more information" + ) + adapter = guess_adapter_type( + data, + "localPath", + index, + colab=self.colab, + nb_port=self.nb_port, + nb_host=self.nb_host, + ) + if adapter["type"] == "UNKNOWN": + raise TypeError("Adapter type is not recognized") + if adapter["type"] == "UNSUPPORTED": + raise TypeError("Adapter type is not supported") + # get sequence adapter for cram adapter track + if adapter["type"] == "CramAdapter": + extra_config = self.get_assembly()["sequence"]["adapter"] + adapter["sequenceAdapter"] = extra_config + t_type = kwargs.get("track_type", guess_track_type(adapter["type"])) + supported_track_types = set( + { + "AlignmentsTrack", + "QuantitativeTrack", + "VariantTrack", + "FeatureTrack", + "ReferenceSequenceTrack", } + ) + if t_type not in supported_track_types: + raise TypeError(f'Track type: "{t_type}" is not supported.') + default_track_id = f"{self.get_assembly_name()}-{name}" + track_id = kwargs.get("track_id", default_track_id) + track_config = { + "type": t_type, + "trackId": track_id, + "name": name, + "assemblyNames": assembly_names, + "adapter": adapter, + } + if track_id in self.tracks_ids_map.keys() and not overwrite: + raise TypeError( + ( + f'track with trackId: "{track_id}" already exists in' + f"config. Set overwrite to True to overwrite it." + ) + ) + if track_id in self.tracks_ids_map.keys() and overwrite: + current_tracks = [t for t in current_tracks if t["trackId"] != track_id] - def get_track_display(self, track): - # Returns the track display subconfiguration. - track_type = track["type"] - track_id = track["trackId"] - display_type = guess_display_type(track_type, self.view) - return { - "type": track_type, - "configuration": track_id, - "displays": [ - {"type": display_type, "configuration": f"{track_id}-{display_type}"} - ], - } + current_tracks.append(track_config) + self.config["tracks"] = current_tracks + self.tracks_ids_map[track_id] = track_config - def get_track(self, track_name): - # Return the list of track configurations with that name - tracks = [track for track in self.get_tracks() if track["name"] == track_name] - return tracks - - def get_tracks(self): - # Returns list of tracks in the configuration. - return self.config["tracks"] - - def add_df_track(self, track_data, name, **kwargs): - """ - Adds track from a pandas DataFrame. If the score column - is present, it will create a Quantitative track else it - will create a Feature track. - - Requires DataFrame to have columns named 'refName', - 'start', 'end', and 'name' - - Requires refName and name columns to be of type str and - start, end, and score to be int - - e.g: - add_df_track(df, "track_name") - - :param track_data: panda DataFrame with the track data. - :param str name: name for the track. - :param str track_id: (optional) trackId for the track - :param str overwrite: flag whether or not to overwrite existing track. - :raises Exception: if assembly has not been configured. - :raises TypeError: if track data is invalid - :raises TypeError: if view is not LGV - :raises TypeError: if track with that trackId already exists - list of tracks - """ - if not self.get_assembly(): - raise Exception("Please set the assembly before adding a track.") - if self.view != "LGV": - raise TypeError("Can not add a data frame track to a CGV conf.") - check_track_data(track_data) - - overwrite = kwargs.get("overwrite", False) - assembly_name = self.get_assembly_name() - track_id = kwargs.get("track_id", f"{assembly_name}-{name}") - current_tracks = self.config["tracks"] - # if score column is present => QuantitativeTrack, else FeatureTrack - track_type = "FeatureTrack" - if "score" in track_data: - track_type = "QuantitativeTrack" - - adapter = get_from_config_adapter(track_data) - df_track_config = { - "type": track_type, - "trackId": track_id, - "name": name, - "assemblyNames": [assembly_name], - "adapter": adapter, + def delete_track(self, track_id): + """ + Deletes a track from the config. + + e.g: + delete_track("test_track_id") + + Will delete track with track_id test_track_id. + + :param str track_id: track_id to delete + + :raises TypeError: if track_id provided does not exist + """ + new_tracks = [] + current_tracks = self.get_tracks() + if track_id not in self.tracks_ids_map.keys(): + raise TypeError( + (f'track with trackId: "{track_id}" does not exist in' f"config.") + ) + else: + new_tracks = [t for t in current_tracks if t["trackId"] != track_id] + self.config["tracks"] = new_tracks + # clear from default session + default_sess = self.get_default_session() + tracks_sess = default_sess["view"]["tracks"] + new_tracks_sess = [t for t in tracks_sess if t["configuration"] != track_id] + if self.view == "CGV": + self.config["defaultSession"] = { + "name": "my session", + "view": { + "id": "circularView", + "type": "CircularView", + "tracks": new_tracks_sess, + }, } - err = ( - f'track with trackId: "{track_id}" already exists in config.', - "Set overwrite to True if you want to overwrite it.", - ) - if track_id in self.tracks_ids_map.keys() and not overwrite: - raise TypeError(err) - if track_id in self.tracks_ids_map.keys() and overwrite: - # delete track and overwrite it - current_tracks = [t for t in current_tracks if t["trackId"] != track_id] - - current_tracks.append(df_track_config) - self.config["tracks"] = current_tracks - self.tracks_ids_map[track_id] = df_track_config - - def add_track(self, data, **kwargs): - """ - Adds a track subconfiguration to the list of tracks - in the config. - - if an index is not provided, it will assume an index file - with the same name can be found in the directory of the - track data - - e.g: - add_track("url.bam") - assumes "url.bam.bai" also exists - - :param str data: track file url/path - :param str name: (optional) name for the track - (defaults to data filename) - :param str track_id: (optional) trackId for the track - :param str index: (optional) file url/path for the track - :param str track_type: (optional) track type - :param boolean overwrite: (optional) defaults to False - :raises Exception: if assembly has not been configured - :raises TypeError: if track data is not provided - :raises TypeError: if track type is not supported - :raises TypeError: Paths are only supported in jupyter. - """ - if not data: - raise TypeError("Track data is required. None was provided.") - if not self.get_assembly(): - raise Exception("Please set the assembly before adding a track.") - - assembly_names = [self.get_assembly_name()] - name = kwargs.get("name", guess_file_name(data)) - index = kwargs.get("index", "defaultIndex") - overwrite = kwargs.get("overwrite", False) - current_tracks = self.get_tracks() - if is_url(data): - # default to uri protocol until local files enabled - if not is_url(index) and index != "defaultIndex": - if not self.jupyter: - raise TypeError( - f"Path {index} for index is used in an " - "unsupported environment. Paths are " - "supported in Jupyter notebooks and Jupy" - "ter lab.Please use a url for your " - "assembly data. You can check out " - "our local file support docs for more " - "information" - ) - else: - adapter = guess_adapter_type( - data, - "localPath", - index, - colab=self.colab, - nb_port=self.nb_port, - nb_host=self.nb_host, - ) - else: - adapter = guess_adapter_type(data, "uri", index) - # adapter = guess_adapter_type(data, 'uri', index) - if adapter["type"] == "UNKNOWN": - raise TypeError("Adapter type is not recognized") - if adapter["type"] == "UNSUPPORTED": - raise TypeError("Adapter type is not supported") - # get sequence adapter for cram adapter track - if adapter["type"] == "CramAdapter": - extra_config = self.get_assembly()["sequence"]["adapter"] - adapter["sequenceAdapter"] = extra_config - t_type = kwargs.get("track_type", guess_track_type(adapter["type"])) - supported_track_types = set( - { - "AlignmentsTrack", - "QuantitativeTrack", - "VariantTrack", - "FeatureTrack", - "ReferenceSequenceTrack", - } - ) - if t_type not in supported_track_types: - raise TypeError(f'Track type: "{t_type}" is not supported.') - default_track_id = f"{self.get_assembly_name()}-{name}" - track_id = kwargs.get("track_id", default_track_id) - track_config = { - "type": t_type, - "trackId": track_id, - "name": name, - "assemblyNames": assembly_names, - "adapter": adapter, - } - if track_id in self.tracks_ids_map.keys() and not overwrite: - raise TypeError( - ( - f'track with trackId: "{track_id}" already exists in' - f"config. Set overwrite to True to overwrite it." - ) - ) - if track_id in self.tracks_ids_map.keys() and overwrite: - current_tracks = [t for t in current_tracks if t["trackId"] != track_id] - - current_tracks.append(track_config) - self.config["tracks"] = current_tracks - self.tracks_ids_map[track_id] = track_config - else: - if not self.jupyter: - raise TypeError( - f"Path {data} for track data " - "is used in an unsupported environment." - "Paths are supported in Jupyter notebooks" - " and Jupyter lab.Please use a url for " - "your assembly data. You can check out " - "our local file support docs for more " - "information" - ) - if not is_url(index) and index != "defaultIndex": - if not self.jupyter: - raise TypeError( - f"Path {index} for index is used in an " - "unsupported environment.Paths are " - "supported in Jupyter notebooks and Jupyte" - "r lab.Please use a url for your assembly " - "data. You can check out our local file " - "support docs for more information" - ) - adapter = guess_adapter_type( - data, - "localPath", - index, - colab=self.colab, - nb_port=self.nb_port, - nb_host=self.nb_host, - ) - if adapter["type"] == "UNKNOWN": - raise TypeError("Adapter type is not recognized") - if adapter["type"] == "UNSUPPORTED": - raise TypeError("Adapter type is not supported") - # get sequence adapter for cram adapter track - if adapter["type"] == "CramAdapter": - extra_config = self.get_assembly()["sequence"]["adapter"] - adapter["sequenceAdapter"] = extra_config - t_type = kwargs.get("track_type", guess_track_type(adapter["type"])) - supported_track_types = set( - { - "AlignmentsTrack", - "QuantitativeTrack", - "VariantTrack", - "FeatureTrack", - "ReferenceSequenceTrack", - } - ) - if t_type not in supported_track_types: - raise TypeError(f'Track type: "{t_type}" is not supported.') - default_track_id = f"{self.get_assembly_name()}-{name}" - track_id = kwargs.get("track_id", default_track_id) - track_config = { - "type": t_type, - "trackId": track_id, - "name": name, - "assemblyNames": assembly_names, - "adapter": adapter, - } - if track_id in self.tracks_ids_map.keys() and not overwrite: - raise TypeError( - ( - f'track with trackId: "{track_id}" already exists in' - f"config. Set overwrite to True to overwrite it." - ) - ) - if track_id in self.tracks_ids_map.keys() and overwrite: - current_tracks = [t for t in current_tracks if t["trackId"] != track_id] - - current_tracks.append(track_config) - self.config["tracks"] = current_tracks - self.tracks_ids_map[track_id] = track_config - - def delete_track(self, track_id): - """ - Deletes a track from the config. - - e.g: - delete_track("test_track_id") - - Will delete track with track_id test_track_id. - - :param str track_id: track_id to delete - - :raises TypeError: if track_id provided does not exist - """ - new_tracks = [] - current_tracks = self.get_tracks() - if track_id not in self.tracks_ids_map.keys(): - raise TypeError( - (f'track with trackId: "{track_id}" does not exist in' f"config.") - ) - else: - new_tracks = [t for t in current_tracks if t["trackId"] != track_id] - self.config["tracks"] = new_tracks - # clear from default session - default_sess = self.get_default_session() - tracks_sess = default_sess["view"]["tracks"] - new_tracks_sess = [t for t in tracks_sess if t["configuration"] != track_id] - if self.view == "CGV": - self.config["defaultSession"] = { - "name": "my session", - "view": { - "id": "circularView", - "type": "CircularView", - "tracks": new_tracks_sess, - }, - } - else: - self.config["defaultSession"] = { - "name": "my session", - "view": { - "id": "LinearGenomeView", - "type": "LinearGenomeView", - "tracks": new_tracks_sess, - }, - } - - # ======= location =========== - def set_location(self, location): - """ - Sets initial location for when the browser first loads. - - e.g: - set_location("chr1:1..90") - - :param str location: location, syntax 'refName:start-end' - :raises TypeError: if view is CGV, location not supported in CGV - """ - if self.view == "CGV": - raise TypeError("Location is not available to set on a CGV") - else: - self.config["location"] = location - - # ======= default session ======== - def set_default_session(self, tracks_ids, display_assembly=True): - """ - Sets the default session given a list of track ids - - e.g: - set_default_session(['track_id', 'track_id2']) - - :param tracks_ids: list[str] list of track ids to display - :param boolean display_assembly: display the assembly reference - sequence track. Defaults to True - :raises Exception: if assembly has not been configured - """ - err = "Please set the assembly before setting the default session." - if not self.get_assembly(): - raise Exception(err) - reference_track = {} - tracks_configs = [] - if display_assembly: - reference_track = self.get_reference_track() - tracks_configs.append(reference_track) - tracks_to_display = [t for t in self.get_tracks() if t["trackId"] in tracks_ids] - # guess the display type - for t in tracks_to_display: - tracks_configs.append(self.get_track_display(t)) - if self.view == "CGV": - self.config["defaultSession"] = { - "name": "my session", - "view": { - "id": "circularView", - "type": "CircularView", - "tracks": tracks_configs, - }, - } - else: - self.config["defaultSession"] = { - "name": "my session", - "view": { - "id": "LinearGenomeView", - "type": "LinearGenomeView", - "tracks": tracks_configs, - }, - } - - def get_default_session(self): - # Returns the defaultSession subconfiguration - return self.config["defaultSession"] - - # ====== Advanced Customization =============== - def get_text_search_adapters(self): - # Returns the aggregateTextSearchAdapters in the config - return self.config["aggregateTextSearchAdapters"] - - def add_text_search_adapter(self, ix, ixx, meta, adapter_id=None): - """ - Adds an aggregate trix text search adapter. - Currently not available for Circular Genome View - - e.g: - add_text_search_adapter("url/file.ix", url/file.ixx", - "url/meta.json") - - :param str ix: url/path to ix file - :param str ixx: url/path to ixx file - :param str meta: url/path to meta.json file - :param str adapter_id: optional adapter_id - :raises Exception: if assembly has not been configured - :raises TypeError: if adapter with same adapter id - is already configured - :raises TypeError: Paths are only supported in jupyter. - :raises TypeError: if view is CGV - """ - err = "Please set the assembly before adding a text search adapter." - if not self.get_assembly(): - raise Exception(err) - local = is_url(ix) and is_url(ixx) and is_url(meta) - if local and not self.jupyter: - TypeError( - f'Paths for "{ix},{ixx},and {meta}"' - " are used in an unsupported environment. Paths are " - "supported in Jupyter notebooks and Jupyter lab.Please" - " use a url for your assembly data. You can check out" - " our local file support docs for more information" - ) - - if self.view == "CGV": - raise TypeError("Text Searching not currently available in CGV") - assembly_name = self.get_assembly_name() - default_id = f"{assembly_name}-{guess_file_name(ix)}-index" - text_id = default_id if adapter_id is None else adapter_id - text_search_adapter = { - "type": "TrixTextSearchAdapter", - "textSearchAdapterId": text_id, - "ixFilePath": { - "uri": make_url_colab_jupyter( - ix, colab=self.colab, nb_host=self.nb_host, nb_port=self.nb_port - ), - "locationType": "UriLocation", - }, - "ixxFilePath": { - "uri": make_url_colab_jupyter( - ixx, colab=self.colab, nb_host=self.nb_host, nb_port=self.nb_port - ), - "locationType": "UriLocation", - }, - "metaFilePath": { - "uri": make_url_colab_jupyter( - meta, colab=self.colab, nb_host=self.nb_host, nb_port=self.nb_port - ), - "locationType": "UriLocation", - }, - "assemblyNames": [assembly_name], + else: + self.config["defaultSession"] = { + "name": "my session", + "view": { + "id": "LinearGenomeView", + "type": "LinearGenomeView", + "tracks": new_tracks_sess, + }, } - adapters = self.get_text_search_adapters() - exists = [a for a in adapters if a["textSearchAdapterId"] == text_id] - if len(exists) > 0: - raise TypeError( - "Adapter already exists for given adapterId: " - f"{text_id}.Provide a different adapter_id" - ) - adapters.append(text_search_adapter) - self.config["aggregateTextSearchAdapters"] = adapters - - def get_theme(self): - # Returns the theme subconfiguration. - subconfiguration = self.config["configuration"] - return subconfiguration["theme"] - - def set_theme(self, primary, secondary=None, tertiary=None, quaternary=None): - """ - Sets the theme in the configuration. Accepts up to 4 - hexadecimal colors. - - e.g: - set_theme("#311b92", "#0097a7", "#f57c00", "#d50000") - - :param str primary: primary color of custom palette - :param str secondary: (optional) secondary color - :param str tertiary: (optional) tertiary color - :param str quaternary: (optional) quaternary color - """ - palette = {"primary": {"main": primary}} - if secondary: - palette["secondary"] = {"main": secondary} - if tertiary: - palette["tertiary"] = {"main": tertiary} - if quaternary: - palette["quaternary"] = {"main": quaternary} - self.config["configuration"] = {"theme": {"palette": palette}} + + # ======= location =========== + def set_location(self, location): + """ + Sets initial location for when the browser first loads. + + e.g: + set_location("chr1:1..90") + + :param str location: location, syntax 'refName:start-end' + :raises TypeError: if view is CGV, location not supported in CGV + """ + if self.view == "CGV": + raise TypeError("Location is not available to set on a CGV") + else: + self.config["location"] = location + + # ======= default session ======== + def set_default_session(self, tracks_ids, display_assembly=True): + """ + Sets the default session given a list of track ids + + e.g: + set_default_session(['track_id', 'track_id2']) + + :param tracks_ids: list[str] list of track ids to display + :param boolean display_assembly: display the assembly reference + sequence track. Defaults to True + :raises Exception: if assembly has not been configured + """ + err = "Please set the assembly before setting the default session." + if not self.get_assembly(): + raise Exception(err) + reference_track = {} + tracks_configs = [] + if display_assembly: + reference_track = self.get_reference_track() + tracks_configs.append(reference_track) + tracks_to_display = [t for t in self.get_tracks() if t["trackId"] in tracks_ids] + # guess the display type + for t in tracks_to_display: + tracks_configs.append(self.get_track_display(t)) + if self.view == "CGV": + self.config["defaultSession"] = { + "name": "my session", + "view": { + "id": "circularView", + "type": "CircularView", + "tracks": tracks_configs, + }, + } + else: + self.config["defaultSession"] = { + "name": "my session", + "view": { + "id": "LinearGenomeView", + "type": "LinearGenomeView", + "tracks": tracks_configs, + }, + } + + def get_default_session(self): + # Returns the defaultSession subconfiguration + return self.config["defaultSession"] + + # ====== Advanced Customization =============== + def get_text_search_adapters(self): + # Returns the aggregateTextSearchAdapters in the config + return self.config["aggregateTextSearchAdapters"] + + def add_text_search_adapter(self, ix, ixx, meta, adapter_id=None): + """ + Adds an aggregate trix text search adapter. + Currently not available for Circular Genome View + + e.g: + add_text_search_adapter("url/file.ix", url/file.ixx", + "url/meta.json") + + :param str ix: url/path to ix file + :param str ixx: url/path to ixx file + :param str meta: url/path to meta.json file + :param str adapter_id: optional adapter_id + :raises Exception: if assembly has not been configured + :raises TypeError: if adapter with same adapter id + is already configured + :raises TypeError: Paths are only supported in jupyter. + :raises TypeError: if view is CGV + """ + err = "Please set the assembly before adding a text search adapter." + if not self.get_assembly(): + raise Exception(err) + local = is_url(ix) and is_url(ixx) and is_url(meta) + if local and not self.jupyter: + TypeError( + f'Paths for "{ix},{ixx},and {meta}"' + " are used in an unsupported environment. Paths are " + "supported in Jupyter notebooks and Jupyter lab.Please" + " use a url for your assembly data. You can check out" + " our local file support docs for more information" + ) + + if self.view == "CGV": + raise TypeError("Text Searching not currently available in CGV") + assembly_name = self.get_assembly_name() + default_id = f"{assembly_name}-{guess_file_name(ix)}-index" + text_id = default_id if adapter_id is None else adapter_id + text_search_adapter = { + "type": "TrixTextSearchAdapter", + "textSearchAdapterId": text_id, + "ixFilePath": { + "uri": make_url_colab_jupyter( + ix, colab=self.colab, nb_host=self.nb_host, nb_port=self.nb_port + ), + "locationType": "UriLocation", + }, + "ixxFilePath": { + "uri": make_url_colab_jupyter( + ixx, colab=self.colab, nb_host=self.nb_host, nb_port=self.nb_port + ), + "locationType": "UriLocation", + }, + "metaFilePath": { + "uri": make_url_colab_jupyter( + meta, colab=self.colab, nb_host=self.nb_host, nb_port=self.nb_port + ), + "locationType": "UriLocation", + }, + "assemblyNames": [assembly_name], + } + adapters = self.get_text_search_adapters() + exists = [a for a in adapters if a["textSearchAdapterId"] == text_id] + if len(exists) > 0: + raise TypeError( + "Adapter already exists for given adapterId: " + f"{text_id}.Provide a different adapter_id" + ) + adapters.append(text_search_adapter) + self.config["aggregateTextSearchAdapters"] = adapters + + def get_theme(self): + # Returns the theme subconfiguration. + subconfiguration = self.config["configuration"] + return subconfiguration["theme"] + + def set_theme(self, primary, secondary=None, tertiary=None, quaternary=None): + """ + Sets the theme in the configuration. Accepts up to 4 + hexadecimal colors. + + e.g: + set_theme("#311b92", "#0097a7", "#f57c00", "#d50000") + + :param str primary: primary color of custom palette + :param str secondary: (optional) secondary color + :param str tertiary: (optional) tertiary color + :param str quaternary: (optional) quaternary color + """ + palette = {"primary": {"main": primary}} + if secondary: + palette["secondary"] = {"main": secondary} + if tertiary: + palette["tertiary"] = {"main": tertiary} + if quaternary: + palette["quaternary"] = {"main": quaternary} + self.config["configuration"] = {"theme": {"palette": palette}} diff --git a/jbrowse_jupyter/tracks.py b/jbrowse_jupyter/tracks.py index 74f201d..7d263fa 100644 --- a/jbrowse_jupyter/tracks.py +++ b/jbrowse_jupyter/tracks.py @@ -4,525 +4,525 @@ def make_location(location, protocol, **kwargs): - """ - Creates location object given a location and a protocol. - :param str location: file path/url - :param str protocol: protocol, for now only accepting `uri` - :return: the location subconfiguration - :rtype: obj - :raises ValueError: if a protocol other than `uri` is used. - - """ - in_colab = kwargs.get("colab", False) - notebook_host = kwargs.get("nb_host", 8888) - notebook_port = kwargs.get("nb_port", "localhost") - if protocol == "uri": - return {"uri": location, "locationType": "UriLocation"} - elif protocol == "localPath": - return { - "uri": make_url_colab_jupyter( - location, colab=in_colab, nb_port=notebook_port, nb_host=notebook_host - ), - "locationType": "UriLocation", - } - else: - raise TypeError(f"invalid protocol {protocol}") + """ + Creates location object given a location and a protocol. + :param str location: file path/url + :param str protocol: protocol, for now only accepting `uri` + :return: the location subconfiguration + :rtype: obj + :raises ValueError: if a protocol other than `uri` is used. + + """ + in_colab = kwargs.get("colab", False) + notebook_host = kwargs.get("nb_host", 8888) + notebook_port = kwargs.get("nb_port", "localhost") + if protocol == "uri": + return {"uri": location, "locationType": "UriLocation"} + elif protocol == "localPath": + return { + "uri": make_url_colab_jupyter( + location, colab=in_colab, nb_port=notebook_port, nb_host=notebook_host + ), + "locationType": "UriLocation", + } + else: + raise TypeError(f"invalid protocol {protocol}") def make_url_colab_jupyter(location, **kwargs): - """Generates url from path based on env colab or jupyter""" - in_colab = kwargs.get("colab", False) - notebook_host = kwargs.get("nb_host", 8888) - notebook_port = kwargs.get("nb_port", "localhost") - if in_colab: - return location - return f"http://{notebook_host}:{notebook_port}/files" + location + """Generates url from path based on env colab or jupyter""" + in_colab = kwargs.get("colab", False) + notebook_host = kwargs.get("nb_host", 8888) + notebook_port = kwargs.get("nb_port", "localhost") + if in_colab: + return location + return f"http://{notebook_host}:{notebook_port}/files" + location def supported_track_type(track_type): - """Checks whether or not the given track type is supported.""" - return track_type in { - "AlignmentsTrack", - "QuantitativeTrack", - "VariantTrack", - "FeatureTrack", - } + """Checks whether or not the given track type is supported.""" + return track_type in { + "AlignmentsTrack", + "QuantitativeTrack", + "VariantTrack", + "FeatureTrack", + } def guess_display_type(track_type, view="LGV"): - """ - Returns the possible display type to use for a given track type. - - :param str track_type: the type of the track - :return: the type of the display to use for the given track type - :rtype: str - """ + """ + Returns the possible display type to use for a given track type. + + :param str track_type: the type of the track + :return: the type of the display to use for the given track type + :rtype: str + """ + displays = { + "AlignmentsTrack": "LinearAlignmentsDisplay", + "VariantTrack": "LinearVariantDisplay", + "ReferenceSequenceTrack": "LinearReferenceSequenceDisplay", + "QuantitativeTrack": "LinearWiggleDisplay", + "FeatureTrack": "LinearBasicDisplay", + } + if view == "CGV": displays = { - "AlignmentsTrack": "LinearAlignmentsDisplay", - "VariantTrack": "LinearVariantDisplay", - "ReferenceSequenceTrack": "LinearReferenceSequenceDisplay", - "QuantitativeTrack": "LinearWiggleDisplay", - "FeatureTrack": "LinearBasicDisplay", + "VariantTrack": "ChordVariantDisplay", + "ReferenceSequenceTrack": "LinearReferenceSequenceDisplay", } + if track_type in displays: + return displays[track_type] + else: if view == "CGV": - displays = { - "VariantTrack": "ChordVariantDisplay", - "ReferenceSequenceTrack": "LinearReferenceSequenceDisplay", - } - if track_type in displays: - return displays[track_type] - else: - if view == "CGV": - return "ChordVariantDisplay" - return "LinearBasicDisplay" + return "ChordVariantDisplay" + return "LinearBasicDisplay" def guess_track_type(adapter_type): - """ - Returns the possible track type to use given an adapter type. - - :param str adapter_type: the type of the adapter - :return: the type of the track to use for the given an adapter type - :rtype: str - """ - known = { - "BamAdapter": "AlignmentsTrack", - "CramAdapter": "AlignmentsTrack", - "BgzipFastaAdapter": "ReferenceSequenceTrack", - "BigWigAdapter": "QuantitativeTrack", - "IndexedFastaAdapter": "ReferenceSequenceTrack", - "TwoBitAdapter": "ReferenceSequenceTrack", - "VcfTabixAdapter": "VariantTrack", - "HicAdapter": "HicTrack", - "PAFAdapter": "SyntenyTrack", - } - if adapter_type in known: - return known[adapter_type] - else: - return "FeatureTrack" + """ + Returns the possible track type to use given an adapter type. + + :param str adapter_type: the type of the adapter + :return: the type of the track to use for the given an adapter type + :rtype: str + """ + known = { + "BamAdapter": "AlignmentsTrack", + "CramAdapter": "AlignmentsTrack", + "BgzipFastaAdapter": "ReferenceSequenceTrack", + "BigWigAdapter": "QuantitativeTrack", + "IndexedFastaAdapter": "ReferenceSequenceTrack", + "TwoBitAdapter": "ReferenceSequenceTrack", + "VcfTabixAdapter": "VariantTrack", + "HicAdapter": "HicTrack", + "PAFAdapter": "SyntenyTrack", + } + if adapter_type in known: + return known[adapter_type] + else: + return "FeatureTrack" def guess_adapter_type(file_location, protocol, index="defaultIndex", **kwargs): - """ - Creates location object given a location and a protocol. - - :param str file_location: file path/url - :param str protocol: protocol, for now only accepting `uri` - :param str index: (optional) path to index - :return: the adapter track subconfiguration - :rtype: obj - """ - notebook_host = kwargs.get("nb_host", 8888) - notebook_port = kwargs.get("nb_port", "localhost") - in_colab = kwargs.get("colab", False) - bam = re.compile(r"\.bam$", re.IGNORECASE) - bed = re.compile(r"\.bed$", re.IGNORECASE) - bed_tabix = re.compile(r"\.bed\.b?gz$", re.IGNORECASE) - big_bed = re.compile(r"\.(bb|bigbed)$", re.IGNORECASE) - big_wig = re.compile(r"\.(bw|bigwig)$", re.IGNORECASE) - cram = re.compile(r"\.cram$", re.IGNORECASE) - fasta_idx = re.compile(r"\.(fa|fasta|fna|mfa)$", re.IGNORECASE) - fasta_gz = re.compile(r"\.(fa|fasta|fna|mfa)\.b?gz$", re.IGNORECASE) - gff3 = re.compile(r"\.gff3$", re.IGNORECASE) - gff3_tabix = re.compile(r"\.gff3?\.b?gz$", re.IGNORECASE) - gtf = re.compile(r"\.gtf$", re.IGNORECASE) - hic = re.compile(r"\.hic", re.IGNORECASE) - nclist = re.compile(r"\/trackData.jsonz?$", re.IGNORECASE) - paf = re.compile(r"\.paf", re.IGNORECASE) - sizes = re.compile(r"\.sizes$", re.IGNORECASE) - sparql = re.compile(r"\/sparql$", re.IGNORECASE) - twobit = re.compile(r"\.2bit$", re.IGNORECASE) - vcf = re.compile(r"\.vcf$", re.IGNORECASE) - vcf_gzp = re.compile(r"\.vcf\.b?gz$", re.IGNORECASE) - vcf_idx = re.compile(r"\.vcf\.idx$", re.IGNORECASE) - - # bam - if bool(re.search(bam, file_location)): - return { - "type": "BamAdapter", - "bamLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "index": { - "location": make_location( - f"{file_location}.bai", - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "indexType": "CSI" - if (index != "defaultIndex" and index.upper().endswith("CSI")) - else "BAI", - }, - } - # cram - if bool(re.search(cram, file_location)): - return { - "type": "CramAdapter", - "cramLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "craiLocation": make_location( - f"{file_location}.crai", - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - } - - # gff3 - if bool(re.search(gff3, file_location)): - return { - "type": "UNSUPPORTED", - } - - # gtf - if bool(re.search(gtf, file_location)): - return { - "type": "UNSUPPORTED", - } - - # gff3 tabix - if bool(re.search(gff3_tabix, file_location)): - return { - "type": "Gff3TabixAdapter", - "gffGzLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "index": { - "location": make_location( - f"{file_location}.tbi", - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "indexType": "TBI", - }, - } - - # vcf - if bool(re.search(vcf, file_location)): - return { - "type": "VcfAdapter", - "vcfLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - } - - # vcf idx - if bool(re.search(vcf_idx, file_location)): - return { - "type": "UNSUPPORTED", - } - - # vcf gzipped - if bool(re.search(vcf_gzp, file_location)): - return { - "type": "VcfTabixAdapter", - "vcfGzLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "index": { - "location": make_location( - f"{file_location}.tbi", - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "indexType": "CSI" - if (index != "defaultIndex" and index.upper().endswith("CSI")) - else "TBI", - }, - } - - # bigwig - if bool(re.search(big_wig, file_location)): - return { - "type": "BigWigAdapter", - "bigWigLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - } - # bed - if bool(re.search(bed, file_location)): - return { - "type": "UNSUPPORTED", - } - - # bed gz - if bool(re.search(bed_tabix, file_location)): - return { - "type": "BedTabixAdapter", - "bedGzLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "index": { - "location": make_location( - f"{file_location}.tbi", - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "indexType": "CSI" - if (index != "defaultIndex" and index.upper().endswith("CSI")) - else "TBI", - }, - } - - # bigbed - if bool(re.search(big_bed, file_location)): - return { - "type": "BigBedAdapter", - "bigBedLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - } - - # fasta indexed - if bool(re.search(fasta_idx, file_location)): - fai = index if index != "defaultIndex" else f"{file_location}.fai" - return { - "type": "IndexedFastaAdapter", - "fastaLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "faiLocation": make_location( - fai, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - } - - # Bgzipped fasta - if bool(re.search(fasta_gz, file_location)): - return { - "type": "BgzipFastaAdapter", - "fastaLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "faiLocation": make_location( - f"{file_location}.fai", - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "gziLocation": make_location( - f"{file_location}.gzi", - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - } - - # twobit - if bool(re.search(twobit, file_location)): - return { - "type": "TwoBitAdapter", - "twoBitLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - } - # sizes - if bool(re.search(sizes, file_location)): - return { - "type": "UNSUPPORTED", - } - # nclist - if bool(re.search(nclist, file_location)): - return { - "type": "NCListAdapter", - "rootUrlTemplate": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - } - - # sparql - if bool(re.search(sparql, file_location)): - return { - "type": "SPARQLAdapter", - "endpoint": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - } - # hic - if bool(re.search(hic, file_location)): - return { - "type": "HicAdapter", - "hicLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - } - - # paf - if bool(re.search(paf, file_location)): - return { - "type": "PAFAdapter", - "pafLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - } + """ + Creates location object given a location and a protocol. + + :param str file_location: file path/url + :param str protocol: protocol, for now only accepting `uri` + :param str index: (optional) path to index + :return: the adapter track subconfiguration + :rtype: obj + """ + notebook_host = kwargs.get("nb_host", 8888) + notebook_port = kwargs.get("nb_port", "localhost") + in_colab = kwargs.get("colab", False) + bam = re.compile(r"\.bam$", re.IGNORECASE) + bed = re.compile(r"\.bed$", re.IGNORECASE) + bed_tabix = re.compile(r"\.bed\.b?gz$", re.IGNORECASE) + big_bed = re.compile(r"\.(bb|bigbed)$", re.IGNORECASE) + big_wig = re.compile(r"\.(bw|bigwig)$", re.IGNORECASE) + cram = re.compile(r"\.cram$", re.IGNORECASE) + fasta_idx = re.compile(r"\.(fa|fasta|fna|mfa)$", re.IGNORECASE) + fasta_gz = re.compile(r"\.(fa|fasta|fna|mfa)\.b?gz$", re.IGNORECASE) + gff3 = re.compile(r"\.gff3$", re.IGNORECASE) + gff3_tabix = re.compile(r"\.gff3?\.b?gz$", re.IGNORECASE) + gtf = re.compile(r"\.gtf$", re.IGNORECASE) + hic = re.compile(r"\.hic", re.IGNORECASE) + nclist = re.compile(r"\/trackData.jsonz?$", re.IGNORECASE) + paf = re.compile(r"\.paf", re.IGNORECASE) + sizes = re.compile(r"\.sizes$", re.IGNORECASE) + sparql = re.compile(r"\/sparql$", re.IGNORECASE) + twobit = re.compile(r"\.2bit$", re.IGNORECASE) + vcf = re.compile(r"\.vcf$", re.IGNORECASE) + vcf_gzp = re.compile(r"\.vcf\.b?gz$", re.IGNORECASE) + vcf_idx = re.compile(r"\.vcf\.idx$", re.IGNORECASE) + + # bam + if bool(re.search(bam, file_location)): + return { + "type": "BamAdapter", + "bamLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "index": { + "location": make_location( + f"{file_location}.bai", + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "indexType": "CSI" + if (index != "defaultIndex" and index.upper().endswith("CSI")) + else "BAI", + }, + } + # cram + if bool(re.search(cram, file_location)): + return { + "type": "CramAdapter", + "cramLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "craiLocation": make_location( + f"{file_location}.crai", + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + + # gff3 + if bool(re.search(gff3, file_location)): + return { + "type": "UNSUPPORTED", + } + + # gtf + if bool(re.search(gtf, file_location)): + return { + "type": "UNSUPPORTED", + } + # gff3 tabix + if bool(re.search(gff3_tabix, file_location)): + return { + "type": "Gff3TabixAdapter", + "gffGzLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "index": { + "location": make_location( + f"{file_location}.tbi", + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "indexType": "TBI", + }, + } + + # vcf + if bool(re.search(vcf, file_location)): + return { + "type": "VcfAdapter", + "vcfLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + + # vcf idx + if bool(re.search(vcf_idx, file_location)): + return { + "type": "UNSUPPORTED", + } + + # vcf gzipped + if bool(re.search(vcf_gzp, file_location)): + return { + "type": "VcfTabixAdapter", + "vcfGzLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "index": { + "location": make_location( + f"{file_location}.tbi", + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "indexType": "CSI" + if (index != "defaultIndex" and index.upper().endswith("CSI")) + else "TBI", + }, + } + + # bigwig + if bool(re.search(big_wig, file_location)): + return { + "type": "BigWigAdapter", + "bigWigLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + # bed + if bool(re.search(bed, file_location)): return { - "type": "UNKNOWN", + "type": "UNSUPPORTED", } + # bed gz + if bool(re.search(bed_tabix, file_location)): + return { + "type": "BedTabixAdapter", + "bedGzLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "index": { + "location": make_location( + f"{file_location}.tbi", + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "indexType": "CSI" + if (index != "defaultIndex" and index.upper().endswith("CSI")) + else "TBI", + }, + } + + # bigbed + if bool(re.search(big_bed, file_location)): + return { + "type": "BigBedAdapter", + "bigBedLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + + # fasta indexed + if bool(re.search(fasta_idx, file_location)): + fai = index if index != "defaultIndex" else f"{file_location}.fai" + return { + "type": "IndexedFastaAdapter", + "fastaLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "faiLocation": make_location( + fai, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + + # Bgzipped fasta + if bool(re.search(fasta_gz, file_location)): + return { + "type": "BgzipFastaAdapter", + "fastaLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "faiLocation": make_location( + f"{file_location}.fai", + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "gziLocation": make_location( + f"{file_location}.gzi", + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + + # twobit + if bool(re.search(twobit, file_location)): + return { + "type": "TwoBitAdapter", + "twoBitLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + # sizes + if bool(re.search(sizes, file_location)): + return { + "type": "UNSUPPORTED", + } + # nclist + if bool(re.search(nclist, file_location)): + return { + "type": "NCListAdapter", + "rootUrlTemplate": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + + # sparql + if bool(re.search(sparql, file_location)): + return { + "type": "SPARQLAdapter", + "endpoint": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + # hic + if bool(re.search(hic, file_location)): + return { + "type": "HicAdapter", + "hicLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + + # paf + if bool(re.search(paf, file_location)): + return { + "type": "PAFAdapter", + "pafLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + + return { + "type": "UNKNOWN", + } + # ================== DataFrame Track ================= def check_track_data(df): - """ - Checks that data frame is a valid data frame with. - - :param df: the data frame with track data. - :return: whether or not df is a valid data frame for the track. - :rtype: boolean - :raises TypeError: - if df is not a valid data frame - if df data frame is empty - if df does not have the required columns (refName, start, end, name) - """ - if not isinstance(df, pd.DataFrame): - raise TypeError("Track data must be a DataFrame") - - if df.empty: - raise TypeError("DataFrame must not be empty.") - - if not check_columns(df): - raise TypeError("DataFrame must contain all required columns.") - - ref_names = df.dtypes["refName"] - names = df.dtypes["name"] - start = df.dtypes["start"] - end = df.dtypes["end"] - correct_string = ref_names == object and names == object - correct_numbers = start == int and end == int - if not (correct_numbers and correct_string): - col_err = "One or more columns do not have the correct data type." - raise TypeError(col_err) + """ + Checks that data frame is a valid data frame with. + + :param df: the data frame with track data. + :return: whether or not df is a valid data frame for the track. + :rtype: boolean + :raises TypeError: + if df is not a valid data frame + if df data frame is empty + if df does not have the required columns (refName, start, end, name) + """ + if not isinstance(df, pd.DataFrame): + raise TypeError("Track data must be a DataFrame") + + if df.empty: + raise TypeError("DataFrame must not be empty.") + + if not check_columns(df): + raise TypeError("DataFrame must contain all required columns.") + + ref_names = df.dtypes["refName"] + names = df.dtypes["name"] + start = df.dtypes["start"] + end = df.dtypes["end"] + correct_string = ref_names == object and names == object + correct_numbers = start == int and end == int + if not (correct_numbers and correct_string): + col_err = "One or more columns do not have the correct data type." + raise TypeError(col_err) def check_columns(df): - """ - Checks whether dataframe contains the required columns. + """ + Checks whether dataframe contains the required columns. - :param df: the data frame with track data. - :return: whether or not df contains all the required columns. - required columns: refName, start, end, name, score (is optional) - :rtype: boolean - """ - required = ["refName", "start", "end", "name"] - return all(col in df for col in required) + :param df: the data frame with track data. + :return: whether or not df contains all the required columns. + required columns: refName, start, end, name, score (is optional) + :rtype: boolean + """ + required = ["refName", "start", "end", "name"] + return all(col in df for col in required) def get_from_config_adapter(df): - """ - Creates a FromConfigAdapter adapter subconfiguration to - use in the data frame track configuration. + """ + Creates a FromConfigAdapter adapter subconfiguration to + use in the data frame track configuration. - :param df: the data frame with track data. - :return: the adapter subconfiguration - :rtype: obj - """ - features = get_track_data(df) - return {"type": "FromConfigAdapter", "features": features} + :param df: the data frame with track data. + :return: the adapter subconfiguration + :rtype: obj + """ + features = get_track_data(df) + return {"type": "FromConfigAdapter", "features": features} def format_feature(feature): - """Adds a uniqueId to the given featyre.""" - unique_id = str(uuid.uuid4().hex) - feature["uniqueId"] = unique_id + """Adds a uniqueId to the given featyre.""" + unique_id = str(uuid.uuid4().hex) + feature["uniqueId"] = unique_id def get_track_data(df): - """ - Retrieves the features from the data frame. - - :param df: the data frame with track data. - :return: features - :rtype: list[obj] - """ - required = ["refName", "start", "end", "name", "additional", "type"] - df["type"] = "" - df["additional"] = "" - if "score" in df: - required.append("score") - if df.dtypes["score"] != int: - raise TypeError("Score column must be an integer") - filtered = df[required] - rows = filtered.to_dict("records") - features = [] - for r in rows: - newFeature = r - newFeature["uniqueId"] = str(uuid.uuid4().hex) - features.append(newFeature) - return features + """ + Retrieves the features from the data frame. + + :param df: the data frame with track data. + :return: features + :rtype: list[obj] + """ + required = ["refName", "start", "end", "name", "additional", "type"] + df["type"] = "" + df["additional"] = "" + if "score" in df: + required.append("score") + if df.dtypes["score"] != int: + raise TypeError("Score column must be an integer") + filtered = df[required] + rows = filtered.to_dict("records") + features = [] + for r in rows: + newFeature = r + newFeature["uniqueId"] = str(uuid.uuid4().hex) + features.append(newFeature) + return features diff --git a/jbrowse_jupyter/util.py b/jbrowse_jupyter/util.py index 572c330..372273d 100644 --- a/jbrowse_jupyter/util.py +++ b/jbrowse_jupyter/util.py @@ -9,264 +9,256 @@ hg38_asm = { - "name": "GRCh38", - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": "GRCh38-ReferenceSequenceTrack", - "adapter": { - "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz" - }, - "faiLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai" - }, - "gziLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi" - }, - }, - }, - "aliases": ["hg38"], - "refNameAliases": { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/hg38_aliases.txt" - }, - } + "name": "GRCh38", + "sequence": { + "type": "ReferenceSequenceTrack", + "trackId": "GRCh38-ReferenceSequenceTrack", + "adapter": { + "type": "BgzipFastaAdapter", + "fastaLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz" + }, + "faiLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai" + }, + "gziLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi" + }, }, + }, + "aliases": ["hg38"], + "refNameAliases": { + "adapter": { + "type": "RefNameAliasAdapter", + "location": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/hg38_aliases.txt" + }, + } + }, } hg19_asm = { - "name": "hg19", - "aliases": ["GRCh37"], - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": "hg19-ReferenceSequenceTrack", - "adapter": { - "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz" - }, - "faiLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.fai" - }, - "gziLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.gzi" - }, - }, - }, - "refNameAliases": { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt" - }, - } + "name": "hg19", + "aliases": ["GRCh37"], + "sequence": { + "type": "ReferenceSequenceTrack", + "trackId": "hg19-ReferenceSequenceTrack", + "adapter": { + "type": "BgzipFastaAdapter", + "fastaLocation": {"uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz"}, + "faiLocation": {"uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.fai"}, + "gziLocation": {"uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.gzi"}, }, + }, + "refNameAliases": { + "adapter": { + "type": "RefNameAliasAdapter", + "location": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt" + }, + } + }, } def is_url(filePath): - """ - Checks whether or not the file path - is a valid url. - :param str filePath: file path/url - :return: returns true if path matches pattern starting with - http:// or https:// - :rtype: boolean - """ - regex = re.compile(r"^https?:\/\/", re.IGNORECASE) - return re.match(regex, filePath) is not None + """ + Checks whether or not the file path + is a valid url. + :param str filePath: file path/url + :return: returns true if path matches pattern starting with + http:// or https:// + :rtype: boolean + """ + regex = re.compile(r"^https?:\/\/", re.IGNORECASE) + return re.match(regex, filePath) is not None def guess_file_name(data): - """ - Guess the file name given a path. + """ + Guess the file name given a path. - :param str data: file path - :return: the predicted file name - :rtype: str - """ - url = urlparse(data) - return os.path.basename(url.path) + :param str data: file path + :return: the predicted file name + :rtype: str + """ + url = urlparse(data) + return os.path.basename(url.path) def get_name(assembly_file): - """Returns the name of the assembly based on the assembly data file""" - name_end = 0 - name_start = 0 - for i in range(0, len(assembly_file)): - if assembly_file[len(assembly_file) - i - 1 : len(assembly_file) - i] == "/": - name_start = len(assembly_file) - i - break - for i in range(name_start, len(assembly_file)): - if assembly_file[i : i + 1] == ".": - name_end = i - break + """Returns the name of the assembly based on the assembly data file""" + name_end = 0 + name_start = 0 + for i in range(0, len(assembly_file)): + if assembly_file[len(assembly_file) - i - 1 : len(assembly_file) - i] == "/": + name_start = len(assembly_file) - i + break + for i in range(name_start, len(assembly_file)): + if assembly_file[i : i + 1] == ".": + name_end = i + break - return assembly_file[name_start:name_end] + return assembly_file[name_start:name_end] def get_name_regex(assembly_file): - """Returns the name of the assembly based on the assembly data file""" - return re.search(r"(\w+)\.(?:fa|fasta|fa\.gz)$", assembly_file).group(1) + """Returns the name of the assembly based on the assembly data file""" + return re.search(r"(\w+)\.(?:fa|fasta|fa\.gz)$", assembly_file).group(1) def get_default(name, view_type="LGV"): - """Returns the configuration object given a genome name.""" - if view_type == "CGV": - with importlib.resources.open_text( - "jbrowse_jupyter.data", f"{name}_cgv.json" - ) as file: - return json.load(file) - else: - with importlib.resources.open_text( - "jbrowse_jupyter.data", f"{name}.json" - ) as file: - return json.load(file) + """Returns the configuration object given a genome name.""" + if view_type == "CGV": + with importlib.resources.open_text( + "jbrowse_jupyter.data", f"{name}_cgv.json" + ) as file: + return json.load(file) + else: + with importlib.resources.open_text("jbrowse_jupyter.data", f"{name}.json") as file: + return json.load(file) def create_component(conf, **kwargs): - """ - Creates a Dash JBrowse LinearGenomeView component - given a configuration object and optionally an id. + """ + Creates a Dash JBrowse LinearGenomeView component + given a configuration object and optionally an id. - e.g: + e.g: - conf = hg38.get_config() - create_component(conf,id="hg38-test", dash_comp="CGV") - where hg38 is an instance of JBrowseConfig + conf = hg38.get_config() + create_component(conf,id="hg38-test", dash_comp="CGV") + where hg38 is an instance of JBrowseConfig - :param obj conf: configuration object from JBrowseConfig - instance - :param str id: id to use in Dash component - :param str dash_comp: (optional) dash component type to - create. Currently supporting LGV and CGV. - defaults to `LGV` when no dash_comp= is specified - :return: Dash JBrowse View given dash_comp type - :rtype: Dash JBrowse component - """ - supported = set({"LGV", "CGV"}) - comp_id = "jbrowse-component" - dash_comp = kwargs.get("dash_comp", "LGV") - the_view_type = conf["defaultSession"]["view"]["type"] - msg = "config was passed but attempting to create" - err = "Please specify the correct dash_comp." - if the_view_type == "LinearGenomeView" and dash_comp == "CGV": - raise TypeError(f"LGV {msg} a CGV.{err}") - if the_view_type == "CircularView" and dash_comp == "LGV": - raise TypeError(f"CGV {msg} a LGV.{err}") - if "id" in kwargs: - comp_id = kwargs["id"] - if dash_comp in supported: - if dash_comp == "LGV": - return jb.LinearGenomeView( - id=comp_id, - assembly=conf["assembly"], - tracks=conf["tracks"], - defaultSession=conf["defaultSession"], - location=conf["location"], - configuration=conf["configuration"], - aggregateTextSearchAdapters=conf["aggregateTextSearchAdapters"], - ) - # here is where we can add another view - if dash_comp == "CGV": - return jb.CircularGenomeView( - id=comp_id, - assembly=conf["assembly"], - tracks=conf["tracks"], - defaultSession=conf["defaultSession"], - configuration=conf["configuration"], - ) - else: - raise TypeError(f"The {dash_comp} component is not supported.") + :param obj conf: configuration object from JBrowseConfig + instance + :param str id: id to use in Dash component + :param str dash_comp: (optional) dash component type to + create. Currently supporting LGV and CGV. + defaults to `LGV` when no dash_comp= is specified + :return: Dash JBrowse View given dash_comp type + :rtype: Dash JBrowse component + """ + supported = set({"LGV", "CGV"}) + comp_id = "jbrowse-component" + dash_comp = kwargs.get("dash_comp", "LGV") + the_view_type = conf["defaultSession"]["view"]["type"] + msg = "config was passed but attempting to create" + err = "Please specify the correct dash_comp." + if the_view_type == "LinearGenomeView" and dash_comp == "CGV": + raise TypeError(f"LGV {msg} a CGV.{err}") + if the_view_type == "CircularView" and dash_comp == "LGV": + raise TypeError(f"CGV {msg} a LGV.{err}") + if "id" in kwargs: + comp_id = kwargs["id"] + if dash_comp in supported: + if dash_comp == "LGV": + return jb.LinearGenomeView( + id=comp_id, + assembly=conf["assembly"], + tracks=conf["tracks"], + defaultSession=conf["defaultSession"], + location=conf["location"], + configuration=conf["configuration"], + aggregateTextSearchAdapters=conf["aggregateTextSearchAdapters"], + ) + # here is where we can add another view + if dash_comp == "CGV": + return jb.CircularGenomeView( + id=comp_id, + assembly=conf["assembly"], + tracks=conf["tracks"], + defaultSession=conf["defaultSession"], + configuration=conf["configuration"], + ) + else: + raise TypeError(f"The {dash_comp} component is not supported.") def launch(conf, **kwargs): - """ - Launches a LinearGenomeView Dash JBrowse component in a - server. + """ + Launches a LinearGenomeView Dash JBrowse component in a + server. - e.g - launch(conf, dash_comp="CGV",height=400, port=8002) + e.g + launch(conf, dash_comp="CGV",height=400, port=8002) - :param obj conf: JBrowseConfiguration object to pass to - the Dash JBrowse component - :param str id: (optional) id to use for the Dash JBrowse - component defaults to `jbrowse-component` - :param str dash_comp: (optional) dash component type to - launch. Currently supporting LGV and CGV. - defaults to `LGV` when no dash_comp= is specified - :param int port: (optional) port to utilize when running - the Dash app - :param int height: (optional) the height to utilize for - the Dash app - """ - app = Dash(__name__) - # could add other JBrowse view types e.g Circular, Dotplot - supported = set({"LGV", "CGV"}) - dash_comp = kwargs.get("dash_comp", "LGV") + :param obj conf: JBrowseConfiguration object to pass to + the Dash JBrowse component + :param str id: (optional) id to use for the Dash JBrowse + component defaults to `jbrowse-component` + :param str dash_comp: (optional) dash component type to + launch. Currently supporting LGV and CGV. + defaults to `LGV` when no dash_comp= is specified + :param int port: (optional) port to utilize when running + the Dash app + :param int height: (optional) the height to utilize for + the Dash app + """ + app = Dash(__name__) + # could add other JBrowse view types e.g Circular, Dotplot + supported = set({"LGV", "CGV"}) + dash_comp = kwargs.get("dash_comp", "LGV") - # error for mismatching config and launch type - the_view_type = conf["defaultSession"]["view"]["type"] - msg = "config was passed but attempting to launch" - err = "Please specify the correct dash_comp." - if the_view_type == "LinearGenomeView" and dash_comp == "CGV": - raise TypeError(f"LGV {msg} a CGV.{err}") - if the_view_type == "CircularView" and dash_comp == "LGV": - raise TypeError(f"CGV {msg} a LGV.{err}") - comp_id = "jbrowse-component" - comp_port = 8050 - comp_host = "127.0.0.1" - comp_height = 300 - comp_mode = "inline" - if "id" in kwargs: - comp_id = kwargs["id"] - if "port" in kwargs: - comp_port = kwargs["port"] - if "host" in kwargs: - comp_host = kwargs["host"] - if "height" in kwargs: - comp_height = kwargs["height"] - if "mode" in kwargs: - comp_mode = kwargs["mode"] + # error for mismatching config and launch type + the_view_type = conf["defaultSession"]["view"]["type"] + msg = "config was passed but attempting to launch" + err = "Please specify the correct dash_comp." + if the_view_type == "LinearGenomeView" and dash_comp == "CGV": + raise TypeError(f"LGV {msg} a CGV.{err}") + if the_view_type == "CircularView" and dash_comp == "LGV": + raise TypeError(f"CGV {msg} a LGV.{err}") + comp_id = "jbrowse-component" + comp_port = 8050 + comp_host = "127.0.0.1" + comp_height = 300 + comp_mode = "inline" + if "id" in kwargs: + comp_id = kwargs["id"] + if "port" in kwargs: + comp_port = kwargs["port"] + if "host" in kwargs: + comp_host = kwargs["host"] + if "height" in kwargs: + comp_height = kwargs["height"] + if "mode" in kwargs: + comp_mode = kwargs["mode"] - if dash_comp in supported: - if dash_comp == "LGV": - # create jupyter dash app layout - adapters = conf["aggregateTextSearchAdapters"] - app.layout = html.Div( - [ - jb.LinearGenomeView( - id=comp_id, - assembly=conf["assembly"], - tracks=conf["tracks"], - defaultSession=conf["defaultSession"], - aggregateTextSearchAdapters=adapters, - location=conf["location"], - configuration=conf["configuration"], - ) - ] - ) - if dash_comp == "CGV": - # create jupyter dash app layout - app.layout = html.Div( - [ - jb.CircularGenomeView( - id=comp_id, - assembly=conf["assembly"], - tracks=conf["tracks"], - defaultSession=conf["defaultSession"], - configuration=conf["configuration"], - ) - ] - ) - else: - raise TypeError(f"The {dash_comp} component is not supported.") - app.run_server( - port=comp_port, - host=comp_host, - height=comp_height, - mode=comp_mode, - use_reloader=False, - ) + if dash_comp in supported: + if dash_comp == "LGV": + # create jupyter dash app layout + adapters = conf["aggregateTextSearchAdapters"] + app.layout = html.Div( + [ + jb.LinearGenomeView( + id=comp_id, + assembly=conf["assembly"], + tracks=conf["tracks"], + defaultSession=conf["defaultSession"], + aggregateTextSearchAdapters=adapters, + location=conf["location"], + configuration=conf["configuration"], + ) + ] + ) + if dash_comp == "CGV": + # create jupyter dash app layout + app.layout = html.Div( + [ + jb.CircularGenomeView( + id=comp_id, + assembly=conf["assembly"], + tracks=conf["tracks"], + defaultSession=conf["defaultSession"], + configuration=conf["configuration"], + ) + ] + ) + else: + raise TypeError(f"The {dash_comp} component is not supported.") + app.run_server( + port=comp_port, + host=comp_host, + height=comp_height, + mode=comp_mode, + use_reloader=False, + ) From 1a825dee99ca3407ff49807e3a3468d61ee911f2 Mon Sep 17 00:00:00 2001 From: Colin Date: Sun, 10 Dec 2023 12:56:53 -0500 Subject: [PATCH 3/9] Remove werkzeug dependency pin --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 53e00bb..2208a44 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ dash>=2.11.1 dash-jbrowse>=1.2.0 -Werkzeug==2.0.3 pandas>=1.1.5 IPython>=7.0.0 From 961b6f23a00c58517ddbbfb9ab8a677bf89a5686 Mon Sep 17 00:00:00 2001 From: Colin Date: Sun, 10 Dec 2023 15:13:11 -0500 Subject: [PATCH 4/9] Remove json file loading --- jbrowse_jupyter/data/hg19.json | 67 ------- jbrowse_jupyter/data/hg19_cgv.json | 79 -------- jbrowse_jupyter/data/hg38.json | 71 ------- jbrowse_jupyter/data/hg38_cgv.json | 45 ----- jbrowse_jupyter/util.py | 296 ++++++++++++++++++++++++----- 5 files changed, 251 insertions(+), 307 deletions(-) delete mode 100644 jbrowse_jupyter/data/hg19.json delete mode 100644 jbrowse_jupyter/data/hg19_cgv.json delete mode 100644 jbrowse_jupyter/data/hg38.json delete mode 100644 jbrowse_jupyter/data/hg38_cgv.json diff --git a/jbrowse_jupyter/data/hg19.json b/jbrowse_jupyter/data/hg19.json deleted file mode 100644 index e3bb66a..0000000 --- a/jbrowse_jupyter/data/hg19.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "configuration": {}, - "assembly": { - "name": "hg19", - "aliases": ["GRCh37"], - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": "hg19-ReferenceSequenceTrack", - "adapter": { - "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz" - }, - "faiLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.fai" - }, - "gziLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.gzi" - } - } - }, - "refNameAliases": { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt" - } - } - } - }, - "tracks": [ - { - "type": "FeatureTrack", - "trackId": "repeats_hg19", - "name": "Repeats", - "assemblyNames": ["hg19"], - "category": ["Annotation"], - "adapter": { - "type": "BigBedAdapter", - "bigBedLocation": { - "uri": "https://jbrowse.org/genomes/hg19/repeats.bb", - "locationType": "UriLocation" - } - } - } - ], - "defaultSession": { - "name": "test", - "view": { - "id": "aU9Nqje1U", - "type": "LinearGenomeView", - "tracks": [ - { - "type": "ReferenceSequenceTrack", - "configuration": "hg19-ReferenceSequenceTrack", - "displays": [ - { - "type": "LinearReferenceSequenceDisplay", - "configuration": "hg19-ReferenceSequenceTrack-LinearReferenceSequenceDisplay" - } - ] - } - ] - } - }, - "location": "1:68654694..68654738" -} diff --git a/jbrowse_jupyter/data/hg19_cgv.json b/jbrowse_jupyter/data/hg19_cgv.json deleted file mode 100644 index c9a3325..0000000 --- a/jbrowse_jupyter/data/hg19_cgv.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "configuration": {}, - "assembly": { - "name": "hg19", - "aliases": ["GRCh37"], - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": "Pd8Wh30ei9R", - "adapter": { - "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz", - "locationType": "UriLocation" - }, - "faiLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.fai", - "locationType": "UriLocation" - }, - "gziLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.gzi", - "locationType": "UriLocation" - } - } - }, - "refNameAliases": { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt", - "locationType": "UriLocation" - } - } - } - }, - "tracks": [ - { - "type": "VariantTrack", - "trackId": "pacbio_sv_vcf", - "name": "HG002 Pacbio SV (VCF)", - "assemblyNames": ["hg19"], - "category": ["GIAB"], - "adapter": { - "type": "VcfTabixAdapter", - "vcfGzLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/pacbio/hs37d5.HG002-SequelII-CCS.bnd-only.sv.vcf.gz", - "locationType": "UriLocation" - }, - "index": { - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/pacbio/hs37d5.HG002-SequelII-CCS.bnd-only.sv.vcf.gz.tbi", - "locationType": "UriLocation" - } - } - } - } - ], - "defaultSession": { - "name": "My session", - "view": { - "id": "circularView", - "type": "CircularView", - "bpPerPx": 5000000, - "tracks": [ - { - "id": "uPdLKHik1", - "type": "VariantTrack", - "configuration": "pacbio_sv_vcf", - "displays": [ - { - "id": "v9QVAR3oaB", - "type": "ChordVariantDisplay", - "configuration": "pacbio_sv_vcf-ChordVariantDisplay" - } - ] - } - ] - } - } -} diff --git a/jbrowse_jupyter/data/hg38.json b/jbrowse_jupyter/data/hg38.json deleted file mode 100644 index efa741b..0000000 --- a/jbrowse_jupyter/data/hg38.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "configuration": {}, - "assembly": { - "name": "GRCh38", - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": "GRCh38-ReferenceSequenceTrack", - "adapter": { - "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz" - }, - "faiLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai" - }, - "gziLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi" - } - } - }, - "aliases": ["hg38"], - "refNameAliases": { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/hg38_aliases.txt" - } - } - } - }, - "tracks": [ - { - "type": "FeatureTrack", - "trackId": "ncbi_refseq_109_hg38", - "name": "NCBI RefSeq (GFF3Tabix)", - "assemblyNames": ["GRCh38"], - "category": ["Annotation"], - "adapter": { - "type": "Gff3TabixAdapter", - "gffGzLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/ncbi_refseq/GCA_000001405.15_GRCh38_full_analysis_set.refseq_annotation.sorted.gff.gz" - }, - "index": { - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/ncbi_refseq/GCA_000001405.15_GRCh38_full_analysis_set.refseq_annotation.sorted.gff.gz.tbi" - } - } - } - } - ], - "location": "10:29,838,737..29,838,819", - "defaultSession": { - "name": "My session", - "view": { - "id": "linearGenomeView", - "type": "LinearGenomeView", - "tracks": [ - { - "type": "ReferenceSequenceTrack", - "configuration": "GRCh38-ReferenceSequenceTrack", - "displays": [ - { - "type": "LinearReferenceSequenceDisplay", - "configuration": "GRCh38-ReferenceSequenceTrack-LinearReferenceSequenceDisplay" - } - ] - } - ] - } - } -} diff --git a/jbrowse_jupyter/data/hg38_cgv.json b/jbrowse_jupyter/data/hg38_cgv.json deleted file mode 100644 index bbe12e4..0000000 --- a/jbrowse_jupyter/data/hg38_cgv.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "configuration": {}, - "assembly": { - "name": "hg38", - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": "GRCh38-ReferenceSequenceTrack", - "adapter": { - "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": "https://jbrowse.org/genomes/GRCh38/fasta/hg38.prefix.fa.gz", - "locationType": "UriLocation" - }, - "faiLocation": { - "uri": "https://jbrowse.org/genomes/GRCh38/fasta/hg38.prefix.fa.gz.fai", - "locationType": "UriLocation" - }, - "gziLocation": { - "uri": "https://jbrowse.org/genomes/GRCh38/fasta/hg38.prefix.fa.gz.gzi", - "locationType": "UriLocation" - } - } - }, - "aliases": ["GRCh38"], - "refNameAliases": { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/hg38_aliases.txt", - "locationType": "UriLocation" - } - } - } - }, - "tracks": [], - "defaultSession": { - "name": "My session", - "view": { - "id": "circularView", - "type": "CircularView", - "bpPerPx": 5000000, - "tracks": [] - } - } -} diff --git a/jbrowse_jupyter/util.py b/jbrowse_jupyter/util.py index 372273d..80c6b54 100644 --- a/jbrowse_jupyter/util.py +++ b/jbrowse_jupyter/util.py @@ -1,61 +1,264 @@ import re import os -import json -import importlib import dash_jbrowse as jb from dash import html, Dash from urllib.parse import urlparse -hg38_asm = { - "name": "GRCh38", - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": "GRCh38-ReferenceSequenceTrack", - "adapter": { - "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz" +hg38_lgv = { + "assembly": { + "name": "GRCh38", + "sequence": { + "type": "ReferenceSequenceTrack", + "trackId": "GRCh38-ReferenceSequenceTrack", + "adapter": { + "type": "BgzipFastaAdapter", + "fastaLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz" + }, + "faiLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai" + }, + "gziLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi" + }, }, - "faiLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai" + }, + "aliases": ["hg38"], + "refNameAliases": { + "adapter": { + "type": "RefNameAliasAdapter", + "location": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/hg38_aliases.txt" + }, + } + }, + }, + "tracks": [ + { + "type": "FeatureTrack", + "trackId": "ncbi_refseq_109_hg38", + "name": "NCBI RefSeq (GFF3Tabix)", + "assemblyNames": ["GRCh38"], + "category": ["Annotation"], + "adapter": { + "type": "Gff3TabixAdapter", + "gffGzLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/ncbi_refseq/GCA_000001405.15_GRCh38_full_analysis_set.refseq_annotation.sorted.gff.gz" + }, + "index": { + "location": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/ncbi_refseq/GCA_000001405.15_GRCh38_full_analysis_set.refseq_annotation.sorted.gff.gz.tbi" + } + }, }, - "gziLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi" + } + ], + "location": "10:29,838,737..29,838,819", + "defaultSession": { + "name": "My session", + "view": { + "id": "linearGenomeView", + "type": "LinearGenomeView", + "tracks": [ + { + "type": "ReferenceSequenceTrack", + "configuration": "GRCh38-ReferenceSequenceTrack", + "displays": [ + { + "type": "LinearReferenceSequenceDisplay", + "configuration": "GRCh38-ReferenceSequenceTrack-LinearReferenceSequenceDisplay", + } + ], + } + ], + }, + }, +} + + +hg38_cgv = { + "assembly": { + "name": "hg38", + "sequence": { + "type": "ReferenceSequenceTrack", + "trackId": "GRCh38-ReferenceSequenceTrack", + "adapter": { + "type": "BgzipFastaAdapter", + "fastaLocation": { + "uri": "https://jbrowse.org/genomes/GRCh38/fasta/hg38.prefix.fa.gz", + "locationType": "UriLocation", + }, + "faiLocation": { + "uri": "https://jbrowse.org/genomes/GRCh38/fasta/hg38.prefix.fa.gz.fai", + "locationType": "UriLocation", + }, + "gziLocation": { + "uri": "https://jbrowse.org/genomes/GRCh38/fasta/hg38.prefix.fa.gz.gzi", + "locationType": "UriLocation", + }, }, }, + "aliases": ["GRCh38"], + "refNameAliases": { + "adapter": { + "type": "RefNameAliasAdapter", + "location": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/hg38_aliases.txt", + "locationType": "UriLocation", + }, + } + }, }, - "aliases": ["hg38"], - "refNameAliases": { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/hg38_aliases.txt" + "defaultSession": { + "name": "My session", + "view": { + "id": "circularView", + "type": "CircularView", + "bpPerPx": 5000000, + "tracks": [], + }, + }, +} + +hg19_lgv = { + "assembly": { + "name": "hg19", + "aliases": ["GRCh37"], + "sequence": { + "type": "ReferenceSequenceTrack", + "trackId": "hg19-ReferenceSequenceTrack", + "adapter": { + "type": "BgzipFastaAdapter", + "fastaLocation": {"uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz"}, + "faiLocation": {"uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.fai"}, + "gziLocation": {"uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.gzi"}, + }, + }, + "refNameAliases": { + "adapter": { + "type": "RefNameAliasAdapter", + "location": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt" + }, + } + }, + }, + "tracks": [ + { + "type": "FeatureTrack", + "trackId": "repeats_hg19", + "name": "Repeats", + "assemblyNames": ["hg19"], + "category": ["Annotation"], + "adapter": { + "type": "BigBedAdapter", + "bigBedLocation": { + "uri": "https://jbrowse.org/genomes/hg19/repeats.bb", + "locationType": "UriLocation", + }, }, } + ], + "defaultSession": { + "name": "test", + "view": { + "id": "aU9Nqje1U", + "type": "LinearGenomeView", + "tracks": [ + { + "type": "ReferenceSequenceTrack", + "configuration": "hg19-ReferenceSequenceTrack", + "displays": [ + { + "type": "LinearReferenceSequenceDisplay", + "configuration": "hg19-ReferenceSequenceTrack-LinearReferenceSequenceDisplay", + } + ], + } + ], + }, }, + "location": "1:68654694..68654738", } -hg19_asm = { - "name": "hg19", - "aliases": ["GRCh37"], - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": "hg19-ReferenceSequenceTrack", - "adapter": { - "type": "BgzipFastaAdapter", - "fastaLocation": {"uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz"}, - "faiLocation": {"uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.fai"}, - "gziLocation": {"uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.gzi"}, + +hg19_cgv = { + "assembly": { + "name": "hg19", + "aliases": ["GRCh37"], + "sequence": { + "type": "ReferenceSequenceTrack", + "trackId": "Pd8Wh30ei9R", + "adapter": { + "type": "BgzipFastaAdapter", + "fastaLocation": { + "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz", + "locationType": "UriLocation", + }, + "faiLocation": { + "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.fai", + "locationType": "UriLocation", + }, + "gziLocation": { + "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.gzi", + "locationType": "UriLocation", + }, + }, + }, + "refNameAliases": { + "adapter": { + "type": "RefNameAliasAdapter", + "location": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt", + "locationType": "UriLocation", + }, + } }, }, - "refNameAliases": { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt" + "tracks": [ + { + "type": "VariantTrack", + "trackId": "pacbio_sv_vcf", + "name": "HG002 Pacbio SV (VCF)", + "assemblyNames": ["hg19"], + "category": ["GIAB"], + "adapter": { + "type": "VcfTabixAdapter", + "vcfGzLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/pacbio/hs37d5.HG002-SequelII-CCS.bnd-only.sv.vcf.gz", + "locationType": "UriLocation", + }, + "index": { + "location": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/pacbio/hs37d5.HG002-SequelII-CCS.bnd-only.sv.vcf.gz.tbi", + "locationType": "UriLocation", + } + }, }, } + ], + "defaultSession": { + "name": "My session", + "view": { + "id": "circularView", + "type": "CircularView", + "bpPerPx": 5000000, + "tracks": [ + { + "id": "uPdLKHik1", + "type": "VariantTrack", + "configuration": "pacbio_sv_vcf", + "displays": [ + { + "id": "v9QVAR3oaB", + "type": "ChordVariantDisplay", + "configuration": "pacbio_sv_vcf-ChordVariantDisplay", + } + ], + } + ], + }, }, } @@ -108,14 +311,17 @@ def get_name_regex(assembly_file): def get_default(name, view_type="LGV"): """Returns the configuration object given a genome name.""" - if view_type == "CGV": - with importlib.resources.open_text( - "jbrowse_jupyter.data", f"{name}_cgv.json" - ) as file: - return json.load(file) - else: - with importlib.resources.open_text("jbrowse_jupyter.data", f"{name}.json") as file: - return json.load(file) + if name == "hg19": + if view_type == "CGV": + return hg38_cgv + else: + return hg38_lgv + elif name=="hg38": + if view_type=="CGV": + return hg19_cgv + else: + return hg19_lgv + def create_component(conf, **kwargs): From b595c6f642e7934a42a6ef751b762dba5b107b54 Mon Sep 17 00:00:00 2001 From: Colin Date: Sun, 10 Dec 2023 15:14:35 -0500 Subject: [PATCH 5/9] Remove json --- jbrowse_jupyter/tracks.py | 3 ++- jbrowse_jupyter/util.py | 7 +++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/jbrowse_jupyter/tracks.py b/jbrowse_jupyter/tracks.py index 7d263fa..3e8b6e0 100644 --- a/jbrowse_jupyter/tracks.py +++ b/jbrowse_jupyter/tracks.py @@ -74,7 +74,8 @@ def guess_display_type(track_type, view="LGV"): else: if view == "CGV": return "ChordVariantDisplay" - return "LinearBasicDisplay" + else: + return "LinearBasicDisplay" def guess_track_type(adapter_type): diff --git a/jbrowse_jupyter/util.py b/jbrowse_jupyter/util.py index 80c6b54..29abf2b 100644 --- a/jbrowse_jupyter/util.py +++ b/jbrowse_jupyter/util.py @@ -311,19 +311,18 @@ def get_name_regex(assembly_file): def get_default(name, view_type="LGV"): """Returns the configuration object given a genome name.""" - if name == "hg19": + if name == "hg383838": if view_type == "CGV": return hg38_cgv else: return hg38_lgv - elif name=="hg38": - if view_type=="CGV": + elif name == "hg19": + if view_type == "CGV": return hg19_cgv else: return hg19_lgv - def create_component(conf, **kwargs): """ Creates a Dash JBrowse LinearGenomeView component From bbd04776a9f1b716c85706d86f202a2568be4f8a Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 11 Dec 2023 07:47:36 -0500 Subject: [PATCH 6/9] Misc --- .github/workflows/main.yml | 4 ++-- .github/workflows/push.yml | 8 ++++---- .github/workflows/release.yml | 4 ++-- jbrowse_jupyter/jbrowse_config.py | 5 ++--- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 9ec00a5..99c824c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -7,9 +7,9 @@ jobs: build-and-publish: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.x' - name: Install dependencies diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 17ce346..3443628 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -4,9 +4,9 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.x' - name: Install dependencies @@ -21,9 +21,9 @@ jobs: test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: '3.x' - name: Install dependencies diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d4c946e..7ffb91e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -11,9 +11,9 @@ jobs: build-and-publish: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.x' - name: Install dependencies diff --git a/jbrowse_jupyter/jbrowse_config.py b/jbrowse_jupyter/jbrowse_config.py index 390d3a2..cd16e7b 100644 --- a/jbrowse_jupyter/jbrowse_config.py +++ b/jbrowse_jupyter/jbrowse_config.py @@ -35,10 +35,12 @@ def create(view_type="LGV", **kwargs): conf = kwargs.get("conf", {}) genome = kwargs.get("genome", "empty") view = view_type + # view type (LGV or CGV) # make it backwards compatible if view_type == "view" or view_type == "conf": view = "LGV" + if view != "LGV" and view != "CGV": raise TypeError(f"Currently not supporting view_type: {view}.") # configuration @@ -51,13 +53,10 @@ def create(view_type="LGV", **kwargs): message2 = "Choose from hg19 or hg38 or pass your own conf" if genome not in available_genomes and no_configuration: raise TypeError(f'"{genome}" {message1}.{message2}.') - # genome if genome in available_genomes: conf = get_default(genome, view) - # start from empty JBrowseConfig elif not conf: return JBrowseConfig(view=view) - # get customized JBrowseConfig return JBrowseConfig(view=view, conf=conf) From 3958d5d588f4c00e987f93b729c566d7c43f2aba Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 11 Dec 2023 07:53:18 -0500 Subject: [PATCH 7/9] 4 --- browser.py | 19 +- jbrowse_jupyter/dev_server.py | 256 ++--- jbrowse_jupyter/jbrowse_config.py | 1532 ++++++++++++++--------------- jbrowse_jupyter/tracks.py | 962 +++++++++--------- jbrowse_jupyter/util.py | 769 +++++++-------- ruff.toml | 2 +- 6 files changed, 1756 insertions(+), 1784 deletions(-) diff --git a/browser.py b/browser.py index f97fddc..fed5496 100644 --- a/browser.py +++ b/browser.py @@ -22,14 +22,16 @@ ix = "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/trix/hg38.ix" ixx = "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/trix/hg38.ixx" meta = "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/trix/meta.json" -jbrowse_conf.set_assembly(assembly_data, - aliases=aliases, - refname_aliases=ref_name_aliases) +jbrowse_conf.set_assembly( + assembly_data, aliases=aliases, refname_aliases=ref_name_aliases +) # add a track -track_data = "https://s3.amazonaws.com/jbrowse.org/genomes/" \ - "GRCh38/ncbi_refseq/GCA_000001405.15_GRCh38_full" \ - "_analysis_set.refseq_annotation.sorted.gff.gz" +track_data = ( + "https://s3.amazonaws.com/jbrowse.org/genomes/" + "GRCh38/ncbi_refseq/GCA_000001405.15_GRCh38_full" + "_analysis_set.refseq_annotation.sorted.gff.gz" +) jbrowse_conf.add_track(track_data, name="test-demo", track_id="test-track") # deleting a track jbrowse_conf.add_track(track_data, name="delete", track_id="test-delete-track") @@ -50,10 +52,7 @@ component = create_component(config) # launch the component -app.layout = html.Div( - [component], - id='test' -) +app.layout = html.Div([component], id="test") if __name__ == "__main__": app.run_server(port=3001, debug=True) diff --git a/jbrowse_jupyter/dev_server.py b/jbrowse_jupyter/dev_server.py index 550744f..24881d0 100644 --- a/jbrowse_jupyter/dev_server.py +++ b/jbrowse_jupyter/dev_server.py @@ -18,148 +18,148 @@ def copy_byte_range(infile, outfile, start=None, stop=None, bufsize=16 * 1024): - """Like shutil.copyfileobj, but only copy a range of the streams. - Both start and stop are inclusive. - """ - if start is not None: - infile.seek(start) - while 1: - to_read = min(bufsize, stop + 1 - infile.tell() if stop else bufsize) - buf = infile.read(to_read) - if not buf: - break - outfile.write(buf) + """Like shutil.copyfileobj, but only copy a range of the streams. + Both start and stop are inclusive. + """ + if start is not None: + infile.seek(start) + while 1: + to_read = min(bufsize, stop + 1 - infile.tell() if stop else bufsize) + buf = infile.read(to_read) + if not buf: + break + outfile.write(buf) BYTE_RANGE_RE = re.compile(r"bytes=(\d+)-(\d+)?$") def parse_byte_range(byte_range): - """ - Returns the two numbers in 'bytes=123-456' or throws ValueError. - The last number or both numbers may be None. - """ - if byte_range.strip() == "": - return None, None + """ + Returns the two numbers in 'bytes=123-456' or throws ValueError. + The last number or both numbers may be None. + """ + if byte_range.strip() == "": + return None, None - m = BYTE_RANGE_RE.match(byte_range) - if not m: - raise ValueError("Invalid byte range %s" % byte_range) + m = BYTE_RANGE_RE.match(byte_range) + if not m: + raise ValueError("Invalid byte range %s" % byte_range) - first, last = [x and int(x) for x in m.groups()] - if last and last < first: - raise ValueError("Invalid byte range %s" % byte_range) - return first, last + first, last = [x and int(x) for x in m.groups()] + if last and last < first: + raise ValueError("Invalid byte range %s" % byte_range) + return first, last class CustomRequestHandler(SimpleHTTPRequestHandler): - """ - Creating a small HTTP request server - """ - - def send_head(self): - if "Range" not in self.headers: - self.range = None - return SimpleHTTPRequestHandler.send_head(self) - try: - self.range = parse_byte_range(self.headers["Range"]) - except ValueError: - self.send_error(400, "Invalid byte range") - return None - first, last = self.range - - # Mirroring SimpleHTTPServer.py here - path = self.translate_path(self.path) - f = None - ctype = self.guess_type(path) - try: - f = open(path, "rb") - except IOError: - self.send_error(404, "File not found") - return None - - fs = os.fstat(f.fileno()) - file_len = fs[6] - if first >= file_len: - self.send_error(416, "Requested Range Not Satisfiable") - return None - - self.send_response(206) - self.send_header("Content-type", ctype) - - if last is None or last >= file_len: - last = file_len - 1 - response_length = last - first + 1 - - self.send_header("Content-Range", "bytes %s-%s/%s" % (first, last, file_len)) - self.send_header("Content-Length", str(response_length)) - self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) - self.end_headers() - return f - - def copyfile(self, source, outputfile): - if not self.range: - return SimpleHTTPRequestHandler.copyfile(self, source, outputfile) - - # SimpleHTTPRequestHandler uses shutil.copyfileobj, which doesn't let - # you stop the copying before the end of the file. - start, stop = self.range # set in send_head() - copy_byte_range(source, outputfile, start, stop) - - def end_headers(self): - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header("Access-Control-Allow-Methods", "GET, OPTIONS") - self.send_header("Access-Control-Expose-Headers", "*") - self.send_header("Accept-Ranges", "bytes") - self.send_header("Content-Type", "application/octet-stream") - SimpleHTTPRequestHandler.end_headers(self) - - def translate_path(self, path): - path = SimpleHTTPRequestHandler.translate_path(self, path) - relpath = os.path.relpath(path, os.getcwd()) - fullpath = os.path.join(self.server.base_path, relpath) - return fullpath + """ + Creating a small HTTP request server + """ + + def send_head(self): + if "Range" not in self.headers: + self.range = None + return SimpleHTTPRequestHandler.send_head(self) + try: + self.range = parse_byte_range(self.headers["Range"]) + except ValueError: + self.send_error(400, "Invalid byte range") + return None + first, last = self.range + + # Mirroring SimpleHTTPServer.py here + path = self.translate_path(self.path) + f = None + ctype = self.guess_type(path) + try: + f = open(path, "rb") + except IOError: + self.send_error(404, "File not found") + return None + + fs = os.fstat(f.fileno()) + file_len = fs[6] + if first >= file_len: + self.send_error(416, "Requested Range Not Satisfiable") + return None + + self.send_response(206) + self.send_header("Content-type", ctype) + + if last is None or last >= file_len: + last = file_len - 1 + response_length = last - first + 1 + + self.send_header("Content-Range", "bytes %s-%s/%s" % (first, last, file_len)) + self.send_header("Content-Length", str(response_length)) + self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) + self.end_headers() + return f + + def copyfile(self, source, outputfile): + if not self.range: + return SimpleHTTPRequestHandler.copyfile(self, source, outputfile) + + # SimpleHTTPRequestHandler uses shutil.copyfileobj, which doesn't let + # you stop the copying before the end of the file. + start, stop = self.range # set in send_head() + copy_byte_range(source, outputfile, start, stop) + + def end_headers(self): + self.send_header("Access-Control-Allow-Origin", "*") + self.send_header("Access-Control-Allow-Methods", "GET, OPTIONS") + self.send_header("Access-Control-Expose-Headers", "*") + self.send_header("Accept-Ranges", "bytes") + self.send_header("Content-Type", "application/octet-stream") + SimpleHTTPRequestHandler.end_headers(self) + + def translate_path(self, path): + path = SimpleHTTPRequestHandler.translate_path(self, path) + relpath = os.path.relpath(path, os.getcwd()) + fullpath = os.path.join(self.server.base_path, relpath) + return fullpath class DevServer(HTTPServer): - def __init__( - self, base_path, server_address, RequestHandlerClass=CustomRequestHandler - ): - self.base_path = base_path - HTTPServer.__init__(self, server_address, RequestHandlerClass) + def __init__( + self, base_path, server_address, RequestHandlerClass=CustomRequestHandler + ): + self.base_path = base_path + HTTPServer.__init__(self, server_address, RequestHandlerClass) def serve(data_path, **kwargs): - """ - Launches a development http server. It is not recommended - for production. - - e.g - serve('./path/to/data', port=8080, host='localhost') - - :param str data_path: path to file directory to serve - defaults to the current working dir - :param int port: (optional) port to utilize when running - the dev server, defaults to 8080 - :param str host: (optional) host to utilize when running - the dev server, default to localhost - """ - print("=============================================") - print( - "Warning: \n" - "This is a development environment.\n" - "This is not recommended for production." - ) - port = kwargs.get("port", 8080) - host = kwargs.get("host", "localhost") - # data_path = kwargs.get('path', ".") - # print('data', data_path) - # dir_path = os.path.join(os.path.dirname(__file__), data_path) - # print('dir path', dir_path) - # print('relative ', os.path.relpath(data_path, os.getcwd())) - # print('join', os.path.join(os.getcwd(), data_path)) - httpd = DevServer(data_path, (host, port)) - server = f"http://{host}:{port}" - print("=============================================") - print(f'Server is now running at \n "{server}"') - httpd.serve_forever() + """ + Launches a development http server. It is not recommended + for production. + + e.g + serve('./path/to/data', port=8080, host='localhost') + + :param str data_path: path to file directory to serve + defaults to the current working dir + :param int port: (optional) port to utilize when running + the dev server, defaults to 8080 + :param str host: (optional) host to utilize when running + the dev server, default to localhost + """ + print("=============================================") + print( + "Warning: \n" + "This is a development environment.\n" + "This is not recommended for production." + ) + port = kwargs.get("port", 8080) + host = kwargs.get("host", "localhost") + # data_path = kwargs.get('path', ".") + # print('data', data_path) + # dir_path = os.path.join(os.path.dirname(__file__), data_path) + # print('dir path', dir_path) + # print('relative ', os.path.relpath(data_path, os.getcwd())) + # print('join', os.path.join(os.getcwd(), data_path)) + httpd = DevServer(data_path, (host, port)) + server = f"http://{host}:{port}" + print("=============================================") + print(f'Server is now running at \n "{server}"') + httpd.serve_forever() diff --git a/jbrowse_jupyter/jbrowse_config.py b/jbrowse_jupyter/jbrowse_config.py index cd16e7b..32fde89 100644 --- a/jbrowse_jupyter/jbrowse_config.py +++ b/jbrowse_jupyter/jbrowse_config.py @@ -1,790 +1,790 @@ import IPython from jbrowse_jupyter.util import ( - is_url, - get_default, - guess_file_name, - get_name, + is_url, + get_default, + guess_file_name, + get_name, ) from jbrowse_jupyter.tracks import ( - guess_adapter_type, - guess_track_type, - check_track_data, - get_from_config_adapter, - guess_display_type, - make_url_colab_jupyter, + guess_adapter_type, + guess_track_type, + check_track_data, + get_from_config_adapter, + guess_display_type, + make_url_colab_jupyter, ) def create(view_type="LGV", **kwargs): - """ - Creates a JBrowseConfig given a view type. - - - create(): creates empty LGV JBrowseConfig - - create("CGV"):creates empty CGV JBrowseConfig - - create("LGV", genome="hg19"):creates LGV JBrowseConfig w/ default genome - - create("CGV", conf={"k": "v"}):creates CGV JBrowseConfig w/ a conf obj - - :param str view_type: the type of view ('LGV' or 'CGV'), defaults to LGV - :param str genome: genome ('hg19' or 'hg38') - :return: JBrowseConfig - :rtype: JBrowseConfig instance - :raises TypeError: if genome passed is not hg19 or hg38 - :raises TypeError: if view type is not `LGV` or `CGV` - """ - available_genomes = {"hg19", "hg38"} - conf = kwargs.get("conf", {}) - genome = kwargs.get("genome", "empty") - view = view_type - - # view type (LGV or CGV) - # make it backwards compatible - if view_type == "view" or view_type == "conf": - view = "LGV" - - if view != "LGV" and view != "CGV": - raise TypeError(f"Currently not supporting view_type: {view}.") - # configuration - # 1) genomes available - # 2) with own conf obj OR - # 3) empty default config to customize) - no_configuration = genome != "empty" and not conf - # Check passed genome is available - message1 = "is not a valid default genome to view" - message2 = "Choose from hg19 or hg38 or pass your own conf" - if genome not in available_genomes and no_configuration: - raise TypeError(f'"{genome}" {message1}.{message2}.') - if genome in available_genomes: - conf = get_default(genome, view) - elif not conf: - return JBrowseConfig(view=view) - return JBrowseConfig(view=view, conf=conf) - - -class JBrowseConfig: - """ - Creates JBrowse configuration objects. - Currently supporting configuration objects for the - React JBrowse Linear Genome View and React JBrowse - Circular Genome View - https://jbrowse.org/storybook/lgv/main - https://jbrowse.org/storybook/cgv/main/ - - """ - - def __init__(self, view="LGV", conf=None): - """ - Initializes class. - - :param str view: LGV or CGV - defaults to LGV - :param obj conf: optional conf obj """ - in_colab_notebook = False - in_jupyter_notebook = False - try: - import google.colab.output # noqa - - in_colab_notebook = True - except: # noqa - in_colab_notebook = False - try: - shell = IPython.get_ipython().__class__.__name__ # noqa - if shell == "ZMQInteractiveShell": # noqa - in_jupyter_notebook = True - else: - in_jupyter_notebook = False - except: # noqa - in_jupyter_notebook = False - # ===================== - view_default = { - "id": "linearGenomeView", - "type": "LinearGenomeView", - "tracks": [], - } - if view != "LGV" and view == "CGV": - view_default = {"id": "circularView", "type": "CircularView", "tracks": []} - default = { - "assembly": {}, - "tracks": [], - "defaultSession": {"name": "default-session", "view": view_default}, - "aggregateTextSearchAdapters": [], - "location": "", - "configuration": {"theme": {}}, - } - if conf is not None: - for r in default.keys(): - if r not in conf: - conf[r] = default[r] - self.config = default if conf is None else conf - if conf is not None: - ids = {x["trackId"]: x for x in conf["tracks"]} - self.tracks_ids_map = ids - self.tracks_ids_map = {} - self.view = view - # environment - self.nb_port = 8888 - self.nb_host = "localhost" - self.colab = in_colab_notebook - self.jupyter = not in_colab_notebook and in_jupyter_notebook - - def get_config(self): + Creates a JBrowseConfig given a view type. + + - create(): creates empty LGV JBrowseConfig + - create("CGV"):creates empty CGV JBrowseConfig + - create("LGV", genome="hg19"):creates LGV JBrowseConfig w/ default genome + - create("CGV", conf={"k": "v"}):creates CGV JBrowseConfig w/ a conf obj + + :param str view_type: the type of view ('LGV' or 'CGV'), defaults to LGV + :param str genome: genome ('hg19' or 'hg38') + :return: JBrowseConfig + :rtype: JBrowseConfig instance + :raises TypeError: if genome passed is not hg19 or hg38 + :raises TypeError: if view type is not `LGV` or `CGV` """ - Returns the configuration object of the JBrowseConfig - instance. This object can then be passed to launch or - create_component to launch or create a Dash JBrowse - component + available_genomes = {"hg19", "hg38"} + conf = kwargs.get("conf", {}) + genome = kwargs.get("genome", "empty") + view = view_type + + # view type (LGV or CGV) + # make it backwards compatible + if view_type == "view" or view_type == "conf": + view = "LGV" + + if view != "LGV" and view != "CGV": + raise TypeError(f"Currently not supporting view_type: {view}.") + # configuration + # 1) genomes available + # 2) with own conf obj OR + # 3) empty default config to customize) + no_configuration = genome != "empty" and not conf + # Check passed genome is available + message1 = "is not a valid default genome to view" + message2 = "Choose from hg19 or hg38 or pass your own conf" + if genome not in available_genomes and no_configuration: + raise TypeError(f'"{genome}" {message1}.{message2}.') + if genome in available_genomes: + conf = get_default(genome, view) + elif not conf: + return JBrowseConfig(view=view) + return JBrowseConfig(view=view, conf=conf) - e.g: create("LGV", genome="hg19").get_config() - :return: returns configuration object - :rtype: obj +class JBrowseConfig: """ - return self.config + Creates JBrowse configuration objects. + Currently supporting configuration objects for the + React JBrowse Linear Genome View and React JBrowse + Circular Genome View + https://jbrowse.org/storybook/lgv/main + https://jbrowse.org/storybook/cgv/main/ - def get_colab(self): - return self.colab - - def get_jupyter(self): - return self.jupyter + """ - def get_env(self): - print("notebook port: ", self.nb_port) - print("notebook host: ", self.nb_host) - return self.nb_host, self.nb_port + def __init__(self, view="LGV", conf=None): + """ + Initializes class. - def set_env(self, notebook_host="localhost", notebook_port=8888): - """ - Changes the port and the host for creating links to files - found within the file tree of jupyter. - - We want to be able to use paths to local files that can be - accessed within the file tree of jupyter notebook and jupyter - lab. The port and host should match those configured in your - jupyter config. - - You can set_env after creating your view. - browser = create("LGV") - browser.set_env("localhost", 8989) - - :param str notebook_host: host used in jupyter config for - for using paths to local files. (Defaults to "localhost") - :param str notebook_port: port used in jupyter config for - for using paths to local files. (Defaults to 8888) - """ - self.nb_port = notebook_port - self.nb_host = notebook_host - - # ========== Assembly =========== - def get_assembly(self): - # Returns the JBrowseConfig assembly subconfiguration object - return self.config["assembly"] - - def get_assembly_name(self): - # Returns the assembly name. - assembly_error = ( - "Can not get assembly name. " "Please configure the assembly first." - ) - if self.get_assembly(): - return self.get_assembly()["name"] - else: - raise Exception(assembly_error) - - def set_assembly(self, assembly_data, **kwargs): - """ - Sets the assembly. - - Assumes assembly_data.fai exists for fasta assembly data - that is not bgzipped. - - Assumes assembly_data.fai and assembly_data.gzi exist for - bgzipped assembly data. - - e.g set_assembly("url/assembly.fasta.gz", overwrite=True) - assumes - "url/assembly.fasta.gz.fai" and - "url/assembly.fasta.gz.gzi" also exist - - For configuring assemblies check out our config docs - https://jbrowse.org/jb2/docs/config_guide/#assembly-config - - :param str assembly_data: url/path to the sequence data - :param str name: (optional) name for the assembly, - defaults to name generated from assembly_data file name - :param list aliases: (optional) list of aliases for the assembly - :param obj refname_aliases: (optional) config for refname aliases. - :param str overwrite: flag whether or not to overwrite - existing assembly, default to False. - :raises TypeError: Paths are only supported in jupyter. - :raises TypeError: adapter used for file type is not supported or - recognized - """ - overwrite = kwargs.get("overwrite", False) - indx = kwargs.get("index", "defaultIndex") - err = "assembly is already set, set overwrite to True to overwrite" - if self.get_assembly() and not overwrite: - raise TypeError(err) - aliases = kwargs.get("aliases", []) - refname_aliases = kwargs.get("refname_aliases", {}) - if is_url(assembly_data): - if indx != "defaultIndex": - if not is_url(indx) and not self.jupyter: - raise TypeError( - f"Path for {assembly_data} " - "is used in an unsupported environment." - "Paths are supported in Jupyter" - " notebooks and Jupyter lab." - "Please use a url for your assembly " - "data. You can check out our local " - "file support docs for more information" - ) - assembly_adapter = guess_adapter_type(assembly_data, "uri", indx, **kwargs) - name = kwargs.get("name", get_name(assembly_data)) - if assembly_adapter["type"] == "UNKNOWN": - raise TypeError("Adapter type is not recognized") - if assembly_adapter["type"] == "UNSUPPORTED": - raise TypeError("Adapter type is not supported") - assembly_config = { - "name": name, - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": f"{name}-ReferenceSequenceTrack", - "adapter": assembly_adapter, - }, - "aliases": aliases, - "refNameAliases": refname_aliases, - } - self.config["assembly"] = assembly_config - else: - if not self.jupyter: - raise TypeError( - f"Path {assembly_data} for assembly data " - "is used in an unsupported environment." - "Paths are supported in Jupyter notebooks" - " and Jupyter lab.Please use a url for " - "your assembly data. You can check out " - "our local file support docs for more " - "information" - ) - if indx != "defaultIndex" and not is_url(indx): - if not self.jupyter: - raise TypeError( - "Paths are used in an " - "unsupported environment." - "Paths are supported in Jupyter" - " notebooks and Jupyter lab." - "Please use a urls for your assembly" - " and index data. You can check out " - "our local file support docs for more" - " information" - ) - assembly_adapter = guess_adapter_type( - assembly_data, - "localPath", - indx, - colab=self.colab, - nb_port=self.nb_port, - nb_host=self.nb_host, - ) - name = kwargs.get("name", get_name(assembly_data)) - if assembly_adapter["type"] == "UNKNOWN": - raise TypeError("Adapter type is not recognized") - if assembly_adapter["type"] == "UNSUPPORTED": - raise TypeError("Adapter type is not supported") - assembly_config = { - "name": name, - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": f"{name}-ReferenceSequenceTrack", - "adapter": assembly_adapter, - }, - "aliases": aliases, - "refNameAliases": refname_aliases, - } - self.config["assembly"] = assembly_config - - # ============ Tracks ============= - - def get_reference_track(self): - # Returns the reference track for the configured assembly. - assembly_name = self.get_assembly_name() - configuration = f"{assembly_name}-ReferenceSequenceTrack" - conf_str = f"{configuration}-LinearReferenceSequenceDisplay" - return { - "type": "ReferenceSequenceTrack", - "configuration": configuration, - "displays": [ - { - "type": "LinearReferenceSequenceDisplay", - "configuration": conf_str, + :param str view: LGV or CGV + defaults to LGV + :param obj conf: optional conf obj + """ + in_colab_notebook = False + in_jupyter_notebook = False + try: + import google.colab.output # noqa + + in_colab_notebook = True + except: # noqa + in_colab_notebook = False + try: + shell = IPython.get_ipython().__class__.__name__ # noqa + if shell == "ZMQInteractiveShell": # noqa + in_jupyter_notebook = True + else: + in_jupyter_notebook = False + except: # noqa + in_jupyter_notebook = False + # ===================== + view_default = { + "id": "linearGenomeView", + "type": "LinearGenomeView", + "tracks": [], } - ], - } - - def get_track_display(self, track): - # Returns the track display subconfiguration. - track_type = track["type"] - track_id = track["trackId"] - display_type = guess_display_type(track_type, self.view) - return { - "type": track_type, - "configuration": track_id, - "displays": [ - {"type": display_type, "configuration": f"{track_id}-{display_type}"} - ], - } - - def get_track(self, track_name): - # Return the list of track configurations with that name - tracks = [track for track in self.get_tracks() if track["name"] == track_name] - return tracks - - def get_tracks(self): - # Returns list of tracks in the configuration. - return self.config["tracks"] - - def add_df_track(self, track_data, name, **kwargs): - """ - Adds track from a pandas DataFrame. If the score column - is present, it will create a Quantitative track else it - will create a Feature track. - - Requires DataFrame to have columns named 'refName', - 'start', 'end', and 'name' - - Requires refName and name columns to be of type str and - start, end, and score to be int - - e.g: - add_df_track(df, "track_name") - - :param track_data: panda DataFrame with the track data. - :param str name: name for the track. - :param str track_id: (optional) trackId for the track - :param str overwrite: flag whether or not to overwrite existing track. - :raises Exception: if assembly has not been configured. - :raises TypeError: if track data is invalid - :raises TypeError: if view is not LGV - :raises TypeError: if track with that trackId already exists - list of tracks - """ - if not self.get_assembly(): - raise Exception("Please set the assembly before adding a track.") - if self.view != "LGV": - raise TypeError("Can not add a data frame track to a CGV conf.") - check_track_data(track_data) - - overwrite = kwargs.get("overwrite", False) - assembly_name = self.get_assembly_name() - track_id = kwargs.get("track_id", f"{assembly_name}-{name}") - current_tracks = self.config["tracks"] - # if score column is present => QuantitativeTrack, else FeatureTrack - track_type = "FeatureTrack" - if "score" in track_data: - track_type = "QuantitativeTrack" - - adapter = get_from_config_adapter(track_data) - df_track_config = { - "type": track_type, - "trackId": track_id, - "name": name, - "assemblyNames": [assembly_name], - "adapter": adapter, - } - err = ( - f'track with trackId: "{track_id}" already exists in config.', - "Set overwrite to True if you want to overwrite it.", - ) - if track_id in self.tracks_ids_map.keys() and not overwrite: - raise TypeError(err) - if track_id in self.tracks_ids_map.keys() and overwrite: - # delete track and overwrite it - current_tracks = [t for t in current_tracks if t["trackId"] != track_id] - - current_tracks.append(df_track_config) - self.config["tracks"] = current_tracks - self.tracks_ids_map[track_id] = df_track_config - - def add_track(self, data, **kwargs): - """ - Adds a track subconfiguration to the list of tracks - in the config. - - if an index is not provided, it will assume an index file - with the same name can be found in the directory of the - track data - - e.g: - add_track("url.bam") - assumes "url.bam.bai" also exists - - :param str data: track file url/path - :param str name: (optional) name for the track - (defaults to data filename) - :param str track_id: (optional) trackId for the track - :param str index: (optional) file url/path for the track - :param str track_type: (optional) track type - :param boolean overwrite: (optional) defaults to False - :raises Exception: if assembly has not been configured - :raises TypeError: if track data is not provided - :raises TypeError: if track type is not supported - :raises TypeError: Paths are only supported in jupyter. - """ - if not data: - raise TypeError("Track data is required. None was provided.") - if not self.get_assembly(): - raise Exception("Please set the assembly before adding a track.") - - assembly_names = [self.get_assembly_name()] - name = kwargs.get("name", guess_file_name(data)) - index = kwargs.get("index", "defaultIndex") - overwrite = kwargs.get("overwrite", False) - current_tracks = self.get_tracks() - if is_url(data): - # default to uri protocol until local files enabled - if not is_url(index) and index != "defaultIndex": - if not self.jupyter: - raise TypeError( - f"Path {index} for index is used in an " - "unsupported environment. Paths are " - "supported in Jupyter notebooks and Jupy" - "ter lab.Please use a url for your " - "assembly data. You can check out " - "our local file support docs for more " - "information" - ) - else: - adapter = guess_adapter_type( - data, - "localPath", - index, - colab=self.colab, - nb_port=self.nb_port, - nb_host=self.nb_host, - ) - else: - adapter = guess_adapter_type(data, "uri", index) - # adapter = guess_adapter_type(data, 'uri', index) - if adapter["type"] == "UNKNOWN": - raise TypeError("Adapter type is not recognized") - if adapter["type"] == "UNSUPPORTED": - raise TypeError("Adapter type is not supported") - # get sequence adapter for cram adapter track - if adapter["type"] == "CramAdapter": - extra_config = self.get_assembly()["sequence"]["adapter"] - adapter["sequenceAdapter"] = extra_config - t_type = kwargs.get("track_type", guess_track_type(adapter["type"])) - supported_track_types = set( - { - "AlignmentsTrack", - "QuantitativeTrack", - "VariantTrack", - "FeatureTrack", - "ReferenceSequenceTrack", + if view != "LGV" and view == "CGV": + view_default = {"id": "circularView", "type": "CircularView", "tracks": []} + default = { + "assembly": {}, + "tracks": [], + "defaultSession": {"name": "default-session", "view": view_default}, + "aggregateTextSearchAdapters": [], + "location": "", + "configuration": {"theme": {}}, } - ) - if t_type not in supported_track_types: - raise TypeError(f'Track type: "{t_type}" is not supported.') - default_track_id = f"{self.get_assembly_name()}-{name}" - track_id = kwargs.get("track_id", default_track_id) - track_config = { - "type": t_type, - "trackId": track_id, - "name": name, - "assemblyNames": assembly_names, - "adapter": adapter, - } - if track_id in self.tracks_ids_map.keys() and not overwrite: - raise TypeError( - ( - f'track with trackId: "{track_id}" already exists in' - f"config. Set overwrite to True to overwrite it." - ) + if conf is not None: + for r in default.keys(): + if r not in conf: + conf[r] = default[r] + self.config = default if conf is None else conf + if conf is not None: + ids = {x["trackId"]: x for x in conf["tracks"]} + self.tracks_ids_map = ids + self.tracks_ids_map = {} + self.view = view + # environment + self.nb_port = 8888 + self.nb_host = "localhost" + self.colab = in_colab_notebook + self.jupyter = not in_colab_notebook and in_jupyter_notebook + + def get_config(self): + """ + Returns the configuration object of the JBrowseConfig + instance. This object can then be passed to launch or + create_component to launch or create a Dash JBrowse + component + + e.g: create("LGV", genome="hg19").get_config() + + :return: returns configuration object + :rtype: obj + """ + return self.config + + def get_colab(self): + return self.colab + + def get_jupyter(self): + return self.jupyter + + def get_env(self): + print("notebook port: ", self.nb_port) + print("notebook host: ", self.nb_host) + return self.nb_host, self.nb_port + + def set_env(self, notebook_host="localhost", notebook_port=8888): + """ + Changes the port and the host for creating links to files + found within the file tree of jupyter. + + We want to be able to use paths to local files that can be + accessed within the file tree of jupyter notebook and jupyter + lab. The port and host should match those configured in your + jupyter config. + + You can set_env after creating your view. + browser = create("LGV") + browser.set_env("localhost", 8989) + + :param str notebook_host: host used in jupyter config for + for using paths to local files. (Defaults to "localhost") + :param str notebook_port: port used in jupyter config for + for using paths to local files. (Defaults to 8888) + """ + self.nb_port = notebook_port + self.nb_host = notebook_host + + # ========== Assembly =========== + def get_assembly(self): + # Returns the JBrowseConfig assembly subconfiguration object + return self.config["assembly"] + + def get_assembly_name(self): + # Returns the assembly name. + assembly_error = ( + "Can not get assembly name. " "Please configure the assembly first." ) - if track_id in self.tracks_ids_map.keys() and overwrite: - current_tracks = [t for t in current_tracks if t["trackId"] != track_id] - - current_tracks.append(track_config) - self.config["tracks"] = current_tracks - self.tracks_ids_map[track_id] = track_config - else: - if not self.jupyter: - raise TypeError( - f"Path {data} for track data " - "is used in an unsupported environment." - "Paths are supported in Jupyter notebooks" - " and Jupyter lab.Please use a url for " - "your assembly data. You can check out " - "our local file support docs for more " - "information" - ) - if not is_url(index) and index != "defaultIndex": - if not self.jupyter: - raise TypeError( - f"Path {index} for index is used in an " - "unsupported environment.Paths are " - "supported in Jupyter notebooks and Jupyte" - "r lab.Please use a url for your assembly " - "data. You can check out our local file " - "support docs for more information" - ) - adapter = guess_adapter_type( - data, - "localPath", - index, - colab=self.colab, - nb_port=self.nb_port, - nb_host=self.nb_host, - ) - if adapter["type"] == "UNKNOWN": - raise TypeError("Adapter type is not recognized") - if adapter["type"] == "UNSUPPORTED": - raise TypeError("Adapter type is not supported") - # get sequence adapter for cram adapter track - if adapter["type"] == "CramAdapter": - extra_config = self.get_assembly()["sequence"]["adapter"] - adapter["sequenceAdapter"] = extra_config - t_type = kwargs.get("track_type", guess_track_type(adapter["type"])) - supported_track_types = set( - { - "AlignmentsTrack", - "QuantitativeTrack", - "VariantTrack", - "FeatureTrack", - "ReferenceSequenceTrack", + if self.get_assembly(): + return self.get_assembly()["name"] + else: + raise Exception(assembly_error) + + def set_assembly(self, assembly_data, **kwargs): + """ + Sets the assembly. + + Assumes assembly_data.fai exists for fasta assembly data + that is not bgzipped. + + Assumes assembly_data.fai and assembly_data.gzi exist for + bgzipped assembly data. + + e.g set_assembly("url/assembly.fasta.gz", overwrite=True) + assumes + "url/assembly.fasta.gz.fai" and + "url/assembly.fasta.gz.gzi" also exist + + For configuring assemblies check out our config docs + https://jbrowse.org/jb2/docs/config_guide/#assembly-config + + :param str assembly_data: url/path to the sequence data + :param str name: (optional) name for the assembly, + defaults to name generated from assembly_data file name + :param list aliases: (optional) list of aliases for the assembly + :param obj refname_aliases: (optional) config for refname aliases. + :param str overwrite: flag whether or not to overwrite + existing assembly, default to False. + :raises TypeError: Paths are only supported in jupyter. + :raises TypeError: adapter used for file type is not supported or + recognized + """ + overwrite = kwargs.get("overwrite", False) + indx = kwargs.get("index", "defaultIndex") + err = "assembly is already set, set overwrite to True to overwrite" + if self.get_assembly() and not overwrite: + raise TypeError(err) + aliases = kwargs.get("aliases", []) + refname_aliases = kwargs.get("refname_aliases", {}) + if is_url(assembly_data): + if indx != "defaultIndex": + if not is_url(indx) and not self.jupyter: + raise TypeError( + f"Path for {assembly_data} " + "is used in an unsupported environment." + "Paths are supported in Jupyter" + " notebooks and Jupyter lab." + "Please use a url for your assembly " + "data. You can check out our local " + "file support docs for more information" + ) + assembly_adapter = guess_adapter_type(assembly_data, "uri", indx, **kwargs) + name = kwargs.get("name", get_name(assembly_data)) + if assembly_adapter["type"] == "UNKNOWN": + raise TypeError("Adapter type is not recognized") + if assembly_adapter["type"] == "UNSUPPORTED": + raise TypeError("Adapter type is not supported") + assembly_config = { + "name": name, + "sequence": { + "type": "ReferenceSequenceTrack", + "trackId": f"{name}-ReferenceSequenceTrack", + "adapter": assembly_adapter, + }, + "aliases": aliases, + "refNameAliases": refname_aliases, + } + self.config["assembly"] = assembly_config + else: + if not self.jupyter: + raise TypeError( + f"Path {assembly_data} for assembly data " + "is used in an unsupported environment." + "Paths are supported in Jupyter notebooks" + " and Jupyter lab.Please use a url for " + "your assembly data. You can check out " + "our local file support docs for more " + "information" + ) + if indx != "defaultIndex" and not is_url(indx): + if not self.jupyter: + raise TypeError( + "Paths are used in an " + "unsupported environment." + "Paths are supported in Jupyter" + " notebooks and Jupyter lab." + "Please use a urls for your assembly" + " and index data. You can check out " + "our local file support docs for more" + " information" + ) + assembly_adapter = guess_adapter_type( + assembly_data, + "localPath", + indx, + colab=self.colab, + nb_port=self.nb_port, + nb_host=self.nb_host, + ) + name = kwargs.get("name", get_name(assembly_data)) + if assembly_adapter["type"] == "UNKNOWN": + raise TypeError("Adapter type is not recognized") + if assembly_adapter["type"] == "UNSUPPORTED": + raise TypeError("Adapter type is not supported") + assembly_config = { + "name": name, + "sequence": { + "type": "ReferenceSequenceTrack", + "trackId": f"{name}-ReferenceSequenceTrack", + "adapter": assembly_adapter, + }, + "aliases": aliases, + "refNameAliases": refname_aliases, + } + self.config["assembly"] = assembly_config + + # ============ Tracks ============= + + def get_reference_track(self): + # Returns the reference track for the configured assembly. + assembly_name = self.get_assembly_name() + configuration = f"{assembly_name}-ReferenceSequenceTrack" + conf_str = f"{configuration}-LinearReferenceSequenceDisplay" + return { + "type": "ReferenceSequenceTrack", + "configuration": configuration, + "displays": [ + { + "type": "LinearReferenceSequenceDisplay", + "configuration": conf_str, + } + ], } - ) - if t_type not in supported_track_types: - raise TypeError(f'Track type: "{t_type}" is not supported.') - default_track_id = f"{self.get_assembly_name()}-{name}" - track_id = kwargs.get("track_id", default_track_id) - track_config = { - "type": t_type, - "trackId": track_id, - "name": name, - "assemblyNames": assembly_names, - "adapter": adapter, - } - if track_id in self.tracks_ids_map.keys() and not overwrite: - raise TypeError( - ( - f'track with trackId: "{track_id}" already exists in' - f"config. Set overwrite to True to overwrite it." - ) - ) - if track_id in self.tracks_ids_map.keys() and overwrite: - current_tracks = [t for t in current_tracks if t["trackId"] != track_id] - - current_tracks.append(track_config) - self.config["tracks"] = current_tracks - self.tracks_ids_map[track_id] = track_config - def delete_track(self, track_id): - """ - Deletes a track from the config. - - e.g: - delete_track("test_track_id") - - Will delete track with track_id test_track_id. - - :param str track_id: track_id to delete + def get_track_display(self, track): + # Returns the track display subconfiguration. + track_type = track["type"] + track_id = track["trackId"] + display_type = guess_display_type(track_type, self.view) + return { + "type": track_type, + "configuration": track_id, + "displays": [ + {"type": display_type, "configuration": f"{track_id}-{display_type}"} + ], + } - :raises TypeError: if track_id provided does not exist - """ - new_tracks = [] - current_tracks = self.get_tracks() - if track_id not in self.tracks_ids_map.keys(): - raise TypeError( - (f'track with trackId: "{track_id}" does not exist in' f"config.") - ) - else: - new_tracks = [t for t in current_tracks if t["trackId"] != track_id] - self.config["tracks"] = new_tracks - # clear from default session - default_sess = self.get_default_session() - tracks_sess = default_sess["view"]["tracks"] - new_tracks_sess = [t for t in tracks_sess if t["configuration"] != track_id] - if self.view == "CGV": - self.config["defaultSession"] = { - "name": "my session", - "view": { - "id": "circularView", - "type": "CircularView", - "tracks": new_tracks_sess, - }, + def get_track(self, track_name): + # Return the list of track configurations with that name + tracks = [track for track in self.get_tracks() if track["name"] == track_name] + return tracks + + def get_tracks(self): + # Returns list of tracks in the configuration. + return self.config["tracks"] + + def add_df_track(self, track_data, name, **kwargs): + """ + Adds track from a pandas DataFrame. If the score column + is present, it will create a Quantitative track else it + will create a Feature track. + + Requires DataFrame to have columns named 'refName', + 'start', 'end', and 'name' + + Requires refName and name columns to be of type str and + start, end, and score to be int + + e.g: + add_df_track(df, "track_name") + + :param track_data: panda DataFrame with the track data. + :param str name: name for the track. + :param str track_id: (optional) trackId for the track + :param str overwrite: flag whether or not to overwrite existing track. + :raises Exception: if assembly has not been configured. + :raises TypeError: if track data is invalid + :raises TypeError: if view is not LGV + :raises TypeError: if track with that trackId already exists + list of tracks + """ + if not self.get_assembly(): + raise Exception("Please set the assembly before adding a track.") + if self.view != "LGV": + raise TypeError("Can not add a data frame track to a CGV conf.") + check_track_data(track_data) + + overwrite = kwargs.get("overwrite", False) + assembly_name = self.get_assembly_name() + track_id = kwargs.get("track_id", f"{assembly_name}-{name}") + current_tracks = self.config["tracks"] + # if score column is present => QuantitativeTrack, else FeatureTrack + track_type = "FeatureTrack" + if "score" in track_data: + track_type = "QuantitativeTrack" + + adapter = get_from_config_adapter(track_data) + df_track_config = { + "type": track_type, + "trackId": track_id, + "name": name, + "assemblyNames": [assembly_name], + "adapter": adapter, } - else: - self.config["defaultSession"] = { - "name": "my session", - "view": { - "id": "LinearGenomeView", - "type": "LinearGenomeView", - "tracks": new_tracks_sess, - }, + err = ( + f'track with trackId: "{track_id}" already exists in config.', + "Set overwrite to True if you want to overwrite it.", + ) + if track_id in self.tracks_ids_map.keys() and not overwrite: + raise TypeError(err) + if track_id in self.tracks_ids_map.keys() and overwrite: + # delete track and overwrite it + current_tracks = [t for t in current_tracks if t["trackId"] != track_id] + + current_tracks.append(df_track_config) + self.config["tracks"] = current_tracks + self.tracks_ids_map[track_id] = df_track_config + + def add_track(self, data, **kwargs): + """ + Adds a track subconfiguration to the list of tracks + in the config. + + if an index is not provided, it will assume an index file + with the same name can be found in the directory of the + track data + + e.g: + add_track("url.bam") + assumes "url.bam.bai" also exists + + :param str data: track file url/path + :param str name: (optional) name for the track + (defaults to data filename) + :param str track_id: (optional) trackId for the track + :param str index: (optional) file url/path for the track + :param str track_type: (optional) track type + :param boolean overwrite: (optional) defaults to False + :raises Exception: if assembly has not been configured + :raises TypeError: if track data is not provided + :raises TypeError: if track type is not supported + :raises TypeError: Paths are only supported in jupyter. + """ + if not data: + raise TypeError("Track data is required. None was provided.") + if not self.get_assembly(): + raise Exception("Please set the assembly before adding a track.") + + assembly_names = [self.get_assembly_name()] + name = kwargs.get("name", guess_file_name(data)) + index = kwargs.get("index", "defaultIndex") + overwrite = kwargs.get("overwrite", False) + current_tracks = self.get_tracks() + if is_url(data): + # default to uri protocol until local files enabled + if not is_url(index) and index != "defaultIndex": + if not self.jupyter: + raise TypeError( + f"Path {index} for index is used in an " + "unsupported environment. Paths are " + "supported in Jupyter notebooks and Jupy" + "ter lab.Please use a url for your " + "assembly data. You can check out " + "our local file support docs for more " + "information" + ) + else: + adapter = guess_adapter_type( + data, + "localPath", + index, + colab=self.colab, + nb_port=self.nb_port, + nb_host=self.nb_host, + ) + else: + adapter = guess_adapter_type(data, "uri", index) + # adapter = guess_adapter_type(data, 'uri', index) + if adapter["type"] == "UNKNOWN": + raise TypeError("Adapter type is not recognized") + if adapter["type"] == "UNSUPPORTED": + raise TypeError("Adapter type is not supported") + # get sequence adapter for cram adapter track + if adapter["type"] == "CramAdapter": + extra_config = self.get_assembly()["sequence"]["adapter"] + adapter["sequenceAdapter"] = extra_config + t_type = kwargs.get("track_type", guess_track_type(adapter["type"])) + supported_track_types = set( + { + "AlignmentsTrack", + "QuantitativeTrack", + "VariantTrack", + "FeatureTrack", + "ReferenceSequenceTrack", + } + ) + if t_type not in supported_track_types: + raise TypeError(f'Track type: "{t_type}" is not supported.') + default_track_id = f"{self.get_assembly_name()}-{name}" + track_id = kwargs.get("track_id", default_track_id) + track_config = { + "type": t_type, + "trackId": track_id, + "name": name, + "assemblyNames": assembly_names, + "adapter": adapter, + } + if track_id in self.tracks_ids_map.keys() and not overwrite: + raise TypeError( + ( + f'track with trackId: "{track_id}" already exists in' + f"config. Set overwrite to True to overwrite it." + ) + ) + if track_id in self.tracks_ids_map.keys() and overwrite: + current_tracks = [t for t in current_tracks if t["trackId"] != track_id] + + current_tracks.append(track_config) + self.config["tracks"] = current_tracks + self.tracks_ids_map[track_id] = track_config + else: + if not self.jupyter: + raise TypeError( + f"Path {data} for track data " + "is used in an unsupported environment." + "Paths are supported in Jupyter notebooks" + " and Jupyter lab.Please use a url for " + "your assembly data. You can check out " + "our local file support docs for more " + "information" + ) + if not is_url(index) and index != "defaultIndex": + if not self.jupyter: + raise TypeError( + f"Path {index} for index is used in an " + "unsupported environment.Paths are " + "supported in Jupyter notebooks and Jupyte" + "r lab.Please use a url for your assembly " + "data. You can check out our local file " + "support docs for more information" + ) + adapter = guess_adapter_type( + data, + "localPath", + index, + colab=self.colab, + nb_port=self.nb_port, + nb_host=self.nb_host, + ) + if adapter["type"] == "UNKNOWN": + raise TypeError("Adapter type is not recognized") + if adapter["type"] == "UNSUPPORTED": + raise TypeError("Adapter type is not supported") + # get sequence adapter for cram adapter track + if adapter["type"] == "CramAdapter": + extra_config = self.get_assembly()["sequence"]["adapter"] + adapter["sequenceAdapter"] = extra_config + t_type = kwargs.get("track_type", guess_track_type(adapter["type"])) + supported_track_types = set( + { + "AlignmentsTrack", + "QuantitativeTrack", + "VariantTrack", + "FeatureTrack", + "ReferenceSequenceTrack", + } + ) + if t_type not in supported_track_types: + raise TypeError(f'Track type: "{t_type}" is not supported.') + default_track_id = f"{self.get_assembly_name()}-{name}" + track_id = kwargs.get("track_id", default_track_id) + track_config = { + "type": t_type, + "trackId": track_id, + "name": name, + "assemblyNames": assembly_names, + "adapter": adapter, + } + if track_id in self.tracks_ids_map.keys() and not overwrite: + raise TypeError( + ( + f'track with trackId: "{track_id}" already exists in' + f"config. Set overwrite to True to overwrite it." + ) + ) + if track_id in self.tracks_ids_map.keys() and overwrite: + current_tracks = [t for t in current_tracks if t["trackId"] != track_id] + + current_tracks.append(track_config) + self.config["tracks"] = current_tracks + self.tracks_ids_map[track_id] = track_config + + def delete_track(self, track_id): + """ + Deletes a track from the config. + + e.g: + delete_track("test_track_id") + + Will delete track with track_id test_track_id. + + :param str track_id: track_id to delete + + :raises TypeError: if track_id provided does not exist + """ + new_tracks = [] + current_tracks = self.get_tracks() + if track_id not in self.tracks_ids_map.keys(): + raise TypeError( + (f'track with trackId: "{track_id}" does not exist in' f"config.") + ) + else: + new_tracks = [t for t in current_tracks if t["trackId"] != track_id] + self.config["tracks"] = new_tracks + # clear from default session + default_sess = self.get_default_session() + tracks_sess = default_sess["view"]["tracks"] + new_tracks_sess = [t for t in tracks_sess if t["configuration"] != track_id] + if self.view == "CGV": + self.config["defaultSession"] = { + "name": "my session", + "view": { + "id": "circularView", + "type": "CircularView", + "tracks": new_tracks_sess, + }, + } + else: + self.config["defaultSession"] = { + "name": "my session", + "view": { + "id": "LinearGenomeView", + "type": "LinearGenomeView", + "tracks": new_tracks_sess, + }, + } + + # ======= location =========== + def set_location(self, location): + """ + Sets initial location for when the browser first loads. + + e.g: + set_location("chr1:1..90") + + :param str location: location, syntax 'refName:start-end' + :raises TypeError: if view is CGV, location not supported in CGV + """ + if self.view == "CGV": + raise TypeError("Location is not available to set on a CGV") + else: + self.config["location"] = location + + # ======= default session ======== + def set_default_session(self, tracks_ids, display_assembly=True): + """ + Sets the default session given a list of track ids + + e.g: + set_default_session(['track_id', 'track_id2']) + + :param tracks_ids: list[str] list of track ids to display + :param boolean display_assembly: display the assembly reference + sequence track. Defaults to True + :raises Exception: if assembly has not been configured + """ + err = "Please set the assembly before setting the default session." + if not self.get_assembly(): + raise Exception(err) + reference_track = {} + tracks_configs = [] + if display_assembly: + reference_track = self.get_reference_track() + tracks_configs.append(reference_track) + tracks_to_display = [t for t in self.get_tracks() if t["trackId"] in tracks_ids] + # guess the display type + for t in tracks_to_display: + tracks_configs.append(self.get_track_display(t)) + if self.view == "CGV": + self.config["defaultSession"] = { + "name": "my session", + "view": { + "id": "circularView", + "type": "CircularView", + "tracks": tracks_configs, + }, + } + else: + self.config["defaultSession"] = { + "name": "my session", + "view": { + "id": "LinearGenomeView", + "type": "LinearGenomeView", + "tracks": tracks_configs, + }, + } + + def get_default_session(self): + # Returns the defaultSession subconfiguration + return self.config["defaultSession"] + + # ====== Advanced Customization =============== + def get_text_search_adapters(self): + # Returns the aggregateTextSearchAdapters in the config + return self.config["aggregateTextSearchAdapters"] + + def add_text_search_adapter(self, ix, ixx, meta, adapter_id=None): + """ + Adds an aggregate trix text search adapter. + Currently not available for Circular Genome View + + e.g: + add_text_search_adapter("url/file.ix", url/file.ixx", + "url/meta.json") + + :param str ix: url/path to ix file + :param str ixx: url/path to ixx file + :param str meta: url/path to meta.json file + :param str adapter_id: optional adapter_id + :raises Exception: if assembly has not been configured + :raises TypeError: if adapter with same adapter id + is already configured + :raises TypeError: Paths are only supported in jupyter. + :raises TypeError: if view is CGV + """ + err = "Please set the assembly before adding a text search adapter." + if not self.get_assembly(): + raise Exception(err) + local = is_url(ix) and is_url(ixx) and is_url(meta) + if local and not self.jupyter: + TypeError( + f'Paths for "{ix},{ixx},and {meta}"' + " are used in an unsupported environment. Paths are " + "supported in Jupyter notebooks and Jupyter lab.Please" + " use a url for your assembly data. You can check out" + " our local file support docs for more information" + ) + + if self.view == "CGV": + raise TypeError("Text Searching not currently available in CGV") + assembly_name = self.get_assembly_name() + default_id = f"{assembly_name}-{guess_file_name(ix)}-index" + text_id = default_id if adapter_id is None else adapter_id + text_search_adapter = { + "type": "TrixTextSearchAdapter", + "textSearchAdapterId": text_id, + "ixFilePath": { + "uri": make_url_colab_jupyter( + ix, colab=self.colab, nb_host=self.nb_host, nb_port=self.nb_port + ), + "locationType": "UriLocation", + }, + "ixxFilePath": { + "uri": make_url_colab_jupyter( + ixx, colab=self.colab, nb_host=self.nb_host, nb_port=self.nb_port + ), + "locationType": "UriLocation", + }, + "metaFilePath": { + "uri": make_url_colab_jupyter( + meta, colab=self.colab, nb_host=self.nb_host, nb_port=self.nb_port + ), + "locationType": "UriLocation", + }, + "assemblyNames": [assembly_name], } - - # ======= location =========== - def set_location(self, location): - """ - Sets initial location for when the browser first loads. - - e.g: - set_location("chr1:1..90") - - :param str location: location, syntax 'refName:start-end' - :raises TypeError: if view is CGV, location not supported in CGV - """ - if self.view == "CGV": - raise TypeError("Location is not available to set on a CGV") - else: - self.config["location"] = location - - # ======= default session ======== - def set_default_session(self, tracks_ids, display_assembly=True): - """ - Sets the default session given a list of track ids - - e.g: - set_default_session(['track_id', 'track_id2']) - - :param tracks_ids: list[str] list of track ids to display - :param boolean display_assembly: display the assembly reference - sequence track. Defaults to True - :raises Exception: if assembly has not been configured - """ - err = "Please set the assembly before setting the default session." - if not self.get_assembly(): - raise Exception(err) - reference_track = {} - tracks_configs = [] - if display_assembly: - reference_track = self.get_reference_track() - tracks_configs.append(reference_track) - tracks_to_display = [t for t in self.get_tracks() if t["trackId"] in tracks_ids] - # guess the display type - for t in tracks_to_display: - tracks_configs.append(self.get_track_display(t)) - if self.view == "CGV": - self.config["defaultSession"] = { - "name": "my session", - "view": { - "id": "circularView", - "type": "CircularView", - "tracks": tracks_configs, - }, - } - else: - self.config["defaultSession"] = { - "name": "my session", - "view": { - "id": "LinearGenomeView", - "type": "LinearGenomeView", - "tracks": tracks_configs, - }, - } - - def get_default_session(self): - # Returns the defaultSession subconfiguration - return self.config["defaultSession"] - - # ====== Advanced Customization =============== - def get_text_search_adapters(self): - # Returns the aggregateTextSearchAdapters in the config - return self.config["aggregateTextSearchAdapters"] - - def add_text_search_adapter(self, ix, ixx, meta, adapter_id=None): - """ - Adds an aggregate trix text search adapter. - Currently not available for Circular Genome View - - e.g: - add_text_search_adapter("url/file.ix", url/file.ixx", - "url/meta.json") - - :param str ix: url/path to ix file - :param str ixx: url/path to ixx file - :param str meta: url/path to meta.json file - :param str adapter_id: optional adapter_id - :raises Exception: if assembly has not been configured - :raises TypeError: if adapter with same adapter id - is already configured - :raises TypeError: Paths are only supported in jupyter. - :raises TypeError: if view is CGV - """ - err = "Please set the assembly before adding a text search adapter." - if not self.get_assembly(): - raise Exception(err) - local = is_url(ix) and is_url(ixx) and is_url(meta) - if local and not self.jupyter: - TypeError( - f'Paths for "{ix},{ixx},and {meta}"' - " are used in an unsupported environment. Paths are " - "supported in Jupyter notebooks and Jupyter lab.Please" - " use a url for your assembly data. You can check out" - " our local file support docs for more information" - ) - - if self.view == "CGV": - raise TypeError("Text Searching not currently available in CGV") - assembly_name = self.get_assembly_name() - default_id = f"{assembly_name}-{guess_file_name(ix)}-index" - text_id = default_id if adapter_id is None else adapter_id - text_search_adapter = { - "type": "TrixTextSearchAdapter", - "textSearchAdapterId": text_id, - "ixFilePath": { - "uri": make_url_colab_jupyter( - ix, colab=self.colab, nb_host=self.nb_host, nb_port=self.nb_port - ), - "locationType": "UriLocation", - }, - "ixxFilePath": { - "uri": make_url_colab_jupyter( - ixx, colab=self.colab, nb_host=self.nb_host, nb_port=self.nb_port - ), - "locationType": "UriLocation", - }, - "metaFilePath": { - "uri": make_url_colab_jupyter( - meta, colab=self.colab, nb_host=self.nb_host, nb_port=self.nb_port - ), - "locationType": "UriLocation", - }, - "assemblyNames": [assembly_name], - } - adapters = self.get_text_search_adapters() - exists = [a for a in adapters if a["textSearchAdapterId"] == text_id] - if len(exists) > 0: - raise TypeError( - "Adapter already exists for given adapterId: " - f"{text_id}.Provide a different adapter_id" - ) - adapters.append(text_search_adapter) - self.config["aggregateTextSearchAdapters"] = adapters - - def get_theme(self): - # Returns the theme subconfiguration. - subconfiguration = self.config["configuration"] - return subconfiguration["theme"] - - def set_theme(self, primary, secondary=None, tertiary=None, quaternary=None): - """ - Sets the theme in the configuration. Accepts up to 4 - hexadecimal colors. - - e.g: - set_theme("#311b92", "#0097a7", "#f57c00", "#d50000") - - :param str primary: primary color of custom palette - :param str secondary: (optional) secondary color - :param str tertiary: (optional) tertiary color - :param str quaternary: (optional) quaternary color - """ - palette = {"primary": {"main": primary}} - if secondary: - palette["secondary"] = {"main": secondary} - if tertiary: - palette["tertiary"] = {"main": tertiary} - if quaternary: - palette["quaternary"] = {"main": quaternary} - self.config["configuration"] = {"theme": {"palette": palette}} + adapters = self.get_text_search_adapters() + exists = [a for a in adapters if a["textSearchAdapterId"] == text_id] + if len(exists) > 0: + raise TypeError( + "Adapter already exists for given adapterId: " + f"{text_id}.Provide a different adapter_id" + ) + adapters.append(text_search_adapter) + self.config["aggregateTextSearchAdapters"] = adapters + + def get_theme(self): + # Returns the theme subconfiguration. + subconfiguration = self.config["configuration"] + return subconfiguration["theme"] + + def set_theme(self, primary, secondary=None, tertiary=None, quaternary=None): + """ + Sets the theme in the configuration. Accepts up to 4 + hexadecimal colors. + + e.g: + set_theme("#311b92", "#0097a7", "#f57c00", "#d50000") + + :param str primary: primary color of custom palette + :param str secondary: (optional) secondary color + :param str tertiary: (optional) tertiary color + :param str quaternary: (optional) quaternary color + """ + palette = {"primary": {"main": primary}} + if secondary: + palette["secondary"] = {"main": secondary} + if tertiary: + palette["tertiary"] = {"main": tertiary} + if quaternary: + palette["quaternary"] = {"main": quaternary} + self.config["configuration"] = {"theme": {"palette": palette}} diff --git a/jbrowse_jupyter/tracks.py b/jbrowse_jupyter/tracks.py index 3e8b6e0..b3b0a99 100644 --- a/jbrowse_jupyter/tracks.py +++ b/jbrowse_jupyter/tracks.py @@ -4,526 +4,526 @@ def make_location(location, protocol, **kwargs): - """ - Creates location object given a location and a protocol. - :param str location: file path/url - :param str protocol: protocol, for now only accepting `uri` - :return: the location subconfiguration - :rtype: obj - :raises ValueError: if a protocol other than `uri` is used. - - """ - in_colab = kwargs.get("colab", False) - notebook_host = kwargs.get("nb_host", 8888) - notebook_port = kwargs.get("nb_port", "localhost") - if protocol == "uri": - return {"uri": location, "locationType": "UriLocation"} - elif protocol == "localPath": - return { - "uri": make_url_colab_jupyter( - location, colab=in_colab, nb_port=notebook_port, nb_host=notebook_host - ), - "locationType": "UriLocation", - } - else: - raise TypeError(f"invalid protocol {protocol}") + """ + Creates location object given a location and a protocol. + :param str location: file path/url + :param str protocol: protocol, for now only accepting `uri` + :return: the location subconfiguration + :rtype: obj + :raises ValueError: if a protocol other than `uri` is used. + + """ + in_colab = kwargs.get("colab", False) + notebook_host = kwargs.get("nb_host", 8888) + notebook_port = kwargs.get("nb_port", "localhost") + if protocol == "uri": + return {"uri": location, "locationType": "UriLocation"} + elif protocol == "localPath": + return { + "uri": make_url_colab_jupyter( + location, colab=in_colab, nb_port=notebook_port, nb_host=notebook_host + ), + "locationType": "UriLocation", + } + else: + raise TypeError(f"invalid protocol {protocol}") def make_url_colab_jupyter(location, **kwargs): - """Generates url from path based on env colab or jupyter""" - in_colab = kwargs.get("colab", False) - notebook_host = kwargs.get("nb_host", 8888) - notebook_port = kwargs.get("nb_port", "localhost") - if in_colab: - return location - return f"http://{notebook_host}:{notebook_port}/files" + location + """Generates url from path based on env colab or jupyter""" + in_colab = kwargs.get("colab", False) + notebook_host = kwargs.get("nb_host", 8888) + notebook_port = kwargs.get("nb_port", "localhost") + if in_colab: + return location + return f"http://{notebook_host}:{notebook_port}/files" + location def supported_track_type(track_type): - """Checks whether or not the given track type is supported.""" - return track_type in { - "AlignmentsTrack", - "QuantitativeTrack", - "VariantTrack", - "FeatureTrack", - } + """Checks whether or not the given track type is supported.""" + return track_type in { + "AlignmentsTrack", + "QuantitativeTrack", + "VariantTrack", + "FeatureTrack", + } def guess_display_type(track_type, view="LGV"): - """ - Returns the possible display type to use for a given track type. - - :param str track_type: the type of the track - :return: the type of the display to use for the given track type - :rtype: str - """ - displays = { - "AlignmentsTrack": "LinearAlignmentsDisplay", - "VariantTrack": "LinearVariantDisplay", - "ReferenceSequenceTrack": "LinearReferenceSequenceDisplay", - "QuantitativeTrack": "LinearWiggleDisplay", - "FeatureTrack": "LinearBasicDisplay", - } - if view == "CGV": + """ + Returns the possible display type to use for a given track type. + + :param str track_type: the type of the track + :return: the type of the display to use for the given track type + :rtype: str + """ displays = { - "VariantTrack": "ChordVariantDisplay", - "ReferenceSequenceTrack": "LinearReferenceSequenceDisplay", + "AlignmentsTrack": "LinearAlignmentsDisplay", + "VariantTrack": "LinearVariantDisplay", + "ReferenceSequenceTrack": "LinearReferenceSequenceDisplay", + "QuantitativeTrack": "LinearWiggleDisplay", + "FeatureTrack": "LinearBasicDisplay", } - if track_type in displays: - return displays[track_type] - else: if view == "CGV": - return "ChordVariantDisplay" + displays = { + "VariantTrack": "ChordVariantDisplay", + "ReferenceSequenceTrack": "LinearReferenceSequenceDisplay", + } + if track_type in displays: + return displays[track_type] else: - return "LinearBasicDisplay" + if view == "CGV": + return "ChordVariantDisplay" + else: + return "LinearBasicDisplay" def guess_track_type(adapter_type): - """ - Returns the possible track type to use given an adapter type. - - :param str adapter_type: the type of the adapter - :return: the type of the track to use for the given an adapter type - :rtype: str - """ - known = { - "BamAdapter": "AlignmentsTrack", - "CramAdapter": "AlignmentsTrack", - "BgzipFastaAdapter": "ReferenceSequenceTrack", - "BigWigAdapter": "QuantitativeTrack", - "IndexedFastaAdapter": "ReferenceSequenceTrack", - "TwoBitAdapter": "ReferenceSequenceTrack", - "VcfTabixAdapter": "VariantTrack", - "HicAdapter": "HicTrack", - "PAFAdapter": "SyntenyTrack", - } - if adapter_type in known: - return known[adapter_type] - else: - return "FeatureTrack" - - -def guess_adapter_type(file_location, protocol, index="defaultIndex", **kwargs): - """ - Creates location object given a location and a protocol. - - :param str file_location: file path/url - :param str protocol: protocol, for now only accepting `uri` - :param str index: (optional) path to index - :return: the adapter track subconfiguration - :rtype: obj - """ - notebook_host = kwargs.get("nb_host", 8888) - notebook_port = kwargs.get("nb_port", "localhost") - in_colab = kwargs.get("colab", False) - bam = re.compile(r"\.bam$", re.IGNORECASE) - bed = re.compile(r"\.bed$", re.IGNORECASE) - bed_tabix = re.compile(r"\.bed\.b?gz$", re.IGNORECASE) - big_bed = re.compile(r"\.(bb|bigbed)$", re.IGNORECASE) - big_wig = re.compile(r"\.(bw|bigwig)$", re.IGNORECASE) - cram = re.compile(r"\.cram$", re.IGNORECASE) - fasta_idx = re.compile(r"\.(fa|fasta|fna|mfa)$", re.IGNORECASE) - fasta_gz = re.compile(r"\.(fa|fasta|fna|mfa)\.b?gz$", re.IGNORECASE) - gff3 = re.compile(r"\.gff3$", re.IGNORECASE) - gff3_tabix = re.compile(r"\.gff3?\.b?gz$", re.IGNORECASE) - gtf = re.compile(r"\.gtf$", re.IGNORECASE) - hic = re.compile(r"\.hic", re.IGNORECASE) - nclist = re.compile(r"\/trackData.jsonz?$", re.IGNORECASE) - paf = re.compile(r"\.paf", re.IGNORECASE) - sizes = re.compile(r"\.sizes$", re.IGNORECASE) - sparql = re.compile(r"\/sparql$", re.IGNORECASE) - twobit = re.compile(r"\.2bit$", re.IGNORECASE) - vcf = re.compile(r"\.vcf$", re.IGNORECASE) - vcf_gzp = re.compile(r"\.vcf\.b?gz$", re.IGNORECASE) - vcf_idx = re.compile(r"\.vcf\.idx$", re.IGNORECASE) - - # bam - if bool(re.search(bam, file_location)): - return { - "type": "BamAdapter", - "bamLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "index": { - "location": make_location( - f"{file_location}.bai", - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "indexType": "CSI" - if (index != "defaultIndex" and index.upper().endswith("CSI")) - else "BAI", - }, - } - # cram - if bool(re.search(cram, file_location)): - return { - "type": "CramAdapter", - "cramLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "craiLocation": make_location( - f"{file_location}.crai", - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - } - - # gff3 - if bool(re.search(gff3, file_location)): - return { - "type": "UNSUPPORTED", - } - - # gtf - if bool(re.search(gtf, file_location)): - return { - "type": "UNSUPPORTED", - } - - # gff3 tabix - if bool(re.search(gff3_tabix, file_location)): - return { - "type": "Gff3TabixAdapter", - "gffGzLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "index": { - "location": make_location( - f"{file_location}.tbi", - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "indexType": "TBI", - }, - } - - # vcf - if bool(re.search(vcf, file_location)): - return { - "type": "VcfAdapter", - "vcfLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - } - - # vcf idx - if bool(re.search(vcf_idx, file_location)): - return { - "type": "UNSUPPORTED", - } - - # vcf gzipped - if bool(re.search(vcf_gzp, file_location)): - return { - "type": "VcfTabixAdapter", - "vcfGzLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "index": { - "location": make_location( - f"{file_location}.tbi", - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "indexType": "CSI" - if (index != "defaultIndex" and index.upper().endswith("CSI")) - else "TBI", - }, - } - - # bigwig - if bool(re.search(big_wig, file_location)): - return { - "type": "BigWigAdapter", - "bigWigLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - } - # bed - if bool(re.search(bed, file_location)): - return { - "type": "UNSUPPORTED", - } - - # bed gz - if bool(re.search(bed_tabix, file_location)): - return { - "type": "BedTabixAdapter", - "bedGzLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "index": { - "location": make_location( - f"{file_location}.tbi", - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "indexType": "CSI" - if (index != "defaultIndex" and index.upper().endswith("CSI")) - else "TBI", - }, - } - - # bigbed - if bool(re.search(big_bed, file_location)): - return { - "type": "BigBedAdapter", - "bigBedLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - } - - # fasta indexed - if bool(re.search(fasta_idx, file_location)): - fai = index if index != "defaultIndex" else f"{file_location}.fai" - return { - "type": "IndexedFastaAdapter", - "fastaLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "faiLocation": make_location( - fai, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - } - - # Bgzipped fasta - if bool(re.search(fasta_gz, file_location)): - return { - "type": "BgzipFastaAdapter", - "fastaLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "faiLocation": make_location( - f"{file_location}.fai", - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - "gziLocation": make_location( - f"{file_location}.gzi", - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), + """ + Returns the possible track type to use given an adapter type. + + :param str adapter_type: the type of the adapter + :return: the type of the track to use for the given an adapter type + :rtype: str + """ + known = { + "BamAdapter": "AlignmentsTrack", + "CramAdapter": "AlignmentsTrack", + "BgzipFastaAdapter": "ReferenceSequenceTrack", + "BigWigAdapter": "QuantitativeTrack", + "IndexedFastaAdapter": "ReferenceSequenceTrack", + "TwoBitAdapter": "ReferenceSequenceTrack", + "VcfTabixAdapter": "VariantTrack", + "HicAdapter": "HicTrack", + "PAFAdapter": "SyntenyTrack", } + if adapter_type in known: + return known[adapter_type] + else: + return "FeatureTrack" - # twobit - if bool(re.search(twobit, file_location)): - return { - "type": "TwoBitAdapter", - "twoBitLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - } - # sizes - if bool(re.search(sizes, file_location)): - return { - "type": "UNSUPPORTED", - } - # nclist - if bool(re.search(nclist, file_location)): - return { - "type": "NCListAdapter", - "rootUrlTemplate": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - } - # sparql - if bool(re.search(sparql, file_location)): - return { - "type": "SPARQLAdapter", - "endpoint": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - } - # hic - if bool(re.search(hic, file_location)): - return { - "type": "HicAdapter", - "hicLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), - } +def guess_adapter_type(file_location, protocol, index="defaultIndex", **kwargs): + """ + Creates location object given a location and a protocol. + + :param str file_location: file path/url + :param str protocol: protocol, for now only accepting `uri` + :param str index: (optional) path to index + :return: the adapter track subconfiguration + :rtype: obj + """ + notebook_host = kwargs.get("nb_host", 8888) + notebook_port = kwargs.get("nb_port", "localhost") + in_colab = kwargs.get("colab", False) + bam = re.compile(r"\.bam$", re.IGNORECASE) + bed = re.compile(r"\.bed$", re.IGNORECASE) + bed_tabix = re.compile(r"\.bed\.b?gz$", re.IGNORECASE) + big_bed = re.compile(r"\.(bb|bigbed)$", re.IGNORECASE) + big_wig = re.compile(r"\.(bw|bigwig)$", re.IGNORECASE) + cram = re.compile(r"\.cram$", re.IGNORECASE) + fasta_idx = re.compile(r"\.(fa|fasta|fna|mfa)$", re.IGNORECASE) + fasta_gz = re.compile(r"\.(fa|fasta|fna|mfa)\.b?gz$", re.IGNORECASE) + gff3 = re.compile(r"\.gff3$", re.IGNORECASE) + gff3_tabix = re.compile(r"\.gff3?\.b?gz$", re.IGNORECASE) + gtf = re.compile(r"\.gtf$", re.IGNORECASE) + hic = re.compile(r"\.hic", re.IGNORECASE) + nclist = re.compile(r"\/trackData.jsonz?$", re.IGNORECASE) + paf = re.compile(r"\.paf", re.IGNORECASE) + sizes = re.compile(r"\.sizes$", re.IGNORECASE) + sparql = re.compile(r"\/sparql$", re.IGNORECASE) + twobit = re.compile(r"\.2bit$", re.IGNORECASE) + vcf = re.compile(r"\.vcf$", re.IGNORECASE) + vcf_gzp = re.compile(r"\.vcf\.b?gz$", re.IGNORECASE) + vcf_idx = re.compile(r"\.vcf\.idx$", re.IGNORECASE) + + # bam + if bool(re.search(bam, file_location)): + return { + "type": "BamAdapter", + "bamLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "index": { + "location": make_location( + f"{file_location}.bai", + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "indexType": "CSI" + if (index != "defaultIndex" and index.upper().endswith("CSI")) + else "BAI", + }, + } + # cram + if bool(re.search(cram, file_location)): + return { + "type": "CramAdapter", + "cramLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "craiLocation": make_location( + f"{file_location}.crai", + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + + # gff3 + if bool(re.search(gff3, file_location)): + return { + "type": "UNSUPPORTED", + } + + # gtf + if bool(re.search(gtf, file_location)): + return { + "type": "UNSUPPORTED", + } + + # gff3 tabix + if bool(re.search(gff3_tabix, file_location)): + return { + "type": "Gff3TabixAdapter", + "gffGzLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "index": { + "location": make_location( + f"{file_location}.tbi", + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "indexType": "TBI", + }, + } + + # vcf + if bool(re.search(vcf, file_location)): + return { + "type": "VcfAdapter", + "vcfLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + + # vcf idx + if bool(re.search(vcf_idx, file_location)): + return { + "type": "UNSUPPORTED", + } + + # vcf gzipped + if bool(re.search(vcf_gzp, file_location)): + return { + "type": "VcfTabixAdapter", + "vcfGzLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "index": { + "location": make_location( + f"{file_location}.tbi", + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "indexType": "CSI" + if (index != "defaultIndex" and index.upper().endswith("CSI")) + else "TBI", + }, + } + + # bigwig + if bool(re.search(big_wig, file_location)): + return { + "type": "BigWigAdapter", + "bigWigLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + # bed + if bool(re.search(bed, file_location)): + return { + "type": "UNSUPPORTED", + } + + # bed gz + if bool(re.search(bed_tabix, file_location)): + return { + "type": "BedTabixAdapter", + "bedGzLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "index": { + "location": make_location( + f"{file_location}.tbi", + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "indexType": "CSI" + if (index != "defaultIndex" and index.upper().endswith("CSI")) + else "TBI", + }, + } + + # bigbed + if bool(re.search(big_bed, file_location)): + return { + "type": "BigBedAdapter", + "bigBedLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + + # fasta indexed + if bool(re.search(fasta_idx, file_location)): + fai = index if index != "defaultIndex" else f"{file_location}.fai" + return { + "type": "IndexedFastaAdapter", + "fastaLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "faiLocation": make_location( + fai, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + + # Bgzipped fasta + if bool(re.search(fasta_gz, file_location)): + return { + "type": "BgzipFastaAdapter", + "fastaLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "faiLocation": make_location( + f"{file_location}.fai", + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + "gziLocation": make_location( + f"{file_location}.gzi", + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + + # twobit + if bool(re.search(twobit, file_location)): + return { + "type": "TwoBitAdapter", + "twoBitLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + # sizes + if bool(re.search(sizes, file_location)): + return { + "type": "UNSUPPORTED", + } + # nclist + if bool(re.search(nclist, file_location)): + return { + "type": "NCListAdapter", + "rootUrlTemplate": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + + # sparql + if bool(re.search(sparql, file_location)): + return { + "type": "SPARQLAdapter", + "endpoint": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + # hic + if bool(re.search(hic, file_location)): + return { + "type": "HicAdapter", + "hicLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } + + # paf + if bool(re.search(paf, file_location)): + return { + "type": "PAFAdapter", + "pafLocation": make_location( + file_location, + protocol, + colab=in_colab, + nb_host=notebook_host, + nb_port=notebook_port, + ), + } - # paf - if bool(re.search(paf, file_location)): return { - "type": "PAFAdapter", - "pafLocation": make_location( - file_location, - protocol, - colab=in_colab, - nb_host=notebook_host, - nb_port=notebook_port, - ), + "type": "UNKNOWN", } - return { - "type": "UNKNOWN", - } - # ================== DataFrame Track ================= def check_track_data(df): - """ - Checks that data frame is a valid data frame with. - - :param df: the data frame with track data. - :return: whether or not df is a valid data frame for the track. - :rtype: boolean - :raises TypeError: - if df is not a valid data frame - if df data frame is empty - if df does not have the required columns (refName, start, end, name) - """ - if not isinstance(df, pd.DataFrame): - raise TypeError("Track data must be a DataFrame") - - if df.empty: - raise TypeError("DataFrame must not be empty.") - - if not check_columns(df): - raise TypeError("DataFrame must contain all required columns.") - - ref_names = df.dtypes["refName"] - names = df.dtypes["name"] - start = df.dtypes["start"] - end = df.dtypes["end"] - correct_string = ref_names == object and names == object - correct_numbers = start == int and end == int - if not (correct_numbers and correct_string): - col_err = "One or more columns do not have the correct data type." - raise TypeError(col_err) + """ + Checks that data frame is a valid data frame with. + + :param df: the data frame with track data. + :return: whether or not df is a valid data frame for the track. + :rtype: boolean + :raises TypeError: + if df is not a valid data frame + if df data frame is empty + if df does not have the required columns (refName, start, end, name) + """ + if not isinstance(df, pd.DataFrame): + raise TypeError("Track data must be a DataFrame") + + if df.empty: + raise TypeError("DataFrame must not be empty.") + + if not check_columns(df): + raise TypeError("DataFrame must contain all required columns.") + + ref_names = df.dtypes["refName"] + names = df.dtypes["name"] + start = df.dtypes["start"] + end = df.dtypes["end"] + correct_string = ref_names == object and names == object + correct_numbers = start == int and end == int + if not (correct_numbers and correct_string): + col_err = "One or more columns do not have the correct data type." + raise TypeError(col_err) def check_columns(df): - """ - Checks whether dataframe contains the required columns. + """ + Checks whether dataframe contains the required columns. - :param df: the data frame with track data. - :return: whether or not df contains all the required columns. - required columns: refName, start, end, name, score (is optional) - :rtype: boolean - """ - required = ["refName", "start", "end", "name"] - return all(col in df for col in required) + :param df: the data frame with track data. + :return: whether or not df contains all the required columns. + required columns: refName, start, end, name, score (is optional) + :rtype: boolean + """ + required = ["refName", "start", "end", "name"] + return all(col in df for col in required) def get_from_config_adapter(df): - """ - Creates a FromConfigAdapter adapter subconfiguration to - use in the data frame track configuration. + """ + Creates a FromConfigAdapter adapter subconfiguration to + use in the data frame track configuration. - :param df: the data frame with track data. - :return: the adapter subconfiguration - :rtype: obj - """ - features = get_track_data(df) - return {"type": "FromConfigAdapter", "features": features} + :param df: the data frame with track data. + :return: the adapter subconfiguration + :rtype: obj + """ + features = get_track_data(df) + return {"type": "FromConfigAdapter", "features": features} def format_feature(feature): - """Adds a uniqueId to the given featyre.""" - unique_id = str(uuid.uuid4().hex) - feature["uniqueId"] = unique_id + """Adds a uniqueId to the given featyre.""" + unique_id = str(uuid.uuid4().hex) + feature["uniqueId"] = unique_id def get_track_data(df): - """ - Retrieves the features from the data frame. - - :param df: the data frame with track data. - :return: features - :rtype: list[obj] - """ - required = ["refName", "start", "end", "name", "additional", "type"] - df["type"] = "" - df["additional"] = "" - if "score" in df: - required.append("score") - if df.dtypes["score"] != int: - raise TypeError("Score column must be an integer") - filtered = df[required] - rows = filtered.to_dict("records") - features = [] - for r in rows: - newFeature = r - newFeature["uniqueId"] = str(uuid.uuid4().hex) - features.append(newFeature) - return features + """ + Retrieves the features from the data frame. + + :param df: the data frame with track data. + :return: features + :rtype: list[obj] + """ + required = ["refName", "start", "end", "name", "additional", "type"] + df["type"] = "" + df["additional"] = "" + if "score" in df: + required.append("score") + if df.dtypes["score"] != int: + raise TypeError("Score column must be an integer") + filtered = df[required] + rows = filtered.to_dict("records") + features = [] + for r in rows: + newFeature = r + newFeature["uniqueId"] = str(uuid.uuid4().hex) + features.append(newFeature) + return features diff --git a/jbrowse_jupyter/util.py b/jbrowse_jupyter/util.py index 29abf2b..1efbfb0 100644 --- a/jbrowse_jupyter/util.py +++ b/jbrowse_jupyter/util.py @@ -7,463 +7,436 @@ hg38_lgv = { - "assembly": { - "name": "GRCh38", - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": "GRCh38-ReferenceSequenceTrack", - "adapter": { - "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz" + "assembly": { + "name": "GRCh38", + "sequence": { + "type": "ReferenceSequenceTrack", + "trackId": "GRCh38-ReferenceSequenceTrack", + "adapter": { + "type": "BgzipFastaAdapter", + "fastaLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz" + }, + "faiLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai" + }, + "gziLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi" + }, + }, }, - "faiLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai" - }, - "gziLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi" - }, - }, - }, - "aliases": ["hg38"], - "refNameAliases": { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/hg38_aliases.txt" + "aliases": ["hg38"], + "refNameAliases": { + "adapter": { + "type": "RefNameAliasAdapter", + "location": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/hg38_aliases.txt" + }, + } }, - } }, - }, - "tracks": [ - { - "type": "FeatureTrack", - "trackId": "ncbi_refseq_109_hg38", - "name": "NCBI RefSeq (GFF3Tabix)", - "assemblyNames": ["GRCh38"], - "category": ["Annotation"], - "adapter": { - "type": "Gff3TabixAdapter", - "gffGzLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/ncbi_refseq/GCA_000001405.15_GRCh38_full_analysis_set.refseq_annotation.sorted.gff.gz" - }, - "index": { - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/ncbi_refseq/GCA_000001405.15_GRCh38_full_analysis_set.refseq_annotation.sorted.gff.gz.tbi" - } - }, - }, - } - ], - "location": "10:29,838,737..29,838,819", - "defaultSession": { - "name": "My session", - "view": { - "id": "linearGenomeView", - "type": "LinearGenomeView", - "tracks": [ + "tracks": [ { - "type": "ReferenceSequenceTrack", - "configuration": "GRCh38-ReferenceSequenceTrack", - "displays": [ - { - "type": "LinearReferenceSequenceDisplay", - "configuration": "GRCh38-ReferenceSequenceTrack-LinearReferenceSequenceDisplay", - } - ], + "type": "FeatureTrack", + "trackId": "ncbi_refseq_109_hg38", + "name": "NCBI RefSeq (GFF3Tabix)", + "assemblyNames": ["GRCh38"], + "category": ["Annotation"], + "adapter": { + "type": "Gff3TabixAdapter", + "gffGzLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/ncbi_refseq/GCA_000001405.15_GRCh38_full_analysis_set.refseq_annotation.sorted.gff.gz" + }, + "index": { + "location": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/ncbi_refseq/GCA_000001405.15_GRCh38_full_analysis_set.refseq_annotation.sorted.gff.gz.tbi" + } + }, + }, } - ], + ], + "location": "10:29,838,737..29,838,819", + "defaultSession": { + "name": "My session", + "view": { + "id": "linearGenomeView", + "type": "LinearGenomeView", + "tracks": [ + { + "type": "ReferenceSequenceTrack", + "configuration": "GRCh38-ReferenceSequenceTrack", + "displays": [ + { + "type": "LinearReferenceSequenceDisplay", + "configuration": "GRCh38-ReferenceSequenceTrack-LinearReferenceSequenceDisplay", + } + ], + } + ], + }, }, - }, } hg38_cgv = { - "assembly": { - "name": "hg38", - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": "GRCh38-ReferenceSequenceTrack", - "adapter": { - "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": "https://jbrowse.org/genomes/GRCh38/fasta/hg38.prefix.fa.gz", - "locationType": "UriLocation", + "assembly": { + "name": "hg38", + "sequence": { + "type": "ReferenceSequenceTrack", + "trackId": "GRCh38-ReferenceSequenceTrack", + "adapter": { + "type": "BgzipFastaAdapter", + "fastaLocation": { + "uri": "https://jbrowse.org/genomes/GRCh38/fasta/hg38.prefix.fa.gz", + "locationType": "UriLocation", + }, + "faiLocation": { + "uri": "https://jbrowse.org/genomes/GRCh38/fasta/hg38.prefix.fa.gz.fai", + "locationType": "UriLocation", + }, + "gziLocation": { + "uri": "https://jbrowse.org/genomes/GRCh38/fasta/hg38.prefix.fa.gz.gzi", + "locationType": "UriLocation", + }, + }, }, - "faiLocation": { - "uri": "https://jbrowse.org/genomes/GRCh38/fasta/hg38.prefix.fa.gz.fai", - "locationType": "UriLocation", - }, - "gziLocation": { - "uri": "https://jbrowse.org/genomes/GRCh38/fasta/hg38.prefix.fa.gz.gzi", - "locationType": "UriLocation", + "aliases": ["GRCh38"], + "refNameAliases": { + "adapter": { + "type": "RefNameAliasAdapter", + "location": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/hg38_aliases.txt", + "locationType": "UriLocation", + }, + } }, - }, }, - "aliases": ["GRCh38"], - "refNameAliases": { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/hg38_aliases.txt", - "locationType": "UriLocation", + "tracks": [], + "defaultSession": { + "name": "My session", + "view": { + "id": "circularView", + "type": "CircularView", + "bpPerPx": 5000000, + "tracks": [], }, - } }, - }, - "defaultSession": { - "name": "My session", - "view": { - "id": "circularView", - "type": "CircularView", - "bpPerPx": 5000000, - "tracks": [], - }, - }, } hg19_lgv = { - "assembly": { - "name": "hg19", - "aliases": ["GRCh37"], - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": "hg19-ReferenceSequenceTrack", - "adapter": { - "type": "BgzipFastaAdapter", - "fastaLocation": {"uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz"}, - "faiLocation": {"uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.fai"}, - "gziLocation": {"uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.gzi"}, - }, - }, - "refNameAliases": { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt" + "assembly": { + "name": "hg19", + "aliases": ["GRCh37"], + "sequence": { + "type": "ReferenceSequenceTrack", + "trackId": "hg19-ReferenceSequenceTrack", + "adapter": { + "type": "BgzipFastaAdapter", + "fastaLocation": { + "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz" + }, + "faiLocation": { + "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.fai" + }, + "gziLocation": { + "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.gzi" + }, + }, }, - } - }, - }, - "tracks": [ - { - "type": "FeatureTrack", - "trackId": "repeats_hg19", - "name": "Repeats", - "assemblyNames": ["hg19"], - "category": ["Annotation"], - "adapter": { - "type": "BigBedAdapter", - "bigBedLocation": { - "uri": "https://jbrowse.org/genomes/hg19/repeats.bb", - "locationType": "UriLocation", + "refNameAliases": { + "adapter": { + "type": "RefNameAliasAdapter", + "location": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt" + }, + } }, - }, - } - ], - "defaultSession": { - "name": "test", - "view": { - "id": "aU9Nqje1U", - "type": "LinearGenomeView", - "tracks": [ + }, + "tracks": [ { - "type": "ReferenceSequenceTrack", - "configuration": "hg19-ReferenceSequenceTrack", - "displays": [ - { - "type": "LinearReferenceSequenceDisplay", - "configuration": "hg19-ReferenceSequenceTrack-LinearReferenceSequenceDisplay", - } - ], + "type": "FeatureTrack", + "trackId": "repeats_hg19", + "name": "Repeats", + "assemblyNames": ["hg19"], + "category": ["Annotation"], + "adapter": { + "type": "BigBedAdapter", + "bigBedLocation": { + "uri": "https://jbrowse.org/genomes/hg19/repeats.bb", + "locationType": "UriLocation", + }, + }, } - ], + ], + "defaultSession": { + "name": "test", + "view": { + "id": "aU9Nqje1U", + "type": "LinearGenomeView", + "tracks": [ + { + "type": "ReferenceSequenceTrack", + "configuration": "hg19-ReferenceSequenceTrack", + "displays": [ + { + "type": "LinearReferenceSequenceDisplay", + "configuration": "hg19-ReferenceSequenceTrack-LinearReferenceSequenceDisplay", + } + ], + } + ], + }, }, - }, - "location": "1:68654694..68654738", + "location": "1:68654694..68654738", } hg19_cgv = { - "assembly": { - "name": "hg19", - "aliases": ["GRCh37"], - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": "Pd8Wh30ei9R", - "adapter": { - "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz", - "locationType": "UriLocation", - }, - "faiLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.fai", - "locationType": "UriLocation", - }, - "gziLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.gzi", - "locationType": "UriLocation", + "assembly": { + "name": "hg19", + "aliases": ["GRCh37"], + "sequence": { + "type": "ReferenceSequenceTrack", + "trackId": "Pd8Wh30ei9R", + "adapter": { + "type": "BgzipFastaAdapter", + "fastaLocation": { + "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz", + "locationType": "UriLocation", + }, + "faiLocation": { + "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.fai", + "locationType": "UriLocation", + }, + "gziLocation": { + "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.gzi", + "locationType": "UriLocation", + }, + }, }, - }, - }, - "refNameAliases": { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt", - "locationType": "UriLocation", + "refNameAliases": { + "adapter": { + "type": "RefNameAliasAdapter", + "location": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt", + "locationType": "UriLocation", + }, + } }, - } }, - }, - "tracks": [ - { - "type": "VariantTrack", - "trackId": "pacbio_sv_vcf", - "name": "HG002 Pacbio SV (VCF)", - "assemblyNames": ["hg19"], - "category": ["GIAB"], - "adapter": { - "type": "VcfTabixAdapter", - "vcfGzLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/pacbio/hs37d5.HG002-SequelII-CCS.bnd-only.sv.vcf.gz", - "locationType": "UriLocation", - }, - "index": { - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/pacbio/hs37d5.HG002-SequelII-CCS.bnd-only.sv.vcf.gz.tbi", - "locationType": "UriLocation", - } + "tracks": [], + "defaultSession": { + "name": "My session", + "view": { + "id": "circularView", + "type": "CircularView", + "bpPerPx": 5000000, + "tracks": [], }, - }, - } - ], - "defaultSession": { - "name": "My session", - "view": { - "id": "circularView", - "type": "CircularView", - "bpPerPx": 5000000, - "tracks": [ - { - "id": "uPdLKHik1", - "type": "VariantTrack", - "configuration": "pacbio_sv_vcf", - "displays": [ - { - "id": "v9QVAR3oaB", - "type": "ChordVariantDisplay", - "configuration": "pacbio_sv_vcf-ChordVariantDisplay", - } - ], - } - ], }, - }, } def is_url(filePath): - """ - Checks whether or not the file path - is a valid url. - :param str filePath: file path/url - :return: returns true if path matches pattern starting with - http:// or https:// - :rtype: boolean - """ - regex = re.compile(r"^https?:\/\/", re.IGNORECASE) - return re.match(regex, filePath) is not None + """ + Checks whether or not the file path + is a valid url. + :param str filePath: file path/url + :return: returns true if path matches pattern starting with + http:// or https:// + :rtype: boolean + """ + regex = re.compile(r"^https?:\/\/", re.IGNORECASE) + return re.match(regex, filePath) is not None def guess_file_name(data): - """ - Guess the file name given a path. + """ + Guess the file name given a path. - :param str data: file path - :return: the predicted file name - :rtype: str - """ - url = urlparse(data) - return os.path.basename(url.path) + :param str data: file path + :return: the predicted file name + :rtype: str + """ + url = urlparse(data) + return os.path.basename(url.path) def get_name(assembly_file): - """Returns the name of the assembly based on the assembly data file""" - name_end = 0 - name_start = 0 - for i in range(0, len(assembly_file)): - if assembly_file[len(assembly_file) - i - 1 : len(assembly_file) - i] == "/": - name_start = len(assembly_file) - i - break - for i in range(name_start, len(assembly_file)): - if assembly_file[i : i + 1] == ".": - name_end = i - break + """Returns the name of the assembly based on the assembly data file""" + name_end = 0 + name_start = 0 + for i in range(0, len(assembly_file)): + if assembly_file[len(assembly_file) - i - 1 : len(assembly_file) - i] == "/": + name_start = len(assembly_file) - i + break + for i in range(name_start, len(assembly_file)): + if assembly_file[i : i + 1] == ".": + name_end = i + break - return assembly_file[name_start:name_end] + return assembly_file[name_start:name_end] def get_name_regex(assembly_file): - """Returns the name of the assembly based on the assembly data file""" - return re.search(r"(\w+)\.(?:fa|fasta|fa\.gz)$", assembly_file).group(1) + """Returns the name of the assembly based on the assembly data file""" + return re.search(r"(\w+)\.(?:fa|fasta|fa\.gz)$", assembly_file).group(1) def get_default(name, view_type="LGV"): - """Returns the configuration object given a genome name.""" - if name == "hg383838": - if view_type == "CGV": - return hg38_cgv - else: - return hg38_lgv - elif name == "hg19": - if view_type == "CGV": - return hg19_cgv - else: - return hg19_lgv + """Returns the configuration object given a genome name.""" + if name == "hg383838": + if view_type == "CGV": + return hg38_cgv + else: + return hg38_lgv + elif name == "hg19": + if view_type == "CGV": + return hg19_cgv + else: + return hg19_lgv def create_component(conf, **kwargs): - """ - Creates a Dash JBrowse LinearGenomeView component - given a configuration object and optionally an id. + """ + Creates a Dash JBrowse LinearGenomeView component + given a configuration object and optionally an id. - e.g: + e.g: - conf = hg38.get_config() - create_component(conf,id="hg38-test", dash_comp="CGV") - where hg38 is an instance of JBrowseConfig + conf = hg38.get_config() + create_component(conf,id="hg38-test", dash_comp="CGV") + where hg38 is an instance of JBrowseConfig - :param obj conf: configuration object from JBrowseConfig - instance - :param str id: id to use in Dash component - :param str dash_comp: (optional) dash component type to - create. Currently supporting LGV and CGV. - defaults to `LGV` when no dash_comp= is specified - :return: Dash JBrowse View given dash_comp type - :rtype: Dash JBrowse component - """ - supported = set({"LGV", "CGV"}) - comp_id = "jbrowse-component" - dash_comp = kwargs.get("dash_comp", "LGV") - the_view_type = conf["defaultSession"]["view"]["type"] - msg = "config was passed but attempting to create" - err = "Please specify the correct dash_comp." - if the_view_type == "LinearGenomeView" and dash_comp == "CGV": - raise TypeError(f"LGV {msg} a CGV.{err}") - if the_view_type == "CircularView" and dash_comp == "LGV": - raise TypeError(f"CGV {msg} a LGV.{err}") - if "id" in kwargs: - comp_id = kwargs["id"] - if dash_comp in supported: - if dash_comp == "LGV": - return jb.LinearGenomeView( - id=comp_id, - assembly=conf["assembly"], - tracks=conf["tracks"], - defaultSession=conf["defaultSession"], - location=conf["location"], - configuration=conf["configuration"], - aggregateTextSearchAdapters=conf["aggregateTextSearchAdapters"], - ) - # here is where we can add another view - if dash_comp == "CGV": - return jb.CircularGenomeView( - id=comp_id, - assembly=conf["assembly"], - tracks=conf["tracks"], - defaultSession=conf["defaultSession"], - configuration=conf["configuration"], - ) - else: - raise TypeError(f"The {dash_comp} component is not supported.") + :param obj conf: configuration object from JBrowseConfig + instance + :param str id: id to use in Dash component + :param str dash_comp: (optional) dash component type to + create. Currently supporting LGV and CGV. + defaults to `LGV` when no dash_comp= is specified + :return: Dash JBrowse View given dash_comp type + :rtype: Dash JBrowse component + """ + supported = set({"LGV", "CGV"}) + comp_id = "jbrowse-component" + dash_comp = kwargs.get("dash_comp", "LGV") + the_view_type = conf["defaultSession"]["view"]["type"] + msg = "config was passed but attempting to create" + err = "Please specify the correct dash_comp." + if the_view_type == "LinearGenomeView" and dash_comp == "CGV": + raise TypeError(f"LGV {msg} a CGV.{err}") + if the_view_type == "CircularView" and dash_comp == "LGV": + raise TypeError(f"CGV {msg} a LGV.{err}") + if "id" in kwargs: + comp_id = kwargs["id"] + if dash_comp in supported: + if dash_comp == "LGV": + return jb.LinearGenomeView( + id=comp_id, + assembly=conf["assembly"], + tracks=conf["tracks"], + defaultSession=conf["defaultSession"], + location=conf["location"], + configuration=conf["configuration"], + aggregateTextSearchAdapters=conf["aggregateTextSearchAdapters"], + ) + # here is where we can add another view + if dash_comp == "CGV": + return jb.CircularGenomeView( + id=comp_id, + assembly=conf["assembly"], + tracks=conf["tracks"], + defaultSession=conf["defaultSession"], + configuration=conf["configuration"], + ) + else: + raise TypeError(f"The {dash_comp} component is not supported.") def launch(conf, **kwargs): - """ - Launches a LinearGenomeView Dash JBrowse component in a - server. + """ + Launches a LinearGenomeView Dash JBrowse component in a + server. - e.g - launch(conf, dash_comp="CGV",height=400, port=8002) + e.g + launch(conf, dash_comp="CGV",height=400, port=8002) - :param obj conf: JBrowseConfiguration object to pass to - the Dash JBrowse component - :param str id: (optional) id to use for the Dash JBrowse - component defaults to `jbrowse-component` - :param str dash_comp: (optional) dash component type to - launch. Currently supporting LGV and CGV. - defaults to `LGV` when no dash_comp= is specified - :param int port: (optional) port to utilize when running - the Dash app - :param int height: (optional) the height to utilize for - the Dash app - """ - app = Dash(__name__) - # could add other JBrowse view types e.g Circular, Dotplot - supported = set({"LGV", "CGV"}) - dash_comp = kwargs.get("dash_comp", "LGV") + :param obj conf: JBrowseConfiguration object to pass to + the Dash JBrowse component + :param str id: (optional) id to use for the Dash JBrowse + component defaults to `jbrowse-component` + :param str dash_comp: (optional) dash component type to + launch. Currently supporting LGV and CGV. + defaults to `LGV` when no dash_comp= is specified + :param int port: (optional) port to utilize when running + the Dash app + :param int height: (optional) the height to utilize for + the Dash app + """ + app = Dash(__name__) + # could add other JBrowse view types e.g Circular, Dotplot + supported = set({"LGV", "CGV"}) + dash_comp = kwargs.get("dash_comp", "LGV") - # error for mismatching config and launch type - the_view_type = conf["defaultSession"]["view"]["type"] - msg = "config was passed but attempting to launch" - err = "Please specify the correct dash_comp." - if the_view_type == "LinearGenomeView" and dash_comp == "CGV": - raise TypeError(f"LGV {msg} a CGV.{err}") - if the_view_type == "CircularView" and dash_comp == "LGV": - raise TypeError(f"CGV {msg} a LGV.{err}") - comp_id = "jbrowse-component" - comp_port = 8050 - comp_host = "127.0.0.1" - comp_height = 300 - comp_mode = "inline" - if "id" in kwargs: - comp_id = kwargs["id"] - if "port" in kwargs: - comp_port = kwargs["port"] - if "host" in kwargs: - comp_host = kwargs["host"] - if "height" in kwargs: - comp_height = kwargs["height"] - if "mode" in kwargs: - comp_mode = kwargs["mode"] + # error for mismatching config and launch type + the_view_type = conf["defaultSession"]["view"]["type"] + msg = "config was passed but attempting to launch" + err = "Please specify the correct dash_comp." + if the_view_type == "LinearGenomeView" and dash_comp == "CGV": + raise TypeError(f"LGV {msg} a CGV.{err}") + if the_view_type == "CircularView" and dash_comp == "LGV": + raise TypeError(f"CGV {msg} a LGV.{err}") + comp_id = "jbrowse-component" + comp_port = 8050 + comp_host = "127.0.0.1" + comp_height = 300 + comp_mode = "inline" + if "id" in kwargs: + comp_id = kwargs["id"] + if "port" in kwargs: + comp_port = kwargs["port"] + if "host" in kwargs: + comp_host = kwargs["host"] + if "height" in kwargs: + comp_height = kwargs["height"] + if "mode" in kwargs: + comp_mode = kwargs["mode"] - if dash_comp in supported: - if dash_comp == "LGV": - # create jupyter dash app layout - adapters = conf["aggregateTextSearchAdapters"] - app.layout = html.Div( - [ - jb.LinearGenomeView( - id=comp_id, - assembly=conf["assembly"], - tracks=conf["tracks"], - defaultSession=conf["defaultSession"], - aggregateTextSearchAdapters=adapters, - location=conf["location"], - configuration=conf["configuration"], - ) - ] - ) - if dash_comp == "CGV": - # create jupyter dash app layout - app.layout = html.Div( - [ - jb.CircularGenomeView( - id=comp_id, - assembly=conf["assembly"], - tracks=conf["tracks"], - defaultSession=conf["defaultSession"], - configuration=conf["configuration"], - ) - ] - ) - else: - raise TypeError(f"The {dash_comp} component is not supported.") - app.run_server( - port=comp_port, - host=comp_host, - height=comp_height, - mode=comp_mode, - use_reloader=False, - ) + if dash_comp in supported: + if dash_comp == "LGV": + # create jupyter dash app layout + adapters = conf["aggregateTextSearchAdapters"] + app.layout = html.Div( + [ + jb.LinearGenomeView( + id=comp_id, + assembly=conf["assembly"], + tracks=conf["tracks"], + defaultSession=conf["defaultSession"], + aggregateTextSearchAdapters=adapters, + location=conf["location"], + configuration=conf["configuration"], + ) + ] + ) + if dash_comp == "CGV": + # create jupyter dash app layout + app.layout = html.Div( + [ + jb.CircularGenomeView( + id=comp_id, + assembly=conf["assembly"], + tracks=conf["tracks"], + defaultSession=conf["defaultSession"], + configuration=conf["configuration"], + ) + ] + ) + else: + raise TypeError(f"The {dash_comp} component is not supported.") + app.run_server( + port=comp_port, + host=comp_host, + height=comp_height, + mode=comp_mode, + use_reloader=False, + ) diff --git a/ruff.toml b/ruff.toml index 96068e0..8e0beda 100644 --- a/ruff.toml +++ b/ruff.toml @@ -1,4 +1,4 @@ # Same as Black. line-length = 88 -indent-width = 2 +indent-width = 4 From 68a3d57f145d856a23101fedc886bcccb934ff99 Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 11 Dec 2023 07:57:26 -0500 Subject: [PATCH 8/9] Add formatter check --- .github/workflows/format.yml | 8 +++ jbrowse_jupyter/util.py | 66 ++----------------------- tests/test_tracks.py | 96 ++++++++++++++---------------------- 3 files changed, 49 insertions(+), 121 deletions(-) create mode 100644 .github/workflows/format.yml diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml new file mode 100644 index 0000000..757cae6 --- /dev/null +++ b/.github/workflows/format.yml @@ -0,0 +1,8 @@ +name: Ruff +on: [ push, pull_request ] +jobs: + ruff: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: chartboost/ruff-action@v1 diff --git a/jbrowse_jupyter/util.py b/jbrowse_jupyter/util.py index 1efbfb0..87fffcf 100644 --- a/jbrowse_jupyter/util.py +++ b/jbrowse_jupyter/util.py @@ -35,44 +35,14 @@ } }, }, - "tracks": [ - { - "type": "FeatureTrack", - "trackId": "ncbi_refseq_109_hg38", - "name": "NCBI RefSeq (GFF3Tabix)", - "assemblyNames": ["GRCh38"], - "category": ["Annotation"], - "adapter": { - "type": "Gff3TabixAdapter", - "gffGzLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/ncbi_refseq/GCA_000001405.15_GRCh38_full_analysis_set.refseq_annotation.sorted.gff.gz" - }, - "index": { - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/ncbi_refseq/GCA_000001405.15_GRCh38_full_analysis_set.refseq_annotation.sorted.gff.gz.tbi" - } - }, - }, - } - ], + "tracks": [], "location": "10:29,838,737..29,838,819", "defaultSession": { "name": "My session", "view": { "id": "linearGenomeView", "type": "LinearGenomeView", - "tracks": [ - { - "type": "ReferenceSequenceTrack", - "configuration": "GRCh38-ReferenceSequenceTrack", - "displays": [ - { - "type": "LinearReferenceSequenceDisplay", - "configuration": "GRCh38-ReferenceSequenceTrack-LinearReferenceSequenceDisplay", - } - ], - } - ], + "tracks": [], }, }, } @@ -152,39 +122,13 @@ } }, }, - "tracks": [ - { - "type": "FeatureTrack", - "trackId": "repeats_hg19", - "name": "Repeats", - "assemblyNames": ["hg19"], - "category": ["Annotation"], - "adapter": { - "type": "BigBedAdapter", - "bigBedLocation": { - "uri": "https://jbrowse.org/genomes/hg19/repeats.bb", - "locationType": "UriLocation", - }, - }, - } - ], + "tracks": [], "defaultSession": { "name": "test", "view": { "id": "aU9Nqje1U", "type": "LinearGenomeView", - "tracks": [ - { - "type": "ReferenceSequenceTrack", - "configuration": "hg19-ReferenceSequenceTrack", - "displays": [ - { - "type": "LinearReferenceSequenceDisplay", - "configuration": "hg19-ReferenceSequenceTrack-LinearReferenceSequenceDisplay", - } - ], - } - ], + "tracks": [], }, }, "location": "1:68654694..68654738", @@ -284,7 +228,7 @@ def get_name_regex(assembly_file): def get_default(name, view_type="LGV"): """Returns the configuration object given a genome name.""" - if name == "hg383838": + if name == "hg38": if view_type == "CGV": return hg38_cgv else: diff --git a/tests/test_tracks.py b/tests/test_tracks.py index 1d0e5d4..1dceb0d 100644 --- a/tests/test_tracks.py +++ b/tests/test_tracks.py @@ -1,26 +1,20 @@ import pytest import pandas as pd -from jbrowse_jupyter.tracks import ( - make_location, - check_track_data, - get_track_data -) +from jbrowse_jupyter.tracks import make_location, check_track_data, get_track_data from jbrowse_jupyter.jbrowse_config import create -# test files -base = "https://s3.amazonaws.com/jbrowse.org/genomes/" -cram = base + "hg19/skbr3/reads_lr_skbr3.fa_ngmlr-0.2.3_mapped.down.cram" -bam = base + "hg19/amplicon_deep_seq/out.marked.bam" -gff3 = base + "hg19/ncbi_refseq/GRCh37_latest_genomic.sort.gff" -gff3Tabix = base + "GRCh38/ncbi_refseq/GCA_000001405.15_GRCh38_full" \ - "_analysis_set.refseq_annotation.sorted.gff.gz" -vcf = "https://ftp.ncbi.nlm.nih.gov/pub/" \ - "clinvar/vcf_GRCh37/clinvar.vcf" -vcfGz = "https://ftp.ncbi.nlm.nih.gov/pub/" \ - "clinvar/vcf_GRCh38/clinvar.vcf.gz" -bigWig = "http://hgdownload.cse.ucsc.edu/goldenpath/hg38/" \ - "phyloP100way/hg38.phyloP100way.bw" -gff3_tabix_index = gff3Tabix + ".tbi" +cram = "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/skbr3/reads_lr_skbr3.fa_ngmlr-0.2.3_mapped.down.cram" +bam = ( + "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/amplicon_deep_seq/out.marked.bam" +) +gff3 = "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/ncbi_refseq/GRCh37_latest_genomic.sort.gff" +gff3Tabix = "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/ncbi_refseq/GCA_000001405.15_GRCh38_full_analysis_set.refseq_annotation.sorted.gff.gz" +gff3TabixIndex = "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/ncbi_refseq/GCA_000001405.15_GRCh38_full_analysis_set.refseq_annotation.sorted.gff.gz.tbi" +bigWig = ( + "http://hgdownload.cse.ucsc.edu/goldenpath/hg38/phyloP100way/hg38.phyloP100way.bw" +) +vcf = "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf" +vcfGz = "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf.tbi" def test_make_location(): @@ -39,7 +33,6 @@ def test_add_track_fail(): def test_alignments(): conf = create("LGV", genome="hg19") - # BAM or CRAM alignment data conf.add_track(cram, name="alignments cram track example") conf.add_track(bam, name="alignments bam track example") cram_track = conf.get_track("alignments cram track example") @@ -51,11 +44,9 @@ def test_alignments(): def test_feature(): conf = create("LGV", genome="hg38") track_error = "Adapter type is not recognized" - # gff is not supported with pytest.raises(TypeError) as excinfo: conf.add_track(gff3, name="gff feature") assert track_error in str(excinfo) - # gff3 is supported conf.add_track(gff3Tabix, name="gff3 feature") gff3_track = conf.get_track("gff3 feature") assert gff3_track[0]["type"] == "FeatureTrack" @@ -71,33 +62,33 @@ def test_add_track_type_fail(): def test_add_track_overwrite(): conf = create("LGV", genome="hg38") - overwrite_err = "track with trackId: " \ - '"GRCh38-test" already exists inconfig.' \ - ' Set overwrite to True to overwrite it.' - conf.add_track(gff3Tabix, name='test') + overwrite_err = ( + "track with trackId: " + '"GRCh38-test" already exists inconfig.' + " Set overwrite to True to overwrite it." + ) + conf.add_track(gff3Tabix, name="test") with pytest.raises(TypeError) as excinfo: - conf.add_track(gff3Tabix, name='test') + conf.add_track(gff3Tabix, name="test") assert overwrite_err in str(excinfo) - conf.add_track(gff3Tabix, name='test', overwrite=True) + conf.add_track(gff3Tabix, name="test", overwrite=True) tracks = conf.get_tracks() - # should have one track from hg38 conf + test track == 2 - assert len(tracks) == 2 + assert len(tracks) == 1 def test_add_track_with_index(): conf = create("LGV", genome="hg38") conf.add_track(gff3Tabix, name="test") conf2 = create("LGV", genome="hg38") - conf2.add_track(gff3Tabix, name="test", index=gff3_tabix_index) + conf2.add_track(gff3Tabix, name="test", index=gff3TabixIndex) index_one = conf.get_track("test") - idx = index_one[0]['adapter']['index']['location']['uri'] + idx = index_one[0]["adapter"]["index"]["location"]["uri"] index_two = conf2.get_track("test") - idx2 = index_two[0]['adapter']['index']['location']['uri'] + idx2 = index_two[0]["adapter"]["index"]["location"]["uri"] assert idx == idx2 def test_variant(): - # VCF data conf = create("LGV", genome="hg19") conf.add_track(vcf, name="vcf track") conf.add_track(vcfGz, name="vcfgz track") @@ -106,49 +97,35 @@ def test_variant(): def test_wiggle(): - # bigWig data (quantitative/wiggle) conf = create("LGV", genome="hg19") conf.add_track(bigWig, name="wiggle track") bigwig_track = conf.get_track("wiggle track") assert bigwig_track[0]["type"] == "QuantitativeTrack" -# ==== dataframe track ====== def test_data_frame_track(): - hg38 = create('LGV', genome='hg38') - assert len(hg38.get_tracks()) == 1 + hg38 = create("LGV", genome="hg38") + assert len(hg38.get_tracks()) == 0 data_frame = { "refName": ["1", "1"], "start": [123, 456], "end": [780, 101112], - "name": ["feature1", "feature2"] + "name": ["feature1", "feature2"], } df = pd.DataFrame(data_frame) - hg38.add_df_track(df, 'data_frame_track_name') - data_empty = { - "refName": [], - "start": [], - "end": [], - "name": [] - } - assert len(hg38.get_tracks()) == 2 - # throw error if the dataframe is empty + hg38.add_df_track(df, "data_frame_track_name") + data_empty = {"refName": [], "start": [], "end": [], "name": []} + assert len(hg38.get_tracks()) == 1 df_empty = pd.DataFrame(data_empty) df_error = "DataFrame must not be empty." with pytest.raises(TypeError) as excinfo: - hg38.add_df_track(df_empty, 'empty_data_frame_track') + hg38.add_df_track(df_empty, "empty_data_frame_track") assert df_error in str(excinfo) def test_check_track_data(): - # Test track from dataframe df_error = "Track data must be a DataFrame" - invalid_df = { - "refName": "1", - "start": 123, - "end": 789, - "name": "feature1" - } + invalid_df = {"refName": "1", "start": 123, "end": 789, "name": "feature1"} with pytest.raises(TypeError) as excinfo: check_track_data(invalid_df) assert df_error in str(excinfo) @@ -156,18 +133,17 @@ def test_check_track_data(): "refName": ["1", "1"], "start": [123, 456], "end": [780, 101112], - "name": ["feature1", "feature2"] + "name": ["feature1", "feature2"], } pd.DataFrame(data_frame) def test_check_columns(): - # missing start column column_error = "DataFrame must contain all required columns." invalid_df = { "refName": ["1", "1"], "end": [780, 101112], - "name": ['feature1', 'feature2'] + "name": ["feature1", "feature2"], } df = pd.DataFrame(invalid_df) with pytest.raises(TypeError) as excinfo: @@ -180,7 +156,7 @@ def test_get_df_features(): "refName": ["1", "1"], "start": [123, 456], "end": [780, 101112], - "name": ["feature1", "feature2"] + "name": ["feature1", "feature2"], } df = pd.DataFrame(data_frame) features = get_track_data(df) From d3575d821aa3ef16661c3fea3ece8fb034a23f81 Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 11 Dec 2023 08:16:32 -0500 Subject: [PATCH 9/9] Modularize --- jbrowse_jupyter/dev_server.py | 3 +- jbrowse_jupyter/jbrowse_config.py | 3 +- jbrowse_jupyter/util.py | 187 ++++++++++-------------------- ruff.toml | 3 - tests/test_jbrowse_config.py | 96 +++++++-------- tests/test_tracks.py | 10 +- 6 files changed, 112 insertions(+), 190 deletions(-) diff --git a/jbrowse_jupyter/dev_server.py b/jbrowse_jupyter/dev_server.py index 24881d0..0b8c32b 100644 --- a/jbrowse_jupyter/dev_server.py +++ b/jbrowse_jupyter/dev_server.py @@ -91,7 +91,8 @@ def send_head(self): last = file_len - 1 response_length = last - first + 1 - self.send_header("Content-Range", "bytes %s-%s/%s" % (first, last, file_len)) + self.send_header("Content-Range", "bytes %s-%s/%s" % + (first, last, file_len)) self.send_header("Content-Length", str(response_length)) self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) self.end_headers() diff --git a/jbrowse_jupyter/jbrowse_config.py b/jbrowse_jupyter/jbrowse_config.py index 32fde89..4f6076f 100644 --- a/jbrowse_jupyter/jbrowse_config.py +++ b/jbrowse_jupyter/jbrowse_config.py @@ -391,8 +391,7 @@ def add_df_track(self, track_data, name, **kwargs): "adapter": adapter, } err = ( - f'track with trackId: "{track_id}" already exists in config.', - "Set overwrite to True if you want to overwrite it.", + f'track with trackId: "{track_id}" already exists in config. Set overwrite to True if you want to overwrite it.', ) if track_id in self.tracks_ids_map.keys() and not overwrite: raise TypeError(err) diff --git a/jbrowse_jupyter/util.py b/jbrowse_jupyter/util.py index 87fffcf..2cfb8fb 100644 --- a/jbrowse_jupyter/util.py +++ b/jbrowse_jupyter/util.py @@ -1,40 +1,41 @@ import re +import copy import os import dash_jbrowse as jb from dash import html, Dash from urllib.parse import urlparse - -hg38_lgv = { - "assembly": { - "name": "GRCh38", - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": "GRCh38-ReferenceSequenceTrack", - "adapter": { - "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz" - }, - "faiLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai" - }, - "gziLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi" - }, +hg38_assembly = { + "name": "hg38", + "sequence": { + "type": "ReferenceSequenceTrack", + "trackId": "GRCh38-ReferenceSequenceTrack", + "adapter": { + "type": "BgzipFastaAdapter", + "fastaLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz" + }, + "faiLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai" + }, + "gziLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi" }, }, - "aliases": ["hg38"], - "refNameAliases": { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/hg38_aliases.txt" - }, - } - }, }, + "aliases": ["GRCh38"], + "refNameAliases": { + "adapter": { + "type": "RefNameAliasAdapter", + "location": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/hg38_aliases.txt" + }, + } + }, +} +hg38_lgv = { + "assembly": hg38_assembly, "tracks": [], "location": "10:29,838,737..29,838,819", "defaultSession": { @@ -49,38 +50,7 @@ hg38_cgv = { - "assembly": { - "name": "hg38", - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": "GRCh38-ReferenceSequenceTrack", - "adapter": { - "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": "https://jbrowse.org/genomes/GRCh38/fasta/hg38.prefix.fa.gz", - "locationType": "UriLocation", - }, - "faiLocation": { - "uri": "https://jbrowse.org/genomes/GRCh38/fasta/hg38.prefix.fa.gz.fai", - "locationType": "UriLocation", - }, - "gziLocation": { - "uri": "https://jbrowse.org/genomes/GRCh38/fasta/hg38.prefix.fa.gz.gzi", - "locationType": "UriLocation", - }, - }, - }, - "aliases": ["GRCh38"], - "refNameAliases": { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/hg38_aliases.txt", - "locationType": "UriLocation", - }, - } - }, - }, + "assembly": hg38_assembly, "tracks": [], "defaultSession": { "name": "My session", @@ -92,36 +62,36 @@ }, }, } - -hg19_lgv = { - "assembly": { - "name": "hg19", - "aliases": ["GRCh37"], - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": "hg19-ReferenceSequenceTrack", - "adapter": { - "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz" - }, - "faiLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.fai" - }, - "gziLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.gzi" - }, +hg19_assembly = { + "name": "hg19", + "aliases": ["GRCh37"], + "sequence": { + "type": "ReferenceSequenceTrack", + "trackId": "hg19-ReferenceSequenceTrack", + "adapter": { + "type": "BgzipFastaAdapter", + "fastaLocation": { + "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz" + }, + "faiLocation": { + "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.fai" + }, + "gziLocation": { + "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.gzi" }, }, - "refNameAliases": { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt" - }, - } - }, }, + "refNameAliases": { + "adapter": { + "type": "RefNameAliasAdapter", + "location": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt" + }, + } + }, +} +hg19_lgv = { + "assembly": hg19_assembly, "tracks": [], "defaultSession": { "name": "test", @@ -135,38 +105,7 @@ } hg19_cgv = { - "assembly": { - "name": "hg19", - "aliases": ["GRCh37"], - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": "Pd8Wh30ei9R", - "adapter": { - "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz", - "locationType": "UriLocation", - }, - "faiLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.fai", - "locationType": "UriLocation", - }, - "gziLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.gzi", - "locationType": "UriLocation", - }, - }, - }, - "refNameAliases": { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt", - "locationType": "UriLocation", - }, - } - }, - }, + "assembly": hg19_assembly, "tracks": [], "defaultSession": { "name": "My session", @@ -210,11 +149,11 @@ def get_name(assembly_file): name_end = 0 name_start = 0 for i in range(0, len(assembly_file)): - if assembly_file[len(assembly_file) - i - 1 : len(assembly_file) - i] == "/": + if assembly_file[len(assembly_file) - i - 1: len(assembly_file) - i] == "/": name_start = len(assembly_file) - i break for i in range(name_start, len(assembly_file)): - if assembly_file[i : i + 1] == ".": + if assembly_file[i: i + 1] == ".": name_end = i break @@ -230,14 +169,14 @@ def get_default(name, view_type="LGV"): """Returns the configuration object given a genome name.""" if name == "hg38": if view_type == "CGV": - return hg38_cgv + return copy.deepcopy(hg38_cgv) else: - return hg38_lgv + return copy.deepcopy(hg38_lgv) elif name == "hg19": if view_type == "CGV": - return hg19_cgv + return copy.deepcopy(hg19_cgv) else: - return hg19_lgv + return copy.deepcopy(hg19_lgv) def create_component(conf, **kwargs): diff --git a/ruff.toml b/ruff.toml index 8e0beda..5523921 100644 --- a/ruff.toml +++ b/ruff.toml @@ -1,4 +1 @@ - -# Same as Black. -line-length = 88 indent-width = 4 diff --git a/tests/test_jbrowse_config.py b/tests/test_jbrowse_config.py index 54387f5..74a6c10 100644 --- a/tests/test_jbrowse_config.py +++ b/tests/test_jbrowse_config.py @@ -41,9 +41,7 @@ def test_set_assembly(): conf.get_assembly_name() assert myError in str(excinfo) # raises an error if you try to add a track before an assembly is set - data = "https://s3.amazonaws.com/jbrowse.org/genomes/" \ - "GRCh38/ncbi_refseq/GCA_000001405.15_GRCh38_full_" \ - "analysis_set.refseq_annotation.sorted.gff.gz" + data = "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/ncbi_refseq/GCA_000001405.15_GRCh38_full_analysis_set.refseq_annotation.sorted.gff.gz" with pytest.raises(Exception) as excinfo: conf.add_track( data, @@ -53,15 +51,16 @@ def test_set_assembly(): # raises an error, there is no local path support in non jupyter envs with pytest.raises(TypeError) as excinfo: conf.set_assembly("/hi/there") - err = (f'Path {"/hi/there"} for assembly data is used' - ' in an unsupported environment.' - 'Paths are supported in Jupyter notebooks and Jupyter lab.' - 'Please use a url for your assembly data. You can check out ' - 'our local file support docs for more information') + err = ( + f'Path {"/hi/there"} for assembly data is used' + ' in an unsupported environment.' + 'Paths are supported in Jupyter notebooks and Jupyter lab.' + 'Please use a url for your assembly data. You can check out ' + 'our local file support docs for more information' + ) assert err == excinfo.value.args[0] aliases = ["hg38"] - uri = "https://s3.amazonaws.com/jbrowse.org/genomes/" \ - "GRCh38/hg38_aliases.txt" + uri = "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/hg38_aliases.txt" ref_name_aliases = { "adapter": { "type": "RefNameAliasAdapter", @@ -84,39 +83,34 @@ def test_set_assembly(): ) assert err in str(excinfo) assert conf.get_assembly_name() == "hg38" - track_data = "https://s3.amazonaws.com/jbrowse.org/" \ - "genomes/GRCh38/ncbi_refseq/GCA_000001405.15_" \ + track_data = ( + "https://s3.amazonaws.com/jbrowse.org/" + "genomes/GRCh38/ncbi_refseq/GCA_000001405.15_" "GRCh38_full_analysis_set.refseq_annotation.sorted.gff.gz" + ) conf.add_track( track_data, name="test-demo", ) assert len(conf.get_tracks()) == 1 - alias_uri = "https://s3.amazonaws.com/jbrowse.org/genomes" \ - "/hg19/hg19_aliases.txt" + alias_uri = "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt" ref_name = { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": alias_uri - } - } + "adapter": {"type": "RefNameAliasAdapter", "location": {"uri": alias_uri}} } - aliases = [ - "GRCh37" - ] + aliases = ["GRCh37"] a_data = "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz" - conf.set_assembly(a_data, aliases=aliases, - refname_aliases=ref_name, overwrite=True) - assert conf.get_assembly_name() == 'hg19' + conf.set_assembly(a_data, aliases=aliases, refname_aliases=ref_name, overwrite=True) + assert conf.get_assembly_name() == "hg19" def test_create_view(): "tests creating a view from one of the provided genomes" - genome_error = '"volvox" is not a valid default genome to view.' \ - 'Choose from hg19 or hg38 or pass your own conf.' + genome_error = ( + '"volvox" is not a valid default genome to view.' + "Choose from hg19 or hg38 or pass your own conf." + ) with pytest.raises(TypeError) as excinfo: create("LGV", genome="volvox") assert genome_error in str(excinfo) @@ -124,10 +118,10 @@ def test_create_view(): hg19 = create("LGV", genome="hg19") hg38 = create("LGV", genome="hg38") assert hg19.get_assembly_name() == "hg19" - assert len(hg19.get_tracks()) > 0 + assert len(hg19.get_tracks()) == 0 assert hg19.get_default_session() - assert hg38.get_assembly_name() == "GRCh38" - assert len(hg38.get_tracks()) > 0 + assert hg38.get_assembly_name() == "hg38" + assert len(hg38.get_tracks()) == 0 assert hg38.get_default_session() @@ -169,23 +163,15 @@ def test_create_view_from_conf(): "trackId": "hg19-ReferenceSequenceTrack", "adapter": { "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": fasta_loc - }, - "faiLocation": { - "uri": fai_loc - }, - "gziLocation": { - "uri": gz_loc - }, + "fastaLocation": {"uri": fasta_loc}, + "faiLocation": {"uri": fai_loc}, + "gziLocation": {"uri": gz_loc}, }, }, "refNameAliases": { "adapter": { "type": "RefNameAliasAdapter", - "location": { - "uri": rloc - }, + "location": {"uri": rloc}, } }, }, @@ -195,7 +181,7 @@ def test_create_view_from_conf(): # can add track assert len(hg19_from_config.get_tracks()) == 0 bigwig = "https://jbrowse.org/genomes/hg19/COLO829/colo_normal.bw" - hg19_from_config.add_track(bigwig, name="example", track_id='delete-test') + hg19_from_config.add_track(bigwig, name="example", track_id="delete-test") assert len(hg19_from_config.get_tracks()) == 1 # can set default session hg19_from_config.set_default_session(["example"]) @@ -208,8 +194,10 @@ def test_create_view_from_conf(): adapter_list = hg19_from_config.get_text_search_adapters() assert len(adapter_list) == 1 - same_adapter = "Adapter already exists for given adapterId: " \ + same_adapter = ( + "Adapter already exists for given adapterId: " "hg19-hg19.ix-index.Provide a different adapter_id" + ) with pytest.raises(Exception) as excinfo: hg19_from_config.add_text_search_adapter(ix, ixx, meta) assert same_adapter in str(excinfo) @@ -222,8 +210,9 @@ def test_empty_config_lgv(): # === empty config === empty_conf = create("LGV") assert empty_conf.get_config() - assembly_error = "Can not get assembly name. " \ - "Please configure the assembly first." + assembly_error = ( + "Can not get assembly name. " "Please configure the assembly first." + ) with pytest.raises(Exception) as excinfo: empty_conf.get_assembly_name() assert assembly_error in str(excinfo) @@ -236,8 +225,9 @@ def test_empty_cgv(): # === empty config === empty_conf = create("CGV") assert empty_conf.get_config() - assembly_error = "Can not get assembly name. " \ - "Please configure the assembly first." + assembly_error = ( + "Can not get assembly name. " "Please configure the assembly first." + ) with pytest.raises(Exception) as excinfo: empty_conf.get_assembly_name() assert assembly_error in str(excinfo) @@ -245,8 +235,10 @@ def test_empty_cgv(): def test_create_view_cgv(): "tests creating a view from one of the provided genomes" - genome_error = '"volvox" is not a valid default genome to view.' \ - 'Choose from hg19 or hg38 or pass your own conf.' + genome_error = ( + '"volvox" is not a valid default genome to view.' + "Choose from hg19 or hg38 or pass your own conf." + ) with pytest.raises(TypeError) as excinfo: create("CGV", genome="volvox") assert genome_error in str(excinfo) @@ -256,7 +248,7 @@ def test_create_view_cgv(): in_colab = hg19.colab assert not in_colab assert hg19.get_assembly_name() == "hg19" - assert len(hg19.get_tracks()) > 0 + assert len(hg19.get_tracks()) == 0 assert hg19.get_default_session() assert hg38.get_assembly_name() == "hg38" # hg38 for cgv does not have tracks diff --git a/tests/test_tracks.py b/tests/test_tracks.py index 1dceb0d..fe948ef 100644 --- a/tests/test_tracks.py +++ b/tests/test_tracks.py @@ -14,7 +14,7 @@ "http://hgdownload.cse.ucsc.edu/goldenpath/hg38/phyloP100way/hg38.phyloP100way.bw" ) vcf = "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf" -vcfGz = "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf.tbi" +vcfGz = "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf.gz" def test_make_location(): @@ -62,15 +62,9 @@ def test_add_track_type_fail(): def test_add_track_overwrite(): conf = create("LGV", genome="hg38") - overwrite_err = ( - "track with trackId: " - '"GRCh38-test" already exists inconfig.' - " Set overwrite to True to overwrite it." - ) conf.add_track(gff3Tabix, name="test") - with pytest.raises(TypeError) as excinfo: + with pytest.raises(TypeError): conf.add_track(gff3Tabix, name="test") - assert overwrite_err in str(excinfo) conf.add_track(gff3Tabix, name="test", overwrite=True) tracks = conf.get_tracks() assert len(tracks) == 1