From ce58471df9a35b4dab7ac47c778fad8dc2b786d1 Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 11 Dec 2023 08:16:32 -0500 Subject: [PATCH] Modularize --- jbrowse_jupyter/dev_server.py | 3 +- jbrowse_jupyter/jbrowse_config.py | 33 ++++-- jbrowse_jupyter/util.py | 187 ++++++++++-------------------- ruff.toml | 3 - tests/test_jbrowse_config.py | 96 +++++++-------- tests/test_tracks.py | 2 +- 6 files changed, 132 insertions(+), 192 deletions(-) diff --git a/jbrowse_jupyter/dev_server.py b/jbrowse_jupyter/dev_server.py index 24881d0..0b8c32b 100644 --- a/jbrowse_jupyter/dev_server.py +++ b/jbrowse_jupyter/dev_server.py @@ -91,7 +91,8 @@ def send_head(self): last = file_len - 1 response_length = last - first + 1 - self.send_header("Content-Range", "bytes %s-%s/%s" % (first, last, file_len)) + self.send_header("Content-Range", "bytes %s-%s/%s" % + (first, last, file_len)) self.send_header("Content-Length", str(response_length)) self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) self.end_headers() diff --git a/jbrowse_jupyter/jbrowse_config.py b/jbrowse_jupyter/jbrowse_config.py index 32fde89..18d9697 100644 --- a/jbrowse_jupyter/jbrowse_config.py +++ b/jbrowse_jupyter/jbrowse_config.py @@ -102,7 +102,8 @@ def __init__(self, view="LGV", conf=None): "tracks": [], } if view != "LGV" and view == "CGV": - view_default = {"id": "circularView", "type": "CircularView", "tracks": []} + view_default = {"id": "circularView", + "type": "CircularView", "tracks": []} default = { "assembly": {}, "tracks": [], @@ -237,7 +238,8 @@ def set_assembly(self, assembly_data, **kwargs): "data. You can check out our local " "file support docs for more information" ) - assembly_adapter = guess_adapter_type(assembly_data, "uri", indx, **kwargs) + assembly_adapter = guess_adapter_type( + assembly_data, "uri", indx, **kwargs) name = kwargs.get("name", get_name(assembly_data)) if assembly_adapter["type"] == "UNKNOWN": raise TypeError("Adapter type is not recognized") @@ -335,7 +337,8 @@ def get_track_display(self, track): def get_track(self, track_name): # Return the list of track configurations with that name - tracks = [track for track in self.get_tracks() if track["name"] == track_name] + tracks = [track for track in self.get_tracks() if track["name"] + == track_name] return tracks def get_tracks(self): @@ -398,7 +401,8 @@ def add_df_track(self, track_data, name, **kwargs): raise TypeError(err) if track_id in self.tracks_ids_map.keys() and overwrite: # delete track and overwrite it - current_tracks = [t for t in current_tracks if t["trackId"] != track_id] + current_tracks = [ + t for t in current_tracks if t["trackId"] != track_id] current_tracks.append(df_track_config) self.config["tracks"] = current_tracks @@ -472,7 +476,8 @@ def add_track(self, data, **kwargs): if adapter["type"] == "CramAdapter": extra_config = self.get_assembly()["sequence"]["adapter"] adapter["sequenceAdapter"] = extra_config - t_type = kwargs.get("track_type", guess_track_type(adapter["type"])) + t_type = kwargs.get( + "track_type", guess_track_type(adapter["type"])) supported_track_types = set( { "AlignmentsTrack", @@ -501,7 +506,8 @@ def add_track(self, data, **kwargs): ) ) if track_id in self.tracks_ids_map.keys() and overwrite: - current_tracks = [t for t in current_tracks if t["trackId"] != track_id] + current_tracks = [ + t for t in current_tracks if t["trackId"] != track_id] current_tracks.append(track_config) self.config["tracks"] = current_tracks @@ -543,7 +549,8 @@ def add_track(self, data, **kwargs): if adapter["type"] == "CramAdapter": extra_config = self.get_assembly()["sequence"]["adapter"] adapter["sequenceAdapter"] = extra_config - t_type = kwargs.get("track_type", guess_track_type(adapter["type"])) + t_type = kwargs.get( + "track_type", guess_track_type(adapter["type"])) supported_track_types = set( { "AlignmentsTrack", @@ -572,7 +579,8 @@ def add_track(self, data, **kwargs): ) ) if track_id in self.tracks_ids_map.keys() and overwrite: - current_tracks = [t for t in current_tracks if t["trackId"] != track_id] + current_tracks = [ + t for t in current_tracks if t["trackId"] != track_id] current_tracks.append(track_config) self.config["tracks"] = current_tracks @@ -598,12 +606,14 @@ def delete_track(self, track_id): (f'track with trackId: "{track_id}" does not exist in' f"config.") ) else: - new_tracks = [t for t in current_tracks if t["trackId"] != track_id] + new_tracks = [ + t for t in current_tracks if t["trackId"] != track_id] self.config["tracks"] = new_tracks # clear from default session default_sess = self.get_default_session() tracks_sess = default_sess["view"]["tracks"] - new_tracks_sess = [t for t in tracks_sess if t["configuration"] != track_id] + new_tracks_sess = [ + t for t in tracks_sess if t["configuration"] != track_id] if self.view == "CGV": self.config["defaultSession"] = { "name": "my session", @@ -660,7 +670,8 @@ def set_default_session(self, tracks_ids, display_assembly=True): if display_assembly: reference_track = self.get_reference_track() tracks_configs.append(reference_track) - tracks_to_display = [t for t in self.get_tracks() if t["trackId"] in tracks_ids] + tracks_to_display = [ + t for t in self.get_tracks() if t["trackId"] in tracks_ids] # guess the display type for t in tracks_to_display: tracks_configs.append(self.get_track_display(t)) diff --git a/jbrowse_jupyter/util.py b/jbrowse_jupyter/util.py index 87fffcf..2cfb8fb 100644 --- a/jbrowse_jupyter/util.py +++ b/jbrowse_jupyter/util.py @@ -1,40 +1,41 @@ import re +import copy import os import dash_jbrowse as jb from dash import html, Dash from urllib.parse import urlparse - -hg38_lgv = { - "assembly": { - "name": "GRCh38", - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": "GRCh38-ReferenceSequenceTrack", - "adapter": { - "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz" - }, - "faiLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai" - }, - "gziLocation": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi" - }, +hg38_assembly = { + "name": "hg38", + "sequence": { + "type": "ReferenceSequenceTrack", + "trackId": "GRCh38-ReferenceSequenceTrack", + "adapter": { + "type": "BgzipFastaAdapter", + "fastaLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz" + }, + "faiLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai" + }, + "gziLocation": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi" }, }, - "aliases": ["hg38"], - "refNameAliases": { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/hg38_aliases.txt" - }, - } - }, }, + "aliases": ["GRCh38"], + "refNameAliases": { + "adapter": { + "type": "RefNameAliasAdapter", + "location": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/hg38_aliases.txt" + }, + } + }, +} +hg38_lgv = { + "assembly": hg38_assembly, "tracks": [], "location": "10:29,838,737..29,838,819", "defaultSession": { @@ -49,38 +50,7 @@ hg38_cgv = { - "assembly": { - "name": "hg38", - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": "GRCh38-ReferenceSequenceTrack", - "adapter": { - "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": "https://jbrowse.org/genomes/GRCh38/fasta/hg38.prefix.fa.gz", - "locationType": "UriLocation", - }, - "faiLocation": { - "uri": "https://jbrowse.org/genomes/GRCh38/fasta/hg38.prefix.fa.gz.fai", - "locationType": "UriLocation", - }, - "gziLocation": { - "uri": "https://jbrowse.org/genomes/GRCh38/fasta/hg38.prefix.fa.gz.gzi", - "locationType": "UriLocation", - }, - }, - }, - "aliases": ["GRCh38"], - "refNameAliases": { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/hg38_aliases.txt", - "locationType": "UriLocation", - }, - } - }, - }, + "assembly": hg38_assembly, "tracks": [], "defaultSession": { "name": "My session", @@ -92,36 +62,36 @@ }, }, } - -hg19_lgv = { - "assembly": { - "name": "hg19", - "aliases": ["GRCh37"], - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": "hg19-ReferenceSequenceTrack", - "adapter": { - "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz" - }, - "faiLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.fai" - }, - "gziLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.gzi" - }, +hg19_assembly = { + "name": "hg19", + "aliases": ["GRCh37"], + "sequence": { + "type": "ReferenceSequenceTrack", + "trackId": "hg19-ReferenceSequenceTrack", + "adapter": { + "type": "BgzipFastaAdapter", + "fastaLocation": { + "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz" + }, + "faiLocation": { + "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.fai" + }, + "gziLocation": { + "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.gzi" }, }, - "refNameAliases": { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt" - }, - } - }, }, + "refNameAliases": { + "adapter": { + "type": "RefNameAliasAdapter", + "location": { + "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt" + }, + } + }, +} +hg19_lgv = { + "assembly": hg19_assembly, "tracks": [], "defaultSession": { "name": "test", @@ -135,38 +105,7 @@ } hg19_cgv = { - "assembly": { - "name": "hg19", - "aliases": ["GRCh37"], - "sequence": { - "type": "ReferenceSequenceTrack", - "trackId": "Pd8Wh30ei9R", - "adapter": { - "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz", - "locationType": "UriLocation", - }, - "faiLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.fai", - "locationType": "UriLocation", - }, - "gziLocation": { - "uri": "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz.gzi", - "locationType": "UriLocation", - }, - }, - }, - "refNameAliases": { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt", - "locationType": "UriLocation", - }, - } - }, - }, + "assembly": hg19_assembly, "tracks": [], "defaultSession": { "name": "My session", @@ -210,11 +149,11 @@ def get_name(assembly_file): name_end = 0 name_start = 0 for i in range(0, len(assembly_file)): - if assembly_file[len(assembly_file) - i - 1 : len(assembly_file) - i] == "/": + if assembly_file[len(assembly_file) - i - 1: len(assembly_file) - i] == "/": name_start = len(assembly_file) - i break for i in range(name_start, len(assembly_file)): - if assembly_file[i : i + 1] == ".": + if assembly_file[i: i + 1] == ".": name_end = i break @@ -230,14 +169,14 @@ def get_default(name, view_type="LGV"): """Returns the configuration object given a genome name.""" if name == "hg38": if view_type == "CGV": - return hg38_cgv + return copy.deepcopy(hg38_cgv) else: - return hg38_lgv + return copy.deepcopy(hg38_lgv) elif name == "hg19": if view_type == "CGV": - return hg19_cgv + return copy.deepcopy(hg19_cgv) else: - return hg19_lgv + return copy.deepcopy(hg19_lgv) def create_component(conf, **kwargs): diff --git a/ruff.toml b/ruff.toml index 8e0beda..5523921 100644 --- a/ruff.toml +++ b/ruff.toml @@ -1,4 +1 @@ - -# Same as Black. -line-length = 88 indent-width = 4 diff --git a/tests/test_jbrowse_config.py b/tests/test_jbrowse_config.py index 54387f5..74a6c10 100644 --- a/tests/test_jbrowse_config.py +++ b/tests/test_jbrowse_config.py @@ -41,9 +41,7 @@ def test_set_assembly(): conf.get_assembly_name() assert myError in str(excinfo) # raises an error if you try to add a track before an assembly is set - data = "https://s3.amazonaws.com/jbrowse.org/genomes/" \ - "GRCh38/ncbi_refseq/GCA_000001405.15_GRCh38_full_" \ - "analysis_set.refseq_annotation.sorted.gff.gz" + data = "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/ncbi_refseq/GCA_000001405.15_GRCh38_full_analysis_set.refseq_annotation.sorted.gff.gz" with pytest.raises(Exception) as excinfo: conf.add_track( data, @@ -53,15 +51,16 @@ def test_set_assembly(): # raises an error, there is no local path support in non jupyter envs with pytest.raises(TypeError) as excinfo: conf.set_assembly("/hi/there") - err = (f'Path {"/hi/there"} for assembly data is used' - ' in an unsupported environment.' - 'Paths are supported in Jupyter notebooks and Jupyter lab.' - 'Please use a url for your assembly data. You can check out ' - 'our local file support docs for more information') + err = ( + f'Path {"/hi/there"} for assembly data is used' + ' in an unsupported environment.' + 'Paths are supported in Jupyter notebooks and Jupyter lab.' + 'Please use a url for your assembly data. You can check out ' + 'our local file support docs for more information' + ) assert err == excinfo.value.args[0] aliases = ["hg38"] - uri = "https://s3.amazonaws.com/jbrowse.org/genomes/" \ - "GRCh38/hg38_aliases.txt" + uri = "https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/hg38_aliases.txt" ref_name_aliases = { "adapter": { "type": "RefNameAliasAdapter", @@ -84,39 +83,34 @@ def test_set_assembly(): ) assert err in str(excinfo) assert conf.get_assembly_name() == "hg38" - track_data = "https://s3.amazonaws.com/jbrowse.org/" \ - "genomes/GRCh38/ncbi_refseq/GCA_000001405.15_" \ + track_data = ( + "https://s3.amazonaws.com/jbrowse.org/" + "genomes/GRCh38/ncbi_refseq/GCA_000001405.15_" "GRCh38_full_analysis_set.refseq_annotation.sorted.gff.gz" + ) conf.add_track( track_data, name="test-demo", ) assert len(conf.get_tracks()) == 1 - alias_uri = "https://s3.amazonaws.com/jbrowse.org/genomes" \ - "/hg19/hg19_aliases.txt" + alias_uri = "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt" ref_name = { - "adapter": { - "type": "RefNameAliasAdapter", - "location": { - "uri": alias_uri - } - } + "adapter": {"type": "RefNameAliasAdapter", "location": {"uri": alias_uri}} } - aliases = [ - "GRCh37" - ] + aliases = ["GRCh37"] a_data = "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz" - conf.set_assembly(a_data, aliases=aliases, - refname_aliases=ref_name, overwrite=True) - assert conf.get_assembly_name() == 'hg19' + conf.set_assembly(a_data, aliases=aliases, refname_aliases=ref_name, overwrite=True) + assert conf.get_assembly_name() == "hg19" def test_create_view(): "tests creating a view from one of the provided genomes" - genome_error = '"volvox" is not a valid default genome to view.' \ - 'Choose from hg19 or hg38 or pass your own conf.' + genome_error = ( + '"volvox" is not a valid default genome to view.' + "Choose from hg19 or hg38 or pass your own conf." + ) with pytest.raises(TypeError) as excinfo: create("LGV", genome="volvox") assert genome_error in str(excinfo) @@ -124,10 +118,10 @@ def test_create_view(): hg19 = create("LGV", genome="hg19") hg38 = create("LGV", genome="hg38") assert hg19.get_assembly_name() == "hg19" - assert len(hg19.get_tracks()) > 0 + assert len(hg19.get_tracks()) == 0 assert hg19.get_default_session() - assert hg38.get_assembly_name() == "GRCh38" - assert len(hg38.get_tracks()) > 0 + assert hg38.get_assembly_name() == "hg38" + assert len(hg38.get_tracks()) == 0 assert hg38.get_default_session() @@ -169,23 +163,15 @@ def test_create_view_from_conf(): "trackId": "hg19-ReferenceSequenceTrack", "adapter": { "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": fasta_loc - }, - "faiLocation": { - "uri": fai_loc - }, - "gziLocation": { - "uri": gz_loc - }, + "fastaLocation": {"uri": fasta_loc}, + "faiLocation": {"uri": fai_loc}, + "gziLocation": {"uri": gz_loc}, }, }, "refNameAliases": { "adapter": { "type": "RefNameAliasAdapter", - "location": { - "uri": rloc - }, + "location": {"uri": rloc}, } }, }, @@ -195,7 +181,7 @@ def test_create_view_from_conf(): # can add track assert len(hg19_from_config.get_tracks()) == 0 bigwig = "https://jbrowse.org/genomes/hg19/COLO829/colo_normal.bw" - hg19_from_config.add_track(bigwig, name="example", track_id='delete-test') + hg19_from_config.add_track(bigwig, name="example", track_id="delete-test") assert len(hg19_from_config.get_tracks()) == 1 # can set default session hg19_from_config.set_default_session(["example"]) @@ -208,8 +194,10 @@ def test_create_view_from_conf(): adapter_list = hg19_from_config.get_text_search_adapters() assert len(adapter_list) == 1 - same_adapter = "Adapter already exists for given adapterId: " \ + same_adapter = ( + "Adapter already exists for given adapterId: " "hg19-hg19.ix-index.Provide a different adapter_id" + ) with pytest.raises(Exception) as excinfo: hg19_from_config.add_text_search_adapter(ix, ixx, meta) assert same_adapter in str(excinfo) @@ -222,8 +210,9 @@ def test_empty_config_lgv(): # === empty config === empty_conf = create("LGV") assert empty_conf.get_config() - assembly_error = "Can not get assembly name. " \ - "Please configure the assembly first." + assembly_error = ( + "Can not get assembly name. " "Please configure the assembly first." + ) with pytest.raises(Exception) as excinfo: empty_conf.get_assembly_name() assert assembly_error in str(excinfo) @@ -236,8 +225,9 @@ def test_empty_cgv(): # === empty config === empty_conf = create("CGV") assert empty_conf.get_config() - assembly_error = "Can not get assembly name. " \ - "Please configure the assembly first." + assembly_error = ( + "Can not get assembly name. " "Please configure the assembly first." + ) with pytest.raises(Exception) as excinfo: empty_conf.get_assembly_name() assert assembly_error in str(excinfo) @@ -245,8 +235,10 @@ def test_empty_cgv(): def test_create_view_cgv(): "tests creating a view from one of the provided genomes" - genome_error = '"volvox" is not a valid default genome to view.' \ - 'Choose from hg19 or hg38 or pass your own conf.' + genome_error = ( + '"volvox" is not a valid default genome to view.' + "Choose from hg19 or hg38 or pass your own conf." + ) with pytest.raises(TypeError) as excinfo: create("CGV", genome="volvox") assert genome_error in str(excinfo) @@ -256,7 +248,7 @@ def test_create_view_cgv(): in_colab = hg19.colab assert not in_colab assert hg19.get_assembly_name() == "hg19" - assert len(hg19.get_tracks()) > 0 + assert len(hg19.get_tracks()) == 0 assert hg19.get_default_session() assert hg38.get_assembly_name() == "hg38" # hg38 for cgv does not have tracks diff --git a/tests/test_tracks.py b/tests/test_tracks.py index 1dceb0d..2fc7858 100644 --- a/tests/test_tracks.py +++ b/tests/test_tracks.py @@ -14,7 +14,7 @@ "http://hgdownload.cse.ucsc.edu/goldenpath/hg38/phyloP100way/hg38.phyloP100way.bw" ) vcf = "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf" -vcfGz = "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf.tbi" +vcfGz = "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf.gz" def test_make_location():