Skip to content

Commit

Permalink
change file paths for FactorNet (#138)
Browse files Browse the repository at this point in the history
  • Loading branch information
Avsecz authored Dec 10, 2018
1 parent 581e40b commit 0b45364
Show file tree
Hide file tree
Showing 55 changed files with 176 additions and 244 deletions.
18 changes: 8 additions & 10 deletions FactorNet/CEBPB/meta_Unique35_DGF/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ def download_gencode_dir(output_dir):
"""Download all the required gencode files
"""
makedir_exist_ok(output_dir)
url_template = ("https://github.com/kipoi/models/blob/"
"7648d3fd57def50934835b52acadd26bcaaa275c/FactorNet/"
"template/dataloader_files/gencode_features/{}?raw=true")

url_template = ("https://s3.eu-central-1.amazonaws.com/kipoi-models/"
"dataloader_files/FactorNet/dataloader_files/gencode_features/{}")

# url_template = "https://github.com/uci-cbcl/FactorNet/blob/master/resources/{}?raw=true"
fnames = [('cpgisland.bed.gz', 'ac7dc007d7019c05adb7a331d1d6721d'),
Expand Down Expand Up @@ -87,8 +87,6 @@ def __init__(self,

self.bt = BT(intervals_file)



# Fasta
self.fasta_file = fasta_file
self.fasta_extractor = None # initialize later
Expand All @@ -97,7 +95,7 @@ def __init__(self,
self.dnase_extractor = None
# mappability
if mappability_file is None:
# download the mappability file if not existing
# download the mappability file if not existing
common_dl_dir = os.path.join(this_dir, "../../template/downloaded/dataloader_files")
makedir_exist_ok(common_dl_dir)
rf = RemoteFile(url="http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig",
Expand All @@ -118,10 +116,10 @@ def __init__(self,
output_dir = os.path.join(this_dir, "../../template/downloaded/dataloader_files/RNAseq_features/")
makedir_exist_ok(output_dir)
RNAseq_PC_file = os.path.join(output_dir, cell_line, "meta.txt")
url_template = ('https://github.com/kipoi/models/blob/7648d3fd57def50934835b52acadd26bcaaa275c'
'/FactorNet/template/dataloader_files/RNAseq_features/{}/meta.txt?raw=true')
url_template = ('https://s3.eu-central-1.amazonaws.com/kipoi-models/dataloader_files/'
'FactorNet/dataloader_files/RNAseq_features/{}/meta.txt')
# rf = RemoteFile(url=url_template.format(cell_line))
if not os.path.exists(RNAseq_PC_file): # or not rf.validate(mappability_file):
if not os.path.exists(RNAseq_PC_file): # or not rf.validate(mappability_file):
# download the path
download_url(url_template.format(cell_line), os.path.join(output_dir, cell_line), "meta.txt")
# rf.get_file(RNAseq_PC_file)
Expand Down Expand Up @@ -181,4 +179,4 @@ def __getitem__(self, idx):
"ranges": ranges,
"ranges_rc": ranges_rc
}
}
}
4 changes: 2 additions & 2 deletions FactorNet/CEBPB/meta_Unique35_DGF/dataloader.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ args:
mappability_file:
doc: UCSC mappability track - http://hgdownload.cse.ucsc.edu/goldenpath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig. by default, provide this file with the dataloader, download in background
example:
url: https://github.com/kipoi/models/blob/92b23ed62956d56eb457488b431fc27550ddbf32/FactorNet/template/dataloader_files/wgEncodeDukeMapabilityUniqueness35bp.chr22.bigWig?raw=true
url: https://s3.eu-central-1.amazonaws.com/kipoi-models/dataloader_files/FactorNet/dataloader_files/wgEncodeDukeMapabilityUniqueness35bp.chr22.bigWig
md5: 912e1bb77fb67c1a792ac4a05d9416f9
optional: True
use_linecache:
Expand Down Expand Up @@ -68,4 +68,4 @@ output_schema:
doc: Ranges describing inputs.seq
ranges_rc:
type: GenomicRanges
doc: Ranges describing inputs.seq_rc
doc: Ranges describing inputs.seq_rc
9 changes: 3 additions & 6 deletions FactorNet/CEBPB/onePeak_1_DGF/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,8 @@ def download_gencode_dir(output_dir):
"""Download all the required gencode files
"""
makedir_exist_ok(output_dir)
url_template = ("https://github.com/kipoi/models/blob/"
"7648d3fd57def50934835b52acadd26bcaaa275c/FactorNet/"
"template/dataloader_files/gencode_features/{}?raw=true")
url_template = ("https://s3.eu-central-1.amazonaws.com/kipoi-models/"
"dataloader_files/FactorNet/dataloader_files/gencode_features/{}")

# url_template = "https://github.com/uci-cbcl/FactorNet/blob/master/resources/{}?raw=true"
fnames = [('cpgisland.bed.gz', 'ac7dc007d7019c05adb7a331d1d6721d'),
Expand Down Expand Up @@ -84,8 +83,6 @@ def __init__(self,

self.bt = BT(intervals_file)



# Fasta
self.fasta_file = fasta_file
self.fasta_extractor = None # initialize later
Expand Down Expand Up @@ -138,4 +135,4 @@ def __getitem__(self, idx):
"ranges": ranges,
"ranges_rc": ranges_rc
}
}
}
11 changes: 4 additions & 7 deletions FactorNet/CEBPB/onePeak_2_Unique35_DGF/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,8 @@ def download_gencode_dir(output_dir):
"""Download all the required gencode files
"""
makedir_exist_ok(output_dir)
url_template = ("https://github.com/kipoi/models/blob/"
"7648d3fd57def50934835b52acadd26bcaaa275c/FactorNet/"
"template/dataloader_files/gencode_features/{}?raw=true")
url_template = ("https://s3.eu-central-1.amazonaws.com/kipoi-models/"
"dataloader_files/FactorNet/dataloader_files/gencode_features/{}")

# url_template = "https://github.com/uci-cbcl/FactorNet/blob/master/resources/{}?raw=true"
fnames = [('cpgisland.bed.gz', 'ac7dc007d7019c05adb7a331d1d6721d'),
Expand Down Expand Up @@ -85,8 +84,6 @@ def __init__(self,

self.bt = BT(intervals_file)



# Fasta
self.fasta_file = fasta_file
self.fasta_extractor = None # initialize later
Expand All @@ -95,7 +92,7 @@ def __init__(self,
self.dnase_extractor = None
# mappability
if mappability_file is None:
# download the mappability file if not existing
# download the mappability file if not existing
common_dl_dir = os.path.join(this_dir, "../../template/downloaded/dataloader_files")
makedir_exist_ok(common_dl_dir)
rf = RemoteFile(url="http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig",
Expand Down Expand Up @@ -158,4 +155,4 @@ def __getitem__(self, idx):
"ranges": ranges,
"ranges_rc": ranges_rc
}
}
}
4 changes: 2 additions & 2 deletions FactorNet/CEBPB/onePeak_2_Unique35_DGF/dataloader.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ args:
mappability_file:
doc: UCSC mappability track - http://hgdownload.cse.ucsc.edu/goldenpath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig. by default, provide this file with the dataloader, download in background
example:
url: https://github.com/kipoi/models/blob/92b23ed62956d56eb457488b431fc27550ddbf32/FactorNet/template/dataloader_files/wgEncodeDukeMapabilityUniqueness35bp.chr22.bigWig?raw=true
url: https://s3.eu-central-1.amazonaws.com/kipoi-models/dataloader_files/FactorNet/dataloader_files/wgEncodeDukeMapabilityUniqueness35bp.chr22.bigWig
md5: 912e1bb77fb67c1a792ac4a05d9416f9
optional: True
use_linecache:
Expand Down Expand Up @@ -58,4 +58,4 @@ output_schema:
doc: Ranges describing inputs.seq
ranges_rc:
type: GenomicRanges
doc: Ranges describing inputs.seq_rc
doc: Ranges describing inputs.seq_rc
16 changes: 7 additions & 9 deletions FactorNet/CTCF/metaGENCODE_RNAseq_Unique35_DGF/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ def download_gencode_dir(output_dir):
"""Download all the required gencode files
"""
makedir_exist_ok(output_dir)
url_template = ("https://github.com/kipoi/models/blob/"
"7648d3fd57def50934835b52acadd26bcaaa275c/FactorNet/"
"template/dataloader_files/gencode_features/{}?raw=true")

url_template = ("https://s3.eu-central-1.amazonaws.com/kipoi-models/"
"dataloader_files/FactorNet/dataloader_files/gencode_features/{}")

# url_template = "https://github.com/uci-cbcl/FactorNet/blob/master/resources/{}?raw=true"
fnames = [('cpgisland.bed.gz', 'ac7dc007d7019c05adb7a331d1d6721d'),
Expand Down Expand Up @@ -88,8 +88,6 @@ def __init__(self,

self.bt = BT(intervals_file)



# Fasta
self.fasta_file = fasta_file
self.fasta_extractor = None # initialize later
Expand All @@ -98,7 +96,7 @@ def __init__(self,
self.dnase_extractor = None
# mappability
if mappability_file is None:
# download the mappability file if not existing
# download the mappability file if not existing
common_dl_dir = os.path.join(this_dir, "../../template/downloaded/dataloader_files")
makedir_exist_ok(common_dl_dir)
rf = RemoteFile(url="http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig",
Expand Down Expand Up @@ -141,10 +139,10 @@ def __init__(self,
output_dir = os.path.join(this_dir, "../../template/downloaded/dataloader_files/RNAseq_features/")
makedir_exist_ok(output_dir)
RNAseq_PC_file = os.path.join(output_dir, cell_line, "meta.txt")
url_template = ('https://github.com/kipoi/models/blob/7648d3fd57def50934835b52acadd26bcaaa275c'
'/FactorNet/template/dataloader_files/RNAseq_features/{}/meta.txt?raw=true')
url_template = ('https://s3.eu-central-1.amazonaws.com/kipoi-models/dataloader_files/'
'FactorNet/dataloader_files/RNAseq_features/{}/meta.txt')
# rf = RemoteFile(url=url_template.format(cell_line))
if not os.path.exists(RNAseq_PC_file): # or not rf.validate(mappability_file):
if not os.path.exists(RNAseq_PC_file): # or not rf.validate(mappability_file):
# download the path
download_url(url_template.format(cell_line), os.path.join(output_dir, cell_line), "meta.txt")
# rf.get_file(RNAseq_PC_file)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ args:
mappability_file:
doc: UCSC mappability track - http://hgdownload.cse.ucsc.edu/goldenpath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig. by default, provide this file with the dataloader, download in background
example:
url: https://github.com/kipoi/models/blob/92b23ed62956d56eb457488b431fc27550ddbf32/FactorNet/template/dataloader_files/wgEncodeDukeMapabilityUniqueness35bp.chr22.bigWig?raw=true
url: https://s3.eu-central-1.amazonaws.com/kipoi-models/dataloader_files/FactorNet/dataloader_files/wgEncodeDukeMapabilityUniqueness35bp.chr22.bigWig
md5: 912e1bb77fb67c1a792ac4a05d9416f9
optional: True
GENCODE_dir:
Expand Down
16 changes: 7 additions & 9 deletions FactorNet/CTCF/meta_RNAseq_Unique35_DGF/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ def download_gencode_dir(output_dir):
"""Download all the required gencode files
"""
makedir_exist_ok(output_dir)
url_template = ("https://github.com/kipoi/models/blob/"
"7648d3fd57def50934835b52acadd26bcaaa275c/FactorNet/"
"template/dataloader_files/gencode_features/{}?raw=true")

url_template = ("https://s3.eu-central-1.amazonaws.com/kipoi-models/"
"dataloader_files/FactorNet/dataloader_files/gencode_features/{}")

# url_template = "https://github.com/uci-cbcl/FactorNet/blob/master/resources/{}?raw=true"
fnames = [('cpgisland.bed.gz', 'ac7dc007d7019c05adb7a331d1d6721d'),
Expand Down Expand Up @@ -87,8 +87,6 @@ def __init__(self,

self.bt = BT(intervals_file)



# Fasta
self.fasta_file = fasta_file
self.fasta_extractor = None # initialize later
Expand All @@ -97,7 +95,7 @@ def __init__(self,
self.dnase_extractor = None
# mappability
if mappability_file is None:
# download the mappability file if not existing
# download the mappability file if not existing
common_dl_dir = os.path.join(this_dir, "../../template/downloaded/dataloader_files")
makedir_exist_ok(common_dl_dir)
rf = RemoteFile(url="http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig",
Expand All @@ -118,10 +116,10 @@ def __init__(self,
output_dir = os.path.join(this_dir, "../../template/downloaded/dataloader_files/RNAseq_features/")
makedir_exist_ok(output_dir)
RNAseq_PC_file = os.path.join(output_dir, cell_line, "meta.txt")
url_template = ('https://github.com/kipoi/models/blob/7648d3fd57def50934835b52acadd26bcaaa275c'
'/FactorNet/template/dataloader_files/RNAseq_features/{}/meta.txt?raw=true')
url_template = ('https://s3.eu-central-1.amazonaws.com/kipoi-models/dataloader_files/'
'FactorNet/dataloader_files/RNAseq_features/{}/meta.txt')
# rf = RemoteFile(url=url_template.format(cell_line))
if not os.path.exists(RNAseq_PC_file): # or not rf.validate(mappability_file):
if not os.path.exists(RNAseq_PC_file): # or not rf.validate(mappability_file):
# download the path
download_url(url_template.format(cell_line), os.path.join(output_dir, cell_line), "meta.txt")
# rf.get_file(RNAseq_PC_file)
Expand Down
2 changes: 1 addition & 1 deletion FactorNet/CTCF/meta_RNAseq_Unique35_DGF/dataloader.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ args:
mappability_file:
doc: UCSC mappability track - http://hgdownload.cse.ucsc.edu/goldenpath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig. by default, provide this file with the dataloader, download in background
example:
url: https://github.com/kipoi/models/blob/92b23ed62956d56eb457488b431fc27550ddbf32/FactorNet/template/dataloader_files/wgEncodeDukeMapabilityUniqueness35bp.chr22.bigWig?raw=true
url: https://s3.eu-central-1.amazonaws.com/kipoi-models/dataloader_files/FactorNet/dataloader_files/wgEncodeDukeMapabilityUniqueness35bp.chr22.bigWig
md5: 912e1bb77fb67c1a792ac4a05d9416f9
optional: True
use_linecache:
Expand Down
10 changes: 4 additions & 6 deletions FactorNet/E2F1/GENCODE_Unique35_DGF/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ def download_gencode_dir(output_dir):
"""Download all the required gencode files
"""
makedir_exist_ok(output_dir)
url_template = ("https://github.com/kipoi/models/blob/"
"7648d3fd57def50934835b52acadd26bcaaa275c/FactorNet/"
"template/dataloader_files/gencode_features/{}?raw=true")

url_template = ("https://s3.eu-central-1.amazonaws.com/kipoi-models/"
"dataloader_files/FactorNet/dataloader_files/gencode_features/{}")

# url_template = "https://github.com/uci-cbcl/FactorNet/blob/master/resources/{}?raw=true"
fnames = [('cpgisland.bed.gz', 'ac7dc007d7019c05adb7a331d1d6721d'),
Expand Down Expand Up @@ -87,8 +87,6 @@ def __init__(self,

self.bt = BT(intervals_file)



# Fasta
self.fasta_file = fasta_file
self.fasta_extractor = None # initialize later
Expand All @@ -97,7 +95,7 @@ def __init__(self,
self.dnase_extractor = None
# mappability
if mappability_file is None:
# download the mappability file if not existing
# download the mappability file if not existing
common_dl_dir = os.path.join(this_dir, "../../template/downloaded/dataloader_files")
makedir_exist_ok(common_dl_dir)
rf = RemoteFile(url="http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig",
Expand Down
2 changes: 1 addition & 1 deletion FactorNet/E2F1/GENCODE_Unique35_DGF/dataloader.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ args:
mappability_file:
doc: UCSC mappability track - http://hgdownload.cse.ucsc.edu/goldenpath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig. by default, provide this file with the dataloader, download in background
example:
url: https://github.com/kipoi/models/blob/92b23ed62956d56eb457488b431fc27550ddbf32/FactorNet/template/dataloader_files/wgEncodeDukeMapabilityUniqueness35bp.chr22.bigWig?raw=true
url: https://s3.eu-central-1.amazonaws.com/kipoi-models/dataloader_files/FactorNet/dataloader_files/wgEncodeDukeMapabilityUniqueness35bp.chr22.bigWig
md5: 912e1bb77fb67c1a792ac4a05d9416f9
optional: True
GENCODE_dir:
Expand Down
10 changes: 4 additions & 6 deletions FactorNet/E2F1/onePeak_Unique35_DGF/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ def download_gencode_dir(output_dir):
"""Download all the required gencode files
"""
makedir_exist_ok(output_dir)
url_template = ("https://github.com/kipoi/models/blob/"
"7648d3fd57def50934835b52acadd26bcaaa275c/FactorNet/"
"template/dataloader_files/gencode_features/{}?raw=true")

url_template = ("https://s3.eu-central-1.amazonaws.com/kipoi-models/"
"dataloader_files/FactorNet/dataloader_files/gencode_features/{}")

# url_template = "https://github.com/uci-cbcl/FactorNet/blob/master/resources/{}?raw=true"
fnames = [('cpgisland.bed.gz', 'ac7dc007d7019c05adb7a331d1d6721d'),
Expand Down Expand Up @@ -85,8 +85,6 @@ def __init__(self,

self.bt = BT(intervals_file)



# Fasta
self.fasta_file = fasta_file
self.fasta_extractor = None # initialize later
Expand All @@ -95,7 +93,7 @@ def __init__(self,
self.dnase_extractor = None
# mappability
if mappability_file is None:
# download the mappability file if not existing
# download the mappability file if not existing
common_dl_dir = os.path.join(this_dir, "../../template/downloaded/dataloader_files")
makedir_exist_ok(common_dl_dir)
rf = RemoteFile(url="http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig",
Expand Down
2 changes: 1 addition & 1 deletion FactorNet/E2F1/onePeak_Unique35_DGF/dataloader.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ args:
mappability_file:
doc: UCSC mappability track - http://hgdownload.cse.ucsc.edu/goldenpath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig. by default, provide this file with the dataloader, download in background
example:
url: https://github.com/kipoi/models/blob/92b23ed62956d56eb457488b431fc27550ddbf32/FactorNet/template/dataloader_files/wgEncodeDukeMapabilityUniqueness35bp.chr22.bigWig?raw=true
url: https://s3.eu-central-1.amazonaws.com/kipoi-models/dataloader_files/FactorNet/dataloader_files/wgEncodeDukeMapabilityUniqueness35bp.chr22.bigWig
md5: 912e1bb77fb67c1a792ac4a05d9416f9
optional: True
use_linecache:
Expand Down
Loading

0 comments on commit 0b45364

Please sign in to comment.