-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[FEAT] Add new Snakefile and rules for analysis no_hashing
- Loading branch information
Anne Bertolini
committed
Aug 30, 2023
1 parent
c26467b
commit 9e451f5
Showing
3 changed files
with
255 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
# Define minimum Snakemake Version | ||
from snakemake.utils import min_version | ||
|
||
min_version("6.12.1") | ||
|
||
|
||
# Include Config file | ||
configfile: "config/config.yaml" | ||
|
||
|
||
# Include report functionality | ||
report: "../report/workflow.rst" | ||
|
||
|
||
# This file includes common functions used in the pipeline | ||
include: "rules/misc_snake.smk" | ||
|
||
|
||
## Include scampi as a module | ||
module scampi: | ||
skip_validation: | ||
True | ||
snakefile: | ||
github( | ||
"ETH-NEXUS/scAmpi_single_cell_RNA", | ||
path="workflow/snakefile_basic.smk", | ||
tag="v2.0.7", | ||
) | ||
config: | ||
config["scampi"] | ||
|
||
|
||
## Include the preprocessing rules | ||
include: "rules/gex_cellranger_no_hashing.smk" | ||
include: "rules/adt_cellranger_no_hashing.smk" | ||
## Include scampi | ||
include: "rules/scampi_module.smk" | ||
## Include citseq rules | ||
include: "rules/adt_analyse_citeseq.smk" | ||
|
||
|
||
# This rule defines which files should be created | ||
localrules: | ||
gExcite, | ||
|
||
|
||
rule gExcite: | ||
input: | ||
# Final files from preprocessing | ||
expand( | ||
"results/cellranger_gex/{sample}.matrix.mtx", | ||
sample=getSimpleSampleNames(), | ||
), | ||
expand( | ||
"results/cellranger_gex/{sample}.barcodes.tsv", | ||
sample=getSimpleSampleNames(), | ||
), | ||
expand( | ||
"results/cellranger_gex/{sample}.features.tsv", | ||
sample=getSimpleSampleNames(), | ||
), | ||
expand( | ||
"results/cellranger_adt/{sample}.matrix.mtx", | ||
sample=getSimpleSampleNames(), | ||
), | ||
# List of final files from scampi | ||
expand( | ||
"results/counts_raw/{sample}.h5", | ||
sample=getSimpleSampleNames(), | ||
), | ||
expand( | ||
"results/counts_filtered/{sample}.doublet_barcodes.txt", | ||
sample=getSimpleSampleNames(), | ||
), | ||
expand( | ||
"results/counts_raw/{sample}.raw.histogram_library_sizes.png", | ||
sample=getSimpleSampleNames(), | ||
), | ||
expand( | ||
"results/counts_corrected/{sample}.corrected.RDS", | ||
sample=getSimpleSampleNames(), | ||
), | ||
expand( | ||
"results/clustering/{sample}.clusters_phenograph.csv", | ||
sample=getSimpleSampleNames(), | ||
), | ||
expand( | ||
"results/gene_exp/{sample}.gene_expression_per_cluster.tsv", | ||
sample=getSimpleSampleNames(), | ||
), | ||
expand( | ||
"results/plotting/{sample}.celltype_barplot.png", | ||
sample=getSimpleSampleNames(), | ||
), | ||
expand( | ||
"results/gsva/{sample}.gsetscore_hm.png", | ||
sample=getSimpleSampleNames(), | ||
), | ||
# List of final files from citeseq analysis | ||
expand( | ||
"results/citeseq_analysis/{sample}/{sample}.GEX_cellrangerADT_SCE.RDS", | ||
sample=getSimpleSampleNames(), | ||
), | ||
output: | ||
"results/complete_gExcite.txt", | ||
resources: | ||
mem_mb=1000, | ||
time=1, | ||
benchmark: | ||
"results/complete_gExcite.benchmark" | ||
shell: | ||
"date > {output} " |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
# Rules required for the basic cellranger run of sc ADT data | ||
|
||
import os | ||
|
||
WORKDIR = os.getcwd() | ||
|
||
|
||
# Preprocessing to generate library file for ADT cellranger from the provided input parameters | ||
# Library file has a fixed format: fastqs,sample,library_type (with header) | ||
rule create_library_file_adt: | ||
input: | ||
fastqs_dir=WORKDIR | ||
+ "/" | ||
+ config["inputOutput"]["input_fastqs_adt"] | ||
+ "{sample}/", | ||
output: | ||
library_file=WORKDIR + "/results/cellranger_adt/{sample}.adt_library.txt", | ||
threads: config["computingResources"]["threads"]["low"] | ||
resources: | ||
mem_mb=config["computingResources"]["mem_mb"]["low"], | ||
runtime=config["computingResources"]["runtime"]["low"], | ||
log: | ||
"logs/create_library_file_adt/{sample}.log", | ||
benchmark: | ||
"results/cellranger_adt/benchmark/{sample}.generate_library_adt.benchmark" | ||
shell: | ||
r""" | ||
echo -e "fastqs,sample,library_type\n{input.fastqs_dir},{wildcards.sample},Antibody Capture" > {output.library_file} | ||
""" | ||
|
||
|
||
# Cellranger call to process the raw ADT samples | ||
rule cellranger_count_adt: | ||
input: | ||
library=WORKDIR + "/results/cellranger_adt/{sample}.adt_library.txt", | ||
reference=config["resources"]["reference_transcriptome"], | ||
features_ref=getFeatRefFileSimple, | ||
output: | ||
features_file="results/cellranger_adt/{sample}.features.tsv", | ||
matrix_file="results/cellranger_adt/{sample}.matrix.mtx", | ||
barcodes_file="results/cellranger_adt/{sample}.barcodes.tsv", | ||
web_file=report( | ||
"results/cellranger_adt/{sample}.web_summary.html", | ||
caption="../report/cellranger_adt.rst", | ||
category="preprocessing QC", | ||
), | ||
metrics_file="results/cellranger_adt/{sample}.metrics_summary.csv", | ||
params: | ||
cr_out="results/cellranger_adt/", | ||
variousParams=config["tools"]["cellranger_count_adt"]["variousParams"], | ||
targetCells=getTargetCellsCellranger_simple, | ||
sample="{sample}", | ||
threads: config["computingResources"]["threads"]["high"] | ||
log: | ||
"logs/cellranger_count_adt/{sample}.log", | ||
resources: | ||
mem_mb=config["computingResources"]["mem_mb"]["high"], | ||
runtime=config["computingResources"]["runtime"]["high"], | ||
benchmark: | ||
"results/cellranger_adt/benchmark/{sample}.cellranger_count_adt.benchmark" | ||
# NOTE: cellranger count function cannot specify the output directory, the output it the path you call it from. | ||
# Therefore, a subshell is used here. | ||
shell: | ||
"(cd {params.cr_out} ; " | ||
"{config[tools][cellranger_count_adt][call]} count " | ||
"--id={params.sample} " | ||
"--transcriptome={input.reference} " | ||
"--libraries={input.library} " | ||
"--feature-ref={input.features_ref} " | ||
"--nosecondary " | ||
"--localcores={threads} " | ||
"{params.variousParams} " | ||
"{params.targetCells}) " | ||
"&> {log} " | ||
"&& gunzip -c {params.cr_out}{params.sample}/outs/filtered_feature_bc_matrix/features.tsv.gz > {params.cr_out}{params.sample}/outs/filtered_feature_bc_matrix/features.tsv ; " | ||
"gunzip -c {params.cr_out}{params.sample}/outs/filtered_feature_bc_matrix/barcodes.tsv.gz > {params.cr_out}{params.sample}/outs/filtered_feature_bc_matrix/barcodes.tsv ; " | ||
"gunzip -c {params.cr_out}{params.sample}/outs/filtered_feature_bc_matrix/matrix.mtx.gz > {params.cr_out}{params.sample}/outs/filtered_feature_bc_matrix/matrix.mtx " | ||
"&& ln -rs '{params.cr_out}{params.sample}/outs/filtered_feature_bc_matrix/features.tsv' '{output.features_file}' ; " | ||
"ln -rs '{params.cr_out}{params.sample}/outs/filtered_feature_bc_matrix/matrix.mtx' '{output.matrix_file}' ; " | ||
"ln -rs '{params.cr_out}{params.sample}/outs/filtered_feature_bc_matrix/barcodes.tsv' '{output.barcodes_file}' ; " | ||
"ln -rs '{params.cr_out}{params.sample}/outs/web_summary.html' '{output.web_file}' ; " | ||
"ln -rs '{params.cr_out}{params.sample}/outs/metrics_summary.csv' '{output.metrics_file}' " |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import os | ||
|
||
WORKDIR = os.getcwd() | ||
|
||
|
||
# Cellranger call to process the raw GEX samples | ||
# input: fastq directory from config file and reference transcriptome read from config file | ||
# NOTE: the fastq directory as set in the config is interpreted to be relative to the gExcite working directory | ||
# output: cellranger files | ||
rule cellranger_count_gex: | ||
input: | ||
fastqs_dir=WORKDIR | ||
+ "/" | ||
+ config["inputOutput"]["input_fastqs_gex"] | ||
+ "{sample}/", | ||
reference=config["resources"]["reference_transcriptome"], | ||
output: | ||
features_file="results/cellranger_gex/{sample}.features.tsv", | ||
matrix_file="results/cellranger_gex/{sample}.matrix.mtx", | ||
barcodes_file="results/cellranger_gex/{sample}.barcodes.tsv", | ||
web_file=report( | ||
"results/cellranger_gex/{sample}.web_summary.html", | ||
caption="../report/cellranger.rst", | ||
category="preprocessing QC", | ||
), | ||
metrics_file="results/cellranger_gex/{sample}.metrics_summary.csv", | ||
params: | ||
cr_out="results/cellranger_gex/", | ||
variousParams=config["tools"]["cellranger_count_gex"]["variousParams"], | ||
targetCells=getTargetCellsCellranger_simple, | ||
mySample="{sample}", | ||
threads: config["computingResources"]["threads"]["high"] | ||
log: | ||
"logs/cellranger_count_gex/{sample}.log", | ||
resources: | ||
mem_mb=config["computingResources"]["mem_mb"]["high"], | ||
runtime=config["computingResources"]["runtime"]["high"], | ||
benchmark: | ||
"results/cellranger_gex/benchmark/{sample}.cellranger_count_gex.benchmark" | ||
# NOTE: cellranger count function cannot specify the output directory, the output is the path you call it from. | ||
# Therefore, a subshell is used here. | ||
# Also, unzip and symlink output files in preparation for downstream steps | ||
shell: | ||
"(cd {params.cr_out}; " | ||
"{config[tools][cellranger_count_gex][call]} count " | ||
"--id={params.mySample} " | ||
"--sample={params.mySample} " | ||
"--transcriptome={input.reference} " | ||
"--fastqs={input.fastqs_dir} " | ||
"--nosecondary " | ||
"--localcores={threads} " | ||
"{params.variousParams}) " | ||
"&> {log} ; " | ||
"gunzip {params.cr_out}{params.mySample}/outs/filtered_feature_bc_matrix/features.tsv.gz ; " | ||
"gunzip {params.cr_out}{params.mySample}/outs/filtered_feature_bc_matrix/barcodes.tsv.gz ; " | ||
"gunzip {params.cr_out}{params.mySample}/outs/filtered_feature_bc_matrix/matrix.mtx.gz ; " | ||
"ln -frs '{params.cr_out}{params.mySample}/outs/filtered_feature_bc_matrix/features.tsv' '{output.features_file}' ; " | ||
"ln -frs '{params.cr_out}{params.mySample}/outs/filtered_feature_bc_matrix/matrix.mtx' '{output.matrix_file}' ; " | ||
"ln -frs '{params.cr_out}{params.mySample}/outs/filtered_feature_bc_matrix/barcodes.tsv' '{output.barcodes_file}' ; " | ||
"ln -frs '{params.cr_out}{params.mySample}/outs/web_summary.html' '{output.web_file}' ; " | ||
"ln -frs '{params.cr_out}{params.mySample}/outs/metrics_summary.csv' '{output.metrics_file}' " |