Skip to content

Commit

Permalink
formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
josefawelling committed Jan 18, 2024
1 parent e5d28ed commit c228a37
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 28 deletions.
3 changes: 0 additions & 3 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,19 @@ sample-sheet:
# path to the fastq files of the samples for the sample sheet
data-path: "/groups/ds/metagenomes/run_folder/"


data-handling:
# path to store input fastq data within the workflow
data: "data/"
# path to store databases and reference genomes used within the workflow
resources: "resources/"


## quality criteria used for filtering
quality-criteria:
# minimal length of acceptable reads
min-length-reads: 15
# average quality of acceptable reads (PHRED)
min-PHRED: 20


human-ref: "https://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.fna.gz"

kraken:
Expand Down
12 changes: 5 additions & 7 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@ include: "rules/qc.smk"
include: "rules/species_diversity.smk"
include: "rules/report.smk"


if config["sample-sheet"]["auto-creation"]:

include: "rules/preprocessing.smk"


DATE = get_run_date()


Expand All @@ -37,15 +40,10 @@ rule all:

onsuccess:
print("Workflow finished without an error.")
if os.path.exists("results/{date}/report/{date}_report.zip".format(date=DATE)):
if os.path.exists("results/{date}/report/{date}_report.zip".format(date=DATE)):
shell("tar cpfz results/{DATE}/{DATE}_results.tar.gz results/{DATE}/report/")
print(
"You can find the results in {date}_results.tar.gz".format(
date=DATE
)
)
print("You can find the results in {date}_results.tar.gz".format(date=DATE))


onerror:
print("An error occurred")

2 changes: 1 addition & 1 deletion workflow/rules/preprocessing.smk
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
rule create_sample_sheet:
input:
"config/pep/samples.csv"
"config/pep/samples.csv",
params:
inpath=config["sample-sheet"]["data-path"],
renaming=config["sample-sheet"]["rename-sample-files"],
Expand Down
42 changes: 25 additions & 17 deletions workflow/scripts/create_sample_sheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,27 @@
## write to log file
sys.stderr = open(snakemake.log[0], "w")

inpath=snakemake.params.inpath
renaming=snakemake.params.renaming
sample_csv=snakemake.input[0]
inpath = snakemake.params.inpath
renaming = snakemake.params.renaming
sample_csv = snakemake.input[0]


def rename_fastqs(path):
samples=[]
#fastqs=os.listdir(path)
fastqs=[file for file in os.listdir(path) if file.endswith(".fastq.gz")]
if not fastqs:
print(f"Error: There are no fastq files in the directory. Have you used the correct path: {path}?")
raise Exception(f"There are no fastq files in the directory. Have you used the correct path: {path}?")
samples = []

fastqs = [file for file in os.listdir(path) if file.endswith(".fastq.gz")]
if not fastqs:
print(
f"Error: There are no fastq files in the directory. Have you used the correct path: {path}?"
)
raise Exception(
f"There are no fastq files in the directory. Have you used the correct path: {path}?"
)

if renaming:
print("Renaming fastq files, e.g. from sampleID_S40_L001_R1_001.fastq.gz to sampleID_R1.fastq.gz")
print(
"Renaming fastq files, e.g. from sampleID_S40_L001_R1_001.fastq.gz to sampleID_R1.fastq.gz"
)
else:
print("Fastq files will not be renamed")

Expand All @@ -33,20 +39,22 @@ def rename_fastqs(path):
samples.append(sample)

if renaming:
os.system(f"mv {path}{fastq} {path}{fastq_new}")
return(samples)
os.system(f"mv {path}{fastq} {path}{fastq_new}")

return samples


def write_sample_sheet(samples, path, outfile):
#os.system(f"touch {outfile}")
# os.system(f"touch {outfile}")

with open(outfile,"w") as sheet:
with open(outfile, "w") as sheet:
sheet.write("sample_name,fq1,fq2\n")

for sample in samples:
sheet.write(f"{sample},{path}{sample}_R1.fastq.gz,{path}{sample}_R2.fastq.gz\n")
sheet.write(
f"{sample},{path}{sample}_R1.fastq.gz,{path}{sample}_R2.fastq.gz\n"
)


samples=rename_fastqs(inpath)
samples = rename_fastqs(inpath)
write_sample_sheet(samples, inpath, sample_csv)

0 comments on commit c228a37

Please sign in to comment.