Skip to content

Commit

Permalink
Merge pull request #21 from rki-mf1/dev
Browse files Browse the repository at this point in the history
merge dev: updates in seeding of all data simulating modules
  • Loading branch information
Krannich479 authored Sep 11, 2023
2 parents 7dcfcb3 + 92ad5a4 commit 97b52f1
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 10 deletions.
30 changes: 24 additions & 6 deletions cievad.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@
metavar='INT',
default = 10,
help='Specify the number of samples to be simulated.')
parser_hap_group2.add_argument(
'--seed',
type = int,
metavar='INT',
default = int(round(time.time())),
help='Specify a random seed. Default is current system time in seconds.')
parser_hap_group2.add_argument(
'-r', '--reference',
metavar='FASTA',
Expand Down Expand Up @@ -94,6 +100,12 @@
metavar='INT',
default = 10,
help='Specify the number of samples to be simulated.')
parser_ngs_group2.add_argument(
'--seed',
type = int,
metavar='INT',
default = int(round(time.time())),
help='Specify a random seed. Default is current system time in seconds.')
parser_ngs_group2.add_argument(
'-f', '--nb-frags',
type = int,
Expand Down Expand Up @@ -128,6 +140,12 @@
metavar='INT',
default = 10,
help='Specify the number of samples to be simulated.')
parser_ampli_group2.add_argument(
'--seed',
type = int,
metavar='INT',
default = int(round(time.time())),
help='Specify a random seed. Default is current system time in seconds.')
parser_ampli_group2.add_argument(
'-r', '--reference',
metavar='FASTA',
Expand All @@ -151,12 +169,6 @@
help='Number of CPU threads for the task.',
metavar='INT',
default = 1)
parser_nanopore.add_argument(
'--seed',
type = int,
metavar='INT',
default = int(round(time.time())),
help='Specify a random seed for the nanosim simulator. Default is current system time in seconds.')
parser_nanopore_group1 = parser_nanopore.add_argument_group('Run with config', 'Use a config file (yaml) to generate ONT-style long reads.')
parser_nanopore_group1.add_argument(
'-c', '--config',
Expand All @@ -175,6 +187,12 @@
metavar='INT',
default = 10,
help='Specify the number of samples to be simulated.')
parser_nanopore_group2.add_argument(
'--seed',
type = int,
metavar='INT',
default = int(round(time.time())),
help='Specify a random seed. Default is current system time in seconds.')
parser_nanopore_group2.add_argument(
'-m', '--model-prefix',
metavar='STR',
Expand Down
6 changes: 6 additions & 0 deletions python/configGenerators.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ def generate_hap_config(args):
config.write(' ' + trim_trailing_slash(args.head_dir) + '\n\n')
config.write('REF:\n')
config.write(' ' + args.reference + '\n\n')
config.write('SEED:\n')
config.write(' ' + str(args.seed) + '\n\n')
config.write('SAMPLES:\n')

padding = int(math.log2(args.nb_samples))
Expand All @@ -35,6 +37,8 @@ def generate_ngs_config(args):
config.write(' ' + trim_trailing_slash(args.head_dir) + '\n\n')
config.write('NGS_NB_FRAGS:\n')
config.write(' ' + str(args.nb_frags) + '\n\n')
config.write('SEED:\n')
config.write(' ' + str(args.seed) + '\n\n')
config.write('SAMPLES:\n')

padding = int(math.log2(args.nb_samples))
Expand All @@ -56,6 +60,8 @@ def generate_ampli_config(args):
config.write(' ' + trim_trailing_slash(args.head_dir) + '\n\n')
config.write('REF:\n')
config.write(' ' + args.reference + '\n\n')
config.write('SEED:\n')
config.write(' ' + str(args.seed) + '\n\n')
config.write('PRIMER:\n')
config.write(' ' + args.primers + '\n\n')
config.write('SAMPLES:\n')
Expand Down
7 changes: 7 additions & 0 deletions snakemake/amplicon/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ configfile: "configs/snake_config_amplicon.yaml"
#################### PARAMS
HEAD_DIR = config["HEAD_DIR"]
SAMPLES = config["SAMPLES"]
SEED = config["SEED"]


#################### RULES
Expand All @@ -25,12 +26,15 @@ rule amplicon_simulator:
primer = config["PRIMER"]
output:
amplicons = config["HEAD_DIR"] + "/data/simulated_hap{sample}/amplicons.fa"
params:
seed = lambda w: (SEED * int(w.sample.lstrip('0'))) % 2147483647 # that's (2^31)-1, the upper bound
log:
config["HEAD_DIR"] + "/logs/amplisim/amplisim.hap{sample}.log"
shell:
"""
# STATIC COMPILED BINARY OF AMPLISIM WILL BE REPLACED BY A LIVE BUILD OR CONDA
./bin/amplisim-v0_1_0-ubuntu_20_04 \
-s {params.seed} \
-o {output.amplicons} \
{input.reference} \
{input.primer} \
Expand All @@ -43,6 +47,8 @@ rule ngs_read_simulator:
output:
r1 = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.amplicon.ngs.R1.fastq",
r2 = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.amplicon.ngs.R2.fastq"
params:
seed = lambda w: (SEED * int(w.sample.lstrip('0'))) % 2147483647 # that's (2^31)-1, the upper bound
threads:
workflow.cores
conda:
Expand All @@ -55,6 +61,7 @@ rule ngs_read_simulator:
-i {input.amplicons} \
-o {output.r1} \
-or {output.r2} \
--seed {params.seed} \
--illumina-read-length 150 \
> {log} 2>&1
"""
5 changes: 4 additions & 1 deletion snakemake/hap/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ include: "../include/vcf-norm-truth.smk"
HEAD_DIR = config["HEAD_DIR"]
REF = config["REF"]
SAMPLES = config["SAMPLES"]
SEED = config["SEED"]


#################### RULES
Expand All @@ -32,6 +33,8 @@ rule hap_simulator:
output:
fasta = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.fasta",
vcf = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.vcf",
params:
seed = lambda w: (SEED * int(w.sample.lstrip('0'))) % 2147483647 # that's (2^31)-1, the upper bound for mason_variator
conda:
config["HEAD_DIR"] + "/env/conda_mason.yaml"
log:
Expand All @@ -42,7 +45,7 @@ rule hap_simulator:
--in-reference {input.ref} \
--out-fasta {output.fasta} \
--out-vcf {output.vcf} \
--seed {wildcards.sample} \
--seed {params.seed} \
--snp-rate 0.01 \
--small-indel-rate 0.005 \
--min-small-indel-size 1 \
Expand Down
2 changes: 1 addition & 1 deletion snakemake/nanopore/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ rule nanopore_read_simulator:
median_length = config["MEDIAN_LENGTH"],
sd_length = config["SD_LENGTH"],
nb_reads = config["NB_READS"],
seed = config["SEED"]
seed = lambda w: (SEED * int(w.sample.lstrip('0'))) % 2147483647 # that's (2^31)-1, the upper bound
threads:
workflow.cores
conda:
Expand Down
6 changes: 4 additions & 2 deletions snakemake/ngs/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ configfile: "configs/snake_config_ngs.yaml"
HEAD_DIR = config["HEAD_DIR"]
NGS_NB_FRAGS = config["NGS_NB_FRAGS"]
SAMPLES = config["SAMPLES"]

SEED = config["SEED"]

#################### RULES
rule all:
Expand All @@ -30,7 +30,8 @@ rule ngs_read_simulator:
r2 = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.ngs.R2.fastq"
# ra = temp(config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.bam")
params:
nb_frag = config["NGS_NB_FRAGS"]
nb_frag = config["NGS_NB_FRAGS"],
seed = lambda w: (SEED * int(w.sample.lstrip('0'))) % 2147483647 # that's (2^31)-1, the upper bound
threads:
workflow.cores
conda:
Expand All @@ -44,6 +45,7 @@ rule ngs_read_simulator:
-n {params.nb_frag} \
-o {output.r1} \
-or {output.r2} \
--seed {params.seed} \
--num-threads {threads} \
--fragment-min-size 450 \
--fragment-max-size 550 \
Expand Down

0 comments on commit 97b52f1

Please sign in to comment.