From 90144525b541e914870bb41a0e5734ad92ec2093 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Fri, 2 Feb 2024 10:01:31 -0500 Subject: [PATCH] Change machine type and number of output files for fastqprocess optimus --- tasks/skylab/FastqProcessing.wdl | 2 +- tasks/skylab/StarAlign.wdl | 86 ++++++++++---------------------- 2 files changed, 28 insertions(+), 60 deletions(-) diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl index ac22cc38aa..7adbfef0a0 100644 --- a/tasks/skylab/FastqProcessing.wdl +++ b/tasks/skylab/FastqProcessing.wdl @@ -102,7 +102,7 @@ task FastqProcessing { fi fastqprocess \ - --bam-size 30.0 \ + --num-output-files 1 \ --sample-id "~{sample_id}" \ $FASTQS \ --white-list "~{whitelist}" \ diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index 8ab0c8d615..b61030f56c 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -227,12 +227,15 @@ task STARsoloFastq { # runtime values String docker = "us.gcr.io/broad-gotc-prod/star:1.0.1-2.7.11a-1692706072" - Int machine_mem_mb = 64000 - Int cpu = 8 + + String cpu_platform = "Intel Ice Lake" + Int machine_mem_mb = 512000 + Int cpu = 128 + Int disk = 2000 # multiply input size by 2.2 to account for output bam file + 20% overhead, add size of reference. - Int disk = ceil((size(tar_star_reference, "Gi") * 3)) + ceil(size(r1_fastq, "Gi") * 20) + ceil(size(r2_fastq, "Gi") * 20) + # Int disk = ceil((size(tar_star_reference, "Gi") * 3)) + ceil(size(r1_fastq, "Gi") * 20) + ceil(size(r2_fastq, "Gi") * 20) # by default request non preemptible machine to make sure the slow star alignment step completes - Int preemptible = 3 + Int preemptible = 1 } meta { @@ -292,7 +295,23 @@ task STARsoloFastq { ## single cell or whole cell COUNTING_MODE="Gene" echo "Running in ~{counting_mode} mode. The Star parameter --soloFeatures will be set to $COUNTING_MODE" - STAR \ + elif [[ "~{counting_mode}" == "sn_rna" ]] + then + ## single nuclei + if [[ ~{count_exons} == false ]] + then + COUNTING_MODE="GeneFull_Ex50pAS" + echo "Running in ~{counting_mode} mode. Count_exons is false and the Star parameter --soloFeatures will be set to $COUNTING_MODE" + else + COUNTING_MODE="GeneFull_Ex50pAS Gene" + echo "Running in ~{counting_mode} mode. Count_exons is true and the Star parameter --soloFeatures will be set to $COUNTING_MODE" + fi + else + echo Error: unknown counting mode: "$counting_mode". Should be either sn_rna or sc_rna. + exit 1; + fi + + STAR \ --soloType Droplet \ --soloStrand ~{star_strand_mode} \ --runThreadN ~{cpu} \ @@ -311,62 +330,10 @@ task STARsoloFastq { --soloBarcodeReadLength 0 \ --soloCellReadStats Standard \ ~{"--soloMultiMappers " + soloMultiMappers} - elif [[ "~{counting_mode}" == "sn_rna" ]] - then - ## single nuclei - if [[ ~{count_exons} == false ]] - then - COUNTING_MODE="GeneFull_Ex50pAS" - echo "Running in ~{counting_mode} mode. Count_exons is false and the Star parameter --soloFeatures will be set to $COUNTING_MODE" - STAR \ - --soloType Droplet \ - --soloStrand ~{star_strand_mode} \ - --runThreadN ~{cpu} \ - --genomeDir genome_reference \ - --readFilesIn "~{sep=',' r2_fastq}" "~{sep=',' r1_fastq}" \ - --readFilesCommand "gunzip -c" \ - --soloCBwhitelist ~{white_list} \ - --soloUMIlen $UMILen --soloCBlen $CBLen \ - --soloFeatures $COUNTING_MODE \ - --clipAdapterType CellRanger4 \ - --outFilterScoreMin 30 \ - --soloCBmatchWLtype 1MM_multi_Nbase_pseudocounts \ - --soloUMIdedup 1MM_Directional_UMItools \ - --outSAMtype BAM SortedByCoordinate \ - --outSAMattributes UB UR UY CR CB CY NH GX GN sF \ - --soloBarcodeReadLength 0 \ - --soloCellReadStats Standard \ - ~{"--soloMultiMappers " + soloMultiMappers} - else - COUNTING_MODE="GeneFull_Ex50pAS Gene" - echo "Running in ~{counting_mode} mode. Count_exons is true and the Star parameter --soloFeatures will be set to $COUNTING_MODE" - STAR \ - --soloType Droplet \ - --soloStrand ~{star_strand_mode} \ - --runThreadN ~{cpu} \ - --genomeDir genome_reference \ - --readFilesIn "~{sep=',' r2_fastq}" "~{sep=',' r1_fastq}" \ - --readFilesCommand "gunzip -c" \ - --soloCBwhitelist ~{white_list} \ - --soloUMIlen $UMILen --soloCBlen $CBLen \ - --soloFeatures $COUNTING_MODE \ - --clipAdapterType CellRanger4 \ - --outFilterScoreMin 30 \ - --soloCBmatchWLtype 1MM_multi_Nbase_pseudocounts \ - --soloUMIdedup 1MM_Directional_UMItools \ - --outSAMtype BAM SortedByCoordinate \ - --outSAMattributes UB UR UY CR CB CY NH GX GN sF \ - --soloBarcodeReadLength 0 \ - --soloCellReadStats Standard \ - ~{"--soloMultiMappers " + soloMultiMappers} - fi - else - echo Error: unknown counting mode: "$counting_mode". Should be either sn_rna or sc_rna. - exit 1; - fi echo "UMI LEN " $UMILen + # why is this here? touch barcodes_sn_rna.tsv touch features_sn_rna.tsv touch matrix_sn_rna.mtx @@ -434,10 +401,11 @@ task STARsoloFastq { runtime { docker: docker memory: "~{machine_mem_mb} MiB" - disks: "local-disk ~{disk} HDD" + disks: "local-disk ~{disk} SSD" disk: disk + " GB" # TES cpu: cpu preemptible: preemptible + cpuPlatform: cpu_platform } output {