Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Np add metadata.txt file to build indices #1435

Open
wants to merge 20 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pipeline_versions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ ExternalExomeReprocessing 3.3.3 2024-11-04
CramToUnmappedBams 1.1.3 2024-08-02
WholeGenomeReprocessing 3.3.3 2024-11-04
ExomeReprocessing 3.3.3 2024-11-04
BuildIndices 3.0.0 2023-12-06
BuildIndices 3.1.0 2024-11-26
scATAC 1.3.2 2023-08-03
snm3C 4.0.4 2024-08-06
Multiome 5.9.2 2024-11-15
Expand Down
5 changes: 5 additions & 0 deletions pipelines/skylab/build_indices/BuildIndices.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 3.1.0
2024-11-26 (Date of Last Commit)

* Added metadata.txt file as an output to the pipeline

# 3.0.0
2023-12-06 (Date of Last Commit)

Expand Down
73 changes: 72 additions & 1 deletion pipelines/skylab/build_indices/BuildIndices.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ workflow BuildIndices {
}

# version of this pipeline
String pipeline_version = "3.0.0"
String pipeline_version = "3.1.0"


parameter_meta {
Expand Down Expand Up @@ -49,12 +49,25 @@ workflow BuildIndices {
organism = organism
}

call RecordMetadata {
input:
pipeline_version = pipeline_version,
input_files = [annotations_gtf, genome_fa, biotypes],
output_files = [
BuildStarSingleNucleus.star_index,
BuildStarSingleNucleus.modified_annotation_gtf,
CalculateChromosomeSizes.chrom_sizes,
BuildBWAreference.reference_bundle
]
}

output {
File snSS2_star_index = BuildStarSingleNucleus.star_index
String pipeline_version_out = "BuildIndices_v~{pipeline_version}"
File snSS2_annotation_gtf_modified = BuildStarSingleNucleus.modified_annotation_gtf
File reference_bundle = BuildBWAreference.reference_bundle
File chromosome_sizes = CalculateChromosomeSizes.chrom_sizes
File metadata = RecordMetadata.metadata_file
}
}

Expand Down Expand Up @@ -195,3 +208,61 @@ String reference_name = "bwa-mem2-2.2.1-~{organism}-~{genome_source}-build-~{gen
}
}


# Writes metadata.txt describing a workflow run: the pipeline version, the UTC
# time at which this task ran, the path and md5sum of every input and output
# file, and identifiers parsed from the path of the first output file.
#
# Inputs:
#   pipeline_version - version string written verbatim into the report
#   input_files      - files listed under "Input Files and their md5sums:"
#   output_files     - files listed under "Output Files and their md5sums:";
#                      the first element is also used to derive the IDs below
# Output:
#   metadata_file    - the generated metadata.txt
task RecordMetadata {
  input {
    String pipeline_version
    Array[File] input_files
    Array[File] output_files
  }

  command <<<
    set -euo pipefail

    # Append a header line, then one "<path> : <md5>" line per file, then a
    # blank line, to metadata.txt.
    # Arguments: $1 - header line; remaining arguments - file paths
    record_md5sums() {
      local header=$1
      shift
      local path checksum

      echo "$header" >> metadata.txt
      for path in "$@"; do
        # Assign the checksum on its own line: a command substitution embedded
        # in an echo argument hides the exit status of md5sum, so a missing or
        # unreadable file would be recorded with an empty checksum instead of
        # failing the task.
        checksum=$(md5sum "$path" | awk '{print $1}')
        echo "$path : $checksum" >> metadata.txt
      done
      echo "" >> metadata.txt
    }

    # create metadata file
    echo "Pipeline Version: ~{pipeline_version}" > metadata.txt
    echo "Date of Workflow Run: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> metadata.txt
    echo "" >> metadata.txt

    # echo paths and md5sums for input files
    # (paths are passed space-separated, so they must not contain whitespace)
    record_md5sums "Input Files and their md5sums:" ~{sep=" " input_files}

    # echo paths and md5sums for output files
    record_md5sums "Output Files and their md5sums:" ~{sep=" " output_files}

    # The identifiers below are taken from fixed '/'-separated positions of the
    # first output file's path (fields 3, 5 and 7).
    # NOTE(review): this assumes the path seen inside the task is laid out as
    # /<root>/<bucket>/<dir>/<submission id>/<workflow name>/<workflow id>/...
    # -- confirm against the execution backend; on other layouts these fields
    # will hold unrelated path components or be empty.
    file="~{output_files[0]}"

    # grab workspace bucket
    workspace_bucket=$(echo "$file" | awk -F'/' '{print $3}')
    echo "Workspace Bucket: $workspace_bucket" >> metadata.txt

    # grab submission ID
    submission_id=$(echo "$file" | awk -F'/' '{print $5}')
    echo "Submission ID: $submission_id" >> metadata.txt

    # grab workflow ID
    workflow_id=$(echo "$file" | awk -F'/' '{print $7}')
    echo "Workflow ID: $workflow_id" >> metadata.txt

    echo "" >> metadata.txt
  >>>

  output {
    File metadata_file = "metadata.txt"
  }

  runtime {
    docker: "ubuntu:20.04"
    memory: "5 GiB"
    disks: "local-disk 100 HDD"
    cpu: "1"
  }
}