Skip to content

Commit

Permalink
more reformatting
Browse files Browse the repository at this point in the history
  • Loading branch information
bgruening committed Mar 2, 2024
1 parent 31c1870 commit 849d153
Show file tree
Hide file tree
Showing 22 changed files with 1,134 additions and 1,267 deletions.
29 changes: 14 additions & 15 deletions tools/picard/picard_AddOrReplaceReadGroups.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,20 @@
@set_read_group_vars@
@java_options@
@symlink_element_identifier@
picard
AddOrReplaceReadGroups
INPUT='$escaped_element_identifier'
$format_read_group("RGLB=", $rg_lb, '"')
$format_read_group("RGPL=", $rg_pl, '"')
$format_read_group("RGPU=", $rg_pu, '"')
$format_read_group("RGSM=", $rg_sm, '"')
$format_read_group("RGID=", $rg_id, '"')
$format_read_group("RGDS=", $rg_ds, '"')
$format_read_group("RGPI=", $rg_pi, '"')
$format_read_group("RGDT=", $rg_dt, '"')
VALIDATION_STRINGENCY='${validation_stringency}'
QUIET=true
VERBOSITY=ERROR
OUTPUT='${outFile}'
picard AddOrReplaceReadGroups
-INPUT '$escaped_element_identifier'
$format_read_group("-RGLB ", $rg_lb, '"')
$format_read_group("-RGPL ", $rg_pl, '"')
$format_read_group("-RGPU ", $rg_pu, '"')
$format_read_group("-RGSM ", $rg_sm, '"')
$format_read_group("-RGID ", $rg_id, '"')
$format_read_group("-RGDS ", $rg_ds, '"')
$format_read_group("-RGPI ", $rg_pi, '"')
$format_read_group("-RGDT ", $rg_dt, '"')
-OUTPUT '${outFile}'
-VALIDATION_STRINGENCY '${validation_stringency}'
-QUIET true
-VERBOSITY ERROR
]]></command>
<inputs>
Expand Down
2 changes: 1 addition & 1 deletion tools/picard/picard_CollectInsertSizeMetrics.xml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
picard CollectInsertSizeMetrics
-INPUT '$escaped_element_identifier'
-OUTPUT '${outFile}'
-HISTOGRAM_FILE '${histFile}'
-Histogram_FILE '${histFile}'
-DEVIATIONS '${deviations}'
#if str( $hist_width ):
Expand Down
2 changes: 1 addition & 1 deletion tools/picard/picard_CollectRnaSeqMetrics.xml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
@java_options@
picard CollectRnaSeqMetrics
-REF_FLATrefFlat.tab
-REF_FLAT refFlat.tab
#if str( $ribosomal_intervals ) != "None":
-RIBOSOMAL_INTERVALS '${ribosomal_intervals}'
Expand Down
2 changes: 1 addition & 1 deletion tools/picard/picard_CollectSequencingArtifactsMetrics.xml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#end if;
-MINIMUM_QUALITY_SCORE '${min_quality_score}'
-INCLUDE_UNPAIRED '${unpaired}'
-MAXIMUM_INSERT_SIZE'${max_size}'
-MAXIMUM_INSERT_SIZE '${max_size}'
-MINIMUM_INSERT_SIZE '${min_size}'
-MINIMUM_MAPPING_QUALITY '${minim_map_quality}'
-VALIDATION_STRINGENCY '${validation_stringency}';
Expand Down
107 changes: 51 additions & 56 deletions tools/picard/picard_EstimateLibraryComplexity.xml
Original file line number Diff line number Diff line change
@@ -1,65 +1,60 @@
<tool name="EstimateLibraryComplexity" id="picard_EstimateLibraryComplexity" version="@TOOL_VERSION@.@WRAPPER_VERSION@" profile="@PROFILE@">
<description>assess sequence library complexity from read sequences</description>
<macros>
<import>picard_macros.xml</import>
<token name="@WRAPPER_VERSION@">0</token>
</macros>
<expand macro="requirements" />
<command detect_errors="exit_code"><![CDATA[
<description>assess sequence library complexity from read sequences</description>
<macros>
<import>picard_macros.xml</import>
<token name="@WRAPPER_VERSION@">0</token>
</macros>
<expand macro="requirements"/>
<command detect_errors="exit_code"><![CDATA[
@java_options@
@symlink_element_identifier@
picard
EstimateLibraryComplexity
picard EstimateLibraryComplexity
INPUT='$escaped_element_identifier'
OUTPUT='${outFile}'
-INPUT '$escaped_element_identifier'
-OUTPUT '${outFile}'
MIN_IDENTICAL_BASES='${min_identical_bases}'
MAX_DIFF_RATE='${max_diff_rate}'
MIN_MEAN_QUALITY='${min_mean_quality}'
MAX_GROUP_RATIO='${max_group_ratio}'
READ_NAME_REGEX='${ str( $read_name_regex ) }'
OPTICAL_DUPLICATE_PIXEL_DISTANCE='${optical_duplicate_pixel_distance}'
-MIN_IDENTICAL_BASES '${min_identical_bases}'
-MAX_DIFF_RATE '${max_diff_rate}'
-MIN_MEAN_QUALITY '${min_mean_quality}'
-MAX_GROUP_RATIO '${max_group_ratio}'
-READ_NAME_REGEX '${ str( $read_name_regex ) }'
-OPTICAL_DUPLICATE_PIXEL_DISTANCE '${optical_duplicate_pixel_distance}'
VALIDATION_STRINGENCY='${validation_stringency}'
QUIET=true
VERBOSITY=ERROR
-VALIDATION_STRINGENCY '${validation_stringency}'
-QUIET true
-VERBOSITY ERROR
]]></command>
<inputs>
<param format="bam" name="inputFile" type="data" label="Select SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset" />
<param name="min_identical_bases" type="integer" value="5" label="The minimum number of bases at the starts of reads that must be identical for reads to be grouped together for duplicate detection" help="MIN_IDENTICAL_BASES; In effect total_reads / 4^max_id_bases reads will be compared at a time, so lower numbers will produce more accurate results but consume exponentially more memory and CPU; default=5"/>
<param name="max_diff_rate" type="float" value="0.03" label="The maximum rate of differences between two reads to call them identical" help="MAX_DIFF_RATE; default=0.03"/>
<param name="min_mean_quality" type="integer" min="0" max="93" value="20" label="The minimum mean quality of the bases in a read pair for the read to be analyzed" help="MIN_MEAN_QUALITY; Reads with lower average quality are filtered out and not considered in any calculations; default=20"/>
<param name="max_group_ratio" type="integer" value="500" label="Do not process self-similar groups that are this many times over the mean expected group size" help="MAX_GROUP_RATIO; I.e. if the input contains 10m read pairs and MIN_IDENTICAL_BASES is set to 5, then the mean expected group size would be approximately 10 reads; default-500"/>

<param name="read_name_regex" type="text" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*." label="Regular expression that can be used to parse read names in the incoming SAM/BAM dataset" help="READ_NAME_REGEX; Read names are parsed to extract three variables: tile/region, x coordinate and y coordinate. These values are used to estimate the rate of optical duplication in order to give a more accurate estimated library size. See help below for more info; default=[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*.">
<expand macro="sanitize_query" />
</param>
<param name="optical_duplicate_pixel_distance" type="integer" value="100" min="0" max="500" label="The maximum offset between two duplicte clusters in order to consider them optical duplicates" help="OPTICAL_DUPLICATE_PIXEL_DISTANCE; default=100"/>
<expand macro="VS" />

</inputs>
<outputs>
<data format="tabular" name="outFile" label="${tool.name} on ${on_string}: Library complexity report"/>
</outputs>

<tests>
<test>
<param name="inputFile" value="picard_EstimateLibraryComplexity.bam" ftype="bam"/>
<param name="min_identical_bases" value="5"/>
<param name="max_diff_rate" value="0.03"/>
<param name="min_mean_quality" value="20"/>
<param name="read_name_regex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*."/>
<param name="optical_duplicate_pixel_distance" value="100"/>
<param name="max_group_ratio" value="500"/>
<param name="validation_stringency" value="LENIENT"/>
<output name="outFile" file="picard_EstimateLibraryComplexity_test1.tab" ftype="tabular" lines_diff="4"/>
</test>
</tests>


<help>
<inputs>
<param format="bam" name="inputFile" type="data" label="Select SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset"/>
<param name="min_identical_bases" type="integer" value="5" label="The minimum number of bases at the starts of reads that must be identical for reads to be grouped together for duplicate detection" help="MIN_IDENTICAL_BASES; In effect total_reads / 4^max_id_bases reads will be compared at a time, so lower numbers will produce more accurate results but consume exponentially more memory and CPU; default=5"/>
<param name="max_diff_rate" type="float" value="0.03" label="The maximum rate of differences between two reads to call them identical" help="MAX_DIFF_RATE; default=0.03"/>
<param name="min_mean_quality" type="integer" min="0" max="93" value="20" label="The minimum mean quality of the bases in a read pair for the read to be analyzed" help="MIN_MEAN_QUALITY; Reads with lower average quality are filtered out and not considered in any calculations; default=20"/>
<param name="max_group_ratio" type="integer" value="500" label="Do not process self-similar groups that are this many times over the mean expected group size" help="MAX_GROUP_RATIO; I.e. if the input contains 10m read pairs and MIN_IDENTICAL_BASES is set to 5, then the mean expected group size would be approximately 10 reads; default=500"/>
<param name="read_name_regex" type="text" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*." label="Regular expression that can be used to parse read names in the incoming SAM/BAM dataset" help="READ_NAME_REGEX; Read names are parsed to extract three variables: tile/region, x coordinate and y coordinate. These values are used to estimate the rate of optical duplication in order to give a more accurate estimated library size. See help below for more info; default=[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*.">
<expand macro="sanitize_query"/>
</param>
<param name="optical_duplicate_pixel_distance" type="integer" value="100" min="0" max="500" label="The maximum offset between two duplicate clusters in order to consider them optical duplicates" help="OPTICAL_DUPLICATE_PIXEL_DISTANCE; default=100"/>
<expand macro="VS"/>
</inputs>
<outputs>
<data format="tabular" name="outFile" label="${tool.name} on ${on_string}: Library complexity report"/>
</outputs>
<tests>
<test>
<param name="inputFile" value="picard_EstimateLibraryComplexity.bam" ftype="bam"/>
<param name="min_identical_bases" value="5"/>
<param name="max_diff_rate" value="0.03"/>
<param name="min_mean_quality" value="20"/>
<param name="read_name_regex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*."/>
<param name="optical_duplicate_pixel_distance" value="100"/>
<param name="max_group_ratio" value="500"/>
<param name="validation_stringency" value="LENIENT"/>
<output name="outFile" file="picard_EstimateLibraryComplexity_test1.tab" ftype="tabular" lines_diff="4"/>
</test>
</tests>
<help>

**Purpose**

Expand All @@ -74,7 +69,7 @@ Unpaired reads are ignored in this computation.
The algorithm attempts to detect optical duplicates separately from PCR duplicates and excludes these in the calculation of library size.

Also, since there is no alignment to screen out technical reads, one further filter is applied on the data. After examining all reads, a histogram
is built of [#reads in duplicate set -> #of duplicate sets]; all bins that contain exactly one duplicate set are then removed from the Histogram
is built of [#reads in duplicate set -&gt; #of duplicate sets]; all bins that contain exactly one duplicate set are then removed from the histogram
as outliers before library size is estimated.

@dataset_collections@
Expand Down Expand Up @@ -119,5 +114,5 @@ as outliers before library size is estimated.
@more_info@

</help>
<expand macro="citations" />
<expand macro="citations"/>
</tool>
Loading

0 comments on commit 849d153

Please sign in to comment.