galaxyproject · renu-pal · Nov 22, 2024 · Nov 22, 2024 · Nov 25, 2024 · Nov 25, 2024
diff --git a/tools/mmuphin/.shed.yml b/tools/mmuphin/.shed.yml
@@ -0,0 +1,14 @@
+name: mmuphin  # repository name; same spelling as the tool id in mmuphin.xml
+owner: iuc
+description: 'MMUPHin is an R package implementing meta-analysis methods for microbial community profiles'
+long_description: |
+  MMUPHin enables the normalization and combination of multiple microbial community studies. It can then help in identifying microbes, genes, or pathways that are differential with respect to combined phenotypes. 
+  Finally, it can find clusters or gradients of sample types that reproduce consistently among studies.
+homepage_url: https://huttenhower.sph.harvard.edu/mmuphin  # upstream project page
+remote_repository_url: https://github.com/biobakery/MMUPHin  # NOTE(review): points at the upstream R package, not at the wrapper sources - confirm this is intended
+type: unrestricted
+categories:
+  - Metagenomics
+auto_tool_repositories:  # the templates below are filled in with each tool's id and name
+  name_template: '{{ tool_id }}'
+  description_template: 'Wrapper for the mmuphin adjust_batch function: {{ tool_name }}'
diff --git a/tools/mmuphin/macros.xml b/tools/mmuphin/macros.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<macros>
+    <!-- Version and profile tokens referenced by the tool wrapper that imports this file -->
+    <token name="@TOOL_VERSION@">1.16.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">23.2</token>
+    <xml name="xrefs">
+        <xrefs>
+            <xref type="bio.tools">mmuphin</xref>
+            <xref type="bioconductor">mmuphin</xref>
+        </xrefs>
+    </xml>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">bioconductor-mmuphin</requirement>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <!-- Element text is the DOI itself, so it is written without surrounding spaces -->
+            <citation type="doi">10.18129/B9.bioc.MMUPHin</citation>
+        </citations>
+    </xml>
+</macros>
diff --git a/tools/mmuphin/mmuphin.xml b/tools/mmuphin/mmuphin.xml
@@ -0,0 +1,156 @@
+<tool id="mmuphin" name="mmuphin" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Performing meta-analyses of microbiome studies</description>
+    <!-- macros.xml supplies the version and profile tokens used in the tag above,
+         plus the xrefs, requirements and citations macros expanded in this wrapper -->
+    <macros><import>macros.xml</import></macros>
+    <expand macro="xrefs"/>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+        ## The column selectors hold 1-based indices into the metadata table. awk looks up the matching
+        ## header names on its first line and exports them; the R script reads them with Sys.getenv().
+        export batch=`awk -v idx='$batch_input' -F"\t" 'NR == 1 { print \$idx }' '$input_metadata'` &&
+
+        #if $covariates_input
+            ## Turn the selected indices into an awk field list such as "$3,$4"
+            #set idx = []
+            #for $i in $covariates_input:
+                #silent idx.append(f'${i}')
+            #end for
+            #set idx_for_awk = ','.join(idx)
+            export covariates=`awk -v OFS=',' -F"\t" 'NR == 1 { print $idx_for_awk}' '$input_metadata'` &&
+        #else
+            ## No covariate selected: export an empty value so the R script sees a defined, empty variable
+            export covariates='' &&
+        #end if
+
+        echo 'Assigned batch columns:' "\$batch" &&
+        echo 'Assigned covariates:' "\$covariates" &&
+
+        Rscript '$rscript'
+        #if 'TRUE' in str($additional_options.diagnostic_plot)
+            ## The PDF is only written when the diagnostic plot is requested, so only copy it then
+            && cp 'adjust_batch_diagnostic.pdf' '$diagnostic_plot_output'
+        #end if
+    ]]></command>
+
+    <configfiles>
+        <configfile name="rscript"><![CDATA[
+        library(MMUPHin)
+        ## input files
+        print("Read input files")
+        data <- read.csv("$input_data", sep = "\t", row.names=1, check.names = FALSE)
+        meta_data <- read.csv("$input_metadata", sep = "\t", row.names=1, check.names = FALSE)
+
+        ## adjust_batch matches control entries by name, so the list has to be named; an unnamed list
+        ## leaves every default in place. Booleans are rendered as their truevalue/falsevalue text and
+        ## is_true() only looks for the TRUE part. Empty optional numbers are left out so MMUPHin keeps
+        ## its own defaults. diagnostic_plot is a file name, or NULL to skip the figure.
+        ## NOTE: expected test outputs produced while the options were ignored may need regenerating.
+        is_true <- function(x) grepl("TRUE", x, fixed = TRUE)
+        controls <- list(
+        #if str($additional_options.pseudo_count) != ''
+            pseudo_count = $additional_options.pseudo_count,
+        #end if
+        #if str($additional_options.conv) != ''
+            conv = $additional_options.conv,
+        #end if
+        #if str($additional_options.maxit) != ''
+            maxit = $additional_options.maxit,
+        #end if
+            zero_inflation = is_true("$additional_options.zero_inflation"),
+            verbose = is_true("$additional_options.verbose"),
+            diagnostic_plot = if (is_true("$additional_options.diagnostic_plot")) "adjust_batch_diagnostic.pdf" else NULL
+        )
+
+        ## Header names resolved by the command block; an empty covariates variable means "no covariates"
+        batch <- Sys.getenv("batch")
+        covariates <- Sys.getenv("covariates")
+        covariates_vector <- if (nzchar(covariates)) unlist(strsplit(covariates, split = ",")) else NULL
+        result <- adjust_batch(feature_abd = data, batch = batch, covariates = covariates_vector, data = meta_data, control = controls)
+        ## col.names = NA adds the empty header cell above the row names so header and rows have equal width
+        write.table(result\$feature_abd_adj, file = "$output", quote = FALSE, sep = "\t", col.names = NA)
+    ]]></configfile>
+    </configfiles>
+
+
+
+    <inputs>
+        <param name="input_data" type="data" format="tabular" label="Data (or features) file"/>
+        <param name="input_metadata" type="data" format="tabular" label="Metadata file"/>
+        <param argument="batch_input" type="data_column" data_ref="input_metadata" use_header_names="true" label="batch"/>
+        <param argument="covariates_input" type="data_column" data_ref="input_metadata" use_header_names="true" multiple="true" optional="true" label="covariates"/>
+        <section name="additional_options" title="Additional Options" expanded="true">
+            <param argument="zero_inflation" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Run zero-inflated model"/>
+            <param argument="pseudo_count" type="float" min="0" optional="true" label="Pseudo count" help="Pseudo count to add to feature_abd before the methods' log transformation. Defaults to NULL, in which case it will be set to half of the minimal non-zero value in feature_abd"/>
+            <param argument="conv" type="float" min="0" value="0.0001" optional="true" label="Convergence threshold" help="Convergence threshold for the method's iterative algorithm for shrinking batch effect parameters"/>
+            <param argument="maxit" type="integer" min="1" value="1000" optional="true" label="Maximum number of iterations" help="Maximum number of iterations allowed for the method's iterative algorithm. Defaults to 1000"/>
+            <param argument="verbose" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Print verbose information"/>
+            <param argument="diagnostic_plot" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Generate diagnostic figure file"/>
+        </section>
+    </inputs>
+
+
+    <outputs>
+        <data name="output" format="tabular" label="Adjusted abundance table"/>
+        <data name="diagnostic_plot_output" format="pdf" label="diagnostic figure file"><filter>additional_options['diagnostic_plot']</filter></data>
+    </outputs>
+    <tests>
+        <!-- Batch adjustment on the CRC example tables: batch is metadata column 2, the covariate is column 3 -->
+        <test expect_num_outputs="2">
+            <param name="input_data" value="CRC_abd.tsv"/>
+            <param name="input_metadata" value="CRC_meta.tsv"/>
+            <param name="batch_input" value="2"/>
+            <param name="covariates_input" value="3"/>
+            <section name="additional_options">
+                <param name="zero_inflation" value="true"/>
+                <!-- pseudo_count is left empty: the expected outputs correspond to adjust_batch's default pseudo count -->
+                <param name="conv" value="0.0001"/>
+                <param name="maxit" value="1000"/>
+                <param name="verbose" value="true"/>
+                <param name="diagnostic_plot" value="true"/>
+            </section>
+            <output name="output">
+                <assert_contents>
+                    <has_size value="150053" delta="1000"/>
+                </assert_contents>
+            </output>
+            <!-- PDFs embed creation metadata, so the figure is compared by size instead of content -->
+            <output name="diagnostic_plot_output" file="adjust_batch_diagnostic.pdf" ftype="pdf" compare="sim_size" delta_frac="0.1"/>
+        </test>
+    </tests>
+    <help><![CDATA[ 
+MMUPHin
+=======
+
+MMUPHin is an R package implementing meta-analysis methods for microbial community profiles. It has interfaces for: 
+
+a) Performing batch (study) effect adjustment with adjust_batch : 
+------------------------------------------------------------------
+It aims to correct for technical batch effects in microbial feature abundances. Batch effects refer to variations in data that arise not from the biological or experimental variables of interest but due to differences in technical or procedural factors during data collection or processing. For example:
+
+- Different equipment or lab environments.
+- Different operators handling the experiment.
+- Variations in sample preparation, sequencing runs, or platforms.
+
+These unwanted variations can obscure true biological signals and introduce bias, making it critical to adjust for batch effects to ensure accurate and comparable results across datasets.
+
+The function adjust_batch in the MMUPHin package is designed to correct batch effects in microbiome data.
+
+Inputs:
+=======
+- A feature-by-sample abundance matrix (e.g., microbial abundances).
+- A metadata file, which contains information about samples, including batch identifiers and optional covariates.
+
+Output:
+=======
+A batch-adjusted abundance matrix for downstream analyses.
+
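+| b) Meta-analytic differential abundance testing (part of the MMUPHin package; this tool only runs adjust_batch).
+| c) Meta-analytic discovery of discrete (cluster-based) or continuous unsupervised population structure (part of the MMUPHin package; this tool only runs adjust_batch).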
+
+Meta-analysis methods are statistical techniques used to combine and synthesize data from multiple independent studies, typically to derive a more precise or generalizable conclusion. This approach is commonly used in fields such as medicine, psychology, and biology to aggregate research findings and increase the statistical power of analyses by pooling data from different experiments or studies.
+
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
diff --git a/tools/mmuphin/test-data/CRC_abd.tsv b/tools/mmuphin/test-data/CRC_abd.tsv