diff --git a/Dockerfile b/Dockerfile
index 299d068..94a291d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,6 @@
 FROM condaforge/mambaforge:latest
 LABEL io.github.snakemake.containerized="true"
-LABEL io.github.snakemake.conda_env_hash="f7297458609bceb0462c5a2467a4d166cc341f021f89686883de965a01db8e21"
+LABEL io.github.snakemake.conda_env_hash="28c328bc05583c80dbef9948c2ab8c01c4c5f9f5b7d2411314f06e0b3472cfd0"
 
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt update && apt install -y build-essential libz-dev && rm -rf /var/lib/apt/lists/*
@@ -54,6 +54,58 @@ COPY workflow/envs/create_inflation_factors_table.yaml /conda-envs/a160f42d06f9d24b41c5cbece52b682d/environment.yaml
 RUN mkdir -p /conda-envs/a2826e2ef1005ed23bdbb3539321f7e9
 COPY workflow/envs/ldsc.yaml /conda-envs/a2826e2ef1005ed23bdbb3539321f7e9/environment.yaml
 
+# Conda environment:
+#   source: workflow/envs/plink-pandas.yml
+#   prefix: /conda-envs/a9b8ccc53333def92898879edc6df0ca
+#   name: plink-pandas
+#   dependencies:
+#     - bioconda::plink=1.90*
+#     - bioconda::tabix=1.11
+#     - python=3.11.*
+#     - numpy
+#     - scipy
+#     - pandas
+#     - pip
+RUN mkdir -p /conda-envs/a9b8ccc53333def92898879edc6df0ca
+COPY workflow/envs/plink-pandas.yml /conda-envs/a9b8ccc53333def92898879edc6df0ca/environment.yaml
+
+# Conda environment:
+#   source: workflow/envs/plink2.yml
+#   prefix: /conda-envs/fd0f8d28d055274e6ce4cf006d07ec05
+#   name: plink2
+#   channels:
+#     - defaults
+#   dependencies:
+#     - bioconda::plink2==2.00a5
+RUN mkdir -p /conda-envs/fd0f8d28d055274e6ce4cf006d07ec05
+COPY workflow/envs/plink2.yml /conda-envs/fd0f8d28d055274e6ce4cf006d07ec05/environment.yaml
+
+# Conda environment:
+#   source: workflow/envs/susier.yml
+#   prefix: /conda-envs/68f405a25f9c3bf3528a1b7b39978bcc
+#   name: susier
+#   dependencies:
+#     - r-base==4.3.0
+#     - r-susier==0.12.35
+#     - r-tidyverse==2.0.0
+#     - r-dplyr==1.1.2
+#     - r-stringr==1.5.0
+#     - r-purrr==1.0.1
+#     - r-glue==1.6.2
+#     - r-ggplot2==3.4.2
+#     - r-ggpubr==0.6.0
+#     - r-data.table==1.14.8
+#     - r-rcpp==1.0.*
+#     - r-rcppeigen==0.3.3.*
+#     - r-markdown==1.10
+#     - r-rlog==0.1.0
+#     - numpy==1.24.3
+#     - scipy==1.9.3
+#     - pandas==1.5.3
+#     - pip
+RUN mkdir -p /conda-envs/68f405a25f9c3bf3528a1b7b39978bcc
+COPY workflow/envs/susier.yml /conda-envs/68f405a25f9c3bf3528a1b7b39978bcc/environment.yaml
+
 # Conda environment:
 #   source: workflow/scripts/gwaspipe/environment.yml
 #   prefix: /conda-envs/ab67c3cfb8e1a5ad9d9cb7824966853e
@@ -83,5 +135,8 @@ COPY workflow/scripts/gwaspipe/environment.yml /conda-envs/ab67c3cfb8e1a5ad9d9cb7824966853e/environment.yaml
 RUN mamba env create --prefix /conda-envs/6e056d31662ab0bd2fd3fba49416042f --file /conda-envs/6e056d31662ab0bd2fd3fba49416042f/environment.yaml && \
     mamba env create --prefix /conda-envs/a160f42d06f9d24b41c5cbece52b682d --file /conda-envs/a160f42d06f9d24b41c5cbece52b682d/environment.yaml && \
     mamba env create --prefix /conda-envs/a2826e2ef1005ed23bdbb3539321f7e9 --file /conda-envs/a2826e2ef1005ed23bdbb3539321f7e9/environment.yaml && \
+    mamba env create --prefix /conda-envs/a9b8ccc53333def92898879edc6df0ca --file /conda-envs/a9b8ccc53333def92898879edc6df0ca/environment.yaml && \
+    mamba env create --prefix /conda-envs/fd0f8d28d055274e6ce4cf006d07ec05 --file /conda-envs/fd0f8d28d055274e6ce4cf006d07ec05/environment.yaml && \
+    mamba env create --prefix /conda-envs/68f405a25f9c3bf3528a1b7b39978bcc --file /conda-envs/68f405a25f9c3bf3528a1b7b39978bcc/environment.yaml && \
     mamba env create --prefix /conda-envs/ab67c3cfb8e1a5ad9d9cb7824966853e --file /conda-envs/ab67c3cfb8e1a5ad9d9cb7824966853e/environment.yaml && \
     mamba clean --all -y
diff --git a/Makefile b/Makefile
index a305005..c66d079 100644
--- a/Makefile
+++ b/Makefile
@@ -28,5 +28,5 @@ rerun:
 unlock:
 	snakemake --unlock
 
-dockerfile:
+dockerfile_:
 	snakemake --containerize --snakefile workflow/Snakefile > Dockerfile
diff --git a/config/config.yaml b/config/config.yaml
index 0ab5744..ab8c666 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -15,6 +15,54 @@ workspace_path: "/path/to/the/workspace"
 sumstat:
   pvalcol: "LOG10P"
   pthr: 1.7e-11
+  annotate: False
+
+
+# Genotype position definition
+genodata:
+  json: "data/genetic_data.json"
+  name: "INTERVAL"
+
+
+# Phenotype file used in the GWAS
+# -------------------------------
+# Tab separated.
+# The first two columns must be FID and IID.
+pheno_file: "data/INTERVAL_NonImp_residuals_final.txt"
+run_list: "data/pheno_to_run.csv"
+sample_file: "data/samplelist.csv"
+
+# Clumping
+# --------
+# NB: logp1 and logp2 will only work with plink2
+clumping:
+  # logp1: 10.769551078621726
+  logp1: 7.3
+  logp2: 1.3010299956639813
+  r2: 0.1
+  kb: 10000
+  p1: 1.7e-11
+  p2: 0.05
+  totsize: 1e6
+
+
+# SusieR parameters
+# -----------------
+susieR:
+  # The following parameter enables the use of a correlation matrix based
+  # on LD, as described in
+  # https://stephenslab.github.io/susieR/articles/finemapping_summary_statistics.html
+  # If set to False (default), the genotypes coded with the additive model
+  # are used together with the phenotype to fit the model.
+  use_ld: False
+  # When running this pipeline on CHRIS samples, the IDs have leading zeros
+  # and a total length of 10 characters. scripts/finemapping.R therefore
+  # zero-pads the phenotype IDs to match the ones in the genotypes.
+  # Set this value to False to disable the zero padding.
+  chris_id: False
+  min_abs_corr: 0.1
+  iter: 1000
+
 params:
   harmonize_sumstats:
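The clumping thresholds above are given on the -log10 scale because the
plink2 flags used in the clumping rule below (--clump-log10-p1 and
--clump-log10-p2) read them that way. A quick sanity check of the values in
this config, in plain Python:

    import math

    print(-math.log10(1.7e-11))  # 10.769551078621726 -> the commented-out logp1
    print(-math.log10(5e-8))     # ~7.30 -> logp1, the genome-wide threshold
    print(-math.log10(0.05))     # 1.3010299956639813 -> logp2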
diff --git a/environment.yml b/environment.yml
index 83f0185..1205b1b 100644
--- a/environment.yml
+++ b/environment.yml
@@ -10,4 +10,3 @@ dependencies:
   - snakemake-storage-plugin-s3
   - mamba
   - pip
-  - apptainer
diff --git a/slurm/config.yaml b/slurm/config.yaml
index f408086..1bdedd6 100644
--- a/slurm/config.yaml
+++ b/slurm/config.yaml
@@ -39,3 +39,17 @@ set-resources:
     mem_mb: 14336 + attempt * 2048
   save_min_pvalue:
     mem_mb: 2048 + attempt * 2048
+  sumstat_2_plink:
+    mem_mb: 12288 + attempt * 2048
+  cut_pheno:
+    mem_mb: 12288 + attempt * 2048
+  clumping:
+    mem_mb: 16384 + attempt * 2048
+  enlarge_and_merge:
+    mem_mb: 14336 + attempt * 2048
+  run_susieR:
+    mem_mb: 14336 + attempt * 2048
+  collect_by_pheno:
+    mem_mb: 12288 + attempt * 2048
+  collect_all:
+    mem_mb: 12288 + attempt * 2048
diff --git a/submit.sbatch b/submit.sbatch
index 96eccda..7d4e0d9 100644
--- a/submit.sbatch
+++ b/submit.sbatch
@@ -11,7 +11,6 @@
 source ~/.bashrc
 
 module -s load singularity/3.8.5
-
 # set some singularity directories depending on frontend/computing node/vm
 case $(hostname) in
   hnode*)
@@ -36,5 +35,5 @@ if [ ! -d "pqtl_pipeline" ]; then
   git clone --recurse-submodules https://github.com/ht-diva/pqtl_pipeline.git
 fi
 cd pqtl_pipeline
-conda activate snakemake
+conda activate /group/diangelantonio/software/envs/snakemake
 make run
diff --git a/workflow/Snakefile b/workflow/Snakefile
index b165be1..01e97e9 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -14,9 +14,9 @@ include: "rules/common.smk"
 include: "rules/metal.smk"
 include: "rules/ldsc.smk"
 include: "rules/tiledb.smk"
+include: "rules/finemapping.smk"
 
-# include: "rules/finemapping.smk"
 # include: "rules/move.smk"
diff --git a/workflow/envs/plink-pandas.yml b/workflow/envs/plink-pandas.yml
new file mode 100644
index 0000000..8a46920
--- /dev/null
+++ b/workflow/envs/plink-pandas.yml
@@ -0,0 +1,9 @@
+name: plink-pandas
+dependencies:
+  - bioconda::plink=1.90*
+  - bioconda::tabix=1.11
+  - python=3.11.*
+  - numpy
+  - scipy
+  - pandas
+  - pip
diff --git a/workflow/envs/plink2.yml b/workflow/envs/plink2.yml
new file mode 100644
index 0000000..1d128cb
--- /dev/null
+++ b/workflow/envs/plink2.yml
@@ -0,0 +1,5 @@
+name: plink2
+channels:
+  - defaults
+dependencies:
+  - bioconda::plink2==2.00a5
diff --git a/workflow/envs/susier.yml b/workflow/envs/susier.yml
new file mode 100644
index 0000000..220b889
--- /dev/null
+++ b/workflow/envs/susier.yml
@@ -0,0 +1,20 @@
+name: susier
+dependencies:
+  - r-base==4.3.0
+  - r-susier==0.12.35
+  - r-tidyverse==2.0.0
+  - r-dplyr==1.1.2
+  - r-stringr==1.5.0
+  - r-purrr==1.0.1
+  - r-glue==1.6.2
+  - r-ggplot2==3.4.2
+  - r-ggpubr==0.6.0
+  - r-data.table==1.14.8
+  - r-rcpp==1.0.*
+  - r-rcppeigen==0.3.3.*
+  - r-markdown==1.10
+  - r-rlog==0.1.0
+  - numpy==1.24.3
+  - scipy==1.9.3
+  - pandas==1.5.3
+  - pip
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index 37fd6d0..8e55f01 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -1,14 +1,27 @@
 from pathlib import Path
 
 import pandas as pd
+import json
 
+# Store config variables for easy access
+pvalcol = config["sumstat"]["pvalcol"]
+pthr = config["sumstat"]["pthr"]
+run_list = pd.read_csv(config["run_list"], header=0, sep="\t")
+
+# Load genetic data information
+genotype = config["genodata"]["name"]
+with open(config["genodata"]["json"], "r") as f:
+    gd = json.load(f)
+gt = gd[genotype]
+
+# Define input for the rules
 data = []
 with open(config["sumstats_path"], "r") as fp:
     lines = fp.readlines()
     for line in lines:
         p = Path(line.strip())
-        data.append((p.stem, str(p)))
+        seqid = ".".join(p.stem.split(".")[:3])
+        data.append((seqid, str(p)))
 
 analytes = (
     pd.DataFrame.from_records(data, columns=["seqid", "sumstat_path"])
@@ -17,6 +30,16 @@ analytes = (
 )
 
 
+def get_pfile_from_chrom(wildcards):
+    pfiles = gt["plinkfiles"]
+    nfiles = gt["nfiles"]
+    if nfiles == 1:
+        ff = pfiles
+    else:
+        ff = [p for p in pfiles if p.find(f"dedup_{wildcards.chrom}_") > 0]
+    return ff[0]
+
+
 def get_sumstats(wildcards):
     return analytes.loc[wildcards.seqid, "sumstat_path"]
 
@@ -74,4 +97,7 @@ def get_final_output():
             )
         )
 
+    if config.get("run").get("finemapping"):
+        final_output.append(ws_path("all_phenos_summary.cs"))
+
     return final_output
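get_pfile_from_chrom expects the JSON file referenced by genodata.json to map
each genotype panel name to its plink file list. The layout below is an
illustrative sketch: the paths and the "dedup_{chrom}_" naming are
assumptions inferred from the lookup code, not taken from the repository:

    import json

    # Hypothetical data/genetic_data.json content
    gd = json.loads('''
    {
      "INTERVAL": {
        "nfiles": 22,
        "plinkfiles": [
          "data/geno/impute_dedup_1_interval",
          "data/geno/impute_dedup_2_interval"
        ]
      }
    }
    ''')
    gt = gd["INTERVAL"]

    # Same selection logic as get_pfile_from_chrom for chrom == "1"
    ff = [p for p in gt["plinkfiles"] if p.find("dedup_1_") > 0]
    print(ff[0])  # data/geno/impute_dedup_1_interval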
diff --git a/workflow/rules/finemapping.smk b/workflow/rules/finemapping.smk
index fdf6f0e..701cca6 100644
--- a/workflow/rules/finemapping.smk
+++ b/workflow/rules/finemapping.smk
@@ -1,10 +1,6 @@
-# Credits to @filosi
+# Credits to @filosi, adapted from
 # https://github.com/EuracBiomedicalResearch/finemap_pipeline
 
-# Store config variables for ease access
-pvalcol = config["sumstat"]["pvalcol"]
-pthr = config["sumstat"]["pthr"]
-
 # Divide sumstat by chromosome
 rule sumstat_2_plink:
@@ -15,7 +11,7 @@ rule sumstat_2_plink:
     output:
         temp(ws_path("fm/{seqid}/tmp/chr{chrom}_sumstat.csv")),
     resources:
-        runtime=lambda wc, attempt: attempt * 100,
+        runtime=lambda wc, attempt: attempt * 60,
     params:
         pval_thr=pthr,
         pval_col=pvalcol,
@@ -31,8 +27,137 @@ rule cut_pheno:
     input:
         phenofile=config["pheno_file"],
     output:
-        temp(ws_path("{pheno}/tmp/phenotype.csv")),
+        temp(ws_path("fm/{seqid}/tmp/phenotype.csv")),
+    resources:
+        runtime=lambda wc, attempt: attempt * 60,
     conda:
         "../envs/plink-pandas.yml"
     script:
         "../scripts/separate_pheno.py"
+
+
+rule clumping:
+    message:
+        "Run clumping"
+    input:
+        smstat=ws_path("fm/{seqid}/tmp/chr{chrom}_sumstat.csv"),
+    output:
+        ws_path("fm/{seqid}/clumping/chr{chrom}.clumps"),
+        ws_path("fm/{seqid}/clumping/chr{chrom}.log"),
+    params:
+        infile=get_pfile_from_chrom,
+        ofile=lambda wildcards, output: output[0].replace(".clumps", ""),
+        clump_logp1=config["clumping"]["logp1"],
+        clump_logp2=config["clumping"]["logp2"],
+        clump_r2=config["clumping"]["r2"],
+        clump_kb=config["clumping"]["kb"],
+        sampfile=config["sample_file"],
+    resources:
+        runtime=lambda wc, attempt: attempt * 100,
+    conda:
+        "../envs/plink2.yml"
+    shell:
+        """
+        if [ -s {input} ]
+        then
+            plink2 --pfile {params.infile} --clump {input.smstat} \
+            --clump-log10 'input-only' --clump-field {pvalcol} \
+            --clump-log10-p1 {params.clump_logp1} --clump-log10-p2 {params.clump_logp2} \
+            --clump-r2 {params.clump_r2} --clump-kb {params.clump_kb} \
+            --clump-snp-field ID --out {params.ofile} \
+            --memory {resources.mem_mb} \
+            --keep {params.sampfile}
+        else
+            touch {output[0]}
+            touch {output[1]}
+        fi
+        """
+
+
+rule enlarge_and_merge:
+    message:
+        "Merge overlapping clumps after enlarging to at least 1Mb"
+    input:
+        rules.clumping.output[0],
+    output:
+        ws_path("fm/{seqid}/ld/chr{chrom}.ld"),
+    params:
+        plinkfile=get_pfile_from_chrom,
+        totsize=config["clumping"]["totsize"],
+    conda:
+        "../envs/plink-pandas.yml"
+    resources:
+        runtime=lambda wc, attempt: attempt * 100,
+    script:
+        "../scripts/run_susieR.py"
+
+
+rule run_susieR:
+    input:
+        ldfile=rules.enlarge_and_merge.output,
+        smstat=ws_path("fm/{seqid}/tmp/chr{chrom}_sumstat.csv"),
+        phenofile=rules.cut_pheno.output,
+    output:
+        cs_smstat=ws_path("fm/{seqid}/cs/cs_chr{chrom}.cssmstat"),
+        cs_report=ws_path("fm/{seqid}/cs/cs_chr{chrom}.csreport"),
+        cs_rds=ws_path("fm/{seqid}/cs/cs_chr{chrom}_fit.rds"),
+    params:
+        use_ld=config["susieR"]["use_ld"],
+        chris_id=config["susieR"]["chris_id"],
+        iter=config["susieR"]["iter"],
+        min_abs_corr=config["susieR"]["min_abs_corr"],
+    resources:
+        runtime=lambda wc, attempt: attempt * 100,
+    conda:
+        "../envs/susier.yml"
+    script:
+        "../scripts/finemapping.R"
+
+
+rule collect_by_pheno:
+    input:
+        expand(
+            ws_path("fm/{seqid}/cs/cs_chr{chrom}.cssmstat"),
+            chrom=lookup(query="pheno == '{seqid}'", within=run_list, cols="chrom"),
+            seqid=analytes.seqid,
+        ),
+    output:
+        ws_path("fm/{seqid}/summary.cs"),
+    resources:
+        runtime=lambda wc, attempt: attempt * 60,
+    params:
+        bypheno=False,
+    script:
+        "../scripts/aggregate_by_pheno.py"
+
+
+# rule annotate_by_pheno:
+#     input:
+#         rules.collect_by_pheno.output
+#     output:
+#         ws_path("fm/{seqid}/summary_annot.cs")
+#     params:
+#         tophitsdir = config["sumstat"]["tophits_dir"]
+#     resources:
+#         mem_mb = 8000
+#     script:
+#         "../scripts/annotate_cs.py"
+
+
+rule collect_all:
+    input:
+        branch(
+            config["sumstat"]["annotate"] == True,
+            then=expand(ws_path("fm/{seqid}/summary_annot.cs"), seqid=analytes.seqid),
+            otherwise=expand(ws_path("fm/{seqid}/summary.cs"), seqid=analytes.seqid),
+        ),
+    output:
+        ws_path("all_phenos_summary.cs"),
+    resources:
+        runtime=lambda wc, attempt: attempt * 60,
+    params:
+        bypheno=True,
+    conda:
+        "../envs/plink-pandas.yml"
+    script:
+        "../scripts/aggregate_by_pheno.py"
diff --git a/workflow/scripts/aggregate_by_pheno.py b/workflow/scripts/aggregate_by_pheno.py
new file mode 100644
index 0000000..e6f84da
--- /dev/null
+++ b/workflow/scripts/aggregate_by_pheno.py
@@ -0,0 +1,91 @@
+import pandas as pd
+import os
+from pathlib import Path
+
+
+def main(summary_files, outfile, bypheno=True):
+    # Open output file
+    fo = open(outfile, "w")
+
+    # Track whether this is the first write to the output file
+    firstw = True
+    wmode = "w"
+
+    # Cycle over input files
+    for i, ff in enumerate(summary_files):
+        # Extract phenotype from path
+        absp = Path(ff)
+        pheno = absp.parts[-2]
+        # Handle empty files via the dedicated pandas exception
+        try:
+            # Read input file
+            print(f"Computing phenotype: {pheno}")
+            try:
+                sdf = pd.read_csv(ff, sep="\t")
+            except pd.errors.EmptyDataError:
+                sdf = pd.DataFrame()
+
+            # Handle an empty dataframe
+            if sdf.shape[0] > 0:
+                if bypheno:
+                    tmpdf = summarize_df(sdf)
+                    tmpdf["pheno"] = pheno
+                    tmpdf["sumfile"] = ff
+                else:
+                    tmpdf = sdf
+
+                # Write results to output file
+                tmpdf.to_csv(fo, sep="\t", index=False, header=firstw,
+                             mode=wmode)
+
+                # After the first write, disable the header and append
+                firstw = False
+                wmode = "a"
+        except pd.errors.EmptyDataError:
+            continue
+
+    # Close file
+    fo.close()
+
+
+def summarize_df(df, cols=[]):
+    """Build the summary report for the credible sets produced by the
+    fine-mapping R script.
+
+    It extracts the variant posterior probabilities and keeps the best
+    hit for each credible set.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        contains the summary statistics filtered to the credible sets
+        computed with the susieR algorithm
+    cols : list
+        a list of column names (a subset of the columns in the original
+        sumstat) to report in the output
+
+    Returns
+    -------
+    dfres : pandas.DataFrame
+        a summary dataframe, one line for each credible set
+    """
+    gg = df.groupby(["CHROM", "cs"])
+    idx = gg.agg({"variable_prob": "idxmax", "N": "size"})
+    tmp = df.loc[idx["variable_prob"]]
+    idx = idx.set_index("variable_prob")
+    tmp["N_VARIANTS"] = idx["N"]
+    if cols:
+        try:
+            dfres = tmp[cols]
+        except KeyError:
+            print("Some columns are not in the dataframe")
+            dfres = tmp
+    else:
+        dfres = tmp
+    return dfres
+
+
+if __name__ == "__main__":
+
+    main(snakemake.input, snakemake.output[0], snakemake.params.bypheno)
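A toy run of summarize_df, assuming the columns the script relies on (CHROM,
cs, variable_prob, N): it keeps the row with the highest posterior
probability per (CHROM, cs) pair and records the credible set size:

    import pandas as pd
    # assumes summarize_df from workflow/scripts/aggregate_by_pheno.py is in scope

    df = pd.DataFrame({
        "CHROM": [4, 4, 4],
        "cs": [1, 1, 2],
        "ID": ["4:100", "4:200", "4:300"],
        "variable_prob": [0.2, 0.7, 0.9],
        "N": [3200, 3200, 3200],
    })
    out = summarize_df(df)
    # -> one row per credible set: 4:200 (cs 1, N_VARIANTS=2)
    #    and 4:300 (cs 2, N_VARIANTS=1)
    print(out[["CHROM", "cs", "ID", "variable_prob", "N_VARIANTS"]])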
diff --git a/workflow/scripts/annotate_cs.py b/workflow/scripts/annotate_cs.py
new file mode 100644
index 0000000..7e43e6c
--- /dev/null
+++ b/workflow/scripts/annotate_cs.py
@@ -0,0 +1,32 @@
+import pandas as pd
+import os
+
+
+def main(pheno, tophits_dir, cs_file):
+
+    try:
+        cs = pd.read_csv(cs_file, header=0, delimiter="\t")
+    except pd.errors.EmptyDataError:
+        return pd.DataFrame()
+
+    tpfile = os.path.join(tophits_dir, f"{pheno}.regenie.filtered.gz")
+    tpdf = pd.read_csv(tpfile, header=0, delimiter="\t",
+                       usecols=["ID", "GENE_NAME"])
+    cs.set_index("ID", inplace=True)
+    tpdf.set_index("ID", inplace=True)
+
+    cs_annot = cs.merge(tpdf, how="left", on="ID")
+
+    return cs_annot
+
+
+if __name__ == "__main__":
+
+    cs_annot = main(pheno=snakemake.wildcards.seqid,
+                    tophits_dir=snakemake.params.tophitsdir,
+                    cs_file=snakemake.input[0])
+    if cs_annot.shape[0] > 0:
+        cs_annot.to_csv(snakemake.output[0], index_label="ID", sep="\t")
+    else:
+        f = open(snakemake.output[0], "w")
+        f.close()
diff --git a/workflow/scripts/create_sample_table.py b/workflow/scripts/create_sample_table.py
new file mode 100644
index 0000000..dc49af5
--- /dev/null
+++ b/workflow/scripts/create_sample_table.py
@@ -0,0 +1,22 @@
+from pathlib import Path
+
+import pandas as pd
+
+data = []
+
+with open('../config/path_list.txt', 'r') as fp:
+    lines = fp.readlines()
+
+for line in lines:
+    p = Path(line.strip())
+    data.append((p.stem, str(p)))
+
+
+samples = (
+    pd.DataFrame.from_records(data, columns=['sample_name', 'sample_path'])
+    .set_index("sample_name", drop=False)
+    .sort_index()
+)
+
+
+# Writing to file
+# with open("myfile.txt", "w") as fp:
+#     fp.writelines(samples)
diff --git a/workflow/scripts/finemapping.R b/workflow/scripts/finemapping.R
new file mode 100644
index 0000000..3281035
--- /dev/null
+++ b/workflow/scripts/finemapping.R
@@ -0,0 +1,234 @@
+#!/shared/bioinf/R/bin/Rscript-4.3-BioC3.17
+
+#---- Libraries ----
+library(dplyr)
+library(stringr)
+library(glue)
+library(susieR, lib.loc = "/home/mfilosi/R/rocker-rstudio/4.3")
+library(Matrix)
+library(rlog)
+# library(Rfast, lib.loc = "/home/mfilosi/R/rocker-rstudio/4.3")
+library(data.table)
+
+#---- Functions ----
+# Not needed any more
+create_ldmat <- function(x, snplist){
+  #---- Create LD matrix for all SNPs ----
+  #---- Check if it's plink1.9 or plink2 input ----
+  nn <- names(x)
+  if (any(grepl("ID_", nn))){
+    colix <- grep("ID_", nn)
+  } else if (any(grepl("SNP_", nn))){
+    colix <- grep("SNP_", nn)
+  } else {
+    stop("Not a conformable ld file")
+  }
+
+  # Get the column holding the R value
+  rcol <- grep("^(UNPHASED_R|R)$", nn, perl=TRUE)
+
+  # snps <- unique(c(x$ID_A, x$ID_B))
+  snps <- unique(unlist(x[, colix]))
+  snps <- snps[!is.na(match(snps, snplist))]
+  nsnps <- length(snps)
+  snps <- data.frame(ID=snps) %>% mutate(i=1:n())
+
+  j1 <- j2 <- c("ID")
+  names(j1) <- nn[colix[1]]
+  names(j2) <- nn[colix[2]]
+
+  ldmat2 <- x %>%
+    left_join(snps, by=j1) %>%
+    left_join(snps, by=j2)
+
+  cormat <- matrix(0, nrow=nsnps, ncol=nsnps)
+  for (i in 1:nrow(ldmat2)){
+    ix <- ldmat2[i, "i.x"]
+    iy <- ldmat2[i, "i.y"]
+    cormat[ix, iy] <- cormat[iy, ix] <- ldmat2[i, rcol]
+  }
+  diag(cormat) <- 1
+
+  dimnames(cormat) <- list(snps$ID, snps$ID)
+  # attr(cormat, "d") <- rep(nsnps, 2)
+
+  return(cormat)
+}
+
+err_handling <- function(e){
+  rlog::log_error(glue("{e}"))
+  rlog::log_warn(glue("Creating empty output for clump: {clid}, chrom: {mychrom}"))
+  # Return an empty list and set class to "susie"
+  aa <- list()
+  aa$sets <- list()
+  class(aa) <- "susie"
+  return(aa)
+}
+
+#---- Setup ----
+# cwd <- snakemake@params["cwd"]
+ld_file <- snakemake@input[["ldfile"]]
+sumstat_file <- snakemake@input[["smstat"]]
+pheno_file <- snakemake@input[["phenofile"]]
+
+# Load parameters for data handling
+compute_ld <- snakemake@params[["use_ld"]]
+CHRIS <- snakemake@params[["chris_id"]]
+
+# Load parameters for the susieR model
+susie_min_abs_cor <- snakemake@params[["min_abs_corr"]]
+susie_iter <- snakemake@params[["iter"]]
+
+cs_smstat_file <- snakemake@output[["cs_smstat"]]
+cs_report_file <- snakemake@output[["cs_report"]]
+cs_rds_file <- snakemake@output[["cs_rds"]]
+# cs_log <- snakemake@output[["cs_log"]]  # no such output is defined in the rule
+
+# RUN ONLY LOCALLY
+# proj_path <- "/scratch/mfilosi/test_gwas_regenie/metaboGWAS"
+# ld_file <- glue(proj_path, "/ld/meta_6/chr4.ld")
+# geno_file <- glue(proj_path, "/ld/meta_6/chr4.ld_0.raw")
+# snp_file <- glue(proj_path, "/ld/meta_6/chr4.ld_0.snplist")
+# clump_file <- glue(proj_path, "/clumping/meta_6/chr4.clumps")
+# finemap_file <- glue(proj_path, "/results/meta_6.finemapped")
+# sumstat_file <- glue(proj_path, "/tmp-smstat/meta_6/chr4_sumstat.csv")
+# pheno_file <- glue(proj_path, "/tmp-smstat/meta_6/phenotype.csv")
+
+rlog::log_info(glue("Running finemapping on file: {ld_file}"))
+
+ld_file_size <- file.size(ld_file)
+if (ld_file_size > 0) {
+  rlog::log_debug("Start preprocessing for susieR")
+  geno_info <- read.table(ld_file, header = TRUE, sep = "\t")
+
+  #---- Read phenotype file ----
+  rlog::log_debug(glue("Reading phenotype file: {pheno_file}"))
+  pheno <- read.table(pheno_file, header = TRUE, sep = "\t")
+  y <- pheno["Y"]
+  sid <- pheno["IID"]
+  if (CHRIS) {
+    rlog::log_debug("Padding IDs...")
+    sid <- sid %>%
+      mutate(IID = str_pad(IID, side = "left", width = 10, pad = 0))
+  }
+
+  # Remove NA from sample list
+  nonasamp <- which(!is.na(y))
+  sid <- sid[nonasamp, ]
+  y <- y[nonasamp, ]
+
+  if (length(nonasamp) == 0){
+    rlog::log_error("No samples left for analysis...")
+    rlog::log_error("Please check the overlap between the sample IDs in the
+                    genotype files and in the phenotype file.")
+    quit(save="no", status=1)
+  } else {
+    #---- Read summary stat ----
+    smstat <- fread(sumstat_file, header = TRUE, sep = "\t")
+
+    #---- Prepare the result lists ----
+    cs_smstat_l <- list()
+    cs_report_l <- list()
+    cs_rssfit_l <- list()
+
+    for (i in 1:nrow(geno_info)){
+      #---- Get clump id and chromosome ----
+      clid <- geno_info[i, "CLUMPID"]
+      mychrom <- geno_info[i, "CHROM"]
+
+      #---- Logging ----
+      rlog::log_info(glue("Processing clump id {clid} on chrom: {mychrom}"))
+
+      #---- Read clump genotype file ----
+      geno_file <- geno_info[i, "GENOFILE"]
+      # geno_file <- file.path(proj_path, geno_file)
+      genos <- fread(geno_file)
+      snps <- names(genos)[7:ncol(genos)]
+      snps <- gsub("_.+", "", snps)
+      rlog::log_info(glue("Analyzing: {length(snps)} snps!"))
+
+      #---- Subset summary stat ----
+      smstattmp <- smstat[ID %in% snps, ]
+      ix <- match(snps, smstattmp$ID)
+      smstattmp <- smstattmp[ix, ]
+
+      #---- Match phenotype with genotypes ----
+      gsix <- match(genos$IID, sid)
+      genos_nona <- genos[which(!is.na(gsix))]
+      ytmp <- y[gsix[!is.na(gsix)]]
+      sidtmp <- sid[gsix[!is.na(gsix)]]
+      X <- as.matrix(genos_nona[, 7:ncol(genos_nona)]) * 1.0
+
+      rlog::log_debug("Compute susieR model...")
+      if (compute_ld){
+        betas <- smstattmp$BETA
+        sebetas <- smstattmp$SE
+        n <- min(smstattmp$N, na.rm=TRUE)
+
+        rlog::log_debug("Compute correlation matrix...")
+        R <- cor(X)
+        rlog::log_debug("Run susie model on LD.")
+        rss_ress <- tryCatch(
+          susie_rss(bhat = betas, shat = sebetas, n = n, R = R,
+                    max_iter = susie_iter, min_abs_corr = susie_min_abs_cor),
+          error = err_handling
+        )
+      } else {
+        rlog::log_debug("Run susie model on genotype.")
+        rss_ress <- tryCatch(
+          susie(X, y = ytmp, max_iter = susie_iter,
+                min_abs_corr = susie_min_abs_cor),
+          error = err_handling
+        )
+      }
+
+      cs <- rss_ress$sets
+      if (length(cs$cs) > 0) {
+        rlog::log_info("Found some credible sets!")
+        vars <- summary(rss_ress)
+        allsnps <- vars$vars
+        allsnps <- allsnps %>% mutate(ID = snps[variable])
+
+        smstatcs <- smstattmp %>%
+          left_join(allsnps, by = "ID") %>%
+          filter(cs > 0) %>%
+          mutate(csid = paste(CHROM, cs, sep = "_"))
+        csrep <- vars$cs %>%
+          mutate(CLUMPID = clid,
+                 CHROM = mychrom,
+                 csid = paste(CHROM, cs, sep = "_"))
+
+        cs_smstat_l[[i]] <- data.table(smstatcs)
+        cs_report_l[[i]] <- data.table(csrep)
+        cs_rssfit_l[[i]] <- rss_ress
+      } else {
+        rlog::log_info("No credible set found!")
+      }
+    }
+
+    cs_smstat <- data.table::rbindlist(cs_smstat_l)
+    cs_report <- data.table::rbindlist(cs_report_l)
+
+    #---- Saving output ----
+    rlog::log_info("Saving output...")
+    write.table(cs_report, file = cs_report_file, sep = "\t",
+                col.names = TRUE, row.names = FALSE, quote = FALSE)
+    write.table(cs_smstat, file = cs_smstat_file, sep = "\t",
+                col.names = TRUE, row.names = FALSE, quote = FALSE)
+    saveRDS(cs_rssfit_l, file = cs_rds_file)
+  }
+} else {
+  rlog::log_debug("Touch output files...")
+  cat(file = cs_report_file)
+  cat(file = cs_smstat_file)
+  cat(file = cs_rds_file)
+}
+
+rlog::log_info("---------- Done!!! ----------")
+
+# susie_plot(rss_res, y = "PIP")
+# susie_plot(zscores, y = "z")
+#
+# lambda <- estimate_s_rss(zscores, R = ldmat, n=n)
diff --git a/workflow/scripts/run_susieR.py b/workflow/scripts/run_susieR.py
new file mode 100644
index 0000000..d0ac57c
--- /dev/null
+++ b/workflow/scripts/run_susieR.py
@@ -0,0 +1,473 @@
+import os
+import csv
+import pandas as pd
+import numpy as np
+import subprocess as sb
+from tempfile import NamedTemporaryFile
+
+
+class Clump(object):
+    def __init__(self, lead="", proxies=[], begin=0, end=0, **kwargs):
+        self.lead = lead
+        self.proxies = proxies
+
+        if begin > end:
+            self.begin = end
+            self.end = begin
+            self.strand = "-"
+        else:
+            self.begin = begin
+            self.end = end
+            self.strand = "+"
+
+        self._span = self.end - self.begin
+
+        try:
+            pval = kwargs["pvalue"]
+        except KeyError:
+            pval = None
+        self.pvalue = pval
+
+    @classmethod
+    def from_clumpfileline(cls, row):
+        lead = row[2]
+        proxies = get_snps_fromclump(row)
+        try:
+            pvalue = float(row[3])
+        except ValueError:
+            pvalue = 1
+
+        return cls(lead=lead, proxies=proxies, pvalue=pvalue)
+
+    @property
+    def pvalue(self):
+        return self._pvalue
+
+    @property
+    def lpvalue(self):
+        return -np.log10(self._pvalue)
+
+    @pvalue.setter
+    def pvalue(self, new):
+        try:
+            if new is None or 0 <= new <= 1:
+                self._pvalue = new
+            else:
+                raise ValueError
+        except (TypeError, ValueError):
+            print(f"Incorrect range for pvalue {new}")
+            self._pvalue = None
+
+    @property
+    def span(self):
+        return self._span
+
+    def enlarge_clump(self, totsize=1000000):
+        if self.span == 0:
+            cllimits = self.get_span_from_clump()
+            clspan = cllimits[1] - cllimits[0]
+        else:
+            cllimits = self.get_interval()
+            clspan = self.span
+
+        if clspan < totsize:
+            newlimits = widen_span(cllimits, tot=totsize)
+            # newspan = np.array(cllimits)
+            # self.span = newspan
+        else:
+            newlimits = cllimits
+        self.set_interval(newlimits)
+
+        return self
+
+    def get_span_from_clump(self):
+        try:
+            pos = [ll.split(":")[1] for ll in self.proxies + [self.lead]
+                   if ll != "."]
+            pos = [int(p) for p in pos]
+            # print(pos)
+            # pos.append(self.begin)
+            # print(pos)
+        except IndexError:
+            # The list of proxies is empty
+            print("No proxies provided...")
+            pos = [0, 0]
+        except ValueError:
+            # The IDs are not formatted as expected (chr:pos)
+            print("Cannot retrieve position from proxies")
+            pos = [0, 0]
+
+        return [min(pos), max(pos)]
+
+    def to_list(self):
+        return [self.lead] + self.proxies
+
+    def get_interval(self):
+        return [self.begin, self.end]
+
+    def set_interval(self, new):
+        if isinstance(new, list):
+            tmp = np.array(new)
+            tmp.sort()
+            try:
+                tmp = tmp.astype("int64")
+            except ValueError as e:
+                print("Please provide a list of numerical values to set "
+                      f"the interval:\n {e}")
+
+        if isinstance(new, np.ndarray):
+            tmp = new.copy()
+            tmp.sort()
+
+        if isinstance(new, dict):
+            tmp = np.zeros(2, dtype="int64")
+            try:
+                tmp[0] = np.int64(new['begin'])
+                tmp[1] = np.int64(new['end'])
+            except KeyError:
+                print("Please provide a correctly formed dictionary with "
+                      "keys 'begin' and 'end'")
+            except ValueError:
+                print("Some values in the dictionary are not numeric")
+
+        self._span = tmp[-1] - tmp[0]
+        self.begin = tmp[0]
+        self.end = tmp[-1]
+
+    def __add__(self, other):
+        if isinstance(other, Clump):
+            px = self.proxies + other.proxies
+            ix = np.argsort(np.array([self.pvalue, other.pvalue]))
+
+            if ix[0] == 0:
+                ll = self.lead
+                px += [other.lead]
+                pval = self.pvalue
+            else:
+                ll = other.lead
+                px += [self.lead]
+                pval = other.pvalue
+
+            # Initialize new clump
+            res = Clump(lead=ll, proxies=px, pvalue=pval)
+
+            # Update interval
+            spanarra = np.array([self.get_interval(), other.get_interval()])
+            st = spanarra.min(axis=0)[0]
+            en = spanarra.max(axis=0)[-1]
+            res.set_interval([st, en])
+
+            return res
+        else:
+            raise NotImplementedError("Cannot add Clump class to "
+                                      f"type {other.__class__}")
+
+
+def get_snps_fromclump(line):
+    if line[-1] == 'NONE':
+        snps = []
+    else:
+        snps = line[-1].replace('(1)', '')
+        snps = snps.split(',')
+
+    return snps
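A hypothetical end-to-end use of the Clump class above (positions in bp are
made up): clumps are first widened to the configured minimum size, and
overlapping clumps are merged with `+`, which keeps the lead and p-value of
the stronger clump:

    # assumes Clump from workflow/scripts/run_susieR.py is in scope
    c1 = Clump(lead="1:500000", proxies=["1:490000", "1:510000"],
               begin=490000, end=510000, pvalue=1e-12)
    c2 = Clump(lead="1:520000", proxies=["1:515000"],
               begin=515000, end=530000, pvalue=1e-9)

    c1.enlarge_clump(totsize=1000000)  # widen to at least 1 Mb
    merged = c1 + c2                   # lead/p-value from the stronger clump
    print(merged.lead, merged.get_interval())  # 1:500000 [0, 1000000]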
+
+
+def read_clump_file(file, enlarge=False, merge=False, **kwargs):
+    """
+    Parameters
+    ----------
+    file : str, FileBuffer
+        read a clump file with extension `.clumps` resulting from
+        plink clumping, merged over chromosomes
+    enlarge : bool
+        if set to True, try to enlarge the clumping regions to a minimum
+        size of `totsize`. `totsize` can be specified through `kwargs`
+    merge : bool
+        if set to True, search the clump list for overlapping regions and
+        merge them if any are found
+
+    kwargs : additional arguments can be passed to the function. For now only
+        `totsize`, a numerical value specifying the minimum span of a clumping
+        region in bp (i.e. 1Mb: 1e6, 1kb: 1000). Default = 1Mb (1e6).
+
+    Returns
+    -------
+    list of Clump instances
+        a list of Clump() instances containing the clumps
+        reported by the plink software
+    """
+    mylist = []
+
+    # Empty files simply yield an empty list
+    with open(file, mode='r') as fc:
+        fcr = csv.reader(fc, delimiter='\t', skipinitialspace=True)
+        next(fcr, None)
+
+        for ll in fcr:
+            if len(ll) > 0:
+                # snps = get_snps_fromclump(ll)
+                # cc = Clump(
+                #     lead=ll[2],
+                #     proxies=snps,
+                #     pvalue=float(ll[3])
+                # )
+                cc = Clump.from_clumpfileline(ll)
+                mylist.append(cc)
+
+    # Enlarge clumps to a minimum size of `totsize`
+    # TODO: check that totsize is > 0... and other checks on totsize
+    if enlarge:
+        try:
+            totsize = kwargs['totsize']
+        except KeyError:
+            totsize = 1e6
+        # mylist = [enlarge_clump(cl, totsize=totsize) for cl in mylist]
+        mylist = [cl.enlarge_clump(totsize=totsize) for cl in mylist]
+
+    # Merge overlapping clumps
+    if merge:
+        mylist = sort_and_merge(mylist)
+
+    return mylist
+
+
+def compute_ld(snps, clid, plinkfile, dryrun=False, prefix="ld_clump",
+               **kwargs):
+    # Handle kwargs
+    try:
+        mem = kwargs["memory"]
+        mem = int(mem)
+    except KeyError:
+        mem = 8192
+
+    # plinkfile = os.path.join(genopath, f"chr{mychr}.forgwas.nofid")
+
+    # Define file names
+    ofile = f'{prefix}_{clid}'
+    genofile = ofile + ".raw"
+    snpfile = ofile + ".snplist"
+
+    # Write snp list
+    ff = open(snpfile, "w")
+    # with NamedTemporaryFile(mode='w') as ftmp:
+    for s in snps:
+        ff.write(s)
+        ff.write("\n")
+        # ftmp.write(s)
+        # ftmp.write("\n")
+    ff.close()
+
+    # DO NOT COMPUTE the LD matrix with plink: sometimes it is not positive
+    # semi-definite and can contain negative eigenvalues.
+    # Instead, extract the genotype matrix and compute the correlation in
+    # python/R.
+    # See issue #91: https://github.com/stephenslab/susieR/issues/91
+    # ldcommand = ['plink', '--bfile', plinkfile, '--keep-allele-order',
+    #              # '--ld-window-r2', '0.1',
+    #              '--r', 'square', '--extract',  # '--ld-snp-list'
+    #              snpfile, '--out', ofile, '--memory', str(mem)]
+    ldcommand = ['plink', '--bfile', plinkfile, '--keep-allele-order',
+                 '--extract', snpfile,
+                 '--recode', 'A', '--out', ofile, '--memory', str(mem)]
+    print(" ".join(ldcommand))
+
+    if dryrun:
+        print('---- This is a dry run ----')
+        print('---- Running the following command... ----')
+        print(' '.join(ldcommand))
+    else:
+        if len(snps) > 1:
+            myproc = sb.run(ldcommand)
+
+            if myproc.returncode > 0:
+                print("Oops, something went wrong with this process")
+
+    return genofile, snpfile
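compute_ld deliberately avoids asking plink for an r matrix (see the susieR
issue linked above) and instead dumps additively coded genotypes with
--recode A; the correlation matrix is computed downstream in python/R. A dry
run shows the command without executing plink (file names and SNP IDs here
are made up):

    # assumes compute_ld from workflow/scripts/run_susieR.py is in scope
    genofile, snpfile = compute_ld(["1:490000", "1:500000", "1:510000"],
                                   clid=0, plinkfile="data/geno/chr1",
                                   dryrun=True, prefix="example_clump")
    print(genofile)  # example_clump_0.raw
    print(snpfile)   # example_clump_0.snplist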
+
+
+def process_ld_file(ldfile):
+    # Read the ld file and adjust column names
+    # df = pd.read_csv(ldfile, sep="\t", header=0)
+    print(ldfile)
+    df = pd.read_csv(ldfile, sep=r"\s+", header=0)
+    cc = df.columns
+    df.columns = [c.replace("#", "") for c in cc]
+
+    return df
+
+
+def widen_span(span, tot):
+    span2 = np.array(span)
+    span2.sort()
+    spandist = np.diff(span2)[0]
+    totdiff = tot - spandist
+
+    if totdiff > 0:
+        span2[0] -= totdiff / 2.0
+        span2[1] += totdiff / 2.0
+
+    return span2
+
+
+def intersect(cl1, cl2):
+    cl1.sort()
+    cl2.sort()
+
+    # Handle the possible cases
+    if (cl1[0] > cl2[1]) or (cl1[1] < cl2[0]):
+        # cl1 and cl2 do not overlap
+        # |-------|            cl1
+        #            |-------| cl2
+        ck = False
+    else:
+        # Any other configuration overlaps: cl1 ends within cl2, cl1
+        # starts within cl2, or one interval fully contains the other.
+        # |-------|      cl1
+        #     |-------|  cl2
+        ck = True
+
+    return ck
+
+
+def main_tmp(clumpfile, gendata, outfile="", **kwargs):
+    # NOTE: legacy draft kept for reference. `read_cl_file` is not defined
+    # in this module, and the compute_ld/process_ld_file calls below do not
+    # match the current signatures.
+    # Get info on genetic data
+    # gg = GenDataList.from_json("../../metaboGWAS/genetic_data.json")
+    # gg = GenDataList.from_json(gendata)
+    # ggg = gg.gendata["HRC13K"]
+
+    genopath = {os.path.dirname(f) for f in gendata.get_plinkfiles()}
+    genopath = genopath.pop()
+
+    # Read clump file
+    cldf = read_cl_file(clumpfile)
+    clumplist = read_clump_file(clumpfile)
+    dflist = []
+
+    # Run LD computation for each clumping region
+    for clump in cldf.iterrows():
+        ldfile = compute_ld(clump, genopath=genopath,
+                            prefix=outfile, **kwargs)
+        # Get list of betas and sd for betas
+        try:
+            dflist.append(process_ld_file(ldfile, clump[0]))
+        except FileNotFoundError:
+            print(f"Cannot find file {ldfile}")
+    dfall = pd.concat(dflist, ignore_index=True)
+
+    print(f"Export csv file to {outfile}")
+    dfall.to_csv(outfile, sep="\t", index=False, header=True)
+
+
+def sort_and_merge(clumplist):
+    nclumps = len(clumplist)
+    if nclumps == 0:
+        return []
+
+    res = np.zeros(nclumps) * np.nan
+    clumplist.sort(key=lambda x: x.begin)
+    newclumplist = []
+    clustid = 0
+    res[0] = clustid
+    clnew = clumplist[0]
+
+    for i in range(1, nclumps):
+        if clumplist[i].begin < clumplist[i - 1].end:
+            # Found an overlap, so merge the clumps
+            res[i] = clustid
+            clnew += clumplist[i]
+        else:
+            # Store the merged clump in the results
+            newclumplist.append(clnew)
+
+            # Point to the new cluster
+            clnew = clumplist[i]
+
+            # Update the cluster id
+            clustid += 1
+            res[i] = clustid
+
+    # Add the last merge of the cycle
+    newclumplist.append(clnew)
+
+    return newclumplist
+
+
+def main(clumpfile, plinkfile, chr, outfile="", **kwargs):
+    try:
+        totsize = int(kwargs["totsize"])
+    except KeyError:
+        totsize = 1e6
+    except ValueError as e:
+        print("Invalid totsize value. Set it to default: 1e6", e)
+        totsize = 1e6
+
+    try:
+        # Get clumping file size
+        fs = os.path.getsize(clumpfile)
+    except OSError:
+        # If the file does not exist, set the size to 0
+        fs = 0
+
+    # Prepare the file for output
+    fout = open(outfile, "w")
+    # If file size is not 0...
+    if fs > 0:
+        clumplist = read_clump_file(clumpfile, enlarge=True, merge=True,
+                                    totsize=totsize)
+        fw = csv.writer(fout, delimiter="\t")
+
+        # Output header
+        myheader = ["CHROM", "CLUMPID", "GENOFILE", "SNPLIST"]
+        firstw = True
+        for i, cl in enumerate(clumplist):
+            snps = cl.to_list()
+            genofile, snpfile = compute_ld(snps, clid=i,
+                                           plinkfile=plinkfile,
+                                           prefix=outfile, **kwargs)
+
+            # Check that the written file exists and is not empty
+            try:
+                if os.path.getsize(genofile) > 0:
+                    # Write the header on the first write
+                    if firstw:
+                        fw.writerow(myheader)
+                        firstw = False
+                    fw.writerow([chr, i, genofile, snpfile])
+            except FileNotFoundError:
+                pass
+
+    fout.close()
+
+    return outfile
+
+
+if __name__ == "__main__":
+    clumplist = main(clumpfile=snakemake.input[0],
+                     plinkfile=snakemake.params.plinkfile,
+                     outfile=snakemake.output[0],
+                     chr=snakemake.wildcards.chrom,
+                     memory=snakemake.resources.mem_mb,
+                     totsize=snakemake.params.totsize)
+    # import argparse
+    # parser = argparse.ArgumentParser()
+
+    # parser.add_argument("--clumpfile", default="", required=True)
+    # parser.add_argument("--outfile", default="")  # , required=True)
+    # parser.add_argument("--dryrun", action='store_true')
+    # parser.add_argument("--plinkfile", required=True)
+    # parser.add_argument("--memory", default=8192)
+    # parser.add_argument("--chr", type=int)
+
+    # args = parser.parse_args()
+
+    # clumplist = main(args.clumpfile, plinkfile=args.plinkfile,
+    #                  outfile=args.outfile, chr=args.chr)
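The per-chromosome .ld file written by main() is not an LD matrix but a small
tab-separated manifest (CHROM, CLUMPID, GENOFILE, SNPLIST) that
finemapping.R later reads as geno_info. A sketch of inspecting it (the path
is hypothetical):

    import pandas as pd

    geno_info = pd.read_csv("fm/seq.1234.5/ld/chr4.ld", sep="\t")
    for _, row in geno_info.iterrows():
        # each row points to the plink --recode A dump for one merged clump
        print(row["CLUMPID"], row["GENOFILE"], row["SNPLIST"])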
diff --git a/workflow/scripts/separate_gwas.py b/workflow/scripts/separate_gwas.py
new file mode 100644
index 0000000..9202fa8
--- /dev/null
+++ b/workflow/scripts/separate_gwas.py
@@ -0,0 +1,76 @@
+import pandas as pd
+import argparse
+import os
+import subprocess as sb
+import gzip
+from tempfile import NamedTemporaryFile
+
+
+def extract_chr_from_smstat(inputfile, chrom=None):
+    # Extract the header from the sumstat
+    myhead = pd.read_csv(inputfile, sep='\t', header=None, nrows=1)
+    myhead = myhead.iloc[0, :].tolist()
+    with NamedTemporaryFile(mode="w+b") as tmpfile:
+        p1 = sb.run(["tabix", f"{inputfile}", f"{chrom}"], stdout=tmpfile)
+        # p2 = sb.run(["gzip", "-c"], stdin=p1.stdout, stdout=tmpfile)
+        try:
+            p1.check_returncode()
+            df = pd.read_csv(tmpfile.name, sep="\t", header=None)
+            df.columns = myhead
+        except sb.CalledProcessError:
+            df = pd.DataFrame()
+    return df
+
+
+def main(inputfile, outfile, pval_thr=5e-8, pvalcol="LOG10P", pheno="",
+         phenofile="", chrom=None):
+
+    # Columns to write for the sumstat
+    cols_to_write = ["CHROM", "GENPOS", "ID", pvalcol, "BETA", "SE", "N",
+                     "CHISQ", "ALLELE0", "ALLELE1"]
+
+    print(f"Read gwas: {inputfile}")
+    mygwas = extract_chr_from_smstat(inputfile, chrom=chrom)
+
+    # Nothing extracted for this chromosome: touch the output and stop
+    if mygwas.shape[0] == 0:
+        open(outfile, "w").close()
+        return
+
+    # Compute p-values from LOGP
+    print("Compute pvalues")
+    mygwas['P'] = 10 ** -mygwas[pvalcol]
+    ix = mygwas['P'] < pval_thr
+    mygwas_filt = mygwas[ix]
+
+    if mygwas_filt.shape[0] > 0:
+        # At least one variant passes the threshold: write the whole
+        # chromosome (excluding NA p-values), since clumping also needs
+        # the sub-threshold variants selected by p2
+        ix_nona = ~mygwas[pvalcol].isna()
+        mygwas.loc[ix_nona, cols_to_write].to_csv(outfile, sep='\t',
+                                                  index=False, header=True,
+                                                  na_rep="NA")
+    else:
+        # Touch file
+        f = open(outfile, "w")
+        f.close()
+
+
+if __name__ == '__main__':
+    main(inputfile=snakemake.input.sumstat,
+         outfile=snakemake.output[0],
+         pval_thr=snakemake.params.pval_thr,
+         pvalcol=snakemake.params.pval_col,
+         pheno=snakemake.wildcards.seqid,
+         chrom=snakemake.wildcards.chrom)
+
+# parser = argparse.ArgumentParser()
+# parser.add_argument('-i', '--infile', default=None, type=str)
+# parser.add_argument('-o', '--outdir', default=".")
+# parser.add_argument('--pcol', default="LOG10P")
+# parser.add_argument('--pthr', default=5e-8, type=float)
+# parser.add_argument('--pheno', default='')
+# args = parser.parse_args()
+
+# if not os.path.exists(args.outdir):
+#     os.makedirs(args.outdir)
+
+# main(inputfile=args.infile,
+#      outdir=args.outdir,
+#      pval_thr=args.pthr,
+#      pvalcol=args.pcol,
+#      pheno=args.pheno)
diff --git a/workflow/scripts/separate_pheno.py b/workflow/scripts/separate_pheno.py
new file mode 100644
index 0000000..87947ab
--- /dev/null
+++ b/workflow/scripts/separate_pheno.py
@@ -0,0 +1,26 @@
+import pandas as pd
+
+
+def main(phenofile, pheno, outfile):
+    print(outfile)
+    phenoval = pd.read_csv(phenofile, sep="\t", header=0,
+                           usecols=["FID", "IID", pheno])
+    phenoval.rename(columns={pheno: "Y"}, inplace=True)
+    phenoval.to_csv(outfile, index=False, sep="\t")
+
+
+if __name__ == "__main__":
+    main(phenofile=snakemake.input.phenofile,
+         pheno=snakemake.wildcards.seqid,
+         outfile=snakemake.output[0])
+    # import argparse
+
+    # parser = argparse.ArgumentParser()
+    # parser.add_argument("--phenofile", required=True)
+    # parser.add_argument("--outfile", required=True)
+    # parser.add_argument("--pheno", required=True)
+
+    # # fake_args = ["--phenofile", snakemake.input.sumstat]
+    # args = parser.parse_args()
+
+    # main(phenofile=args.phenofile, pheno=args.pheno, outfile=args.outfile)
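A minimal local check of separate_pheno's main, assuming a tab-separated
phenotype file whose first two columns are FID and IID as the config
requires (file and phenotype names here are made up):

    import pandas as pd
    # assumes main from workflow/scripts/separate_pheno.py is in scope

    pd.DataFrame({
        "FID": [1, 2], "IID": [1, 2],
        "seq.1234.5": [0.1, -0.3],
    }).to_csv("pheno.tsv", sep="\t", index=False)

    main(phenofile="pheno.tsv", pheno="seq.1234.5", outfile="phenotype.csv")
    print(pd.read_csv("phenotype.csv", sep="\t"))  # columns: FID, IID, Y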