---
# config.yml
#
# snakeflow config file.
# 2017-03-28

# Directory that contains all indexes and genome sequences.
genome: '/home/fangzq/genome'
# Uncompressed FASTA file holding all chromosome genome sequences.
# Taken from the GENCODE archive; download it from:
# http://www.gencodegenes.org/releases/current.html
dna: '/home/fangzq/genome/GRCh38.p13.genome.fa'

# Uncompressed FASTA file holding all transcript sequences.
# Taken from the GENCODE archive; download it from:
# http://www.gencodegenes.org/releases/current.html
cdna: '/home/fangzq/genome/GRCh38.gencode.v33.transcripts.fa'

# Path to the GTF annotation file.
# Taken from the GENCODE archive; download it from:
# http://www.gencodegenes.org/releases/current.html
gtf: '/home/fangzq/genome/GRCh38.gencode.v33.annotation.gtf'

# BED files for RSeQC.
# Both files are optional; they are not needed if you do not run RSeQC.
# Download from https://sourceforge.net/projects/rseqc/files/BED/Human_Homo_sapiens/
rseqc:
  refseq: '/home/fangzq/genome/rseqc/hg38.refseq.bed'
  housekeep: '/home/fangzq/genome/rseqc/hg38.HouseKeepingGenes.bed'

# Trimmomatic adaptors
# adaptors:
#     illumina: "/home/fangzq/github/snakeflow/adaptors/TruSeq3-PE.fa"

# Index directories.
hisat2_index: '/home/fangzq/genome/hisat2Indices_hg38'
salmon_index: '/home/fangzq/genome/salmonIndices_hg38'

# Index basename (only for hisat2).
index_prefix: 'hg38'

# Full path to the folder in which output files will be created.
workdir: '/home/fangzq/projects/bulk'

# Directory with extra scripts needed for running, e.g. preDEseq.py (StringTie).
# scripts: "/home/fangzq/github/snakeflow/scripts"

# Full path to the folder that holds all of your FASTQ files.
fastq_dir: '/data/bases/shared/bulk_RNA_seq/30_225476525_trim'

# Sequencing read length; only required for running rMATS or preDEseq.py.
read_length: 150

# Is this a paired-end sequencing library? true or false.
paired: true

# Is this a stranded library? true or false.
stranded: false

# Information in `samples` is used for DESeq2 and rMATS.
# For trimmed FASTQ files, use a suffix such as: _trimmed.fq.gz
read_pattern:
  r1: '{sample}_R1_001.fastq.gz'  # do not change {sample}
  r2: '{sample}_R2_001.fastq.gz'  # do not change {sample}

# Sample metadata file.
sample_meta: '/home/fangzq/projects/bulk/group.txt'
# The `dataframe` attribute works only if a file is given.
# Each column name corresponds to a sample attribute.
# A sampleTable.txt looks like this:
#   name        alias  conditon  treatment
#   WGC096874R  S74    Normal    0
#   WGC096875R  S75    Cancer    0
#   WGC096876R  S76    Normal    0
#   WGC096877R  S77    Cancer    0
# NOTE(review): "conditon" is probably a typo for "condition" — confirm
# against the column name the pipeline actually reads before renaming.

# List of RNA-binding proteins (RBPs).
rbps: '/home/fangzq/github/snakeflow/221RBPs.csv'
# Cutoffs for DESeq2.
log2fc: 1
fdr: 0.01
enrichr_library:
  - 'GO_Biological_Process_2018'
  - 'GO_Cellular_Component_2018'
  - 'GO_Molecular_Function_2018'
  - 'Human_Phenotype_Ontology'
  - 'MSigDB_Oncogenic_Signatures'
  - 'KEGG_2016'
  - 'KEGG_2019_Human'  # KEGG_2019_Mouse