forked from zqfang/snakeflow
-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.yml
81 lines (65 loc) · 2.83 KB
/
config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# snakeflow config file.
# 2017-03-28
# Directory that contains all indexes and genome sequences.
genome: "/home/fangzq/genome"
# Path to an uncompressed FASTA file with all chromosome genome sequences.
# We use the GENCODE archive; download from here:
# http://www.gencodegenes.org/releases/current.html
dna: "/home/fangzq/genome/GRCh38.p13.genome.fa"
# Path to an uncompressed FASTA file with all transcript sequences.
# We use the GENCODE archive; download from here:
# http://www.gencodegenes.org/releases/current.html
cdna: "/home/fangzq/genome/GRCh38.gencode.v33.transcripts.fa"
# GTF annotation file path.
# We use the GENCODE archive; download from here:
# http://www.gencodegenes.org/releases/current.html
gtf: "/home/fangzq/genome/GRCh38.gencode.v33.annotation.gtf"
# RSeQC BED files.
# These two files are optional if you do not need to run RSeQC.
# Download from https://sourceforge.net/projects/rseqc/files/BED/Human_Homo_sapiens/
rseqc:
  refseq: "/home/fangzq/genome/rseqc/hg38.refseq.bed"
  housekeep: "/home/fangzq/genome/rseqc/hg38.HouseKeepingGenes.bed"
# Trimmomatic adaptors
# adaptors:
#   illumina: "/home/fangzq/github/snakeflow/adaptors/TruSeq3-PE.fa"
# Index directories
hisat2_index: "/home/fangzq/genome/hisat2Indices_hg38"
salmon_index: "/home/fangzq/genome/salmonIndices_hg38"
# Index basename (only for hisat2)
index_prefix: "hg38"
# Full path to a folder where output files will be created.
workdir: "/home/fangzq/projects/bulk"
# Directory of extra scripts used when running, e.g. preDEseq.py (StringTie).
# scripts: "/home/fangzq/github/snakeflow/scripts"
# Full path to a folder that holds all of your FASTQ files.
fastq_dir: "/data/bases/shared/bulk_RNA_seq/30_225476525_trim"
# Sequencing read length; only required for running rMATS or preDEseq.py.
read_length: 150
# Paired-end sequencing library? true or false.
paired: true
# Stranded library? true or false.
stranded: false
# information in `samples` is used for deseq2 and rMATS
# For trimmed FASTQ files, use a suffix like: _trimmed.fq.gz
read_pattern:
  r1: "{sample}_R1_001.fastq.gz"  # don't change {sample}
  r2: "{sample}_R2_001.fastq.gz"  # don't change {sample}
# Sample metadata file.
sample_meta: "/home/fangzq/projects/bulk/group.txt"
# The ``dataframe`` attribute works only if a file is given.
# Each column name corresponds to one of the sample attributes mentioned above.
# A sampleTable.txt looks like this:
### name alias condition treatment
## WGC096874R S74 Normal 0
## WGC096875R S75 Cancer 0
## WGC096876R S76 Normal 0
## WGC096877R S77 Cancer 0
# RNA-binding protein (RBP) list
rbps: "/home/fangzq/github/snakeflow/221RBPs.csv"
# DESeq2 cutoffs
log2fc: 1
fdr: 0.01
# Enrichr gene-set libraries listed for this run.
enrichr_library:
  - 'GO_Biological_Process_2018'
  - 'GO_Cellular_Component_2018'
  - 'GO_Molecular_Function_2018'
  - 'Human_Phenotype_Ontology'
  - 'MSigDB_Oncogenic_Signatures'
  - 'KEGG_2016'
  - 'KEGG_2019_Human'  # KEGG_2019_Mouse