-
Notifications
You must be signed in to change notification settings - Fork 10
/
configuration.yaml
124 lines (116 loc) · 6.98 KB
/
configuration.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# PinAPLPy: Platform-independent Analysis of Pooled Screens using Python
# P. Spahn et al., UC San Diego
# ***********************************************************************
# CONFIGURATION FILE
# ***********************************************************************
# PROJECT PARAMETERS
# Screen-level setting for this project.
ScreenType: 'enrichment' # type of screen; allowed values: 'enrichment' or 'depletion'
# LIBRARY PARAMETERS
# Describe the sgRNA library: its spreadsheet file, the 5' adapter sequence,
# the naming of non-targeting controls, and the number of sgRNAs per gene.
LibFilename: 'GeCKOv21_Human.tsv' # filename of the library spreadsheet
seq_5_end: 'TCTTGTGGAAAGGACGAAACACCN' # 5'-end adapter sequence
NonTargetPrefix: 'NonTargeting' # name prefix of non-targeting sgRNAs in the library
NumGuidesPerGene: 6 # number of sgRNAs per gene
# ANALYSIS OPTIONS
# Statistical, alignment, and plotting choices for the analysis.
# Bracketed lists in the comments enumerate the accepted values.
GeneMetric: 'SigmaFC' # metric used for gene ranking ['SigmaFC'/'AvgLogFC'/'aRRA'/'STARS']
padj: 'sidak' # p-value adjustment method ['fdr_bh'/'sidak'/'bonferroni']
alpha_s: 0.01 # significance threshold for sgRNA enrichment/depletion
alpha_g: 0.01 # significance threshold for gene ranking
Normalization: 'total' # read count normalization method ['cpm'/'size'/'total']
ClusterBy: 'variance' # clustering criterion ['variance'/'counts']
RoundCount: False # round counts after normalization? [True/False]
repl_avg: 'median' # how counts are averaged across replicates ['mean'/'median']
sgRNALength: 20 # sgRNA length [bp]
# NOTE(review): AS_min is presumably a minimum alignment score; confirm against the alignment script.
AS_min: 40 # alignment matching threshold
Theta: 2 # alignment ambiguity threshold
Np: 10000 # number of permutations used for gene ranking p-values
# NOTE(review): the list has 7 entries while NumGuidesPerGene is 6; presumably
# one weight per possible count (0..6) of significant sgRNAs. Confirm before
# changing either value.
w_SigmaFC: [0,1,2,3,4,5,6] # weighting function for the SigmaFC score
thr_STARS: 10 # threshold percentage for STARS analysis
TopN: 25 # number of top sgRNAs taken into account for clustering
ShowNonTargets: True # show non-targeting controls in scatterplots? [True/False]
scatter_annotate: False # annotate highlighted sgRNAs in scatterplots? [True/False]
IncludeDensityPlots: True # include sgRNA density plots? [True/False]
AutoHighlight: True # automatically highlight the top 10 ranked genes? [True/False]
# TECHNICAL PARAMETERS (changes not required/recommended)
# Low-level settings for read trimming, alignment, plotting, and output files.
Cutoff: 0 # read count threshold
CutErrorTol: 0.1 # cutadapt error tolerance
L_bw: 11 # Bowtie2 -L parameter (seed length)
N_bw: 1 # Bowtie2 -N parameter (seed mismatch)
i_bw: 'S,1,0.75' # Bowtie2 -i parameter (interval function)
delta: 1 # shift added to counts to avoid zero counts in logFC
p_overdisp: 0.05 # significance threshold for the overdispersion test
dotsize: 10 # size of dots in scatterplots
TransparencyLevel: 0.05 # transparency (alpha) of dots in scatterplots
logbase: 10 # base of the log transformation used in scatterplots
AlnOutput: 'Delete' # what to do with raw alignment output ['Keep'/'Compress'/'Delete']
keepCutReads: False # keep the files containing trimmed reads? [True/False]
dpi: 300 # resolution of PNG plots
svg: True # also save figures as vector graphics? [True/False]
HitListFormat: 'xlsx' # format of results spreadsheets ['txt'/'xlsx']
PrintHighlights: True # print highlighted sgRNA info to the console? [True/False]
# HARD-CODED PARAMETERS (not available on website)
# Settings that can only be changed by editing this file.
FCmin_SigmaFC: 0 # minimum FC required for the SigmaFC score (not compatible with depletion screens)
IncludeGeneRankCombination: False # Fisher's statistic for p-value combination [True/False]
width_p: 800 # width of the heatmap image (pixels)
height_p: 800 # height of the heatmap image (pixels)
fontsize_p: 14 # font size in the heatmap image
marginsize: 10 # size of the margin in the heatmap image
# DIRECTORIES
# All paths are absolute and end with a trailing slash.
# Data, library, alignment, and analysis folders live under /workingdir/;
# ScriptsDir and STARSDir point into /opt/PinAPL-Py/.
# Result folders under AnalysisDir are split into numbered stages:
#   00_LogFile, 01_Alignment_Results, 02_sgRNA-Ranking_Results,
#   03_GeneRanking_Results.
WorkingDir: '/workingdir/'
DataDir: '/workingdir/Data/'
TempDataDir: '/workingdir/TempData/'
LibDir: '/workingdir/Library/'
IndexDir: '/workingdir/Library/Bowtie2_Index/'
ScriptsDir: '/opt/PinAPL-Py/Scripts/'
AlignDir: '/workingdir/Alignments/'
AnalysisDir: '/workingdir/Analysis/'
TrimLogDir: '/workingdir/Analysis/01_Alignment_Results/Read_Trimming/'
sgRNARanksDir: '/workingdir/Analysis/02_sgRNA-Ranking_Results/sgRNA_Ranking/'
GeneDir: '/workingdir/Analysis/03_GeneRanking_Results/Gene_Ranking/'
ControlDir: '/workingdir/Analysis/02_sgRNA-Ranking_Results/ControlSample_Analysis/'
HeatDir: '/workingdir/Analysis/02_sgRNA-Ranking_Results/Heatmap/'
AlnQCDir: '/workingdir/Analysis/01_Alignment_Results/Alignment_Statistics/'
sgRNAReadCountDir: '/workingdir/Analysis/01_Alignment_Results/ReadCounts_per_sgRNA/'
GeneReadCountDir: '/workingdir/Analysis/01_Alignment_Results/ReadCounts_per_Gene/'
CountQCDir: '/workingdir/Analysis/02_sgRNA-Ranking_Results/ReadCount_Distribution/'
ScatterDir: '/workingdir/Analysis/02_sgRNA-Ranking_Results/sgRNA_Scatterplots/'
DensityDir: '/workingdir/Analysis/02_sgRNA-Ranking_Results/sgRNA_Densities/'
GenePlotDir: '/workingdir/Analysis/03_GeneRanking_Results/GeneScore_Scatterplots/'
CorrelDir: '/workingdir/Analysis/02_sgRNA-Ranking_Results/Replicate_Correlation/'
# HiLiteDir2 is a subfolder of CorrelDir.
HiLiteDir2: '/workingdir/Analysis/02_sgRNA-Ranking_Results/Replicate_Correlation/Highlighted_Genes/'
EffDir: '/workingdir/Analysis/03_GeneRanking_Results/sgRNA_Efficacy/'
DepthDir: '/workingdir/Analysis/01_Alignment_Results/Read_Depth/'
SeqQCDir: '/workingdir/Analysis/01_Alignment_Results/Sequence_Quality/'
pvalDir_sgRNA: '/workingdir/Analysis/02_sgRNA-Ranking_Results/p-value_Distribution/'
pvalDir_genes: '/workingdir/Analysis/03_GeneRanking_Results/p-value_Distribution/'
zScoreDir_sgRNA: '/workingdir/Analysis/02_sgRNA-Ranking_Results/sgRNA_z-Scores/'
VolcanoDir_sgRNA: '/workingdir/Analysis/02_sgRNA-Ranking_Results/sgRNA_VolcanoPlots/'
LogFileDir: '/workingdir/Analysis/00_LogFile/'
# NOTE(review): bw2Dir and CutAdaptDir presumably hold the Bowtie2 and cutadapt
# executables; confirm against the scripts that read them.
bw2Dir: '/usr/bin/'
CutAdaptDir: '/root/.local/bin/'
STARSDir: '/opt/PinAPL-Py/Scripts/STARS_mod/'
# SCRIPT FILENAMES
# Names of the pipeline scripts, given without a file extension.
# NOTE(review): presumably resolved relative to ScriptsDir, with the extension
# appended by the caller; confirm before renaming any script.
SanityScript: 'CheckCharacters'
IndexScript: 'BuildLibraryIndex'
LoaderScript: 'LoadDataSheet'
ReadDepthScript: 'PlotNumReads'
SeqQCScript: 'CheckSequenceQuality'
TrimScript: 'TrimReads'
AlignScript: 'AlignReads'
ClassifyScript: 'GetReadCounts'
CutoffScript: 'ApplyCutoff'
CleanUpScript: 'RemoveTempOutput'
NormalizeScript: 'NormalizeReadCounts'
AverageCountsScript: 'AverageCounts'
StatsScript: 'AnalyzeReadCounts'
ControlScript: 'AnalyzeControl'
sgRNARankScript: 'RanksgRNAs'
zFCScript: 'PlotFCz'
vFCScript: 'PlotFCvolcano'
GeneRankScript: 'RankGenes'
GenePlotScript: 'PlotGeneScores'
CombineScript: 'CombineGeneRanks'
ScatterScript: 'PlotCounts'
DensityScript: 'PlotDensity'
ReplicateScript: 'PlotReplicates'
ClusterScript: 'PlotHeatmap'
ExtractTop10Script: 'ExtractTop10Genes'