######ChIPseq_Alignment_Pipeline_Summary.txt
###########################################################################################################
######Read alignment/processing for spike-in calibrated ChIP-seq #########################################
###########################################################################################################
######Check the quality of fastq files with FastQC and summarise the reports with MultiQC -----------------
### Run FastQC on every file in the current directory whose name matches *fastq.gz.
fastqc *fastq.gz
### Aggregate the reports found under the current directory with MultiQC;
### --interactive is the long spelling of the short flag -ip.
multiqc . --interactive
######Align ChIP-seq reads to the concatenated genome (mm10+dm6 for NChIP-seq or mm10+hg19 for XChIP-seq)--
######To make concatenated genomes, chromosome names were renamed to mm10_chr*, dm6_chr*, hg19_chr*
######so that reads mapping to each individual genome can be extracted after alignment-------------------
######The example analysis shown below is for NChIP-seq
### Start the alignment as a background job (&) that ignores hangups (nohup).
nohup ChIPseq_PairedEnd_Bowtie2_SpikeIn_AlignmentFromList.pl \
  -file align.files.txt \
  -genome mm10 \
  -spikein dm6 &
### align.files.txt has three tab-separated columns: Read1.fastq Read2.fastq filename
###### Each column for Read1/2 can have more than one fastq.gz file, for example, when aligning reads from the NextSeq 500/550 Illumina sequencer that outputs reads for each lane separately
######Calculate factors for spike-in normalisation --------------------------------------------------------
######For how to calculate factors for spike-in normalisation please see Fursova et al., 2019 (PMID: 31029541) and Hu et al., 2015 (PMID: 26130708).
######Randomly subsample mm10 ChIP-seq bam files for individual replicates to reflect the total number of dm6 reads in each sample, correcting for the actual spike-in cell ratio (dm6/mm10 total read counts in corresponding Input samples)
######Note: subsampling is done to generate bam files for the purposes of data visualisation (metaplots, UCSC genome browser) and annotation of genomic regions with read counts
### Run the downsampling script on the list file mm10.downsample.txt.
### NOTE(review): the later merge step reads *_downsampled.bam files, presumably written by this script — confirm its output naming.
ChIPseq_RNAseq_Downsample_BamFile_FromList.pl -file mm10.downsample.txt
### mm10.downsample.txt has two tab-separated columns: file.bam subsampling_fraction (< 1)
######Check correlation between individual biological replicates ----------------------------------------------
### Summarise the replicate BAM files over the regions listed in RegionsOfInterest.bed.
### NOTE: "[Rep1.bam Rep2.bam ...]" appears to be a placeholder — replace it with the actual replicate BAM files.
multiBamSummary BED-file \
  --BED RegionsOfInterest.bed \
  --bamfiles [Rep1.bam Rep2.bam ...] \
  -o results.npz
### Draw the Pearson correlation heatmap from results.npz and also save the correlation matrix as text.
plotCorrelation \
  --corData results.npz \
  --corMethod pearson \
  -p heatmap \
  --removeOutliers \
  --plotNumbers \
  -o correlation_output.pdf \
  --outFileCorMatrix correlation_output_matrix.txt
###Regions of interest can be ChIP-seq peaks, gene promoters, etc
######If individual replicates are highly correlated (Pearson correlation coefficient > 0.9), merge them for downstream analysis and visualisation
######Merge individual replicates ---------------------------------------------------------------------------------
### The first BAM file is the merged output; the following ones are the per-replicate inputs.
sambamba merge \
  SampleName_mm10_UniqMapped_sorted_rmdup_downsampled_MERGED.bam \
  SampleName_rep1_mm10_UniqMapped_sorted_rmdup_downsampled.bam \
  SampleName_rep2_mm10_UniqMapped_sorted_rmdup_downsampled.bam \
  SampleName_rep3_mm10_UniqMapped_sorted_rmdup_downsampled.bam
#######Generate bigWig files for data visualisation (metaplots/heatmaps or UCSC Genome browser) --------------------
### Convert every merged, downsampled mm10 BAM file in the current directory to bigWig.
### The track name is the BAM file name with its last extension removed and ".MACS2" appended.
for file in *mm10*downsampled_MERGED.bam
do
  ### If nothing matches, the glob stays a literal pattern; skip it instead of
  ### handing a non-existent file to the converter.
  [ -e "$file" ] || continue
  ### Quoted expansions keep file names containing spaces or glob characters intact;
  ### ${file%.*} strips the last extension without spawning echo/rev/cut.
  ChIPseq_bam2bigwig_MACS2.pl -bam "$file" -readtype paired -genome mm10 -name "${file%.*}.MACS2"
done
######Count reads in regions of interest (peaks, gene promoters or bodies) ------------------------------------------
######For annotation of genomic regions with read counts, merged bam files of downsampled biological replicates were used
### Count reads from file1.bam and file2.bam over the regions in peaks.bed, labelled file1 and file2;
### --outRawCounts additionally writes the counts to peaks.counts.results.txt.
multiBamSummary BED-file \
  --BED peaks.bed \
  --bamfiles file1.bam file2.bam \
  -o peaks.counts.results.npz \
  --outRawCounts peaks.counts.results.txt \
  -l file1 file2
#######To load the output of multiBamSummary into R, remove "#" from the first line of the peaks.counts.results.txt file
#######For making metaplots and heatmaps, computeMatrix was used in combination with plotProfile/plotHeatmap, following the instructions in the deepTools documentation