snp

remove adapter

cutadapt_function(){
echo cutadapt -m 20 -a ${adapt1} -A ${adapt2} -o ${data_path}/out1.fastq -p ${data_path}/out2.fastq ${data_path}/${file1} ${data_path}/${file2}
cutadapt -m 20 -a ${adapt1} -A ${adapt2} -o ${data_path}/out1.fastq -p ${data_path}/out2.fastq ${data_path}/${file1} ${data_path}/${file2}

}


# sample1
data_path=/home/xugang/data/wangjydata/1.rawdata/0_FKDL202596126-1a/
adapt1=GGACTCC
adapt2=TATCCTCT
file1=0_FKDL202596126-1a_1.fq.gz
file2=0_FKDL202596126-1a_2.fq.gz
cutadapt_function

#sample2
data_path=/home/xugang/data/wangjydata/1.rawdata/0-jia_FKDL202596128-1a/
adapt1=TAGGCATG
adapt2=TATCCTCT
file1=0-jia_FKDL202596128-1a_1.fq.gz
file2=0-jia_FKDL202596128-1a_2.fq.gz
cutadapt_function
#sample3
data_path=/home/xugang/data/wangjydata/1.rawdata/1_FKDL202596127-1a/
adapt1=TAAGGCGA
adapt2=AGAGTAGA
file1=1_FKDL202596127-1a_1.fq.gz
file2=1_FKDL202596127-1a_2.fq.gz
cutadapt_function
#sample4
data_path=/home/xugang/data/wangjydata/1.rawdata/1-jia_FKDL202596129-1a/
adapt1=CGTACTAG
adapt2=AGAGTAGA
file1=1-jia_FKDL202596129-1a_1.fq.gz
file2=1-jia_FKDL202596129-1a_2.fq.gz
cutadapt_function

bowtie map

indexp=/home/xugang/data/wangjydata/ref/bowtie_index/emx1
indexp2=/home/xugang/data/wangjydata/ref/bowtie2_index/emx1


bowtief2(){
echo "bowtie2 $name";
bowtie2 -p 6 -N 1 -x ${indexp2} -1 ${datap}/out1.fastq -2 ${datap}/out2.fastq --seed 0 --very-sensitive -S ${datap}/${name}.sam > ${datap}/log.${name}.log

}

bowtief(){
echo "bowtie $name";
bowtie -p 6  ${indexp} -1 ${datap}/out1.fastq -2 ${datap}/out2.fastq -n 3 -S ${datap}/${name}.bowtie1.sam
}

datap=/home/xugang/data/wangjydata/1.rawdata/0_FKDL202596126-1a
name=0_fkd
bowtief
bowtief2

datap=/home/xugang/data/wangjydata/1.rawdata/0-jia_FKDL202596128-1a/
name=0-jia
bowtief
bowtief2

datap=/home/xugang/data/wangjydata/1.rawdata/1_FKDL202596127-1a/
name=1_fkd
bowtief
bowtief2

datap=/home/xugang/data/wangjydata/1.rawdata/1-jia_FKDL202596129-1a/
name=1-jia
bowtief
bowtief2

bam sort and index

sort(){
samtools view -S -b ${datap}/${name} > ${datap}/${name}.bam
samtools sort ${datap}/${name}.bam > ${datap}/${name}.sort.bam
rm ${datap}/${name}.bam
samtools index ${datap}/${name}.sort.bam
}

# file path
datap=/home/xugang/data/wangjydata/1.rawdata/0_FKDL202596126-1a/
# file name
name=0_fkd.sam
# function
sort
name=0_fkd.bowtie1.sam
# function
sort

# file path
datap=/home/xugang/data/wangjydata/1.rawdata/0-jia_FKDL202596128-1a
# file name
name=0-jia.sam
# function
sort
# file name
name=0-jia.bowtie1.sam
# function
sort

# file path
datap=/home/xugang/data/wangjydata/1.rawdata/1_FKDL202596127-1a
# file name
name=1_fkd.sam
# function
sort
# file name
name=1_fkd.bowtie1.sam
# function
sort

# file path
datap=/home/xugang/data/wangjydata/1.rawdata/1-jia_FKDL202596129-1a
# file name
name=1-jia.sam
# function
sort
# file name
name=1-jia.bowtie1.sam
# function
sort

call snp

snp(){
[ -d ${outp}/ ] || mkdir -p ${outp}/
freebayes -f ${genome} ${datap}/${name} >${outp}/${name}.vcf
}


# do not change
genome=/home/xugang/data/wangjydata/ref/emx1.fa
outp=/home/xugang/data/wangjydata/3.var.new

# file path
datap=/home/xugang/data/wangjydata/1.rawdata/0_FKDL202596126-1a/
# file name
name=0_fkd.sam.sort.bam
# function
snp
name=0_fkd.bowtie1.sam.sort.bam
# function
snp

# file path
datap=/home/xugang/data/wangjydata/1.rawdata/0-jia_FKDL202596128-1a
# file name
name=0-jia.sam.sort.bam
# function
snp
# file name
name=0-jia.bowtie1.sam.sort.bam
# function
snp

# file path
datap=/home/xugang/data/wangjydata/1.rawdata/1_FKDL202596127-1a
# file name
name=1_fkd.sam.sort.bam
# function
snp
# file name
name=1_fkd.bowtie1.sam.sort.bam
# function
snp

# file path
datap=/home/xugang/data/wangjydata/1.rawdata/1-jia_FKDL202596129-1a
# file name
name=1-jia.sam.sort.bam
# function
snp
# file name
name=1-jia.bowtie1.sam.sort.bam
# function
snp

Calling variants: from fastq to VCF

You've sequenced some samples. You have a reference genome or assembled set of contigs, and you'd like to determine reference-relative variants in your samples. You can use freebayes to detect the variants, following these steps:

Align your reads to a suitable reference (e.g. with bwa or MOSAIK)
Ensure your alignments have read groups attached so their sample may be identified by freebayes. Aligners allow you to do this, but you can also use bamaddrg to do so post-alignment.
Sort the alignments (e.g. sambamba sort).
Mark duplicates, for instance with sambamba markdup (if PCR was used in the preparation of your sequencing library)
Run freebayes on all your alignment data simultaneously, generating a VCF. The default settings should work for most use cases, but if your samples are not diploid, set the --ploidy and adjust the --min-alternate-fraction suitably.
Filter the output e.g. using reported QUAL and/or depth (DP) or observation count (AO).
Interpret your results.
(possibly, Iterate the variant detection process in response to insight gained from your interpretation)

bam count

bamcount(){
[ -d ${outp} ] || mkdir -p ${outp}
bam-readcount -f ${genome} ${datap}/${name} > ${outp}/${name}.txt 2>${outp}/${name}.log

}


# do not change
genome=/home/xugang/data/wangjydata/ref/emx1.fa
outp=/home/xugang/data/wangjydata/4.bam_count

# file path
datap=/home/xugang/data/wangjydata/1.rawdata/0_FKDL202596126-1a/
# file name
name=0_fkd.bowtie1.sam.sort.bam
# function
bamcount
name=0_fkd.sam.sort.bam
# function
bamcount

# file path
datap=/home/xugang/data/wangjydata/1.rawdata/0-jia_FKDL202596128-1a
# file name
name=0-jia.sam.sort.bam
# function
bamcount
# file name
name=0-jia.bowtie1.sam.sort.bam
# function
bamcount

# file path
datap=/home/xugang/data/wangjydata/1.rawdata/1_FKDL202596127-1a
# file name
name=1_fkd.sam.sort.bam
# function
bamcount
# file name
name=1_fkd.bowtie1.sam.sort.bam
# function
bamcount

# file path
datap=/home/xugang/data/wangjydata/1.rawdata/1-jia_FKDL202596129-1a
# file name
name=1-jia.sam.sort.bam
# function
bamcount
# file name
name=1-jia.bowtie1.sam.sort.bam
# function
bamcount

Normal Output

The output format prints to standard out as follows:

chr	position	reference_base	depth	base:count:avg_mapping_quality:avg_basequality:avg_se_mapping_quality:num_plus_strand:num_minus_strand:avg_pos_as_fraction:avg_num_mismatches_as_fraction:avg_sum_mismatch_qualities:num_q2_containing_reads:avg_distance_to_q2_start_in_q2_reads:avg_clipped_length:avg_distance_to_effective_3p_end   ...

Each tab separated field contains the following information, each one is tab-separated:

base → the base that all reads following in this field contain at the reported position i.e. C
count → the number of reads containing the base
avg_mapping_quality → the mean mapping quality of reads containing the base
avg_basequality → the mean base quality for these reads
avg_se_mapping_quality → mean single ended mapping quality
num_plus_strand → number of reads on the plus/forward strand
num_minus_strand → number of reads on the minus/reverse strand
avg_pos_as_fraction → average position on the read as a fraction (calculated with respect to the length after clipping). This value is normalized to the center of the read (bases occurring strictly at the center of the read have a value of 1, those occurring strictly at the ends should approach a value of 0)
avg_num_mismatches_as_fraction → average number of mismatches on these reads per base
avg_sum_mismatch_qualities → average sum of the base qualities of mismatches in the reads
num_q2_containing_reads → number of reads with q2 runs at the 3’ end
avg_distance_to_q2_start_in_q2_reads → average distance of position (as fraction of unclipped read length) to the start of the q2 run
avg_clipped_length → average clipped read length of reads
avg_distance_to_effective_3p_end → average distance to the 3’ prime end of the read (as fraction of unclipped read length)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

20200923_snp.md

20200923_snp.md

snp

remove adapter

bowtie map

bam sort and index

call snp

Calling variants: from fastq to VCF

bam count

Normal Output

Files

20200923_snp.md

Latest commit

History

20200923_snp.md

File metadata and controls

snp

remove adapter

bowtie map

bam sort and index

call snp

Calling variants: from fastq to VCF

bam count

Normal Output