Skip to content

Commit

Permalink
cnv array working
Browse files Browse the repository at this point in the history
  • Loading branch information
MikeWLloyd committed Aug 2, 2024
1 parent 109ad21 commit 1039258
Show file tree
Hide file tree
Showing 7 changed files with 552 additions and 10 deletions.
4 changes: 4 additions & 0 deletions bin/cnv_array/ASCAT_run.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ colnames(SNPpos) <- c('Probe_Set_ID', 'Chromosome', 'Physical_Position')

##

if (gender == 'NA') {
gender = 'XY'
}

ascat.bc = ascat.loadData(Tumor_LogR_file = LRR_file, Tumor_BAF_file = BAF_file, gender = gender, genomeVersion = "hg38")

ascat.bc$samples[1] <- sampleID
Expand Down
132 changes: 132 additions & 0 deletions bin/cnv_array/annotate_ensembl_genes.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#!/usr/bin/perl -w
use POSIX;
use File::Basename;

# This script annotates ensembl genes with copy number and breakpoints
# perl ensemblegenes_cnv_break.pl *.segments_raw.extend.txt mart_export_gene_chr1-Y.hg19ensembl75-85.08232016.txt

if ($#ARGV != 1) {
print "This scripts requires: <file_cn> <file_gene> \n";
exit(-1);
}

$file_cn = $ARGV[0];
$file_gene = $ARGV[1];

$file_output = basename($file_cn,".txt").".ensgene_cnvbreak.txt";
open(OUTFILE, ">$file_output");

open(GENEFILE, "$file_gene") or die "can't open $file_gene: $!";
$gene = <GENEFILE>;
chomp($gene);

open(CNFILE, "$file_cn") or die "can't open $file_cn: $!";
@data = <CNFILE>;
close(CNFILE);
chomp(@data);

#print OUTFILE "$tmp\tstartext\tendext\tstartext_desc\tendext_desc\tCN_raw\tLOH\tparm_fraction\tqarm_fraction\tploidy\tcopydiff_2\tcopydiff_ploidy\tlogratio_2\tlogratio_ploidy\n";
print OUTFILE "$gene\tnum_cnv_seg\tseg_desc\tploidy\tnMajor\tnMinor\tnAraw\tnBraw\tCN_raw\tLOH\tcopydiff_2\tcopydiff_ploidy\tlogratio_2\tlogratio_ploidy\tnMajor_max\tnMinor_max\tnAraw_max\tnBraw_max\tCN_raw_max\tLOH_max\tcopydiff_2_max\tcopydiff_ploidy_max\tlogratio_2_max\tlogratio_ploidy_max\n";

while ($gene = <GENEFILE>) {

chomp($gene);
@line = split(/\t/, $gene);
$chr = $line[2];
$start = $line[3];
$end = $line[4];

#$cnraw1=999;
$numseg=0;
$region="";
%segline = ();
@n = ();

for ($j=1; $j<=$#data; $j++) {
@segment = split(/\t/, $data[$j]);

$chr_cn = $segment[1];
$pos1 = $segment[2];
$pos2 = $segment[3];
$pos1ext = $segment[9];
$pos2ext = $segment[10];
$left = $segment[11];
$right = $segment[12];
$cnraw = $segment[13];

if (($chr_cn eq $chr) && ($start <= $pos2ext) && ($end >= $pos1ext)) { #overlap
#$numseg++;
push(@n, $cnraw);
$segline{$cnraw} = [ @segment ];

#check if overlap with regions with no call
if (($start <= $pos1) && ($end >= $pos1ext)) {
$region = $region.$left.";";
}
if (($start <= $pos2ext) && ($end >= $pos2)) {
$region = $region.$right.";";
}

#if ($cnraw < $cnraw1) {
# $cnraw1 = $cnraw;
# $count = $j;
#}
}
}

if ($region eq "") {
$region = "NA";
}

if ($#n >= 0) {

$numseg = $#n +1;
@sortn = sort{ $a <=> $b } @n;

$nA = $segline{$sortn[0]}[4];
$nB = $segline{$sortn[0]}[5];
$rawA = $segline{$sortn[0]}[6];
$rawB = $segline{$sortn[0]}[7];
$cnraw = $segline{$sortn[0]}[13];
$loh = $segline{$sortn[0]}[14];
$ploidy= $segline{$sortn[0]}[17];
$copydiff1 = $segline{$sortn[0]}[18];
$copydiff2 = $segline{$sortn[0]}[19];
$logratio1 = $segline{$sortn[0]}[20];
$logratio2 = $segline{$sortn[0]}[21];

$outline = "$gene\t$numseg\t$region\t$ploidy\t$nA\t$nB\t$rawA\t$rawB\t$cnraw\t$loh\t$copydiff1\t$copydiff2\t$logratio1\t$logratio2\t";

if ($numseg > 1 ) {
$nA = $segline{$sortn[$#sortn]}[4];
$nB = $segline{$sortn[$#sortn]}[5];
$rawA = $segline{$sortn[$#sortn]}[6];
$rawB = $segline{$sortn[$#sortn]}[7];
$cnraw = $segline{$sortn[$#sortn]}[13];
$loh = $segline{$sortn[$#sortn]}[14];
$copydiff1 = $segline{$sortn[$#sortn]}[18];
$copydiff2 = $segline{$sortn[$#sortn]}[19];
$logratio1 = $segline{$sortn[$#sortn]}[20];
$logratio2 = $segline{$sortn[$#sortn]}[21];
}
else {
$nA = "NA";
$nB = "NA";
$rawA = "NA";
$rawB = "NA";
$cnraw = "NA";
$loh = "NA";
$copydiff1 = "NA";
$copydiff2 = "NA";
$logratio1 = "NA";
$logratio2 = "NA";

}

$outline = $outline."$nA\t$nB\t$rawA\t$rawB\t$cnraw\t$loh\t$copydiff1\t$copydiff2\t$logratio1\t$logratio2";
print OUTFILE "$outline\n";
}
}

close (GENEFILE);
close (OUTFILE);
117 changes: 117 additions & 0 deletions bin/cnv_array/seg_plot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# updated to have Graces Sept '19 tweaks

options(scipen = 999)

args=(commandArgs(TRUE))

# filename is name of the *segments_raw.extend.txt file
filename <- args[1]

sampleID <- args[2]

# outdir is the dir where png result will be written ... use "./" for current dir
outdir <- args[3]


CNS <-read.table(filename,header=T,sep="\t")

gender <- 'female'
sex <- 'female'

if (sex == "female") {
CNS=CNS[CNS$chr!="Y",]
}

#title of plot
ploidy=round(CNS$ploidy[1], digits=2)
plottitle=paste( gsub("_"," ", sampleID), " ploidy=",ploidy,sep="")

chromo <- unique(CNS$chr)
chromo
xx=0
y = c()
start=CNS$startpos
end=CNS$endpos

for (x in chromo) {
start[CNS$chr == x]=start[CNS$chr == x]+xx
end[CNS$chr == x]=end[CNS$chr == x]+xx
tmp = CNS$endpos[CNS$chr == x]
xx=tail(tmp,1)+xx
y <- c(y, xx)
}

png(paste(outdir,sampleID,"_segmentsgenomeplot.copydiffploidy.png",sep=""), width=1300,height=600)

par(mar = c(5, 5, 8, 4))

val=CNS$copydiff_ploidy

# in the following stmt, family="serif" changes font to times-roman; cex.main=1.8 scales up the title font size
# formerly also used: ylim=c(-7,20),
plot( c(start,end), c(val,val), col="white", main=plottitle, xlab="Chromosome", ylab="Delta from Ploidy",
ylim=c(-8, max( c(val,val) ) ))

for (i in 1:length(start)) {
if (CNS$LOH[i]==1) {
polygon(c(start[i],end[i],end[i],start[i]),c(min(-7),min(-7),max(-6),max(-6)),col="lightsteelblue",border="lightsteelblue",lwd=2)
}

}

segments(start,val,end,val,col="tomato",lwd=5)
abline(v=y,col="grey")
posy=c(-8)
i=1
l=0
for (x in chromo) {
posx=(l+y[i])/2
text(posx,posy,x,cex=1.2,srt=45)
l=y[i]
i=i+1
}
abline(h=0,col="black",lty=2,lwd=1.5)

# formerly also used: inset=c(0,-0.1),
legend("topright", inset=-0.1, c("Difference from sample ploidy ", "LOH"), xpd=TRUE, horiz=T,
bty="n", lty=c(1,1), lwd=6, col=c("tomato", "lightsteelblue"), cex=1.5 )

dev.off()


png(paste(outdir,sampleID,"_segmentsgenomeplot.CNraw_loh.png",sep=""), width=1300,height=600)

par(mar = c(5, 5, 8, 4))

val=CNS$CN_raw
d=max(val)/100
plot(c(start,end),c(val,val),col="white",ylab="CN, CN Major, CN Minor",main=plottitle,ylim=c(-d,max(val)+4*d),xaxt='n',xlab="chromosomes")

for (i in 1:length(start)) {
if (CNS$LOH[i]==1) {
polygon(c(start[i],end[i],end[i],start[i]),c(min(val),min(val),max(val),max(val)),col="lightsteelblue",border="lightsteelblue",lwd=2)
}

}

val=CNS$nBraw-d
segments(start,val,end,val,col="blue",lwd=4)
val=CNS$nAraw
segments(start,val,end,val,col="red",lwd=4)
val=CNS$CN_raw+d
segments(start,val,end,val,col="purple",lwd=4)
abline(v=y,col="grey")
posy=max(val)+2*d
i=1
l=0
for (x in chromo) {
posx=(l+y[i])/2
text(posx,posy,x,cex=1,srt=45)
l=y[i]
i=i+1
}
abline(h=CNS$ploidy[1],col="black",lty=2,lwd=1.5)

legend("topright", c("CN Total", "CN Major", "CN Minor", "LOH"),xpd=TRUE,horiz=T, inset=c(0,-0.1), bty = "n", lty=c(1,1,1,1), lwd=6, col = c("purple", "red", "blue", "lightsteelblue"), cex = 1)
dev.off()

Loading

0 comments on commit 1039258

Please sign in to comment.