From 06c54e8066ecbc6292167e7c5bdfb8af945a41ba Mon Sep 17 00:00:00 2001 From: Ross Lazarus Date: Mon, 16 Sep 2024 03:00:52 +1000 Subject: [PATCH] Bug fix: fix bed start and end in bigwig_outlier_bed (#6312) * fix all beds by padding the region end in the output bed file to suit UCSC math * update tests with padded bed region ends * flake8 pacification * odd. test outputs pass here - will test biocontainers...x * add 1 to start instead * hmmm. tests passing here... * updating tests and test data * having strange problems getting planemo to update existing but wrong test outputs * using a specialised bed file -> bigwig chr1 0 10 100 100 + chr1 20 30 100 100 + chr1 40 50 100 100 + chr1 60 70 100 100 + chr1 200 300 100 100 + chr1 300 400 0 0 + chr1 400 1000 100 100 + * add fakery * trying again with refreshed test data. Not sure what's going on...passes here. * placate flake8 * wrong fake.bed sample... * fix multiple bigwigs * more flake8 fixing. * remove bogus test parameter... * add nice histogram test * restore some histogram table reports - just use the old merlin note the odd offsets. caused me some grief. * reduce ram bloat by iterating instead of using a list comprehension for large bed texts. --- .../bigwig_outlier_bed/bigwig_outlier_bed.py | 13 +- .../bigwig_outlier_bed/bigwig_outlier_bed.xml | 57 +- .../test-data/bedouthi2_sample | 1348 +-------------- .../test-data/bedouthi_qlo_notset_sample | 1292 +++++++++++++-- .../test-data/bedouthi_sample | 1351 +-------------- .../test-data/bedouthilo2_sample | 1457 ----------------- .../test-data/bedouthilo_qlo_notset_sample | 70 - .../test-data/bedouthilo_sample | 188 +-- .../test-data/bedouthilo_sample_2 | 1244 ++++++++++++++ .../test-data/bedoutlo2_sample | 110 -- .../test-data/bedoutlo_sample | 3 +- tools/bigwig_outlier_bed/test-data/fake.bed | 7 + .../test-data/{1.bigwig => fake.bigwig} | Bin 21218 -> 12888 bytes .../test-data/table2_sample | 67 +- .../test-data/table3_sample | 65 +- .../test-data/table_only_sample | 78 +- .../test-data/table_qlo_notset_sample | 2 +- .../bigwig_outlier_bed/test-data/table_sample | 2 +- 18 files changed, 2471 insertions(+), 4883 deletions(-) delete mode 100644 tools/bigwig_outlier_bed/test-data/bedouthilo2_sample delete mode 100644 tools/bigwig_outlier_bed/test-data/bedouthilo_qlo_notset_sample create mode 100644 tools/bigwig_outlier_bed/test-data/bedouthilo_sample_2 delete mode 100644 tools/bigwig_outlier_bed/test-data/bedoutlo2_sample create mode 100644 tools/bigwig_outlier_bed/test-data/fake.bed rename tools/bigwig_outlier_bed/test-data/{1.bigwig => fake.bigwig} (57%) diff --git a/tools/bigwig_outlier_bed/bigwig_outlier_bed.py b/tools/bigwig_outlier_bed/bigwig_outlier_bed.py index e762021ea7f..c6f4a0bbec9 100644 --- a/tools/bigwig_outlier_bed/bigwig_outlier_bed.py +++ b/tools/bigwig_outlier_bed/bigwig_outlier_bed.py @@ -146,7 +146,7 @@ def processVals(self, bw, isTop): else: bwex = np.r_[False, bw <= self.bwbot, False] bwexd = np.diff(bwex) - bwexdnz = bwexd.nonzero()[0] + bwexdnz = bwexd.nonzero()[0] # start and end transition of each segment - nice! bwregions = np.reshape(bwexdnz, (-1, 2)) return bwregions @@ -155,10 +155,9 @@ def writeBed(self, bed, bedfname): potentially multiple """ bed.sort() - beds = ["%s\t%d\t%d\t%s\t%d" % x for x in bed] with open(bedfname, "w") as bedf: - bedf.write("\n".join(beds)) - bedf.write("\n") + for b in bed: + bedf.write("%s\t%d\t%d\t%s\t%d\n" % b) def makeTableRow(self, bw, bwlabel, chr): """ @@ -194,7 +193,6 @@ def makeBed(self): restab = [] bwlabels = self.bwlabels bwnames = self.bwnames - bwnames.sort() reshead = "bigwig\tcontig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot" for i, bwname in enumerate(bwnames): bwlabel = bwlabels[i].replace(" ", "") @@ -251,8 +249,11 @@ def makeBed(self): self.bwbot = np.quantile(bw, self.qlo) bwlo = self.processVals(bw, isTop=False) for j, seg in enumerate(bwlo): + seglen = seg[1] - seg[0] if seg[1] - seg[0] >= self.bedwin: - score = -1 * np.sum(bw[seg[0]:seg[1]]) / float(seglen) + score = ( + -1 * np.sum(bw[seg[0]:seg[1]]) / float(seglen) + ) bedlo.append( ( chr, diff --git a/tools/bigwig_outlier_bed/bigwig_outlier_bed.xml b/tools/bigwig_outlier_bed/bigwig_outlier_bed.xml index b9df7c5430f..8750a174a39 100644 --- a/tools/bigwig_outlier_bed/bigwig_outlier_bed.xml +++ b/tools/bigwig_outlier_bed/bigwig_outlier_bed.xml @@ -3,7 +3,7 @@ 0.2.0 3.12.3 - 0 + 1 topic_0157 @@ -25,13 +25,13 @@ - + - + - + - - + + - + - - + + @@ -125,7 +125,7 @@ - + @@ -134,24 +134,25 @@ - - - - + + + + - - - + - - - - - + + + + + + + + `iqeF+{t z$tcoKAPBlSbZ{0a-sSGvB7(EwAGo}`d$|kwWy>RtuR%W)Qk+PU9eo!cT(q8X%Aleu&nc#0*5|+t1K*BO(^FjAq&760R#|0009ILKmY**5I_I{ U1Q0*~0R#|00D+tfH1t0u-;Lcm#Q*>R literal 21218 zcmeI(i&Ilq8UXMBI)I?r1$jvUkqXkN0Usa+leMT+sj@4zU?~Jdh!!Q_Bae~?qChoh zM60AcOf8jIu?AESnve(x804X~7N{g4BtVr%NQmS~vdL<8yF0UgKxfXGx$~WS&za24 zZ@!#4Gha@if&P^p#1_JUylsMak?Q!a^M{aa?dMrL5J;Z$+djwtZH!v+ZfxD#NZtH4 zR=KW(*I+#F+WK=bZlLy;4)f4^+MWABsau@3rf7Q)274xJjma^bb1&_HW}Lt_#Ote+ zaRgs>BlWovs@H20{5A%M_y(3O<&~b;ODH|Ix3M%#c4J-Fy^&01WF*S{s)w5QHP@MP zligjT_>Aitb}78=(}6u=FTO&bJE0fYDudjWrxu6$vLHY|Mb3NN)$ zFPVtd_+Ydvxfl4kc&Oh9-xITIU`zPQA+j&J!eyGbVdZ3UN4$!y>{sm8CThZX6Gpwp zhH{2E%$j3L#x(`T7~>WBNtv*gWk^KkK^Mydstj9DkAq(B##1j)Y|vjkIL97GNk1JL z6;ddVg;G&v30l^#Im$teQaQ79B<@6t?Z+-YIDuWj55zCYW4~v^5o4Fe&o99{{pQh* zpA<<)Ct)L~YQG8v7kzJeip7;5NzY8?f{wmGBK&CYGkmJ0P#d%Hp)E`LG?lfo3;dy^<*_ z&ncp?jq#MFv4TFSn>_2aX-25mcve*xAVv3Q);=>|!Ler#vkJ&N^tW+rMI-~A>`BXw z--YgIh7!}0dw5s5*SIC>9s=wLrvDpiTQ2809oj0HE|$1*X${O^W)S)y|8riTtYqm|&I_(CLiVV#!=q;Q?hh$S%978f z`V&Kwf97p~v)O0%ky+KeJ6wP4Eo?BBRyj0Zrf!uRM5jm$b(PQ?&TPfZG?Dz#g*A%k zc{r(@!?FClAgCjF+rHn>BmD}zKG%4m2xmG_;gzZ$Dt?Nh!+Ra(V>EY#u=TCt`CGyX zWwsw!TsHaOoUcn;cCH7rFA=ZRVn;c1WN)f$o(uLb`7iw&y>ZELO?aq|%3zEDi^OB~%B$zX#AKFr)%UR!MVh0ZHK2B{Q z@q{k(yX5_YM`elGta+rNr!viOU$~Ha?tu!gwG-#&*JTt$8gFOALjy0f%G_|T2!Z4< z;vSNn(}3GIZoI_&XjOVMJ8z%r#Z>0Y7o8otF>XD6is&H{-hoo!e%;ZHDEFW*Q7^LG z>CfO}FfN*gl4J~}J}5Pvi;nV*xPgN6^TWyGP%c*OS(Rbm(YB4TH15D>-J1|Yg3SoS z7ITW|3<;jJiI|=xe3mn1m3j3@U30tW{LtZ!#8dLBv#zX8SQKB z`+8;-A95M$m7_OCM@O#^_9b8LZ!Z*W99?4+h!v+c9oeH1V>kHqv4)-H zW3N4a;(yF5Gu9`7D$q4=|oJ#u*2PX7J=*{xDj}t_{HDc1)FAns6fJjYtC_k=j~^5 zzmW}wungxJGl)e49xX1Ob+9Ke0(xQ1L}nV9n5G{Kl`v_)@}4J)w~KxB!)nQ%gSA2j zeCO++kxwda&~vDDqyo|!lc2dr=|eKFAliRK7L31%pQ$!L#TA`lSQWuBFUy5 zNSmshz}{JeyX2TSsY?7XT!O=^2mJ0MTs$KLeUfyJx8VYUs)#lO(>cp0UDrAoTl1f? zF0x86!~0bcvL+&O_51Kbb8$hkFd%-NylT{SNqDm!h6I(k;2 z3s!nfcVB_uPb{o?gHocoFEjYw(Bag1M~XXKz?RCVMQtV<$KBHD;oFDax9n}KyBEuD z23JP61c>%WI>fpSvyNM~J(l*zInVy}1OmBZ*(ZSp1b_e#00KY&2mk>f00e*l5C8%| z00;m9AOHk_01yBIKmZ8*sRiuKZ%?R5OG!ytRsNzcMc{i)Hd$1*aX$s&3=8@H+R&3CEZIzt7cx`&#+mvXVXSfoGPJCU+)2-E_3|K zT4JysgRp#GYl#8ZO@~-dmqPweiNP}G^t%(N0e@O7@ZvxK2mk>f00e*l5C8%|00;m9 YAOHk_01yBIKmZ5;0U!VbfPhutpPq4Ea{vGU diff --git a/tools/bigwig_outlier_bed/test-data/table2_sample b/tools/bigwig_outlier_bed/test-data/table2_sample index a06f259fefb..63b67991163 100644 --- a/tools/bigwig_outlier_bed/test-data/table2_sample +++ b/tools/bigwig_outlier_bed/test-data/table2_sample @@ -1,29 +1,12 @@ Descriptive measures bigwig bigwig_sample -contig chr21 -n 48129895 -mean 0.003446 -std 0.400601 -min 0.000000 -max 100.000000 -qtop 0.00 -qbot 0.00 -chr21: 0.000000 occurs 48125565 times -chr21: 20.000000 occurs 1660 times -chr21: 40.000000 occurs 1610 times -chr21: 50.000000 occurs 5 times -chr21: 60.000000 occurs 840 times -chr21: 80.000000 occurs 195 times -chr21: 100.000000 occurs 20 times -Descriptive measures -bigwig 1.bigwig contig Merlin n 180929 mean 6354.313736 std 9951.749936 min 0.000000 max 32767.000000 -qtop 24359.00 +qtop 0.00 qbot 0.00 First/Last 10 value counts Value Count @@ -47,28 +30,28 @@ Value Count 32745.00 10 32766.00 10 32767.00 10 -Histogram of 1.bigwig bigwig values -1.bigwig_Merlin 1638.35 | 114,349 | ************************************************** -1.bigwig_Merlin 3276.70 | 3,650 | * -1.bigwig_Merlin 4915.05 | 3,620 | * -1.bigwig_Merlin 6553.40 | 3,570 | * -1.bigwig_Merlin 8191.75 | 3,570 | * -1.bigwig_Merlin 9830.10 | 3,410 | * -1.bigwig_Merlin 11468.45 | 3,610 | * -1.bigwig_Merlin 13106.80 | 3,170 | * -1.bigwig_Merlin 14745.15 | 3,310 | * -1.bigwig_Merlin 16383.50 | 3,390 | * -1.bigwig_Merlin 18021.85 | 3,270 | * -1.bigwig_Merlin 19660.20 | 3,680 | * -1.bigwig_Merlin 21298.55 | 3,560 | * -1.bigwig_Merlin 22936.90 | 3,490 | * -1.bigwig_Merlin 24575.25 | 3,530 | * -1.bigwig_Merlin 26213.60 | 3,460 | * -1.bigwig_Merlin 27851.95 | 3,470 | * -1.bigwig_Merlin 29490.30 | 3,500 | * -1.bigwig_Merlin 31128.65 | 3,920 | * -1.bigwig_Merlin 32767.00 | 3,400 | * -1.bigwig_Merlin ------------ |------------ | -1.bigwig_Merlin N= | 180,929 | -1.bigwig_Merlin ------------ |------------ | +Histogram of bigwig_sample bigwig values +bigwig_sample_Merlin 1638.35 | 114,349 | ************************************************** +bigwig_sample_Merlin 3276.70 | 3,650 | * +bigwig_sample_Merlin 4915.05 | 3,620 | * +bigwig_sample_Merlin 6553.40 | 3,570 | * +bigwig_sample_Merlin 8191.75 | 3,570 | * +bigwig_sample_Merlin 9830.10 | 3,410 | * +bigwig_sample_Merlin 11468.45 | 3,610 | * +bigwig_sample_Merlin 13106.80 | 3,170 | * +bigwig_sample_Merlin 14745.15 | 3,310 | * +bigwig_sample_Merlin 16383.50 | 3,390 | * +bigwig_sample_Merlin 18021.85 | 3,270 | * +bigwig_sample_Merlin 19660.20 | 3,680 | * +bigwig_sample_Merlin 21298.55 | 3,560 | * +bigwig_sample_Merlin 22936.90 | 3,490 | * +bigwig_sample_Merlin 24575.25 | 3,530 | * +bigwig_sample_Merlin 26213.60 | 3,460 | * +bigwig_sample_Merlin 27851.95 | 3,470 | * +bigwig_sample_Merlin 29490.30 | 3,500 | * +bigwig_sample_Merlin 31128.65 | 3,920 | * +bigwig_sample_Merlin 32767.00 | 3,400 | * +bigwig_sample_Merlin ------------ |------------ | +bigwig_sample_Merlin N= | 180,929 | +bigwig_sample_Merlin ------------ |------------ | diff --git a/tools/bigwig_outlier_bed/test-data/table3_sample b/tools/bigwig_outlier_bed/test-data/table3_sample index 43c4fd115cb..843acbea4dd 100644 --- a/tools/bigwig_outlier_bed/test-data/table3_sample +++ b/tools/bigwig_outlier_bed/test-data/table3_sample @@ -1,57 +1,12 @@ Descriptive measures -bigwig bigwig_sample -contig Merlin -n 180929 -mean 6354.313736 -std 9951.749936 +bigwig fake.bigwig +contig chr1 +n 1000 +mean 0.840000 +std 0.366606 min 0.000000 -max 32767.000000 -qtop 24359.00 -qbot 0.00 -First/Last 10 value counts -Value Count -0.00 110889 -13.00 30 -22.00 10 -26.00 10 -29.00 10 -40.00 10 -43.00 10 -53.00 10 -57.00 10 -60.00 20 -32721.00 10 -32724.00 10 -32726.00 10 -32728.00 10 -32730.00 10 -32731.00 20 -32737.00 10 -32745.00 10 -32766.00 10 -32767.00 10 -Histogram of bigwig_sample bigwig values -bigwig_sample_Merlin 1638.35 | 114,349 | ************************************************** -bigwig_sample_Merlin 3276.70 | 3,650 | * -bigwig_sample_Merlin 4915.05 | 3,620 | * -bigwig_sample_Merlin 6553.40 | 3,570 | * -bigwig_sample_Merlin 8191.75 | 3,570 | * -bigwig_sample_Merlin 9830.10 | 3,410 | * -bigwig_sample_Merlin 11468.45 | 3,610 | * -bigwig_sample_Merlin 13106.80 | 3,170 | * -bigwig_sample_Merlin 14745.15 | 3,310 | * -bigwig_sample_Merlin 16383.50 | 3,390 | * -bigwig_sample_Merlin 18021.85 | 3,270 | * -bigwig_sample_Merlin 19660.20 | 3,680 | * -bigwig_sample_Merlin 21298.55 | 3,560 | * -bigwig_sample_Merlin 22936.90 | 3,490 | * -bigwig_sample_Merlin 24575.25 | 3,530 | * -bigwig_sample_Merlin 26213.60 | 3,460 | * -bigwig_sample_Merlin 27851.95 | 3,470 | * -bigwig_sample_Merlin 29490.30 | 3,500 | * -bigwig_sample_Merlin 31128.65 | 3,920 | * -bigwig_sample_Merlin 32767.00 | 3,400 | * -bigwig_sample_Merlin ------------ |------------ | -bigwig_sample_Merlin N= | 180,929 | -bigwig_sample_Merlin ------------ |------------ | - +max 1.000000 +qtop 1.00 +qbot 1.00 +chr1: 0.000000 occurs 160 times +chr1: 1.000000 occurs 840 times diff --git a/tools/bigwig_outlier_bed/test-data/table_only_sample b/tools/bigwig_outlier_bed/test-data/table_only_sample index 48e766c525e..02242436452 100644 --- a/tools/bigwig_outlier_bed/test-data/table_only_sample +++ b/tools/bigwig_outlier_bed/test-data/table_only_sample @@ -1,74 +1,12 @@ Descriptive measures -bigwig bigwig_sample -contig Merlin -n 180929 -mean 6354.313736 -std 9951.749936 +bigwig fake.bigwig +contig chr1 +n 1000 +mean 0.840000 +std 0.366606 min 0.000000 -max 32767.000000 +max 1.000000 qtop noqhi qbot noqlo -First/Last 10 value counts -Value Count -0.00 110889 -13.00 30 -22.00 10 -26.00 10 -29.00 10 -40.00 10 -43.00 10 -53.00 10 -57.00 10 -60.00 20 -32721.00 10 -32724.00 10 -32726.00 10 -32728.00 10 -32730.00 10 -32731.00 20 -32737.00 10 -32745.00 10 -32766.00 10 -32767.00 10 -Histogram of bigwig_sample bigwig values -bigwig_sample_Merlin 1638.35 | 114,349 | ************************************************** -bigwig_sample_Merlin 3276.70 | 3,650 | * -bigwig_sample_Merlin 4915.05 | 3,620 | * -bigwig_sample_Merlin 6553.40 | 3,570 | * -bigwig_sample_Merlin 8191.75 | 3,570 | * -bigwig_sample_Merlin 9830.10 | 3,410 | * -bigwig_sample_Merlin 11468.45 | 3,610 | * -bigwig_sample_Merlin 13106.80 | 3,170 | * -bigwig_sample_Merlin 14745.15 | 3,310 | * -bigwig_sample_Merlin 16383.50 | 3,390 | * -bigwig_sample_Merlin 18021.85 | 3,270 | * -bigwig_sample_Merlin 19660.20 | 3,680 | * -bigwig_sample_Merlin 21298.55 | 3,560 | * -bigwig_sample_Merlin 22936.90 | 3,490 | * -bigwig_sample_Merlin 24575.25 | 3,530 | * -bigwig_sample_Merlin 26213.60 | 3,460 | * -bigwig_sample_Merlin 27851.95 | 3,470 | * -bigwig_sample_Merlin 29490.30 | 3,500 | * -bigwig_sample_Merlin 31128.65 | 3,920 | * -bigwig_sample_Merlin 32767.00 | 3,400 | * -bigwig_sample_Merlin ------------ |------------ | -bigwig_sample_Merlin N= | 180,929 | -bigwig_sample_Merlin ------------ |------------ | - -Descriptive measures -bigwig 1.bigwig -contig chr21 -n 48129895 -mean 0.003446 -std 0.400601 -min 0.000000 -max 100.000000 -qtop noqhi -qbot noqlo -chr21: 0.000000 occurs 48125565 times -chr21: 20.000000 occurs 1660 times -chr21: 40.000000 occurs 1610 times -chr21: 50.000000 occurs 5 times -chr21: 60.000000 occurs 840 times -chr21: 80.000000 occurs 195 times -chr21: 100.000000 occurs 20 times +chr1: 0.000000 occurs 160 times +chr1: 1.000000 occurs 840 times diff --git a/tools/bigwig_outlier_bed/test-data/table_qlo_notset_sample b/tools/bigwig_outlier_bed/test-data/table_qlo_notset_sample index b1f3e88a656..7ee2574ccc1 100644 --- a/tools/bigwig_outlier_bed/test-data/table_qlo_notset_sample +++ b/tools/bigwig_outlier_bed/test-data/table_qlo_notset_sample @@ -6,7 +6,7 @@ mean 6354.313736 std 9951.749936 min 0.000000 max 32767.000000 -qtop 31906.72 +qtop 6436.00 qbot noqlo First/Last 10 value counts Value Count diff --git a/tools/bigwig_outlier_bed/test-data/table_sample b/tools/bigwig_outlier_bed/test-data/table_sample index 006e15ade41..0303b44c424 100644 --- a/tools/bigwig_outlier_bed/test-data/table_sample +++ b/tools/bigwig_outlier_bed/test-data/table_sample @@ -6,7 +6,7 @@ mean 6354.313736 std 9951.749936 min 0.000000 max 32767.000000 -qtop 31906.72 +qtop 7293.00 qbot 0.00 First/Last 10 value counts Value Count