From 87c78db6ce48a548de39d72ce263450aa133ce93 Mon Sep 17 00:00:00 2001
From: Terry
Date: Tue, 10 Dec 2024 16:40:32 -0500
Subject: [PATCH 1/2] Added method HaplotypeGraph.hapidToSeqLength()

---
NOTE(review): the test added below references `range` and `seq`, which are
not declared anywhere in this hunk. Unless they are members of the test class,
this commit only compiles once PATCH 2/2 is applied as well.

 .../maizegenetics/phgv2/api/HaplotypeGraph.kt | 15 +++++++++++
 .../cli/CreateFastaFromHvcfRangeFastaTest.kt  | 27 +++++++++++++++++++
 2 files changed, 42 insertions(+)

diff --git a/src/main/kotlin/net/maizegenetics/phgv2/api/HaplotypeGraph.kt b/src/main/kotlin/net/maizegenetics/phgv2/api/HaplotypeGraph.kt
index f803fb62..db3cabea 100644
--- a/src/main/kotlin/net/maizegenetics/phgv2/api/HaplotypeGraph.kt
+++ b/src/main/kotlin/net/maizegenetics/phgv2/api/HaplotypeGraph.kt
@@ -562,4 +562,19 @@ class HaplotypeGraph(hvcfFiles: List<String>) {
         return altHeaderMap[hapid]?.refChecksum ?: ""
     }
 
+    /**
+     * Returns a map of haplotype id to sequence length.
+     * This is calculated from the AltHeaderMetaData regions; each region adds
+     * |end - start| + 1, so a region listed with start > end stays positive.
+     */
+    fun hapidToSeqLength(): Map<String, Int> {
+
+        return altHeaderMap.mapValues { (_, altHeader) ->
+            altHeader.regions.sumOf { (start, end) ->
+                kotlin.math.abs(end.position - start.position) + 1
+            }
+        }
+
+    }
+
 }
diff --git a/src/test/kotlin/net/maizegenetics/phgv2/cli/CreateFastaFromHvcfRangeFastaTest.kt b/src/test/kotlin/net/maizegenetics/phgv2/cli/CreateFastaFromHvcfRangeFastaTest.kt
index 9ac05a3e..e31f1b32 100644
--- a/src/test/kotlin/net/maizegenetics/phgv2/cli/CreateFastaFromHvcfRangeFastaTest.kt
+++ b/src/test/kotlin/net/maizegenetics/phgv2/cli/CreateFastaFromHvcfRangeFastaTest.kt
@@ -1,8 +1,10 @@
 package net.maizegenetics.phgv2.cli
 
 import com.github.ajalt.clikt.testing.test
+import net.maizegenetics.phgv2.api.HaplotypeGraph
 import net.maizegenetics.phgv2.brapi.createSmallSeqTiledb
 import net.maizegenetics.phgv2.utils.getChecksum
+import net.maizegenetics.phgv2.utils.seqFromAGC
 import org.apache.logging.log4j.LogManager
 import org.junit.jupiter.api.BeforeAll
 import org.junit.jupiter.api.Test
@@ -27,6 +29,8 @@ class CreateFastaFromHvcfRangeFastaTest {
 
         private val dbPath = "${exportHvcfDir}/tiledb_ref_range_fasta"
 
+        val HVCF_PATTERN = Regex("""(\.hvcf|\.h\.vcf|\.hvcf\.gz|\.h\.vcf\.gz)$""")
+
         @BeforeAll
         @JvmStatic
         fun setup() {
@@ -77,4 +81,27 @@ class CreateFastaFromHvcfRangeFastaTest {
 
     }
 
+    @Test
+    fun testHaplotypeGraphHapidToSeqLength() {
+
+        val inputFiles =
+            File(multiInputDir)
+                .walk()
+                .filter { HVCF_PATTERN.containsMatchIn(it.name) }
+                .map { it.absolutePath }
+                .toList()
+
+        require(inputFiles.isNotEmpty()) { "At least one HVCF file should be specified." }
+
+        val graph = HaplotypeGraph(inputFiles)
+
+        graph.altHeaders()
+            .map { it.key }
+            .forEach { hapid ->
+                val temp = seqFromAGC(dbPath, graph, hapid, range)
+                seq = temp.first
+            }
+
+    }
+
 }
\ No newline at end of file
From 14d235b8c808db0f64b763df29e657ad0119d014 Mon Sep 17 00:00:00 2001
From: Terry
Date: Wed, 11 Dec 2024 10:24:25 -0500
Subject: [PATCH 2/2] Added unit test for HaplotypeGraph.hapidToSeqLength()

---
 .../cli/CreateFastaFromHvcfRangeFastaTest.kt  | 28 +++++++++++++++----
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/src/test/kotlin/net/maizegenetics/phgv2/cli/CreateFastaFromHvcfRangeFastaTest.kt b/src/test/kotlin/net/maizegenetics/phgv2/cli/CreateFastaFromHvcfRangeFastaTest.kt
index e31f1b32..3252ed14 100644
--- a/src/test/kotlin/net/maizegenetics/phgv2/cli/CreateFastaFromHvcfRangeFastaTest.kt
+++ b/src/test/kotlin/net/maizegenetics/phgv2/cli/CreateFastaFromHvcfRangeFastaTest.kt
@@ -3,6 +3,7 @@ package net.maizegenetics.phgv2.cli
 import com.github.ajalt.clikt.testing.test
 import net.maizegenetics.phgv2.api.HaplotypeGraph
 import net.maizegenetics.phgv2.brapi.createSmallSeqTiledb
+import net.maizegenetics.phgv2.utils.Position
 import net.maizegenetics.phgv2.utils.getChecksum
 import net.maizegenetics.phgv2.utils.seqFromAGC
 import org.apache.logging.log4j.LogManager
@@ -87,7 +88,7 @@ class CreateFastaFromHvcfRangeFastaTest {
         val inputFiles =
             File(multiInputDir)
                 .walk()
-                .filter { HVCF_PATTERN.containsMatchIn(it.name) }
+                .filter { HVCF_PATTERN.containsMatchIn(it.name) }
                 .map { it.absolutePath }
                 .toList()
 
@@ -95,11 +96,26 @@ class CreateFastaFromHvcfRangeFastaTest {
 
         val graph = HaplotypeGraph(inputFiles)
 
-        graph.altHeaders()
-            .map { it.key }
-            .forEach { hapid ->
-                val temp = seqFromAGC(dbPath, graph, hapid, range)
-                seq = temp.first
+        val hapidToSeqLength = graph.hapidToSeqLength()
+
+        graph.ranges()
+            .forEach { range ->
+                graph.hapIdToSampleGametes(range).keys
+                    .forEach { hapid ->
+                        seqFromAGC(
+                            dbPath,
+                            graph,
+                            hapid,
+                            Pair(Position(range.contig, range.start), Position(range.contig, range.end))
+                        )
+                            .let { (seq, _) ->
+                                assertEquals(
+                                    hapidToSeqLength.getValue(hapid),
+                                    seq.length,
+                                    "hapid: $hapid seq length: ${seq.length} != ${hapidToSeqLength.getValue(hapid)}"
+                                )
+                            }
+                    }
             }
 
     }