Skip to content

Commit

Permalink
Merge pull request #263 from maize-genetics/graph-hapid-seqlen
Browse files Browse the repository at this point in the history
Graph hapid seqlen
  • Loading branch information
tcasstevens authored Dec 11, 2024
2 parents 254e75d + 14d235b commit 6460407
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 0 deletions.
15 changes: 15 additions & 0 deletions src/main/kotlin/net/maizegenetics/phgv2/api/HaplotypeGraph.kt
Original file line number Diff line number Diff line change
Expand Up @@ -562,4 +562,19 @@ class HaplotypeGraph(hvcfFiles: List<String>) {
return altHeaderMap[hapid]?.refChecksum ?: ""
}

/**
* Returns a map of haplotype id to sequence length.
* This is calculated from the AltHeaderMetaData regions.
*/
fun hapidToSeqLength(): Map<String, Int> {

return altHeaderMap.map { (hapid, altHeader) ->
val seqLength = altHeader.regions.sumOf { (start, end) ->
end.position - start.position + 1
}
Pair(hapid, seqLength)
}.toMap()

}

}
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
package net.maizegenetics.phgv2.cli

import com.github.ajalt.clikt.testing.test
import net.maizegenetics.phgv2.api.HaplotypeGraph
import net.maizegenetics.phgv2.brapi.createSmallSeqTiledb
import net.maizegenetics.phgv2.utils.Position
import net.maizegenetics.phgv2.utils.getChecksum
import net.maizegenetics.phgv2.utils.seqFromAGC
import org.apache.logging.log4j.LogManager
import org.junit.jupiter.api.BeforeAll
import org.junit.jupiter.api.Test
Expand All @@ -27,6 +30,8 @@ class CreateFastaFromHvcfRangeFastaTest {

private val dbPath = "${exportHvcfDir}/tiledb_ref_range_fasta"

val HVCF_PATTERN = Regex("""(\.hvcf|\.h\.vcf|\.hvcf\.gz|\.h\.vcf\.gz)$""")

@BeforeAll
@JvmStatic
fun setup() {
Expand Down Expand Up @@ -77,4 +82,42 @@ class CreateFastaFromHvcfRangeFastaTest {

}

@Test
fun testHaplotypeGraphHapidToSeqLength() {

val inputFiles =
File(multiInputDir)
.walk()
.filter { HVCF_PATTERN.containsMatchIn(it.name) }
.map { it.absolutePath }
.toList()

require(inputFiles.isNotEmpty()) { "At least one HVCF file should be specified." }

val graph = HaplotypeGraph(inputFiles)

val hapidToSeqLength = graph.hapidToSeqLength()

graph.ranges()
.forEach { range ->
graph.hapIdToSampleGametes(range).keys
.forEach { hapid ->
seqFromAGC(
dbPath,
graph,
hapid,
Pair(Position(range.contig, range.start), Position(range.contig, range.end))
)
.let { (seq, _) ->
assertEquals(
hapidToSeqLength.getValue(hapid),
seq.length,
"hapid: $hapid seq length: ${seq.length} != ${hapidToSeqLength.getValue(hapid)}"
)
}
}
}

}

}

0 comments on commit 6460407

Please sign in to comment.