Skip to content

Commit

Permalink
Tests for Variant Models (bigdatagenomics#26)
Browse files Browse the repository at this point in the history
* Create testing stubs for variant models

* finished tests for LogisiticVariantModel

* Finished LinearVariantModel tests

* update description of constructVariantModel fns
  • Loading branch information
kunalgosar authored and nathanielparke committed Oct 24, 2017
1 parent 76020f8 commit 3eb19c1
Show file tree
Hide file tree
Showing 11 changed files with 295 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ case class LinearVariantModel(uniqueID: String,
val regressionName = "Linear Regression"

def update(genotypes: CalledVariant, phenotypes: Map[String, Phenotype]): LinearVariantModel = {
val batchVariantModel = constructVariantModel(uniqueID, applyToSite(phenotypes, genotypes, allelicAssumption))
val batchVariantModel = constructUpdatedVariantModel(uniqueID, applyToSite(phenotypes, genotypes, allelicAssumption))
mergeWith(batchVariantModel)
}

Expand All @@ -61,7 +61,7 @@ case class LinearVariantModel(uniqueID: String,
val updatedResidualDegreesOfFreedom = updateResidualDegreesOfFreedom(variantModel.association.numSamples)
val updatedtStatistic = calculateTStatistic(updatedWeights, updatedGeneticParameterStandardError)
val updatedPValue = calculatePValue(updatedtStatistic, updatedResidualDegreesOfFreedom)
constructVariantModel(this.uniqueID,
constructUpdatedVariantModel(this.uniqueID,
updatedSsDeviations,
updatedSsResiduals,
updatedGeneticParameterStandardError,
Expand Down Expand Up @@ -181,15 +181,30 @@ case class LinearVariantModel(uniqueID: String,
pvalue
}

def constructVariantModel(variantID: String,
updatedSsDeviations: Double,
updatedSsResiduals: Double,
updatedGeneticParameterStandardError: Double,
updatedtStatistic: Double,
updatedResidualDegreesOfFreedom: Int,
updatedPValue: Double,
updatedWeights: List[Double],
updatedNumSamples: Int): LinearVariantModel = {
/**
* Creates an updated LinearVariantModel from the current model with a new
* Association object on the specified parameters.
*
* @param variantId Variant Id of the new VariantModel
* @param updatedSsDeviations New ssDeviations
* @param updatedSsResiduals New ssResiduals
* @param updatedGeneticParameterStandardError New geneticParameterStandardError
* @param updatedtStatistic New tStatistic
* @param updatedResidualDegreesOfFreedom New residualDegreesOfFreedom
* @param updatedPValue New pValue
* @param updatedWeights New weights
* @param updatedNumSamples New numSamples
* @return Returns a new LinearVariantModel
*/
def constructUpdatedVariantModel(variantID: String,
updatedSsDeviations: Double,
updatedSsResiduals: Double,
updatedGeneticParameterStandardError: Double,
updatedtStatistic: Double,
updatedResidualDegreesOfFreedom: Int,
updatedPValue: Double,
updatedWeights: List[Double],
updatedNumSamples: Int): LinearVariantModel = {

val updatedAssociation = LinearAssociation(ssDeviations = updatedSsDeviations,
ssResiduals = updatedSsResiduals,
Expand All @@ -211,8 +226,16 @@ case class LinearVariantModel(uniqueID: String,
phaseSetId)
}

def constructVariantModel(variantID: String,
association: LinearAssociation): LinearVariantModel = {
/**
* Creates an updated LinearVariantModel from the current model to contain
* the input Association object.
*
* @param variantId VariantId of the new VariantModel
* @param association New association object
* @return Returns a new LinearVariantModel
*/
def constructUpdatedVariantModel(variantID: String,
association: LinearAssociation): LinearVariantModel = {
LinearVariantModel(variantID,
association,
phenotype,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ case class LogisticVariantModel(uniqueID: String,
val updatedWeights = updateWeights(variantModel.association.weights, variantModel.association.numSamples)
val updatedWaldStatistic = calculateWaldStatistic(updatedGeneticParameterStandardError, updatedWeights)
val updatedPValue = calculatePvalue(updatedWaldStatistic)
constructVariantModel(this.uniqueID,
constructUpdatedVariantModel(this.uniqueID,
updatedGeneticParameterStandardError,
updatedPValue,
updatedWeights,
Expand Down Expand Up @@ -89,7 +89,7 @@ case class LogisticVariantModel(uniqueID: String,
}

/**
* Returns the p value associated with the genetic parameter in the regression model
* Returns the p value associated with the genetic parameter in the regression model
* by running the wald statistic associated with genetic parameter through chi distribution
* with one degree of freedom.
*
Expand All @@ -106,18 +106,29 @@ case class LogisticVariantModel(uniqueID: String,

//TODO: add validation stringency here rather than just creating empty association object
val batchVariantModel = try {
constructVariantModel(uniqueID, applyToSite(phenotypes, genotypes, allelicAssumption))
constructUpdatedVariantModel(uniqueID, applyToSite(phenotypes, genotypes, allelicAssumption))
} catch {
case error: SingularMatrixException => throw new SingularMatrixException()
}
mergeWith(batchVariantModel)
}

def constructVariantModel(variantId: String,
updatedGeneticParameterStandardError: Double,
updatedPValue: Double,
updatedWeights: List[Double],
updatedNumSamples: Int): LogisticVariantModel = {
/**
* Creates an updated LogisticVariantModel from the current model with a new
* Association object on the specified parameters.
*
* @param variantId Variant Id of the new VariantModel
* @param updatedGeneticParameterStandardError New geneticParameterStandardError
* @param updatedPValue New pValue
* @param updatedWeights New weights
* @param updatedNumSamples New numSamples
* @return Returns a new LogisticVariantModel
*/
def constructUpdatedVariantModel(variantId: String,
updatedGeneticParameterStandardError: Double,
updatedPValue: Double,
updatedWeights: List[Double],
updatedNumSamples: Int): LogisticVariantModel = {

val association = LogisticAssociation(weights = updatedWeights,
geneticParameterStandardError = updatedGeneticParameterStandardError,
Expand All @@ -135,8 +146,16 @@ case class LogisticVariantModel(uniqueID: String,
phaseSetId)
}

def constructVariantModel(variantId: String,
association: LogisticAssociation): LogisticVariantModel = {
/**
* Creates an updated LogisticVariantModel from the current model to contain
* the input Association object.
*
* @param variantId VariantId of the new VariantModel
* @param association New association object
* @return Returns a new LogisticVariantModel
*/
def constructUpdatedVariantModel(variantId: String,
association: LogisticAssociation): LogisticVariantModel = {
LogisticVariantModel(variantId,
association,
phenotype,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
package org.bdgenomics.gnocchi.models.variant

import org.bdgenomics.gnocchi.GnocchiFunSuite
import org.bdgenomics.gnocchi.primitives.association.LinearAssociation
import org.scalactic.Tolerance._

class LinearVariantModelSuite extends GnocchiFunSuite {

sparkTest("Test constructUpdatedVariantModel works") {
val assoc = LinearAssociation(ssDeviations = 0.5,
ssResiduals = 0.5,
geneticParameterStandardError = 0.5,
tStatistic = 0.5,
residualDegreesOfFreedom = 2,
pValue = 0.5,
weights = List(0.5, 0.5),
numSamples = 10)

val variantModel = LinearVariantModel("rs123456", assoc, "", 1, 1, "A", "C", "")

val newVariantModel = variantModel.constructUpdatedVariantModel("rs234567",
0.1,
0.2,
0.3,
0.4,
100,
0.6,
List(0.7, 0.8),
500)

// Assert that all values in the LinearVariantModel object match expected
// The following values should be updated given the new parameters
assert(newVariantModel.uniqueID === "rs234567")
assert(newVariantModel.association.ssDeviations === 0.1)
assert(newVariantModel.association.ssResiduals === 0.2)
assert(newVariantModel.association.geneticParameterStandardError === 0.3)
assert(newVariantModel.association.tStatistic === 0.4)
assert(newVariantModel.association.residualDegreesOfFreedom === 100)
assert(newVariantModel.association.pValue === 0.6)
assert(newVariantModel.association.weights === List(0.7, 0.8))
assert(newVariantModel.association.numSamples === 500)

// The following values should match the original ones
assert(newVariantModel.phenotype === "")
assert(newVariantModel.chromosome === 1)
assert(newVariantModel.position === 1)
assert(newVariantModel.referenceAllele === "A")
assert(newVariantModel.alternateAllele === "C")
assert(newVariantModel.allelicAssumption === "")
}

sparkTest("Test constructUpdatedVariantModel with association parameter works") {
val assoc = LinearAssociation(ssDeviations = 0.5,
ssResiduals = 0.5,
geneticParameterStandardError = 0.5,
tStatistic = 0.5,
residualDegreesOfFreedom = 2,
pValue = 0.5,
weights = List(0.5, 0.5),
numSamples = 10)

val variantModel = LinearVariantModel("rs123456", assoc, "", 1, 1, "A", "C", "")

val newAssoc = LinearAssociation(ssDeviations = 0.1,
ssResiduals = 0.2,
geneticParameterStandardError = 0.3,
tStatistic = 0.4,
residualDegreesOfFreedom = 100,
pValue = 0.6,
weights = List(0.7, 0.8),
numSamples = 500)

val newVariantModel = variantModel.constructUpdatedVariantModel("rs234567", newAssoc)

// Assert that all values in the LinearVariantModel object match expected
// The following values should be updated given the new parameters
assert(newVariantModel.uniqueID === "rs234567")
assert(newVariantModel.association.ssDeviations === 0.1)
assert(newVariantModel.association.ssResiduals === 0.2)
assert(newVariantModel.association.geneticParameterStandardError === 0.3)
assert(newVariantModel.association.tStatistic === 0.4)
assert(newVariantModel.association.residualDegreesOfFreedom === 100)
assert(newVariantModel.association.pValue === 0.6)
assert(newVariantModel.association.weights === List(0.7, 0.8))
assert(newVariantModel.association.numSamples === 500)

// The following values should match the original ones
assert(newVariantModel.phenotype === "")
assert(newVariantModel.chromosome === 1)
assert(newVariantModel.position === 1)
assert(newVariantModel.referenceAllele === "A")
assert(newVariantModel.alternateAllele === "C")
assert(newVariantModel.allelicAssumption === "")
}

sparkTest("Test LinearVariantModel.mergeWith works") {
val firstAssoc = LinearAssociation(ssDeviations = 0.5,
ssResiduals = 0.5,
geneticParameterStandardError = 0.5,
tStatistic = 0.5,
residualDegreesOfFreedom = 2,
pValue = 0.5,
weights = List(0.5, 0.5),
numSamples = 10)

val firstVariantModel = LinearVariantModel("rs123456", firstAssoc, "", 1, 1, "A", "C", "")

val secondAssoc = LinearAssociation(ssDeviations = 0.2,
ssResiduals = 0.3,
geneticParameterStandardError = 0.4,
tStatistic = 0.6,
residualDegreesOfFreedom = 1,
pValue = 0.6,
weights = List(0.7, 0.8),
numSamples = 10)

val secondVariantModel = LinearVariantModel("rs123456", secondAssoc, "", 1, 1, "A", "C", "")

// Merge firstVariantModel with secondVariantModel
val mergedVariantModel = firstVariantModel.mergeWith(secondVariantModel)

assert(mergedVariantModel.association.numSamples === 20)
assert(mergedVariantModel.association.weights === List(0.6, 0.65))
assert(mergedVariantModel.association.ssDeviations === 0.7)
assert(mergedVariantModel.association.ssResiduals === 0.8)
assert(mergedVariantModel.association.geneticParameterStandardError === 0.2519 +- 0.0001)
assert(mergedVariantModel.association.residualDegreesOfFreedom === 12)
assert(mergedVariantModel.association.tStatistic === 2.5796 +- 0.0001)
assert(mergedVariantModel.association.pValue === 0.0241 +- 0.0001)
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package org.bdgenomics.gnocchi.models.variant

import org.bdgenomics.gnocchi.GnocchiFunSuite
import org.bdgenomics.gnocchi.primitives.association.LogisticAssociation
import org.scalactic.Tolerance._

class LogisticVariantModelSuite extends GnocchiFunSuite {

sparkTest("Test constructUpdatedVariantModel works") {
val assoc = LogisticAssociation(geneticParameterStandardError = 0.5,
pValue = 0.5,
weights = List(0.5, 0.5),
numSamples = 10)

val variantModel = LogisticVariantModel("rs123456", assoc, "", 1, 1, "A", "C", "")
val newVariantModel = variantModel.constructUpdatedVariantModel("rs234567", 0.1, 0.2, List(0.3, 0.4), 1)

// Assert that all values in the LogisticVariantModel object match expected
// The following values should be updated given the new parameters
assert(newVariantModel.uniqueID === "rs234567")
assert(newVariantModel.association.geneticParameterStandardError === 0.1)
assert(newVariantModel.association.pValue === 0.2)
assert(newVariantModel.association.weights === List(0.3, 0.4))
assert(newVariantModel.association.numSamples === 1)

// The following values should match the original ones
assert(newVariantModel.phenotype === "")
assert(newVariantModel.chromosome === 1)
assert(newVariantModel.position === 1)
assert(newVariantModel.referenceAllele === "A")
assert(newVariantModel.alternateAllele === "C")
assert(newVariantModel.allelicAssumption === "")
}

sparkTest("Test constructUpdatedVariantModel with association parameter works") {
val assoc = LogisticAssociation(geneticParameterStandardError = 0.5,
pValue = 0.5,
weights = List(0.5, 0.5),
numSamples = 10)

val variantModel = LogisticVariantModel("rs123456", assoc, "", 1, 1, "A", "C", "")

val newAssoc = LogisticAssociation(geneticParameterStandardError = 0.1,
pValue = 0.2,
weights = List(0.3, 0.4),
numSamples = 1)

val newVariantModel = variantModel.constructUpdatedVariantModel("rs234567", newAssoc)

// Assert that all values in the LogisticVariantModel object match expected
// The following values should be updated given the new parameters
assert(newVariantModel.uniqueID === "rs234567")
assert(newVariantModel.association.geneticParameterStandardError === 0.1)
assert(newVariantModel.association.pValue === 0.2)
assert(newVariantModel.association.weights === List(0.3, 0.4))
assert(newVariantModel.association.numSamples === 1)

// The following values should match the original ones
assert(newVariantModel.phenotype === "")
assert(newVariantModel.chromosome === 1)
assert(newVariantModel.position === 1)
assert(newVariantModel.referenceAllele === "A")
assert(newVariantModel.alternateAllele === "C")
assert(newVariantModel.allelicAssumption === "")
}

sparkTest("Test LogisticVariantModel.mergeWith works") {
val firstAssoc = LogisticAssociation(geneticParameterStandardError = 0.5,
pValue = 0.5,
weights = List(0.5, 0.5),
numSamples = 10)
val firstVariantModel = LogisticVariantModel("rs123456", firstAssoc, "", 1, 1, "A", "C", "")

val secondAssoc = LogisticAssociation(geneticParameterStandardError = 0.2,
pValue = 0.2,
weights = List(0.2, 0.8),
numSamples = 5)
val secondVariantModel = LogisticVariantModel("rs123456", secondAssoc, "", 1, 1, "A", "C", "")

// Merge firstVariantModel with secondVariantModel
val mergedVariantModel = firstVariantModel.mergeWith(secondVariantModel)

// Assert values in the merged model match expected
assert(mergedVariantModel.association.numSamples === 15)

// Assert that new geneticParameterStandardError is the weighted average of
// the geneticParameterStandardErrors of the first and second model
assert(mergedVariantModel.association.geneticParameterStandardError === 0.4)

// Assert that the new weights are weighted averages of the weights from
// the first and second model
assert(mergedVariantModel.association.weights === List(0.4, 0.6))

// The new p-value should be 0.1336...
assert(mergedVariantModel.association.pValue === 0.1336 +- 0.0001)
}
}

This file was deleted.

This file was deleted.

This file was deleted.

Loading

0 comments on commit 3eb19c1

Please sign in to comment.