Skip to content

Commit

Permalink
feat: add va spec pydantic models (#6)
Browse files Browse the repository at this point in the history
close #3
  • Loading branch information
korikuzma authored Nov 13, 2024
1 parent fe006b5 commit 53f02ac
Show file tree
Hide file tree
Showing 10 changed files with 581 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
runs-on: ubuntu-latest
environment:
name: pypi
url: https://pypi.org/p/ga4gh-va-spec
url: https://pypi.org/p/ga4gh.va_spec
permissions:
id-token: write # IMPORTANT: mandatory for trusted publishing
steps:
Expand Down
4 changes: 4 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[submodule "submodules/va_spec"]
path = submodules/va_spec
url = https://github.com/ga4gh/va-spec
branch = 1.x
8 changes: 7 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,11 @@ keywords = [
]
requires-python = ">=3.10"
dynamic = ["version"]
dependencies = []
dependencies = [
"ga4gh.vrs~=2.0.0a12",
"ga4gh.cat_vrs~=0.1.0",
"pydantic==2.*"
]

[project.optional-dependencies]
dev = [
Expand Down Expand Up @@ -132,7 +136,9 @@ ignore = [
# ANN102 - missing-type-cls
# S101 - assert
# B011 - assert-false
# N815 - mixed-case-variable-in-class-scope
"tests/*" = ["ANN001", "ANN2", "ANN102", "S101", "B011"]
"src/ga4gh/va_spec/profiles/*" = ["ANN102", "N815"]

[tool.setuptools.packages.find]
where = ["src"]
Expand Down
44 changes: 44 additions & 0 deletions src/ga4gh/va_spec/profiles/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Package for VA-Spec Python implementation"""

from .assay_var_effect import (
AssayVariantEffectClinicalClassificationStatement,
AssayVariantEffectFunctionalClassificationStatement,
AssayVariantEffectMeasurementStudyResult,
AveClinicalClassification,
AveFunctionalClassification,
)
from .caf_study_result import CohortAlleleFrequencyStudyResult
from .var_path_stmt import PenetranceQualifier, VariantPathogenicityStatement
from .var_study_stmt import (
AlleleOriginQualifier,
AllelePrevalenceQualifier,
DiagnosticPredicate,
OncogenicPredicate,
PrognosticPredicate,
TherapeuticResponsePredicate,
VariantDiagnosticStudyStatement,
VariantOncogenicityStudyStatement,
VariantPrognosticStudyStatement,
VariantTherapeuticResponseStudyStatement,
)

# Public names re-exported by this package, grouped by the submodule each
# one is imported from.
__all__ = [
    # .assay_var_effect
    "AveFunctionalClassification",
    "AveClinicalClassification",
    "AssayVariantEffectFunctionalClassificationStatement",
    "AssayVariantEffectClinicalClassificationStatement",
    "AssayVariantEffectMeasurementStudyResult",
    # .caf_study_result
    "CohortAlleleFrequencyStudyResult",
    # .var_path_stmt
    "PenetranceQualifier",
    "VariantPathogenicityStatement",
    # .var_study_stmt
    "AlleleOriginQualifier",
    "DiagnosticPredicate",
    "OncogenicPredicate",
    "PrognosticPredicate",
    "TherapeuticResponsePredicate",
    "AllelePrevalenceQualifier",
    "VariantDiagnosticStudyStatement",
    "VariantOncogenicityStudyStatement",
    "VariantPrognosticStudyStatement",
    "VariantTherapeuticResponseStudyStatement",
]
134 changes: 134 additions & 0 deletions src/ga4gh/va_spec/profiles/assay_var_effect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
"""VA Spec Assay Variant Effect statement and study result Profiles"""

from __future__ import annotations

from enum import Enum
from typing import Literal

from ga4gh.cat_vrs.core_models import CategoricalVariant
from ga4gh.core.entity_models import (
IRI,
Coding,
DataSet,
Method,
StatementBase,
StudyGroup,
StudyResult,
StudyResultBase,
)
from ga4gh.vrs.models import MolecularVariation
from pydantic import ConfigDict, Field


class AveFunctionalClassification(str, Enum):
    """The functional classification of the variant effect in the assay."""

    # ``str`` mixin: every member is also a plain string equal to its value.
    # Declaration order is the iteration order of the enum.
    NORMAL = "normal"
    INDETERMINATE = "indeterminate"
    ABNORMAL = "abnormal"


class AveClinicalClassification(str, Enum):
    """The clinical strength of evidence of the variant effect in the assay."""

    # ``str`` mixin: every member is also a plain string equal to its value.

    # PS3_* codes at three strength levels.
    PS3_STRONG = "PS3_Strong"
    PS3_MODERATE = "PS3_Moderate"
    PS3_SUPPORTING = "PS3_Supporting"

    # BS3_* codes at the same three strength levels.
    BS3_STRONG = "BS3_Strong"
    BS3_MODERATE = "BS3_Moderate"
    BS3_SUPPORTING = "BS3_Supporting"


class AssayVariantEffectFunctionalClassificationStatement(StatementBase):
    """A statement that assigns a functional classification to a variant effect from a functional assay."""

    # Ask pydantic to keep enum *values* (plain strings) on enum-typed fields.
    model_config = ConfigDict(use_enum_values=True)

    # Fixed type tag: the only accepted value is this class's own name.
    type: Literal["AssayVariantEffectFunctionalClassificationStatement"] = Field(
        default="AssayVariantEffectFunctionalClassificationStatement",
        description="MUST be 'AssayVariantEffectFunctionalClassificationStatement'.",
    )

    # Required subject: a variation object given inline, or an IRI.
    subjectVariant: MolecularVariation | CategoricalVariant | IRI = Field(
        default=...,
        description="A protein or genomic contextual or canonical molecular variant.",
    )

    # Fixed predicate; only "hasAssayVariantEffectFor" is accepted.
    predicate: Literal["hasAssayVariantEffectFor"] = Field(
        default="hasAssayVariantEffectFor",
        description=(
            "The relationship declared to hold between the subject and the object "
            "of the Statement."
        ),
    )

    # Required object: the assay, as an IRI or a Coding.
    objectAssay: IRI | Coding = Field(
        default=...,
        description=(
            "The assay that is evaluated for the variant effect. "
            "(e.g growth in haploid cell culture protein stability in fluorescence assay)"
        ),
    )

    # Required functional classification (see AveFunctionalClassification).
    classification: AveFunctionalClassification = Field(
        default=...,
        description="The functional classification of the variant effect in the assay.",
    )

    # Optional method reference; defaults to None.
    specifiedBy: Method | IRI | None = Field(
        default=None,
        description=(
            "The method that specifies the functional classification of the "
            "variant effect in the assay."
        ),
    )


class AssayVariantEffectClinicalClassificationStatement(StatementBase):
    """A statement that assigns a clinical strength of evidence to a variant effect from a functional assay."""

    # Ask pydantic to keep enum *values* (plain strings) on enum-typed fields.
    model_config = ConfigDict(use_enum_values=True)

    # Fixed type tag: the only accepted value is this class's own name.
    type: Literal["AssayVariantEffectClinicalClassificationStatement"] = Field(
        default="AssayVariantEffectClinicalClassificationStatement",
        description="MUST be 'AssayVariantEffectClinicalClassificationStatement'.",
    )

    # Required subject: a variation object given inline, or an IRI.
    subjectVariant: MolecularVariation | CategoricalVariant | IRI = Field(
        default=...,
        description="A protein or genomic contextual or canonical molecular variant.",
    )

    # Fixed predicate; only "hasAssayVariantEffectFor" is accepted.
    predicate: Literal["hasAssayVariantEffectFor"] = Field(
        default="hasAssayVariantEffectFor",
        description=(
            "The relationship declared to hold between the subject and the object "
            "of the Statement."
        ),
    )

    # Required object: the assay, as an IRI or a Coding.
    objectAssay: IRI | Coding = Field(
        default=...,
        description=(
            "The assay that is evaluated for the variant effect. "
            "(e.g growth in haploid cell culture protein stability in fluorescence assay)"
        ),
    )

    # Required clinical strength of evidence (see AveClinicalClassification).
    classification: AveClinicalClassification = Field(
        default=...,
        description="The clinical strength of evidence of the variant effect in the assay.",
    )

    # Optional method reference; defaults to None.
    specifiedBy: Method | IRI | None = Field(
        default=None,
        description=(
            "The method that specifies the clinical strength of evidence of the "
            "variant effect in the assay."
        ),
    )


class AssayVariantEffectMeasurementStudyResult(StudyResultBase):
    """A StudyResult that reports a variant effect score from a functional assay."""

    model_config = ConfigDict(use_enum_values=True)

    # Fixed type tag: the only accepted value is this class's own name.
    type: Literal["AssayVariantEffectMeasurementStudyResult"] = Field(
        default="AssayVariantEffectMeasurementStudyResult",
        description="MUST be 'AssayVariantEffectMeasurementStudyResult'.",
    )

    # Every remaining field is optional and defaults to None.

    # Nested study results, if any.
    componentResult: list[StudyResult] | None = Field(
        default=None,
        description=(
            "Another StudyResult comprised of data items about the same focus as its parent Result, "
            "but based on a more narrowly scoped analysis of the foundational data "
            "(e.g. an analysis based on data about a subset of the parent Results full study population) ."
        ),
    )

    studyGroup: StudyGroup | None = Field(
        default=None,
        description=(
            "A description of a specific group or population of subjects interrogated "
            "in the ResearchStudy that produced the data captured in the StudyResult."
        ),
    )

    # The variant, given inline or as an IRI.
    focusVariant: MolecularVariation | IRI | None = Field(
        default=None,
        description=(
            "The human mapped representation of the variant that is the subject "
            "of the Statement."
        ),
    )

    score: float | None = Field(
        default=None,
        description="The score of the variant effect in the assay.",
    )

    # The assay/method, given inline or as an IRI.
    specifiedBy: Method | IRI | None = Field(
        default=None,
        description=(
            "The assay that was used to measure the variant effect with all the "
            "various properties"
        ),
    )

    sourceDataSet: list[DataSet] | None = Field(
        default=None,
        description="The full data set that this measurement is a part of",
    )
49 changes: 49 additions & 0 deletions src/ga4gh/va_spec/profiles/caf_study_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""VA Spec Cohort Allele Frequency (population frequency) Study Result Standard Profile"""

from __future__ import annotations

from typing import Literal

from ga4gh.core.entity_models import (
DataSet,
StudyResult,
StudyResultBase,
)
from ga4gh.vrs.models import Allele
from pydantic import ConfigDict, Field


class CohortAlleleFrequencyStudyResult(StudyResultBase):
    """A StudyResult that reports measures related to the frequency of an Allele in a cohort"""

    model_config = ConfigDict(use_enum_values=True)

    # Fixed type tag: the only accepted value is this class's own name.
    type: Literal["CohortAlleleFrequencyStudyResult"] = Field(
        default="CohortAlleleFrequencyStudyResult",
        description="MUST be 'CohortAlleleFrequencyStudyResult'.",
    )

    # Optional; defaults to None.
    sourceDataSet: list[DataSet] | None = Field(
        default=None,
        description="The dataset from which the CohortAlleleFrequencyStudyResult was reported.",
    )

    # Required: an Allele given inline, or a string.
    # NOTE(review): the other profiles in this package type references as IRI,
    # whereas a bare str is accepted here - confirm this is intended.
    focusAllele: Allele | str = Field(
        default=...,
        description=(
            "The specific subject or experimental unit in a Study that data in the "
            "StudyResult object is about - e.g. a particular variant in a population "
            "allele frequency dataset like ExAC or gnomAD."
        ),
    )

    # Required counts and frequency; no range constraints are declared on them.
    focusAlleleCount: int = Field(
        default=...,
        description="The number of occurrences of the focusAllele in the cohort.",
    )
    locusAlleleCount: int = Field(
        default=...,
        description=(
            "The number of occurrences of all alleles at the locus in the cohort "
            "(sometimes referred to as 'allele number')"
        ),
    )
    focusAlleleFrequency: float = Field(
        default=...,
        description="The frequency of the focusAllele in the cohort.",
    )

    # Required.
    # NOTE(review): the description speaks of a single cohort but the type is a
    # list of StudyResult - confirm against the VA-Spec schema.
    cohort: list[StudyResult] = Field(
        default=...,
        description="The cohort from which the frequency was derived.",
    )

    # Optional, self-referential: each entry is another instance of this model.
    subCohortFrequency: list[CohortAlleleFrequencyStudyResult] | None = Field(
        default=None,
        description=(
            "A list of CohortAlleleFrequency objects describing subcohorts of the cohort "
            "currently being described. "
            "This creates a recursive relationship and subcohorts can be further subdivided "
            "into more subcohorts. "
            "This enables, for example, the description of different ancestry groups and "
            "sexes among those ancestry groups."
        ),
    )
54 changes: 54 additions & 0 deletions src/ga4gh/va_spec/profiles/var_path_stmt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""VA Spec Variant Pathogenicity Statement Standard Profile"""

from enum import Enum
from typing import Literal

from ga4gh.cat_vrs.core_models import CategoricalVariant
from ga4gh.core.domain_models import Condition, Gene
from ga4gh.core.entity_models import IRI, Coding, StatementBase
from ga4gh.vrs.models import Variation
from pydantic import ConfigDict, Field


class PenetranceQualifier(str, Enum):
    """Reports the penetrance of the pathogenic effect - i.e. the extent to which the
    variant impact is expressed by individuals carrying it as a measure of the
    proportion of carriers exhibiting the condition.
    """

    # ``str`` mixin: every member is also a plain string equal to its value.
    # Note that the RISK_ALLELE value contains a space, not an underscore.
    HIGH = "high"
    LOW = "low"
    RISK_ALLELE = "risk allele"


class VariantPathogenicityStatement(StatementBase):
    """A Statement describing the role of a variant in causing an inherited condition."""

    # Ask pydantic to keep enum *values* (plain strings) on enum-typed fields.
    model_config = ConfigDict(use_enum_values=True)

    # Fixed type tag: the only accepted value is this class's own name.
    type: Literal["VariantPathogenicityStatement"] = Field(
        default="VariantPathogenicityStatement",
        description="MUST be 'VariantPathogenicityStatement'.",
    )

    # Required subject: a variation object given inline, or an IRI.
    subjectVariant: Variation | CategoricalVariant | IRI = Field(
        default=...,
        description="A variant that is the subject of the Statement.",
    )

    # Fixed predicate; only "isCausalFor" is accepted.
    predicate: Literal["isCausalFor"] = Field(
        default="isCausalFor",
        description=(
            "The relationship declared to hold between the subject and the object "
            "of the Statement."
        ),
    )

    # Required object: a Condition given inline, or an IRI.
    objectCondition: Condition | IRI = Field(
        default=...,
        description="The Condition for which the variant impact is stated.",
    )

    # The three qualifiers below are optional and default to None.

    penetranceQualifier: PenetranceQualifier | None = Field(
        default=None,
        description=(
            "Reports the penetrance of the pathogenic effect - i.e. the extent to which "
            "the variant impact is expressed by individuals carrying it as a measure of "
            "the proportion of carriers exhibiting the condition."
        ),
    )

    modeOfInheritanceQualifier: list[Coding] | None = Field(
        default=None,
        description=(
            "Reports a pattern of inheritance expected for the pathogenic effect of the variant. "
            "Use HPO terms within the hierarchy of 'HP:0000005' (mode of inheritance) to specify."
        ),
    )

    geneContextQualifier: Gene | IRI | None = Field(
        default=None,
        description=(
            "Reports the gene through which the pathogenic effect asserted for the variant "
            "is mediated (i.e. it is the variant's impact on this gene that is responsible "
            "for causing the condition)."
        ),
    )
Loading

0 comments on commit 53f02ac

Please sign in to comment.