Skip to content

Commit

Permalink
Notebook with very minimal starting point
Browse files Browse the repository at this point in the history
  • Loading branch information
Mrinal-Thomas-Epic committed Nov 4, 2024
1 parent 6b35c8e commit 77d84d1
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 0 deletions.
105 changes: 105 additions & 0 deletions notebooks/cnv_matching.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from ga4gh.vrs.dataproxy import create_dataproxy\n",
"\n",
"# Data proxy built from the SeqRepo REST service URI below.\n",
"seqrepo_rest_service_url = \"seqrepo+https://services.genomicmedlab.org/seqrepo\"\n",
"seqrepo_dataproxy = create_dataproxy(uri=seqrepo_rest_service_url)\n",
"\n",
"# Connection settings exported through the environment for the libraries imported in later cells.\n",
"# The UTA URL uses the anonymous account; it carries no private credentials.\n",
"os.environ[\"UTA_DB_URL\"] = \"postgresql://anonymous:anonymous@uta.biocommons.org:5432/uta/uta_20210129b\"\n",
"# NOTE(review): this value is an HTTPS URL, while the traceback saved with the next cell shows\n",
"# SeqRepo trying to open a local directory (/usr/local/share/seqrepo/latest) -- confirm whether\n",
"# SEQREPO_ROOT_DIR must instead point at a local SeqRepo snapshot.\n",
"os.environ[\"SEQREPO_ROOT_DIR\"] = \"https://services.genomicmedlab.org/seqrepo\"\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"ename": "OSError",
"evalue": "Unable to open SeqRepo directory /usr/local/share/seqrepo/latest",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[13], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mga4gh\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcat_vrs\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore_models\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CategoricalVariant, Constraint, CopyCountConstraint, DefiningContextConstraint, CopyChangeConstraint\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mga4gh\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mvrs\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CopyNumberCount, CopyNumberChange, Range\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mvariation\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmain\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m parsed_to_cn_var\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmatch_copy_number_count\u001b[39m(copy_number_count: CopyNumberCount,\n\u001b[1;32m 6\u001b[0m categorical_variation: CategoricalVariant):\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28many\u001b[39m(member \u001b[38;5;241m==\u001b[39m copy_number_count \u001b[38;5;28;01mfor\u001b[39;00m member \u001b[38;5;129;01min\u001b[39;00m categorical_variation\u001b[38;5;241m.\u001b[39mmembers):\n",
"File \u001b[0;32m/mnt/c/EpicSource/GA4GH/cat-vrs-python/venv/3.12/lib/python3.12/site-packages/variation/main.py:68\u001b[0m\n\u001b[1;32m 64\u001b[0m TO_COPY_NUMBER_VARIATION \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTo Copy Number Variation\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 65\u001b[0m ALIGNMENT_MAPPER \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAlignment Mapper\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m---> 68\u001b[0m query_handler \u001b[38;5;241m=\u001b[39m \u001b[43mQueryHandler\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;129m@asynccontextmanager\u001b[39m\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mlifespan\u001b[39m(app: FastAPI) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m AsyncGenerator: \u001b[38;5;66;03m# noqa: ARG001\u001b[39;00m\n\u001b[1;32m 73\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Configure FastAPI instance lifespan.\u001b[39;00m\n\u001b[1;32m 74\u001b[0m \n\u001b[1;32m 75\u001b[0m \u001b[38;5;124;03m :param app: FastAPI app instance\u001b[39;00m\n\u001b[1;32m 76\u001b[0m \u001b[38;5;124;03m :return: async context handler\u001b[39;00m\n\u001b[1;32m 77\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n",
"File \u001b[0;32m/mnt/c/EpicSource/GA4GH/cat-vrs-python/venv/3.12/lib/python3.12/site-packages/variation/query.py:32\u001b[0m, in \u001b[0;36mQueryHandler.__init__\u001b[0;34m(self, gene_query_handler)\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 25\u001b[0m gene_query_handler: GeneQueryHandler \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 26\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 27\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Initialize QueryHandler instance.\u001b[39;00m\n\u001b[1;32m 28\u001b[0m \u001b[38;5;124;03m :param gene_query_handler: Gene normalizer query handler instance. If this is\u001b[39;00m\n\u001b[1;32m 29\u001b[0m \u001b[38;5;124;03m provided, will use a current instance. If this is not provided, will create\u001b[39;00m\n\u001b[1;32m 30\u001b[0m \u001b[38;5;124;03m a new instance.\u001b[39;00m\n\u001b[1;32m 31\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 32\u001b[0m cool_seq_tool \u001b[38;5;241m=\u001b[39m \u001b[43mCoolSeqTool\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mseqrepo_access \u001b[38;5;241m=\u001b[39m cool_seq_tool\u001b[38;5;241m.\u001b[39mseqrepo_access\n\u001b[1;32m 35\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m gene_query_handler:\n",
"File \u001b[0;32m/mnt/c/EpicSource/GA4GH/cat-vrs-python/venv/3.12/lib/python3.12/site-packages/cool_seq_tool/app.py:85\u001b[0m, in \u001b[0;36mCoolSeqTool.__init__\u001b[0;34m(self, transcript_file_path, lrg_refseqgene_path, mane_data_path, db_url, sr, force_local_files)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Initialize CoolSeqTool class.\u001b[39;00m\n\u001b[1;32m 48\u001b[0m \n\u001b[1;32m 49\u001b[0m \u001b[38;5;124;03mInitialization with default resource locations is straightforward:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;124;03m versions of static data files -- just use most recently available, if any\u001b[39;00m\n\u001b[1;32m 83\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 84\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m sr:\n\u001b[0;32m---> 85\u001b[0m sr \u001b[38;5;241m=\u001b[39m \u001b[43mSeqRepo\u001b[49m\u001b[43m(\u001b[49m\u001b[43mroot_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mSEQREPO_ROOT_DIR\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 86\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mseqrepo_access \u001b[38;5;241m=\u001b[39m SeqRepoAccess(sr)\n\u001b[1;32m 87\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtranscript_mappings \u001b[38;5;241m=\u001b[39m TranscriptMappings(\n\u001b[1;32m 88\u001b[0m transcript_file_path\u001b[38;5;241m=\u001b[39mtranscript_file_path,\n\u001b[1;32m 89\u001b[0m lrg_refseqgene_path\u001b[38;5;241m=\u001b[39mlrg_refseqgene_path,\n\u001b[1;32m 90\u001b[0m from_local\u001b[38;5;241m=\u001b[39mforce_local_files,\n\u001b[1;32m 91\u001b[0m )\n",
"File \u001b[0;32m/mnt/c/EpicSource/GA4GH/cat-vrs-python/venv/3.12/lib/python3.12/site-packages/biocommons/seqrepo/seqrepo.py:120\u001b[0m, in \u001b[0;36mSeqRepo.__init__\u001b[0;34m(self, root_dir, writeable, upcase, translate_ncbi_namespace, check_same_thread, use_sequenceproxy, fd_cache_size)\u001b[0m\n\u001b[1;32m 117\u001b[0m os\u001b[38;5;241m.\u001b[39mmakedirs(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_root_dir, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 119\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mexists(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_root_dir):\n\u001b[0;32m--> 120\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnable to open SeqRepo directory \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_root_dir))\n\u001b[1;32m 122\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msequences \u001b[38;5;241m=\u001b[39m FastaDir(\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_seq_path,\n\u001b[1;32m 124\u001b[0m writeable\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_writeable,\n\u001b[1;32m 125\u001b[0m check_same_thread\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_same_thread,\n\u001b[1;32m 126\u001b[0m fd_cache_size\u001b[38;5;241m=\u001b[39mSEQREPO_FD_CACHE_MAXSIZE \u001b[38;5;28;01mif\u001b[39;00m SEQREPO_FD_CACHE_MAXSIZE \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m fd_cache_size\n\u001b[1;32m 127\u001b[0m )\n\u001b[1;32m 128\u001b[0m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maliases \u001b[38;5;241m=\u001b[39m SeqAliasDB(\n\u001b[1;32m 129\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_db_path,\n\u001b[1;32m 130\u001b[0m writeable\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_writeable,\n\u001b[1;32m 131\u001b[0m check_same_thread\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_same_thread,\n\u001b[1;32m 132\u001b[0m )\n",
"\u001b[0;31mOSError\u001b[0m: Unable to open SeqRepo directory /usr/local/share/seqrepo/latest"
]
}
],
"source": [
"from ga4gh.cat_vrs.core_models import CategoricalVariant, Constraint, CopyCountConstraint, DefiningContextConstraint, CopyChangeConstraint\n",
"from ga4gh.vrs.models import CopyNumberCount, CopyNumberChange, Range, SequenceLocation\n",
"\n",
"def match_copy_number_count(copy_number_count: CopyNumberCount,\n",
"                            categorical_variation: CategoricalVariant) -> bool:\n",
"    \"\"\"Report whether a copy number count belongs to a categorical variant.\n",
"\n",
"    The count matches when it equals one of the variant's members, or when it\n",
"    satisfies every constraint this function evaluates: each copy-count\n",
"    constraint must overlap the count's ``copies`` value, and each defining\n",
"    context that is a ``SequenceLocation`` must reference the same sequence id\n",
"    as the count's location. Location coordinates are not compared yet.\n",
"\n",
"    :param copy_number_count: the copy number count to classify\n",
"    :param categorical_variation: the categorical variant to test against\n",
"    :return: ``True`` on a match, otherwise ``False``\n",
"    \"\"\"\n",
"    # A missing members/constraints list is handled as an empty one instead of crashing.\n",
"    members = categorical_variation.members or []\n",
"    if any(member == copy_number_count for member in members):\n",
"        return True\n",
"\n",
"    constraints = categorical_variation.constraints or []\n",
"    count_constraints = get_constraints_of_type(constraints, CopyCountConstraint)\n",
"    location_constraints = get_constraints_of_type(constraints, DefiningContextConstraint)\n",
"\n",
"    if not count_constraints and not location_constraints:\n",
"        # Nothing this function knows how to evaluate, so do not claim a match.\n",
"        return False\n",
"\n",
"    if not all(check_overlap(copy_number_count.copies, constraint.copies) for constraint in count_constraints):\n",
"        return False\n",
"\n",
"    count_location = copy_number_count.location\n",
"    for constraint in location_constraints:\n",
"        defining_context = constraint.definingContext\n",
"        if not isinstance(defining_context, SequenceLocation):\n",
"            # Only sequence-location defining contexts are compared so far.\n",
"            continue\n",
"\n",
"        # NOTE(review): the count's location is assumed to possibly be a reference rather\n",
"        # than an inline SequenceLocation -- confirm. Such a count cannot be compared here.\n",
"        if not isinstance(count_location, SequenceLocation):\n",
"            return False\n",
"\n",
"        if defining_context.sequenceReference.id != count_location.sequenceReference.id:\n",
"            return False\n",
"\n",
"        # TODO: location coordinates\n",
"\n",
"    # Every evaluated constraint is satisfied.\n",
"    return True\n",
"\n",
"def get_constraints_of_type(constraints_list: list[Constraint], constraint_type:type):\n",
"    \"\"\"Select the constraints that are instances of ``constraint_type``.\n",
"\n",
"    :param constraints_list: constraints to filter; ``None`` is treated as empty\n",
"    :param constraint_type: class (or tuple of classes) accepted by ``isinstance``\n",
"    :return: a new list with the matching constraints in their original order\n",
"    \"\"\"\n",
"    matches = []\n",
"    for constraint in constraints_list or []:\n",
"        # NOTE(review): cat-vrs may deliver each constraint wrapped in a root model that\n",
"        # keeps the concrete constraint in ``.root`` -- confirm. Constraints without a\n",
"        # ``root`` attribute pass through unchanged.\n",
"        concrete = getattr(constraint, \"root\", constraint)\n",
"        if isinstance(concrete, constraint_type):\n",
"            matches.append(concrete)\n",
"    return matches\n",
"\n",
"def _copy_bounds(value: int | Range):\n",
"    \"\"\"Return the ``(low, high)`` bounds of a copy-count value.\n",
"\n",
"    An ``int`` becomes the single-point interval ``(value, value)``. Any type\n",
"    other than ``Range`` or ``int`` yields ``None``.\n",
"    \"\"\"\n",
"    if isinstance(value, Range):\n",
"        # NOTE(review): Range looks like a pydantic root model whose two bounds live in\n",
"        # ``.root`` -- confirm. Directly subscriptable values are still accepted.\n",
"        bounds = getattr(value, \"root\", value)\n",
"        return bounds[0], bounds[1]\n",
"    if isinstance(value, int):\n",
"        return value, value\n",
"    return None\n",
"\n",
"\n",
"def check_overlap(val1: int | Range, val2: int | Range) -> bool:\n",
"    \"\"\"Report whether two copy-count values share at least one integer.\n",
"\n",
"    Each value is an ``int`` or a two-element ``Range``. Bounds are compared\n",
"    inclusively, so touching endpoints, identical ranges and one range fully\n",
"    containing the other all count as overlapping. Two integers overlap only\n",
"    when they are equal.\n",
"\n",
"    :param val1: first copy count or range\n",
"    :param val2: second copy count or range\n",
"    :return: ``True`` when the values overlap; ``False`` otherwise, including\n",
"        when either value is of an unsupported type\n",
"    \"\"\"\n",
"    bounds1 = _copy_bounds(val1)\n",
"    bounds2 = _copy_bounds(val2)\n",
"    if bounds1 is None or bounds2 is None:\n",
"        return False\n",
"\n",
"    low1, high1 = bounds1\n",
"    low2, high2 = bounds2\n",
"    # Closed intervals intersect when each one starts no later than the other ends.\n",
"    # A ``None`` bound is presumably open-ended on that side -- confirm against the VRS spec.\n",
"    first_starts_in_time = low1 is None or high2 is None or low1 <= high2\n",
"    second_starts_in_time = low2 is None or high1 is None or low2 <= high1\n",
"    return first_starts_in_time and second_starts_in_time"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "3.12",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ga4gh.vrs[extras]
variation-normalizer

0 comments on commit 77d84d1

Please sign in to comment.