-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Notebook with very minimal starting point
- Loading branch information
1 parent
6b35c8e
commit 77d84d1
Showing
2 changed files
with
107 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 12, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from ga4gh.vrs.dataproxy import create_dataproxy\n", | ||
"seqrepo_rest_service_url = \"seqrepo+https://services.genomicmedlab.org/seqrepo\"\n", | ||
"seqrepo_dataproxy = create_dataproxy(uri=seqrepo_rest_service_url)\n", | ||
"\n", | ||
"import os\n", | ||
"os.environ[\"UTA_DB_URL\"] = \"postgresql://anonymous:[email protected]:5432/uta/uta_20210129b\"\n", | ||
"os.environ[\"SEQREPO_ROOT_DIR\"] = \"https://services.genomicmedlab.org/seqrepo\"\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 13, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"ename": "OSError", | ||
"evalue": "Unable to open SeqRepo directory /usr/local/share/seqrepo/latest", | ||
"output_type": "error", | ||
"traceback": [ | ||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | ||
"\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)", | ||
"Cell \u001b[0;32mIn[13], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mga4gh\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcat_vrs\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore_models\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CategoricalVariant, Constraint, CopyCountConstraint, DefiningContextConstraint, CopyChangeConstraint\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mga4gh\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mvrs\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CopyNumberCount, CopyNumberChange, Range\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mvariation\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmain\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m parsed_to_cn_var\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmatch_copy_number_count\u001b[39m(copy_number_count: CopyNumberCount,\n\u001b[1;32m 6\u001b[0m categorical_variation: CategoricalVariant):\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28many\u001b[39m(member \u001b[38;5;241m==\u001b[39m copy_number_count \u001b[38;5;28;01mfor\u001b[39;00m member \u001b[38;5;129;01min\u001b[39;00m categorical_variation\u001b[38;5;241m.\u001b[39mmembers):\n", | ||
"File \u001b[0;32m/mnt/c/EpicSource/GA4GH/cat-vrs-python/venv/3.12/lib/python3.12/site-packages/variation/main.py:68\u001b[0m\n\u001b[1;32m 64\u001b[0m TO_COPY_NUMBER_VARIATION \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTo Copy Number Variation\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 65\u001b[0m ALIGNMENT_MAPPER \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAlignment Mapper\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m---> 68\u001b[0m query_handler \u001b[38;5;241m=\u001b[39m \u001b[43mQueryHandler\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;129m@asynccontextmanager\u001b[39m\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mlifespan\u001b[39m(app: FastAPI) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m AsyncGenerator: \u001b[38;5;66;03m# noqa: ARG001\u001b[39;00m\n\u001b[1;32m 73\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Configure FastAPI instance lifespan.\u001b[39;00m\n\u001b[1;32m 74\u001b[0m \n\u001b[1;32m 75\u001b[0m \u001b[38;5;124;03m :param app: FastAPI app instance\u001b[39;00m\n\u001b[1;32m 76\u001b[0m \u001b[38;5;124;03m :return: async context handler\u001b[39;00m\n\u001b[1;32m 77\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n", | ||
"File \u001b[0;32m/mnt/c/EpicSource/GA4GH/cat-vrs-python/venv/3.12/lib/python3.12/site-packages/variation/query.py:32\u001b[0m, in \u001b[0;36mQueryHandler.__init__\u001b[0;34m(self, gene_query_handler)\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 25\u001b[0m gene_query_handler: GeneQueryHandler \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 26\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 27\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Initialize QueryHandler instance.\u001b[39;00m\n\u001b[1;32m 28\u001b[0m \u001b[38;5;124;03m :param gene_query_handler: Gene normalizer query handler instance. If this is\u001b[39;00m\n\u001b[1;32m 29\u001b[0m \u001b[38;5;124;03m provided, will use a current instance. If this is not provided, will create\u001b[39;00m\n\u001b[1;32m 30\u001b[0m \u001b[38;5;124;03m a new instance.\u001b[39;00m\n\u001b[1;32m 31\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 32\u001b[0m cool_seq_tool \u001b[38;5;241m=\u001b[39m \u001b[43mCoolSeqTool\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mseqrepo_access \u001b[38;5;241m=\u001b[39m cool_seq_tool\u001b[38;5;241m.\u001b[39mseqrepo_access\n\u001b[1;32m 35\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m gene_query_handler:\n", | ||
"File \u001b[0;32m/mnt/c/EpicSource/GA4GH/cat-vrs-python/venv/3.12/lib/python3.12/site-packages/cool_seq_tool/app.py:85\u001b[0m, in \u001b[0;36mCoolSeqTool.__init__\u001b[0;34m(self, transcript_file_path, lrg_refseqgene_path, mane_data_path, db_url, sr, force_local_files)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Initialize CoolSeqTool class.\u001b[39;00m\n\u001b[1;32m 48\u001b[0m \n\u001b[1;32m 49\u001b[0m \u001b[38;5;124;03mInitialization with default resource locations is straightforward:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;124;03m versions of static data files -- just use most recently available, if any\u001b[39;00m\n\u001b[1;32m 83\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 84\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m sr:\n\u001b[0;32m---> 85\u001b[0m sr \u001b[38;5;241m=\u001b[39m \u001b[43mSeqRepo\u001b[49m\u001b[43m(\u001b[49m\u001b[43mroot_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mSEQREPO_ROOT_DIR\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 86\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mseqrepo_access \u001b[38;5;241m=\u001b[39m SeqRepoAccess(sr)\n\u001b[1;32m 87\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtranscript_mappings \u001b[38;5;241m=\u001b[39m TranscriptMappings(\n\u001b[1;32m 88\u001b[0m transcript_file_path\u001b[38;5;241m=\u001b[39mtranscript_file_path,\n\u001b[1;32m 89\u001b[0m lrg_refseqgene_path\u001b[38;5;241m=\u001b[39mlrg_refseqgene_path,\n\u001b[1;32m 90\u001b[0m from_local\u001b[38;5;241m=\u001b[39mforce_local_files,\n\u001b[1;32m 91\u001b[0m )\n", | ||
"File \u001b[0;32m/mnt/c/EpicSource/GA4GH/cat-vrs-python/venv/3.12/lib/python3.12/site-packages/biocommons/seqrepo/seqrepo.py:120\u001b[0m, in \u001b[0;36mSeqRepo.__init__\u001b[0;34m(self, root_dir, writeable, upcase, translate_ncbi_namespace, check_same_thread, use_sequenceproxy, fd_cache_size)\u001b[0m\n\u001b[1;32m 117\u001b[0m os\u001b[38;5;241m.\u001b[39mmakedirs(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_root_dir, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 119\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mexists(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_root_dir):\n\u001b[0;32m--> 120\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnable to open SeqRepo directory \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_root_dir))\n\u001b[1;32m 122\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msequences \u001b[38;5;241m=\u001b[39m FastaDir(\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_seq_path,\n\u001b[1;32m 124\u001b[0m writeable\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_writeable,\n\u001b[1;32m 125\u001b[0m check_same_thread\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_same_thread,\n\u001b[1;32m 126\u001b[0m fd_cache_size\u001b[38;5;241m=\u001b[39mSEQREPO_FD_CACHE_MAXSIZE \u001b[38;5;28;01mif\u001b[39;00m SEQREPO_FD_CACHE_MAXSIZE \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m fd_cache_size\n\u001b[1;32m 127\u001b[0m )\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maliases \u001b[38;5;241m=\u001b[39m SeqAliasDB(\n\u001b[1;32m 129\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_db_path,\n\u001b[1;32m 130\u001b[0m writeable\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_writeable,\n\u001b[1;32m 131\u001b[0m check_same_thread\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_same_thread,\n\u001b[1;32m 132\u001b[0m )\n", | ||
"\u001b[0;31mOSError\u001b[0m: Unable to open SeqRepo directory /usr/local/share/seqrepo/latest" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"from ga4gh.cat_vrs.core_models import CategoricalVariant, Constraint, CopyCountConstraint, DefiningContextConstraint, CopyChangeConstraint\n", | ||
"from ga4gh.vrs.models import CopyNumberCount, CopyNumberChange, Range, SequenceLocation\n", | ||
"\n", | ||
"def match_copy_number_count(copy_number_count: CopyNumberCount,\n", | ||
" categorical_variation: CategoricalVariant):\n", | ||
" if any(member == copy_number_count for member in categorical_variation.members):\n", | ||
" return True\n", | ||
"\n", | ||
" count_constraints = get_constraints_of_type(categorical_variation.constraints, CopyCountConstraint)\n", | ||
" if not all(check_overlap(copy_number_count.copies, constraint.copies) for constraint in count_constraints):\n", | ||
" return False\n", | ||
"\n", | ||
" location_constraints = get_constraints_of_type(categorical_variation.constraints, DefiningContextConstraint)\n", | ||
" for constraint in location_constraints:\n", | ||
" if isinstance(constraint.definingContext, SequenceLocation):\n", | ||
" if not constraint.definingContext.sequenceReference.id == \\\n", | ||
" copy_number_count.location.location.sequenceReference.id:\n", | ||
" return False\n", | ||
" \n", | ||
" # TODO: location coordinates\n", | ||
"\n", | ||
" else:\n", | ||
" pass\n", | ||
"\n", | ||
"def get_constraints_of_type(constraints_list: list[Constraint], constraint_type:type):\n", | ||
" return [c for c in constraints_list if isinstance(c, constraint_type)]\n", | ||
"\n", | ||
"def check_overlap(val1: int|range, val2: int|range):\n", | ||
" if isinstance(val1, Range):\n", | ||
" if isinstance(val2, Range):\n", | ||
" return val1[0] < val2[0] < val1[1] \\\n", | ||
" or val1[0] < val2[1] < val1[1]\n", | ||
" elif isinstance(val2, int):\n", | ||
" return val1[0] < val2 < val1[1]\n", | ||
" \n", | ||
" elif isinstance(val1, int):\n", | ||
" if isinstance(val2, Range):\n", | ||
" return val2[0] < val1 < val2[1]\n", | ||
" \n", | ||
" elif isinstance(val2, int):\n", | ||
" return val2 == val2" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "3.12", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.1" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
ga4gh.vrs[extras] | ||
variation-normalizer |