From 359d31a0bda17341467c07467cab874e926d5b2e Mon Sep 17 00:00:00 2001 From: Jan Range Date: Thu, 19 Sep 2024 15:16:43 +0200 Subject: [PATCH] update model and example --- database-example/README.md | 44 +-- database-example/UseDatabase.ipynb | 262 ++++++++++++--- database-example/create_tables.py | 2 +- gen.toml | 2 +- .../STRENDADB_light_20240904_fix.md | 315 ++++++++++++++++++ 5 files changed, 562 insertions(+), 63 deletions(-) create mode 100644 specifications/STRENDADB_light_20240904_fix.md diff --git a/database-example/README.md b/database-example/README.md index c0e96ca..2cc3c86 100644 --- a/database-example/README.md +++ b/database-example/README.md @@ -24,7 +24,7 @@ _Output_ ```bash ๐ŸŽ‰ Connected -๐Ÿš€ Creating tables for data model ../specifications/STRENDADB_light_20240208.md +๐Ÿš€ Creating tables for data model ../specifications/STRENDADB_light_20240904_fix.md โ”‚ โ”œโ”€โ”€ Table __model_meta__ not existing. Adding to DB! โ”œโ”€โ”€ Added table model 'DATA_MODEL' to __model_meta__ table @@ -33,16 +33,17 @@ _Output_ โ”œโ”€โ”€ Added table model 'Experiment' to __model_meta__ table โ”œโ”€โ”€ Added table model 'ProteinDescription' to __model_meta__ table โ”œโ”€โ”€ Added table model 'ProteinSource' to __model_meta__ table -โ”œโ”€โ”€ Added table model 'Modifications' to __model_meta__ table +โ”œโ”€โ”€ Added table model 'SequenceModifications' to __model_meta__ table +โ”œโ”€โ”€ Added table model 'PosttranslationalModifications' to __model_meta__ table โ”œโ”€โ”€ Added table model 'ProteinReaction' to __model_meta__ table โ”œโ”€โ”€ Added table model 'Dataset' to __model_meta__ table โ”œโ”€โ”€ Added table model 'AssayConditions' to __model_meta__ table โ”œโ”€โ”€ Added table model 'SmallAssayComponents' to __model_meta__ table โ”œโ”€โ”€ Added table model 'MacromolecularComponents' to __model_meta__ table โ”œโ”€โ”€ Added table model 'RoleOfComponent' to __model_meta__ table +โ”œโ”€โ”€ Added table model 'CompoundClassification' to __model_meta__ table โ”œโ”€โ”€ Added 
table model 'ResultsSet' to __model_meta__ table -โ”œโ”€โ”€ Added table model 'InitialKinetics' to __model_meta__ table -โ”œโ”€โ”€ Added table model 'Parameter' to __model_meta__ table +โ”œโ”€โ”€ Added table model 'InitialKineticsParameters' to __model_meta__ table โ”œโ”€โ”€ Added table model 'Activation' to __model_meta__ table โ”œโ”€โ”€ Added table model 'Inhibition' to __model_meta__ table โ”œโ”€โ”€ Created table 'Publication' @@ -50,16 +51,17 @@ _Output_ โ”œโ”€โ”€ Created table 'Experiment' โ”œโ”€โ”€ Created table 'ProteinDescription' โ”œโ”€โ”€ Created table 'ProteinSource' -โ”œโ”€โ”€ Created table 'Modifications' +โ”œโ”€โ”€ Created table 'SequenceModifications' +โ”œโ”€โ”€ Created table 'PosttranslationalModifications' โ”œโ”€โ”€ Created table 'ProteinReaction' โ”œโ”€โ”€ Created table 'Dataset' โ”œโ”€โ”€ Created table 'AssayConditions' โ”œโ”€โ”€ Created table 'SmallAssayComponents' โ”œโ”€โ”€ Created table 'MacromolecularComponents' โ”œโ”€โ”€ Created table 'RoleOfComponent' +โ”œโ”€โ”€ Created table 'CompoundClassification' โ”œโ”€โ”€ Created table 'ResultsSet' -โ”œโ”€โ”€ Created table 'InitialKinetics' -โ”œโ”€โ”€ Created table 'Parameter' +โ”œโ”€โ”€ Created table 'InitialKineticsParameters' โ”œโ”€โ”€ Created table 'Activation' โ”œโ”€โ”€ Created table 'Inhibition' โ”œโ”€โ”€ Added primary key 'id' to table Publication @@ -67,37 +69,37 @@ _Output_ โ”œโ”€โ”€ Added primary key 'id' to table Experiment โ”œโ”€โ”€ Added primary key 'id' to table ProteinDescription โ”œโ”€โ”€ Added primary key 'id' to table ProteinSource -โ”œโ”€โ”€ Added primary key 'id' to table Modifications +โ”œโ”€โ”€ Added primary key 'id' to table SequenceModifications +โ”œโ”€โ”€ Added primary key 'id' to table PosttranslationalModifications โ”œโ”€โ”€ Added primary key 'id' to table ProteinReaction โ”œโ”€โ”€ Added primary key 'id' to table Dataset โ”œโ”€โ”€ Added primary key 'id' to table AssayConditions โ”œโ”€โ”€ Added primary key 'id' to table SmallAssayComponents โ”œโ”€โ”€ Added primary key 'id' to 
table MacromolecularComponents โ”œโ”€โ”€ Added primary key 'id' to table RoleOfComponent +โ”œโ”€โ”€ Added primary key 'id' to table CompoundClassification โ”œโ”€โ”€ Added primary key 'id' to table ResultsSet -โ”œโ”€โ”€ Added primary key 'id' to table InitialKinetics -โ”œโ”€โ”€ Added primary key 'id' to table Parameter +โ”œโ”€โ”€ Added primary key 'id' to table InitialKineticsParameters โ”œโ”€โ”€ Added primary key 'id' to table Activation โ”œโ”€โ”€ Added primary key 'id' to table Inhibition โ”œโ”€โ”€ Added join table 'Publication_author_Author' โ”œโ”€โ”€ Added join table 'Publication_experiment_Experiment' โ”œโ”€โ”€ Added foreign key 'protein_assay__fk' to table 'Experiment' -โ”œโ”€โ”€ Added foreign key 'modifications__fk' to table 'ProteinDescription' -โ”œโ”€โ”€ Added foreign key 'source__fk' to table 'ProteinDescription' +โ”œโ”€โ”€ Added foreign key 'protein_sequence__fk' to table 'ProteinDescription' +โ”œโ”€โ”€ Added foreign key 'posttranslational_modifications__fk' to table 'ProteinDescription' +โ”œโ”€โ”€ Added foreign key 'protein_source__fk' to table 'ProteinDescription' โ”œโ”€โ”€ Added foreign key 'reaction__fk' to table 'ProteinDescription' -โ”œโ”€โ”€ Added join table 'ProteinDescription_characteristics_Dataset' +โ”œโ”€โ”€ Added join table 'ProteinDescription_protein_characterization_Dataset' โ”œโ”€โ”€ Added join table 'Dataset_assay_conditions_AssayConditions' -โ”œโ”€โ”€ Added join table 'Dataset_result_set_ResultsSet' -โ”œโ”€โ”€ Added foreign key 'initial_kinetics__fk' to table 'ResultsSet' +โ”œโ”€โ”€ Added join table 'Dataset_results_set_ResultsSet' +โ”œโ”€โ”€ Added foreign key 'small_assay_components__fk' to table 'AssayConditions' +โ”œโ”€โ”€ Added foreign key 'macromolecular_components__fk' to table 'AssayConditions' +โ”œโ”€โ”€ Added foreign key 'role__fk' to table 'MacromolecularComponents' +โ”œโ”€โ”€ Added foreign key 'initial_kinetic_parameters__fk' to table 'ResultsSet' โ”œโ”€โ”€ Added foreign key 'activation__fk' to table 'ResultsSet' โ”œโ”€โ”€ 
Added foreign key 'inhibition__fk' to table 'ResultsSet' -โ”œโ”€โ”€ Added foreign key 'km__fk' to table 'InitialKinetics' -โ”œโ”€โ”€ Added foreign key 'kcat__fk' to table 'InitialKinetics' -โ”œโ”€โ”€ Added foreign key 'v__fk' to table 'InitialKinetics' -โ”œโ”€โ”€ Added foreign key 'kcat_over_km__fk' to table 'InitialKinetics' -โ”œโ”€โ”€ Added foreign key 'v_over_km__fk' to table 'InitialKinetics' โ”‚ -โ•ฐโ”€โ”€ ๐ŸŽ‰ Created all tables for data model ../specifications/STRENDADB_light_20240208.md +โ•ฐโ”€โ”€ ๐ŸŽ‰ Created all tables for data model ../specifications/STRENDADB_light_20240904_fix.md ``` ## Working with the database diff --git a/database-example/UseDatabase.ipynb b/database-example/UseDatabase.ipynb index 6216d67..47830d3 100644 --- a/database-example/UseDatabase.ipynb +++ b/database-example/UseDatabase.ipynb @@ -2,7 +2,20 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, + "id": "a11cc25d-8c46-4f11-adee-b7bccba862b3", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "!pip install rich\n", + "!pip install git+https://github.com/JR-1991/sdrdm-database.git" + ] + }, + { + "cell_type": "code", + "execution_count": 12, "id": "1c534e43-b832-4d0c-954d-9d6d7440814a", "metadata": {}, "outputs": [ @@ -15,6 +28,8 @@ } ], "source": [ + "import rich\n", + "\n", "from sdRDM import DataModel\n", "from sdrdm_database import DBConnector\n", "from sdrdm_database.dbconnector import SupportedBackends\n", @@ -32,18 +47,7 @@ }, { "cell_type": "code", - "execution_count": 2, - "id": "fcc59938-9f63-43d6-b34e-4d67acf5151f", - "metadata": {}, - "outputs": [], - "source": [ - "# Build Python classes\n", - "libStrenda = DataModel.from_markdown(\"../specifications/STRENDADB_light_20240208.md\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "f225b616-690c-4575-a1a8-d5f9fa5fc49e", "metadata": {}, "outputs": [ @@ -53,17 +57,17 @@ "['Activation',\n", " 'AssayConditions',\n", " 
'Author',\n", + " 'CompoundClassification',\n", " 'Dataset',\n", " 'Dataset_assay_conditions_AssayConditions',\n", - " 'Dataset_result_set_ResultsSet',\n", + " 'Dataset_results_set_ResultsSet',\n", " 'Experiment',\n", " 'Inhibition',\n", - " 'InitialKinetics',\n", + " 'InitialKineticsParameters',\n", " 'MacromolecularComponents',\n", - " 'Modifications',\n", - " 'Parameter',\n", + " 'PosttranslationalModifications',\n", " 'ProteinDescription',\n", - " 'ProteinDescription_characteristics_Dataset',\n", + " 'ProteinDescription_protein_characterization_Dataset',\n", " 'ProteinReaction',\n", " 'ProteinSource',\n", " 'Publication',\n", @@ -71,23 +75,24 @@ " 'Publication_experiment_Experiment',\n", " 'ResultsSet',\n", " 'RoleOfComponent',\n", + " 'SequenceModifications',\n", " 'SmallAssayComponents',\n", " '__model_meta__']" ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Check tables\n", + "# Check, if the tables have been created!\n", "db.connection.list_tables()" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 14, "id": "bb2c49e7-2f97-4c33-86c8-16b60c683808", "metadata": {}, "outputs": [ @@ -96,37 +101,214 @@ "output_type": "stream", "text": [ "Publication\n", - "โ”œโ”€โ”€ id [value=daf3d60d-de35-401a-99a8-8216f5116b4e]\n", + "โ”œโ”€โ”€ id [value=4c63272b-e99e-4e05-aa09-cf751fa4d62b]\n", + "โ”œโ”€โ”€ title [value=Test]\n", "โ”œโ”€โ”€ doi [value=SomeDOI]\n", "โ”œโ”€โ”€ pmid [value=SomePMID]\n", - "โ”œโ”€โ”€ author [value=None]\n", - "โ”‚ โ””โ”€โ”€ 0\n", - "โ”‚ โ””โ”€โ”€ Author\n", - "โ”‚ โ”œโ”€โ”€ id [value=8fc97783-978f-497b-8c83-662e0fcd8799]\n", - "โ”‚ โ”œโ”€โ”€ name [value=StrendaMan]\n", - "โ”‚ โ””โ”€โ”€ affiliation [value=Strenda]\n", - "โ””โ”€โ”€ experiment [value=None]\n", + "โ””โ”€โ”€ author [value=None]\n", " โ””โ”€โ”€ 0\n", - " โ””โ”€โ”€ Experiment\n", - " โ”œโ”€โ”€ id [value=f45cf9d6-8662-4f45-b649-b5e323b3e5d5]\n", - " โ”œโ”€โ”€ name [value=Experiment]\n", - 
" โ””โ”€โ”€ assay_type [value=Type]\n", + " โ””โ”€โ”€ Author\n", + " โ”œโ”€โ”€ id [value=02e113da-37b1-486f-8dfa-231ffcf11940]\n", + " โ”œโ”€โ”€ name [value=StrendaMan]\n", + " โ””โ”€โ”€ affiliation [value=Strenda]\n", "ClassNode(/Publication, class_name=None, constants={}, id=None, module=None, outer_type=None)\n" ] } ], "source": [ - "# Build dataset to insert later\n", - "dataset = libStrenda.Publication(doi=\"SomeDOI\", pmid=\"SomePMID\")\n", - "dataset.add_to_author(name=\"StrendaMan\", affiliation=\"Strenda\")\n", + "# Grab the \"Publication\" model from the Database\n", + "Publication = db.get_table_api(\"Publication\")\n", "\n", - "experiment = dataset.add_to_experiment(\n", - " name=\"Experiment\",\n", - " assay_type=\"Type\",\n", - ")\n", + "# Build dataset to insert later!\n", + "dataset = Publication(title=\"Test\", doi=\"SomeDOI\", pmid=\"SomePMID\")\n", + "dataset.add_to_author(name=\"StrendaMan\", affiliation=\"Strenda\")\n", "\n", "print(dataset.tree())" ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "daa29c3d-17b5-41e5-a48b-b81da377eb46", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
โœ… Inserted 1 rows into the database.\n",
+       "
\n" + ], + "text/plain": [ + "โœ… Inserted \u001b[1;36m1\u001b[0m rows into the database.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Insert the row into the database\n", + "db.insert(dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "80aaf8db-2b7a-415d-8c33-b9748fc3e07a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
โœ… Inserted 20 rows into the database.\n",
+       "
\n" + ], + "text/plain": [ + "โœ… Inserted \u001b[1;36m20\u001b[0m rows into the database.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Let's create more random datasets\n", + "import random\n", + "\n", + "options = [\"Strenda\", \"Biocatalysis\", \"EnzymeML\", \"Workshop\"] \n", + "datasets = []\n", + "\n", + "for _ in range(20):\n", + " dataset = libStrenda.Publication(\n", + " title=random.choice(options),\n", + " doi=random.choice(options),\n", + " pmid=random.choice(options),\n", + " )\n", + " \n", + " dataset.add_to_author(name=\"StrendaMan\", affiliation=\"Strenda\")\n", + "\n", + " datasets.append(dataset)\n", + "\n", + "# Add all of them at once!\n", + "db.insert(*datasets)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "a14bcacf-8998-4552-a94c-68af22b0f307", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
21 entries found!\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m21\u001b[0m entries found!\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Publication\n", + "โ”œโ”€โ”€ id [value=00c65cff-21d0-493f-81aa-5d32d8992474]\n", + "โ”œโ”€โ”€ title [value=Test]\n", + "โ”œโ”€โ”€ doi [value=SomeDOI]\n", + "โ”œโ”€โ”€ pmid [value=SomePMID]\n", + "โ””โ”€โ”€ author [value=None]\n", + " โ””โ”€โ”€ 0\n", + " โ””โ”€โ”€ Author\n", + " โ”œโ”€โ”€ id [value=6bf4f0db-82eb-431e-9eef-139a83149bca]\n", + " โ”œโ”€โ”€ name [value=StrendaMan]\n", + " โ””โ”€โ”€ affiliation [value=Strenda]\n" + ] + }, + { + "data": { + "text/html": [ + "
ClassNode(/Publication, class_name=None, constants={}, id=None, module=None, outer_type=None)\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mClassNode\u001b[0m\u001b[1m(\u001b[0m\u001b[35m/\u001b[0m\u001b[95mPublication\u001b[0m, \u001b[33mclass_name\u001b[0m=\u001b[3;35mNone\u001b[0m, \u001b[33mconstants\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[33mid\u001b[0m=\u001b[3;35mNone\u001b[0m, \u001b[33mmodule\u001b[0m=\u001b[3;35mNone\u001b[0m, \u001b[33mouter_type\u001b[0m=\u001b[3;35mNone\u001b[0m\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Lets fetch them from the database!\n", + "result = db.get(db.get_table_api(\"Publication\"))\n", + "\n", + "# Should be 21! Lets inspect the amount and the first entry\n", + "rich.print(f\"{len(result)} entries found!\")\n", + "rich.print(result[0].tree())" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "931a7297-ca7a-4551-abaf-02bab72351d7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“\n",
+       "โ”ƒ title        โ”ƒ doi          โ”ƒ pmid         โ”ƒ id                                   โ”ƒ\n",
+       "โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ\n",
+       "โ”‚ !string      โ”‚ string       โ”‚ string       โ”‚ !string                              โ”‚\n",
+       "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+       "โ”‚ Test         โ”‚ SomeDOI      โ”‚ SomePMID     โ”‚ 00c65cff-21d0-493f-81aa-5d32d8992474 โ”‚\n",
+       "โ”‚ Workshop     โ”‚ Biocatalysis โ”‚ Biocatalysis โ”‚ 0b345fa6-7b82-4544-ac66-f08f3e91cfee โ”‚\n",
+       "โ”‚ Workshop     โ”‚ Biocatalysis โ”‚ Workshop     โ”‚ 0f25b517-1a5e-4da8-8dc9-7b5a87658832 โ”‚\n",
+       "โ”‚ EnzymeML     โ”‚ Workshop     โ”‚ Strenda      โ”‚ 20ac6092-8697-4714-a120-9e476d3a613f โ”‚\n",
+       "โ”‚ Workshop     โ”‚ Strenda      โ”‚ Workshop     โ”‚ 267b9093-0252-47b8-96f2-5fae11398c44 โ”‚\n",
+       "โ”‚ Workshop     โ”‚ Strenda      โ”‚ Strenda      โ”‚ 289c4ab5-1677-4c7b-886a-d84b88b164a3 โ”‚\n",
+       "โ”‚ Workshop     โ”‚ EnzymeML     โ”‚ EnzymeML     โ”‚ 3b63d1dc-a6bd-4fb1-a65a-ae6af97712cc โ”‚\n",
+       "โ”‚ Biocatalysis โ”‚ Biocatalysis โ”‚ EnzymeML     โ”‚ 3da1b160-5e29-47bb-9912-3d32193b2486 โ”‚\n",
+       "โ”‚ Strenda      โ”‚ Biocatalysis โ”‚ Workshop     โ”‚ 44f9a8af-c387-4a65-9761-de63ffbc75e0 โ”‚\n",
+       "โ”‚ Strenda      โ”‚ Strenda      โ”‚ Biocatalysis โ”‚ 4d108d69-4f02-4ada-a506-52198f7e64c7 โ”‚\n",
+       "โ”‚ โ€ฆ            โ”‚ โ€ฆ            โ”‚ โ€ฆ            โ”‚ โ€ฆ                                    โ”‚\n",
+       "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n",
+       "
\n" + ], + "text/plain": [ + "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“\n", + "โ”ƒ\u001b[1m \u001b[0m\u001b[1mtitle\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mdoi\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mpmid\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mid\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0mโ”ƒ\n", + "โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ\n", + "โ”‚ \u001b[2m!string\u001b[0m โ”‚ \u001b[2mstring\u001b[0m โ”‚ \u001b[2mstring\u001b[0m โ”‚ \u001b[2m!string\u001b[0m โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ \u001b[32mTest \u001b[0m โ”‚ \u001b[32mSomeDOI \u001b[0m โ”‚ \u001b[32mSomePMID \u001b[0m โ”‚ \u001b[32m00c65cff-21d0-493f-81aa-5d32d8992474\u001b[0m โ”‚\n", + "โ”‚ \u001b[32mWorkshop \u001b[0m โ”‚ \u001b[32mBiocatalysis\u001b[0m โ”‚ \u001b[32mBiocatalysis\u001b[0m โ”‚ \u001b[32m0b345fa6-7b82-4544-ac66-f08f3e91cfee\u001b[0m โ”‚\n", + "โ”‚ \u001b[32mWorkshop \u001b[0m โ”‚ \u001b[32mBiocatalysis\u001b[0m โ”‚ \u001b[32mWorkshop \u001b[0m โ”‚ \u001b[32m0f25b517-1a5e-4da8-8dc9-7b5a87658832\u001b[0m โ”‚\n", + "โ”‚ \u001b[32mEnzymeML \u001b[0m โ”‚ \u001b[32mWorkshop \u001b[0m โ”‚ \u001b[32mStrenda \u001b[0m โ”‚ \u001b[32m20ac6092-8697-4714-a120-9e476d3a613f\u001b[0m โ”‚\n", + "โ”‚ \u001b[32mWorkshop 
\u001b[0m โ”‚ \u001b[32mStrenda \u001b[0m โ”‚ \u001b[32mWorkshop \u001b[0m โ”‚ \u001b[32m267b9093-0252-47b8-96f2-5fae11398c44\u001b[0m โ”‚\n", + "โ”‚ \u001b[32mWorkshop \u001b[0m โ”‚ \u001b[32mStrenda \u001b[0m โ”‚ \u001b[32mStrenda \u001b[0m โ”‚ \u001b[32m289c4ab5-1677-4c7b-886a-d84b88b164a3\u001b[0m โ”‚\n", + "โ”‚ \u001b[32mWorkshop \u001b[0m โ”‚ \u001b[32mEnzymeML \u001b[0m โ”‚ \u001b[32mEnzymeML \u001b[0m โ”‚ \u001b[32m3b63d1dc-a6bd-4fb1-a65a-ae6af97712cc\u001b[0m โ”‚\n", + "โ”‚ \u001b[32mBiocatalysis\u001b[0m โ”‚ \u001b[32mBiocatalysis\u001b[0m โ”‚ \u001b[32mEnzymeML \u001b[0m โ”‚ \u001b[32m3da1b160-5e29-47bb-9912-3d32193b2486\u001b[0m โ”‚\n", + "โ”‚ \u001b[32mStrenda \u001b[0m โ”‚ \u001b[32mBiocatalysis\u001b[0m โ”‚ \u001b[32mWorkshop \u001b[0m โ”‚ \u001b[32m44f9a8af-c387-4a65-9761-de63ffbc75e0\u001b[0m โ”‚\n", + "โ”‚ \u001b[32mStrenda \u001b[0m โ”‚ \u001b[32mStrenda \u001b[0m โ”‚ \u001b[32mBiocatalysis\u001b[0m โ”‚ \u001b[32m4d108d69-4f02-4ada-a506-52198f7e64c7\u001b[0m โ”‚\n", + "โ”‚ \u001b[2mโ€ฆ\u001b[0m โ”‚ \u001b[2mโ€ฆ\u001b[0m โ”‚ \u001b[2mโ€ฆ\u001b[0m โ”‚ \u001b[2mโ€ฆ\u001b[0m โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# You can also view individual tables\n", + "db.connection.table(\"Publication\")" + ] } ], "metadata": { diff --git a/database-example/create_tables.py b/database-example/create_tables.py index c6b351b..6a08c82 100644 --- a/database-example/create_tables.py +++ b/database-example/create_tables.py @@ -12,4 +12,4 @@ ) # Create the tables in the database -db.create_tables("../specifications/STRENDADB_light_20240208.md") +db.create_tables("../specifications/STRENDADB_light_20240904_fix.md") diff --git 
a/gen.toml b/gen.toml index f25045e..679b648 100644 --- a/gen.toml +++ b/gen.toml @@ -1,7 +1,7 @@ [meta] name = "STRENDA-DB" description = "This is a code and schema generator for Strenda Biocatalysis." -paths = ["specifications/STRENDADB_light_20240208.md"] +paths = ["specifications/STRENDADB_light_20240904_fix.md"] [generate] xml-schema = { out = "schemes/strenda.xsd" } diff --git a/specifications/STRENDADB_light_20240904_fix.md b/specifications/STRENDADB_light_20240904_fix.md new file mode 100644 index 0000000..1917e9b --- /dev/null +++ b/specifications/STRENDADB_light_20240904_fix.md @@ -0,0 +1,315 @@ +# STRENDA DB + +## Administration + +## Data + +### Publication + +This is the publication that includes the experimental results of an enzyme kinetics characterization. + +- **title** + - Type: string + - Description: title of the publication +- doi + - Type: string + - Description: identifier of the publication +- pmid + - Type: string + - Description: identifier of the publication as indexed in PubMed +- author + - Type: Author[] +- experiment + - Type: Experiment[] + +### Author + +- **name** + - Type: string + - Description: name of the author. 
Nomenclature: Family name initials first name +- affiliation + - Type: string + - Description: name of organization, department, city, country +- email + - Type: string + - Description: email address +- orcid + - Type: string + - Description: ORCID identifier + +### Experiment + +- name_of_experiment + - Type: string + - Description: name of the experiment, just for internal purposes +- type_of_assay + - Type: string + - Description: name of the assay +- direction_of_the_assay + - Type: string +- definition_of_the_compound_monitored + - Type: string + - Description: +- continuously_monitored + - Type: string + - Description: selection of the stopping procedure +- directly_monitored + - Type: string +- protein_assay + - Type: ProteinDescription + +### ProteinDescription + +- uniprotkb_ac + - Type: string + - Description: identifier obtained from UniProtKB +- protein_name + - Type: string + - Description: name of the protein as of UniProtKB +- protein_sequence + - Type: SequenceModifications + - Description: amino acid sequence as from UniProtKB +- posttranslational_modifications + - Type: PosttranslationalModifications +- protein_source + - Type: ProteinSource +- reaction + - Type: ProteinReaction +- protein_characterization + - Type: Dataset[] + +### ProteinSource + +- expression_system + - Type: string + - Description: Description of expression system if heterologously expressed +- organism + - Type: string + - Description: as in UniProtKB provided, name of organism +- taxon_id + - Type: string + - Description: ID as obtained from NCBI Taxonomy +- strain + - Type: string + - Description: name or identifier of the strain +- cell_type + - Type: string + - Description: determination of the cell in which the protein is expressed +- tissue + - Type: string + - Description: determination of the tissue, ideally BTO is used +- localization + - Type: string + - Description: determination of the localization (membrane, cytosol, etc.) 
+ +### SequenceModifications + +- sequence_modification + - Type: string + - Description: modified amino acid sequence +- specification_of_the_type_of_modification + - Type: string + - Description: Description of the types of modifications + +### PosttranslationalModifications + +- determination_of_ptm + - Type: string + - Description: Phosphorylation, Glycosylation, Acetylation, Hydroxylation, Methylation, Other + +### ProteinReaction + +- ec_number + - Type: string + - Description: EC number obtained from ExplorEnz +- reaction_as_in_explorenz + - Type: string + - Description: reaction as described in ExplorEnz +- comment + - Type: string + - Description: comment on the protein reaction if not properly described in ExplorEnz + +### Dataset + +- name + - Type: string + - Description: name of the dataset +- assay_conditions + - Type: AssayConditions[] +- results_set + - Type: ResultsSet[] +- doi + - Type: string + - Description: DOI of the dataset + +### AssayConditions + +- small_assay_components + - Type: SmallAssayComponents + - Description: Description of the compound used in the assay +- macromolecular_components + - Type: MacromolecularComponents + - Description: Description of the macromolecular components +- concentration_of_the_assayed_protein + - Type: string + - Description: value with unit +- description_of_concentration_measurement + - Type: string + - Description: free text field +- ph + - Type: string + - Description: value and unit +- pd + - Type: string + - Description: value and unit +- temperature + - Type: string + - Description: value and unit, K and Celsius + +### SmallAssayComponents + +- role + - Type: string + - Description: role in the assay, i.e. substrate, product, etc. 
+- initial_concentration_fixed + - Type: string + - Description: unit, value +- initial_concentration_varied + - Type: string + - Description: concentration range, value, unit +- compound_name + - Type: string + - Description: name as obtained from PubChem +- inchi + - Type: string + - Description: InChI string +- iupac + - Type: string + - Description: IUPAC name +- chebi_id + - Type: string + - Description: identifier +- pubchem_cid + - Type: string + - Description: identifier + +### MacromolecularComponents + +- role + - Type: RoleOfComponent + - Description: role in the assay, i.e. substrate, product, etc. +- initial_concentration_fixed + - Type: string + - Description: unit, value +- initial_concentration_varied + - Type: string + - Description: concentration range, value, unit +- classification + - Type: string + - Description: protein, carbohydrate, DNA, RNA, etc. +- compound_name + - Type: string + - Description: name as obtained from PubChem +- inchi + - Type: string + - Description: InChI string +- iupac + - Type: string + - Description: IUPAC name +- database_used + - Type: string + - Description: name of the database +- identifier + - Type: string + - Description: identifier + +### RoleOfComponent + +- substrate + - Type: string + - Description: substrate of chemical reaction +- product + - Type: string + - Description: product of chemical reaction +- inhibitor + - Type: string + - Description: component that inhibits the chemical reaction +- activator + - Type: string + - Description: component that activates/enhances the chemical reaction + +### CompoundClassification + +- protein + - Type: string +- carbohydrate + - Type: string +- dna + - Type: string +- rna + - Type: string +- lipid + - Type: string +- other + - Type: string + +### ResultsSet + +- name + - Type: string + - Description: Name of the results set +- initial_kinetic_parameters + - Type: InitialKineticsParameters + - Description: Km, kcat, V, kcat/Km, V/Km +- activation + - Type: 
Activation +- inhibition + - Type: Inhibition + +### InitialKineticsParameters + +- km + - Type: string + - descriptor: value, SE, unit +- kcat + - Type: string + - descriptor: value, SE, unit +- v + - Type: string + - descriptor: value, SE, unit +- kcat_over_km + - Type: string + - descriptor: value, SE, unit +- v_over_km + - Type: string + - descriptor: value, SE, unit + +### Activation + +- affinity_constant + - Type: string + - descriptor: value, SE, unit, true or apparent +- velocity_no_activator + - Type: string + - Description: velocity without activator +- velocity_at_max_concentration + - Type: string + - Description: velocity at maximum concentration of activator +- saturation + - Type: string + - Description: statement whether the concentration was saturating or not +- incluence_no_activator + - Type: string + - Description: Influence on MM kinetics. Schema: Value, SE, Unit, true/apparent +- influence_at_maxconcentration + - Type: string + - Description: Influence on MM kinetics. Schema: Value, SE, Unit, true/apparent + +### Inhibition + +- reversibility_yes + - Type: string + - Description: inhibition type - competitive, uncompetitive, mixed, kic, SE, Unit, Math function +- reversibility_no + - Type: string + - Description: ki, SE, Unit, Comment