From 683e50f751717538ddb2ecc2092632e743bc4295 Mon Sep 17 00:00:00 2001
From: cb-Hades <81743695+cb-Hades@users.noreply.github.com>
Date: Fri, 9 Feb 2024 12:07:59 +0100
Subject: [PATCH] Started reconstructing extension module

- progress found in the notebooks under dev
#1, #2
---
 .gitignore                      |   2 +
 dev/CB_extension_remod.ipynb    | 551 ++++++++++++++++++++++++++++++++
 dev/CB_under_construction.ipynb |  41 +++
 specimen.egg-info/SOURCES.txt   |   1 -
 4 files changed, 594 insertions(+), 1 deletion(-)
 create mode 100644 dev/CB_extension_remod.ipynb
 create mode 100644 dev/CB_under_construction.ipynb

diff --git a/.gitignore b/.gitignore
index ad233c2..00b8e0d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,5 @@ data/media.csv
 data/media.numbers
 
 example/
+
+build/
diff --git a/dev/CB_extension_remod.ipynb b/dev/CB_extension_remod.ipynb
new file mode 100644
index 0000000..e398f78
--- /dev/null
+++ b/dev/CB_extension_remod.ipynb
@@ -0,0 +1,551 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import cobra\n",
+    "import pandas as pd\n",
+    "\n",
+    "from random import choice \n",
+    "from string import ascii_uppercase, digits\n",
+    "from typing import Literal\n",
+    "\n",
+    "from refinegems.io import load_a_table_from_database, load_model_cobra"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mapped_genes_path = '/Users/carolinb/Documents/104 Masterthesis/klebsiella-pipeline/example/thesis/Kp_std/03_refinement/step1-extension/genes_mapped.csv'\n",
+    "mg = pd.read_csv(mapped_genes_path)\n",
+    "mapped_genes_comp = map_BiGG_reactions(mapped_genes_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "draft_model_path = '/Users/carolinb/Documents/104 Masterthesis/klebsiella-pipeline/example/thesis/Kp_std/02_generate_draft_model/Kp_std_draft.xml'\n",
+    "draft_model = load_model_cobra(draft_model_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### generally useful functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# @TODO: docs\n",
+    "def create_random_id(model:cobra.Model, entity_type='reac', prefix='') -> str:\n",
+    "\n",
+    "    match type:\n",
+    "        case 'reac':\n",
+    "            all_ids = [_.id for _ in model.reactions]\n",
+    "        case 'meta':\n",
+    "            all_ids = [_.id for _ in model.metabolites]\n",
+    "\n",
+    "    prefix = f'{prefix}{entity_type}'\n",
+    "    var = ''.join(choice(ascii_uppercase + digits) for i in range(6))\n",
+    "    label = prefix + var\n",
+    "    j = 6\n",
+    "    \n",
+    "    while True:\n",
+    "        \n",
+    "        for i in range(36**6): # make sure it does not run endlessly\n",
+    "            if label in all_ids:\n",
+    "                label = prefix + ''.join(choice(ascii_uppercase + digits) for i in range(j))\n",
+    "            else:\n",
+    "                return label\n",
+    "            \n",
+    "        j = j + 1\n",
+    "\n",
+    "\n",
+    "# @TODO: \n",
+    "#     more namespace options\n",
+    "# @TODO: docs\n",
+    "# @TEST\n",
+    "def match_id_to_namespace(model_entity:[cobra.Reaction, cobra.Metabolite], namespace:Literal['BiGG']) -> None:\n",
+    "\n",
+    "    match model_entity:\n",
+    "\n",
+    "        # Reaction\n",
+    "        # --------\n",
+    "        case cobra.Reaction():\n",
+    "            match namespace:\n",
+    "\n",
+    "                case 'BiGG':\n",
+    "                    if 'bigg.reaction' in model_entity.annotation.keys():\n",
+    "                        model_entity.id = model_entity.annotation['bigg.reaction']\n",
+    "\n",
+    "                case _:\n",
+    "                    mes = f'Unknown input for namespace: {namespace}'\n",
+    "                    raise ValueError(mes)\n",
+    "                \n",
+    "        # Metabolite\n",
+    "        # ----------\n",
+    "        case cobra.Metabolite():\n",
+    "            match namespace:\n",
+    "\n",
+    "                case 'BiGG':\n",
+    "                    if 'bigg.metabolite' in model_entity.annotation.keys():\n",
+    "                        model_entity.id = model_entity.annotation['bigg.reaction'] + '_' + model_entity.compartment\n",
+    "\n",
+    "                case _:\n",
+    "                    mes = f'Unknown input for namespace: {namespace}'\n",
+    "                    raise ValueError(mes)\n",
+    "        # Error\n",
+    "        # -----\n",
+    "        case _:\n",
+    "            mes = f'Unknown type for model_entity: {type(model_entity)}'\n",
+    "            raise TypeError(mes)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### reworking functions for extension\n",
+    "\n",
+    "#### mapping"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# @NOTE changed\n",
+    "def map_Bigg_reactions_row(row, namespace):\n",
+    "    \"\"\"Map a single entry from the table in map_BiGG_reactions() to the BiGG reaction namespace.\n",
+    "\n",
+    "    :param row:       A single row of the table.\n",
+    "    :type  row:       pd.Series\n",
+    "    :param namespace: The BiGG reaction namespace table.\n",
+    "    :type  namespace: pd.DataFrame\n",
+    "    :returns:         The edited row.\n",
+    "    :rtype:           pd.Series\n",
+    "    \"\"\"\n",
+    "\n",
+    "    \"\"\"\n",
+    "    @TODO\n",
+    "        NOTE: only works for cases, where KEGG.reaction in row contains EXACTLY one entry\n",
+    "              in the rare case that multiple reactions belong to one enzyme, they are omitted\n",
+    "              in this search\n",
+    "    \"\"\"\n",
+    "\n",
+    "    # match by EC number AND KEGG id\n",
+    "    matches = namespace.loc[namespace['EC Number'].str.contains(row['EC number']) & namespace['KEGG Reaction'].str.contains(row['KEGG.reaction'])]\n",
+    "\n",
+    "    # case 1 : no matches\n",
+    "    if matches.empty:\n",
+    "        return row\n",
+    "\n",
+    "    # case 2 : exactly one match\n",
+    "    elif len(matches) == 1:\n",
+    "        row['bigg_id'] = matches['id'].values[0]\n",
+    "\n",
+    "    # case 3 : multiple matches\n",
+    "    #          often due to reaction being in different compartments\n",
+    "    else:\n",
+    "        row['bigg_id'] = ' '.join(matches['id'].values)\n",
+    "\n",
+    "    return row\n",
+    "\n",
+    "\n",
+    "# @TEST : fitted to refinegems\n",
+    "# @CHECK : connections, e.g. input is now a param short \n",
+    "def map_BiGG_reactions(table_file):\n",
+    "    \"\"\"Map the output of map_to_KEGG() to a BiGG namespace file (rewritten-type, see auxilliaries).\n",
+    "\n",
+    "    :param table_file: The path to the saved table from running map_to_KEGG().\n",
+    "    :type  table_file: string\n",
+    "    :returns:          The table with an additional column for the mapping to BiGG reactions.\n",
+    "    :rtype:            pd.DataFrame\n",
+    "    \"\"\"\n",
+    "\n",
+    "    r_namespace = load_a_table_from_database('bigg_reactions', False)\n",
+    "\n",
+    "    table = pd.read_csv(table_file)\n",
+    "    table['bigg_id'] = pd.Series(dtype='str')\n",
+    "\n",
+    "    table = table.apply(lambda row: map_Bigg_reactions_row(row,r_namespace), axis=1)\n",
+    "\n",
+    "    return table\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### actual extension"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# @TODO\n",
+    "# @TEST\n",
+    "# @DOCS wrong\n",
+    "# UNDER CONSTRUCTION\n",
+    "def build_metabolite_mnx(metabolite, model, mnx_chem_prop, mnx_chem_xref, bigg_metabolites, namespace):\n",
+    "    \"\"\"Create or retrieve (from model) a metabolite based on its MetaNetX ID.\n",
+    "\n",
+    "    :param metabolite:        The MetaNetX ID of the metabolite.\n",
+    "    :type  metabolite:        string\n",
+    "    :param model:             The underlying genome-scale metabolic model.\n",
+    "    :type  model:             cobra.model\n",
+    "    :param mnx_chem_xref:     The chem_xref table from MetaNetX\n",
+    "    :type  mnx_chem_xref:     pd.DataFrame\n",
+    "    :param mnx_chem_prop:     The chem_prop table from MetaNetX\n",
+    "    :type  mnx_chem_prop:     pd.DataFrame\n",
+    "    :param bigg_metabolites:  The BiGG compound namespace table.\n",
+    "    :type  bigg_metabolites:  pd.DataFrame\n",
+    "    :returns:                 The retrieved or newly build metabolite.\n",
+    "    :rtype:                   cobra.Metabolite\n",
+    "    \"\"\"\n",
+    "\n",
+    "    metabolite_prop = mnx_chem_prop[mnx_chem_prop['ID']==metabolite]\n",
+    "    metabolite_anno = mnx_chem_xref[mnx_chem_xref['ID']==metabolite]\n",
+    "    model_mnx = [x.annotation['metanetx.chemical'] for x in model.metabolites if 'metanetx.chemical' in x.annotation]\n",
+    "\n",
+    "    # fast check if compound already in model\n",
+    "    # ------------------------------------------\n",
+    "    # @TODO ..........................................\n",
+    "        #   currently no checking for compartments\n",
+    "        #   first match will be taken (most often cytosol one)\n",
+    "        #   regardless of the compartment\n",
+    "        #.............................................\n",
+    "    # step 1: check if MetaNetX ID in model\n",
+    "    if metabolite in model_mnx:\n",
+    "        matches = [x.id for x in model.metabolites if 'metanetx.chemical' in x.annotation and x.annotation['metanetx.chemical']==metabolite]\n",
+    "\n",
+    "    # step 2: if yes, retrieve metabolite from model\n",
+    "    #  case 1: multiple matches found\n",
+    "        if len(matches) > 1:\n",
+    "            # ................\n",
+    "            # @TODO see above\n",
+    "            # ................\n",
+    "            match = model.metabolites.get_by_id(matches[0])\n",
+    "        #  case 2: only one match found\n",
+    "        else:\n",
+    "            match = model.metabolites.get_by_id(matches[0])\n",
+    "\n",
+    "        # step 3: add metabolite\n",
+    "        return match\n",
+    "\n",
+    "    # if not, create new metabolite\n",
+    "    # -----------------------------\n",
+    "    else:\n",
+    "\n",
+    "        # step 1: create a random metabolite ID\n",
+    "        # ...........................\n",
+    "        # @TODO : compartment problem \n",
+    "        # - does it have to be in the name?\n",
+    "        # ...........................\n",
+    "        new_metabolite = cobra.Metabolite(create_random_id(model, 'meta','SPECIMEN')) \n",
+    "\n",
+    "\n",
+    "        # step 2: add features\n",
+    "        # --------------------\n",
+    "        # @TODO ..........................................\n",
+    "        #   currently no checking for compartments\n",
+    "        #   defaults to c\n",
+    "        #   makes it difficult to add exchange reactions\n",
+    "        #.................................................\n",
+    "        new_metabolite.formula = metabolite_prop['formula'].iloc[0]\n",
+    "        new_metabolite.name = metabolite_prop['name'].iloc[0]\n",
+    "        new_metabolite.charge = metabolite_prop['charge'].iloc[0]\n",
+    "        new_metabolite.compartment = 'c'\n",
+    "\n",
+    "        # step 3: add notes\n",
+    "        # -----------------\n",
+    "        new_metabolite.notes['added via'] = 'metanetx.chemical'\n",
+    "\n",
+    "        # step 4: add annotations\n",
+    "        # -----------------------\n",
+    "        new_metabolite.annotation['metanetx.chemical'] = metabolite_prop['ID'].iloc[0]\n",
+    "        new_metabolite.annotation['chebi'] = metabolite_prop['reference'].iloc[0].upper()\n",
+    "        if not pd.isnull(metabolite_prop['InChIKey'].iloc[0]):\n",
+    "            new_metabolite.annotation['inchikey'] = metabolite_prop['InChIKey'].iloc[0].split('=')[1]\n",
+    "        for db in ['kegg.compound','metacyc.compound','seed.compound','bigg.metabolite']:\n",
+    "            db_matches = metabolite_anno[metabolite_anno['source'].str.contains(db)]\n",
+    "            if len(db_matches) == 1:\n",
+    "                 new_metabolite.annotation[db] = db_matches['source'].iloc[0].split(':',1)[1]\n",
+    "            elif len(db_matches) > 1:\n",
+    "                new_metabolite.annotation[db] = [m.split(':',1)[1] for m in db_matches['source'].tolist()]\n",
+    "\n",
+    "        # if no BiGG was found in MetaNetX, try reverse search in BiGG\n",
+    "        if metabolite in bigg_metabolites['MetaNetX (MNX) Chemical']:\n",
+    "            new_metabolite.annotation['bigg.metabolite'] =  bigg_metabolites[bigg_metabolites['MetaNetX (MNX) Chemical']==metabolite].iloc[0]\n",
+    "        \n",
+    "        # add additional information from bigg if possible    \n",
+    "        if 'bigg.metabolite' in new_metabolite.annotation.keys():\n",
+    "            bigg_information = bigg_metabolites[bigg_metabolites['bigg_id'].str.contains('|'.join(new_metabolite.annotation['bigg.metabolite']))]\n",
+    "            db_id_bigg = {'BioCyc':'biocyc', 'MetaNetX (MNX) Chemical':'metanetx.chemical','SEED Compound':'seed.compound','InChI Key':'inchikey'}\n",
+    "            for db in db_id_bigg:\n",
+    "                info = bigg_information[db].dropna().to_list()\n",
+    "                if len(info) > 0:\n",
+    "                    info = ','.join(info)\n",
+    "                    info = [x.strip() for x in info.split(',')] # make sure all entries are a separate list object\n",
+    "                    new_metabolite.annotation[db_id_bigg[db]] = info\n",
+    "\n",
+    "        # step 5: change ID according to namespace\n",
+    "        # ----------------------------------------\n",
+    "        match_id_to_namespace(new_metabolite,namespace)\n",
+    "       \n",
+    "        # step 6: re-check existence of ID in model\n",
+    "        # -----------------------------------------\n",
+    "        if new_metabolite.id in [_.id for _ in model.metabolites]:\n",
+    "            return model.metabolites.get_by_id(new_metabolite.id)\n",
+    "        \n",
+    "    return new_metabolite\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "# seems fine\n",
+    "def add_gene(model, reaction, row, first=False):\n",
+    "    \"\"\"Add a new gene to a genome-scale metabolic cobra model.\n",
+    "\n",
+    "    :param model:    The model.\n",
+    "    :type  model:    cobra.Model\n",
+    "    :param reaction: The reaction id to add the gene to.\n",
+    "    :type  reaction: string\n",
+    "    :param row:      A single row of the output table of map_BiGG_reactions().\n",
+    "    :type  row:      pd.Series\n",
+    "    :param first:    Shows, if gene is the first gene to be added to the reaction.\n",
+    "    :type  first:    bool, true if gene is first to be added.\n",
+    "    :returns:       The updated model.\n",
+    "    :rtype: cobra.Model\n",
+    "    \"\"\"\n",
+    "\n",
+    "    # add gene\n",
+    "    if first or model.reactions.get_by_id(reaction).gene_reaction_rule == '':\n",
+    "        model.reactions.get_by_id(reaction).gene_reaction_rule = row[0]\n",
+    "    else:\n",
+    "        model.reactions.get_by_id(reaction).gene_reaction_rule = model.reactions.get_by_id(reaction).gene_reaction_rule + ' or ' + row[0]\n",
+    "\n",
+    "    # add name\n",
+    "    model.genes.get_by_id(row[0]).name = row[1]\n",
+    "\n",
+    "    # add annotations\n",
+    "    if not pd.isnull(row[4]):\n",
+    "        model.genes.get_by_id(row[0]).annotation['ncbigene'] = row[4]\n",
+    "    model.genes.get_by_id(row[0]).annotation['ncbiprotein'] = row[2].split('.')[0]\n",
+    "    # note: annotations like sbo, kegg.genes and uniprot missing\n",
+    "\n",
+    "    return model\n",
+    "\n",
+    "\n",
+    "# UNDER CONSTRUCTION\n",
+    "def add_reaction(model,row,reac_xref,reac_prop,chem_xref,chem_prop,bigg_metabolites, namespace:str='BiGG', exclude_dna=True, exclude_rna=True):\n",
+    "\n",
+    "    # create reaction object\n",
+    "    reac = cobra.Reaction(create_random_id(model,'reac','SPECIMEN'))\n",
+    "\n",
+    "    # ----------------------------\n",
+    "    # curate reaction via MetaNetX\n",
+    "    # ----------------------------\n",
+    "    # try kegg.reaction --> metanetx.reaction\n",
+    "    if F'kegg.reaction:{row[\"KEGG.reaction\"]}' in list(reac_xref['source']):\n",
+    "        \n",
+    "        # get MetaNetX ID\n",
+    "        met_reac_kegg = reac_xref[reac_xref['source']==F'kegg.reaction:{row[\"KEGG.reaction\"]}']\n",
+    "        met_reac = reac_prop[reac_prop['ID']==met_reac_kegg['ID'].iloc[0]]\n",
+    "\n",
+    "        # make sure exactly one entry is parsed\n",
+    "        # @TODO : parallel parsing\n",
+    "        if len(met_reac) > 1:\n",
+    "            print(F'Warning: multiple matches for kegg.reaction {row[\"KEGG.reaction\"]} found. Only first one will be used.')\n",
+    "            met_reac = met_reac.head(1)\n",
+    "\n",
+    "        # add name\n",
+    "        # --------\n",
+    "        #     from MetaNetX KEGG description\n",
+    "        reac.name = met_reac_kegg['description'].iloc[0].split('|')[0]\n",
+    "\n",
+    "        # add notes\n",
+    "        # ---------\n",
+    "        reac.notes['creation'] = 'via MetaNetX'\n",
+    "        reac.notes['KEGG.information'] = row['KEGG.notes']\n",
+    "\n",
+    "        # add metabolites\n",
+    "        # ----------------\n",
+    "        reac.add_metabolites(get_metabolites_mnx(model,met_reac['mnx equation'].iloc[0],chem_xref,chem_prop,bigg_metabolites))\n",
+    "        #@TODO .............\n",
+    "        #   direction of reaction\n",
+    "        #   ---> current solution:\n",
+    "        #        use one direction only\n",
+    "        # ..................\n",
+    "\n",
+    "        # add annotations\n",
+    "        # ---------------\n",
+    "        reac.annotation['ec-code'] = row['EC number']\n",
+    "        reac.annotation['kegg.reaction'] = row['KEGG.reaction']\n",
+    "        reac.annotation['metanetx.reaction'] = met_reac_kegg['ID'].iloc[0]\n",
+    "        met_reac_anno = reac_xref[reac_xref['ID']==met_reac_kegg['ID'].iloc[0]]\n",
+    "        for db in ['metacyc.reaction','seed.reaction','rhea','bigg.reaction']:\n",
+    "            db_matches = met_reac_anno[met_reac_anno['source'].str.contains(db)]\n",
+    "            if len(db_matches) == 1:\n",
+    "                reac.annotation[db] = db_matches['source'].iloc[0].split(':',1)[1]\n",
+    "            elif len(db_matches) > 1:\n",
+    "                reac.annotation[db] = [r.split(':',1)[1] for r in db_matches['source'].tolist()]\n",
+    "            else:\n",
+    "                continue\n",
+    "    \n",
+    "    # if not possible, use information from KEGG only\n",
+    "    # ------------------------\n",
+    "    # curate reaction via KEGG\n",
+    "    # ------------------------\n",
+    "    else:\n",
+    "        \n",
+    "        # retrieve reaction information from KEGG\n",
+    "        reac_kegg = kegg_reaction_parser(row['KEGG.reaction'])\n",
+    "\n",
+    "        # add name\n",
+    "        # --------\n",
+    "        #     from KEGG name\n",
+    "        reac.name = reac_kegg['name']\n",
+    "\n",
+    "        # add notes\n",
+    "        # ---------\n",
+    "        reac.notes['creation'] = 'via KEGG'\n",
+    "        reac.notes['KEGG.information'] = row['KEGG.notes']\n",
+    "\n",
+    "        # add metabolites\n",
+    "        # ----------------\n",
+    "        reac.add_metabolites(get_metabolites_kegg(model,reac_kegg['equation'],chem_xref,chem_prop,bigg_metabolites))\n",
+    "            #@TODO .............\n",
+    "            #   direction of reaction\n",
+    "            #   ---> current solution:\n",
+    "            #        use one direction only\n",
+    "            # ..................\n",
+    "\n",
+    "        # add annotations\n",
+    "        # ---------------\n",
+    "        reac.annotation['ec-code'] = row['EC number']\n",
+    "        reac.annotation['kegg.reaction'] = row['KEGG.reaction']\n",
+    "        for db, identifiers in reac_kegg['db'].items():\n",
+    "            if len(identifiers) == 1:\n",
+    "                reac.annotation[db] = identifiers[0]\n",
+    "            else:\n",
+    "                reac.annotation[db] = identifiers\n",
+    "\n",
+    "\n",
+    "    # --------------------------------------\n",
+    "    # re-set ID to fit namespace if possible\n",
+    "    # --------------------------------------\n",
+    "    match_id_to_namespace(reac)\n",
+    "\n",
+    "    # ---------------------\n",
+    "    # add reaction to model\n",
+    "    # ---------------------\n",
+    "    \n",
+    "    # if the ID change results in an ID already in the model, use that reaction\n",
+    "    if reac.id in [_.id for _ in model.reactions]:\n",
+    "        print(f'{reac.id} already in model, not added a second time.')\n",
+    "    else:\n",
+    "        # check if reaction is complete\n",
+    "        # and fullfills the requirements / parameters\n",
+    "        if isreaction_complete(reac, exclude_dna, exclude_rna):\n",
+    "            model.add_reactions([reac])\n",
+    "        else:\n",
+    "            print(F'reaction {reac.name} for gene {row[\"locus_tag\"]} could not be completely reconstructed, not added to model.')\n",
+    "            return model\n",
+    "\n",
+    "    # --------\n",
+    "    # add gene\n",
+    "    # --------\n",
+    "    # check if gene is already in model\n",
+    "    if row['locus_tag'] in model.genes:\n",
+    "        # only need to add the gene reaction rule, if not already in\n",
+    "        if not model.reactions.get_by_id(reac.id).gene_reaction_rule or len(model.reactions.get_by_id(reac.id).gene_reaction_rule) == 0:\n",
+    "            model.reactions.get_by_id(reac.id).gene_reaction_rule = row['locus_tag']\n",
+    "        else:\n",
+    "            model.reactions.get_by_id(reac.id).gene_reaction_rule = model.reactions.get_by_id(reac.id).gene_reaction_rule + ' or ' + row['locus_tag']\n",
+    "    else:\n",
+    "        # add (to) gene reaction rule and curate new gene object\n",
+    "        if not model.reactions.get_by_id(reac.id).gene_reaction_rule or len(model.reactions.get_by_id(reac.id).gene_reaction_rule) == 0:\n",
+    "            model = add_gene(model, reac.id, row, first=True)\n",
+    "        else:\n",
+    "            model = add_gene(model, reac.id, row, first=False)\n",
+    "\n",
+    "    return model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Test Area 51"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'c'"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "draft_model.metabolites[0].compartment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "sprg",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/dev/CB_under_construction.ipynb b/dev/CB_under_construction.ipynb
new file mode 100644
index 0000000..8e8cb0b
--- /dev/null
+++ b/dev/CB_under_construction.ipynb
@@ -0,0 +1,41 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from refinegems.io import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "mbi",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/specimen.egg-info/SOURCES.txt b/specimen.egg-info/SOURCES.txt
index db4b78b..3e5a204 100644
--- a/specimen.egg-info/SOURCES.txt
+++ b/specimen.egg-info/SOURCES.txt
@@ -19,7 +19,6 @@ src/specimen/__init__.py
 src/specimen/cmd_access.py
 src/specimen/workflow.py
 src/specimen/classes/__init__.py
-src/specimen/classes/medium.py
 src/specimen/classes/reports.py
 src/specimen/core/__init__.py
 src/specimen/core/analysis.py