diff --git a/postgres-initdb.d/create_features_base_table.sql b/postgres-initdb.d/create_features_base_table.sql
new file mode 100644
index 0000000..b18e4fb
--- /dev/null
+++ b/postgres-initdb.d/create_features_base_table.sql
@@ -0,0 +1,21 @@
+CREATE TABLE IF NOT EXISTS features.base_table (
+    external_id TEXT NOT NULL,
+    specie_id INTEGER NOT NULL,
+    equiv_nscore INTEGER,
+    equiv_nscore_transferred INTEGER,
+    equiv_fscore INTEGER,
+    equiv_pscore INTEGER,
+    equiv_hscore INTEGER,
+    array_score INTEGER,
+    array_score_transferred INTEGER,
+    experimental_score INTEGER,
+    experimental_score_transferred INTEGER,
+    database_score INTEGER,
+    database_score_transferred INTEGER,
+    textmining_score INTEGER,
+    textmining_score_transferred INTEGER,
+    neighborhood_score INTEGER,
+    fusion_score INTEGER,
+    cooccurence_score INTEGER,
+    value NUMERIC NOT NULL
+);
diff --git a/server/analyzer/interactor.py b/server/analyzer/interactor.py
new file mode 100644
index 0000000..bb180dc
--- /dev/null
+++ b/server/analyzer/interactor.py
@@ -0,0 +1,8 @@
+from server.analyzer.scores import compute_score
+
+
+async def compute_and_write_score(db, net_nx, species, score, score_threshold):
+    """Compute `score` over `net_nx` and persist the result through `db`."""
+    score_dict = compute_score(score, net_nx, species)
+    # write_analysis_score is a bound method: `self` is passed implicitly.
+    await db.write_analysis_score(species, score, score_dict, score_threshold)
diff --git a/server/analyzer/main.py b/server/analyzer/main.py
new file mode 100644
index 0000000..9bc9c3b
--- /dev/null
+++ b/server/analyzer/main.py
@@ -0,0 +1,23 @@
+from asyncio import get_event_loop, gather
+from server.analyzer.interactor import compute_and_write_score
+from server.analyzer.scores import SCORES
+from server.sources.stringdb import StringDB
+
+
+async def main(species, score_threshold):
+    """Compute every registered score for `species` and store the results."""
+    async with StringDB() as db:
+        net = await db.get_network(species, score_threshold)
+        net_nx = net.to_networkx()
+
+        tasks = [compute_and_write_score(db, net_nx, species, score, score_threshold)
+                 for score in SCORES]
+        # gather() must be awaited, otherwise the coroutines never run.
+        await gather(*tasks)
+
+
+if __name__ == '__main__':
+    loop = get_event_loop()
+    species = 6239
+    for i in range(11):
+        loop.run_until_complete(main(species, {'database_score': i*100}))
diff --git a/server/analyzer/scores.py b/server/analyzer/scores.py
new file mode 100644
index 0000000..56726f0
--- /dev/null
+++ b/server/analyzer/scores.py
@@ -0,0 +1,103 @@
+from networkx.algorithms.distance_measures import eccentricity
+from networkx.algorithms.centrality import (
+    betweenness_centrality, closeness_centrality, degree_centrality,
+    eigenvector_centrality, subgraph_centrality)
+from networkx.algorithms.cluster import clustering
+
+# Registry of score name -> scoring function, populated by @score.
+SCORES = {}
+
+
+def score(name):
+    """Decorator registering a scoring function under `name` in SCORES."""
+    def decorator(fnx):
+        SCORES[name] = fnx
+        return fnx
+    return decorator
+
+
+def compute_score(score, net, species):
+    """Compute the named score over `net`, logging progress for `species`."""
+    print('computing', score, 'for', species)
+    r = score_factory(score)(net)
+    print('done computing', score, 'for', species)
+    return r
+
+
+def score_factory(score):
+    """Look up a registered scoring function; raise if unknown."""
+    if score in SCORES:
+        return SCORES[score]
+    raise NotImplementedError(score)
+
+
+@score('DC')
+def degree_centrality_score(net):
+    return degree_centrality(net)
+
+
+@score('EC')
+def eccentricity_centrality_score(net):
+    # Eccentricity centrality is the reciprocal of the eccentricity.
+    return {vertex: 1 / e for vertex, e in eccentricity(net).items()}
+
+
+@score('CC')
+def closeness_centrality_score(net):
+    return closeness_centrality(net)
+
+
+@score('EigenC')
+def eigenvector_centrality_score(net):
+    return eigenvector_centrality(net)
+
+
+@score('BC')
+def betweenness_centrality_score(net):
+    return betweenness_centrality(net)
+
+
+@score('SC')
+def subgraph_centrality_score(net):
+    return subgraph_centrality(net)
+
+
+# TODO compute score
+# @score('SoECC')
+def soecc_score(net):
+    pass
+
+
+# TODO compute score
+# @score('NC')
+def neighborhood_centrality_score(net):
+    pass
+
+
+@score('LAC')
+def local_average_connectivity_centrality_score(net):
+    """Local average connectivity (LAC) of each vertex.
+
+    Extracted from `A New Method for Identifying Essential Proteins Based on
+    Network Topology Properties and Protein Complexes`.
+    """
+    lac = {}
+    for vertex in net:
+        number_neighbors = net.degree(vertex)
+        if not number_neighbors:
+            lac[vertex] = 0
+        else:
+            subgraph = net.subgraph(net.neighbors(vertex))
+            lac[vertex] = 2 * subgraph.size() / number_neighbors
+    return lac
+
+
+@score('LCC')
+def local_clustering_coefficient_score(net):
+    return clustering(net)
+
+
+# TODO compute score
+# @score('ME')
+def me_score(net):
+    pass
diff --git a/server/sources/stringdb.py b/server/sources/stringdb.py
index 4ac387b..3c1ab70 100644
--- a/server/sources/stringdb.py
+++ b/server/sources/stringdb.py
@@ -1,7 +1,7 @@
 import aiopg
 import igraph
 import numpy as np
-
+from networkx import Graph
 from server.sources.network import Network
 from server.sources.bitscore import TricolBitscoreMatrix
@@ -35,6 +35,12 @@ def to_igraph(self):
 
         return graph
 
+    def to_networkx(self):
+        G = Graph()
+        G.add_nodes_from(self.external_ids)
+        G.add_edges_from(self.edges)
+        return G
+
     async def get_species(self, db):
         if not self._species:
             self._species = await db.get_species(self.string_ids)
@@ -188,7 +194,8 @@ async def get_protein_sequences(self, species_id):
 
         return dict(rows)
 
-    async def get_network(self, species_id, score_thresholds={}, external_ids=None):
+    async def get_network(self, species_id, score_thresholds={},
+                          external_ids=None):
         if external_ids is None:
             external_ids = await self.get_protein_external_ids(species_id)
 
@@ -231,8 +238,7 @@ async def get_network(self, species_id, score_thresholds={}, external_ids=None):
                 network.node_node_links
             where
                 node_type_b = %(species_id)s;
-            """;
-
+            """
             await cursor.execute(sql, {'species_id': species_id})
 
@@ -335,3 +341,40 @@ async def get_string_go_annotations(self, protein_ids=None, taxid=None):
                 rows = await cursor.fetchall()
 
         return rows if rows else None
+
+    async def ensure_table_exists(self, score):
+        """Create the per-score child table inheriting features.base_table.
+
+        `score` comes from the internal SCORES registry, not user input,
+        so interpolating it as an identifier is acceptable here.
+        """
+        # INHERITS needs a (possibly empty) column list and parentheses:
+        # CREATE TABLE child () INHERITS (parent).
+        sql = (f"CREATE TABLE IF NOT EXISTS features.{score}_table () "
+               "INHERITS (features.base_table)")
+        async with self._get_cursor() as cursor:
+            await cursor.execute(sql)
+
+    async def write_analysis_score(self, specie, score, score_dict,
+                                   score_threshold):
+        """Bulk-insert the computed `score_dict` values for `specie`.
+
+        Table and column names are interpolated (internal identifiers
+        only); row values are passed as bound parameters to avoid
+        quoting bugs and SQL injection.
+        """
+        if not score_dict:
+            # Nothing to insert; an empty VALUES list is a syntax error.
+            return
+        await self.ensure_table_exists(score)
+        threshold_cols = ','.join(score_threshold)
+        threshold_vals = list(score_threshold.values())
+        row = '(' + ','.join(['%s'] * (2 + len(threshold_vals) + 1)) + ')'
+        params = []
+        for external_id, value in score_dict.items():
+            params.extend([external_id, specie, *threshold_vals, value])
+        sql = f"""
+            INSERT INTO features.{score}_table
+            (external_id, specie_id, {threshold_cols}, value) VALUES
+            {','.join([row] * len(score_dict))};
+        """
+        async with self._get_cursor() as cursor:
+            await cursor.execute(sql, params)