Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add analyzer scores #15

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions postgres-initdb.d/create_features_base_table.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
-- Parent table for per-protein analyzer scores.  One child table per score
-- (features.<score>_table) INHERITS from it, adding no columns of its own.
CREATE TABLE IF NOT EXISTS features.base_table (
    external_id TEXT NOT NULL,
    specie_id INTEGER NOT NULL,
    -- STRING-DB channel scores used as filtering thresholds; nullable
    -- because a given analysis run only fills the thresholds it used.
    equiv_nscore INTEGER,
    equiv_nscore_transferred INTEGER,
    equiv_fscore INTEGER,
    equiv_pscore INTEGER,
    equiv_hscore INTEGER,
    array_score INTEGER,
    array_score_transferred INTEGER,
    experimental_score INTEGER,
    experimental_score_transferred INTEGER,
    database_score INTEGER,
    database_score_transferred INTEGER,
    textmining_score INTEGER,
    textmining_score_transferred INTEGER,
    neighborhood_score INTEGER,
    fusion_score INTEGER,
    cooccurence_score INTEGER,
    -- NUMERIC, not NUMBER: NUMBER is an Oracle type and is a syntax error
    -- in PostgreSQL.
    value NUMERIC NOT NULL
);
6 changes: 6 additions & 0 deletions server/analyzer/interactor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from server.analyzer.scores import compute_score


async def compute_and_write_score(db, net_nx, species, score, score_threshold):
    """Compute one analyzer score over `net_nx` and persist it via `db`.

    :param db: open StringDB connection wrapper
    :param net_nx: networkx graph of the species' protein network
    :param species: species (taxon) identifier
    :param score: score name, a key of SCORES
    :param score_threshold: dict of threshold column -> value used for this run
    """
    score_dict = compute_score(score, net_nx, species)
    # write_analysis_score is a bound method: passing `db` explicitly (as the
    # original did) shifts every argument by one and raises TypeError.
    await db.write_analysis_score(species, score, score_dict, score_threshold)
21 changes: 21 additions & 0 deletions server/analyzer/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from asyncio import get_event_loop, gather
from server.analyzer.interactor import compute_and_write_score
from server.analyzer.scores import SCORES
from server.sources.stringdb import StringDB


async def main(species, score_threshold):
    """Fetch the STRING network for `species` and compute/persist every
    registered analyzer score concurrently.

    :param species: species (taxon) identifier
    :param score_threshold: dict of threshold column -> value passed to
        get_network and stored alongside each score
    """
    async with StringDB() as db:
        net = await db.get_network(species, score_threshold)
        net_nx = net.to_networkx()

        tasks = [compute_and_write_score(db, net_nx, species, score,
                                         score_threshold)
                 for score in SCORES]
        # gather() must be awaited: without the await, main() returns (and
        # the StringDB context closes) before any score is computed/written.
        await gather(*tasks)


if __name__ == '__main__':
    # Run the analysis for taxon 6239 (presumably C. elegans — confirm) at
    # database-score thresholds 0, 100, ..., 1000, one run per threshold.
    SPECIES_ID = 6239
    event_loop = get_event_loop()
    for step in range(11):
        thresholds = {'database_score': step * 100}
        event_loop.run_until_complete(main(SPECIES_ID, thresholds))
94 changes: 94 additions & 0 deletions server/analyzer/scores.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from networkx.algorithms.distance_measures import eccentricity
from networkx.algorithms.centrality import (
betweenness_centrality, closeness_centrality, degree_centrality,
eigenvector_centrality, subgraph_centrality)
from networkx.algorithms.cluster import clustering
# Registry mapping a short score name (e.g. 'DC', 'BC') to the function
# that computes it over a networkx graph.
SCORES = {}


def score(name):
    """Decorator registering the decorated compute function in SCORES
    under `name`. The function itself is returned unchanged."""
    def register(compute_fn):
        SCORES[name] = compute_fn
        return compute_fn
    return register


def compute_score(score, net, species):
    """Look up and run the compute function for `score` over `net`,
    logging start/finish to stdout (`species` is only used in the log)."""
    print('computing', score, 'for', species)
    result = score_factory(score)(net)
    print('done computing', score, 'for', species)
    return result


def score_factory(score):
    """Return the compute function registered in SCORES for `score`.

    :raises NotImplementedError: if no function is registered under that
        name (now with a message naming the offending score, instead of a
        bare, uninformative NotImplementedError).
    """
    try:
        return SCORES[score]
    except KeyError:
        raise NotImplementedError(
            f'no score function registered for {score!r}') from None


@score('DC')
def degree_centrality_score(net):
    """Degree centrality (DC) of every vertex of `net`."""
    centrality = degree_centrality(net)
    return centrality


@score('EC')
def eccentricity_centrality_score(net):
    """Eccentricity centrality (EC): 1 / eccentricity per vertex.

    A vertex with eccentricity 0 (a single-vertex component passed as the
    whole graph) is mapped to 0 instead of raising ZeroDivisionError.
    Note eccentricity() itself still raises on disconnected graphs.
    """
    return {vertex: 1 / e if e else 0
            for vertex, e in eccentricity(net).items()}


@score('CC')
def closeness_centrality_score(net):
    """Closeness centrality (CC) of every vertex of `net`."""
    centrality = closeness_centrality(net)
    return centrality


@score('EigenC')
def eigenvector_centrality_score(net):
    """Eigenvector centrality (EigenC) of every vertex of `net`."""
    centrality = eigenvector_centrality(net)
    return centrality


@score('BC')
def betweenness_centrality_score(net):
    """Betweenness centrality (BC) of every vertex of `net`."""
    centrality = betweenness_centrality(net)
    return centrality


@score('SC')
def subgraph_centrality_score(net):
    """Subgraph centrality (SC) of every vertex of `net`."""
    centrality = subgraph_centrality(net)
    return centrality


# TODO compute score
# @score('SoECC')  -- NOTE(review): original commented decorator read
# @score(SoECC); the bare name would raise NameError if ever enabled.
def soecc_score(net):
    # Placeholder — presumably "sum of edge clustering coefficients";
    # TODO confirm the intended metric. Deliberately not registered in
    # SCORES until implemented.
    pass


# TODO compute score
# @score('NC')
def neighborhood_centrality_score(net):
    # Placeholder for neighborhood centrality (NC); not implemented and
    # deliberately left out of the SCORES registry until it is.
    pass


@score('LAC')
def local_average_connectivity_centrality_score(net):
    """Local average connectivity (LAC) of each vertex.

    Extracted from `A New Method for Identifying Essential Proteins Based on
    Network Topology Properties and Protein Complexes`: the average degree,
    within the subgraph induced by a vertex's neighbors, of those neighbors.
    Isolated vertices score 0.
    """
    lac = {}
    for node in net:
        degree = net.degree(node)
        if degree:
            induced = net.subgraph(net.neighbors(node))
            # mean degree in the induced subgraph: sum(deg) / k == 2*E / k
            lac[node] = 2 * induced.size() / degree
        else:
            lac[node] = 0
    return lac


@score('LCC')
def local_clustering_coefficient_score(net):
    """Local clustering coefficient (LCC) of every vertex of `net`."""
    coefficients = clustering(net)
    return coefficients


# TODO compute score
# @score('ME')
def me_score(net):
    # Placeholder for the 'ME' score (meaning unclear from this file —
    # TODO document); not implemented and not registered in SCORES.
    pass
34 changes: 30 additions & 4 deletions server/sources/stringdb.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import aiopg
import igraph
import numpy as np

from networkx import Graph
from server.sources.network import Network
from server.sources.bitscore import TricolBitscoreMatrix

Expand Down Expand Up @@ -35,6 +35,12 @@ def to_igraph(self):

return graph

def to_networkx(self):
    """Convert this network to an undirected networkx Graph built from
    self.external_ids (vertices) and self.edges."""
    graph = Graph()
    graph.add_nodes_from(self.external_ids)
    graph.add_edges_from(self.edges)
    return graph

async def get_species(self, db):
if not self._species:
self._species = await db.get_species(self.string_ids)
Expand Down Expand Up @@ -188,7 +194,8 @@ async def get_protein_sequences(self, species_id):

return dict(rows)

async def get_network(self, species_id, score_thresholds={}, external_ids=None):
async def get_network(self, species_id, score_thresholds={},
external_ids=None):
if external_ids is None:
external_ids = await self.get_protein_external_ids(species_id)

Expand Down Expand Up @@ -231,8 +238,7 @@ async def get_network(self, species_id, score_thresholds={}, external_ids=None):
network.node_node_links
where
node_type_b = %(species_id)s;
""";

"""

await cursor.execute(sql, {'species_id': species_id})

Expand Down Expand Up @@ -335,3 +341,23 @@ async def get_string_go_annotations(self, protein_ids=None, taxid=None):
rows = await cursor.fetchall()

return rows if rows else None

async def ensure_table_exists(self, score):
    """Create features.<score>_table, inheriting features.base_table,
    if it does not exist yet.

    `score` is interpolated into DDL — it must come from the trusted
    SCORES registry, never from untrusted input.
    """
    # PostgreSQL INHERITS syntax requires an (empty) column list and
    # parentheses around the parent table; the original string
    # ("... INHERITS features.base_table", no parens) is a syntax error.
    sql = (f"CREATE TABLE IF NOT EXISTS features.{score}_table () "
           f"INHERITS (features.base_table)")
    async with self._get_cursor() as cursor:
        await cursor.execute(sql)

async def write_analysis_score(self, specie, score, score_dict,
                               score_threshold):
    """Persist one analyzer score for a species.

    :param specie: species (taxon) identifier
    :param score: score name; selects/creates features.<score>_table
    :param score_dict: mapping external_id -> score value
    :param score_threshold: mapping threshold column name -> value used
        for this run (column names must be trusted — they are interpolated
        into the SQL)
    """
    await self.ensure_table_exists(score)
    threshold_columns = ', '.join(score_threshold)
    # external_id, specie_id, one per threshold, value
    placeholders = ', '.join(['%s'] * (2 + len(score_threshold) + 1))
    sql = f"""
        INSERT INTO features.{score}_table
            (external_id, specie_id, {threshold_columns}, value)
        VALUES ({placeholders});
    """
    threshold_values = tuple(score_threshold.values())
    async with self._get_cursor() as cursor:
        # .items() — iterating the dict directly yields only the keys, so
        # the original `for p, value in score_dict` could never unpack.
        # Parameterized values also fix the unquoted TEXT external_id and
        # the str.join over int threshold values.
        for external_id, value in score_dict.items():
            await cursor.execute(
                sql, (external_id, specie) + threshold_values + (value,))