From 7579f45b2032e123a08dfd2f2a5fa84d2be52f87 Mon Sep 17 00:00:00 2001 From: Vincent Kataikko Date: Tue, 12 Sep 2023 14:30:17 +0200 Subject: [PATCH] Changes to WIP_query_update --- backend/src/cypher_queries.py | 596 ------------------ backend/src/enrichment.py | 4 +- backend/src/enrichment_graph.py | 11 +- .../components/enrichment/EnrichmentTool.vue | 14 +- .../select_options/BetweenesCentrality.vue | 83 --- .../select_options/EigenvectorCentrality.vue | 60 -- .../toolbar/select_options/HubsSelection.vue | 107 ---- .../toolbar/select_options/PageRank.vue | 63 -- .../src/components/visualization/MainVis.vue | 2 - 9 files changed, 7 insertions(+), 933 deletions(-) delete mode 100644 backend/src/cypher_queries.py delete mode 100644 frontend/src/components/toolbar/select_options/BetweenesCentrality.vue delete mode 100644 frontend/src/components/toolbar/select_options/EigenvectorCentrality.vue delete mode 100644 frontend/src/components/toolbar/select_options/HubsSelection.vue delete mode 100644 frontend/src/components/toolbar/select_options/PageRank.vue diff --git a/backend/src/cypher_queries.py b/backend/src/cypher_queries.py deleted file mode 100644 index af5b5c96..00000000 --- a/backend/src/cypher_queries.py +++ /dev/null @@ -1,596 +0,0 @@ -""" -Collection of Cypher queries for writing and reading the resulting -Neo4j graph database. -""" - -# ========================= Creating queries ========================= - -from utils import batches -from ast import literal_eval -import uuid -import subprocess - -TERM_FILE = uuid.uuid4() - - -def add_compound(graph, params): - """ - Create a compound with the specified id and - name. - """ - - query = """ - UNWIND $batch as entry - MERGE (compound:Compound { - id: entry.id, - name: entry.name - }) - """ - graph.run(query, params) - - -def add_disease(graph, params): - """ - Create a disease with the specified id and - name. - """ - - query = """ - UNWIND $batch as entry - MERGE (disease:Disease { - id: entry.id, - name: entry.name - }) - """ - graph.run(query, params) - - -def add_drug(graph, params): - """ - Create a drug with the specified id and - name. - """ - - query = """ - UNWIND $batch as entry - MERGE (drug:Drug { - id: entry.id, - name: entry.name - }) - """ - graph.run(query, params) - - -def add_class_parent_and_child(graph, params): - """ - Create parent - child relationship between - two pathway classes. - """ - - query = """ - UNWIND $batch as entry - MERGE (parent:Class { - name: entry.name_parent - }) - MERGE (child:Class { - name: entry.name_child - }) - MERGE (child)-[:IN]->(parent) - """ - graph.run(query, params) - - -def add_pathway(graph, params): - """ - Create a pathway with the specified id, name, - description and species to which it belongs. - After that, connect it to the corresponding class. - """ - - query = """ - UNWIND $batch as entry - MATCH (class:Class { - name: entry.class - }) - CREATE (pathway:Pathway { - id: entry.id, - name: entry.name, - description: entry.description, - species_id: entry.species_id - })-[:IN]->(class) - """ - - graph.run(query, params) - - -def add_protein(graph, params): - """ - Create a protein with the specified id, external id, - name, description and species to which it belongs. - """ - - query = """ - UNWIND $batch as entry - CREATE (protein:Protein { - external_id: entry.external_id, - name: toUpper(entry.preferred_name), - description: entry.annotation, - species_id: entry.species_id - }) - """ - graph.run(query, params) - - -def add_action(graph, params): - """ - For an existing protein - protein pair, create / update (merge) the given - action associated with the given pathway. - - If the action's "mode" is the same, the action is updated only if the current - provided score is higher than the previous. - """ - - query = """ - UNWIND $batch as entry - MATCH (protein1:Protein { - id: entry.id1 - }) - - MATCH (protein2:Protein { - id: entry.id2 - }) - - MERGE (protein1)-[action:ACTION { - mode: entry.mode - }]->(protein2) - ON CREATE SET action.score = entry.score - ON MATCH SET action.score = CASE action.score - WHEN entry.score > action.score - THEN action.score = entry.score - ELSE action.score - END - """ - graph.run(query, params) - - -def add_association(graph, params): - """ - For an existing protein - protein pair, create the association - between them. - """ - - query = """ - UNWIND $batch as entry - - MATCH (protein1:Protein { - external_id: entry.id1 - }) - - MATCH (protein2:Protein { - external_id: entry.id2 - }) - - CREATE (protein1)-[a:ASSOCIATION { - combined: entry.combined_score - }]->(protein2) - """ - graph.run(query, params) - - -# ========================= Connecting queries ========================= - - -def connect_protein_and_pathway(graph, params): - """ - Creates IN association for the given protein and - pathway. - """ - - query = """ - UNWIND $batch as entry - - MATCH (protein:Protein { - external_id: entry.protein_external_id - }) - - MATCH(pathway:Pathway { - id: entry.pathway_id - }) - - CREATE (protein)-[:IN]->(pathway) - """ - graph.run(query, params) - - -def connect_compound_and_pathway(graph, params): - """ - Creates IN association for the given compound and - pathway. - """ - - query = """ - UNWIND $batch as entry - - MATCH (compound:Compound { - id: entry.compound_id - }) - - MATCH (pathway:Pathway { - id: entry.pathway_id - }) - - CREATE (compound)-[:IN]->(pathway) - """ - graph.run(query, params) - - -def connect_disease_and_pathway(graph, params): - """ - Creates IN association for the given disease and - pathway. - """ - - query = """ - UNWIND $batch as entry - - MATCH (disease:Disease { - id: entry.disease_id - }) - - MATCH (pathway:Pathway { - id: entry.pathway_id - }) - - CREATE (disease)-[:IN]->(pathway) - """ - graph.run(query, params) - - -def connect_drug_and_pathway(graph, params): - """ - Creates IN association for the given drug and - pathway. - """ - - query = """ - UNWIND $batch as entry - - MATCH (drug:Drug { - id: entry.drug_id - }) - - MATCH (pathway:Pathway { - id: entry.pathway_id - }) - - CREATE (drug)-[:IN]->(pathway) - """ - graph.run(query, params) - - -# ========================= Schema queries ========================= - - -def create_constraints(graph): - """ - Creates node constraints for the Neo4j graph database. - """ - - queries = [ - # Protein - "CREATE CONSTRAINT ON (protein:Protein) ASSERT protein.id IS UNIQUE", - "CREATE CONSTRAINT ON (protein:Protein) ASSERT protein.external_id IS UNIQUE", - # Pathway - # "CREATE CONSTRAINT ON (pathway:Pathway) ASSERT pathway.id IS UNIQUE", - # Compound - # "CREATE CONSTRAINT ON (compound:Compound) ASSERT compound.id IS UNIQUE", - # Drug - # "CREATE CONSTRAINT ON (drug:Drug) ASSERT drug.id IS UNIQUE", - # Disease - # "CREATE CONSTRAINT ON (disease:Disease) ASSERT disease.id IS UNIQUE" - ] - - for query in queries: - graph.run(query) - - -def create_protein_index(graph): - """ - Creates 'Protein' node index on the attribute 'name'. - """ - - queries = ["CREATE INDEX ON :Protein(name)"] - - for query in queries: - graph.run(query) - - -def create_kegg_index(graph): - """ - Creates KEGG data node indexes: - - for 'Protein' on 'name' - - for 'Class' on 'name' - """ - - queries = ["CREATE INDEX ON :Pathway(name)", "CREATE INDEX ON :Class(name)"] - - for query in queries: - graph.run(query) - - -# ========================= Server warm-up ========================= - - -def warm_up(graph): - query = """ - MATCH (n) - OPTIONAL MATCH (n)-[r]->() - RETURN count(n.id) + count(r.combined); - """ - print("Warming up Neo4j...") - graph.run(query) - print("Done.") - - -# ========================= List queries ========================= -def get_protein_list(graph): - """ - Retrieve a list of proteins including the protein ID - and the protein name. - """ - - query = """ - MATCH (protein:Protein) - RETURN protein.external_id AS id, - protein.name AS name, - protein.species_id AS species_id - """ - - return graph.run(query) - - -def get_pathway_list(graph): - """ - Retrieve a list of pathways including the pathway ID - and the pathway name. - """ - - query = """ - MATCH (pathway:Pathway) - RETURN pathway.id AS id, - pathway.name AS name, - pathway.species_id AS species_id - """ - - return graph.run(query) - - -def get_class_list(graph): - """ - Retrieve a list of all available pathway - class names. - """ - - query = """ - MATCH (class:Class) - RETURN class.name AS name - """ - - return graph.run(query) - - -# ========================= Subgraph queries ========================= - - -def get_protein_subgraph(graph, protein_id, threshold=0): - """ - For the given protein, return the Neo4j subgraph - of the protein, all other associated proteins and - the common pathways. - """ - - # Neo4j query - query = """ - MATCH (protein:Protein { - id: $protein_id - }) - USING INDEX protein:Protein(id) - WITH protein - MATCH (protein)-[:IN]->(pathway:Pathway) - WITH protein, COLLECT(DISTINCT pathway) AS pathways - MATCH (protein)-[association:ASSOCIATION]-(other:Protein) - WHERE association.combined >= $threshold - RETURN protein, pathways, COLLECT({ - combined_score: association.combined, - other: other - }) AS associations - """ - - assert 0 <= threshold <= 1000, "Combined score threshold should be in range [0, 1000]!" - - param_dict = dict(protein_id=protein_id, threshold=threshold) - return graph.run(query, param_dict) - - -def get_proteins_subgraph(graph, protein_ids, threshold=0, external=False): - """ - For the given list of proteins, return the Neo4j - subgraph of the proteins, all associations between - them and common pathways. - """ - - # Neo4j query - query = ( - """ - MATCH (protein:Protein) - """ - + ("WHERE protein.external_id IN $protein_ids$" if external else "WHERE protein.id IN $protein_ids") - + """ - WITH COLLECT(protein) AS proteins - WITH proteins, SIZE(proteins) AS num_proteins - UNWIND RANGE(0, num_proteins - 1) AS i - UNWIND RANGE(i + 1, num_proteins - 1) AS j - WITH proteins, proteins[i] AS protein1, proteins[j] AS protein2 - OPTIONAL MATCH (protein1)-[association:ASSOCIATION]-(protein2) - WHERE association.combined >= {threshold} - WITH proteins, protein1, association, protein2 - OPTIONAL MATCH (protein1)-[:IN]->(pathway:Pathway)<-[:IN]-(protein2) - RETURN proteins AS proteins, COLLECT(DISTINCT pathway) AS pathways, COLLECT({ - protein1_id: protein1.id, - combined_score: association.combined, - protein2_id: protein2.id, - pathway_id: pathway.id - }) AS associations - """ - ) - - assert 0 <= threshold <= 1000, "Combined score threshold should be in range [0, 1000]!" - - param_dict = dict(protein_ids=protein_ids, threshold=threshold) - return graph.run(query, param_dict) - - -def get_pathway_subgraph(graph, pathway_id, threshold=0): - """ - For the given pathway, return the Neo4j subgraph - of the pathway, all contained proteins and - the class hierarchy of the pathway. - """ - - # Neo4j query - query = """ - MATCH (pathway:Pathway { - id: $pathway_id - }) - USING INDEX pathway:Pathway(id) - WITH pathway - OPTIONAL MATCH (class:Class)<-[:IN*]-(pathway) - WITH pathway, COLLECT(DISTINCT class) AS classes - MATCH (protein:Protein)-->(pathway) - WITH classes, COLLECT(protein) AS proteins - WITH classes, proteins, SIZE(proteins) AS num_proteins - UNWIND RANGE(0, num_proteins - 1) AS i - UNWIND RANGE(i + 1, num_proteins - 1) AS j - WITH classes, proteins, proteins[i] AS protein1, proteins[j] AS protein2 - MATCH (protein1)-[association:ASSOCIATION]-(protein2) - // WHERE association.combined >= $threshold - RETURN classes, proteins, COLLECT({ - protein1_id: protein1.id, - combined_score: association.combined, - protein2_id: protein2.id - }) AS associations - """ - - assert 0 <= threshold <= 1000, "Combined score threshold should be in range [0, 1000]!" - - param_dict = dict(pathway_id=pathway_id, threshold=threshold) - return graph.run(query, param_dict) - - -def get_class_subgraph(graph, name): - """ - For the given pathway class, return the Neo4j subgraph - of the class hierarchy and pathways attached to the - leaves of the hierarchy tree. - """ - - query = """ - MATCH (class:Class { - name: {name} - }) - USING INDEX class:Class(name) - // "fuzzy" search: WHERE toUpper(class.name) =~ (".*" + toUpper({name}) + ".*") - WITH class - OPTIONAL MATCH (class)<-[:IN*]-(pathway:Pathway) - RETURN class, COLLECT(DISTINCT pathway) as pathways - """ - - param_dict = dict(name=name) - return graph.run(query, param_dict) - - -def get_num_proteins(): - """Use Cypher query call to get the total number of proteins - Args: - Right now none, default organism is mus musculus - When more organism databases are implemented, - add species_id to clarify from which organism - Return: - Number of proteins(int) - """ - - # TODO: change to credentials.yml - data = subprocess.run( - [ - "cypher-shell", - "-a", - "bolt://localhost:7687", - "-u", - "neo4j", - "-p", - "pgdb", - "MATCH (n:Protein) RETURN count(n)", - ], - capture_output=True, - encoding="utf-8", - ) - - # Check standard output 'stdout' whether it's empty to control errors - if not data.stdout: - raise Exception(data.stderr) - - # all proteins of organism: background proteins - num_proteins = int(data.stdout[9:]) - return num_proteins - - -# number of proteins in the whole organism -NUM_PROTEINS = get_num_proteins() - - -def create_term_df(): - """Use Cypher query to create a dataframe with all terms and their - properties from the database - Args: - Right now none, default organism is mus musculus - When more organism databases are implemented, - add species_id to clarify from which organism - """ - - query = ( - """ - WITH "MATCH (term:Terms) - RETURN term.external_id AS id, term.name AS name, term.category AS category, term.proteins AS proteins" - AS query - CALL apoc.export.csv.query(query, "/tmp/""" - + repr(TERM_FILE) - + """.csv", {}) - YIELD file, source, format, nodes, relationships, properties, time, rows, batchSize, batches, done, data - RETURN file, source, format, nodes, relationships, properties, time, rows, batchSize, batches, done, data; - """ - ) - - with open("/tmp/query" + repr(TERM_FILE) + ".txt", "w") as query_text: - query_text.write("%s" % query) - - # Run the cypher query in cypher shell via terminal - # TODO: change to credentials.yml - - data = subprocess.run( - [ - "cypher-shell", - "-a", - "bolt://localhost:7687", - "-u", - "neo4j", - "-p", - "pgdb", - "-f", - "/tmp/query" + repr(TERM_FILE) + ".txt", - ], - capture_output=True, - encoding="utf-8", - ) - # Check standard output 'stdout' whether it's empty to control errors - if not data.stdout: - raise Exception(data.stderr) diff --git a/backend/src/enrichment.py b/backend/src/enrichment.py index aef2e2eb..35d6cfa0 100644 --- a/backend/src/enrichment.py +++ b/backend/src/enrichment.py @@ -78,9 +78,7 @@ def functional_enrichment(driver: neo4j.Driver, in_proteins, species_id: Any): # Get number of all proteins in the organism (from Cypher) bg_proteins = queries.get_number_of_proteins(driver, species_id) num_in_prot = len(in_proteins) - - # TODO: Improve runtime? - + prots = set(in_proteins) # pandas DataFrames for nodes and edges csv.field_size_limit(sys.maxsize) diff --git a/backend/src/enrichment_graph.py b/backend/src/enrichment_graph.py index 78d740af..552e127c 100644 --- a/backend/src/enrichment_graph.py +++ b/backend/src/enrichment_graph.py @@ -14,8 +14,6 @@ _BACKEND_JAR_PATH = "../gephi/target/gephi.backend-1.0-SNAPSHOT.jar" - - def get_functional_graph(list_enrichment, species_id): stopwatch = Stopwatch() @@ -23,9 +21,7 @@ def get_functional_graph(list_enrichment, species_id): if list_enrichment is not None: list_term = [i["id"] for i in list_enrichment] - # Create a query to find all associations between protein_ids and create a file with all properties - def create_query_assoc(): - # Query for terms based on protein input + driver = database.get_driver() # Execute the query and retrieve the CSV data terms, source, target, score = queries.get_terms_connected_by_overlap(driver, list_term, species_id) @@ -117,7 +113,10 @@ def create_query_assoc(): sub_proteins.append(node["attributes"]["Ensembl ID"]) else: node["color"] = "rgb(255,255,153)" - node["hidden"] = True + + for edge in sigmajs_data["edges"]: + if edge["source"] not in ensembl_sub and edge["target"] not in ensembl_sub: + edge["color"] = "rgba(255,255,153,0.2)" sigmajs_data["subgraph"] = sub_proteins diff --git a/frontend/src/components/enrichment/EnrichmentTool.vue b/frontend/src/components/enrichment/EnrichmentTool.vue index d5fb2e4f..84db68bf 100644 --- a/frontend/src/components/enrichment/EnrichmentTool.vue +++ b/frontend/src/components/enrichment/EnrichmentTool.vue @@ -367,18 +367,6 @@ font-weight: 900; font-family: 'Roboto Mono', monospace; - } - #result_select{ - appearance: none; - border-style: none; - width: 100%; - padding: 10px; - background: none; - color: white; - height: 100%; - font-weight: 900; - font-family: 'Roboto Mono', monospace; - } .tabsystem-enrichment { position: relative; @@ -415,7 +403,7 @@ .selected { background-color: rgba(255,0,0,0.7); /* Customize the selected style as desired */ } - + #visualize-favourites { margin-left:5px; background-color: rgba(0, 0, 0, 0.5); diff --git a/frontend/src/components/toolbar/select_options/BetweenesCentrality.vue b/frontend/src/components/toolbar/select_options/BetweenesCentrality.vue deleted file mode 100644 index ec0c2f09..00000000 --- a/frontend/src/components/toolbar/select_options/BetweenesCentrality.vue +++ /dev/null @@ -1,83 +0,0 @@ - - - - - \ No newline at end of file diff --git a/frontend/src/components/toolbar/select_options/EigenvectorCentrality.vue b/frontend/src/components/toolbar/select_options/EigenvectorCentrality.vue deleted file mode 100644 index 4d26d6ae..00000000 --- a/frontend/src/components/toolbar/select_options/EigenvectorCentrality.vue +++ /dev/null @@ -1,60 +0,0 @@ - - - - - \ No newline at end of file diff --git a/frontend/src/components/toolbar/select_options/HubsSelection.vue b/frontend/src/components/toolbar/select_options/HubsSelection.vue deleted file mode 100644 index 8bb2ba37..00000000 --- a/frontend/src/components/toolbar/select_options/HubsSelection.vue +++ /dev/null @@ -1,107 +0,0 @@ - - - - - \ No newline at end of file diff --git a/frontend/src/components/toolbar/select_options/PageRank.vue b/frontend/src/components/toolbar/select_options/PageRank.vue deleted file mode 100644 index 7a01033b..00000000 --- a/frontend/src/components/toolbar/select_options/PageRank.vue +++ /dev/null @@ -1,63 +0,0 @@ - - - - - \ No newline at end of file diff --git a/frontend/src/components/visualization/MainVis.vue b/frontend/src/components/visualization/MainVis.vue index 6be89d8c..c84c765c 100644 --- a/frontend/src/components/visualization/MainVis.vue +++ b/frontend/src/components/visualization/MainVis.vue @@ -654,8 +654,6 @@ export default { com.graph_state = state - if(com.active_node) return - if (state == null) { com.reset() }