Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Network relabelling #338

Merged
merged 11 commits on Nov 15, 2024
2 changes: 1 addition & 1 deletion PopPUNK/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

'''PopPUNK (POPulation Partitioning Using Nucleotide Kmers)'''

__version__ = '2.7.1'
__version__ = '2.7.2'

# Minimum sketchlib version
SKETCHLIB_MAJOR = 2
Expand Down
25 changes: 18 additions & 7 deletions PopPUNK/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,15 +559,26 @@ def outputsForCytoscape(G, G_mst, isolate_names, clustering, outPrefix, epiCsv,
save_network(G, prefix = outPrefix, suffix = suffix, use_graphml = True)

# Save each component too (useful for very large graphs)
example_cluster_title = list(clustering.keys())[0]
component_assignments, component_hist = gt.label_components(G)
for component_idx in range(len(component_hist)):
remove_list = []
for vidx, v_component in enumerate(component_assignments.a):
if v_component != component_idx:
remove_list.append(vidx)
G.remove_vertex(remove_list)
G.purge_vertices()
save_network(G, prefix = outPrefix, suffix = "_component_" + str(component_idx + 1), use_graphml = True)
# Naming must reflect the full graph size
component_name = component_idx + 1
get_component_name = (use_partial_query_graph is not None)
# Filter the graph for the current component
comp_filter = G.new_vertex_property("bool")
for v in G.vertices():
comp_filter[v] = (component_assignments[v] == component_idx)
# If using partial query graph find the component name from the clustering
if get_component_name and comp_filter[v]:
example_isolate_name = seqLabels[int(v)]
component_name = clustering[example_cluster_title][example_isolate_name]
get_component_name = False
nickjcroucher marked this conversation as resolved.
Show resolved Hide resolved
G_component = gt.GraphView(G, vfilt=comp_filter)
# Purge the component to remove unreferenced vertices (optional but recommended)
G_component.purge_vertices()
# Save the component network
save_network(G_component, prefix = outPrefix, suffix = "_component_" + str(component_name), use_graphml = True)

if G_mst != None:
isolate_labels = isolateNameToLabel(G_mst.vp.id)
Expand Down
8 changes: 2 additions & 6 deletions PopPUNK/visualise.py
Original file line number Diff line number Diff line change
Expand Up @@ -699,21 +699,17 @@ def generate_visualisations(query_db,
if gpu_graph:
genomeNetwork = cugraph_to_graph_tool(genomeNetwork, isolateNameToLabel(all_seq))
# Hard delete from network to remove samples (mask doesn't work neatly)
if include_files is not None and not use_partial_query_graph:
if include_files is not None:
genomeNetwork = remove_nodes_from_graph(genomeNetwork, all_seq, viz_subset, use_gpu = gpu_graph)
elif rank_fit is not None:
genomeNetwork = sparse_mat_to_network(sparse_mat, combined_seq, use_gpu = gpu_graph)
else:
sys.stderr.write('Cytoscape output requires a network file or lineage rank fit to be provided\n')
sys.exit(1)
# If network has been pruned then only use the appropriate subset of names - otherwise use all names
# for full network
node_labels = viz_subset if (use_partial_query_graph is not None or include_files is not None) \
else combined_seq
sys.stderr.write('Preparing outputs for cytoscape\n')
outputsForCytoscape(genomeNetwork,
mst_graph,
node_labels,
combined_seq,
isolateClustering,
output,
info_csv,
Expand Down
Loading