From df72dc09463a73c29de1d1ac2da1740d0a62ad87 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Thu, 24 Jan 2019 15:56:11 -0600 Subject: [PATCH] Add python bindings for SSSP, incorporates some of Ishika's PR 41 --- .gitignore | 1 + CMakeLists.txt | 4 + python/.gitignore | 2 + python/cugraph.pyx | 4 + python/graph/c_graph.pxd | 78 ++++++++++ python/graph/c_graph.pyx | 217 +++++++++++++++++++++++++++ python/graph/test_graph.py | 128 ++++++++++++++++ python/nvgraph/c_nvgraph.pxd | 9 ++ python/nvgraph/c_nvgraph.pyx | 1 + python/pagerank/c_pagerank.h | 5 - python/pagerank/c_pagerank.pxd | 74 +-------- python/pagerank/pagerank_wrapper.pyx | 168 +++------------------ python/pagerank/test_pagerank.py | 6 +- python/sssp/c_sssp.pxd | 2 + python/sssp/sssp_wrapper.pyx | 61 ++++++++ python/sssp/test_sssp.py | 100 ++++++++++++ setup.py | 64 +++++++- 17 files changed, 690 insertions(+), 234 deletions(-) create mode 100644 .gitignore create mode 100644 python/.gitignore create mode 100644 python/cugraph.pyx create mode 100755 python/graph/c_graph.pxd create mode 100755 python/graph/c_graph.pyx create mode 100755 python/graph/test_graph.py create mode 100644 python/nvgraph/c_nvgraph.pxd create mode 100644 python/nvgraph/c_nvgraph.pyx delete mode 100644 python/pagerank/c_pagerank.h mode change 100644 => 100755 python/pagerank/c_pagerank.pxd mode change 100644 => 100755 python/pagerank/pagerank_wrapper.pyx mode change 100644 => 100755 python/pagerank/test_pagerank.py create mode 100644 python/sssp/c_sssp.pxd create mode 100644 python/sssp/sssp_wrapper.pyx create mode 100644 python/sssp/test_sssp.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000000..378eac25d31 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +build diff --git a/CMakeLists.txt b/CMakeLists.txt index b42a8d079e5..beec2034f54 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -126,6 +126,9 @@ if(NVG_PLUGIN) find_library(NVGRAPH_LIBRARY "nvgraph_st" HINTS "$ENV{NVGRAPH_ROOT}/lib" "$ENV{CONDA_PREFIX}/lib") + find_path(NVGRAPH_INCLUDE "nvgraph" + HINTS "$ENV{NVGRAPH_ROOT}/include" + "$ENV{CONDA_PREFIX}/include") endif() ################################################################################################### @@ -135,6 +138,7 @@ include_directories( "${CMAKE_CURRENT_SOURCE_DIR}/src" "${CUDA_INCLUDE_DIRS}" "${CUDF_INCLUDE}" + "${NVGRAPH_INCLUDE}" "${CMAKE_CURRENT_BINARY_DIR}/gunrock/" "${CMAKE_CURRENT_BINARY_DIR}/gunrock/externals/moderngpu/include" "${CMAKE_CURRENT_BINARY_DIR}/gunrock/externals/cub" diff --git a/python/.gitignore b/python/.gitignore new file mode 100644 index 00000000000..23a3af14582 --- /dev/null +++ b/python/.gitignore @@ -0,0 +1,2 @@ +*.cpp +__pycache__ diff --git a/python/cugraph.pyx b/python/cugraph.pyx new file mode 100644 index 00000000000..3b53351436a --- /dev/null +++ b/python/cugraph.pyx @@ -0,0 +1,4 @@ +include "graph/c_graph.pyx" +include "nvgraph/c_nvgraph.pyx" +include "pagerank/pagerank_wrapper.pyx" +include "sssp/sssp_wrapper.pyx" diff --git a/python/graph/c_graph.pxd b/python/graph/c_graph.pxd new file mode 100755 index 00000000000..5219813498d --- /dev/null +++ b/python/graph/c_graph.pxd @@ -0,0 +1,78 @@ +from libcpp cimport bool + +cdef extern from "cudf.h": + + ctypedef enum gdf_error: + pass + + ctypedef enum gdf_dtype: + GDF_invalid=0, + GDF_INT8, + GDF_INT16, + GDF_INT32, + GDF_INT64, + GDF_FLOAT32, + GDF_FLOAT64, + GDF_DATE32, + GDF_DATE64, + GDF_TIMESTAMP, + GDF_CATEGORY, + GDF_STRING, + N_GDF_TYPES + + ctypedef unsigned char gdf_valid_type + ctypedef size_t gdf_size_type + + struct gdf_column_: + void *data + gdf_valid_type *valid + gdf_size_type size + gdf_dtype dtype + gdf_size_type null_count + + ctypedef gdf_column_ gdf_column + + cdef gdf_error gdf_column_view_augmented(gdf_column *column, + void *data, + gdf_valid_type *valid, + gdf_size_type size, + gdf_dtype dtype, + gdf_size_type null_count) + + +cdef extern from "cugraph.h": + + struct gdf_edge_list: + gdf_column *src_indices + gdf_column *dest_indices + gdf_column *edge_data + + struct gdf_adj_list: + gdf_column *offsets + gdf_column *indices + gdf_column *edge_data + gdf_error get_vertex_identifiers(gdf_column *identifiers) + gdf_error get_source_indices(gdf_column *indices) + + struct gdf_graph: + gdf_edge_list *edgeList + gdf_adj_list *adjList + gdf_adj_list *transposedAdjList + + + cdef gdf_error gdf_edge_list_view(gdf_graph *graph, + const gdf_column *source_indices, + const gdf_column *destination_indices, + const gdf_column *edge_data) + cdef gdf_error gdf_add_edge_list(gdf_graph *graph) + cdef gdf_error gdf_delete_edge_list(gdf_graph *graph) + cdef gdf_error gdf_adj_list_view (gdf_graph *graph, + const gdf_column *offsets, + const gdf_column *indices, + const gdf_column *edge_data) + cdef gdf_error gdf_add_adj_list(gdf_graph *graph) + cdef gdf_error gdf_delete_adj_list(gdf_graph *graph) + cdef gdf_error gdf_add_transpose(gdf_graph *graph) + cdef gdf_error gdf_delete_transpose(gdf_graph *graph) + + diff --git a/python/graph/c_graph.pyx b/python/graph/c_graph.pyx new file mode 100755 index 00000000000..e0da6726956 --- /dev/null +++ b/python/graph/c_graph.pyx @@ -0,0 +1,217 @@ +from c_graph cimport * +from libcpp cimport bool +from libc.stdint cimport uintptr_t +from libc.stdlib cimport calloc, malloc, free +import cudf +from librmm_cffi import librmm as rmm +import numpy as np + +dtypes = {np.int32: GDF_INT32, np.int64: GDF_INT64, np.float32: GDF_FLOAT32, np.float64: GDF_FLOAT64} + +cdef create_column(col): + + x= malloc(sizeof(gdf_column)) + cdef gdf_column* c_col = malloc(sizeof(gdf_column)) + cdef uintptr_t data_ptr = cudf.bindings.cudf_cpp.get_column_data_ptr(col._column) + #cdef uintptr_t valid_ptr = cudf.bindings.cudf_cpp.get_column_valid_ptr(col._column) + + err = gdf_column_view_augmented(c_col, + data_ptr, + 0, + len(col), + dtypes[col.dtype.type], + col.null_count) + cudf.bindings.cudf_cpp.check_gdf_error(err) + + cdef uintptr_t col_ptr = c_col + return col_ptr + +class Graph: + """ + cuGraph graph class containing basic graph creation and transformation operations. + """ + def __init__(self): + """ + Returns + ------- + Graph : cuGraph.Graph. + Examples + -------- + >>> import cuGraph + >>> G = cuGraph.Graph() + """ + cdef gdf_graph* graph + graph = calloc(1,sizeof(gdf_graph)) + + cdef uintptr_t graph_ptr = graph + self.graph_ptr = graph_ptr + + + def add_edge_list(self, source_col, dest_col, value_col=None): + """ + Warp existing gdf columns representing an edge list in a gdf_graph. cuGraph does not own the memory used to represent this graph. This function does not allocate memory. + The cuGraph graph should not already contain the connectivity information as an edge list. + If successful, the cuGraph graph descriptor contains the newly added edge list (edge_data is optional). + Parameters + ---------- + source_indices : gdf_column + This gdf_column of size E (number of edges) contains the index of the source for each edge. + Indices must be in the range [0, V-1]. + destination_indices : gdf_column + This gdf_column of size E (number of edges) contains the index of the destination for each edge. + Indices must be in the range [0, V-1]. + edge_data (optional) : gdf_column + This pointer can be ``none``. If not, this gdf_column of size E (number of edges) contains the weiht for each edge. + The type expected to be floating point. + Examples + -------- + >>> import cuGraph + >>> import cudf + >>> from scipy.io import mmread + >>> M = ReadMtxFile(graph_file) + >>> sources = cudf.Series(M.row) + >>> destinations = cudf.Series(M.col) + >>> G = cuGraph.Graph() + >>> G.add_edge_list(sources,destinations,none) + + """ + + cdef uintptr_t graph = self.graph_ptr + cdef uintptr_t source=create_column(source_col) + cdef uintptr_t dest=create_column(dest_col) + cdef uintptr_t value + if value_col is None: + value = 0 + else: + value=create_column(value_col) + + err = gdf_edge_list_view(graph, + source, + dest, + value) + cudf.bindings.cudf_cpp.check_gdf_error(err) + + def view_edge_list(self): + """ + Display the edge list. + """ + cdef uintptr_t graph = self.graph_ptr + cdef gdf_graph* g = graph + col_size = g.edgeList.src_indices.size + + cdef uintptr_t src_col_data = g.edgeList.src_indices.data + cdef uintptr_t dest_col_data = g.edgeList.dest_indices.data + + src_data = rmm.device_array_from_ptr(src_col_data, + nelem=col_size, + dtype=np.int32, + finalizer=rmm._make_finalizer(src_col_data, 0)) + dest_data = rmm.device_array_from_ptr(dest_col_data, + nelem=col_size, + dtype=np.int32, + finalizer=rmm._make_finalizer(dest_col_data, 0)) + + return cudf.Series(src_data), cudf.Series(dest_data) + + def to_edge_list(self): + """ + Compute the edge list from adjacency list and return sources and destinations as cudf Series. + """ + cdef uintptr_t graph = self.graph_ptr + err = gdf_add_edge_list(graph) + cudf.bindings.cudf_cpp.check_gdf_error(err) + + cdef gdf_graph* g = graph + col_size = g.edgeList.src_indices.size + cdef uintptr_t src_col_data = g.edgeList.src_indices.data + cdef uintptr_t dest_col_data = g.edgeList.dest_indices.data + + src_data = rmm.device_array_from_ptr(src_col_data, + nelem=col_size, + dtype=np.int32)#, + #finalizer=rmm._make_finalizer(src_col_data, 0)) + dest_data = rmm.device_array_from_ptr(dest_col_data, + nelem=col_size, + dtype=np.int32)#, + #finalizer=rmm._make_finalizer(dest_col_data, 0)) + + return cudf.Series(src_data), cudf.Series(dest_data) + + def delete_edge_list(self): + """ + Delete the edge list. + """ + cdef uintptr_t graph = self.graph_ptr + err = gdf_delete_edge_list(graph) + cudf.bindings.cudf_cpp.check_gdf_error(err) + + def add_adj_list(self, offsets_col, indices_col, value_col): + """ + Warp existing gdf columns representing an adjacency list in a gdf_graph. + """ + cdef uintptr_t graph = self.graph_ptr + cdef uintptr_t offsets=create_column(offsets_col) + cdef uintptr_t indices=create_column(indices_col) + cdef uintptr_t value + if value_col is None: + value = 0 + else: + value=create_column(value_col) + + err = gdf_adj_list_view(graph, + offsets, + indices, + value) + cudf.bindings.cudf_cpp.check_gdf_error(err) + + def to_adj_list(self): + """ + Compute the adjacency list from edge list and return offsets and indices as cudf Series. + """ + cdef uintptr_t graph = self.graph_ptr + err = gdf_add_adj_list(graph) + cudf.bindings.cudf_cpp.check_gdf_error(err) + + cdef gdf_graph* g = graph + col_size_off = g.adjList.offsets.size + col_size_ind = g.adjList.indices.size + + cdef uintptr_t offsets_col_data = g.adjList.offsets.data + cdef uintptr_t indices_col_data = g.adjList.indices.data + + offsets_data = rmm.device_array_from_ptr(offsets_col_data, + nelem=col_size_off, + dtype=np.int32, + finalizer=rmm._make_finalizer(offsets_col_data, 0)) + indices_data = rmm.device_array_from_ptr(indices_col_data, + nelem=col_size_ind, + dtype=np.int32, + finalizer=rmm._make_finalizer(indices_col_data, 0)) + + return cudf.Series(offsets_data), cudf.Series(indices_data) + + def delete_adj_list(self): + """ + Delete the adjacency list. + """ + cdef uintptr_t graph = self.graph_ptr + err = gdf_delete_adj_list(graph) + cudf.bindings.cudf_cpp.check_gdf_error(err) + + def add_transpose(self): + """ + Compute the transposed adjacency list from the edge list and add it to the existing graph. + """ + cdef uintptr_t graph = self.graph_ptr + err = gdf_add_transpose(graph) + cudf.bindings.cudf_cpp.check_gdf_error(err) + + def delete_transpose(self): + """ + Delete the transposed adjacency list. + """ + cdef uintptr_t graph = self.graph_ptr + err = gdf_delete_transpose(graph) + cudf.bindings.cudf_cpp.check_gdf_error(err) + + diff --git a/python/graph/test_graph.py b/python/graph/test_graph.py new file mode 100755 index 00000000000..987c10d62e8 --- /dev/null +++ b/python/graph/test_graph.py @@ -0,0 +1,128 @@ +import cugraph +import cudf +import pytest +import numpy as np +import networkx as nx +from scipy.io import mmread + +print ('Networkx version : {} '.format(nx.__version__)) + + +def ReadMtxFile(mmFile): + print('Reading '+ str(mmFile) + '...') + return mmread(mmFile).asfptype() + +def compare_series(series_1, series_2): + for i in range(len(series_1)): + if(series_1[i] != series_2[i]): + return 0 + return 1 + +datasets = ['/datasets/networks/karate.mtx', '/datasets/golden_data/graphs/dblp.mtx'] + +@pytest.mark.parametrize('graph_file', datasets) + +def test_add_edge_list_to_adj_list(graph_file): + + M = ReadMtxFile(graph_file) + sources = cudf.Series(M.row) + destinations = cudf.Series(M.col) + + nnz_per_row = {r : 0 for r in range(M.get_shape()[0])} + for nnz in range(M.getnnz()): + nnz_per_row[M.row[nnz]] = 1 + nnz_per_row[M.row[nnz]] + for nnz in range(M.getnnz()): + M.data[nnz] = 1.0/float(nnz_per_row[M.row[nnz]]) + + M = M.tocsr() + if M is None : + raise TypeError('Could not read the input graph') + if M.shape[0] != M.shape[1]: + raise TypeError('Shape is not square') + + offsets_exp = cudf.Series(M.indptr) + indices_exp = cudf.Series(M.indices) + #values = cudf.Series(np.ones(len(sources), dtype = np.float64)) + + # cugraph add_egde_list to_adj_list call + G = cugraph.Graph() + G.add_edge_list(sources,destinations, None) + offsets, indices = G.to_adj_list() + + assert compare_series(offsets, offsets_exp) + assert compare_series(indices, indices_exp) + +@pytest.mark.parametrize('graph_file', datasets) + +def test_add_adj_list_to_edge_list(graph_file): + + M = ReadMtxFile(graph_file) + + nnz_per_row = {r : 0 for r in range(M.get_shape()[0])} + for nnz in range(M.getnnz()): + nnz_per_row[M.row[nnz]] = 1 + nnz_per_row[M.row[nnz]] + for nnz in range(M.getnnz()): + M.data[nnz] = 1.0/float(nnz_per_row[M.row[nnz]]) + + M = M.tocsr() + if M is None : + raise TypeError('Could not read the input graph') + if M.shape[0] != M.shape[1]: + raise TypeError('Shape is not square') + + offsets = cudf.Series(M.indptr) + indices = cudf.Series(M.indices) + #values = cudf.Series(np.ones(len(sources), dtype = np.float64)) + + M = M.tocoo() + sources_exp = cudf.Series(M.row) + destinations_exp = cudf.Series(M.col) + + # cugraph add_adj_list to_edge_list call + G = cugraph.Graph() + G.add_adj_list(offsets, indices, None) + sources, destinations = G.to_edge_list() + + assert compare_series(sources, sources_exp) + assert compare_series(destinations, destinations_exp) + +''' +@pytest.mark.parametrize('graph_file', datasets) + +def test_delete_edge_list_delete_adj_list(graph_file): + + M = ReadMtxFile(graph_file) + sources = cudf.Series(M.row) + destinations = cudf.Series(M.col) + + nnz_per_row = {r : 0 for r in range(M.get_shape()[0])} + for nnz in range(M.getnnz()): + nnz_per_row[M.row[nnz]] = 1 + nnz_per_row[M.row[nnz]] + for nnz in range(M.getnnz()): + M.data[nnz] = 1.0/float(nnz_per_row[M.row[nnz]]) + + M = M.tocsr() + if M is None : + raise TypeError('Could not read the input graph') + if M.shape[0] != M.shape[1]: + raise TypeError('Shape is not square') + + offsets = cudf.Series(M.indptr) + indices = cudf.Series(M.indices) + #values = cudf.Series(np.ones(len(sources), dtype = np.float64)) + + # cugraph delete_adj_list delete_edge_list call + G = cugraph.Graph() + G.add_edge_list(sources, destinations, None) + G.delete_edge_list() + with pytest.raises(cudf.bindings.GDFError.GDFError) as excinfo: + G.to_adj_list() + assert excinfo.value.errcode.decode() == 'GDF_INVALID_API_CALL' + + G.add_adj_list(offsets, indices, None) + G.delete_adj_list() + with pytest.raises(cudf.bindings.GDFError.GDFError) as excinfo: + G.to_edge_list() + assert excinfo.value.errcode.decode() == 'GDF_INVALID_API_CALL' + +''' diff --git a/python/nvgraph/c_nvgraph.pxd b/python/nvgraph/c_nvgraph.pxd new file mode 100644 index 00000000000..982cfcfcb62 --- /dev/null +++ b/python/nvgraph/c_nvgraph.pxd @@ -0,0 +1,9 @@ +from c_graph cimport * +from libcpp cimport bool + +cdef extern from "nvgraph_gdf.h": + + cdef gdf_error gdf_sssp_nvgraph(gdf_graph *gdf_G, + const int *source_vert, + gdf_column *sssp_distances) + diff --git a/python/nvgraph/c_nvgraph.pyx b/python/nvgraph/c_nvgraph.pyx new file mode 100644 index 00000000000..4a6f680d6f9 --- /dev/null +++ b/python/nvgraph/c_nvgraph.pyx @@ -0,0 +1 @@ +from c_nvgraph cimport * diff --git a/python/pagerank/c_pagerank.h b/python/pagerank/c_pagerank.h deleted file mode 100644 index d74f20d1422..00000000000 --- a/python/pagerank/c_pagerank.h +++ /dev/null @@ -1,5 +0,0 @@ -#include "types.h" - -//extern "C" { -#include "functions.h" -//} diff --git a/python/pagerank/c_pagerank.pxd b/python/pagerank/c_pagerank.pxd old mode 100644 new mode 100755 index 2f953b74d1f..f6b08c3a384 --- a/python/pagerank/c_pagerank.pxd +++ b/python/pagerank/c_pagerank.pxd @@ -1,75 +1,7 @@ - +from c_graph cimport * from libcpp cimport bool -cdef extern from "cudf.h": - - ctypedef enum gdf_error: - pass - - ctypedef enum gdf_dtype: - GDF_invalid=0, - GDF_INT8, - GDF_INT16, - GDF_INT32, - GDF_INT64, - GDF_FLOAT32, - GDF_FLOAT64, - GDF_DATE32, - GDF_DATE64, - GDF_TIMESTAMP, - GDF_CATEGORY, - GDF_STRING, - N_GDF_TYPES - - ctypedef unsigned char gdf_valid_type - ctypedef size_t gdf_size_type - - struct gdf_column_: - void *data - gdf_valid_type *valid - gdf_size_type size - gdf_dtype dtype - - - ctypedef gdf_column_ gdf_column - - cdef gdf_error gdf_column_view_augmented(gdf_column *column, - void *data, - gdf_valid_type *valid, - gdf_size_type size, - gdf_dtype dtype, - gdf_size_type null_count) - - cdef gdf_error gdf_column_view_new(gdf_column *column, - void *data) +cdef extern from "cugraph.h": -cdef extern from "c_pagerank.h": - - struct gdf_edge_list: - gdf_column *src_indices - gdf_column *dest_indices - gdf_column *edge_data - - struct gdf_adj_list: - gdf_column *offsets - gdf_column *indices - gdf_column *edge_data - - struct gdf_graph: - gdf_edge_list *edgeList - gdf_adj_list *adjList - gdf_adj_list *transposedAdjList - - - cdef gdf_error gdf_edge_list_view(gdf_graph *graph, - const gdf_column *source_indices, - const gdf_column *destination_indices, - const gdf_column *edge_data) - - cdef gdf_error gdf_adj_list_view (gdf_graph *graph, - const gdf_column *offsets, - const gdf_column *indices, - const gdf_column *edge_data) - - cdef gdf_error gdf_add_transpose(gdf_graph *graph) cdef gdf_error gdf_pagerank(gdf_graph *graph, gdf_column *pagerank, float alpha, float tolerance, int max_iter, bool has_guess) + diff --git a/python/pagerank/pagerank_wrapper.pyx b/python/pagerank/pagerank_wrapper.pyx old mode 100644 new mode 100755 index 11ca64a4aaa..66ded8211ac --- a/python/pagerank/pagerank_wrapper.pyx +++ b/python/pagerank/pagerank_wrapper.pyx @@ -1,4 +1,5 @@ from c_pagerank cimport * +from c_graph cimport * from libcpp cimport bool from libc.stdint cimport uintptr_t from libc.stdlib cimport calloc, malloc, free @@ -7,149 +8,6 @@ from librmm_cffi import librmm as rmm #from pygdf import Column import numpy as np -dtypes = {np.int32: GDF_INT32, np.int64: GDF_INT64, np.float32: GDF_FLOAT32, np.float64: GDF_FLOAT64} - -def _get_ctype_ptr(obj): - # The manner to access the pointers in the gdf's might change, so - # encapsulating access in the following 3 methods. They might also be - # part of future gdf versions. - return obj.device_ctypes_pointer.value - -def _get_column_data_ptr(obj): - return _get_ctype_ptr(obj._column._data.to_gpu_array()) - -def _get_column_valid_ptr(obj): - return _get_ctype_ptr(obj._column._mask.to_gpu_array()) - -#def _get_gdf_as_matrix_ptr(gdf): -# return self._get_ctype_ptr(gdf.as_gpu_matrix()) - -cdef create_column(col): - - x= malloc(sizeof(gdf_column)) - cdef gdf_column* c_col = malloc(sizeof(gdf_column)) - cdef uintptr_t data_ptr = _get_column_data_ptr(col) - #cdef uintptr_t valid_ptr = _get_column_valid_ptr(col) - - gdf_column_view_augmented(c_col, - data_ptr, - 0, - len(col), - dtypes[col.dtype.type], - col.null_count) - - cdef uintptr_t col_ptr = c_col - return col_ptr - -class Graph: - """ - cuGraph graph class containing basic graph creation and transformation operations. - """ - def __init__(self): - """ - Returns - ------- - Graph : cuGraph.Graph. - - Examples - -------- - >>> import cuGraph - >>> G = cuGraph.Graph() - """ - cdef gdf_graph* graph - graph = calloc(1,sizeof(gdf_graph)) - - cdef uintptr_t graph_ptr = graph - self.graph_ptr = graph_ptr - - - def add_edge_list(self, source_col, dest_col, value_col=None): - """ - Warp existing gdf columns representing an edge list in a gdf_graph. cuGraph does not own the memory used to represent this graph. This function does not allocate memory. - The cuGraph graph should not already contain the connectivity information as an edge list. - If successful, the cuGraph graph descriptor contains the newly added edge list (edge_data is optional). - - Parameters - ---------- - source_indices : gdf_column - This gdf_column of size E (number of edges) contains the index of the source for each edge. - Indices must be in the range [0, V-1]. - destination_indices : gdf_column - This gdf_column of size E (number of edges) contains the index of the destination for each edge. - Indices must be in the range [0, V-1]. - edge_data (optional) : gdf_column - This pointer can be ``none``. If not, this gdf_column of size E (number of edges) contains the weiht for each edge. - The type expected to be floating point. - - Examples - -------- - >>> import cuGraph - >>> import cudf - >>> from scipy.io import mmread - >>> M = ReadMtxFile(graph_file) - >>> sources = cudf.Series(M.row) - >>> destinations = cudf.Series(M.col) - >>> G = cuGraph.Graph() - >>> G.add_edge_list(sources,destinations,none) - - """ - - cdef uintptr_t graph = self.graph_ptr - cdef uintptr_t source=create_column(source_col) - cdef uintptr_t dest=create_column(dest_col) - cdef uintptr_t value - if value_col is None: - value = 0 - else: - value=create_column(value_col) - - gdf_edge_list_view(graph, - source, - dest, - value) - - def view_edge_list(self): - ##TO DO - """ - Display the edge list. - """ - cdef uintptr_t graph = self.graph_ptr - cdef gdf_graph* g = graph - size = g.edgeList.src_indices.size - print(size) - cdef object cffi_view = g.edgeList.src_indices - data = cudf._gdf.cffi_view_to_column_mem(cffi_view) - #return pygdf.Series(data) - return 0 - - #def add_adj_list(self, offsets_col, indices_col, value_col): - # """ - # Warp existing gdf columns representing an adjacency list in a gdf_graph. - # """ - # ##TO TEST - # cdef uintptr_t graph = self.graph_ptr - # cdef uintptr_t offsets=create_column(offsets_col) - # cdef uintptr_t indices=create_column(indices_col) - # cdef uintptr_t value - # if value_col is None: - # value = 0 - # else: - # value=create_column(value_col) - # - # gdf_adj_list_view(graph, - # offsets, - # indices, - # value) - - - def add_transpose(self): - """ - Compute the transposed adjacency list from the edge list and add it to the existing graph. - """ - cdef uintptr_t graph = self.graph_ptr - gdf_add_transpose(graph) - - cpdef pagerank(G,alpha=0.85, max_iter=100, tol=1.0e-5): """ Find the PageRank vertex values for a graph. cuGraph computes an approximation of the Pagerank eigenvector using the power method. @@ -183,16 +41,24 @@ cpdef pagerank(G,alpha=0.85, max_iter=100, tol=1.0e-5): >>> sources = cudf.Series(M.row) >>> destinations = cudf.Series(M.col) >>> G = cuGraph.Graph() - >>> G.add_edge_list(sources,destinations,none) + >>> G.add_edge_list(sources,destinations,None) >>> pr = cuGraph.pagerank(G, alpha = 0.85, max_iter = 500, tol = 1.0e-05) """ + cdef uintptr_t graph = G.graph_ptr - gdf_add_transpose(graph) + err = gdf_add_transpose(graph) + cudf.bindings.cudf_cpp.check_gdf_error(err) cdef gdf_graph* g = graph - size = g.transposedAdjList.offsets.size - pagerank = cudf.Series(np.zeros(g.transposedAdjList.offsets.size,dtype=np.float64)) - cdef uintptr_t pagerank_ptr = create_column(pagerank) - - gdf_pagerank(graph, pagerank_ptr, alpha, tol, max_iter, 0) - return pagerank + df = cudf.DataFrame() + df['vertex'] = cudf.Series(np.zeros(g.transposedAdjList.offsets.size-1,dtype=np.int32)) + cdef uintptr_t identifier_ptr = create_column(df['vertex']) + df['pagerank'] = cudf.Series(np.zeros(g.transposedAdjList.offsets.size-1,dtype=np.float32)) + cdef uintptr_t pagerank_ptr = create_column(df['pagerank']) + + err = g.transposedAdjList.get_vertex_identifiers(identifier_ptr) + cudf.bindings.cudf_cpp.check_gdf_error(err) + err = gdf_pagerank(graph, pagerank_ptr, alpha, tol, max_iter, 0) + cudf.bindings.cudf_cpp.check_gdf_error(err) + + return df diff --git a/python/pagerank/test_pagerank.py b/python/pagerank/test_pagerank.py old mode 100644 new mode 100755 index 7716285a22b..2e1bd49255d --- a/python/pagerank/test_pagerank.py +++ b/python/pagerank/test_pagerank.py @@ -19,18 +19,18 @@ def cugraph_Call(M, max_iter, tol, alpha): sources = cudf.Series(M.row) destinations = cudf.Series(M.col) # values = cudf.Series(np.ones(len(sources), dtype = np.float64)) - + # cugraph Pagerank Call G = cugraph.Graph() G.add_edge_list(sources, destinations, None) t1 = time.time() - pr = cugraph.pagerank(G, alpha=alpha, max_iter=max_iter, tol=tol) + df = cugraph.pagerank(G, alpha=alpha, max_iter=max_iter, tol=tol) t2 = time.time() - t1 print('Time : '+str(t2)) # Sort Pagerank values sorted_pr = [] - for i, rank in enumerate(pr): + for i, rank in enumerate(df['pagerank']): sorted_pr.append((i, rank)) return sorted(sorted_pr, key=lambda x: x[1], reverse=True) diff --git a/python/sssp/c_sssp.pxd b/python/sssp/c_sssp.pxd new file mode 100644 index 00000000000..0c24d540b34 --- /dev/null +++ b/python/sssp/c_sssp.pxd @@ -0,0 +1,2 @@ +from c_nvgraph cimport * + diff --git a/python/sssp/sssp_wrapper.pyx b/python/sssp/sssp_wrapper.pyx new file mode 100644 index 00000000000..22bdc95e61c --- /dev/null +++ b/python/sssp/sssp_wrapper.pyx @@ -0,0 +1,61 @@ +from c_sssp cimport * +from c_graph cimport * +from libcpp cimport bool +from libc.stdint cimport uintptr_t +from libc.stdlib cimport calloc, malloc, free +from libc.float cimport FLT_MAX_EXP +import cudf +from librmm_cffi import librmm as rmm +#from pygdf import Column +import numpy as np + +cpdef sssp(G, source): + """ + Compute the distance from the specified source to all vertices in the connected component. The distances column will + store the distance from the source to each vertex. + + Parameters + ---------- + graph : cuGraph.Graph + cuGraph graph descriptor, should contain the connectivity information as an edge list (edge weights are not used for this algorithm). + The transposed adjacency list will be computed if not already present. + source : int + Index of the source vertex + + Returns + ------- + distances : cudf.Series + distances[i] is the distance of vertex i from the source + + Examples + -------- + >>> M = ReadMtxFile(graph_file) + >>> sources = cudf.Series(M.row) + >>> destinations = cudf.Series(M.col) + >>> G = cuGraph.Graph() + >>> G.add_edge_list(sources,destinations,None) + >>> distances = cuGraph.sssp(G, source) + """ + + cdef uintptr_t graph = G.graph_ptr + err = gdf_add_transpose(graph) + cudf.bindings.cudf_cpp.check_gdf_error(err) + + cdef gdf_graph* g = graph + + df = cudf.DataFrame() + df['vertex'] = cudf.Series(np.zeros(g.transposedAdjList.offsets.size-1,dtype=np.int32)) + cdef uintptr_t identifier_ptr = create_column(df['vertex']) + df['distance'] = cudf.Series(np.zeros(g.transposedAdjList.offsets.size-1,dtype=np.float32)) + cdef uintptr_t distance_ptr = create_column(df['distance']) + + err = g.transposedAdjList.get_vertex_identifiers(identifier_ptr) + cudf.bindings.cudf_cpp.check_gdf_error(err) + + cdef int[1] sources + sources[0] = source + err = gdf_sssp_nvgraph(graph, sources, distance_ptr) + cudf.bindings.cudf_cpp.check_gdf_error(err) + + return df + diff --git a/python/sssp/test_sssp.py b/python/sssp/test_sssp.py new file mode 100644 index 00000000000..93db0bccd7b --- /dev/null +++ b/python/sssp/test_sssp.py @@ -0,0 +1,100 @@ +import cugraph +import cudf +import time +from scipy.io import mmread +import networkx as nx +import numpy as np +import pytest + +print('Networkx version : {} '.format(nx.__version__)) + + +def ReadMtxFile(mmFile): + print('Reading ' + str(mmFile) + '...') + return mmread(mmFile).asfptype() + + +def cugraph_Call(M, source): + + # Device data + sources = cudf.Series(M.row) + destinations = cudf.Series(M.col) + + print('sources size = ' + str(len(sources))) + print('destinations size = ' + str(len(destinations))) + + # cugraph Pagerank Call + G = cugraph.Graph() + G.add_edge_list(sources, destinations, None) + + print('cugraph Solving... ') + t1 = time.time() + + dist = cugraph.sssp(G, source) + + t2 = time.time() - t1 + print('Time : '+str(t2)) + + distances = [] + for i, d in enumerate(dist['distance']): + distances.append((i, d)) + + return distances + + +def networkx_Call(M, source): + + print('Format conversion ... ') + M = M.tocsr() + if M is None: + raise TypeError('Could not read the input graph') + if M.shape[0] != M.shape[1]: + raise TypeError('Shape is not square') + + # should be autosorted, but check just to make sure + if not M.has_sorted_indices: + print('sort_indices ... ') + M.sort_indices() + + # Directed NetworkX graph + Gnx = nx.DiGraph(M) + + print('NX Solving... ') + t1 = time.time() + + path=nx.single_source_shortest_path(Gnx, source) + + t2 = time.time() - t1 + + print('Time : ' + str(t2)) + + return path + + +datasets = ['datasets/networks/dolphins.mtx', + 'datasets/networks/karate.mtx', + 'datasets/golden_data/graphs/dblp.mtx'] + +source = [1] + + +@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('source', source) +def test_sssp(graph_file, source): + + M = ReadMtxFile(graph_file) + cu_paths = cugraph_Call(M, source) + nx_paths = networkx_Call(M, source) + + # Calculating mismatch + err = 0 + + for i in range(len(cu_paths)): + if (cu_paths[i][1] != np.finfo(np.float32).max): + if(cu_paths[i][1] != (len(nx_paths[cu_paths[i][0]])-1)): + err = err + 1 + else: + if (cu_paths[i][0] in nx_paths.keys()): + err = err + 1 + + assert err == 0 diff --git a/setup.py b/setup.py index af52dedb995..e3cfc14be6e 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,4 @@ +from os.path import join as pjoin from setuptools import setup from setuptools.extension import Extension from Cython.Build import cythonize @@ -12,6 +13,58 @@ 'cython' ] +def find_in_path(name, path): + "Find a file in a search path" + #adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ + for dir in path.split(os.pathsep): + binpath = pjoin(dir, name) + if os.path.exists(binpath): + return os.path.abspath(binpath) + return None + +def locate_cuda(): + """Locate the CUDA environment on the system + Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' + and values giving the absolute path to each directory. + Starts by looking for the CUDAHOME env variable. If not found, everything + is based on finding 'nvcc' in the PATH. + """ + + # first check if the CUDAHOME env variable is in use + if 'CUDAHOME' in os.environ: + home = os.environ['CUDAHOME'] + nvcc = pjoin(home, 'bin', 'nvcc') + else: + # otherwise, search the PATH for NVCC + nvcc = find_in_path('nvcc', os.environ['PATH']) + if nvcc is None: + raise EnvironmentError('The nvcc binary could not be ' + 'located in your $PATH. Either add it to your path, or set $CUDAHOME') + home = os.path.dirname(os.path.dirname(nvcc)) + + cudaconfig = {'home':home, 'nvcc':nvcc, + 'include': pjoin(home, 'include'), + 'lib64': pjoin(home, 'lib64')} + for k, v in iter(cudaconfig.items()): + if not os.path.exists(v): + raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) + + return cudaconfig + +def locate_nvgraph(): + if 'CONDA_PREFIX' in os.environ: + nvgraph_found = find_in_path('lib/libnvgraph_st.so', os.environ['CONDA_PREFIX']) + if nvgraph_found is None: + nvgraph_found = find_in_path('libnvgraph_st.so', os.environ['LD_LIBRARY_PATH']) + if nvgraph_found is None: + raise EnvironmentError('The nvgraph library could not be located') + nvgraph_config = {'include':pjoin(os.path.dirname(os.path.dirname(nvgraph_found)), 'include', 'nvgraph'), + 'lib':os.path.dirname(nvgraph_found)} + return nvgraph_config + +CUDA = locate_cuda() +NVGRAPH = locate_nvgraph() + # Obtain the numpy include directory. This logic works across numpy versions. try: numpy_include = numpy.get_include() @@ -20,21 +73,24 @@ # temporary fix. cudf 0.5 will have a cudf.get_include() cudf_include = os.path.normpath(sys.prefix) + '/include' - -cython_files = ['python/pagerank/pagerank_wrapper.pyx'] + +cython_files = ['python/cugraph.pyx'] extensions = [ Extension("cugraph", sources=cython_files, include_dirs=[numpy_include, cudf_include, + NVGRAPH['include'], + CUDA['include'], 'src', + 'src/plugin/nvgraph', 'include', '../gunrock', '../gunrock/externals/moderngpu/include', '../gunrock/externals/cub'], - library_dirs=[get_python_lib()], - libraries=['cugraph', 'cudf'], + library_dirs=[get_python_lib(), NVGRAPH['lib']], + libraries=['nvggdf', 'nvgraph_st', 'cugraph', 'cudf'], language='c++', extra_compile_args=['-std=c++11']) ]