Skip to content

Commit

Permalink
merged Dev -> default
Browse files Browse the repository at this point in the history
  • Loading branch information
ebergamini committed Jun 23, 2016
0 parents commit 39295bd
Show file tree
Hide file tree
Showing 964 changed files with 3,558,057 additions and 0 deletions.
71 changes: 71 additions & 0 deletions .hgignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@

syntax: regexp
\.d$
\.trace$
\.mk$
\.o$
.DS_Store
\.pyc$
\.a$
\.so$
\checkpoint.ipynb
build.conf
.sconsign.dblite
/_NetworKit.cpp$
src/python/NetworKit.egg-info/
NetworKit-Tests-D
lamg-2.2-2
CMG_101014
instances
bench*
.cproject
.project
.settings*
Benchmark*
plots
\.out$
\.egg$
\~$
\.dot$
\.project$
\.cproject$
^tracelog
\.json$
\.csv
\.dat

scripts/SparsificationEvaluation/input/
scripts/SparsificationEvaluation/output/
scripts/SparsificationEvaluation/figures
input/backbones/
build/
Doc/Website
Doc/Documentation

syntax: regexp
^output$
syntax: regexp
^sandbox$
syntax: regexp
^profiling$
syntax: regexp
^EnsembleClustering-D$
syntax: regexp
^EnsembleClustering-D$
syntax: regexp
^EnsembleClustering-D$
syntax: regexp
^EnsembleClustering-DPar$
syntax: regexp
^EnsembleClustering-O$
syntax: regexp
/NetworKit-Comm*$
syntax: regexp
/*.orig
/*.eps
syntax: regexp
^doxygen$

syntax: glob
NetworKit-Tests-*
include/NetworKit
5 changes: 5 additions & 0 deletions .pydevproject
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?eclipse-pydev version="1.0"?><pydev_project>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
</pydev_project>
346 changes: 346 additions & 0 deletions Doc/DevGuide.mdown

Large diffs are not rendered by default.

Binary file added Doc/DevGuide.pdf
Binary file not shown.
297 changes: 297 additions & 0 deletions Doc/Notebooks/GephiStreaming_UserGuide.ipynb

Large diffs are not rendered by default.

306 changes: 306 additions & 0 deletions Doc/Notebooks/Link_Prediction.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,306 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Link Prediction in NetworKit"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In this notebook, the usage of the link prediction module will be demonstrated.\n",
"\n",
"1) Load NetworKit module as well as external modules (e.g. for supervised approaches)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "SyntaxError",
"evalue": "invalid syntax (<ipython-input-1-f46d96c0d377>, line 1)",
"output_type": "error",
"traceback": [
"\u001b[0;36m File \u001b[0;32m\"<ipython-input-1-f46d96c0d377>\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m cd ../../\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
]
}
],
"source": [
"cd ../../\n",
"from networkit import linkprediction as lp, readGraph, Format\n",
"# Needed to draw plots\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plt \n",
"# Machine learning\n",
"from sklearn import *"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"2) Helper function to draw ROC curve in the notebook"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def drawROC(indexName, fpr, tpr):\n",
" plt.suptitle(indexName, fontsize=18, fontweight=\"bold\")\n",
" plt.xticks([z / 10 for z in range(11)])\n",
" plt.yticks([z / 10 for z in range(11)])\n",
" plt.xlabel(\"False positive rate\", fontsize=18)\n",
" plt.ylabel(\"True positive rate\", fontsize=18)\n",
" plt.axis([0, 1, 0, 1])\n",
" curve, = plt.plot(fpr, tpr)\n",
" plt.plot([0, 1], [0, 1], 'grey', ls='--')\n",
"def drawUnsupervisedROC(indexName, predictions):\n",
" fpr, tpr = roc.getCurve(predictions)\n",
" drawROC(indexName, fpr, tpr)\n",
"def drawSupervisedROC(indexName, predictions, testLabels):\n",
" predictionLabels = [p[1] for p in predictions]\n",
" fpr, tpr, thresholds = metrics.roc_curve(testLabels, predictionLabels)\n",
" drawROC(indexName, fpr, tpr)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"3) Load an actual network that should be analyzed"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'readGraph' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-3-ea0025a838ff>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtestGraph\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreadGraph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"input/celegans_metabolic.graph\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mFormat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMETIS\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Loaded graph with\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtestGraph\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumberOfEdges\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"edges.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'readGraph' is not defined"
]
}
],
"source": [
"testGraph = readGraph(\"input/celegans_metabolic.graph\", Format.METIS)\n",
"print(\"Loaded graph with\", testGraph.numberOfEdges(), \"edges.\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"4) Initialize ROC/PR metrics for testGraph"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"roc = lp.ROCMetric(testGraph)\n",
"pr = lp.PrecisionRecallMetric(testGraph)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"5) Generate training and feature graphs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [],
"source": [
"trainingGraph = lp.RandomLinkSampler.byPercentage(testGraph, 0.9)\n",
"print(\"Left out\", testGraph.numberOfEdges() - trainingGraph.numberOfEdges(), \"edges from test graph\")\n",
"featureGraph = lp.RandomLinkSampler.byPercentage(trainingGraph, 0.7)\n",
"print(\"Left out\", trainingGraph.numberOfEdges() - featureGraph.numberOfEdges(), \"edges from training graph\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"6) Initialize similarity indices"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"featureCommonNeighborsIndex = lp.CommonNeighborsIndex(featureGraph)\n",
"featureJaccardIndex = lp.JaccardIndex(featureGraph)\n",
"featurePreferentialAttachmentIndex = lp.PreferentialAttachmentIndex(featureGraph)\n",
"featureAdamicAdarIndex = lp.AdamicAdarIndex(featureGraph)\n",
"featureUDegreeIndex = lp.UDegreeIndex(featureGraph)\n",
"featureVDegreeIndex = lp.VDegreeIndex(featureGraph)\n",
"featureAlgebraicDistanceIndex = lp.AlgebraicDistanceIndex(featureGraph, 5, 15)\n",
"featureAlgebraicDistanceIndex.preprocess()\n",
"featureNeighborhoodDistanceIndex = lp.NeighborhoodDistanceIndex(featureGraph)\n",
"featureTotalNeighborsIndex = lp.TotalNeighborsIndex(featureGraph)\n",
"featureNeighborsMeasureIndex = lp.NeighborsMeasureIndex(featureGraph)\n",
"featureSameCommunityIndex = lp.SameCommunityIndex(featureGraph)\n",
"featureAdjustedRandIndex = lp.AdjustedRandIndex(featureGraph)\n",
"featureResourceAllocationIndex = lp.ResourceAllocationIndex(featureGraph)\n",
"\n",
"commonNeighborsIndex = lp.CommonNeighborsIndex(trainingGraph)\n",
"jaccardIndex = lp.JaccardIndex(trainingGraph)\n",
"preferentialAttachmentIndex = lp.PreferentialAttachmentIndex(trainingGraph)\n",
"adamicAdarIndex = lp.AdamicAdarIndex(trainingGraph)\n",
"uDegreeIndex = lp.UDegreeIndex(trainingGraph)\n",
"vDegreeIndex = lp.VDegreeIndex(trainingGraph)\n",
"algebraicDistanceIndex = lp.AlgebraicDistanceIndex(trainingGraph, 5, 15)\n",
"algebraicDistanceIndex.preprocess()\n",
"neighborhoodDistanceIndex = lp.NeighborhoodDistanceIndex(trainingGraph)\n",
"totalNeighborsIndex = lp.TotalNeighborsIndex(trainingGraph)\n",
"neighborsMeasureIndex = lp.NeighborsMeasureIndex(trainingGraph)\n",
"sameCommunityIndex = lp.SameCommunityIndex(trainingGraph)\n",
"adjustedRandIndex = lp.AdjustedRandIndex(trainingGraph)\n",
"resourceAllocationIndex = lp.ResourceAllocationIndex(trainingGraph)\n",
"\n",
"featureLinkPredictors = (featureCommonNeighborsIndex, featureJaccardIndex, featureAdamicAdarIndex, featurePreferentialAttachmentIndex, featureUDegreeIndex, featureVDegreeIndex, featureAlgebraicDistanceIndex, featureNeighborhoodDistanceIndex, featureTotalNeighborsIndex, featureNeighborsMeasureIndex, featureSameCommunityIndex, featureResourceAllocationIndex, featureAdjustedRandIndex)\n",
"linkPredictors = (commonNeighborsIndex, jaccardIndex, adamicAdarIndex, preferentialAttachmentIndex, uDegreeIndex, vDegreeIndex, algebraicDistanceIndex, neighborhoodDistanceIndex, totalNeighborsIndex, neighborsMeasureIndex, sameCommunityIndex, resourceAllocationIndex, adjustedRandIndex)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"7) Generate testing and training sets"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"trainingSet = lp.MissingLinksFinder(featureGraph).findAtDistance(2)\n",
"testingSet = lp.MissingLinksFinder(trainingGraph).findAtDistance(2)\n",
"\n",
"labelTrain = lp.getLabels(trainingSet, trainingGraph)\n",
"featureTrain = lp.getFeatures(trainingSet, *featureLinkPredictors)\n",
"\n",
"labelTest = lp.getLabels(testingSet, testGraph)\n",
"featureTest = lp.getFeatures(testingSet, *linkPredictors)\n",
"\n",
"print(\"Training set size:\", len(trainingSet))\n",
"print(\"Testing set size:\", len(testingSet))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Create and train classifier\n",
"dtc = ensemble.BaggingClassifier(n_estimators=25, max_features=0.2, n_jobs=-1)\n",
"dtc.fit(featureTrain, labelTrain)\n",
"# Actual prediction\n",
"predictions = dtc.predict_proba(featureTest)\n",
"drawSupervisedROC(\"DT Bagging\", predictions, labelTest)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"cnPreds = commonNeighborsIndex.runOn(testingSet)\n",
"drawUnsupervisedROC(\"Common Neighbors\", cnPreds)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"lp.LinkThresholder.byCount(cnPreds, 10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Loading

0 comments on commit 39295bd

Please sign in to comment.