merged Dev -> default

kit-parco · Jun 23, 2016 · 39295bd · 39295bd
commit 39295bd
Show file tree

Hide file tree

Showing 964 changed files with 3,558,057 additions and 0 deletions.
diff --git a/.hgignore b/.hgignore
@@ -0,0 +1,71 @@
+
+syntax: regexp
+\.d$
+\.trace$
+\.mk$
+\.o$
+.DS_Store
+\.pyc$
+\.a$
+\.so$
+\checkpoint.ipynb
+build.conf
+.sconsign.dblite
+/_NetworKit.cpp$
+src/python/NetworKit.egg-info/
+NetworKit-Tests-D
+lamg-2.2-2
+CMG_101014
+instances
+bench*
+.cproject
+.project
+.settings*
+Benchmark*
+plots
+\.out$
+\.egg$
+\~$
+\.dot$
+\.project$
+\.cproject$
+^tracelog
+\.json$
+\.csv
+\.dat
+
+scripts/SparsificationEvaluation/input/
+scripts/SparsificationEvaluation/output/
+scripts/SparsificationEvaluation/figures
+input/backbones/
+build/
+Doc/Website
+Doc/Documentation
+
+syntax: regexp
+^output$
+syntax: regexp
+^sandbox$
+syntax: regexp
+^profiling$
+syntax: regexp
+^EnsembleClustering-D$
+syntax: regexp
+^EnsembleClustering-D$
+syntax: regexp
+^EnsembleClustering-D$
+syntax: regexp
+^EnsembleClustering-DPar$
+syntax: regexp
+^EnsembleClustering-O$
+syntax: regexp
+/NetworKit-Comm*$
+syntax: regexp
+/*.orig
+/*.eps
+syntax: regexp
+^doxygen$
+
+syntax: glob
+NetworKit-Tests-*
+include/NetworKit
diff --git a/.pydevproject b/.pydevproject
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?eclipse-pydev version="1.0"?><pydev_project>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
+</pydev_project>
diff --git a/Doc/DevGuide.mdown b/Doc/DevGuide.mdown
diff --git a/Doc/DevGuide.pdf b/Doc/DevGuide.pdf
diff --git a/Doc/Notebooks/GephiStreaming_UserGuide.ipynb b/Doc/Notebooks/GephiStreaming_UserGuide.ipynb
diff --git a/Doc/Notebooks/Link_Prediction.ipynb b/Doc/Notebooks/Link_Prediction.ipynb
@@ -0,0 +1,306 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Link Prediction in NetworKit"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In this notebook, the usage of the link prediction module will be demonstrated.\n",
+    "\n",
+    "1) Load NetworKit module as well as external modules (e.g. for supervised approaches)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "ename": "SyntaxError",
+     "evalue": "invalid syntax (<ipython-input-1-f46d96c0d377>, line 1)",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;36m  File \u001b[0;32m\"<ipython-input-1-f46d96c0d377>\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m    cd ../../\u001b[0m\n\u001b[0m        ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
+     ]
+    }
+   ],
+   "source": [
+    "cd ../../\n",
+    "from networkit import linkprediction as lp, readGraph, Format\n",
+    "# Needed to draw plots\n",
+    "%matplotlib inline\n",
+    "import matplotlib.pyplot as plt \n",
+    "# Machine learning\n",
+    "from sklearn import *"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "2) Helper function to draw ROC curve in the notebook"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def drawROC(indexName, fpr, tpr):\n",
+    "    plt.suptitle(indexName, fontsize=18, fontweight=\"bold\")\n",
+    "    plt.xticks([z / 10 for z in range(11)])\n",
+    "    plt.yticks([z / 10 for z in range(11)])\n",
+    "    plt.xlabel(\"False positive rate\", fontsize=18)\n",
+    "    plt.ylabel(\"True positive rate\", fontsize=18)\n",
+    "    plt.axis([0, 1, 0, 1])\n",
+    "    curve, = plt.plot(fpr, tpr)\n",
+    "    plt.plot([0, 1], [0, 1], 'grey', ls='--')\n",
+    "def drawUnsupervisedROC(indexName, predictions):\n",
+    "    fpr, tpr = roc.getCurve(predictions)\n",
+    "    drawROC(indexName, fpr, tpr)\n",
+    "def drawSupervisedROC(indexName, predictions, testLabels):\n",
+    "    predictionLabels = [p[1] for p in predictions]\n",
+    "    fpr, tpr, thresholds = metrics.roc_curve(testLabels, predictionLabels)\n",
+    "    drawROC(indexName, fpr, tpr)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "3) Load an actual network that should be analyzed"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'readGraph' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-3-ea0025a838ff>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtestGraph\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreadGraph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"input/celegans_metabolic.graph\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mFormat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMETIS\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Loaded graph with\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtestGraph\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumberOfEdges\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"edges.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'readGraph' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "testGraph = readGraph(\"input/celegans_metabolic.graph\", Format.METIS)\n",
+    "print(\"Loaded graph with\", testGraph.numberOfEdges(), \"edges.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "4) Initialize ROC/PR metrics for testGraph"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "roc = lp.ROCMetric(testGraph)\n",
+    "pr = lp.PrecisionRecallMetric(testGraph)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "5) Generate training and feature graphs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "trainingGraph = lp.RandomLinkSampler.byPercentage(testGraph, 0.9)\n",
+    "print(\"Left out\", testGraph.numberOfEdges() - trainingGraph.numberOfEdges(), \"edges from test graph\")\n",
+    "featureGraph = lp.RandomLinkSampler.byPercentage(trainingGraph, 0.7)\n",
+    "print(\"Left out\", trainingGraph.numberOfEdges() - featureGraph.numberOfEdges(), \"edges from training graph\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "6) Initialize similarity indices"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "featureCommonNeighborsIndex = lp.CommonNeighborsIndex(featureGraph)\n",
+    "featureJaccardIndex = lp.JaccardIndex(featureGraph)\n",
+    "featurePreferentialAttachmentIndex = lp.PreferentialAttachmentIndex(featureGraph)\n",
+    "featureAdamicAdarIndex = lp.AdamicAdarIndex(featureGraph)\n",
+    "featureUDegreeIndex = lp.UDegreeIndex(featureGraph)\n",
+    "featureVDegreeIndex = lp.VDegreeIndex(featureGraph)\n",
+    "featureAlgebraicDistanceIndex = lp.AlgebraicDistanceIndex(featureGraph, 5, 15)\n",
+    "featureAlgebraicDistanceIndex.preprocess()\n",
+    "featureNeighborhoodDistanceIndex = lp.NeighborhoodDistanceIndex(featureGraph)\n",
+    "featureTotalNeighborsIndex = lp.TotalNeighborsIndex(featureGraph)\n",
+    "featureNeighborsMeasureIndex = lp.NeighborsMeasureIndex(featureGraph)\n",
+    "featureSameCommunityIndex = lp.SameCommunityIndex(featureGraph)\n",
+    "featureAdjustedRandIndex = lp.AdjustedRandIndex(featureGraph)\n",
+    "featureResourceAllocationIndex = lp.ResourceAllocationIndex(featureGraph)\n",
+    "\n",
+    "commonNeighborsIndex = lp.CommonNeighborsIndex(trainingGraph)\n",
+    "jaccardIndex = lp.JaccardIndex(trainingGraph)\n",
+    "preferentialAttachmentIndex = lp.PreferentialAttachmentIndex(trainingGraph)\n",
+    "adamicAdarIndex = lp.AdamicAdarIndex(trainingGraph)\n",
+    "uDegreeIndex = lp.UDegreeIndex(trainingGraph)\n",
+    "vDegreeIndex = lp.VDegreeIndex(trainingGraph)\n",
+    "algebraicDistanceIndex = lp.AlgebraicDistanceIndex(trainingGraph, 5, 15)\n",
+    "algebraicDistanceIndex.preprocess()\n",
+    "neighborhoodDistanceIndex = lp.NeighborhoodDistanceIndex(trainingGraph)\n",
+    "totalNeighborsIndex = lp.TotalNeighborsIndex(trainingGraph)\n",
+    "neighborsMeasureIndex = lp.NeighborsMeasureIndex(trainingGraph)\n",
+    "sameCommunityIndex = lp.SameCommunityIndex(trainingGraph)\n",
+    "adjustedRandIndex = lp.AdjustedRandIndex(trainingGraph)\n",
+    "resourceAllocationIndex = lp.ResourceAllocationIndex(trainingGraph)\n",
+    "\n",
+    "featureLinkPredictors = (featureCommonNeighborsIndex, featureJaccardIndex, featureAdamicAdarIndex, featurePreferentialAttachmentIndex, featureUDegreeIndex, featureVDegreeIndex, featureAlgebraicDistanceIndex, featureNeighborhoodDistanceIndex, featureTotalNeighborsIndex, featureNeighborsMeasureIndex, featureSameCommunityIndex, featureResourceAllocationIndex, featureAdjustedRandIndex)\n",
+    "linkPredictors = (commonNeighborsIndex, jaccardIndex, adamicAdarIndex, preferentialAttachmentIndex, uDegreeIndex, vDegreeIndex, algebraicDistanceIndex, neighborhoodDistanceIndex, totalNeighborsIndex, neighborsMeasureIndex, sameCommunityIndex, resourceAllocationIndex, adjustedRandIndex)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "7) Generate testing and training sets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "trainingSet = lp.MissingLinksFinder(featureGraph).findAtDistance(2)\n",
+    "testingSet = lp.MissingLinksFinder(trainingGraph).findAtDistance(2)\n",
+    "\n",
+    "labelTrain = lp.getLabels(trainingSet, trainingGraph)\n",
+    "featureTrain = lp.getFeatures(trainingSet, *featureLinkPredictors)\n",
+    "\n",
+    "labelTest = lp.getLabels(testingSet, testGraph)\n",
+    "featureTest = lp.getFeatures(testingSet, *linkPredictors)\n",
+    "\n",
+    "print(\"Training set size:\", len(trainingSet))\n",
+    "print(\"Testing set size:\", len(testingSet))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# Create and train classifier\n",
+    "dtc = ensemble.BaggingClassifier(n_estimators=25, max_features=0.2, n_jobs=-1)\n",
+    "dtc.fit(featureTrain, labelTrain)\n",
+    "# Actual prediction\n",
+    "predictions = dtc.predict_proba(featureTest)\n",
+    "drawSupervisedROC(\"DT Bagging\", predictions, labelTest)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "cnPreds = commonNeighborsIndex.runOn(testingSet)\n",
+    "drawUnsupervisedROC(\"Common Neighbors\", cnPreds)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "lp.LinkThresholder.byCount(cnPreds, 10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.4.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}