{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Network Analysis\n",
    "\n",
    "Structural analysis of the undirected graph read from `dataset.csv`: size and density, degree distribution, clustering, connected components, shortest-path distances, degree correlation, communities and centralities."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import collections as coll\n",
    "import csv\n",
    "import math\n",
    "import os\n",
    "import random\n",
    "\n",
    "import networkx as nx\n",
    "from networkx.drawing.nx_agraph import graphviz_layout\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import dzcnapy_plotlib as dzcnapy\n",
    "from networkx.algorithms import community as com\n",
    "import community as lou"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration\n",
    "DATASET_PATH = \"dataset.csv\"\n",
    "\n",
    "# Cells guarded by this flag were disabled by hand (\"if False\") in earlier\n",
    "# revisions; set it to True to run them.\n",
    "RUN_SLOW_CELLS = False\n",
    "\n",
    "# Number of BFS sources used to build the shortest-path distance histograms.\n",
    "# None = every node (exact, one BFS per node); an integer estimates the\n",
    "# distribution from that many randomly sampled sources.\n",
    "DISTANCE_SOURCES = None\n",
    "SEED = 42\n",
    "\n",
    "# Output folders used by plt.savefig / nx.write_graphml below.\n",
    "for out_dir in (\"plots\", \"graphs\"):\n",
    "    os.makedirs(out_dir, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the dataset: the rows of the CSV file are handed to nx.Graph as an\n",
    "# edge list (each row is expected to hold the two endpoints of one edge).\n",
    "with open(DATASET_PATH, newline=\"\") as infile:\n",
    "    G = nx.Graph(csv.reader(infile))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Nodes, Edges, Density"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "#Nodes: 37702\n",
      "#Edges: 289004\n",
      "#Edges if graph was a full mesh: 710701551\n",
      "Density: 0.04%\n"
     ]
    }
   ],
   "source": [
    "# Nodes, edges, density\n",
    "numNodes = G.number_of_nodes()\n",
    "numEdges = G.number_of_edges()\n",
    "# Edges of a complete graph on the same nodes (exact integer arithmetic)\n",
    "allEdges = numNodes * (numNodes - 1) // 2\n",
    "density = nx.density(G) * 100\n",
    "\n",
    "# constants\n",
    "N = numNodes\n",
    "M = numEdges // numNodes\n",
    "\n",
    "print(\"#Nodes:\", numNodes)\n",
    "print(\"#Edges:\", numEdges)\n",
    "print(\"#Edges if graph was a full mesh:\", allEdges)\n",
    "print(\"Density: %.2f%%\" % density)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Detecting and removing self-loops"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "#Self-loops: 0\n"
     ]
    }
   ],
   "source": [
    "ns = nx.number_of_selfloops(G)\n",
    "print(\"#Self-loops:\", ns)\n",
    "\n",
    "if ns > 0:\n",
    "    # Build the list first: removing edges while the selfloop_edges\n",
    "    # iterator is still being consumed would modify the graph mid-iteration.\n",
    "    G.remove_edges_from(list(nx.selfloop_edges(G)))\n",
    "    print(\"#Edges without self-loops:\", G.number_of_edges())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Detecting and removing isolates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "#isolates: 0\n"
     ]
    }
   ],
   "source": [
    "ni = nx.number_of_isolates(G)\n",
    "print(\"#isolates:\", ni)\n",
    "\n",
    "if ni > 0:\n",
    "    # nx.isolates is a generator over G: materialise it before deleting nodes\n",
    "    G.remove_nodes_from(list(nx.isolates(G)))\n",
    "    print(\"#Nodes without isolates\", G.number_of_nodes())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Degree"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Average, variance and standard deviation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Average degree: 15.33\n",
      "Variance 6526.21\n",
      "Standard deviation 80.78\n"
     ]
    }
   ],
   "source": [
    "# Every edge contributes to the degree of two nodes, hence 2 * E / N\n",
    "avgDeg = (2 * G.number_of_edges()) / G.number_of_nodes()\n",
    "print(\"Average degree: %.2f\" % avgDeg)\n",
    "\n",
    "deg = [d for _, d in G.degree()]\n",
    "var = np.var(deg)\n",
    "devstd = math.sqrt(var)\n",
    "\n",
    "print(\"Variance {:.2f}\".format(var))\n",
    "print(\"Standard deviation {:.2f}\".format(devstd))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Linear scale distribution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "findfont: Font family ['Arial'] not found. Falling back to DejaVu Sans.\n",
      "findfont: Font family ['Arial'] not found. Falling back to DejaVu Sans.\n"
     ]
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Degree distribution: degree value -> number of nodes with that degree.\n",
    "# The degrees are sorted so that the line plot visits them in order.\n",
    "degrees = sorted([d for n, d in G.degree()], reverse=True)\n",
    "degreeCount = coll.Counter(degrees)\n",
    "x, y = zip(*degreeCount.items())\n",
    "\n",
    "plt.figure()\n",
    "plt.plot(x, y, 'go-')\n",
    "plt.xlabel('Degree')\n",
    "plt.ylabel('Frequency')\n",
    "plt.title('Degree Distribution with linear scale')\n",
    "plt.savefig('plots/LinScaleDegreeDistr.png')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Logarithmic scale distribution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Same (degree, frequency) pairs as above, on log-log axes\n",
    "plt.figure()\n",
    "plt.scatter(x, y, s=50, c=\"green\")\n",
    "plt.xlim(0.9, max(x))\n",
    "plt.ylim(0.9, max(y))\n",
    "plt.xscale('log')\n",
    "plt.yscale('log')\n",
    "plt.xlabel(\"Degree\")\n",
    "plt.ylabel(\"Frequency\")\n",
    "plt.title('Degree Distribution with logarithmic scale')\n",
    "plt.savefig('plots/LogScaleDegreeDistr.png')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Clustering coefficient"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Transitivity coefficient of the network: 1.24%\n",
      "Average clustering coefficient 0.17\n"
     ]
    }
   ],
   "source": [
    "# Skipped unless RUN_SLOW_CELLS is enabled in the configuration cell\n",
    "if RUN_SLOW_CELLS:\n",
    "    # Transitivity: fraction of triadic closures (closed triangles) found in the network\n",
    "    trans = nx.transitivity(G) * 100\n",
    "    print(\"Transitivity coefficient of the network: %.2f%%\" % trans)\n",
    "\n",
    "    # Average clustering coefficient\n",
    "    acc = nx.average_clustering(G)\n",
    "    print(\"Average clustering coefficient {:.2f}\".format(acc))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Greatest Connected Component"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "numCC = nx.number_connected_components(G)\n",
    "\n",
    "# Largest connected component as an independent graph: it is a copy, so the\n",
    "# node attributes added to it later are not written into G.\n",
    "gcc = G.subgraph(max(nx.connected_components(G), key=len)).copy()\n",
    "\n",
    "# Skipped unless RUN_SLOW_CELLS is enabled in the configuration cell\n",
    "if RUN_SLOW_CELLS:\n",
    "    nodesGcc = gcc.nodes()\n",
    "    edgesGcc = gcc.edges()\n",
    "    nx.write_graphml(gcc, \"graphs/GCC.graphml\")\n",
    "\n",
    "    print(\"Numero di componenti connesse:\", numCC)\n",
    "    print(\"Numero nodi GCC:\", len(nodesGcc))\n",
    "    print(\"Numero archi GCC:\", len(edgesGcc))\n",
    "    print(\"Percentuale di nodi sul totale %.2f%%:\" % (len(nodesGcc) / len(G.nodes()) * 100))\n",
    "    print(\"Percentuale di archi sul totale %.2f%%:\" % (len(edgesGcc) / len(G.edges()) * 100))\n",
    "    print(\"Densità : {:.2f}\".format(nx.density(gcc) * 100))\n",
    "    print(\"Distanza media: {:.2f}\".format(nx.average_shortest_path_length(gcc)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### GCC distances\n",
    "\n",
    "The helpers below are shared by this section and by the whole-network *Distances* section."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def distance_histogram(graph, n_sources=None, seed=None):\n",
    "    \"\"\"Count shortest-path lengths between distinct nodes of `graph`.\n",
    "\n",
    "    A BFS is run from every source node and the hop distance of each node\n",
    "    it reaches is tallied, so with all nodes as sources each unordered pair\n",
    "    is counted twice (once per endpoint). Pairs with no connecting path are\n",
    "    not counted.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    graph : networkx graph\n",
    "    n_sources : int or None\n",
    "        None (default) uses every node as a BFS source. An integer smaller\n",
    "        than the number of nodes uses that many randomly sampled sources,\n",
    "        which gives an estimate of the distribution.\n",
    "    seed : seed of the random sample (ignored when no sampling takes place)\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    collections.Counter mapping distance (>= 1) -> number of\n",
    "    (source, target) pairs found at that distance.\n",
    "    \"\"\"\n",
    "    sources = list(graph.nodes())\n",
    "    if n_sources is not None and n_sources < len(sources):\n",
    "        sources = random.Random(seed).sample(sources, n_sources)\n",
    "\n",
    "    hist = coll.Counter()\n",
    "    for source in sources:\n",
    "        hist.update(nx.single_source_shortest_path_length(graph, source).values())\n",
    "    hist.pop(0, None)  # distance of every source to itself\n",
    "    return hist\n",
    "\n",
    "\n",
    "def average_distance(hist):\n",
    "    \"\"\"Mean of the distances stored in a histogram built by distance_histogram.\n",
    "\n",
    "    Returns NaN for an empty histogram.\n",
    "    \"\"\"\n",
    "    pairs = sum(hist.values())\n",
    "    if pairs == 0:\n",
    "        return float(\"nan\")\n",
    "    return sum(dist * count for dist, count in hist.items()) / pairs\n",
    "\n",
    "\n",
    "def plot_distance_distribution(hist, title, path=None):\n",
    "    \"\"\"Draw a distance histogram as a bar chart; save it to `path` when given.\"\"\"\n",
    "    plt.figure()\n",
    "    if hist:\n",
    "        dist, cnt = zip(*sorted(hist.items()))\n",
    "        plt.bar(dist, cnt, width=0.3, color='b')\n",
    "    plt.title(title)\n",
    "    plt.ylabel(\"Frequency\")\n",
    "    plt.xlabel(\"Shortest Path Distance\")\n",
    "    if path is not None:\n",
    "        plt.savefig(path)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One BFS per source node: with DISTANCE_SOURCES = None this visits every\n",
    "# node of the GCC and can take a long time on a large component.\n",
    "distHistGcc = distance_histogram(gcc, n_sources=DISTANCE_SOURCES, seed=SEED)\n",
    "\n",
    "# Not saved to disk, so the whole-network plot file is not overwritten\n",
    "plot_distance_distribution(distHistGcc, \"Distance Distribution for GCC\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### GCC Eccentricity - Diameter - Radius - Center - Periphery"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Eccentricity of every node of the GCC\n",
    "ecc = nx.eccentricity(gcc)\n",
    "\n",
    "# Store the eccentricity as a node attribute of gcc\n",
    "nx.set_node_attributes(gcc, ecc, 'eccentricity')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reuse the precomputed eccentricities instead of recomputing them\n",
    "diameterGcc = nx.diameter(gcc, e=ecc)\n",
    "radiusGcc = nx.radius(gcc, e=ecc)\n",
    "centerGcc = nx.center(gcc, e=ecc)\n",
    "peripheryGcc = nx.periphery(gcc, e=ecc)\n",
    "\n",
    "print(\"Diameter GCC:\", diameterGcc)\n",
    "print(\"Radius GCC\", radiusGcc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Flag center / periphery membership on the GCC nodes (1 = member, 0 = not)\n",
    "nx.set_node_attributes(gcc, 0, 'center')\n",
    "nx.set_node_attributes(gcc, 0, 'periphery')\n",
    "\n",
    "for node in centerGcc:\n",
    "    gcc.nodes[node][\"center\"] = 1\n",
    "\n",
    "for node in peripheryGcc:\n",
    "    gcc.nodes[node][\"periphery\"] = 1\n",
    "\n",
    "nx.write_graphml(gcc, \"graphs/GccEcc.graphml\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Distances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shortest-path distances over the whole network. Pairs of nodes with no\n",
    "# connecting path are not counted, and the distance of a node to itself is\n",
    "# excluded from both the histogram and the average.\n",
    "distHistG = distance_histogram(G, n_sources=DISTANCE_SOURCES, seed=SEED)\n",
    "avgShortPathG = average_distance(distHistG)\n",
    "\n",
    "print(\"Average Distance {:.2f}\".format(avgShortPathG))\n",
    "\n",
    "plot_distance_distribution(distHistG, \"Distance Distribution for G\",\n",
    "                           'plots/DistDistributionGlobal.png')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Degree correlation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# nx.average_degree_connectivity returns a dictionary; its keys and values\n",
    "# are separated into the two sequences my_degree and their_degree.\n",
    "my_degree, their_degree = zip(*nx.average_degree_connectivity(G).items())\n",
    "\n",
    "plt.figure()\n",
    "plt.scatter(my_degree, their_degree, s=50, c=\"b\")\n",
    "plt.xscale('log')\n",
    "plt.yscale('log')\n",
    "plt.xlabel(\"k\")\n",
    "plt.ylabel(\"$k_{nn}(k)$\")\n",
    "plt.savefig('plots/Assortativity.png')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Communities"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4-Clique Communities"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# k_clique_communities yields its results lazily: build the list so that the\n",
    "# communities really are computed before the message below is printed.\n",
    "commK = list(com.k_clique_communities(G, 4))\n",
    "print(\"Clique computed\")\n",
    "\n",
    "# Communities are numbered from 1; a node found in several communities keeps\n",
    "# the number of the last one it appears in.\n",
    "for i, cl in enumerate(commK, start=1):\n",
    "    for n in cl:\n",
    "        G.nodes[n][\"kClique\"] = i\n",
    "\n",
    "lClique = len(commK)\n",
    "print(\"Numero 4-Clique communities: \", lClique)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Modularity based communities (Louvain)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# part maps every node to the id of its community\n",
    "part = lou.best_partition(G)\n",
    "mod = lou.modularity(part, G)\n",
    "\n",
    "part_as_seriesG = pd.Series(part)\n",
    "\n",
    "print(\"Numero Louvain communities: \", part_as_seriesG.value_counts().size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the community of every node as the 'LvnG' attribute, shifted by one.\n",
    "# A new mapping is built instead of incrementing `part` in place, so\n",
    "# re-running this cell does not shift the ids a second time.\n",
    "nx.set_node_attributes(G, {node: comm + 1 for node, comm in part.items()}, 'LvnG')\n",
    "\n",
    "nx.write_graphml(G, \"graphs/GComm.graphml\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Centralities"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dgr = nx.degree_centrality(G)\n",
    "clo = nx.closeness_centrality(G)\n",
    "har = nx.harmonic_centrality(G)\n",
    "eig = nx.eigenvector_centrality(G)\n",
    "bet = nx.betweenness_centrality(G)\n",
    "pgr = nx.pagerank(G)\n",
    "hubs, authorities = nx.hits(G)\n",
    "\n",
    "# One column per centrality measure, one row per node\n",
    "centralities = pd.concat(\n",
    "    [pd.Series(c) for c in (authorities, eig, pgr, har, clo, hubs, dgr, bet)],\n",
    "    axis=1)\n",
    "\n",
    "centralities.columns = (\"Authorities\", \"Eigenvector\", \"PageRank\",\n",
    "                        \"Harmonic Closeness\", \"Closeness\", \"Hubs\",\n",
    "                        \"Degree\", \"Betweenness\")\n",
    "# Divide the harmonic closeness by the number of rows (nodes)\n",
    "centralities[\"Harmonic Closeness\"] /= centralities.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calculate the correlations for each pair of centralities\n",
    "c_df = centralities.corr()\n",
    "\n",
    "# Keep only the strict lower triangle: the diagonal and the mirrored upper\n",
    "# half are zeroed so that each pair appears once.\n",
    "ll_triangle = np.tri(c_df.shape[0], k=-1)\n",
    "c_df *= ll_triangle\n",
    "\n",
    "c_series = c_df.stack().sort_values()\n",
    "c_series.tail()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}