#!/usr/bin/env python
# coding: utf-8

# # Network Analysis - Random Barabasi Model

# In[2]:


import networkx as nx
from networkx.drawing.nx_agraph import graphviz_layout
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import dzcnapy_plotlib as dzcnapy
import csv
import math
import collections as coll
from networkx.algorithms import community as com
import community as lou

# Import the dataset, only to derive N and M for the random model
with open("dataset.csv") as infile:
    csv_reader = csv.reader(infile)
    G = nx.Graph(csv_reader)

N = G.number_of_nodes()
E = G.number_of_edges()
M = int(E / N)

# Generate a Barabasi-Albert graph with the same number of nodes and
# (approximately) the same average degree as the dataset
G = nx.barabasi_albert_graph(N, M)
nx.write_graphml(G, "graphs/Barabasi.graphml")


# ## Nodes, Edges, Density

# In[8]:


# Nodes, edges, density
numNodes = G.number_of_nodes()
numEdges = G.number_of_edges()
allEdges = int(numNodes * (numNodes - 1) / 2)
density = nx.density(G) * 100

# constants
N = numNodes
M = int(numEdges / numNodes)

print("#Nodes:", numNodes)
print("#Edges:", numEdges)
print("#Edges if graph was a full mesh:", allEdges)
print("Density: %.2f%%" % density)


# ## Degree

# ### Average, variance and standard deviation

# In[5]:


avgDeg = (2 * G.number_of_edges()) / G.number_of_nodes()
print("Average degree: %.2f" % avgDeg)

deg = [G.degree(n) for n in G.nodes]
var = np.var(deg)
devstd = math.sqrt(var)
print("Variance {:.2f}".format(var))
print("Standard deviation {:.2f}".format(devstd))


# ### Linear scale distribution

# In[6]:


# Degree distribution
degrees = sorted([d for n, d in G.degree()], reverse=True)
degreeCount = coll.Counter(degrees)
x, y = zip(*degreeCount.items())

plt.figure()
plt.plot(x, y, 'go-')
plt.xlabel('Degree')
plt.ylabel('Frequency')
plt.title('Degree Distribution with linear scale')
plt.savefig('plots/LinScaleDegreeDistr.png')
plt.show()


# ### Logarithmic scale distribution

# In[7]:


plt.figure()
plt.scatter(x, y, s=50, c="green")
plt.xlim(0.9, max(x))
plt.ylim(0.9, max(y))
plt.xscale('log')
plt.yscale('log')
plt.xlabel("Degree")
plt.ylabel("Frequency")
plt.title('Degree Distribution with logarithmic scale')
plt.savefig('plots/LogScaleDegreeDistr.png')

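
# In[ ]:


# A rough sanity check, not part of the original analysis: overlay the
# theoretical Barabasi-Albert tail on the empirical log-log degree
# distribution. Asymptotically the BA model gives P(k) ~ 2*M^2 * k^-3,
# so the expected frequency of degree k is roughly N * 2*M^2 * k^-3.
# The output file name below is a new, arbitrary choice.
ks = np.arange(M, max(x) + 1, dtype=float)
expected = N * 2 * (M ** 2) * ks ** -3.0

plt.figure()
plt.scatter(x, y, s=50, c="green", label="empirical")
plt.plot(ks, expected, "k--", label="$2M^2 k^{-3}$ (theory)")
plt.xscale('log')
plt.yscale('log')
plt.xlabel("Degree")
plt.ylabel("Frequency")
plt.legend()
plt.title('Degree Distribution vs. theoretical power law')
plt.savefig('plots/LogScaleDegreeDistrTheory.png')
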

# ## Clustering coefficient

trans = nx.transitivity(G) * 100  # fraction of triadic closures (closed triangles) found in the network
print("Transitivity coefficient of the network: %.2f%%" % trans)

# Clustering coefficient
acc = nx.average_clustering(G)
print("Average clustering coefficient {:.2f}".format(acc))


# ## Greatest Connected Component

# In[11]:


numCC = nx.number_connected_components(G)
# connected_component_subgraphs() was removed in NetworkX 2.4; build the
# subgraph of the largest connected component explicitly instead
gcc = G.subgraph(max(nx.connected_components(G), key=len)).copy()
nodesgcc = gcc.nodes()
edgesgcc = gcc.edges()

nx.write_graphml(gcc, "graphs/GCC.graphml")

print("Number of connected components:", numCC)
print("Number of GCC nodes:", len(nodesgcc))
print("Number of GCC edges:", len(edgesgcc))
print("Percentage of nodes over the total: %.2f%%" % (len(nodesgcc) / len(G.nodes()) * 100))
print("Percentage of edges over the total: %.2f%%" % (len(edgesgcc) / len(G.edges()) * 100))
print("Density: {:.2f}".format(nx.density(gcc) * 100))
print("Average distance: {:.2f}".format(nx.average_shortest_path_length(gcc)))


# ### GCC distances

# In[13]:


# TODO: run this cell
# Distribution of shortest-path distances within the GCC
distDict = {}
row = []
for n in gcc.nodes():
    nodeDists = nx.single_source_shortest_path_length(gcc, n)
    for d in nodeDists:
        if nodeDists[d] in distDict:
            distDict[nodeDists[d]] = distDict[nodeDists[d]] + 1
        else:
            distDict[nodeDists[d]] = 1
        row.append(nodeDists[d])
distDict.pop(0)  # drop the zero-length paths from each node to itself
dist, cnt = zip(*distDict.items())

plt.figure()
plt.bar(dist, cnt, width=0.3, color='b')
plt.title("Distance Distribution for the GCC")
plt.ylabel("Frequency")
plt.xlabel("Shortest Path Distance")
#plt.savefig('plots/DistDistributionGlobal.png')
plt.show()


# ### GCC Eccentricity - Diameter - Radius - Center - Periphery

# In[15]:


# Eccentricity
ecc = nx.eccentricity(gcc)

# Adding eccentricity data to the GCC
# (Graph.node was removed in NetworkX 2.4, use Graph.nodes instead)
for k in ecc.keys():
    gcc.nodes[k]['eccentricity'] = ecc.get(k)


# In[ ]:


diametergcc = nx.diameter(gcc, ecc)
radiusgcc = nx.radius(gcc, ecc)
centergcc = nx.center(gcc, e=ecc)
peripherygcc = nx.periphery(gcc, e=ecc)

print("Diameter GCC:", diametergcc)
print("Radius GCC:", radiusgcc)


# In[ ]:


# Adding center/periphery membership to the GCC
nx.set_node_attributes(gcc, 0, 'center')
nx.set_node_attributes(gcc, 0, 'periphery')

for v in range(len(centergcc)):
    gcc.nodes[centergcc[v]]["center"] = 1
for v in range(len(peripherygcc)):
    gcc.nodes[peripherygcc[v]]["periphery"] = 1

nx.write_graphml(gcc, "graphs/gccEcc.graphml")


# ## Distances

# In[ ]:


# Average distance over the whole network
distDict = {}
row = []
for n in G.nodes():
    nodeDists = nx.single_source_shortest_path_length(G, n)
    for d in nodeDists:
        if nodeDists[d] in distDict:
            distDict[nodeDists[d]] = distDict[nodeDists[d]] + 1
        else:
            distDict[nodeDists[d]] = 1
        row.append(nodeDists[d])

avgShortPathG = np.average(row)
distDict.pop(0)  # drop the zero-length paths from each node to itself
dist, cnt = zip(*distDict.items())

print("Average Distance {:.2f}".format(avgShortPathG))

plt.figure()
plt.bar(dist, cnt, width=0.3, color='b')
plt.title("Distance Distribution for G")
plt.ylabel("Frequency")
plt.xlabel("Shortest Path Distance")
plt.savefig('plots/DistDistributionGlobal.png')
plt.show()

#print("Number of connected components:", nx.number_connected_components(G))
#print("Average distance:", nx.average_shortest_path_length(G))


# ## Degree correlation

# In[ ]:


# Compute the average degree connectivity dictionary and separate its keys and
# values into the two lists my_degree and their_degree
my_degree, their_degree = zip(*nx.average_degree_connectivity(G).items())

plt.figure()
plt.scatter(my_degree, their_degree, s=50, c="b")
plt.xscale('log')
plt.yscale('log')
plt.xlabel("k")
plt.ylabel("$k_{nn}(k)$")
plt.savefig('plots/Assortativity.png')

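
# In[ ]:


# A small extra check, not in the original notebook: summarise the k_nn(k)
# plot above with the scalar degree assortativity coefficient (the Pearson
# correlation of the degrees at either end of an edge). A Barabasi-Albert
# graph is expected to be roughly neutral, i.e. a coefficient close to 0.
assort = nx.degree_assortativity_coefficient(G)
print("Degree assortativity coefficient: %.4f" % assort)
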
"Betweenness") centralities["Harmonic Closeness"] /= centralities.shape[0] # Calculate the correlations for each pair of centralities c_df = centralities.corr() ll_triangle = np.tri(c_df.shape[0], k=-1) c_df *= ll_triangle c_series = c_df.stack().sort_values() c_series.tail() # In[ ]: