This commit is contained in:
Francesco Mecca 2019-12-05 16:45:39 +01:00
parent 3f8e640377
commit 4218915fbf
13 changed files with 1721948 additions and 0 deletions

View file

@@ -0,0 +1,2 @@
*~
__pycache__

View file

@@ -0,0 +1,370 @@
#!/usr/bin/env python
# coding: utf-8
# # Network Analysis - Random Barabasi Model
# In[2]:
import networkx as nx
from networkx.drawing.nx_agraph import graphviz_layout
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import dzcnapy_plotlib as dzcnapy
import csv
import math
import collections as coll
from networkx.algorithms import community as com
import community as lou
# Import the dataset to derive N and M for the random model
with open("dataset.csv") as infile:
    csv_reader = csv.reader(infile)
    G = nx.Graph(csv_reader)  # each CSV row is read as one edge (source, target)
N = G.number_of_nodes()
E = G.number_of_edges()
M = int(E / N)  # average edges per node, reused as the BA attachment parameter
G = nx.barabasi_albert_graph(N, M)
nx.write_graphml(G, "graphs/Barabasi.graphml")
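# Sanity check (a sketch: nx.barabasi_albert_graph grows the graph from
# M initial nodes and attaches M edges per new node, so the generated
# edge count should be M * (N - M)):
assert G.number_of_nodes() == N
print("Expected edges: %d, generated: %d" % (M * (N - M), G.number_of_edges()))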
# ## Nodes, Edges, Density
# In[8]:
# Nodes, edges, density
numNodes = G.number_of_nodes()
numEdges = G.number_of_edges()
allEdges = int(numNodes * (numNodes - 1) / 2)  # edge count of a complete graph
density = nx.density(G) * 100
# Constants reused throughout the analysis
N = numNodes
M = int(numEdges / numNodes)
print("#Nodes:", numNodes)
print("#Edges:", numEdges)
print("#Edges if graph was a full mesh:", allEdges)
print("Density: %.2f%%" % density)
# ## Degree
# ### Average, variance and standard deviation
# In[5]:
avgDeg = (2*G.number_of_edges())/(G.number_of_nodes())
print("Average degree: %.2f" % avgDeg)
deg = [G.degree(n) for n in G.nodes]
var = np.var(deg)
devstd = math.sqrt(var)
print("Variance {:.2f}".format(var))
print("Standard deviation {:.2f}".format(devstd))
# ### Linear scale distribution
# In[6]:
# Degree distribution
degrees = sorted([d for n, d in G.degree()], reverse=True)
degreeCount = coll.Counter(degrees)
x, y = zip(*degreeCount.items())
plt.figure()
plt.plot(x, y, 'go-')
plt.xlabel('Degree')
plt.ylabel('Frequency')
plt.title('Degree Distribution with linear scale')
plt.savefig('plots/LinScaleDegreeDistr.png')
plt.show()
# ### Logarithmic scale distribution
# In[7]:
plt.figure()  # start a new figure; the previous one is still current in script mode
plt.scatter(x, y, s=50, c="green")
plt.xlim(0.9, max(x))
plt.ylim(0.9, max(y))
plt.xscale('log')
plt.yscale('log')
plt.xlabel("Degree")
plt.ylabel("Frequency")
plt.title('Degree Distribution with logarithmic scale')
plt.savefig('plots/LogScaleDegreeDistr.png')
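# Rough power-law exponent estimate (a sketch, not a rigorous fit: an
# ordinary least-squares line on the log-log degree histogram; a proper
# estimate would use maximum likelihood, e.g. the powerlaw package):
logx = np.log10(np.array(x, dtype=float))
logy = np.log10(np.array(y, dtype=float))
slope, _ = np.polyfit(logx, logy, 1)
print("Approximate degree-distribution exponent: %.2f" % -slope)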
# ## Clustering coefficient
trans = nx.transitivity(G) * 100
# Fraction of triadic closures (closed triangles) found in the network
print("Transitivity coefficient of the network: %.2f%%" % trans)
# Average clustering coefficient
acc = nx.average_clustering(G)
print("Average clustering coefficient {:.2f}".format(acc))
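# Order-of-magnitude comparison (an assumption-laden sketch: for
# Barabasi-Albert graphs the average clustering is known to scale
# roughly as (ln N)^2 / N, up to a model-dependent constant):
ba_clust_estimate = math.log(N) ** 2 / N
print("BA clustering scale estimate: {:.4f}".format(ba_clust_estimate))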
# ## Greatest Connected Component
# In[11]:
numCC = nx.number_connected_components(G)
# nx.connected_component_subgraphs was removed in NetworkX 2.4;
# build the greatest connected component from connected_components instead
gcc = G.subgraph(max(nx.connected_components(G), key=len)).copy()
nodesgcc = gcc.nodes()
edgesgcc = gcc.edges()
nx.write_graphml(gcc, "graphs/GCC.graphml")
print("Number of connected components:", numCC)
print("GCC node count:", len(nodesgcc))
print("GCC edge count:", len(edgesgcc))
print("Percentage of nodes in the whole graph: %.2f%%" % (len(nodesgcc) / len(G.nodes()) * 100))
print("Percentage of edges in the whole graph: %.2f%%" % (len(edgesgcc) / len(G.edges()) * 100))
print("Density: {:.2f}".format(nx.density(gcc) * 100))
print("Average distance: {:.2f}".format(nx.average_shortest_path_length(gcc)))
# ### GCC distances
# In[13]:
# TODO: run this cell
# Distance distribution over the GCC
distDict = {}
row = []
for n in gcc.nodes():
    nodeDists = nx.single_source_shortest_path_length(gcc, n)
    for d in nodeDists:
        if nodeDists[d] in distDict:
            distDict[nodeDists[d]] += 1
        else:
            distDict[nodeDists[d]] = 1
        row.append(nodeDists[d])
distDict.pop(0)  # drop zero-length paths (each node to itself)
avgDist, cnt = zip(*distDict.items())
plt.figure()
plt.bar(avgDist, cnt, width=0.3, color='b')
plt.title("Distance Distribution for the GCC")
plt.ylabel("Frequency")
plt.xlabel("Shortest Path Distance")
#plt.savefig('plots/DistDistributionGlobal.png')
plt.show()
# ### GCC Eccentricity - Diameter - Radius - Center - Periphery
# In[15]:
#Eccentricity
ecc = nx.eccentricity(gcc)
# Adding eccentricity data to gcc
for k in ecc.keys():
    gcc.nodes[k]['eccentricity'] = ecc.get(k)
# In[ ]:
diametergcc = nx.diameter(gcc, e=ecc)
radiusgcc = nx.radius(gcc, e=ecc)
centergcc = nx.center(gcc, e=ecc)
peripherygcc = nx.periphery(gcc, e=ecc)
print("Diameter GCC:", diametergcc)
print("Radius GCC:", radiusgcc)
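# Sanity check (a sketch relying on the standard metric inequality
# radius <= diameter <= 2 * radius, which holds in any connected graph):
assert radiusgcc <= diametergcc <= 2 * radiusgcc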
# In[ ]:
# Adding center/periphery flags to the GCC
nx.set_node_attributes(gcc, 0, 'center')
nx.set_node_attributes(gcc, 0, 'periphery')
for v in centergcc:
    gcc.nodes[v]["center"] = 1
for v in peripherygcc:
    gcc.nodes[v]["periphery"] = 1
nx.write_graphml(gcc, "graphs/gccEcc.graphml")
# ## Distances
# In[ ]:
# Average distance over the whole network
distDict = {}
row = []
for n in G.nodes():
    nodeDists = nx.single_source_shortest_path_length(G, n)
    for d in nodeDists:
        if nodeDists[d] in distDict:
            distDict[nodeDists[d]] += 1
        else:
            distDict[nodeDists[d]] = 1
        row.append(nodeDists[d])
# note: row also contains the N zero self-distances, a negligible bias for large N
avgShortPathG = np.average(row)
distDict.pop(0)  # drop zero-length paths (each node to itself)
avgDist, cnt = zip(*distDict.items())
print("Average Distance {:.2f}".format(avgShortPathG))
plt.figure()
plt.bar(avgDist, cnt, width=0.3, color='b')
plt.title("Distance Distribution for G")
plt.ylabel("Frequency")
plt.xlabel("Shortest Path Distance")
plt.savefig('plots/DistDistributionGlobal.png')
plt.show()
#print("Number of connected components:", nx.number_connected_components(G))
#print("Average distance:", nx.average_shortest_path_length(G))
# ## Degree correlation
# In[ ]:
# nx.average_degree_connectivity gives the average neighbor degree
# k_nn(k) for each degree k; separate the keys and values of the
# dictionary into the two lists my_degree and their_degree
my_degree, their_degree = zip(*nx.average_degree_connectivity(G).items())
plt.figure()
plt.scatter(my_degree, their_degree, s=50, c="b")
plt.xscale('log')
plt.yscale('log')
plt.xlabel("k")
plt.ylabel("$k_{nn}(k)$")
plt.savefig('plots/Assortativity.png')
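# Scalar summary of the trend above (a sketch using the standard
# NetworkX degree assortativity, i.e. the Pearson correlation between
# the degrees at the two ends of each edge):
r = nx.degree_assortativity_coefficient(G)
print("Degree assortativity coefficient: %.3f" % r)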
# ## Communities
# ### 4-Clique Communities
# In[5]:
# k-clique communities are skipped: the computation exhausts memory
# on a graph of this size
# commK = com.k_clique_communities(G, 4)
# print("Clique computed")
# lClique = 0
# for i, cl in enumerate(commK):
#     lClique += 1
#     for n in cl:
#         G.nodes[n]["kClique"] = i + 1
# print("Number of 4-clique communities: ", lClique)
# ### Modularity based communities (Louvain)
# In[ ]:
part = lou.best_partition(G)
mod = lou.modularity(part, G)  # modularity score of the best partition
part_as_seriesG = pd.Series(part)
print("Number of Louvain communities: ", part_as_seriesG.value_counts().size)
# In[ ]:
# Saving community labels as node attributes (shifted to start from 1)
nx.set_node_attributes(G, 0, 'LvnG')
for k in part.keys():
    part[k] += 1
for i in part.keys():
    G.nodes[i]["LvnG"] = part.get(i)
nx.write_graphml(G, "graphs/GComm.graphml")
# ## Centralities
# In[ ]:
dgr = nx.degree_centrality(G)
clo = nx.closeness_centrality(G)
har = nx.harmonic_centrality(G)
eig = nx.eigenvector_centrality(G)
bet = nx.betweenness_centrality(G)
pgr = nx.pagerank(G)
hits = nx.hits(G)
centralities = pd.concat(
    [pd.Series(c) for c in (hits[1], eig, pgr, har, clo, hits[0], dgr, bet)],
    axis=1)
centralities.columns = ("Authorities", "Eigenvector", "PageRank",
                        "Harmonic Closeness", "Closeness", "Hubs",
                        "Degree", "Betweenness")
# Normalize harmonic closeness by the number of nodes
centralities["Harmonic Closeness"] /= centralities.shape[0]
# Calculate the correlations for each pair of centralities
c_df = centralities.corr()
# Keep only the strict lower triangle to avoid duplicate pairs
ll_triangle = np.tri(c_df.shape[0], k=-1)
c_df *= ll_triangle
c_series = c_df.stack().sort_values()
print(c_series.tail())  # the five most correlated centrality pairs
# In[ ]:

File diff suppressed because it is too large

View file

@@ -0,0 +1,57 @@
"""
Library for plotting graphs in DZCNAPY
"""
import matplotlib
import matplotlib.pyplot as plt
matplotlib.rc("font", family="Arial")
matplotlib.style.use("grayscale")
attrs = {
    "edge_color": "gray",
    "font_family": "Liberation Sans Narrow",
    "font_size": 15,
    "font_weight": "bold",
    "node_color": "pink",
    "node_size": 700,
    "width": 2,
}
thick_attrs = attrs.copy()
thick_attrs["alpha"] = 0.5
thick_attrs["width"] = 15
small_attrs = attrs.copy()
small_attrs["node_size"] = 50
small_attrs["font_size"] = 10
medium_attrs = small_attrs.copy()
medium_attrs["node_size"] = 250
def set_extent(positions, axes, title=None):
    """
    Given node coordinates pos and the subplot,
    calculate and set its extent.
    """
    axes.tick_params(labelbottom=False)
    axes.tick_params(labelleft=False)
    if title:
        axes.set_title(title)
    x_values, y_values = zip(*positions.values())
    x_max = max(x_values)
    y_max = max(y_values)
    x_min = min(x_values)
    y_min = min(y_values)
    x_margin = (x_max - x_min) * 0.1
    y_margin = (y_max - y_min) * 0.1
    try:
        axes.set_xlim(x_min - x_margin, x_max + x_margin)
        axes.set_ylim(y_min - y_margin, y_max + y_margin)
    except AttributeError:
        # fall back to the pyplot interface when a module,
        # not an Axes object, is passed in
        axes.xlim(x_min - x_margin, x_max + x_margin)
        axes.ylim(y_min - y_margin, y_max + y_margin)

def plot(fname, save=False):
    plt.tight_layout()
    if save:
        plt.savefig("plots/{}.pdf".format(fname), dpi=600)
    plt.show()
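
# A minimal usage sketch (an illustration, not part of the original
# library; assumes networkx is installed and the plots/ directory exists):
if __name__ == "__main__":
    import networkx as nx
    demo = nx.karate_club_graph()
    pos = nx.spring_layout(demo)
    nx.draw_networkx(demo, pos, **attrs)
    set_extent(pos, plt.gca(), "Karate club demo")
    plot("karate_demo", save=False)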

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@@ -0,0 +1,23 @@
#Nodes: 37702
#Edges: 263865
#Edges if graph was a full mesh: 710701551
Density: 0.04%
Average degree: 14.00
Variance 372.92
Standard deviation 19.31
/usr/lib64/python3.6/site-packages/matplotlib/font_manager.py:1331: UserWarning: findfont: Font family ['Arial'] not found. Falling back to DejaVu Sans
  (prop.get_family(), self.defaultFamily[fontext]))
Transitivity coefficient of the network: 0.24%
Average clustering coefficient 0.00
Number of connected components: 1
GCC node count: 37702
GCC edge count: 263865
Percentage of nodes in the whole graph: 100.00%
Percentage of edges in the whole graph: 100.00%
Density: 0.04
Average distance: 3.69
Diameter GCC: 6
Radius GCC: 4
Average Distance 3.69
Number of Louvain communities: 23

Binary file not shown (added image, 24 KiB).

Binary file not shown (added image, 24 KiB).

Binary file not shown (added image, 22 KiB).

Binary file not shown (added image, 24 KiB).