This commit is contained in:
Francesco Mecca 2019-12-05 16:45:57 +01:00
parent 4218915fbf
commit 0f65a7e86c
13 changed files with 1496562 additions and 0 deletions

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,424 @@
# #!/usr/bin/env python
# # coding: utf-8
# # # Network Analysis
# # In[2]:
import networkx as nx
from networkx.drawing.nx_agraph import graphviz_layout
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import dzcnapy_plotlib as dzcnapy
import csv
import math
import collections as coll
from networkx.algorithms import community as com
import community as lou
# Load the dataset and build the undirected graph from it.
# NOTE(review): presumably every CSV row is one (source, target) edge —
# confirm against dataset.csv.
# newline="" is what the csv module asks for when it is handed a file object.
with open("dataset.csv", newline="") as infile:
    csv_reader = csv.reader(infile)
    # The reader is lazy, so the graph has to be built while the file is open.
    G = nx.Graph(csv_reader)
# # ## Nodes, Edges, Density
# # In[8]:
# # Nodi, Archi, Densità
# numNodes = G.number_of_nodes()
# numEdges = G.number_of_edges()
# allEdges = int(numNodes * (numNodes-1) / 2)
# density = nx.density(G) * 100
# # constants
# N = numNodes
# M = int(numEdges / numNodes)
# print("#Nodes:", numNodes)
# print("#Edges:", numEdges)
# print("#Edges if graph was a full mesh:", allEdges)
# print("Density: %.2f%%" % density)
# # ## Detecting and removing self-loops
# # In[3]:
# ns = G.number_of_selfloops()
# print("#Self-loops:", ns)
# if ns > 0:
# # removing self-loops
# G.remove_edges_from(G.selfloop_edges())
# print("#Edges without self-loops:", G.number_of_edges())
# # ## Detecting and removing isolates
# # In[4]:
# ni = nx.number_of_isolates(G)
# print("#isolates:", ni)
# if ni > 0:
# # remove isolates
# G.remove_nodes_from(nx.isolates(G))
# print("#Nodes without isolates", G.number_of_nodes())
# # ## Degree
# # ### Average, variance and standard deviation
# # In[5]:
# avgDeg = (2*G.number_of_edges())/(G.number_of_nodes())
# print("Average degree: %.2f" % avgDeg)
# deg = [G.degree(n) for n in G.nodes]
# var = np.var(deg)
# devstd = math.sqrt(var)
# print("Variance {:.2f}".format(var))
# print("Standard deviation {:.2f}".format(devstd))
# # ### Linear scale distribution
# # In[6]:
# # Degree distribution
# degrees = sorted([d for n, d in G.degree()], reverse=True)
# degreeCount = coll.Counter(degrees)
# x, y = zip(*degreeCount.items())
# plt.figure()
# plt.plot(x, y, 'go-')
# plt.xlabel('Degree')
# plt.ylabel('Frequency')
# plt.title('Degree Distribution')
# plt.title('Degree Distribution with linear scale')
# plt.savefig('plots/LinScaleDegreeDistr.png')
# plt.show()
# # ### Logarithmic scale distribution
# # In[7]:
# plt.scatter(x, y, s=50, c="green")
# plt.xlim(0.9, max(x))
# plt.ylim(0.9, max(y))
# plt.xscale('log')
# plt.yscale('log')
# plt.xlabel("Degree")
# plt.ylabel("Frequency")
# plt.title('Degree Distribution with logarithmic scale')
# plt.savefig('plots/LogScaleDegreeDistr.png')
# # ## Clustering coefficient
# # In[8]:
# trans= nx.transitivity(G)*100
# # fraction of triadic closures (closed triangles) found in the network
# print("Transitivity coefficient of the network: %.2f%%" %trans)
# # Clustering coefficient
# acc = nx.average_clustering(G)
# print ("Average clustering coefficient {:.2f}".format(acc))
# # ## Greatest Connected Component
# # In[11]:
# numCC = nx.number_connected_components(G)
# gcc = max(nx.connected_component_subgraphs(G), key=len)
# nodesgcc = gcc.nodes()
# edgesgcc = gcc.edges()
# nx.write_graphml(gcc, "graphs/GCC.graphml");
# print("Numero di componenti connesse:", numCC)
# print("Numero nodi GCC:", len(nodesgcc))
# print("Numero archi GCC:", len(edgesgcc))
# print("Percentuale di nodi sul totale %.2f%%:" %(len(nodesgcc)/len(G.nodes())*100))
# print("Percentuale di archi sul totale %.2f%%:" %(len(edgesgcc)/len(G.edges())*100))
# print("Densità: {:.2f}".format(nx.density(gcc) * 100))
# print("Distanza media: {:.2f}".format(nx.average_shortest_path_length(gcc)))
# print('linea 165')
# # ### Distanze GCC
# # In[13]:
# if True:
# distDict = {}
# #i=1
# row = []
# for n in gcc.nodes():
# nodeDists = nx.single_source_shortest_path_length(gcc,n)
# #if i%1000 == 0:
# # print(i)
# for d in nodeDists:
# #if (int(d) in marked):
# # continue
# if nodeDists[d] in distDict:
# distDict[nodeDists[d]] = distDict[nodeDists[d]] + 1
# else:
# distDict[nodeDists[d]] = 1
# row.append(nodeDists[d])
# #i += 1
# distDict.pop(0)
# print('linea 194')
# plt.bar(distDict.keys(), distDict.values(), width=0.3, color='b')
# plt.title("Distance Distribution for G")
# plt.ylabel("Frequency")
# plt.xlabel("Shortest Path Distance")
# #plt.savefig('plots/DistDistributionGlobal.png')
# plt.show()
# print('linea 204')
# # ### GCC Eccentricity - Diameter - Radius - Center - Periphery
# # In[15]:
# #Eccentricity
# ecc = nx.eccentricity(gcc)
# # Adding eccentricity data to gcc
# for k in ecc.keys():
# gcc.node[k]['eccentricity'] = ecc.get(k)
# # In[ ]:
# diametergcc = nx.diameter(gcc, ecc)
# radiusgcc = nx.radius(gcc, ecc)
# centergcc = nx.center(gcc, e=ecc)
# peripherygcc = nx.periphery(gcc, e=ecc)
# print ("Diameter GCC:", diametergcc)
# print ("Radius GCC", radiusgcc)
# # In[ ]:
# print('linea 231')
# #Adding data to gcc
# nx.set_node_attributes(gcc, 0, 'center')
# nx.set_node_attributes(gcc, 0, 'periphery')
# for v in range(len(centergcc)):
# gcc.node[centergcc[v]]["center"] = 1
# for v in range(len(peripherygcc)):
# gcc.node[peripherygcc[v]]["periphery"] = 1
# nx.write_graphml(gcc, "graphs/gccEcc.graphml");
# # ## Distanze
# print('linea 248')
# Average distance over the whole network.
# distDict maps a shortest-path length to the number of ordered
# (source, target) pairs found at that length.  Only pairs that are actually
# reachable are counted, so nodes in different components contribute nothing.
distDict = coll.Counter()
for n in G.nodes():
    # Hop distances from n to every node reachable from it (n itself is at 0).
    nodeDists = nx.single_source_shortest_path_length(G, n)
    distDict.update(nodeDists.values())

# Take the mean straight from the histogram instead of keeping one list entry
# per node pair, which needs memory quadratic in the number of nodes.
# Zero-length (node-to-itself) entries are still part of the mean, exactly as
# before; they are removed only for the plot below.
pairCount = sum(distDict.values())
if pairCount:
    avgShortPathG = sum(d * c for d, c in distDict.items()) / pairCount
else:
    # Empty graph: there are no distances to average.
    avgShortPathG = float("nan")

# Drop the self-distance bucket before plotting; the default keeps an empty
# graph from raising KeyError.
distDict.pop(0, None)
print("Average Distance {:.2f}".format(avgShortPathG))

plt.bar(list(distDict.keys()), list(distDict.values()), width=0.3, color='b')
plt.title("Distance Distribution for G")
plt.ylabel("Frequency")
plt.xlabel("Shortest Path Distance")
plt.savefig('plots/DistDistributionGlobal.png')
plt.show()
# print('linea 285')
# #print("Numero componenti connesse:", nx.number_connected_components(G))
# #print("Distanza media:", nx.average_shortest_path_length(G))
# # ## Degree correlation
# # In[ ]:
# # The following code fragment calculates the dictionary and separates the keys and values into
# # two lists my_degree and their_degree:
# npDict = nx.average_degree_connectivity(G)
# plt.scatter(npDict.keys(), npDict.values(), s=50, c="b",)
# plt.xscale('log')
# plt.yscale('log')
# plt.xlabel("k")
# plt.ylabel("$k_{nn}(k)$")
# plt.savefig('plots/Assortativity.png')
# # ## Communities
# # ### 4-Clique Communities
# # In[5]:
# print('linea 314')
# Canned console report: the lines below are the messages that the analysis
# sections commented out in this script would print.
# NOTE(review): presumably pasted from an earlier full run so the expensive
# steps need not be repeated — confirm the figures still match dataset.csv.
print("""
#Nodes: 37702
#Edges: 289004
#Edges if graph was a full mesh: 710701551
Density: 0.04%
#Self-loops: 0
#isolates: 0
Average degree: 15.33
Variance 6526.21
Standard deviation 80.78
Transitivity coefficient of the network: 1.24%
Average clustering coefficient 0.17
Numero di componenti connesse: 2
Numero nodi GCC: 37700
Numero archi GCC: 289003
Percentuale di nodi sul totale 99.99%:
Percentuale di archi sul totale 100.00%:
Densità: 0.04
Distanza media: 3.25
linea 165
linea 194
linea 204
Diameter GCC: 11
Radius GCC 6
linea 231
linea 248
Average Distance 3.25
linea 285
linea 314
""")
# commK = com.k_clique_communities(G, 4)
# k-clique communities are disabled: computing them exhausts the available memory.
# print("Clique computed")
# lClique = 0
# for i,cl in enumerate(commK):
# lClique += 1
# for n in cl:
# G.node[n]["kClique"] = i+1
# print("Numero 4-Clique communities: ", lClique)
# ### Modularity based communities (Louvain)
# In[ ]:
# print('linea 332')
# part = lou.best_partition(G)
# mod = lou.modularity(part,G)
# print('linea 368')
# part_as_seriesG = pd.Series(part)
# print('linea 369')
# part_as_seriesG.sort_values()
# print('linea 370')
# part_as_seriesG.value_counts()
# print("Numero Louvain communities: ", part_as_seriesG.value_counts().size)
# # In[ ]:
# #Saving Communities Attribute
# nx.set_node_attributes(G, 0, 'LvnG')
# for k in part.keys():
# part[k]+= 1
# for i in part.keys():
# G.node[i]["LvnG"] = part.get(i)
# nx.write_graphml(G, "graphs/GComm.graphml");
# # ## Centralities
# # In[ ]:
# print('linea 397')
# dgr = nx.degree_centrality(G)
# clo = nx.closeness_centrality(G)
# har = nx.harmonic_centrality(G)
# eig = nx.eigenvector_centrality(G)
# bet = nx.betweenness_centrality(G)
# pgr = nx.pagerank(G)
# hits = nx.hits(G)
# centralities = pd.concat(
# [pd.Series(c) for c in (hits[1], eig, pgr, har, clo, hits[0], dgr, bet)],
# axis=1)
# centralities.columns = ("Authorities", "Eigenvector", "PageRank",
# "Harmonic Closeness", "Closeness", "Hubs",
# "Degree", "Betweenness")
# centralities["Harmonic Closeness"] /= centralities.shape[0]
# # Calculate the correlations for each pair of centralities
# c_df = centralities.corr()
# ll_triangle = np.tri(c_df.shape[0], k=-1)
# c_df *= ll_triangle
# c_series = c_df.stack().sort_values()
# c_series.tail()

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,57 @@
"""
Library for plotting graphs in DZCNAPY
"""
import matplotlib
import matplotlib.pyplot as plt
# Module-wide matplotlib defaults, applied as a side effect of importing this
# library: request the Arial font family and switch to the "grayscale" style.
# NOTE(review): when Arial is not installed matplotlib only warns and falls
# back to another font — confirm that is acceptable for the generated plots.
matplotlib.rc("font", family="Arial")
matplotlib.style.use("grayscale")
# Baseline drawing options: gray edges of width 2, bold 15pt labels and
# large pink nodes.
# NOTE(review): presumably unpacked as keyword arguments into the networkx
# drawing calls — confirm against the callers.
attrs = dict(
    edge_color="gray",
    font_family="Liberation Sans Narrow",
    font_size=15,
    font_weight="bold",
    node_color="pink",
    node_size=700,
    width=2,
)

# Variant with very wide, half-transparent edges.
thick_attrs = dict(attrs, alpha=0.5, width=15)

# Variant with small nodes and smaller labels.
small_attrs = dict(attrs, node_size=50, font_size=10)

# Same as the small variant but with mid-sized nodes.
medium_attrs = dict(small_attrs, node_size=250)
def set_extent(positions, axes, title=None):
    """
    Given node coordinates pos and the subplot,
    calculate and set its extent.

    positions -- mapping of node -> (x, y) coordinate pair
    axes      -- object to configure; either one exposing
                 set_title/set_xlim/set_ylim, or one exposing
                 title/xlim/ylim instead (e.g. the pyplot module)
    title     -- optional title; nothing is set when it is empty or None

    Tick labels on the bottom and left are hidden, then the x and y limits
    are set to the bounding box of the coordinates plus a 10% margin on
    every side. With no positions the limits are left untouched.
    """
    # Booleans are required here: a string such as "off" is truthy, so
    # matplotlib releases that no longer special-case it keep the labels on.
    axes.tick_params(labelbottom=False, labelleft=False)

    if title:
        # Resolve the setter first so the fallback is triggered only by a
        # missing attribute, never by an error raised inside the call.
        try:
            apply_title = axes.set_title
        except AttributeError:
            apply_title = axes.title
        apply_title(title)

    if not positions:
        # Nothing to measure; keep whatever limits are already in place.
        return

    x_values, y_values = zip(*positions.values())
    x_min, x_max = min(x_values), max(x_values)
    y_min, y_max = min(y_values), max(y_values)
    x_margin = (x_max - x_min) * 0.1
    y_margin = (y_max - y_min) * 0.1

    try:
        apply_xlim, apply_ylim = axes.set_xlim, axes.set_ylim
    except AttributeError:
        apply_xlim, apply_ylim = axes.xlim, axes.ylim
    apply_xlim(x_min - x_margin, x_max + x_margin)
    apply_ylim(y_min - y_margin, y_max + y_margin)
def plot(fname, save=False):
    """
    Tighten the layout of the current figure, optionally save it, and show it.

    fname -- base name of the output file, without directory or extension
    save  -- when true, the figure is also written to plots/<fname>.pdf
             at 600 dpi before being shown
    """
    plt.tight_layout()
    if save:
        target = "plots/{}.pdf".format(fname)
        plt.savefig(target, dpi=600)
    plt.show()

View file

@ -0,0 +1,22 @@
/usr/lib64/python3.6/site-packages/matplotlib/font_manager.py:1331: UserWarning: findfont: Font family ['Arial'] not found. Falling back to DejaVu Sans
(prop.get_family(), self.defaultFamily[fontext]))
Traceback (most recent call last):
File "assignment2.py", line 136, in <module>
trans= nx.transitivity(G)*100
File "/usr/lib64/python3.6/site-packages/networkx/algorithms/cluster.py", line 400, in transitivity
triangles = sum(t for v, d, t, _ in _triangles_and_degree_iter(G))
File "/usr/lib64/python3.6/site-packages/networkx/algorithms/cluster.py", line 400, in <genexpr>
triangles = sum(t for v, d, t, _ in _triangles_and_degree_iter(G))
File "/usr/lib64/python3.6/site-packages/networkx/algorithms/cluster.py", line 87, in _triangles_and_degree_iter
gen_degree = Counter(len(vs & (set(G[w]) - {w})) for w in vs)
File "/usr/lib64/python3.6/collections/__init__.py", line 535, in __init__
self.update(*args, **kwds)
File "/usr/lib64/python3.6/collections/__init__.py", line 622, in update
_count_elements(self, iterable)
File "/usr/lib64/python3.6/site-packages/networkx/algorithms/cluster.py", line 87, in <genexpr>
gen_degree = Counter(len(vs & (set(G[w]) - {w})) for w in vs)
File "/usr/lib64/python3.6/site-packages/networkx/classes/graph.py", line 458, in __getitem__
return self.adj[n]
File "/usr/lib64/python3.6/site-packages/networkx/classes/graph.py", line 336, in adj
@property
KeyboardInterrupt

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,36 @@
#Nodes: 37702
#Edges: 289004
#Edges if graph was a full mesh: 710701551
Density: 0.04%
#Self-loops: 0
#isolates: 0
Average degree: 15.33
Variance 6526.21
Standard deviation 80.78
Transitivity coefficient of the network: 1.24%
Average clustering coefficient 0.17
Numero di componenti connesse: 2
Numero nodi GCC: 37700
Numero archi GCC: 289003
Percentuale di nodi sul totale 99.99%:
Percentuale di archi sul totale 100.00%:
Densità: 0.04
Distanza media: 3.25
linea 165
linea 194
linea 204
Diameter GCC: 11
Radius GCC 6
linea 231
linea 248
Average Distance 3.25
linea 285
linea 314
Clique computed
linea 332
linea 368
linea 369
linea 370
Numero Louvain communities: 28
linea 363

Binary file not shown.

After

(image error) Size: 27 KiB

Binary file not shown.

After

(image error) Size: 24 KiB

Binary file not shown.

After

(image error) Size: 21 KiB

Binary file not shown.

After

(image error) Size: 24 KiB