avrc :3
This commit is contained in:
parent
4218915fbf
commit
0f65a7e86c
13 changed files with 1496562 additions and 0 deletions
anno3/avrc/assignments/coff
693
anno3/avrc/assignments/coff/assignment2.ipynb
Normal file
693
anno3/avrc/assignments/coff/assignment2.ipynb
Normal file
File diff suppressed because one or more lines are too long
424
anno3/avrc/assignments/coff/modello_github/assignment2.py
Normal file
424
anno3/avrc/assignments/coff/modello_github/assignment2.py
Normal file
|
@ -0,0 +1,424 @@
|
|||
# #!/usr/bin/env python
|
||||
# # coding: utf-8
|
||||
|
||||
# # # Network Analysis
|
||||
|
||||
# # In[2]:
|
||||
|
||||
|
||||
import networkx as nx
|
||||
from networkx.drawing.nx_agraph import graphviz_layout
|
||||
import matplotlib as mpl
|
||||
mpl.use('Agg')
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import dzcnapy_plotlib as dzcnapy
|
||||
import csv
|
||||
import math
|
||||
import collections as coll
|
||||
from networkx.algorithms import community as com
|
||||
import community as lou
|
||||
|
||||
# Dataset import: build the undirected graph G from the CSV edge list.
# Each CSV row is handed to networkx as one edge, so rows are expected to hold
# exactly two fields (source, target).
# NOTE(review): a header row, if present, would be loaded as an edge — confirm
# that dataset.csv has none.
# newline="" is what the csv module requires for files passed to csv.reader,
# so that line endings embedded in quoted fields are parsed correctly.
with open("dataset.csv", newline="") as infile:
    csv_reader = csv.reader(infile)
    # The reader is lazy: the graph must be built while the file is still open.
    G = nx.Graph(csv_reader)
|
||||
|
||||
|
||||
# # ## Nodes, Edges, Density
|
||||
|
||||
# # In[8]:
|
||||
|
||||
|
||||
# # Nodi, Archi, Densità
|
||||
# numNodes = G.number_of_nodes()
|
||||
# numEdges = G.number_of_edges()
|
||||
# allEdges = int(numNodes * (numNodes-1) / 2)
|
||||
# density = nx.density(G) * 100
|
||||
|
||||
# # constants
|
||||
# N = numNodes
|
||||
# M = int(numEdges / numNodes)
|
||||
|
||||
# print("#Nodes:", numNodes)
|
||||
# print("#Edges:", numEdges)
|
||||
# print("#Edges if graph was a full mesh:", allEdges)
|
||||
# print("Density: %.2f%%" % density)
|
||||
|
||||
|
||||
# # ## Detecting and removing self-loops
|
||||
|
||||
# # In[3]:
|
||||
|
||||
|
||||
# ns = G.number_of_selfloops()
|
||||
# print("#Self-loops:", ns)
|
||||
|
||||
# if ns > 0:
|
||||
# # removing self-loops
|
||||
# G.remove_edges_from(G.selfloop_edges())
|
||||
# print("#Edges without self-loops:", G.number_of_edges())
|
||||
|
||||
|
||||
# # ## Detecting and removing isolates
|
||||
|
||||
# # In[4]:
|
||||
|
||||
|
||||
# ni = nx.number_of_isolates(G)
|
||||
# print("#isolates:", ni)
|
||||
|
||||
# if ni > 0:
|
||||
# # remove isolates
|
||||
# G.remove_nodes_from(nx.isolates(G))
|
||||
# print("#Nodes without isolates", G.number_of_nodes())
|
||||
|
||||
|
||||
# # ## Degree
|
||||
|
||||
# # ### Average, variance and standard deviation
|
||||
|
||||
# # In[5]:
|
||||
|
||||
|
||||
# avgDeg = (2*G.number_of_edges())/(G.number_of_nodes())
|
||||
# print("Average degree: %.2f" % avgDeg)
|
||||
|
||||
# deg = [G.degree(n) for n in G.nodes]
|
||||
# var = np.var(deg)
|
||||
# devstd = math.sqrt(var)
|
||||
|
||||
# print("Variance {:.2f}".format(var))
|
||||
# print("Standard deviation {:.2f}".format(devstd))
|
||||
|
||||
|
||||
# # ### Linear scale distribution
|
||||
|
||||
# # In[6]:
|
||||
|
||||
|
||||
# # Degree distribution
|
||||
# degrees = sorted([d for n, d in G.degree()], reverse=True)
|
||||
# degreeCount = coll.Counter(degrees)
|
||||
# x, y = zip(*degreeCount.items())
|
||||
|
||||
# plt.figure()
|
||||
# plt.plot(x, y, 'go-')
|
||||
# plt.xlabel('Degree')
|
||||
# plt.ylabel('Frequency')
|
||||
# plt.title('Degree Distribution')
|
||||
# plt.title('Degree Distribution with linear scale')
|
||||
# plt.savefig('plots/LinScaleDegreeDistr.png')
|
||||
# plt.show()
|
||||
|
||||
|
||||
# # ### Logarithmic scale distribution
|
||||
|
||||
# # In[7]:
|
||||
|
||||
|
||||
# plt.scatter(x, y, s=50, c="green")
|
||||
# plt.xlim(0.9, max(x))
|
||||
# plt.ylim(0.9, max(y))
|
||||
# plt.xscale('log')
|
||||
# plt.yscale('log')
|
||||
# plt.xlabel("Degree")
|
||||
# plt.ylabel("Frequency")
|
||||
# plt.title('Degree Distribution with logarithmic scale')
|
||||
# plt.savefig('plots/LogScaleDegreeDistr.png')
|
||||
|
||||
|
||||
# # ## Clustering coefficient
|
||||
|
||||
# # In[8]:
|
||||
|
||||
# trans= nx.transitivity(G)*100
|
||||
# # fraction of triadic closures (closed triangles) found in the network
|
||||
# print("Transitivity coefficient of the network: %.2f%%" %trans)
|
||||
|
||||
# # Clustering coefficient
|
||||
# acc = nx.average_clustering(G)
|
||||
# print ("Average clustering coefficient {:.2f}".format(acc))
|
||||
|
||||
|
||||
# # ## Greatest Connected Component
|
||||
|
||||
# # In[11]:
|
||||
|
||||
|
||||
|
||||
# numCC = nx.number_connected_components(G)
|
||||
# gcc = max(nx.connected_component_subgraphs(G), key=len)
|
||||
|
||||
# nodesgcc = gcc.nodes()
|
||||
# edgesgcc = gcc.edges()
|
||||
# nx.write_graphml(gcc, "graphs/GCC.graphml");
|
||||
|
||||
# print("Numero di componenti connesse:", numCC)
|
||||
# print("Numero nodi GCC:", len(nodesgcc))
|
||||
# print("Numero archi GCC:", len(edgesgcc))
|
||||
# print("Percentuale di nodi sul totale %.2f%%:" %(len(nodesgcc)/len(G.nodes())*100))
|
||||
# print("Percentuale di archi sul totale %.2f%%:" %(len(edgesgcc)/len(G.edges())*100))
|
||||
# print("Densità: {:.2f}".format(nx.density(gcc) * 100))
|
||||
# print("Distanza media: {:.2f}".format(nx.average_shortest_path_length(gcc)))
|
||||
# print('linea 165')
|
||||
|
||||
|
||||
# # ### Distanze GCC
|
||||
|
||||
# # In[13]:
|
||||
|
||||
|
||||
|
||||
# if True:
|
||||
# distDict = {}
|
||||
# #i=1
|
||||
# row = []
|
||||
# for n in gcc.nodes():
|
||||
# nodeDists = nx.single_source_shortest_path_length(gcc,n)
|
||||
# #if i%1000 == 0:
|
||||
# # print(i)
|
||||
|
||||
# for d in nodeDists:
|
||||
# #if (int(d) in marked):
|
||||
# # continue
|
||||
# if nodeDists[d] in distDict:
|
||||
# distDict[nodeDists[d]] = distDict[nodeDists[d]] + 1
|
||||
# else:
|
||||
# distDict[nodeDists[d]] = 1
|
||||
# row.append(nodeDists[d])
|
||||
# #i += 1
|
||||
|
||||
# distDict.pop(0)
|
||||
|
||||
# print('linea 194')
|
||||
# plt.bar(distDict.keys(), distDict.values(), width=0.3, color='b')
|
||||
# plt.title("Distance Distribution for G")
|
||||
# plt.ylabel("Frequency")
|
||||
# plt.xlabel("Shortest Path Distance")
|
||||
# #plt.savefig('plots/DistDistributionGlobal.png')
|
||||
# plt.show()
|
||||
|
||||
|
||||
# print('linea 204')
|
||||
# # ### GCC Eccentricity - Diameter - Radius - Center - Periphery
|
||||
|
||||
# # In[15]:
|
||||
|
||||
|
||||
# #Eccentricity
|
||||
# ecc = nx.eccentricity(gcc)
|
||||
|
||||
# # Adding eccentricity data to gcc
|
||||
# for k in ecc.keys():
|
||||
# gcc.node[k]['eccentricity'] = ecc.get(k)
|
||||
|
||||
|
||||
# # In[ ]:
|
||||
|
||||
|
||||
# diametergcc = nx.diameter(gcc, ecc)
|
||||
# radiusgcc = nx.radius(gcc, ecc)
|
||||
# centergcc = nx.center(gcc, e=ecc)
|
||||
# peripherygcc = nx.periphery(gcc, e=ecc)
|
||||
|
||||
# print ("Diameter GCC:", diametergcc)
|
||||
# print ("Radius GCC", radiusgcc)
|
||||
|
||||
|
||||
# # In[ ]:
|
||||
|
||||
# print('linea 231')
|
||||
|
||||
# #Adding data to gcc
|
||||
# nx.set_node_attributes(gcc, 0, 'center')
|
||||
# nx.set_node_attributes(gcc, 0, 'periphery')
|
||||
|
||||
# for v in range(len(centergcc)):
|
||||
# gcc.node[centergcc[v]]["center"] = 1
|
||||
|
||||
# for v in range(len(peripherygcc)):
|
||||
# gcc.node[peripherygcc[v]]["periphery"] = 1
|
||||
|
||||
# nx.write_graphml(gcc, "graphs/gccEcc.graphml");
|
||||
|
||||
|
||||
# # ## Distanze
|
||||
# print('linea 248')
|
||||
|
||||
# Average distance over the whole network.
#
# A breadth-first search is started from every node and the hop counts it
# returns are accumulated into a histogram: distance -> number of ordered
# (source, target) pairs at that distance. Only the histogram is kept; storing
# every single pairwise distance in a list needs O(N^2) memory, which is
# prohibitive for a graph with tens of thousands of nodes.
distDict = coll.Counter()
for n in G.nodes():
    distDict.update(nx.single_source_shortest_path_length(G, n).values())

# Distance 0 is each node's distance to itself. It is not a real path, so it
# is removed before both the average and the plot (the default avoids a
# KeyError when the graph is empty).
distDict.pop(0, None)

# Mean distance over all reachable ordered pairs, derived from the histogram.
# Unreachable pairs (different connected components) never appear in the BFS
# results and therefore do not contribute.
numPairs = sum(distDict.values())
if numPairs:
    avgShortPathG = sum(d * c for d, c in distDict.items()) / numPairs
else:
    avgShortPathG = float("nan")

print("Average Distance {:.2f}".format(avgShortPathG))

plt.bar(list(distDict.keys()), list(distDict.values()), width=0.3, color='b')
plt.title("Distance Distribution for G")
plt.ylabel("Frequency")
plt.xlabel("Shortest Path Distance")
plt.savefig('plots/DistDistributionGlobal.png')
plt.show()
|
||||
|
||||
|
||||
# print('linea 285')
|
||||
# #print("Numero componenti connesse:", nx.number_connected_components(G))
|
||||
# #print("Distanza media:", nx.average_shortest_path_length(G))
|
||||
|
||||
|
||||
# # ## Degree correlation
|
||||
|
||||
# # In[ ]:
|
||||
|
||||
|
||||
# # The following code fragment calculates the dictionary and separates the keys and values into
|
||||
# # two lists my_degree and their_degree:
|
||||
|
||||
# npDict = nx.average_degree_connectivity(G)
|
||||
|
||||
# plt.scatter(npDict.keys(), npDict.values(), s=50, c="b",)
|
||||
# plt.xscale('log')
|
||||
# plt.yscale('log')
|
||||
# plt.xlabel("k")
|
||||
# plt.ylabel("$k_{nn}(k)$")
|
||||
# plt.savefig('plots/Assortativity.png')
|
||||
|
||||
|
||||
# # ## Communities
|
||||
|
||||
# # ### 4-Clique Communities
|
||||
|
||||
# # In[5]:
|
||||
|
||||
# print('linea 314')
|
||||
|
||||
# Prints a fixed, pre-recorded block of text instead of recomputing anything:
# the sections of this script that originally produced these figures are
# commented out above. The text appears to be console output captured from an
# earlier complete run (the same lines appear in log.txt) — TODO confirm.
print("""
|
||||
#Nodes: 37702
|
||||
#Edges: 289004
|
||||
#Edges if graph was a full mesh: 710701551
|
||||
Density: 0.04%
|
||||
#Self-loops: 0
|
||||
#isolates: 0
|
||||
Average degree: 15.33
|
||||
Variance 6526.21
|
||||
Standard deviation 80.78
|
||||
Transitivity coefficient of the network: 1.24%
|
||||
Average clustering coefficient 0.17
|
||||
Numero di componenti connesse: 2
|
||||
Numero nodi GCC: 37700
|
||||
Numero archi GCC: 289003
|
||||
Percentuale di nodi sul totale 99.99%:
|
||||
Percentuale di archi sul totale 100.00%:
|
||||
Densità: 0.04
|
||||
Distanza media: 3.25
|
||||
linea 165
|
||||
linea 194
|
||||
linea 204
|
||||
Diameter GCC: 11
|
||||
Radius GCC 6
|
||||
linea 231
|
||||
linea 248
|
||||
Average Distance 3.25
|
||||
linea 285
|
||||
linea 314
|
||||
""")
|
||||
|
||||
# commK = com.k_clique_communities(G, 4)
|
||||
|
||||
# k-clique communities are not computed: the algorithm runs out of memory on this graph
|
||||
|
||||
# print("Clique computed")
|
||||
|
||||
# lClique = 0
|
||||
# for i,cl in enumerate(commK):
|
||||
# lClique += 1
|
||||
# for n in cl:
|
||||
# G.node[n]["kClique"] = i+1
|
||||
|
||||
# print("Numero 4-Clique communities: ", lClique)
|
||||
|
||||
|
||||
# ### Modularity based communities (Louvain)
|
||||
|
||||
# In[ ]:
|
||||
|
||||
# print('linea 332')
|
||||
|
||||
# part = lou.best_partition(G)
|
||||
# mod = lou.modularity(part,G)
|
||||
|
||||
# print('linea 368')
|
||||
# part_as_seriesG = pd.Series(part)
|
||||
# print('linea 369')
|
||||
# part_as_seriesG.sort_values()
|
||||
# print('linea 370')
|
||||
# part_as_seriesG.value_counts()
|
||||
|
||||
# print("Numero Louvain communities: ", part_as_seriesG.value_counts().size)
|
||||
|
||||
|
||||
# # In[ ]:
|
||||
|
||||
|
||||
# #Saving Communities Attribute
|
||||
# nx.set_node_attributes(G, 0, 'LvnG')
|
||||
# for k in part.keys():
|
||||
# part[k]+= 1
|
||||
|
||||
# for i in part.keys():
|
||||
# G.node[i]["LvnG"] = part.get(i)
|
||||
|
||||
# nx.write_graphml(G, "graphs/GComm.graphml");
|
||||
|
||||
|
||||
# # ## Centralities
|
||||
|
||||
# # In[ ]:
|
||||
|
||||
|
||||
# print('linea 397')
|
||||
# dgr = nx.degree_centrality(G)
|
||||
# clo = nx.closeness_centrality(G)
|
||||
# har = nx.harmonic_centrality(G)
|
||||
# eig = nx.eigenvector_centrality(G)
|
||||
# bet = nx.betweenness_centrality(G)
|
||||
# pgr = nx.pagerank(G)
|
||||
# hits = nx.hits(G)
|
||||
|
||||
# centralities = pd.concat(
|
||||
# [pd.Series(c) for c in (hits[1], eig, pgr, har, clo, hits[0], dgr, bet)],
|
||||
# axis=1)
|
||||
|
||||
# centralities.columns = ("Authorities", "Eigenvector", "PageRank",
|
||||
# "Harmonic Closeness", "Closeness", "Hubs",
|
||||
# "Degree", "Betweenness")
|
||||
# centralities["Harmonic Closeness"] /= centralities.shape[0]
|
||||
|
||||
# # Calculate the correlations for each pair of centralities
|
||||
# c_df = centralities.corr()
|
||||
# ll_triangle = np.tri(c_df.shape[0], k=-1)
|
||||
# c_df *= ll_triangle
|
||||
# c_series = c_df.stack().sort_values()
|
||||
# c_series.tail()
|
||||
|
289004
anno3/avrc/assignments/coff/modello_github/dataset.csv
Normal file
289004
anno3/avrc/assignments/coff/modello_github/dataset.csv
Normal file
File diff suppressed because it is too large
Load diff
|
@ -0,0 +1,57 @@
|
|||
"""
|
||||
Library for plotting graphs in DZCNAPY
|
||||
"""
|
||||
import matplotlib
|
||||
import matplotlib.pyplot as plt
|
||||
# Global matplotlib appearance: Arial text on the "grayscale" style sheet.
matplotlib.rc("font", family="Arial")
matplotlib.style.use("grayscale")

# Baseline drawing options shared by every preset below.
# NOTE(review): the keys look like networkx drawing keyword arguments —
# confirm against the callers.
attrs = dict(
    edge_color="gray",
    font_family="Liberation Sans Narrow",
    font_size=15,
    font_weight="bold",
    node_color="pink",
    node_size=700,
    width=2,
)

# Baseline with half-transparent, much wider lines.
thick_attrs = dict(attrs, alpha=0.5, width=15)

# Baseline with smaller nodes and smaller label text.
small_attrs = dict(attrs, node_size=50, font_size=10)

# Same as the small preset, but with mid-sized nodes.
medium_attrs = dict(small_attrs, node_size=250)
|
||||
|
||||
def set_extent(positions, axes, title=None):
    """
    Fit the axis limits of a plot to a set of node coordinates.

    Tick labels on both axes are hidden, the optional title is applied, and
    the x/y limits are set to the bounding box of the coordinates enlarged by
    a 10% margin on every side.

    Parameters
    ----------
    positions : mapping
        Maps each node to its ``(x, y)`` coordinate pair. If it is empty,
        the limits are left untouched.
    axes : object
        Target of the calls. It must provide ``tick_params`` and either
        ``set_xlim``/``set_ylim`` or ``xlim``/``ylim``.
    title : str, optional
        Title to set through ``axes.set_title``; skipped when falsy.
    """
    # Real booleans are required here: the string "off" was deprecated in
    # Matplotlib 2.2 and is treated as truthy by later releases, which
    # leaves the tick labels visible.
    axes.tick_params(labelbottom=False)
    axes.tick_params(labelleft=False)
    if title:
        axes.set_title(title)

    # Without coordinates there is no bounding box to compute.
    if not positions:
        return

    x_values, y_values = zip(*positions.values())
    x_max = max(x_values)
    y_max = max(y_values)
    x_min = min(x_values)
    y_min = min(y_values)
    x_margin = (x_max - x_min) * 0.1
    y_margin = (y_max - y_min) * 0.1
    try:
        axes.set_xlim(x_min - x_margin, x_max + x_margin)
        axes.set_ylim(y_min - y_margin, y_max + y_margin)
    except AttributeError:
        # Fallback for targets that expose xlim/ylim instead of the setters
        # (presumably the pyplot module itself — confirm with callers).
        axes.xlim(x_min - x_margin, x_max + x_margin)
        axes.ylim(y_min - y_margin, y_max + y_margin)
|
||||
|
||||
def plot(fname, save=False):
    """
    Finalize the current pyplot figure, optionally save it, then show it.

    Parameters
    ----------
    fname : str
        Base name of the output file, without directory or extension.
    save : bool, optional
        When true, the figure is written to ``plots/<fname>.pdf`` at 600 dpi
        before being shown.
    """
    plt.tight_layout()
    if save:
        # Local import keeps this fix self-contained in the function.
        import os

        # savefig does not create missing directories; make sure the target
        # folder exists instead of failing with FileNotFoundError.
        os.makedirs("plots", exist_ok=True)
        plt.savefig("plots/{}.pdf".format(fname), dpi=600)
    plt.show()
|
22
anno3/avrc/assignments/coff/modello_github/errors.txt
Normal file
22
anno3/avrc/assignments/coff/modello_github/errors.txt
Normal file
|
@ -0,0 +1,22 @@
|
|||
/usr/lib64/python3.6/site-packages/matplotlib/font_manager.py:1331: UserWarning: findfont: Font family ['Arial'] not found. Falling back to DejaVu Sans
|
||||
(prop.get_family(), self.defaultFamily[fontext]))
|
||||
Traceback (most recent call last):
|
||||
File "assignment2.py", line 136, in <module>
|
||||
trans= nx.transitivity(G)*100
|
||||
File "/usr/lib64/python3.6/site-packages/networkx/algorithms/cluster.py", line 400, in transitivity
|
||||
triangles = sum(t for v, d, t, _ in _triangles_and_degree_iter(G))
|
||||
File "/usr/lib64/python3.6/site-packages/networkx/algorithms/cluster.py", line 400, in <genexpr>
|
||||
triangles = sum(t for v, d, t, _ in _triangles_and_degree_iter(G))
|
||||
File "/usr/lib64/python3.6/site-packages/networkx/algorithms/cluster.py", line 87, in _triangles_and_degree_iter
|
||||
gen_degree = Counter(len(vs & (set(G[w]) - {w})) for w in vs)
|
||||
File "/usr/lib64/python3.6/collections/__init__.py", line 535, in __init__
|
||||
self.update(*args, **kwds)
|
||||
File "/usr/lib64/python3.6/collections/__init__.py", line 622, in update
|
||||
_count_elements(self, iterable)
|
||||
File "/usr/lib64/python3.6/site-packages/networkx/algorithms/cluster.py", line 87, in <genexpr>
|
||||
gen_degree = Counter(len(vs & (set(G[w]) - {w})) for w in vs)
|
||||
File "/usr/lib64/python3.6/site-packages/networkx/classes/graph.py", line 458, in __getitem__
|
||||
return self.adj[n]
|
||||
File "/usr/lib64/python3.6/site-packages/networkx/classes/graph.py", line 336, in adj
|
||||
@property
|
||||
KeyboardInterrupt
|
326705
anno3/avrc/assignments/coff/modello_github/graphs/GCC.graphml
Normal file
326705
anno3/avrc/assignments/coff/modello_github/graphs/GCC.graphml
Normal file
File diff suppressed because it is too large
Load diff
402113
anno3/avrc/assignments/coff/modello_github/graphs/GComm.graphml
Normal file
402113
anno3/avrc/assignments/coff/modello_github/graphs/GComm.graphml
Normal file
File diff suppressed because it is too large
Load diff
477508
anno3/avrc/assignments/coff/modello_github/graphs/gccEcc.graphml
Normal file
477508
anno3/avrc/assignments/coff/modello_github/graphs/gccEcc.graphml
Normal file
File diff suppressed because it is too large
Load diff
36
anno3/avrc/assignments/coff/modello_github/log.txt
Normal file
36
anno3/avrc/assignments/coff/modello_github/log.txt
Normal file
|
@ -0,0 +1,36 @@
|
|||
#Nodes: 37702
|
||||
#Edges: 289004
|
||||
#Edges if graph was a full mesh: 710701551
|
||||
Density: 0.04%
|
||||
#Self-loops: 0
|
||||
#isolates: 0
|
||||
Average degree: 15.33
|
||||
Variance 6526.21
|
||||
Standard deviation 80.78
|
||||
Transitivity coefficient of the network: 1.24%
|
||||
Average clustering coefficient 0.17
|
||||
Numero di componenti connesse: 2
|
||||
Numero nodi GCC: 37700
|
||||
Numero archi GCC: 289003
|
||||
Percentuale di nodi sul totale 99.99%:
|
||||
Percentuale di archi sul totale 100.00%:
|
||||
Densità: 0.04
|
||||
Distanza media: 3.25
|
||||
linea 165
|
||||
linea 194
|
||||
linea 204
|
||||
Diameter GCC: 11
|
||||
Radius GCC 6
|
||||
linea 231
|
||||
linea 248
|
||||
Average Distance 3.25
|
||||
linea 285
|
||||
linea 314
|
||||
|
||||
Clique computed
|
||||
linea 332
|
||||
linea 368
|
||||
linea 369
|
||||
linea 370
|
||||
Numero Louvain communities: 28
|
||||
linea 363
|
Binary file not shown.
After (image error) Size: 27 KiB |
Binary file not shown.
After (image error) Size: 24 KiB |
Binary file not shown.
After (image error) Size: 21 KiB |
Binary file not shown.
After (image error) Size: 24 KiB |
Loading…
Reference in a new issue