avrc :3
parent 3f8e640377
commit 4218915fbf
13 changed files with 1721948 additions and 0 deletions
2  anno3/avrc/assignments/coff/.gitignore  vendored  Normal file
@@ -0,0 +1,2 @@
*~
__pycache__
370  anno3/avrc/assignments/coff/barabaso/assignment2-barabasi.py  Normal file
@@ -0,0 +1,370 @@
#!/usr/bin/env python
# coding: utf-8

# # Network Analysis - Random Barabasi Model

# In[2]:


import networkx as nx
from networkx.drawing.nx_agraph import graphviz_layout
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import dzcnapy_plotlib as dzcnapy
import csv
import math
import collections as coll
from networkx.algorithms import community as com
import community as lou


# Import the dataset to compute N and M
with open("dataset.csv") as infile:
    csv_reader = csv.reader(infile)
    G = nx.Graph(csv_reader)

N = G.number_of_nodes()
E = G.number_of_edges()
M = int(E / N)

G = nx.barabasi_albert_graph(N, M)

nx.write_graphml(G, "graphs/Barabasi.graphml");
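
# Note: barabasi_albert_graph(N, M) grows a graph by attaching each new node with M edges
# (preferential attachment), which yields an average degree of roughly 2*M; choosing
# M = int(E / N) therefore reproduces the original dataset's average degree 2*E/N.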


# ## Nodes, Edges, Density

# In[8]:


# Nodes, Edges, Density
numNodes = G.number_of_nodes()
numEdges = G.number_of_edges()
allEdges = int(numNodes * (numNodes - 1) / 2)
density = nx.density(G) * 100

# constants
N = numNodes
M = int(numEdges / numNodes)

print("#Nodes:", numNodes)
print("#Edges:", numEdges)
print("#Edges if graph was a full mesh:", allEdges)
print("Density: %.2f%%" % density)
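
# Note: for an undirected graph nx.density(G) is 2*E / (N*(N-1)), i.e. the fraction of the
# allEdges "full mesh" count that is actually present; the factor 100 reports it as a percentage.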


# ## Degree

# ### Average, variance and standard deviation

# In[5]:


avgDeg = (2 * G.number_of_edges()) / (G.number_of_nodes())
print("Average degree: %.2f" % avgDeg)

deg = [G.degree(n) for n in G.nodes]
var = np.var(deg)
devstd = math.sqrt(var)

print("Variance {:.2f}".format(var))
print("Standard deviation {:.2f}".format(devstd))


# ### Linear scale distribution

# In[6]:


# Degree distribution
degrees = sorted([d for n, d in G.degree()], reverse=True)
degreeCount = coll.Counter(degrees)
x, y = zip(*degreeCount.items())

plt.figure()
plt.plot(x, y, 'go-')
plt.xlabel('Degree')
plt.ylabel('Frequency')
plt.title('Degree Distribution with linear scale')
plt.savefig('plots/LinScaleDegreeDistr.png')
plt.show()


# ### Logarithmic scale distribution

# In[7]:


plt.figure()  # new figure so the log-log scatter does not overlap the previous plot
plt.scatter(x, y, s=50, c="green")
plt.xlim(0.9, max(x))
plt.ylim(0.9, max(y))
plt.xscale('log')
plt.yscale('log')
plt.xlabel("Degree")
plt.ylabel("Frequency")
plt.title('Degree Distribution with logarithmic scale')
plt.savefig('plots/LogScaleDegreeDistr.png')
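
# Note: a Barabasi-Albert graph has a power-law degree distribution P(k) ~ k^(-3), so the
# points above should fall roughly on a straight line of slope -3 in log-log scale.
# A quick, illustrative check (not part of the original run) would be:
#
#   slope, intercept = np.polyfit(np.log10(x), np.log10(y), 1)
#   print("Estimated power-law exponent: %.2f" % slope)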


# ## Clustering coefficient

trans = nx.transitivity(G) * 100
# fraction of connected node triples that are closed into triangles
print("Transitivity coefficient of the network: %.2f%%" % trans)

# Clustering coefficient
acc = nx.average_clustering(G)
print("Average clustering coefficient {:.2f}".format(acc))
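
# Note: low clustering is expected here; the Barabasi-Albert model produces clustering
# coefficients that vanish as the network grows, consistent with the 0.00 reported in log.txt.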


# ## Greatest Connected Component

# In[11]:


numCC = nx.number_connected_components(G)
gcc = max(nx.connected_component_subgraphs(G), key=len)

nodesgcc = gcc.nodes()
edgesgcc = gcc.edges()
nx.write_graphml(gcc, "graphs/GCC.graphml");

print("Number of connected components:", numCC)
print("Number of GCC nodes:", len(nodesgcc))
print("Number of GCC edges:", len(edgesgcc))
print("Percentage of nodes over total: %.2f%%" % (len(nodesgcc) / len(G.nodes()) * 100))
print("Percentage of edges over total: %.2f%%" % (len(edgesgcc) / len(G.edges()) * 100))
print("Density: {:.2f}".format(nx.density(gcc) * 100))
print("Average distance: {:.2f}".format(nx.average_shortest_path_length(gcc)))
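
# Note: nx.connected_component_subgraphs() and the G.node / gcc.node accessors used in this
# script exist only in NetworkX releases before 2.4. On newer versions, an equivalent way to
# extract the giant component (illustrative, not what was run here) would be:
#
#   gcc = G.subgraph(max(nx.connected_components(G), key=len)).copy()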


# ### GCC distances

# In[13]:


# TODO: run this cell
distDict = {}
#i=1
row = []
for n in gcc.nodes():
    nodeDists = nx.single_source_shortest_path_length(gcc, n)
    #if i%1000 == 0:
    #    print(i)

    for d in nodeDists:
        #if (int(d) in marked):
        #    continue
        if nodeDists[d] in distDict:
            distDict[nodeDists[d]] = distDict[nodeDists[d]] + 1
        else:
            distDict[nodeDists[d]] = 1
        row.append(nodeDists[d])
    #i += 1

distDict.pop(0)

avgDist, cnt = zip(*distDict.items())

plt.figure()  # new figure for the distance histogram
plt.bar(avgDist, cnt, width=0.3, color='b')
plt.title("Distance Distribution for G")
plt.ylabel("Frequency")
plt.xlabel("Shortest Path Distance")
#plt.savefig('plots/DistDistributionGlobal.png')
plt.show()


# ### GCC Eccentricity - Diameter - Radius - Center - Periphery

# In[15]:


# Eccentricity
ecc = nx.eccentricity(gcc)

# Adding eccentricity data to gcc
for k in ecc.keys():
    gcc.node[k]['eccentricity'] = ecc.get(k)


# In[ ]:


diametergcc = nx.diameter(gcc, ecc)
radiusgcc = nx.radius(gcc, ecc)
centergcc = nx.center(gcc, e=ecc)
peripherygcc = nx.periphery(gcc, e=ecc)

print("Diameter GCC:", diametergcc)
print("Radius GCC", radiusgcc)
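
# Note: the diameter is the maximum eccentricity and the radius the minimum; passing the
# precomputed `ecc` dictionary lets diameter/radius/center/periphery reuse it instead of
# recomputing all-pairs shortest paths.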


# In[ ]:


# Adding center / periphery flags to gcc
nx.set_node_attributes(gcc, 0, 'center')
nx.set_node_attributes(gcc, 0, 'periphery')

for v in centergcc:
    gcc.node[v]["center"] = 1

for v in peripherygcc:
    gcc.node[v]["periphery"] = 1

nx.write_graphml(gcc, "graphs/gccEcc.graphml");


# ## Distances

# In[ ]:


# Average distance over the whole network
distDict = {}
#i=1
#marked = set()
row = []
for n in G.nodes():
    nodeDists = nx.single_source_shortest_path_length(G, n)
    #if i%1000 == 0:
    #    print(i)

    for d in nodeDists:
        #if (int(d) in marked):
        #    continue
        if nodeDists[d] in distDict:
            distDict[nodeDists[d]] = distDict[nodeDists[d]] + 1
        else:
            distDict[nodeDists[d]] = 1
        row.append(nodeDists[d])
    #i += 1
    #marked.add(int(n))

avgShortPathG = np.average(row)
distDict.pop(0)

avgDist, cnt = zip(*distDict.items())

print("Average Distance {:.2f}".format(avgShortPathG))

plt.figure()  # new figure for the distance histogram
plt.bar(avgDist, cnt, width=0.3, color='b')
plt.title("Distance Distribution for G")
plt.ylabel("Frequency")
plt.xlabel("Shortest Path Distance")
plt.savefig('plots/DistDistributionGlobal.png')
plt.show()
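
# Note: `row` holds the BFS distances from every source node, so it also contains the N zero
# self-distances and counts each pair of nodes twice; np.average(row) therefore equals
# nx.average_shortest_path_length(G) up to a factor (N-1)/N, negligible at this network size.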


#print("Number of connected components:", nx.number_connected_components(G))
#print("Average distance:", nx.average_shortest_path_length(G))


# ## Degree correlation

# In[ ]:


# The following code fragment calculates the average-degree-connectivity dictionary and
# separates its keys and values into the two lists my_degree and their_degree:

my_degree, their_degree = zip(*nx.average_degree_connectivity(G).items())

plt.figure()  # new figure for the assortativity scatter plot
plt.scatter(my_degree, their_degree, s=50, c="b")
plt.xscale('log')
plt.yscale('log')
plt.xlabel("k")
plt.ylabel("$k_{nn}(k)$")
plt.savefig('plots/Assortativity.png')
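
# Note: nx.average_degree_connectivity(G) returns k_nn(k), the average degree of the
# neighbours of degree-k nodes; a roughly flat curve indicates neutral (uncorrelated) degree
# mixing, as expected for a Barabasi-Albert graph. A one-number check (illustrative, not part
# of the original run) would be:
#
#   print("Assortativity: %.3f" % nx.degree_assortativity_coefficient(G))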


# ## Communities

# ### 4-Clique Communities

# In[5]:


# k-clique community detection is skipped: it is too memory-hungry for a graph of this size.

# commK = com.k_clique_communities(G, 4)

# print("Clique computed")
# lClique = 0
# for i, cl in enumerate(commK):
#     lClique += 1
#     for n in cl:
#         G.node[n]["kClique"] = i+1

# print("Number of 4-Clique communities: ", lClique)


# ### Modularity based communities (Louvain)

# In[ ]:


part = lou.best_partition(G)
mod = lou.modularity(part, G)

part_as_seriesG = pd.Series(part)
part_as_seriesG.sort_values()
part_as_seriesG.value_counts()

print("Number of Louvain communities: ", part_as_seriesG.value_counts().size)
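
# Note: `community` (imported above as `lou`) is the python-louvain package;
# best_partition(G) returns a {node: community_id} dict and modularity(part, G) scores that
# partition (values closer to 1 indicate stronger community structure).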


# In[ ]:


# Saving Communities Attribute
nx.set_node_attributes(G, 0, 'LvnG')
for k in part.keys():
    part[k] += 1

for i in part.keys():
    G.node[i]["LvnG"] = part.get(i)

nx.write_graphml(G, "graphs/GComm.graphml");


# ## Centralities

# In[ ]:


dgr = nx.degree_centrality(G)
clo = nx.closeness_centrality(G)
har = nx.harmonic_centrality(G)
eig = nx.eigenvector_centrality(G)
bet = nx.betweenness_centrality(G)
pgr = nx.pagerank(G)
hits = nx.hits(G)

centralities = pd.concat(
    [pd.Series(c) for c in (hits[1], eig, pgr, har, clo, hits[0], dgr, bet)],
    axis=1)

centralities.columns = ("Authorities", "Eigenvector", "PageRank",
                        "Harmonic Closeness", "Closeness", "Hubs",
                        "Degree", "Betweenness")
centralities["Harmonic Closeness"] /= centralities.shape[0]

# Calculate the correlations for each pair of centralities
c_df = centralities.corr()
ll_triangle = np.tri(c_df.shape[0], k=-1)
c_df *= ll_triangle
c_series = c_df.stack().sort_values()
c_series.tail()
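
# Note: multiplying by np.tri(..., k=-1) keeps only the strict lower triangle of the
# correlation matrix, so each centrality pair is counted once; c_series.tail() then lists the
# most strongly correlated pairs. Outside a notebook this last expression prints nothing;
# print(c_series.tail()) would be needed to see it.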


# In[ ]:
289004  anno3/avrc/assignments/coff/barabaso/dataset.csv  Normal file
File diff suppressed because it is too large
57  anno3/avrc/assignments/coff/barabaso/dzcnapy_plotlib.py  Normal file
@@ -0,0 +1,57 @@
"""
Library for plotting graphs in DZCNAPY
"""
import matplotlib
import matplotlib.pyplot as plt
matplotlib.rc("font", family="Arial")
matplotlib.style.use("grayscale")

attrs = {
    "edge_color" : "gray",
    "font_family" : "Liberation Sans Narrow",
    "font_size" : 15,
    "font_weight" : "bold",
    "node_color" : "pink",
    "node_size" : 700,
    "width" : 2,
}
thick_attrs = attrs.copy()
thick_attrs["alpha"] = 0.5
thick_attrs["width"] = 15

small_attrs = attrs.copy()
small_attrs["node_size"] = 50
small_attrs["font_size"] = 10

medium_attrs = small_attrs.copy()
medium_attrs["node_size"] = 250

def set_extent(positions, axes, title=None):
    """
    Given node coordinates pos and the subplot,
    calculate and set its extent.
    """
    axes.tick_params(labelbottom="off")
    axes.tick_params(labelleft="off")
    if title:
        axes.set_title(title)

    x_values, y_values = zip(*positions.values())
    x_max = max(x_values)
    y_max = max(y_values)
    x_min = min(x_values)
    y_min = min(y_values)
    x_margin = (x_max - x_min) * 0.1
    y_margin = (y_max - y_min) * 0.1
    try:
        axes.set_xlim(x_min - x_margin, x_max + x_margin)
        axes.set_ylim(y_min - y_margin, y_max + y_margin)
    except AttributeError:
        axes.xlim(x_min - x_margin, x_max + x_margin)
        axes.ylim(y_min - y_margin, y_max + y_margin)

def plot(fname, save=False):
    plt.tight_layout()
    if save:
        plt.savefig("plots/{}.pdf".format(fname), dpi=600)
    plt.show()
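
# Note: a typical use of these helpers (illustrative, not part of the committed files) would be:
#
#   import networkx as nx
#   import matplotlib.pyplot as plt
#   import dzcnapy_plotlib as dzcnapy
#
#   pos = nx.spring_layout(G)
#   nx.draw_networkx(G, pos, ax=plt.gca(), **dzcnapy.small_attrs)
#   dzcnapy.set_extent(pos, plt.gca())
#   dzcnapy.plot("example", save=True)   # writes plots/example.pdf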
301569  anno3/avrc/assignments/coff/barabaso/graphs/Barabasi.graphml  Normal file
File diff suppressed because it is too large
301569  anno3/avrc/assignments/coff/barabaso/graphs/GCC.graphml  Normal file
File diff suppressed because it is too large
376974  anno3/avrc/assignments/coff/barabaso/graphs/GComm.graphml  Normal file
File diff suppressed because it is too large
452380  anno3/avrc/assignments/coff/barabaso/graphs/gccEcc.graphml  Normal file
File diff suppressed because it is too large
23  anno3/avrc/assignments/coff/barabaso/log.txt  Normal file
@@ -0,0 +1,23 @@
#Nodes: 37702
#Edges: 263865
#Edges if graph was a full mesh: 710701551
Density: 0.04%
Average degree: 14.00
Variance 372.92
Standard deviation 19.31
/usr/lib64/python3.6/site-packages/matplotlib/font_manager.py:1331: UserWarning: findfont: Font family ['Arial'] not found. Falling back to DejaVu Sans
  (prop.get_family(), self.defaultFamily[fontext]))
Transitivity coefficient of the network: 0.24%
Average clustering coefficient 0.00
Number of connected components: 1
Number of GCC nodes: 37702
Number of GCC edges: 263865
Percentage of nodes over total: 100.00%
Percentage of edges over total: 100.00%
Density: 0.04
Average distance: 3.69
Diameter GCC: 6
Radius GCC 4
Average Distance 3.69
Number of Louvain communities: 23
BIN  anno3/avrc/assignments/coff/barabaso/plots/Assortativity.png  Normal file
Binary file not shown.  Size: 24 KiB
Binary file not shown.  Size: 24 KiB
Binary file not shown.  Size: 22 KiB
Binary file not shown.  Size: 24 KiB