#!/usr/bin/python3

import igraph as ig
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from collections import Counter
from statsmodels.distributions.empirical_distribution import ECDF as ecdf
import random
import math

def basic_stats(g):
    """Summarise the degree sequence of graph ``g``.

    ``g`` must provide ``degree()``, ``vcount()`` and ``ecount()``.

    Returns a dict, in this insertion order, holding: the vertex count
    (``"n"``), the edge count (``"m"``), the maximum, minimum, mean and
    median degree, the 0.99 quantile as computed by ``np.quantile``, the
    degree variance, and the "nearest rank" 99th / 1st percentile degrees
    read directly from the sorted degree sequence.

    The local clustering coefficient and the diameter are deliberately not
    computed here (the diameter was noted as being very slow).
    """
    degrees = g.degree()

    # Ascending copy of the degree sequence, used for the rank-based
    # percentiles below.
    ordered = list(degrees)
    ordered.sort()
    count = len(ordered)

    def degree_at(fraction):
        # Element sitting at floor(fraction * count) in the sorted sequence.
        return ordered[math.floor(fraction * count)]

    return {
        "n": g.vcount(),
        "m": g.ecount(),
        "max degree": max(degrees),
        "min degree": min(degrees),
        "average degree": np.mean(degrees),
        "median degree": np.median(degrees),
        "99th percentile of degree (numpy)": np.quantile(degrees, 0.99),
        "variance of degree": np.var(degrees),
        "99th percentile of degree": degree_at(0.99),
        "1th percentile of degree": degree_at(0.01),
    }


if __name__ == "__main__":
    # Script entry point: load the GitHub developer graph (plus its ML and
    # non-ML induced subgraphs), the movie actors graph and the European and
    # North American power grids, then print basic_stats() for each graph and
    # show a bar chart of its degree distribution.
    #
    # NOTE(review): every graph here is built from an edge list, so a vertex
    # that appears in no edge is not part of the graph and does not contribute
    # to the degree statistics -- confirm this is intended.
    datadir = "Datasets/"
    graphs = dict()

    ## read the GitHub edge list as tuples and build undirected graph
    ## each developer id from the CSV is stored in vertex attribute "id";
    ## igraph's own vertex indices are assigned in order of first appearance
    ## in the edge list and are in general different from those ids
    name = "Github Dev Graph"
    print(f"Reading {name}")
    df = pd.read_csv(datadir + "GitHubDevelopers/musae_git_edges.csv")
    g = ig.Graph.TupleList(
        [tuple(x) for x in df.values], directed=False, vertex_name_attr="id"
    )
    graphs[name] = g

    # Graph.subgraph() interprets integers as igraph vertex indices, so the
    # developer ids read from the target file must be translated through the
    # "id" vertex attribute before the subgraphs are induced.
    vertex_of = {dev_id: idx for idx, dev_id in enumerate(g.vs["id"])}

    name = "Github ML Dev Graph"
    print(f"Constructing {name}")
    df = pd.read_csv(datadir + "GitHubDevelopers/musae_git_target.csv")
    # Column 0 is used as the developer id and column 2 as the ML flag.
    ml_devs = []
    non_ml_devs = []
    for row in df.values:
        idx = vertex_of.get(row[0])
        if idx is None:
            # This id is not an endpoint of any edge, hence not a vertex of g.
            continue
        if row[2]:
            ml_devs.append(idx)
        else:
            non_ml_devs.append(idx)
    g_ml = g.subgraph(ml_devs)
    graphs[name] = g_ml

    name = "Github Non-ML Dev Graph"
    print(f"Constructing {name}")
    g_non_ml = g.subgraph(non_ml_devs)
    graphs[name] = g_non_ml

    name = "Movie Actors Graph"
    print(f"Reading {name}")
    g = ig.Graph.Read_Ncol(datadir + "Actors/movie_actors.net", directed=False)
    graphs[name] = g

    name = "European Power Grid"
    print(f"Reading {name}")
    # simplify() is applied so the grid is stored as a simple graph.
    grid = ig.Graph.Read_Ncol(
        datadir + "GridEurope/gridkit_europe-highvoltage.edges", directed=False
    ).simplify()
    graphs[name] = grid

    name = "North American Power Grid"
    print(f"Reading {name}")
    dfe = pd.read_csv(datadir + "GridNorthAmerica/gridkit_north_america-highvoltage-links.csv")
    dfv = pd.read_csv(datadir + "GridNorthAmerica/gridkit_north_america-highvoltage-vertices.csv")
    # Map each v_id to its row position in the vertex file and express every
    # link as a pair of those positions.
    vmap = {v_id: pos for pos, v_id in enumerate(dfv.v_id)}
    edges = [(vmap[a], vmap[b]) for a, b in zip(dfe.v_id_1, dfe.v_id_2)]
    g = ig.Graph.TupleList(edges, directed=False, vertex_name_attr="id").simplify()
    graphs[name] = g

    for name, g in graphs.items():
        stats = basic_stats(g)
        print(f"\n\nBasic stats for {name}")
        for key, value in stats.items():
            print(f"{key}: {value}")

        # deg_dist[d] is the number of vertices whose degree is d.
        deg_dist = np.bincount(g.degree(), minlength=stats["max degree"] + 1)

        X = list(range(len(deg_dist)))
        Y = deg_dist
        # Don't show the smallest and largest 1%: only degrees from the 1st
        # percentile degree up to (but excluding) the 99th percentile degree
        # are drawn.
        n1 = int(stats["1th percentile of degree"])
        n99 = int(stats["99th percentile of degree"])
        plt.bar(X[n1:n99], Y[n1:n99])
        plt.title(f"{name}: vertex degrees", fontsize=16)
        plt.xlabel("degree", fontsize=14)
        plt.ylabel("frequency", fontsize=14)
        plt.show()


    # Let's draw the European power grid
    # df = pd.read_csv(datadir + "GridEurope/gridkit_europe-highvoltage.vertices")
    # grid.add_vertices(range(grid.vcount(), len(df.values)))
    #
    # grid.vs["lon"] = list(df.lon)
    # grid.vs["lat"] = list(df.lat)
    #
    # grid.layout = [(v["lon"], -v["lat"]) for v in grid.vs]
    # # fig, ax = plt.subplots()
    # ig.plot(grid, layout=grid.layout, bbox=(0, 0, 600, 450), target="grid.pdf")


        # # Plot clustering coefficient as function of degree
        # X = [x for x in g.degree() if x > 1]
        # Y = [x for x in g.transitivity_local_undirected() if not np.isnan(x)]
        # plt.plot(X, Y, ".", color="grey")
        # plt.title("local clustering coefficient", fontsize=16)
        # plt.xlabel("degree", fontsize=14)
        # plt.ylabel("local clustering coefficient", fontsize=14)
        # plt.show()