# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
# This script implements the simple contagion discussed in the paper
#
# --------
# Scaling law for the impact of mutant contagion
# by Jonas S. Juul and Steven H. Strogatz
# --------
#
# and calculates the corresponding descendant distributions. 
#
# The script was written by Jonas S. Juul, jonas.juul  at  nbi.ku.dk, and
# implements the contagion on various choices of networks. 
# 
# If you wish to use this script, please consider citing the corresponding 
# paper.
#
# Please do not distribute this script without the consent of the authors.
#
# For speed, consider running this script using pypy.
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------


import numpy as np 
import networkx as nx 
import math 
import random 
import copy

# Function to make descendant distribution
def make_distribution(V) : # V is an array. V[i] is the parent of node i. For the seed, V[seed] = -1 
    # Make array D. D[i] will be the number of descendants of node i
    D = [0]*len(V)
    for i in range (len(V)) : #Loops over all nodes. For each i, we 'climb up' toward the root of the epidemic tree until the seed is reached. Each node passed on the way, we grant D[j] +=1 (because they have i as a descendant)
        found_seed = False
        position = i +0 #start at position i
        while (found_seed == False) :
            next_position = V[position] +0 # next position is the parent of the current position
            if (next_position == -2.5) :
                found_seed = True #For the empirical networks, next_position == -2.5 means that the node is not infected. Skip this.
                break
            elif (next_position == -1) :
                found_seed = True #If the next position is at the top of the epidemic tree, proceed to next i
                break
            if (next_position != -1 and next_position != -2.5) :
                D[next_position] += 1 # Increase D[next_position] by one, because that node has node i as a descendant
            if (next_position >= 0) :
                position = next_position +0 # Proceed to next position, further up the epidemic tree.
    return sorted(D,reverse = True) # Outputs an array with the number of descendants of nodes, in descending order.

# Define the number of realizations of the contagion on networks.
Nexp = 1000
# Define the number of nodes in the networks
N = 10000
# Define the number of nodes that should be infected before stopping the spreading (Not equal to N if network == 'Facebook' or 'Twitter')
N_max = N
# Define array to keep track of histograms of descendants
Result_D = [0]*N
# Choose network type: k-regular, Erdős-Rényi, Small-world, Complete, the Facebook Athletes graph or the Twitter subgraph (final two from the SNAP database)
networks = ['Regular','ER','Small-world','Complete','Facebook','Twitter']
network = networks[0]
# Define the parameter used to name the file containing the simulation results.
name_var = 'NaN'
if (network == 'Regular') :
    # Define k, which defines k-regular networks.
    k = 10  
    # Make degree sequence. In this case, a k-regular graph.
    Degree_seq =[k]*N 
    # In this case, make name_var k
    name_var = k    
elif (network == 'ER') :
    # Define p, the probability that each possible edge exists
    p = 0.010
    # In this case, make name_var p
    name_var = p
elif (network == 'Small-world') :
    # Define p, the probability that a node gets a shortcut to another node chosen uniformly at random.
    p = 0.01
    # In this case, make name_var p
    name_var = p    
if (network=='Facebook') :
    # Create graph
    G = nx.Graph()
    # Open file containing the edges
    f = open('athletes_edges.csv','r')
    # Read first line. It does not contain information about edges
    f.readline()
    # Now read all lines in the file, and insert the listed edges in G
    for line in f :
        line.strip()
        columns = line.split(',')
        G.add_edge(int(columns[0]),int(columns[1]))
    # discard everything but the largest connected component
    lcc = max(nx.connected_components(G),key=len)    
    G = G.subgraph(lcc)
    # Define N
    N = len(G.nodes())
    # Define array to keep track of histograms of descendants
    Result_D = [0]*N    
if (network=='Twitter') :
    # Create graph
    G = nx.Graph()
    # Open file containing the edges
    f = open('twitter_combined.txt','r')
    # Read first line. It does not contain information about edges
    f.readline()
    # Because node numbers don't go from 0 to N, define a dictionary that lets us number nodes in this way.
    node_numbers = {}
    # Define a variable, which will keep track of which number the next node being added to the dictionary node_numbers, will get
    next_node_number = 0
    # Now read all lines in the file, and insert the listed edges in G
    for line in f :
        line.strip()
        columns = line.split(' ')
        # This takes time, so print progress report for every 1000 nodes added to the dictionary node_numbers
        if (next_node_number/1000 == next_node_number/1000.) :
            print next_node_number,"different nodes imported"
        # If one of the nodes in the next edge is not in node_numbers, add the node to the dictionary
        if (int(columns[0]) not in node_numbers.keys()) :
            node_numbers[int(columns[0])] = next_node_number +0
            # Advance next_node_number by 1, so we avoid giving 2 different nodes the same new number
            next_node_number += 1
        if (int(columns[1]) not in node_numbers.keys()) :
            node_numbers[int(columns[1])] = next_node_number +0
            # Advance next_node_number by 1, so we avoid giving 2 different nodes the same new number
            next_node_number += 1
        # Add the edge to G
        G.add_edge(node_numbers[int(columns[0])],node_numbers[int(columns[1])])
    # Remove self-loops
    G.remove_edges_from(G.selfloop_edges())
    # Define N again
    N = len(G.nodes())       
    # discard everything but the largest connected component  
    lcc = max(nx.connected_components(G),key=len)   
    G = G.subgraph(lcc)
    # Define array to keep track of histograms of descendants
    Result_D = [0]*N    


# Now make Nexp experiments.
for exp in range (Nexp) :
    print exp
    if (network == 'Regular') :
        # Define a graph. In this case a k-regular configuration-model graph.
        G = nx.configuration_model(Degree_seq) # Make the graph
    elif (network == 'ER') :
        # Define a graph. In this case, an Erdős-Rényi graph.
        G = nx.fast_gnp_random_graph(N,p)
    elif (network == 'Small-world') :
        # Define a graph. In this case, a Watts-Strogatz small-world graph. 2 is the total number of neighbors for nodes with no short cuts.
        G = nx.newman_watts_strogatz_graph(N,2,p)
    elif (network == 'Complete') :
        # Define a graph. In this case a complete graph.
        G = nx.complete_graph(N)
    # Remove self-loops. If self-loops exist, the variable 'already_infected' below can be True, and the code will work anyway. 
    # Removing self-loops is not necessary for the empirical networks since the same network is used for every simulation.
    if (network != 'Facebook' and network != 'Twitter') :
        G.remove_edges_from(G.selfloop_edges())
    # Make an array to keep track of whether nodes are Susceptible or Infectious. i is susceptible if S[i] = 0, and infectious if S[i] = 1
    State = [0]*N
    # Define seed. 
    seed = int(random.uniform(0,1)*N)#random.choice(list(G.nodes()))
    # Make seed infectious.
    State[seed] += 1 + 0
    # Make array to keep track of parents of all nodes. This defines the epidemic tree
    Epidemic_tree = [-2.5]*N
    # The seed has no well-defined parent. We choose that Epidemic_tree[seed] = -1, which lets us easily recognize the seed when we encounter it in the function make_distribution()
    Epidemic_tree[seed] = -1 +0 
    # Make graph to keep track of possible edges over which the infection can spread next.
    Epidemic_graph = nx.Graph()
    # Add all nodes to the Epidemic graph
    for i in range (N) :
        Epidemic_graph.add_node(i)
    # Add all edges over which the infection can spread from the seed
    for edge in G.edges(seed) :
        Epidemic_graph.add_edge(*edge)
    # Define variable to keep track of the number of nodes currently infected in this realization. Only affects print statement below.
    infectious_nodes_counter = 1
    # Now keep infecting nodes until there are no more edges over which the contagion can spread.
    while (len(Epidemic_graph.edges()) > 0 and infectious_nodes_counter < N_max) :
        # variable to catch instances of nodes being chosen to get infected if they are already infected
        already_infected = False
        # Choose a random edge over which the contagion spreads next
        next_spread = copy.copy(random.choice(list(Epidemic_graph.edges())))
        # Check which way the contagion spreads: from next_spread[0] to next_spread[1] or the opposite way
        if (State[next_spread[0]] == 0 and State[next_spread[1]] == 1)  :
            # In this case, next_spread[0] is the node being infected
            newly_infected = next_spread[0] +0
            # Update its state (record that it is now infectious)
            State[newly_infected] += 1 +0
            # record that its parent is next_spread[1]
            Epidemic_tree[next_spread[0]] = next_spread[1] +0
            # record that another node has been infected.
            infectious_nodes_counter +=1
        elif (State[next_spread[0]] == 1 and State[next_spread[1]] == 0) :
            # In this case, next_spread[1] is the node being infected
            newly_infected = next_spread[1] +0
            # Update its state (record that it is now infectious)
            State[newly_infected] += 1 +0
            # record that its parent is next_spread[0]
            Epidemic_tree[next_spread[1]] = next_spread[0] +0
            # record that another node has been infected.
            infectious_nodes_counter += 1
        else :
            # If the pair does not consist of 1 susceptible and 1 infected node, remove edge from epidemic graph and do not update epidemic graph below. This happens if the graph contains self loops.
            already_infected = True
            # Remove edge from epidemic graph.
            Epidemic_graph.remove_edge(*next_spread)
        # If 1 node was infected, and 1 was susceptible when choosing the edge above.
        if (already_infected == False ) :
            # Check all edges connecting the infected node to other nodes
            for edge in (G.edges(newly_infected)) :
                # If the edge is in the epidemic graph,  the neighbour is infected so remove from possible spreading 
                if (edge in Epidemic_graph.edges()) :
                    Epidemic_graph.remove_edge(*edge)
                # Otherwise the neighbour is susceptible so add to possible spreading 
                else : 
                    Epidemic_graph.add_edge(*edge)
        # Print the experiment number and the number of nodes that are infected so far in the realization. Do this every time another 100 nodes have been infected
        if (infectious_nodes_counter/100. == infectious_nodes_counter/100) :
            print exp,infectious_nodes_counter
        # Count the number of descendants each node has
    Descendants = make_distribution(Epidemic_tree)
    # Record the number of descendants of each node. If a node has d descendants, add 1/float(Nexp) to entry d in Results_D
    for i in range (N) :
        Result_D[int(Descendants[i] + 0.001)] += 1./float(Nexp)

# Finally, save the  obtained distribution
f = open('Distribution_%s_var%s_N%s_Nexp%s.txt'%(network,name_var,N,Nexp),'w')
f.write('Number of nodes having i descendents (i being rows below, starting from 0)')
for i in range (len(Result_D)) :
    f.write('\n%.9f'%(float(Result_D[i])/float(N)))

f.close()