# -*- coding: utf-8 -*- # ----------------------------------------------------------------------- # ----------------------------------------------------------------------- # This script implements the simple contagion discussed in the paper # # -------- # Scaling law for the impact of mutant contagion # by Jonas S. Juul and Steven H. Strogatz # -------- # # and calculates the corresponding descendant distributions. # # The script was written by Jonas S. Juul, jonas.juul at nbi.ku.dk, and # implements the contagion on various choices of networks. # # If you wish to use this script, please consider citing the corresponding # paper. # # Please do not distribute this script without the consent of the authors. # # For speed, consider running this script using pypy. # ----------------------------------------------------------------------- # ----------------------------------------------------------------------- import numpy as np import networkx as nx import math import random import copy # Function to make descendant distribution def make_distribution(V) : # V is an array. V[i] is the parent of node i. For the seed, V[seed] = -1 # Make array D. D[i] will be the number of descendants of node i D = [0]*len(V) for i in range (len(V)) : #Loops over all nodes. For each i, we 'climb up' toward the root of the epidemic tree until the seed is reached. Each node passed on the way, we grant D[j] +=1 (because they have i as a descendant) found_seed = False position = i +0 #start at position i while (found_seed == False) : next_position = V[position] +0 # next position is the parent of the current position if (next_position == -2.5) : found_seed = True #For the empirical networks, next_position == -2.5 means that the node is not infected. Skip this. break elif (next_position == -1) : found_seed = True #If the next position is at the top of the epidemic tree, proceed to next i break if (next_position != -1 and next_position != -2.5) : D[next_position] += 1 # Increase D[next_position] by one, because that node has node i as a descendant if (next_position >= 0) : position = next_position +0 # Proceed to next position, further up the epidemic tree. return sorted(D,reverse = True) # Outputs an array with the number of descendants of nodes, in descending order. # Define the number of realizations of the contagion on networks. Nexp = 1000 # Define the number of nodes in the networks N = 10000 # Define the number of nodes that should be infected before stopping the spreading (Not equal to N if network == 'Facebook' or 'Twitter') N_max = N # Define array to keep track of histograms of descendants Result_D = [0]*N # Choose network type: k-regular, Erdős-Rényi, Small-world, Complete, the Facebook Athletes graph or the Twitter subgraph (final two from the SNAP database) networks = ['Regular','ER','Small-world','Complete','Facebook','Twitter'] network = networks[0] # Define the parameter used to name the file containing the simulation results. name_var = 'NaN' if (network == 'Regular') : # Define k, which defines k-regular networks. k = 10 # Make degree sequence. In this case, a k-regular graph. Degree_seq =[k]*N # In this case, make name_var k name_var = k elif (network == 'ER') : # Define p, the probability that each possible edge exists p = 0.010 # In this case, make name_var p name_var = p elif (network == 'Small-world') : # Define p, the probability that a node gets a shortcut to another node chosen uniformly at random. p = 0.01 # In this case, make name_var p name_var = p if (network=='Facebook') : # Create graph G = nx.Graph() # Open file containing the edges f = open('athletes_edges.csv','r') # Read first line. It does not contain information about edges f.readline() # Now read all lines in the file, and insert the listed edges in G for line in f : line.strip() columns = line.split(',') G.add_edge(int(columns[0]),int(columns[1])) # discard everything but the largest connected component lcc = max(nx.connected_components(G),key=len) G = G.subgraph(lcc) # Define N N = len(G.nodes()) # Define array to keep track of histograms of descendants Result_D = [0]*N if (network=='Twitter') : # Create graph G = nx.Graph() # Open file containing the edges f = open('twitter_combined.txt','r') # Read first line. It does not contain information about edges f.readline() # Because node numbers don't go from 0 to N, define a dictionary that lets us number nodes in this way. node_numbers = {} # Define a variable, which will keep track of which number the next node being added to the dictionary node_numbers, will get next_node_number = 0 # Now read all lines in the file, and insert the listed edges in G for line in f : line.strip() columns = line.split(' ') # This takes time, so print progress report for every 1000 nodes added to the dictionary node_numbers if (next_node_number/1000 == next_node_number/1000.) : print next_node_number,"different nodes imported" # If one of the nodes in the next edge is not in node_numbers, add the node to the dictionary if (int(columns[0]) not in node_numbers.keys()) : node_numbers[int(columns[0])] = next_node_number +0 # Advance next_node_number by 1, so we avoid giving 2 different nodes the same new number next_node_number += 1 if (int(columns[1]) not in node_numbers.keys()) : node_numbers[int(columns[1])] = next_node_number +0 # Advance next_node_number by 1, so we avoid giving 2 different nodes the same new number next_node_number += 1 # Add the edge to G G.add_edge(node_numbers[int(columns[0])],node_numbers[int(columns[1])]) # Remove self-loops G.remove_edges_from(G.selfloop_edges()) # Define N again N = len(G.nodes()) # discard everything but the largest connected component lcc = max(nx.connected_components(G),key=len) G = G.subgraph(lcc) # Define array to keep track of histograms of descendants Result_D = [0]*N # Now make Nexp experiments. for exp in range (Nexp) : print exp if (network == 'Regular') : # Define a graph. In this case a k-regular configuration-model graph. G = nx.configuration_model(Degree_seq) # Make the graph elif (network == 'ER') : # Define a graph. In this case, an Erdős-Rényi graph. G = nx.fast_gnp_random_graph(N,p) elif (network == 'Small-world') : # Define a graph. In this case, a Watts-Strogatz small-world graph. 2 is the total number of neighbors for nodes with no short cuts. G = nx.newman_watts_strogatz_graph(N,2,p) elif (network == 'Complete') : # Define a graph. In this case a complete graph. G = nx.complete_graph(N) # Remove self-loops. If self-loops exist, the variable 'already_infected' below can be True, and the code will work anyway. # Removing self-loops is not necessary for the empirical networks since the same network is used for every simulation. if (network != 'Facebook' and network != 'Twitter') : G.remove_edges_from(G.selfloop_edges()) # Make an array to keep track of whether nodes are Susceptible or Infectious. i is susceptible if S[i] = 0, and infectious if S[i] = 1 State = [0]*N # Define seed. seed = int(random.uniform(0,1)*N)#random.choice(list(G.nodes())) # Make seed infectious. State[seed] += 1 + 0 # Make array to keep track of parents of all nodes. This defines the epidemic tree Epidemic_tree = [-2.5]*N # The seed has no well-defined parent. We choose that Epidemic_tree[seed] = -1, which lets us easily recognize the seed when we encounter it in the function make_distribution() Epidemic_tree[seed] = -1 +0 # Make graph to keep track of possible edges over which the infection can spread next. Epidemic_graph = nx.Graph() # Add all nodes to the Epidemic graph for i in range (N) : Epidemic_graph.add_node(i) # Add all edges over which the infection can spread from the seed for edge in G.edges(seed) : Epidemic_graph.add_edge(*edge) # Define variable to keep track of the number of nodes currently infected in this realization. Only affects print statement below. infectious_nodes_counter = 1 # Now keep infecting nodes until there are no more edges over which the contagion can spread. while (len(Epidemic_graph.edges()) > 0 and infectious_nodes_counter < N_max) : # variable to catch instances of nodes being chosen to get infected if they are already infected already_infected = False # Choose a random edge over which the contagion spreads next next_spread = copy.copy(random.choice(list(Epidemic_graph.edges()))) # Check which way the contagion spreads: from next_spread[0] to next_spread[1] or the opposite way if (State[next_spread[0]] == 0 and State[next_spread[1]] == 1) : # In this case, next_spread[0] is the node being infected newly_infected = next_spread[0] +0 # Update its state (record that it is now infectious) State[newly_infected] += 1 +0 # record that its parent is next_spread[1] Epidemic_tree[next_spread[0]] = next_spread[1] +0 # record that another node has been infected. infectious_nodes_counter +=1 elif (State[next_spread[0]] == 1 and State[next_spread[1]] == 0) : # In this case, next_spread[1] is the node being infected newly_infected = next_spread[1] +0 # Update its state (record that it is now infectious) State[newly_infected] += 1 +0 # record that its parent is next_spread[0] Epidemic_tree[next_spread[1]] = next_spread[0] +0 # record that another node has been infected. infectious_nodes_counter += 1 else : # If the pair does not consist of 1 susceptible and 1 infected node, remove edge from epidemic graph and do not update epidemic graph below. This happens if the graph contains self loops. already_infected = True # Remove edge from epidemic graph. Epidemic_graph.remove_edge(*next_spread) # If 1 node was infected, and 1 was susceptible when choosing the edge above. if (already_infected == False ) : # Check all edges connecting the infected node to other nodes for edge in (G.edges(newly_infected)) : # If the edge is in the epidemic graph, the neighbour is infected so remove from possible spreading if (edge in Epidemic_graph.edges()) : Epidemic_graph.remove_edge(*edge) # Otherwise the neighbour is susceptible so add to possible spreading else : Epidemic_graph.add_edge(*edge) # Print the experiment number and the number of nodes that are infected so far in the realization. Do this every time another 100 nodes have been infected if (infectious_nodes_counter/100. == infectious_nodes_counter/100) : print exp,infectious_nodes_counter # Count the number of descendants each node has Descendants = make_distribution(Epidemic_tree) # Record the number of descendants of each node. If a node has d descendants, add 1/float(Nexp) to entry d in Results_D for i in range (N) : Result_D[int(Descendants[i] + 0.001)] += 1./float(Nexp) # Finally, save the obtained distribution f = open('Distribution_%s_var%s_N%s_Nexp%s.txt'%(network,name_var,N,Nexp),'w') f.write('Number of nodes having i descendents (i being rows below, starting from 0)') for i in range (len(Result_D)) : f.write('\n%.9f'%(float(Result_D[i])/float(N))) f.close()