Open in CoCalc

Transmission Process

2015 Raazesh Sainudiin

This is code accompanying the technical report 'The Transmission Process' by Raazesh Sainudiin and David Welch. The code is for clarity and simplicity of implementation (it is not efficient for large scale simulations).

def CountsDict(X): '''convert a list X into a Dictionary of counts or frequencies''' CD = {} for x in X: CD[x] = (CD[x] + 1) if (x in CD) else 1 return CD def markAsInfected(C,v,m): '''mark node v as infected with marker m on each of the incoming edges of v in SICN C''' for e in C.incoming_edge_iterator([v]): C.set_edge_label(e[0],e[1],m) def susceptibleOutEdges(C,vs): '''return the the susceptible outedges of node v in vs in SICN C''' SOE = [e for e in C.outgoing_edge_iterator(vs) if e[2]==None] return SOE def growTransmissionTree(Ttree, pDict, z, infector, infectee): '''grow the transmission tree Ttree and update pathsDict pDict by adding the z-th infection event with infector -> infectee ''' LBT = LabelledBinaryTree newSubTree = LBT([LBT([None,None], label=infector), LBT([None, None], label=infectee)], label=z).clone() path2Infector = pDict[infector] if z==1: Ttree = newSubTree else: Ttree[tuple(path2Infector)] =newSubTree #print ascii_art(Ttree) pDict[infector]=path2Infector+[0] pDict[infectee]=path2Infector+[1] pDict[z]=path2Infector return Ttree def forgetLeafLabels(T): '''return the transmission tree T with all leaf labels set to 0''' leafLabelSet=set(T.leaf_labels()) leafUnlabelledT=T.map_labels(lambda z:0 if (z in leafLabelSet) else z) return leafUnlabelledT def forgetAllLabels(T): '''return the transmission tree T with all node labels removed''' return T.shape() def justTree(T): '''return the transmission tree T as nonplanar unranked unlabelled tree''' return Graph(T.shape().to_undirected_graph(),immutable=True) def transmissionProcessTC(C,initialI): '''return transmission tree outcome of the DTDS transmission MC on SICN C with initial infection at node initialI''' #initialisation of SICN z=0 # infection event count markAsInfected(C,initialI,'infected') infectedIs = [initialI] popSize=C.order() # initialisation of Transmission Tree pathsDict={} # dictionary of nodes -> paths from root in tree LBT = LabelledBinaryTree # individuals in tree are labelled by "i"+str(integer_label) T = LBT([None,None],label="i"+str(initialI)).clone() pathsDict["i"+str(initialI)]=[] while (len(infectedIs) < popSize): z=z+1 # increment infection event count currentSOE = susceptibleOutEdges(C,infectedIs) numberInfected=len(currentSOE) nextEdge = currentSOE[randrange(0,numberInfected)] C.set_edge_label(nextEdge[0],nextEdge[1],z) infectedIs.append(nextEdge[1]) markAsInfected(C,nextEdge[1],'inf') T=growTransmissionTree(T, pathsDict, z, "i"+str(nextEdge[0]),"i"+str(nextEdge[1])) #print "step z = ",z; print ascii_art(T); print "--------------------" return T.as_ordered_tree(with_leaves=False)

Three example host contact networks

SI Contact Network is the Complete Graph

Let the SICN be the complete graph. We get the distribution of transmission trees at various resolutions next.

graphs.CompleteGraph(4).to_directed().show()
# demo transmissionProcessTC(graphs.CompleteGraph(4).to_directed(),0)
step z = 1 1_ / \ i0 i3 -------------------- step z = 2 __1__ / \ 2_ i3 / \ i0 i2 -------------------- step z = 3 ____1____ / \ _2__ i3 / \ i0 3_ / \ i2 i1 -------------------- 1[2[i0[], 3[i2[], i1[]]], i3[]]
ts=[transmissionProcessTC(graphs.CompleteGraph(4).to_directed(),0) for _ in range(100000)] d=CountsDict(ts) print len(d) d
36 {1[3[i0[], i2[]], 2[i1[], i3[]]]: 2824, 1[i0[], 2[i1[], 3[i3[], i2[]]]]: 2752, 1[i0[], 2[3[i1[], i3[]], i2[]]]: 2799, 1[2[i0[], i2[]], 3[i1[], i3[]]]: 2793, 1[2[i0[], 3[i1[], i2[]]], i3[]]: 2650, 1[2[i0[], 3[i1[], i3[]]], i2[]]: 2717, 1[3[i0[], i3[]], 2[i2[], i1[]]]: 2726, 1[i0[], 2[3[i3[], i1[]], i2[]]]: 2780, 1[2[i0[], i3[]], 3[i2[], i1[]]]: 2771, 1[i0[], 2[3[i3[], i2[]], i1[]]]: 2762, 1[2[3[i0[], i1[]], i3[]], i2[]]: 2770, 1[2[i0[], 3[i2[], i3[]]], i1[]]: 2751, 1[i0[], 2[i2[], 3[i1[], i3[]]]]: 2850, 1[i0[], 2[i3[], 3[i1[], i2[]]]]: 2719, 1[2[i0[], 3[i2[], i1[]]], i3[]]: 2835, 1[3[i0[], i2[]], 2[i3[], i1[]]]: 2793, 1[3[i0[], i1[]], 2[i3[], i2[]]]: 2823, 1[2[i0[], i1[]], 3[i2[], i3[]]]: 2815, 1[2[i0[], i1[]], 3[i3[], i2[]]]: 2798, 1[i0[], 2[3[i1[], i2[]], i3[]]]: 2782, 1[i0[], 2[i3[], 3[i2[], i1[]]]]: 2741, 1[3[i0[], i1[]], 2[i2[], i3[]]]: 2754, 1[2[i0[], 3[i3[], i1[]]], i2[]]: 2824, 1[2[i0[], i2[]], 3[i3[], i1[]]]: 2792, 1[i0[], 2[i2[], 3[i3[], i1[]]]]: 2865, 1[2[3[i0[], i1[]], i2[]], i3[]]: 2913, 1[3[i0[], i3[]], 2[i1[], i2[]]]: 2849, 1[2[i0[], 3[i3[], i2[]]], i1[]]: 2661, 1[i0[], 2[i1[], 3[i2[], i3[]]]]: 2792, 1[2[3[i0[], i2[]], i1[]], i3[]]: 2740, 1[i0[], 2[3[i2[], i3[]], i1[]]]: 2780, 1[2[i0[], i3[]], 3[i1[], i2[]]]: 2689, 1[2[3[i0[], i3[]], i2[]], i1[]]: 2764, 1[2[3[i0[], i2[]], i3[]], i1[]]: 2769, 1[i0[], 2[3[i2[], i1[]], i3[]]]: 2731, 1[2[3[i0[], i3[]], i1[]], i2[]]: 2826}
d=CountsDict([forgetLeafLabels(t) for t in ts]) d
{1[0[], 2[3[0[], 0[]], 0[]]]: 16634, 1[0[], 2[0[], 3[0[], 0[]]]]: 16719, 1[2[3[0[], 0[]], 0[]], 0[]]: 16782, 1[3[0[], 0[]], 2[0[], 0[]]]: 16769, 1[2[0[], 0[]], 3[0[], 0[]]]: 16658, 1[2[0[], 3[0[], 0[]]], 0[]]: 16438}
d=CountsDict([forgetAllLabels(t) for t in ts]) d
{[[[], []], [[], []]]: 33427, [[[[], []], []], []]: 16782, [[], [[[], []], []]]: 16634, [[], [[], [[], []]]]: 16719, [[[], [[], []]], []]: 16438}
d=CountsDict([justTree(t) for t in ts]) d
{Graph on 7 vertices: 66573, Graph on 7 vertices: 33427}
graphs.CompleteGraph(5).to_directed().show()
ts=[transmissionProcessTC(graphs.CompleteGraph(5).to_directed(),0) for _ in range(100000)] d=CountsDict([forgetLeafLabels(t) for t in ts]) print len(d) d
24 {1[3[0[], 0[]], 2[4[0[], 0[]], 0[]]]: 4283, 1[0[], 2[3[0[], 0[]], 4[0[], 0[]]]]: 4119, 1[2[4[0[], 0[]], 0[]], 3[0[], 0[]]]: 4232, 1[3[0[], 4[0[], 0[]]], 2[0[], 0[]]]: 4124, 1[0[], 2[0[], 3[4[0[], 0[]], 0[]]]]: 4110, 1[3[4[0[], 0[]], 0[]], 2[0[], 0[]]]: 4164, 1[2[0[], 4[0[], 0[]]], 3[0[], 0[]]]: 4244, 1[4[0[], 0[]], 2[3[0[], 0[]], 0[]]]: 4195, 1[2[0[], 3[0[], 4[0[], 0[]]]], 0[]]: 4158, 1[2[0[], 0[]], 3[4[0[], 0[]], 0[]]]: 3997, 1[2[3[0[], 0[]], 4[0[], 0[]]], 0[]]: 4094, 1[2[4[0[], 0[]], 3[0[], 0[]]], 0[]]: 4200, 1[0[], 2[4[0[], 0[]], 3[0[], 0[]]]]: 4186, 1[2[0[], 3[0[], 0[]]], 4[0[], 0[]]]: 4170, 1[2[3[0[], 4[0[], 0[]]], 0[]], 0[]]: 4205, 1[0[], 2[3[0[], 4[0[], 0[]]], 0[]]]: 4165, 1[0[], 2[3[4[0[], 0[]], 0[]], 0[]]]: 4109, 1[2[3[0[], 0[]], 0[]], 4[0[], 0[]]]: 4188, 1[0[], 2[0[], 3[0[], 4[0[], 0[]]]]]: 4179, 1[2[3[4[0[], 0[]], 0[]], 0[]], 0[]]: 4222, 1[4[0[], 0[]], 2[0[], 3[0[], 0[]]]]: 4081, 1[2[0[], 3[4[0[], 0[]], 0[]]], 0[]]: 4212, 1[2[0[], 0[]], 3[0[], 4[0[], 0[]]]]: 4186, 1[3[0[], 0[]], 2[0[], 4[0[], 0[]]]]: 4177}
d=CountsDict([forgetAllLabels(t) for t in ts]) print len(d) d
14 {[[[[], [[], []]], []], []]: 4205, [[], [[[], []], [[], []]]]: 8305, [[[], []], [[], [[], []]]]: 12444, [[[], [[[], []], []]], []]: 4212, [[[[], []], [[], []]], []]: 8294, [[[], [[], []]], [[], []]]: 12538, [[], [[[[], []], []], []]]: 4109, [[[[], []], []], [[], []]]: 12584, [[], [[], [[], [[], []]]]]: 4179, [[[], []], [[[], []], []]]: 12475, [[[[[], []], []], []], []]: 4222, [[[], [[], [[], []]]], []]: 4158, [[], [[[], [[], []]], []]]: 4165, [[], [[], [[[], []], []]]]: 4110}
d=CountsDict([justTree(t) for t in ts]) len(d) d
3 {Graph on 9 vertices: 33360, Graph on 9 vertices: 50041, Graph on 9 vertices: 16599}

SI Contact Network is the Path Graph

Let the SICN be the path graph. We get the distribution of transmission trees at various resolutions next.

digraphs.Path(4).show()
# demo transmissionProcessTC(digraphs.Path(4),0)
step z = 1 1_ / \ i0 i1 -------------------- step z = 2 _1__ / \ i0 2_ / \ i1 i2 -------------------- step z = 3 __1__ / \ i0 _2__ / \ i1 3_ / \ i2 i3 -------------------- 1[i0[], 2[i1[], 3[i2[], i3[]]]]
ts=[transmissionProcessTC(digraphs.Path(4),0) for _ in range(1000)] d=CountsDict(ts) print len(d) d
1 {1[i0[], 2[i1[], 3[i2[], i3[]]]]: 1000}
d=CountsDict([forgetAllLabels(t) for t in ts]) print len(d) d
1 {[[], [[], [[], []]]]: 1000}
d=CountsDict([justTree(t) for t in ts]) len(d) d
1 {Graph on 7 vertices: 1000}

SI Contact Network is the Star Network

Let the SICN be the star network. We get the distribution of transmission trees at various resolutions next.

g=graphs.StarGraph(3).to_directed() g.show()
# demo transmissionProcessTC(graphs.StarGraph(3).to_directed(),0)
step z = 1 1_ / \ i0 i3 -------------------- step z = 2 __1__ / \ 2_ i3 / \ i0 i2 -------------------- step z = 3 __1___ / \ __2__ i3 / \ 3_ i2 / \ i0 i1 -------------------- 1[2[3[i0[], i1[]], i2[]], i3[]]
ts=[transmissionProcessTC(graphs.StarGraph(3).to_directed(),0) for _ in range(10000)] d=CountsDict(ts) print len(d) d
6 {1[2[3[i0[], i2[]], i1[]], i3[]]: 1633, 1[2[3[i0[], i1[]], i2[]], i3[]]: 1645, 1[2[3[i0[], i3[]], i2[]], i1[]]: 1692, 1[2[3[i0[], i2[]], i3[]], i1[]]: 1682, 1[2[3[i0[], i1[]], i3[]], i2[]]: 1674, 1[2[3[i0[], i3[]], i1[]], i2[]]: 1674}
d=CountsDict([forgetLeafLabels(t) for t in ts]) print len(d) d
1 {1[2[3[0[], 0[]], 0[]], 0[]]: 10000}
d=CountsDict([forgetAllLabels(t) for t in ts]) print len(d) d
1 {[[[[], []], []], []]: 10000}
d=CountsDict([justTree(t) for t in ts]) len(d) d
1 {Graph on 7 vertices: 10000}

Likelihood Functions & Sufficient Statistics

based on tree topology without labels or branch-lengths

Note one can include branch-lengths into the likelihood easily via the exponential rates for the continuous time Markov processes that the discrete jump chains studied here. This will be necessary to conduct statistical inference for applied epidemiological models.

def splitsSequence(T): '''return a list of tuples (left,right) split sizes at each split node''' l = [] LabelledBinaryTree(T).post_order_traversal(lambda node: l.append((node[0].node_number(),node[1].node_number()))) return l def prob_RPT(T,a,b): '''probability of ranked planar tree T under beta-splitting model a,b>-1, where (a+1,b+1) are the parameters of the beta distribution''' return prod(map(lambda x: beta(x[0]+a+1,x[1]+b+1)/beta(a+1,b+1), splitsSequence(T))) def negLogLkl_SplitPairCounts(spc,a,b): '''-log likelihood of multiple independent ranked planar trees through their sufficient statistics of the frequence of split-pair counts spc= [(nL_i,nR_i,c_i): i=1,..,K] under beta-splitting model a,b>-1, where (a+1,b+1) are the parameters of the beta distribution -- This implements first Equation in Thm 1 involving beta functions''' return -RR(sum(map(lambda x: x[2]*log(1.0*beta(x[0]+a+1,x[1]+b+1)/beta(a+1,b+1)), spc))) def splitPairsCounts(TS): '''list of the frequency of all distinct split-pairs, i.e. (# of left splits, # right splits) beloe each internal node in each transmission tree in the list TS of transmission trees''' splitPairCounts=sorted(CountsDict(flatten([splitsSequence(t) for t in TS],max_level=1)).items()) return [(x[0][0],x[0][1],x[1]) for x in splitPairCounts] def splitPairsCountsDict(TS): '''dictionary of the frequency of all distinct split-pairs, i.e. (# of left splits, # right splits) beloe each internal node in each transmission tree in the list TS of transmission trees''' #splitPairCounts=sorted(CountsDict(flatten([splitsSequence(t) for t in TS],max_level=1))) sD = CountsDict(flatten([splitsSequence(t) for t in TS],max_level=1)) return sD def logLklOfASplitPair(a,b,nL,nR): '''beta(nL+a+1,nR+b+1)/beta(a+1,b+1) without beta functions via Eqn 2 in Thm 1''' A1=sum([log((b+j)/(b+j+a)) for j in range(nR+1)]) A2=sum([log((a+i)/(a+i+b+nR+1)) for i in range(nL+1)]) A3=log(b*a/((a+b)*(a+b+1))) return A1+A2-A3 def negLogLkl_SplitPairCounts2(spc,a,b): '''-log likelihood of multiple independent ranked planar trees through their sufficient statistics of the frequency of split-pair counts spc= [(nL_i,nR_i,c_i): i=1,..,K] under beta-splitting model a,b>-1, where (a+1,b+1) are the parameters of the beta distribution -- This implements second Equation in Thm 1 without beta functions''' return -(sum(map(lambda x: x[2]*logLklOfASplitPair(a,b,x[0],x[1]), spc))) def LklOfASplitPair(a,b,nL,nR): '''beta(nL+a+1,nR+b+1)/beta(a+1,b+1) without beta functions via Eqn 2 in Thm 1''' A1=prod([((b+j)/(b+j+a)) for j in range(nR+1)]) A2=prod([((a+i)/(a+i+b+nR+1)) for i in range(nL+1)]) A3=(b*a/((a+b)*(a+b+1))) return (A1*A2)/A3 def negLogLkl_SplitPairCounts2Prod(spc,a,b): '''- log likelihood of multiple independent ranked planar trees through their sufficient statistics of the frequency of split-pair counts spc= [(nL_i,nR_i,c_i): i=1,..,K] under beta-splitting model a,b>-1, where (a+1,b+1) are the parameters of the beta distribution -- This implements second Equation in Thm 1 without beta functions''' return -(sum(map(lambda x: x[2]*log(LklOfASplitPair(a,b,x[0],x[1])), spc)))

Demo of the mle for a complete graph with 50 nodes and 10 sampled trees

%time # demo of the mle for a complete graph with 50 nodes and 10 sampled trees c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1 c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1 # simulation settings n=50 reps=10 # trial 1: make a list of 10 independent transmission trees on complete graph with 50 nodes ts1=[transmissionProcessTC(graphs.CompleteGraph(n).to_directed(),0) for _ in range(reps)] spc1=splitPairsCounts(ts1) # trial 2: make another list of 10 independent transmission trees on complete graph with 50 nodes ts2=[transmissionProcessTC(graphs.CompleteGraph(n).to_directed(),0) for _ in range(reps)] spc2=splitPairsCounts(ts2)
CPU time: 1.94 s, Wall time: 3.89 s
spc1=splitPairsCounts(ts1)
# slower more explicit method of finding MLE # (don't use this for larger simuations!) def negLkl1(AB): # negative log likelihood function for trees from trial 1 return sum([-log(1.0*prob_RPT(ts1[j],AB[0],AB[1])) for j in range(reps)]) def negLkl2(AB): # negative log likelihood function for trees from trial 1 return sum([-log(1.0*prob_RPT(ts2[j],AB[0],AB[1])) for j in range(reps)]) %time mle=minimize_constrained(negLkl1,[c_1,c_2],[0.0,0.0],disp=0) #MLE for trial 1 print [n,reps,mle] %time mle=minimize_constrained(negLkl2,[c_1,c_2],[0.0,0.0],disp=0) #MLE for trial 2 print [n,reps,mle]
CPU time: 49.64 s, Wall time: 100.20 s [50, 10, (-0.06665985295552748, -0.05038096402831602)] CPU time: 41.69 s, Wall time: 84.02 s [50, 10, (0.004389330784528777, -0.04321792852343554)]
# faster MLE computation using sufficient statistics of split-pair frequencies # (don't use this for larger simuations!) def negLkl1(AB): return negLogLkl_SplitPairCounts(spc1,AB[0],AB[1]) def negLkl2(AB): return negLogLkl_SplitPairCounts(spc2,AB[0],AB[1]) %time mle=minimize_constrained(negLkl1,[c_1,c_2],[0.0,0.0],disp=0) #MLE for trial 1 print [n,reps,mle] %time mle=minimize_constrained(negLkl2,[c_1,c_2],[0.0,0.0],disp=0) #MLE for trial 2 print [n,reps,mle]
CPU time: 9.43 s, Wall time: 19.07 s [50, 10, (-0.06665985290623813, -0.05038096406044273)] CPU time: 7.18 s, Wall time: 14.59 s [50, 10, (0.004740228015888741, -0.04293877679246926)]
# fastest and numerically most robust MLE computation using sufficient statistics # USE this for larger simulations def negLkl1(AB): return negLogLkl_SplitPairCounts2(spc1,AB[0],AB[1]) def negLkl2(AB): return negLogLkl_SplitPairCounts2(spc2,AB[0],AB[1]) %time mle=minimize_constrained(negLkl1,[c_1,c_2],[0.0,0.0],disp=0) #MLE for trial 1 print [n,reps,mle] %time mle=minimize_constrained(negLkl2,[c_1,c_2],[0.0,0.0],disp=0) #MLE for trial 2 print [n,reps,mle]
CPU time: 4.73 s, Wall time: 9.52 s [50, 10, (-0.06644703305884028, -0.05022885910313697)] CPU time: 9.34 s, Wall time: 18.87 s [50, 10, (0.004665943054046424, -0.043007797370934346)]

Sufficient Statistics of the parameters specifying the distribution for Beta-splitting Transmission Tree topologies

len(ts1),len(ts2) # number of trees in each trial
(10, 10)
def denMatOfsplitPairsCounts(ts,n,EMF): '''dense matrix of split pair counts of transmission trees in the list ts normalise to sum to 1 if EMF = Empirical Mass Function = True ''' spcD=splitPairsCountsDict(ts); spcD[(0,n)]=0; spcD[(n,0)]=0; ss=matrix(spcD).dense_matrix() if EMF: ss=ss/sum(sum(ss)) return ss

Let's save these plots

mp1=matrix_plot(np.log1p(denMatOfsplitPairsCounts(ts1,n,0)) , cmap='summer', colorbar=True) mp2=matrix_plot(np.log1p(denMatOfsplitPairsCounts(ts2,n,0)) , cmap='summer', colorbar=True)
mp2.show()
%sh pwd
/projects/58dfa924-55ae-4b6c-9fd4-1cd0ef49eb7c
%sh #mkdir /projects/58dfa924-55ae-4b6c-9fd4-1cd0ef49eb7c/figures && ls ls
2015-10-25-165503.sagews 2016-05-30-transmissionProcess.sagews figures
mp1.save('./figures/SuffStatsCompleteGraph_n50_reps10_mp1.png') mp1.save('./figures/SuffStatsCompleteGraph_n50_reps10_mp1.eps') mp1.save('./figures/SuffStatsCompleteGraph_n50_reps10_mp1.pdf') mp2.save('./figures/SuffStatsCompleteGraph_n50_reps10_mp2.png') mp2.save('./figures/SuffStatsCompleteGraph_n50_reps10_mp2.eps') mp2.save('./figures/SuffStatsCompleteGraph_n50_reps10_mp2.pdf')

Let's do a more exhaustive simulation next

%time # demo of the mle for a complete graph with 50 nodes and 10 sampled trees c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1 c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1 # simulation settings n=50 reps=1000 # trial 1: make a list of 10 independent transmission trees on complete graph with 50 nodes ts3=[transmissionProcessTC(graphs.CompleteGraph(n).to_directed(),0) for _ in range(reps)] spc3=splitPairsCounts(ts3)
CPU time: 74.06 s, Wall time: 150.04 s
spc3=splitPairsCounts(ts3)
def negLkl3(AB): return negLogLkl_SplitPairCounts2(spc3,AB[0],AB[1]) %time mle=minimize_constrained(negLkl3,[c_1,c_2],[0.0,0.0],disp=0) #MLE for trial 1 print [n,reps,mle]
CPU time: 76.10 s, Wall time: 153.64 s [50, 1000, (0.027896177617489963, 0.032449437588353475)]

Let's plot the sufficient stats for this longer run.

#mp3=matrix_plot(np.log1p(denMatOfsplitPairsCounts(ts3,n,1)*np.double(1000)), norm='value', cmap='summer') # a log(1+prob*1000) plot of the mp3=matrix_plot(np.log1p(denMatOfsplitPairsCounts(ts3,n,0)*np.double(1)) , cmap='summer', colorbar=True)#, norm='value', cmap='summer', colorbar=True) # a log(1+freq) plot of the
mp3.show()
mp3.save('./figures/SuffStatsCompleteGraph_n50_reps1000_mp3.png') mp3.save('./figures/SuffStatsCompleteGraph_n50_reps1000_mp3.eps') mp3.save('./figures/SuffStatsCompleteGraph_n50_reps1000_mp3.pdf')
%sh ls figures/
SuffStatsCompleteGraph_n50_reps10_mp1.eps SuffStatsCompleteGraph_n50_reps10_mp1.pdf SuffStatsCompleteGraph_n50_reps10_mp1.png SuffStatsCompleteGraph_n50_reps10_mp2.eps SuffStatsCompleteGraph_n50_reps10_mp2.pdf SuffStatsCompleteGraph_n50_reps10_mp2.png

Some Deterministic Contact Networks

Bidirectional Circular Network

g=digraphs.Circulant(5,[1,-1]) g.show()

To Make Latex tikz-graph see http://doc.sagemath.org/html/en/reference/graphs/sage/graphs/graph_latex.html.

H = digraphs.Circulant(6,[1,-1]) H.set_latex_options( graphic_size=(5,5), vertex_size=0.2, edge_thickness=0.04, edge_color='green', vertex_color='green', vertex_label_color='red', format='tkz_graph', #tkz_style='Art', #layout='acyclic' ) latex(H)
\begin{tikzpicture} % \useasboundingbox (0,0) rectangle (5.0cm,5.0cm); % \definecolor{cv0}{rgb}{0.0,0.502,0.0} \definecolor{cfv0}{rgb}{1.0,1.0,1.0} \definecolor{clv0}{rgb}{1.0,0.0,0.0} \definecolor{cv1}{rgb}{0.0,0.502,0.0} \definecolor{cfv1}{rgb}{1.0,1.0,1.0} \definecolor{clv1}{rgb}{1.0,0.0,0.0} \definecolor{cv2}{rgb}{0.0,0.502,0.0} \definecolor{cfv2}{rgb}{1.0,1.0,1.0} \definecolor{clv2}{rgb}{1.0,0.0,0.0} \definecolor{cv3}{rgb}{0.0,0.502,0.0} \definecolor{cfv3}{rgb}{1.0,1.0,1.0} \definecolor{clv3}{rgb}{1.0,0.0,0.0} \definecolor{cv4}{rgb}{0.0,0.502,0.0} \definecolor{cfv4}{rgb}{1.0,1.0,1.0} \definecolor{clv4}{rgb}{1.0,0.0,0.0} \definecolor{cv5}{rgb}{0.0,0.502,0.0} \definecolor{cfv5}{rgb}{1.0,1.0,1.0} \definecolor{clv5}{rgb}{1.0,0.0,0.0} \definecolor{cv0v1}{rgb}{0.0,0.502,0.0} \definecolor{cv0v5}{rgb}{0.0,0.502,0.0} \definecolor{cv1v0}{rgb}{0.0,0.502,0.0} \definecolor{cv1v2}{rgb}{0.0,0.502,0.0} \definecolor{cv2v1}{rgb}{0.0,0.502,0.0} \definecolor{cv2v3}{rgb}{0.0,0.502,0.0} \definecolor{cv3v2}{rgb}{0.0,0.502,0.0} \definecolor{cv3v4}{rgb}{0.0,0.502,0.0} \definecolor{cv4v3}{rgb}{0.0,0.502,0.0} \definecolor{cv4v5}{rgb}{0.0,0.502,0.0} \definecolor{cv5v0}{rgb}{0.0,0.502,0.0} \definecolor{cv5v4}{rgb}{0.0,0.502,0.0} % \Vertex[style={minimum size=0.2cm,draw=cv0,fill=cfv0,text=clv0,shape=circle},LabelOut=false,L=\hbox{$0$},x=5.0cm,y=2.5cm]{v0} \Vertex[style={minimum size=0.2cm,draw=cv1,fill=cfv1,text=clv1,shape=circle},LabelOut=false,L=\hbox{$1$},x=3.75cm,y=5.0cm]{v1} \Vertex[style={minimum size=0.2cm,draw=cv2,fill=cfv2,text=clv2,shape=circle},LabelOut=false,L=\hbox{$2$},x=1.25cm,y=5.0cm]{v2} \Vertex[style={minimum size=0.2cm,draw=cv3,fill=cfv3,text=clv3,shape=circle},LabelOut=false,L=\hbox{$3$},x=0.0cm,y=2.5cm]{v3} \Vertex[style={minimum size=0.2cm,draw=cv4,fill=cfv4,text=clv4,shape=circle},LabelOut=false,L=\hbox{$4$},x=1.25cm,y=0.0cm]{v4} \Vertex[style={minimum size=0.2cm,draw=cv5,fill=cfv5,text=clv5,shape=circle},LabelOut=false,L=\hbox{$5$},x=3.75cm,y=0.0cm]{v5} % \Edge[lw=0.04cm,style={post, bend right,color=cv0v1,},](v0)(v1) \Edge[lw=0.04cm,style={post, bend right,color=cv0v5,},](v0)(v5) \Edge[lw=0.04cm,style={post, bend right,color=cv1v0,},](v1)(v0) \Edge[lw=0.04cm,style={post, bend right,color=cv1v2,},](v1)(v2) \Edge[lw=0.04cm,style={post, bend right,color=cv2v1,},](v2)(v1) \Edge[lw=0.04cm,style={post, bend right,color=cv2v3,},](v2)(v3) \Edge[lw=0.04cm,style={post, bend right,color=cv3v2,},](v3)(v2) \Edge[lw=0.04cm,style={post, bend right,color=cv3v4,},](v3)(v4) \Edge[lw=0.04cm,style={post, bend right,color=cv4v3,},](v4)(v3) \Edge[lw=0.04cm,style={post, bend right,color=cv4v5,},](v4)(v5) \Edge[lw=0.04cm,style={post, bend right,color=cv5v0,},](v5)(v0) \Edge[lw=0.04cm,style={post, bend right,color=cv5v4,},](v5)(v4) % \end{tikzpicture}
ts=[transmissionProcessTC(digraphs.Circulant(6,[1,-1]),0) for _ in range(10)]
T = ts[3] view(T)
#latex(T)

Just checking that the trees are uniformly distributed over 2^{n-1} ORBOR trees := Only-Right-Branching-subtrees-Of-Root-node trees (for peace of mind :).

ts=[transmissionProcessTC(digraphs.Circulant(3,[1,-1]),0) for _ in range(10000)] d=CountsDict(ts) print len(d) d
4 {1[2[i0[], i2[]], i1[]]: 2598, 1[2[i0[], i1[]], i2[]]: 2465, 1[i0[], 2[i2[], i1[]]]: 2492, 1[i0[], 2[i1[], i2[]]]: 2445}
ts=[transmissionProcessTC(digraphs.Circulant(4,[1,-1]),0) for _ in range(10000)] d=CountsDict(ts) print len(d) d
8 {1[2[i0[], i1[]], 3[i3[], i2[]]]: 1264, 1[i0[], 2[i1[], 3[i2[], i3[]]]]: 1226, 1[i0[], 2[i3[], 3[i2[], i1[]]]]: 1255, 1[2[i0[], 3[i1[], i2[]]], i3[]]: 1240, 1[3[i0[], i3[]], 2[i1[], i2[]]]: 1264, 1[2[i0[], 3[i3[], i2[]]], i1[]]: 1240, 1[3[i0[], i1[]], 2[i3[], i2[]]]: 1324, 1[2[i0[], i3[]], 3[i1[], i2[]]]: 1187}
for t in d.keys(): ascii_art(t)
__1____ / / 2__ 3__ / / / / i0 i1 i3 i2 __1___ / / i0 _2___ / / i1 3__ / / i2 i3 __1___ / / i0 _2___ / / i3 3__ / / i2 i1 ___1____ / / _2___ i3 / / i0 3__ / / i1 i2 __1____ / / 3__ 2__ / / / / i0 i3 i1 i2 ___1____ / / _2___ i1 / / i0 3__ / / i3 i2 __1____ / / 3__ 2__ / / / / i0 i1 i3 i2 __1____ / / 2__ 3__ / / / / i0 i3 i1 i2

MLE of transmission trees from the bidirectional circular network.

%time # demo of the mle for a BalancedTree(2,k) graph with k=4,5,6 or n=31,63,127 nodes and 1 or 10 sampled trees c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1 c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1 mles=[] n=50 reps=1 # all reps have the same unlabelled transmission tree topology trials=5 for i in range(trials): ts=[transmissionProcessTC(digraphs.Circulant(n,[1,-1]),0) for _ in range(reps)] spc=splitPairsCounts(ts) def negLkl(AB): return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1]) mle=minimize_constrained(negLkl,[c_1,c_2],[0.0,0.0],disp=0) x = [n,reps, i, mle, negLkl(mle)] print x mles.append(x) #mles
[50, 1, 0, (-0.9879913505605317, 1.4153032756705888), 40.135686299805656] [50, 1, 1, (-0.9874402261518601, 1.56295126382436), 40.71467705498834] [50, 1, 2, (-0.9878012505019622, 1.5625111252046917), 40.04358386390558] [50, 1, 3, (-0.9878790181095873, 1.4641878740256553), 40.60995308631155] [50, 1, 4, (-0.9874316013790505, 1.5708318403090265), 40.781443512465984] CPU time: 33.09 s, Wall time: 33.79 s
print (n, reps, trials, mean([x[3][0] for x in mles]), std([x[3][0] for x in mles]), mean([x[3][1] for x in mles]), std([x[3][1] for x in mles]))
(50, 1, 5, -0.9880406098481203, 0.0006220653042760461, 1.4583751679644803, 0.15345032109817106)
# (50, 1, 5, -0.9880406098481203, 0.0006220653042760461, 1.4583751679644803, 0.15345032109817106) # (50, 100, 5, -0.9878920259080713, 3.460969702959854e-05, 1.5188788157835482, 0.006721406185256709)

Let us repeat the same computation over the relative frequency of split-pairs as opposed to the frequency over a larger number of replicate transmission trees.

%time # demo of the mle for a BalancedTree(2,k) graph with k=4,5,6 or n=31,63,127 nodes and 1 or 10 sampled trees c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1 c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1 mles=[] n=50 reps=100 # all reps have the same unlabelled transmission tree topology trials=5 for ti in range(trials): ts=[transmissionProcessTC(digraphs.Circulant(n,[1,-1]),0) for _ in range(reps)] spc=splitPairsCounts(ts) s=sum([x[2] for x in spc]) spcRf=[] for i in range(len(spc)): spcRf.append((spc[i][0],spc[i][1],spc[i][2]/s)) def negLkl(AB): return negLogLkl_SplitPairCounts2(spcRf,AB[0],AB[1]) mle=minimize_constrained(negLkl,[c_1,c_2],[0.0,0.0],disp=0) x = [n,reps, i, mle, negLkl(mle)] print x mles.append(x)
[50, 100, 47, (-0.9879013644873202, 1.5266937292679985), 0.8208525765889505] [50, 100, 52, (-0.9878709556921021, 1.5201429877791235), 0.8183424648045935] [50, 100, 47, (-0.9880598397493002, 1.4514110195907763), 0.8222490702279951] [50, 100, 50, (-0.9879350966628759, 1.5017073723940886), 0.8186764955583364] [50, 100, 47, (-0.98763930925989, 1.517007946626109), 0.8225781541777649] CPU time: 60.00 s, Wall time: 60.83 s
print (n, reps, trials, mean([x[3][0] for x in mles]), std([x[3][0] for x in mles]), mean([x[3][1] for x in mles]), std([x[3][1] for x in mles]))
(50, 100, 5, -0.9878813131702978, 0.00015316627305464838, 1.5033926111316191, 0.030470561504457778)
a=-0.98; b=1.5; BetaD = RealDistribution('beta', [a+1, b+1],seed=0) show(BetaD.plot(xmin=0,xmax=1),figsize=[7,2]);
ascii_art(ts[4]) # labelled transmission tree
____1______ / / __2___ i1 / / i0 _3___ / / i4 4__ / / i3 i2

Balanced Tree Network

g=graphs.BalancedTree(3,2).to_directed() g.show()

To Make Latex tikz-graph see http://doc.sagemath.org/html/en/reference/graphs/sage/graphs/graph_latex.html.

H = graphs.BalancedTree(2,3).to_directed() #H = graphs.BalancedTree(3,2).to_directed() H.set_latex_options( graphic_size=(5,5), vertex_size=0.2, edge_thickness=0.04, edge_color='green', vertex_color='green', vertex_label_color='red', format='tkz_graph', #tkz_style='Art', layout='acyclic' )
#view(H) #latex(H)
#latex(H)

Just checking prob for left-branching d-shark transmission trees (ignoring all labels) under balanced tree network - for peace of mind :)

d,h=3,2 g=graphs.BalancedTree(d,h).to_directed() g.show()
d,h=3,3 ts=[transmissionProcessTC(graphs.BalancedTree(d,h).to_directed(),0) for _ in range(10000)] #myDict=CountsDict(ts) #myDict=CountsDict([forgetLeafLabels(t) for t in ts]) myDict=CountsDict([forgetAllLabels(t) for t in ts]) print len(myDict) #myDict
1

The unlabelled transmission tree is unique.

for t in myDict.keys(): # d,h=3,3 "left-branching 3-shark" tree print ascii_art(t)
_______________________________o________________________________ / / ________________________o__________________________ ________o__________ / / / / ____________o_____________ ________o__________ _______o________ _o__ / / / / / / / / o ________o__________ _______o________ _o__ ___o_____ _o__ o__ o / / / / / / / / / / / / _______o________ _o__ ___o_____ _o__ o__ o o _o__ o__ o o_ o / / / / / / / / / / / / / / / / ___o_____ _o__ o__ o o _o__ o__ o o_ o o__ o o_ o o o / / / / / / / / / / / / / / / / o _o__ o__ o o_ o o__ o o_ o o o o_ o o o / / / / / / / / / / / / o__ o o_ o o o o_ o o o o o / / / / / / o_ o o o o o / / o o
# so long so far... could pursue the rank placements on balanced trees
graphs.BalancedTree(2,6).to_directed().order()
127
%time # demo of the mle for a BalancedTree(2,k) graph with k=4,5,6 or n=31,63,127 nodes and 1 or 10 sampled trees c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1 c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1 mles=[] d,h=2,6 n=graphs.BalancedTree(d,h).to_directed().order() reps=1 # all reps have the same unlabelled transmission tree topology trials=2 for i in range(trials): ts=[transmissionProcessTC(graphs.BalancedTree(d,h).to_directed(),0) for _ in range(reps)] spc=splitPairsCounts(ts) def negLkl(AB): return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1]) mle=minimize_constrained(negLkl,[c_1,c_2],[0.0,0.0],disp=0) x = [d,h, n,reps, i, mle] print x mles.append(x) #mles
[2, 6, 127, 1, 0, (-0.3259079515975824, -0.05752026912960705)] [2, 6, 127, 1, 1, (-0.3259079515975824, -0.05752026912960705)] CPU time: 2.50 s, Wall time: 2.86 s
#[2, 6, 127, 1, 0, (-0.3259079515975824, -0.05752026912960705)] #[2, 7, 255, 1, 0, (-0.3694074748854663, -0.10600727506430978)] #[2, 8, 511, 1, 0, (-0.39276938517987087, -0.1329531458925401)] #[2, 9, 1023, 1, 0, (-0.4051981012968259, -0.14770363723074506)] #[2, 10, 2047, 1, 0, (-0.41245772002324016, -0.15622461056846243)]
#ts=[transmissionProcessTC(graphs.BalancedTree(2,3).to_directed(),0) for _ in range(5)] ts=[transmissionProcessTC(graphs.BalancedTree(3,2).to_directed(),0) for _ in range(5)] ascii_art(ts[0]) # labelled transmission tree is always the "left-branching 3-shark" ignoring labels
_____________1______________ / / __________2___________ __3___ / / / / _____5______ __4____ _6___ i4 / / / / / / i0 __7___ __8___ i10 12_ i6 / / / / / / _9___ i7 10_ i11 i1 i5 / / / / 11_ i9 i3 i12 / / i2 i8
T = ts[4] #view(T) #latex(T)

Toroidal Grid Network

def toroidal2DGrid(n): G = graphs.GridGraph([n,n]) #G.show() # long time G.add_edges([((i,0),(i,n-1)) for i in range(n)]) G.add_edges([((0,i),(n-1,i)) for i in range(n)]) G.relabel() return G
ts=[transmissionProcessTC(toroidal2DGrid(10).to_directed(),0) for _ in range(10)] # gives several distinct unlabelled trees depending on the sequence of infection events d=CountsDict([forgetAllLabels(t) for t in ts]) len(d) d.values()
10 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
d=CountsDict([justTree(t) for t in ts]) # gives several distinct trees depending on the sequence of infection events len(d) d.values()
10 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
%time # demo of the mle - way too long... c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1 c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1 # simulation settings m=10 n=m^2 reps=10 repeatMLE=5 for i in range(repeatMLE): ts=[transmissionProcessTC(toroidal2DGrid(m).to_directed(),0) for _ in range(reps)] spc=splitPairsCounts(ts) def negLkl(AB): return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1]) mle=minimize_constrained(negLkl,[c_1,c_2],[0.0,0.0],disp=0) print [i, n,reps,mle]
[0, 100, 10, (-0.7282546466954618, -0.25283759776817866)] [1, 100, 10, (-0.7292292718997252, -0.2201738282763696)] [2, 100, 10, (-0.7391465680059263, -0.34600059265553634)] [3, 100, 10, (-0.7646102471001511, -0.3748667219996737)] [4, 100, 10, (-0.7249471325940556, -0.20176053588330745)] CPU time: 130.43 s, Wall time: 132.76 s

Let us see graph in latex.

H = toroidal2DGrid(3).to_directed() H.set_latex_options( graphic_size=(5,5), vertex_size=0.2, edge_thickness=0.04, edge_color='green', vertex_color='green', vertex_label_color='red', format='tkz_graph', #tkz_style='Art', #layout='acyclic' )
#view(H) #latex(H)
ts=[transmissionProcessTC(toroidal2DGrid(3).to_directed(),0) for _ in range(10)]
#view(ts[0])
#latex(ts[0])
#view(ts[1])
#latex(ts[1])
#view(ts[3]) #latex(T)
#latex(ts[3])

3D Toroidal Grid

def toroidal3DGrid(n): G = graphs.GridGraph([n,n,n]) #G.show() # long time G.add_edges([((0,i,j),(n-1,i,j)) for i in range(n) for j in range(n)]) G.add_edges([((i,0,j),(i,n-1,j)) for i in range(n) for j in range(n)]) G.add_edges([((i,j,0),(i,j,n-1)) for i in range(n) for j in range(n)]) G.relabel() return G
%time # demo of the mle c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1 c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1 m=10 n=m^3 reps=1 # all reps have the same unlabelled transmission tree topology for i in range(5): ts=[transmissionProcessTC(toroidal3DGrid(m).to_directed(),0) for _ in range(reps)] spc=splitPairsCounts(ts) def negLkl(AB): return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1]) mle=minimize_constrained(negLkl,[c_1,c_2],[0.0,0.0],disp=0) print [n,reps, i, mle]
[1000, 1, 0, (-0.6771966619586535, -0.3721134969526218)] [1000, 1, 1, (-0.6972972063571803, -0.38528953496008167)] [1000, 1, 2, (-0.7112738575413484, -0.34137924850162177)] [1000, 1, 3, (-0.6613671977776394, -0.33657729645960566)] [1000, 1, 4, (-0.695638773816162, -0.34113181129019937)] CPU time: 317.66 s, Wall time: 337.35 s

Some Random Contact Networks

Erdos-Renyi

def ErdosReyniConnectedCompOf0(n,p,reps): '''return reps many transmission trees from the connected component containing initial infection vertex 0''' ts=[] i=0; MAXTrials=10000; successfulTrials=0; while (successfulTrials<reps or i>MAXTrials): i=i+1 g0=graphs.RandomGNP(n,p).to_directed() g=g0.subgraph(g0.connected_component_containing_vertex(0)) if g.order()>1: # just making sure we have at least 2 nodes in the connected component containing 0 #print g.order(), g.size() ts.append(transmissionProcessTC(g,0)) successfulTrials=successfulTrials+1 return ts
%time # demo of the mle mleList=[] c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1 c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1 n=100 #Lambdas=[floor(RR(n/2^i)) for i in range(ceil(log(n,2)))]; Lambdas.reverse(); #Lambdas Lambdas=sorted(Set([floor(RR(n/(5/4)^i)) for i in range(ceil(log(n,5/4)))])); for L in Lambdas: prob=RR(L/n) # edge prob in Erdos-Reyni random graph on n nodes = lambda/n where lambda = expected degree of a node reps=30 # all reps have the same unlabelled transmission tree topology for i in range(5): ts= ErdosReyniConnectedCompOf0(n,prob,reps) spc=splitPairsCounts(ts) def negLkl(AB): return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1]) mle=minimize_constrained(negLkl,[c_1,c_2],[0.0,0.0],disp=0) # mean number of connected components = pop size = # leaves in transmission tree meanNumCc=RR(mean([(t.node_number()+1)/2 for t in ts])) mleL=[n, prob, L, reps, meanNumCc, mle] print mleL mleList.append(mleL)
n=100.; print log(n)/n # edge probability threshold for getting a connected component
0.0460517018598809
n=100; print log(100.) # average node degree threshold for getting a connected component
4.60517018598809
%time # demo of the mle - way too long... c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1 c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1 # simulation settings mles=[] n,prob,reps,repeatMLE=100,0.05,10,5 #50,5/49.,100,5 for i in range(repeatMLE): ts=ErdosReyniConnectedCompOf0(n,prob,reps) spc=splitPairsCounts(ts) def negLkl(AB): #return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1]) # our standard method - not ok for p>0.0 return negLogLkl_SplitPairCounts2Prod(spc,AB[0],AB[1]) # good rechecker mle=minimize_constrained(negLkl,[c_1,c_2],[.0,.0],disp=0) meanNumCc=RR(mean([(t.node_number()+1)/2 for t in ts])) x = [i, n, prob, reps, mle, negLkl(mle), meanNumCc] print x mles.append(x) y=mean([x[4][0] for x in mles]),std([x[4][0] for x in mles]), mean([x[4][1] for x in mles]),std([x[4][1] for x in mles]); [x.N(digits=4) for x in y]
[0, 100, 0.0500000000000000, 10, (-0.43650558632162567, -0.24249152980744434), 3535.9280535485523, 99.4000000000000] [1, 100, 0.0500000000000000, 10, (-0.4562341378877489, -0.19480411238385664), 3534.3677701917973, 99.6000000000000] [2, 100, 0.0500000000000000, 10, (-0.4420223893585999, -0.2586485772614803), 3545.7527804346046, 99.6000000000000] [3, 100, 0.0500000000000000, 10, (-0.509171740912101, -0.3622283066596848), 3550.5430207260933, 99.9000000000000] [3, 100, 0.0500000000000000, 10, (-0.509171740912101, -0.3622283066596848), 3550.5430207260933, 99.9000000000000] [-0.4427, 0.05002, -0.2309, 0.09689] CPU time: 35.98 s, Wall time: 36.32 s [-0.4427, 0.05002, -0.2309, 0.09689] CPU time: 35.98 s, Wall time: 36.32 s
# n,prob,reps,repeatMLE=50,0.04,30,5 #[0, 50, 2, 1.00000000000000, 30, (-0.7667591970714925, -0.5672234464838595), 2773.4718576096257, 37.5000000000000] #[1, 50, 2, 1.00000000000000, 30, (-0.7214867804712249, -0.5415563735009684), 2571.6445754280776, 34.4000000000000] #[2, 50, 2, 1.00000000000000, 30, (-0.7393765497380875, -0.525764191886687), 2701.583541657241, 36.4000000000000] #[3, 50, 2, 1.00000000000000, 30, (-0.7482689119705859, -0.5327256368435153), 2883.349138665519, 38.3333333333333] #[4, 50, 2, 1.00000000000000, 30, (-0.7660644698091582, -0.5746573248509714), 2740.2940119367827, 36.8333333333333] #[-0.7484, 0.01907, -0.5484, 0.02150] # n,prob,reps,repeatMLE=50,0.06,30,5 #[0, 50, 2, 1.00000000000000, 30, (-0.5951261508841413, -0.38549653008638246), 3783.127909979816, 45.6666666666667] #[1, 50, 2, 1.00000000000000, 30, (-0.6303056569753376, -0.43061971591340364), 3900.4206600762795, 47.2000000000000] #[2, 50, 2, 1.00000000000000, 30, (-0.6098958904200529, -0.396836976185501), 3909.283696080642, 47.1666666666667] #[3, 50, 2, 1.00000000000000, 30, (-0.5563622923900977, -0.3258917498969631), 3967.752465957892, 47.5000000000000] #[4, 50, 2, 1.00000000000000, 30, (-0.6139741520197212, -0.4041535631904723), 3891.4915292888013, 47.0333333333333] #[-0.6011, 0.02799, -0.3886, 0.03879] # n,prob,reps,repeatMLE=50,3/49.,30,5 [0, 50, 3, 1.00000000000000, 30, (-0.5983777634278724, -0.4291393006309947), 3976.460464836187, 47.6666666666667] [1, 50, 3, 1.00000000000000, 30, (-0.5707923610262577, -0.3961141455439096), 4005.615823705353, 47.8333333333333] [2, 50, 3, 1.00000000000000, 30, (-0.6183197359092514, -0.4261981038048322), 3949.032714636033, 47.5666666666667] [3, 50, 3, 1.00000000000000, 30, (-0.6048013904814242, -0.37676154716542365), 3773.912360800056, 45.6333333333333] [4, 50, 3, 1.00000000000000, 30, (-0.5593275350868996, -0.290756860450979), 3891.852472272532, 46.9000000000000] [-0.5903, 0.02450, -0.3838, 0.05637] # [-0.8544, 0.009650, -0.6907, 0.01825] # compare from SmallWorld(n=50,k=3,p=1) # n,prob,reps,repeatMLE=100,5/99.,30,5 - compare with SmallWorld(n=100,k=5,p=1) [-0.5079, 0.02486, -0.2263, 0.03785] # [-0.4106, 0.02340, -0.2228, 0.03605] # n,prob,reps,repeatMLE=50,5/49.,30,5 - compare with SmallWorld(n=50,k=5,p=1) [-0.4674, 0.01723, -0.1759, 0.02064] [-0.3682, 0.04354, -0.1984, 0.03857] # n,prob,reps,repeatMLE=50,5/49.=0.102040816326531,100,5 [-0.3798, 0.01348, -0.2068, 0.01602]

Random Regular Network

d,n=3,10 # n>d>2, and n*d is even H = graphs.RandomRegular(d,n).to_directed() H.set_latex_options( graphic_size=(5,5), vertex_size=0.2, edge_thickness=0.04, edge_color='green', vertex_color='green', vertex_label_color='red', format='tkz_graph', tkz_style='Art', #layout='acyclic' ) view(H)
d3-based renderer not yet implemented
#graphs.RandomRegular?
%time # demo of the mle - way too long... c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1 c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1 # simulation settings mles=[] d,n=4,1000 # n>d>2, and n*d is even reps=1 repeatMLE=5 for i in range(repeatMLE): ts=[transmissionProcessTC(graphs.RandomRegular(d,n).to_directed(),0) for _ in range(reps)] spc=splitPairsCounts(ts) def negLkl(AB): return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1]) mle=minimize_constrained(negLkl,[c_1,c_2],[0.0,0.0],disp=0) x = [i, n,reps, mle] print x mles.append(x)
[0, 1000, 1, (-0.5694710622607956, 0.07235568882556642)] [1, 1000, 1, (-0.5813499039580335, -0.0008921069166473263)] [2, 1000, 1, (-0.5125503757396102, 0.12009937792771662)] [3, 1000, 1, (-0.5972839322945308, 0.005026801639200447)] [4, 1000, 1, (-0.516945909495096, 0.17203343447026423)] CPU time: 189.21 s, Wall time: 193.57 s
#mles
#mles=[[0, 1000, 1, (-0.5694710622607956, 0.07235568882556642)], [1, 1000, 1, (-0.5813499039580335, -0.0008921069166473263)], [2, 1000, 1, (-0.5125503757396102, 0.12009937792771662)], [3, 1000, 1, (-0.5972839322945308, 0.005026801639200447)], [4, 1000, 1, (-0.516945909495096, 0.17203343447026423)]] y=mean([x[3][0] for x in mles]),std([x[3][0] for x in mles]), mean([x[3][1] for x in mles]),std([x[3][1] for x in mles]); [x.N(digits=4) for x in y]
[-0.5555, 0.03854, 0.07372, 0.07434]
#

Small World Random Network

g=graphs.RandomNewmanWattsStrogatz(7, 2, 0.5) # no rewiring is done :( g.show()
import networkx # so we use the networkx implementation g = DiGraph(networkx.watts_strogatz_graph(7,2,0.2)) g.show()
networkx.watts_strogatz_graph?
File: /projects/sage/sage-6.10/local/lib/python2.7/site-packages/networkx-1.10-py2.7.egg/networkx/generators/random_graphs.py Signature : networkx.watts_strogatz_graph(n, k, p, seed=None)
%sh cat /projects/sage/sage-6.10/local/lib/python2.7/site-packages/networkx-1.10-py2.7.egg/networkx/generators/random_graphs.py #, the man dawg!
g.is_connected()
True
g.set_pos(g.layout_circular()) view(g)
d3-based renderer not yet implemented
def findMaxDegAndItsVertex(gr): '''find the maximum degree in a graph and its vertex with smalelst ID''' maxD=0; maxDv=0; for vd in gr.degree_iterator(range(n),True): #print vd if vd[1] > maxD: maxD=vd[1]; maxDv=vd[0]; return maxD,maxDv import networkx # so we use the networkx implementation def ConnectedSmallWorldFromMostpopular(n,k,p,reps): '''return reps many transmission trees from the connected small-world network initialized from the most popular smallest node''' ts=[] i=0; MAXTrials=10000; successfulTrials=0; while (successfulTrials<reps or i>MAXTrials): i=i+1 g0 = DiGraph(networkx.watts_strogatz_graph(n,k,p)) if g0.is_connected(): # just making sure we have connected network #print g0.order(), g0.size() maxDV=findMaxDegAndItsVertex(g0) # to initialize from the most popular smallest node ts.append(transmissionProcessTC(g0,maxDV[1])) successfulTrials=successfulTrials+1 return ts def ConnectedSmallWorldFromAnywhere(n,k,p,reps): '''return reps many transmission trees from the connected connected small-world network initialize from a random node''' ts=[] i=0; MAXTrials=10000; successfulTrials=0; while (successfulTrials<reps or i>MAXTrials): i=i+1 g0 = DiGraph(networkx.watts_strogatz_graph(n,k,p)) if g0.is_connected(): # just making sure we have connected network #print g0.order(), g0.size() # to initialize at a random node, say 0 - it's too noisy! ts.append(transmissionProcessTC(g0,0)) successfulTrials=successfulTrials+1 return ts
ts=ConnectedSmallWorldFromAnywhere(10,3,0.5,10)
view(ts[5])
view(ts[1])
def spc2spcRF(spc): '''to turn the split-pair counts into split-pair relative frequencies to interrogate local optimization madness... - should be really using rigorous interval global optimization here!''' s=sum([x[2] for x in spc]) spcRf=[] for i in range(len(spc)): spcRf.append((spc[i][0],spc[i][1],spc[i][2]/s)) return spcRf

demo of the mle - needs numerical TLC...

%time # demo of the mle - way too long... c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1 c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1 # simulation settings mles=[] n,k,p,reps,repeatMLE=50,5,0.10,30,5 for i in range(repeatMLE): #ts=ConnectedSmallWorldFromMostpopular(n,k,p,reps) ts=ConnectedSmallWorldFromAnywhere(n,k,p,reps) #spc=spc2spcRF(splitPairsCounts(ts)) # only needed if freqs need normalization for numerics... spc=splitPairsCounts(ts) def negLkl(AB): # our standard method - not ok for n > 50 due to SICN-circularity's implications for number screen wrt log... #return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1]) #our standard method ok for n>=50 return negLogLkl_SplitPairCounts2Prod(spc,AB[0],AB[1]) # ok for n=50 but not n=100 as it goes to boundary... mle=minimize_constrained(negLkl,[c_1,c_2],[.0,.0],disp=0) x = [i, n, k, p, reps, mle, negLkl(mle)] print x mles.append(x) y=mean([x[5][0] for x in mles]),std([x[5][0] for x in mles]), mean([x[5][1] for x in mles]),std([x[5][1] for x in mles]); [x.N(digits=4) for x in y]
[0, 50, 5, 0.100000000000000, 30, (-0.7901047708585681, -0.5123970145900477), 3959.400691176991] [1, 50, 5, 0.100000000000000, 30, (-0.792985103738636, -0.5320012580821087), 3969.5922900547057] [2, 50, 5, 0.100000000000000, 30, (-0.7660557278845852, -0.4567793609019606), 3997.3087416263606] [3, 50, 5, 0.100000000000000, 30, (-0.8042278913396516, -0.5413159277438977), 3934.9083824469567] [4, 50, 5, 0.100000000000000, 30, (-0.8058510174706832, -0.5226030064194106), 3919.10213741337] [-0.7918, 0.01596, -0.5130, 0.03323] CPU time: 94.41 s, Wall time: 100.74 s
print 1,\ 2
1 2
# n,k,p,reps,repeatMLE=50,2,0.10,1,5 #-0.9880, 0.0001708, 1.463, 0.06736 # for ConnectedSmallWorldFromMostpopular #-0.9613, 0.01639, 0.2630, 1.103 # ConnectedSmallWorldFromAnywhere # n,k,p,reps,repeatMLE=50,2,0.10,30,5 #-0.9652, 0.002863, -0.3828, 0.1171 # ConnectedSmallWorldFromAnywhere - more std error - matters where you start! #-0.9618, 0.003047, -0.4147, 0.03203 # ConnectedSmallWorldFromMostpopular # n,k,p,reps,repeatMLE=50,2,0.20,30,5 #[-0.9395, 0.003329, -0.5707, 0.02880] # ConnectedSmallWorldFromAnywhere # n,k,p,reps,repeatMLE=50,2,0.50,30,5 #[-0.8807, 0.003303, -0.6570, 0.01039] # ConnectedSmallWorldFromAnywhere # n,k,p,reps,repeatMLE=50,2,1.0,30,5 #[-0.8568, 0.01075, -0.6965, 0.01943] # ConnectedSmallWorldFromAnywhere # n,k,p,reps,repeatMLE=50,3,1.0,30,5 - compare with ER(50,3/49) model #[0, 50, 3, 1.00000000000000, 30, (-0.8635534058029828, -0.6890444879379554), 3723.1785805652808] #[1, 50, 3, 1.00000000000000, 30, (-0.8408527225144719, -0.6604981618863403), 3830.8419905583555] #[2, 50, 3, 1.00000000000000, 30, (-0.8637882040562047, -0.7061756425142889), 3726.06304636956] #[3, 50, 3, 1.00000000000000, 30, (-0.8536921622583518, -0.6940200264032614), 3795.859886477451] #[4, 50, 3, 1.00000000000000, 30, (-0.8503343394196402, -0.7036196315204895), 3795.7697716850957] #[-0.8544, 0.009650, -0.6907, 0.01825] # n,k,p,reps,repeatMLE=50,5,1.0,30,5 #[0, 50, 5, 1.00000000000000, 30, (-0.46872394191150984, -0.1706522325446755), 4272.575792123848] #[1, 50, 5, 1.00000000000000, 30, (-0.4624673070275521, -0.15427392762755668), 4272.324189044862] #[2, 50, 5, 1.00000000000000, 30, (-0.46951488058811636, -0.20172631679701244), 4276.462579144995] #[3, 50, 5, 1.00000000000000, 30, (-0.492187467507606, -0.1929439971887712), 4263.705922020358] #[4, 50, 5, 1.00000000000000, 30, (-0.4441155982889501, -0.16004700793846094), 4280.36354017808] #[-0.4674, 0.01723, -0.1759, 0.02064] # n,k,p,reps,repeatMLE=100,5,1.0,30,5 #[0, 100, 5, 1.00000000000000, 30, (-0.4707601633123625, -0.17234296564126989), 10635.95799150573] #[1, 100, 5, 1.00000000000000, 30, (-0.5363716388086307, -0.2676407471091624), 10602.98862147951] #[2, 100, 5, 1.00000000000000, 30, (-0.5047203692358055, -0.2089861380097876), 10615.295023641434] #[3, 100, 5, 1.00000000000000, 30, (-0.5037854832989932, -0.25475858261388856), 10635.368084525608] #[4, 100, 5, 1.00000000000000, 30, (-0.5238938785617278, -0.22761135503827465), 10597.336990398386] #[-0.5079, 0.02486, -0.2263, 0.03785] # n,k,p,reps,repeatMLE=50,5,1.0,100,5 # compare to ER [-0.3798, 0.01348, -0.2068, 0.01602] - but n too small for this #[-0.4667, 0.01843, -0.1787, 0.03319] # n,k,p,reps,repeatMLE=50,5,0.10,30,5 # [-0.7918, 0.01596, -0.5130, 0.03323] ConnectedSmallWorldFromAnywhere #-------------------------------- # ConnectedSmallWorldFromMostpopular---------------- # n,k,p,reps,repeatMLE=50,2,0.0,1,5 #[-0.9878, 0.0003034, 1.492, 0.08432] # - too much std error #[-0.9562, 0.01362, 0.07724, 0.7675] # - too much std error # n,k,p,reps,repeatMLE=50,2,0.0,30,5 #[-0.9878, 0.0001516, 1.514, 0.01222] # n,k,p,reps,repeatMLE=50,2,0.10,30,5 #[-0.9621, 0.002718, -0.4277, 0.08598] # n,k,p,reps,repeatMLE=50,2,0.20,30,5 #[-0.9375, 0.004620, -0.5683, 0.01939] # n,k,p,reps,repeatMLE=50,2,0.50,30,5 #[-0.8632, 0.008181, -0.6471, 0.03722] # n,k,p,reps,repeatMLE=50,2,1.0,30,5 #[-0.8239, 0.01428, -0.6669, 0.01321] # n,k,p,reps,repeatMLE=50,5,0.10,30,5 #[-0.7530, 0.01572, -0.4751, 0.04671] # end of ConnectedSmallWorldFromMostpopular---------
#
# as p approaches 1 the small-world model approached Erdos-Reyni(n, ERProb) k=10; n=100 ERProb=k/(n-1.) ERProb
0.101010101010101
%time # demo of the mle - way too long... c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1 c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1 # simulation settings mles=[] n,k,p,reps,repeatMLE=100,10,1.0,30,5 for i in range(repeatMLE): ts=ConnectedSmallWorldFromAnywhere(n,k,p,reps) spc=splitPairsCounts(ts) def negLkl(AB): return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1]) mle=minimize_constrained(negLkl,[c_1,c_2],[0.0,0.0],disp=0) x = [i, n, k, p, reps, mle] print x mles.append(x) y=mean([x[5][0] for x in mles]),std([x[5][0] for x in mles]), mean([x[5][1] for x in mles]),std([x[5][1] for x in mles]); [x.N(digits=4) for x in y]
[0, 100, 10, 1.00000000000000, 30, (-0.18366815774645348, -0.0622024764611244)] [1, 100, 10, 1.00000000000000, 30, (-0.135273856784625, 0.019714938321309886)] [2, 100, 10, 1.00000000000000, 30, (-0.18847740102144908, -0.026794367265656576)] [3, 100, 10, 1.00000000000000, 30, (-0.14693200683533253, -0.003317084447332199)] [4, 100, 10, 1.00000000000000, 30, (-0.242606677603161, -0.08930663649334744)] [-0.1794, 0.04212, -0.03238, 0.04393] CPU time: 275.38 s, Wall time: 301.08 s

Preferential Attachment Random Network

n,m=15,2; g=graphs.RandomBarabasiAlbert(n,m) g.show()
def findMaxDegAndItsVertex(gr): '''find the maximum degree in a graph and its vertex with smalelst ID''' maxD=0; maxDv=0; for vd in gr.degree_iterator(range(n),True): #print vd if vd[1] > maxD: maxD=vd[1]; maxDv=vd[0]; return maxD,maxDv
findMaxDegAndItsVertex(g)
(10, 2)
n,m=15,10; g=graphs.RandomBarabasiAlbert(n,m) print findMaxDegAndItsVertex(g) g.show()
(14, 10)
def PrefAttachmentFromMostPopular(n,m,reps): '''return reps many transmission trees from the preferential attachment model with initial infection from the smallest node with the maximum degree''' ts=[] for i in range(reps): g=graphs.RandomBarabasiAlbert(n,m) maxDV=findMaxDegAndItsVertex(g) ts.append(transmissionProcessTC(g.to_directed(),maxDV[1])) return ts
%time # demo of the mle - way too long... c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1 c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1 # simulation settings mles=[] n,m,reps,repeatMLE=100,1,30,10 for i in range(repeatMLE): ts=PrefAttachmentFromMostPopular(n,m,reps) spc=splitPairsCounts(ts) def negLkl(AB): return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1]) mle=minimize_constrained(negLkl,[c_1,c_2],[0.0,0.0],disp=0) x = [i, n, m, reps, mle] print x mles.append(x)
[0, 100, 1, 30, (-0.37414180696123656, -0.8290932447900436)] [1, 100, 1, 30, (-0.3236397755375402, -0.8314890535433167)] [2, 100, 1, 30, (-0.2468637183101474, -0.8042617935543253)] [3, 100, 1, 30, (-0.33050561419852764, -0.8159645696149106)] [4, 100, 1, 30, (-0.3070428692968694, -0.8171910942641389)] [5, 100, 1, 30, (-0.3930318773221692, -0.8290483487359996)] [3, 100, 1, 30, (-0.33050561419852764, -0.8159645696149106)] [4, 100, 1, 30, (-0.3070428692968694, -0.8171910942641389)] [5, 100, 1, 30, (-0.3930318773221692, -0.8290483487359996)] [6, 100, 1, 30, (-0.28123221418745825, -0.8123073810368434)] [7, 100, 1, 30, (-0.3861042546882455, -0.8353259786808699)] [8, 100, 1, 30, (-0.35248668135875144, -0.8321393837645847)] [9, 100, 1, 30, (-0.27975485700284897, -0.8080369576128273)] CPU time: 3120.90 s, Wall time: 3211.50 s
mles
[[0, 100, 1, 30, (-0.37414180696123656, -0.8290932447900436)], [1, 100, 1, 30, (-0.3236397755375402, -0.8314890535433167)], [2, 100, 1, 30, (-0.2468637183101474, -0.8042617935543253)], [3, 100, 1, 30, (-0.33050561419852764, -0.8159645696149106)], [4, 100, 1, 30, (-0.3070428692968694, -0.8171910942641389)], [5, 100, 1, 30, (-0.3930318773221692, -0.8290483487359996)], [6, 100, 1, 30, (-0.28123221418745825, -0.8123073810368434)], [7, 100, 1, 30, (-0.3861042546882455, -0.8353259786808699)], [8, 100, 1, 30, (-0.35248668135875144, -0.8321393837645847)], [9, 100, 1, 30, (-0.27975485700284897, -0.8080369576128273)]]
######### #n,m,reps,repeatMLE=100,2,30,10 #mles=[[0, 100, 2, 30, (-0.22665379663514496, -0.6500365441924358)], [1, 100, 2, 30, (-0.25247638037582687, -0.6740522715490318)], [2, 100, 2, 30, (-0.27092350497911694, -0.6818376619669556)], [3, 100, 2, 30, (-0.20801219786250535, -0.6629331739317452)], [4, 100, 2, 30, (-0.20181548669784008, -0.6735625013240737)], [5, 100, 2, 30, (-0.21620078970637727, -0.6622066332612844)], [6, 100, 2, 30, (-0.2897399652564357, -0.667458689497513)], [7, 100, 2, 30, (-0.26265154465133816, -0.6386910379716887)], [8, 100, 2, 30, (-0.28873448085827796, -0.6603197512893558)], [9, 100, 2, 30, (-0.22625577133555644, -0.6756400458385884)]] #mle mean and std-err #[-0.2443, 0.03283, -0.6647, 0.01294] ########## #n,m,reps,repeatMLE=100,1,30,10 #mles=[[0, 100, 1, 30, (-0.37414180696123656, -0.8290932447900436)], [1, 100, 1, 30, (-0.3236397755375402, -0.8314890535433167)], [2, 100, 1, 30, (-0.2468637183101474, -0.8042617935543253)], [3, 100, 1, 30, (-0.33050561419852764, -0.8159645696149106)], [4, 100, 1, 30, (-0.3070428692968694, -0.8171910942641389)], [5, 100, 1, 30, (-0.3930318773221692, -0.8290483487359996)], [6, 100, 1, 30, (-0.28123221418745825, -0.8123073810368434)], [7, 100, 1, 30, (-0.3861042546882455, -0.8353259786808699)], [8, 100, 1, 30, (-0.35248668135875144, -0.8321393837645847)], [9, 100, 1, 30, (-0.27975485700284897, -0.8080369576128273)]] #mle mean and std-err #[-0.3275, 0.04932, -0.8215, 0.01121] y=mean([x[4][0] for x in mles]),std([x[4][0] for x in mles]), mean([x[4][1] for x in mles]),std([x[4][1] for x in mles]); [x.N(digits=4) for x in y]
[-0.3275, 0.04932, -0.8215, 0.01121]

Now let us initialize randomly from one of the first m nodes at the beginning of the preferential attachment algorithm

%time # demo of the mle - way too long... c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1 c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1 # simulation settings mles=[] n,m=100,2 reps=30 repeatMLE=10 for i in range(repeatMLE): ts=[transmissionProcessTC(graphs.RandomBarabasiAlbert(n,m).to_directed(),0) for _ in range(reps)] spc=splitPairsCounts(ts) def negLkl(AB): return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1]) mle=minimize_constrained(negLkl,[c_1,c_2],[0.0,0.0],disp=0) x = [i, n, m, reps, mle] print x mles.append(x)

Drawing some figures

n,m=25,1 H = graphs.RandomBarabasiAlbert(n,m) H.set_latex_options( graphic_size=(8,8), vertex_size=0.1, edge_thickness=0.04, edge_color='green', vertex_color='green', vertex_label_color='red', format='tkz_graph', tkz_style='Art', #layout='acyclic' ) view(H)
d3-based renderer not yet implemented
maxDV=findMaxDegAndItsVertex(H) print maxDV T=transmissionProcessTC(H.to_directed(),maxDV[1])
(12, 2)
ascii_art(T)
______1________ / / _________2___________ i7 / / ________3__________ i23 / / ___4____ ___5_____ / / / / ____6_____ i11 __13__ 11_ / / / / / / _____7______ i5 21_ i22 i8 i24 / / / / _______8________ i10 i6 i20 / / __________9____________ i16 / / __________10__________ i17 / / __________12__________ ___18____ / / / / _______14________ _17__ i3 __22__ / / / / / / 16_ __15___ 19_ i0 23_ i19 / / / / / / / / i2 i18 __20__ i21 i1 i4 i9 i12 / / _24_ i15 / / i13 i14
#latex(H)
#latex(T)