Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupport News AboutSign UpSign In
| Download

Full Likelihood Inference Procedure based on SFS

Project: UnfoldingSFS
Views: 350

UnfoldingSFS

Full Likelihood Inference from the Site Frequency Spectrum of a Non-recombining Locus

Raazesh Sainudiin and Amandine Véber

This is a public code accompanying the research article:

The Sampler

%auto import pylab import numpy as np # set random seed of the random number generator set_random_seed(0) np.random.seed(int(1235)) # updating the topology def DProduct(j,betaSpl): '''compute the product terms appearing in the probability of a split''' if j<2: prod=1 else: x=j+betaSpl prod=x for k in range(1,j-1): prod= prod*(x-k) return prod def BetaSplit(betaSpl,m,J): '''indicator of m being split into J(>= m/2) and m-J conditionally on no previous split, and the corresponding event probability, under the Beta-splitting model''' if (J==ceil(m/2)): I=1 w=1 else: mm = floor(m/2) if(betaSpl==0): #Kingman case D = sum(2/(m-1) for k in range(mm+1,J+1)) if (m%2==0): D = D + 1/(m-1) p=(2/(m-1))/D else: D = 2*(sum(binomial(m,k)* DProduct(k,betaSpl)* DProduct(m-k,betaSpl) for k in range(mm+1,J+1))) if (m%2==0): D = D+ binomial(m,mm)* ((DProduct(mm,betaSpl))**2) p = 2*(binomial(m,J))*DProduct(J,betaSpl)*DProduct(m-J,betaSpl)/D U = np.random.random_sample() if (U<p): I=1 w=p else: I=0 w=1-p return [I,w] def UncondSplit(betaSpl,m,J): '''compute the probability that m is split into J>=m/2 and m-J in Beta-splitting model without calling the beta functions''' if(betaSpl==0): #Kingman case if(2*J==m and m%2==0): p=1/(m-1) else: p=2/(m-1) else: mm= floor(m/2) D = 2*(sum(binomial(m,k)*DProduct(k,betaSpl)*DProduct(m-k,betaSpl) for k in range(mm+1,m))) if (m%2==0): aux2 = binomial(m,mm)* ((DProduct(mm,betaSpl))**2) D= D+ aux2 N= binomial(m,J)* DProduct(J,betaSpl)* DProduct(m-J,betaSpl) if (2*J==m): p=N/D else: p=2*N/D return p def IndexSplit(n,k,V): '''index of largest block if larger than k, according to n-vector V''' m=k-1 for i in range(n-1,k-1,-1): if (V[i]>0): m=i break return m def Sstep(betaSpl,n,j,C,F,w): '''column filling for j less than n/2 in Beta-splitting model, with control sequence C and proposal weight w. F[k,0] gives the size of the largest edge created by the split between epochs k-1 and k, F[k,n] gives the size of the edge split between epochs k-1 and k.''' J=n-j F[2,n]=n if (sum(F[2,J+1:n])==1): F[2,J]=0 else: if ( (C[J]>0) or ( (j==floor(n/2)) and (n%2 == 1) ) ): F[2,J]=1 else: A= BetaSplit(betaSpl,n,J) F[2,J]= A[0] w= w*(A[1]) if F[2,J]>0: F[2,0]=J C[J]=0 for k in range(3,j+2): if (sum(F[k,J+1:n])>0): F[k,J]=0 else: m= IndexSplit(n,J,F[k-1,0:n+1]) if (C[J]>0): F[k,J]=1 F[k,n]=m C[J]=0 if (F[k-1,J]==0): F[k,0]=J else: if (m<J): F[k,J]=0 elif (m==J): U=np.random.uniform() q= (m-1)/(n-k+1) if (U< q): F[k,J]=0; F[k,n]=J; w= w*q else: F[k,J]=1 w=w*(1-q) elif (J==ceil(m/2)): F[k,J]=1 else: F[k,n]=m A=BetaSplit(betaSpl,m,J) F[k,J]=A[0] if (F[k,J]==1): F[k,0]=J w= w*(A[1]) return [F,w] def Hstep(betaSpl,n,C,F,w): '''column filling for j=n/2, n even and j>1''' j=floor(n/2) F[2,n]=n if (sum(F[2,j+1:n])==0): F[2,j]=2 F[2,0]=j C[j]=0 else: F[2,j]=0 if (F[2,j]==0): for k in range(3,j+2): if (sum(F[k,j+1:n])>0): F[k,j]=0 else: m= IndexSplit(n,j,F[k-1,0:n+1]) if (C[j]>0): F[k,j]=1 F[k,n]=m C[j]=0 if (F[k-1,j]==0): F[k,0]=j else: if (m<j): F[k,j]=0 elif (m==j): U=np.random.uniform() q= (m-1)/(n-k+1) if (U<q): F[k,j]=0 F[k,n]=j w= w*q else: F[k,j]=1 w= w*(1-q) elif (j==ceil(m/2)): F[k,j]=1 F[k,0]=j else: F[k,n]=m A = BetaSplit(betaSpl,m,j) F[k,j]= A[0] if (F[k,j]==1): F[k,0]=j w= w*(A[1]) else: F[3,j]=1 F[3,n]=j for k in range(4,j+2): m= IndexSplit(n,j,F[k-1,0:n+1]) if (m<j): F[k,j]=0 else: U=np.random.uniform() q= (m-1)/(n-k+1) if (U<q): F[k,j]=0 F[k,n]=m w= w*q else: F[k,j]=1 w= w*(1-q) return [F,w] def Lstep(betaSpl,n,j,C,F,w): '''column filling for j larger than n/2 in Beta-splitting model, with control sequence C''' J=n-j F[2,n]=n F[2,J]=F[2,j] if F[2,J]>0: C[J]=0 for k in range(3,j+2): m=0 for i in range(J+1,n): if F[k-1,i]>F[k,i]: m=i break if (m==0): if ((n-pylab.dot(pylab.arange(J,n,1),F[k-1,J:n]))-k+1+sum(F[k-1,J:n])==0): F[k,J]=F[k-1,J]-1 F[k,n]=J elif ( (C[J]==0) or (sum(F[k,J+1:n])>0) or (F[k-1,J]>1) ): if (F[k-1,J]==0): F[k,J]=0 else: U=np.random.uniform() q= F[k-1,J]*(J-1)/(n-k+1-sum(F[k,l]*(l-1) for l in range(J+1,n))) if (U< q): F[k,J]=F[k-1,J]-1 F[k,n]=J w= w*q else: F[k,J]=F[k-1,J] w=w*(1-q) else: F[k,J]=F[k-1,J] elif (m > 2*J): F[k,n]=m F[k,J]=F[k-1,J]+(F[k,m-J]-F[k-1,m-J]) elif ( (m==2*J) and (sum(F[k,J+1:m])-sum(F[k-1,J+1:m])==0) ): F[k,n]=m F[k,J]=F[k-1,J]+2 F[k,0]=J elif ( (m <= 2*J) and (sum(F[k,J+1:m])-sum(F[k-1,J+1:m])>0) ): F[k,n]=m F[k,J]=F[k-1,J] elif ( (J==ceil(m/2)) and (sum(F[k,J+1:m])-sum(F[k-1,J+1:m])==0) ): F[k,n]=m F[k,J]=F[k-1,J]+1 F[k,0]=J elif ( (m < 2*J) and (sum(F[k,J+1:m])-sum(F[k-1,J+1:m])==0) ): F[k,n]=m if ( (C[J]>0) and (F[k-1,J]==0) ): F[k,J]=1 F[k,0]=J else: A= BetaSplit(betaSpl,m,J) F[k,J]=F[k-1,J] + A[0] if (A[0]==1): F[k,0]=J w= w*(A[1]) return [F,w] def Twostep(betaSpl,n,C,F,w): '''column filling for j=n-2 in Beta-splitting model, with control sequence C useless here''' j=n-2 F[2,n]=n F[2,2]=F[2,j] for k in range(3,j+2): m=0 for i in range(3,n): if F[k-1,i]>F[k,i]: m=i break if (m==0): F[k,2]=F[k-1,2]-1 F[k,n]=2 elif (m > 4): F[k,n]=m F[k,2]=F[k-1,2]+F[k,m-2]-F[k-1,m-2] elif ( (m==4) and (F[k,3]-F[k-1,3]==0) ): F[k,n]=m F[k,2]=F[k-1,2]+2 F[k,0]=2 elif ( (m==4) and (F[k,3]-F[k-1,3]>0) ): F[k,n]=m F[k,2]=F[k-1,2] elif (m==3): F[k,n]=m F[k,2]=F[k-1,2]+1 F[k,0]=2 C[j]=0 return [F,w] def Onestep(betaSpl,n,C,F,w): '''column filling for j=n-1 in Beta-splitting model, with control sequence C useless here''' j=n-1 F[2,n]=n F[2,1]=F[2,j] for k in range(3,j+1): m=0 for i in range(3,n): if F[k-1,i]>F[k,i]: m=i break if (m>2): F[k,1]=F[k-1,1]+(F[k,m-1]-F[k-1,m-1]) F[k,n]=m else: F[k,1]=F[k-1,1]+2 F[k,0]=1 F[k,n]=2 F[n,1]=n F[n,n]=2 F[n,0]=1 C[j]=0 return [F,w] # Distributing the mutations def GammaDensity(k,theta,x): '''Density of a Gamma(k,theta) at point x. theta inverse of the rate parameter in companion paper to comply with Python definition of gamma(a,b) distribution''' y= 1/(gamma(k)*(theta**k)) z= y*(x**(k-1))*(exp(-x/theta)) return z def Mutate(A,n,j,s,theta,F,M,T,w1,w2): '''distribute the s mutations carried by n-j individuals using F and the information T on time already gathered. Updates at the same time the weight w1 associated with mutations and w2 associated with the times''' J=n-j if (s==0): M[0:n+1,J]= pylab.zeros(n+1) else: V=[(F[k,J]*T[k]) for k in range(0,n+1)] L= sum(V[2:j+2]) W=[(V[k]/L) for k in range(0,n+1)] M[0:n+1,J]= np.random.multinomial(s,W) d = multinomial([int(i) for i in M[0:n+1,J]]) dd = RR(d) for k in range(0,n+1): dd = dd * (RR(W[k])**(RR(M[k,J]))) w1 = RR(w1)*dd for k in range (2,j+2): if F[k,J]>0: a=1+(sum(M[k,J:n])) b=1/(A[k]+theta*(sum(F[k,J:n]))) T[k]= np.random.gamma( a, b ) w2 = (w2) * (GammaDensity(RR(a),RR(b),RR(T[k]))) return [M,T,w1,w2] #Producing F and M from S (global algorithm) def Control(n,S): '''build the control vector C from S, with sample size n''' C=pylab.zeros(n+1) for k in range(0,n+1): if (S[k]>0): C[k]=1 return C def BuildingFM(A,betaSpl,n,S,theta,F,M,T,w,w1,w2,k1,k2): '''modify topology F, mutation matrix M and epoch time vector T consistent with the SFS S, from column n-k1 to n-k2 in Beta-splitting model and with a priori rates A. Updates the weights w, w1 and w2 as well.''' C=Control(n,S) X=[F,w] Y=[M,T,w1,w2] if(n==2): X[0][2,0:3]=[1,2,2] C[1]=0 Y=Mutate(A,n,1,S[1],theta,X[0],M,T,w1,w2) elif(n==3): if k1==1: X[0][2,0:4]=[2,1,1,3] C[2]=0 Y=Mutate(A,n,1,S[2],theta,X[0],M,T,w1,w2) if k2==2: X[0][3,0:4]=[1,3,0,2] C[1]=0 Y=Mutate(A,n,2,S[1],theta,X[0],Y[0],Y[1],Y[2],Y[3]) else: K2=min(ceil(n/2)-1,k2) for j in range(k1,K2+1): X=Sstep(betaSpl,n,j,C,X[0],X[1]) Y=Mutate(A,n,j,S[n-j],theta,X[0],Y[0],Y[1],Y[2],Y[3]) if (n%2 == 0) and (k2 > K2) and (k1 <= floor(n/2)): j=floor(n/2) X=Hstep(betaSpl,n,C,X[0],X[1]) Y=Mutate(A,n,j,S[j],theta,X[0],Y[0],Y[1],Y[2],Y[3]) K1=max(k1,floor(n/2)+1) K2=min(k2,n-3) for j in range(K1,K2+1): X=Lstep(betaSpl,n,j,C,X[0],X[1]) Y=Mutate(A,n,j,S[n-j],theta,X[0],Y[0],Y[1],Y[2],Y[3]) if (k1 < n-1) and (k2 > n-3): X=Twostep(betaSpl,n,C,X[0],X[1]) Y=Mutate(A,n,n-2,S[2],theta,X[0],Y[0],Y[1],Y[2],Y[3]) if (k2 == n-1): X=Onestep(betaSpl,n,C,X[0],X[1]) Y=Mutate(A,n,n-1,S[1],theta,X[0],Y[0],Y[1],Y[2],Y[3]) return [X[0],Y[0],Y[1],X[1],Y[2],Y[3]] def MakeHistory(A,betaSpl,n,S,theta): '''return SFS-history (F,M,T) compatible with SFS S, and proposal weights (w,w1,w2). n sampling size, A vector of a priori rates for epoch times, betaSpl a priori parameter of Beta-splitting model, theta scaled mutation rate''' M = pylab.zeros((n+1,n+1)) F = pylab.zeros((n+1,n+1)) T = pylab.zeros(n+1) for k in range(0,n+1): if (A[k]==0): T[k]=1 else: T[k]=np.random.exponential(1/A[k]) k1=1 k2=n-1 X=BuildingFM(A,betaSpl,n,S,theta,F,M,T,1,1,1,k1,k2) return X def MakeHistory0(A,betaSpl,n,theta): '''return F and M-matrices independent of the mutation, and times sampled according to the a priori coalescence rates''' S = pylab.zeros(n+1) M = pylab.zeros((n+1,n+1)) F = pylab.zeros((n+1,n+1)) T = [1 for k in range(0,n+1)] k1=1 k2=n-1 X= BuildingFM(A,betaSpl,n,S,theta,F,M,T,1,1,1,k1,k2) MM = pylab.zeros((n+1,n+1)) TT = pylab.zeros(n+1) for k in range(0,n+1): if (A[k]==0): TT[k]=1 else: TT[k]=np.random.exponential(1/A[k]) return [X[0],MM,TT,X[3],1.,ProbOfTHom(A,TT,n)]

Simulating Data

%auto #Making SFS from F and T def MakeSFS(F,T,theta,n): '''create SFS out of L=TF and theta = per locus scaled mutation rate''' S=pylab.zeros(n+1) L=pylab.dot(T,F) for i in range(1,n): S[i]=np.random.poisson(theta*L[i]) return S #Producing epoch times def KingmanRates(n): '''return Kingman rates''' return pylab.array([0.,0.]+[k*(k-1)/2.0 for k in range(2,n+1)]) def EpochtimeConst(R,N0): '''vector of exponential times with rates given in vector R and constant pop size N0''' return [np.random.exponential(N0/r) for r in R] def EpochtimeExpGr(Rk,N0,g,s): '''return a sample of time Tk of epoch k with sample-size dependent rate Rk, starting from time s, in an exponentially growing population with growth rate g from initial pop size N0''' if g==0: return np.random.exponential(N0/Rk) else: return (1./g)*log(1.0 - (exp(-g*s)*N0*g*(log(np.random.uniform())))/Rk) def EpochTimesExpGr(n,R,N0,g): '''return a sample vector of inhomogenous exponential RVs, with sample-size dependent epoch-specific coalescent rates given by vector R in an exponentially growing population with growth rate g from initial pop size N0''' t=pylab.zeros(n+1) t[n]=EpochtimeExpGr(R[n],N0,g,0) for k in range(n-1,1,-1): t[k]=EpochtimeExpGr(R[k],N0,g,t[n:k+1:-1].sum()) return t def EpochtimeBtl(k,N0,a,b,eps,s): '''return a sample of time Tk of epoch k starting from time s, under Kingman coalescent with pop size eps times N0 between times a and b, and N0 otherwise''' if (s>=b): return np.random.exponential(2*N0/(k*(k-1))) else: U= np.random.uniform() if (s>a): if ( U < 1- exp(-k*(k-1)*(b-s)/( 2*eps*N0 )) ): return -2*eps*N0*log(1-U)/( k*(k-1) ) else: return -2*N0*log(1-U)/( k*(k-1) ) - (b-s)*(1/eps - 1) else: if ( U < 1- exp(-k*(k-1)*(a-s)/(2*N0)) ): return -2*N0*log(1-U)/(k*(k-1)) elif ( U < 1- exp(- k*(k-1)*( (b-a)/eps + a-s )/(2*N0))): return eps*(-2*N0*log(1-U)/(k*(k-1)) + (a-s)*(1/eps - 1) ) else: return -2*N0*log(1-U)/(k*(k-1)) - (b-a)*(1/eps - 1) def EpochTimesBtl(n,N0,a,b,eps): '''return a sample vector of inhomogenous exponential RVs, under Kingman coalescent with pop size eps times N0 between times a and b, and N0 otherwise. n is sample size''' t=pylab.zeros(n+1) t[n]=EpochtimeBtl(n,N0,a,b,eps,0) for k in range(n-1,1,-1): t[k]=EpochtimeBtl(k,N0,a,b,eps,t[n:k+1:-1].sum()) return t #Creating SFS and associated pair (F,T) in classical scenarios def MakeS(betaSpl,N0,g,mu,n): '''make SFS data S from given parameters and return T,F,S for exponential growth model of demography (g=0 is standard Kingman coalescent)''' T=EpochTimesExpGr(n,KingmanRates(n),N0,g) A = [1 for k in range(0,n+1)] # this is just dummy X = MakeHistory0(A,betaSpl,n,mu) S=MakeSFS(X[0],T,mu,n) return [T,X[0],S] def MakeS_Btl(betaSpl,N0,a,b,eps,mu,n): '''make sfs data S from given parameters and return T,F,S for bottleneck model of demography''' T=EpochTimesBtl(n,N0,a,b,eps) A = [1 for k in range(0,n+1)] # this is just dummy X = MakeHistory0(A,betaSpl,n,mu) S=MakeSFS(X[0],T,mu,n) return [T,X[0],S]

Computing Probabilities and Likelihoods

%auto # Probability of a Topology in Beta-splitting model def ProbOfF(betaSpl,f,n): '''compute probability of topology F in (unconditioned) Beta-splitting model, given beta and sample size n''' f[1,n]=1 pr=RR(1.0) for k in range(2,n+1): mk=floor(f[k,n]) lk=floor(f[k,0]) pr=pr*(f[k-1,mk]*(mk-1)/(n-k+1))*UncondSplit(betaSpl,mk,lk) return pr # Densities of rate-parametrized epoch times def ProbOfTHom(r,t,n): '''return the density of the time vector t=[t_2,...t_n] under coalescent model with epoch-dependent rates r(k)''' a2 = RR(1.0) for i in range(2,n+1): a2 = a2 * RR(r[i]) * exp(-(RR(t[i]) * RR(r[i]))) return a2 def ProbOfTExG(g,t,n): '''return the density of the time vector t=[t_2,...t_n] under Kingman coalescent with exponentially growing population with growth rate g''' if(g==0): a=RR(1.0) for k in range(2,n+1,1): kC2=RR(k*(k-1)/2); a = a * kC2 * exp(-kC2*t[k]) else: Tnk = RR(t[n]); a = RR(n*(n-1)/2 * exp(g*Tnk - n*(n-1)/(2*g) * 1 * (exp(g*t[n])-1) ) ); for k in range(n-1,1,-1): kC2=RR(k*(k-1)/2); a = a * kC2 * exp(g*(Tnk+t[k]) - ( kC2/g * exp(g*(Tnk)) * (exp(g*t[k])-1) ) ); Tnk=Tnk+t[k]; return a def ProbOfTBtl(a,b,eps,N0,t,n): '''return the density of the time vector t=[t_2,...,t_n] under the Kingman with bottleneck scenario where pop size is eps*N0 between times a and b, and N0 otherwise''' if (t[n]>= b): bin= RR(n*(n-1)/(2*N0)) x= RR(bin*exp( - bin*((b-a)*(1/eps -1)+t[n]) )) for k in range(2,n): bink= RR(k*(k-1)/(2*N0)) x = x* bink * exp(-bink*t[k]) else: bart=pylab.zeros(n+2) bart[n]=t[n] for k in range(n-1,1,-1): bart[k]= bart[k+1]+t[k] K=2 KK=2 for k in range(n,1,-1): if (bart[k]>a): K=k+1 break for k in range(K-1,1,-1): if (bart[k]>b): KK=k+1 break if (K==KK) and (K>2): bin = RR((K-1)*(K-2)/(2*N0)) x = RR(bin*exp( -bin*( (b-a)*(1/eps -1)+ t[K-1] ) )) for j in range(2,K-1): binj= j*(j-1)/(2*N0) x= x * RR(binj * exp(- binj * t[j])) for j in range(K,n+1): binj= j*(j-1)/(2*N0) x= x * RR(binj * exp(- binj*t[j])) elif (K==KK) and (K==2): x = ProbOfTHom(KingmanRates(n)/N0,t,n) elif (KK>2): bin1 = (K-1)*(K-2)/(2*N0) bin2 = (KK-1)*(KK-2)/(2*N0) x = RR((bin1/eps)*(bin2)*exp( -bin1*(a-bart[K] + (bart[K-1]-a)/eps) -bin2*( (b-bart[KK])/eps + bart[KK-1]-b ) )) for j in range(2,KK-1): binj= j*(j-1)/(2*N0) x= x* RR( binj*exp(-binj*t[j]) ) for j in range(KK,K-1): binj= j*(j-1)/(2*eps*N0) x= x* RR( binj*exp(-binj*t[j]) ) for j in range(K,n+1): binj= j*(j-1)/(2*N0) x= x* RR( binj*exp(-binj*t[j]) ) else: bin1 = (K-1)*(K-2)/(2*N0) x = RR((bin1/eps)*exp( -bin1*(a-bart[K] + (bart[K-1]-a)/eps) )) for j in range(2,K-1): binj= j*(j-1)/(2*eps*N0) x= x* RR( binj*exp(-binj*t[j]) ) for j in range(K,n+1): binj= j*(j-1)/(2*N0) x= x* RR( binj*exp(-binj*t[j]) ) return x # Conditional probabilities def ProbOfMGivenFTmu(f,m,t,mu,n): '''return the P(M | F,T,mu), mu scaled mutation rate (theta earlier)''' a=RR(1.0) for j in range(1,n): for k in range(2,n+1): r=mu*RR(f[k,j])*RR(t[k]) a=a*exp(-r)*r^(RR(m[k,j]))/factorial(RR(m[k,j])) return a def ProbOfSfsGivenFTmu(f,s,t,mu,n): '''return the P(S | F,T,mu) of SFS S given topology F and epoch times T, mu scaled mutation rate''' a=RR(1.0) L=pylab.dot(t,f) for j in range(1,n): r=mu*RR(L[j]) a=a*exp(-r)*r^(RR(s[j]))/factorial(RR(s[j])) return a def ProbOfFMTGivenRatesBetaMu(f,m,t,r,betaSpl,mu,n): '''return P(F,M,T) given rates, beta, and mu in Beta-splitting model with epoch-wise homogeneous coalescence rates given by vector r''' return ProbOfF(betaSpl,f,n)*ProbOfTHom(r,t,n)*ProbOfMGivenFTmu(f,m,t,mu,n) def ProbOfFMTGivenGrowthBetaMu(f,m,t,g,betaSpl,mu,n): '''return P(F,M,T) given exponential growth rate, beta, and mu in Beta-splitting model with exponential growth of pop size''' return ProbOfF(betaSpl,f,n)*ProbOfTExG(g,t,n)*ProbOfMGivenFTmu(f,m,t,mu,n) def ProbOfFSfsTGivenGrowthBetaMu(f,s,t,g,betaSpl,mu,n): '''return P(F,S,T) given exponential growth rate, betaSpl, and mu''' return ProbOfF(betaSpl,f,n)*ProbOfTExG(g,t,n)*ProbOfSfsGivenFTmu(f,s,t,mu,n) def ProbOfFSfsTGivenBottleneckBetaMu(f,s,t,a,b,eps,N0,betaSpl,mu,n): '''return P(F,S,T) given bottleneck model parameters: a,b,eps,N0, and betaSpl and mu''' return ProbOfF(betaSpl,f,n)*ProbOfTBtl(a,b,eps,N0,t,n)*ProbOfSfsGivenFTmu(f,s,t,mu,n) # Multi-locus based likelihood of parameters under different scenarios def LklOfBetaThetaGrowthRates(betaSpl,Mu,g,GrRates,n,sfsList,Reps): '''return the log likelihood across loci for beta, Mu and growth rate g, specifying a vector GrRates of a priori coalescence rates. sfsList is a list of SFS at all loci considered, Reps is the number of particles produced per locus to evaluate the per-locus likelihood.''' jointLklAcrossLoci=RR(1.0) jointLogLklAcrossLoci=RR(0.0) for i in range(len(sfsList)): sfs=sfsList[i] Lkl2=pylab.zeros(Reps) for r in range(Reps): Y=MakeHistory(GrRates,betaSpl,n,sfs,Mu) wt=Y[3]*Y[4]*Y[5] numwt2=ProbOfFSfsTGivenGrowthBetaMu(Y[0],sfs,Y[2],g,betaSpl,Mu,n) Lkl2[r]=numwt2/wt Lkl2Mean=Lkl2.mean() jointLklAcrossLoci=jointLklAcrossLoci*Lkl2Mean jointLogLklAcrossLoci=jointLogLklAcrossLoci+log(Lkl2Mean) print 'beta, mu, g, lkl, logLkl = ',betaSpl,' ,',Mu,' ,',g,' ,',jointLklAcrossLoci,' ,',jointLogLklAcrossLoci return jointLogLklAcrossLoci def LklOfBetaThetaGrowth(betaSpl,Mu,g,n,sfsList,Reps): '''return the log likelihood estimates for beta parameter betaSpl, mutation rate Mu and growth rate g from list of observed S, by first computing a priori mean coalescence rates and then produce Reps particles per locus.''' RateSamples=1000 # you may need to increase this for larger n! GrRates = GetRatesForGrowth(g,n,RateSamples) jointLogLklAcrossLoci = LklOfBetaThetaGrowthRates(betaSpl,Mu,g,GrRates,n,sfsList,Reps) return jointLogLklAcrossLoci def LklOfBetaThetaBottleneckRates(betaSpl,Mu,a,b,eps,N0,btlnckRates,n,sfsList,Reps): '''return the lok likelihood across loci for beta, Mu, a, b, eps and N0, specifying a vector btlnckRates of a priori coalescence rates. sfsList is a list of SFS at all loci considered, Reps is the number of particles produced per locus to evaluate the per-locus likelihood.''' jointLogLklAcrossLoci=RR(0.0) for i in range(len(sfsList)): sfs=sfsList[i] Lkl2=pylab.zeros(Reps) for r in range(Reps): Y=MakeHistory(btlnckRates,betaSpl,n,sfs,Mu) wt=Y[3]*Y[4]*Y[5] numwt2=ProbOfFSfsTGivenBottleneckBetaMu(Y[0],sfs,Y[2],a,b,eps,N0,betaSpl,Mu,n) Lkl2[r]=numwt2/wt Lkl2Mean=Lkl2.mean() jointLogLklAcrossLoci=jointLogLklAcrossLoci+log(Lkl2Mean) print 'beta, mu, a, b, eps, N0, logLkl = ',betaSpl,' ,',Mu,' ,',a,' ,',b,' ,',eps,' ,',N0,' ,',jointLogLklAcrossLoci return jointLogLklAcrossLoci def LklOfBetaThetaBottleneck(betaSpl,Mu,a,b,eps,N0,n,sfsList,Reps): '''return the log likelihood estimates for beta, Mu and bottleneck parameters a,b,eps,N0 for observed SFS in sfsList, by first computing a priori mean coalescence rates in this scenario. Reps is number of particles produced for each SFS''' RateSamples=1000 # you may need to increase this for larger n! BtlRates = GetRatesForBottleneck(a,b,eps,N0,n,RateSamples) jointLogLklAcrossLoci = LklOfBetaThetaBottleneckRates(betaSpl,Mu,a,b,eps,N0,BtlRates,n,sfsList,Reps) return jointLogLklAcrossLoci ############################################################################### # This Halton sequence code is by Richard P. Muller ([email protected]). # http://pistol.googlecode.com/svn/trunk/Pistol/QuasiRandom.py prime = [2.0,3.0,5.0,7.0,11.0,13.0,17.0,19.0,23.0,29.0,31.0,37.0, 41.0,43.0,47.0,53.0,59.0,61.0,67.0,71.0,73.0,79.0,83.0, 89.0,97.0,101.0,103.0,107.0,109.0,113.0,127.0,131.0,137.0, 139.0,149.0,151.0,157.0,163.0,167.0,173.0] class HaltonSequence: """Another algorithm to compute a d-dimensional Halton Sequence. This one gives the same results as the other.""" def __init__(self,dim,atmost=10000): if dim > len(prime): raise "dim < %d " % len(prime) self.dim = dim self.err = 0.9/(atmost*prime[dim-1]) self.prime = [0]*self.dim self.quasi = [0]*self.dim for i in range(self.dim): self.prime[i] = 1./prime[i] self.quasi[i] = self.prime[i] return def __call__(self): for i in range(self.dim): t = self.prime[i] f = 1.-self.quasi[i] g = 1. h = t while f-h < self.err: g = h h = h*t self.quasi[i] = g+h-f return self.quasi # END of Halton sequence code by Richard P. Muller ([email protected]). ###############################################################################

Computing useful statistics

%auto def WattersonsThetaEst(s,n): '''return Watterson's estimate of mutation rate based on number of segregating sites s and average length of Kingman's coalescent with sample size n''' est=0 for i in range(1,n): est=est+(2/i) return s/est def GetRatesForGrowth(g,n,RateSamples): '''return the mean estimated rates for each epoch under growth g and sample size n, based on RateSamples samplings from the law''' A=KingmanRates(n) B=pylab.zeros(n+1) for j in range(RateSamples): B=B+EpochTimesExpGr(n,A,1,g) for j in range(2,n+1): B[j]= RateSamples/B[j] return B def GetRatesForBottleneck(a,b,eps,N0,n,RateSamples): '''return the mean estimated rates for each epoch under Kingman's coalescent with bottleneck model with start and end times a<b, with initial population size N0 and population reduction eps, based on RateSamples samplings from the law''' B=pylab.zeros(n+1) for j in range(RateSamples): B=B+EpochTimesBtl(n,N0,a,b,eps) for j in range(2,n+1): B[j]= RateSamples/B[j] return B

Producing data to test the algorithms under the exponential growth model

# dataset 1G - KINGMAN'S COALESCENT WITH NO GROWTH # Produce a list of NumLoci2 SFS from Kingman's coalescent with growth set_random_seed(12456546546) np.random.seed(int(78786767)) n=15 N0=1 # this is fixed at unity as in Hudson's ms TrueBeta=0 TrueG=0.0 TrueTheta=10 NumLoci1=30 NumLoci2=100 TFS=[] for i in range(NumLoci2): tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n) TFS.append(tfs)
# mean number of segregating sites across loci segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci2)]; meanSegSites=mean(segSitesAcrossLoci); print meanSegSites
70.05
# Likelihood based on the full list of SFS set_random_seed(12456546546) np.random.seed(int(1234)) sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci2)] BetaList=[-1.0,0.0,10.0]; GrowthList=[0.0,1,10]; ThetaList=[1,10,100]; results=[] Reps=100; # number of particles for g in GrowthList: GrRates = GetRatesForGrowth(g,n,1000) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps) print logLAcrossAllLoci results.append([Reps,n,NumLoci2,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci]) # report the most likely values print("[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]") MaxI=0 for i in range(len(results)): if results[i][9]>results[MaxI][9]: MaxI=i results[MaxI]
beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 0.000000000000000 , 0.0 , -7667.7511712 -7667.7511712 beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 0.000000000000000 , 0.0 , -5226.21481293 -5226.21481293 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 0.000000000000000 , 0.0 , -10086.1480954 -10086.1480954 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 0.000000000000000 , 0.0 , -7599.85131455 -7599.85131455 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 0.000000000000000 , 0.0 , -5110.13049221 -5110.13049221 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 0.000000000000000 , 0.0 , -10196.7979451 -10196.7979451 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 0.000000000000000 , 0.0 , -7643.79941623 -7643.79941623 beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 0.000000000000000 , 0.0 , -5324.05597448 -5324.05597448 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 0.000000000000000 , 0.0 , -10321.0669668 -10321.0669668 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 1 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 1 , 0.0 , -5909.20719092 -5909.20719092 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 1 , 0.0 , -10086.4618185 -10086.4618185 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 1 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 1 , 0.0 , -5786.1424593 -5786.1424593 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 1 , 0.0 , -10154.3873678 -10154.3873678 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 1 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 1 , 0.0 , -5853.30748592 -5853.30748592 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 1 , 0.0 , -10255.5327358 -10255.5327358 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 10 , 0.0 , -9975.7902135 -9975.7902135 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 10 , 0.0 , -10014.8543315 -10014.8543315 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 10 , 0.0 , -10230.7874703 -10230.7874703 [Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [100, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -5110.1304922098898]
# Likelihood based on the partial list of SFS set_random_seed(12456546546) np.random.seed(int(1234)) sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci1)] BetaList=[-1.0,0.0,10.0]; GrowthList=[0.0,1,10]; ThetaList=[1,10,100]; results=[] Reps=100; # number of particles for g in GrowthList: GrRates = GetRatesForGrowth(g,n,1000) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps) print logLAcrossAllLoci results.append([Reps,n,NumLoci1,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci]) # report the most likely values print("[Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]") MaxI=0 for i in range(len(results)): if results[i][9]>results[MaxI][9]: MaxI=i results[MaxI]
beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 0.000000000000000 , 0.0 , -2337.02890526 -2337.02890526 beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 0.000000000000000 , 0.0 , -1576.56551326 -1576.56551326 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 0.000000000000000 , 0.0 , -3065.09464016 -3065.09464016 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 0.000000000000000 , 0.0 , -2345.4279983 -2345.4279983 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 0.000000000000000 , 0.0 , -1569.62519957 -1569.62519957 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 0.000000000000000 , 0.0 , -3031.26008774 -3031.26008774 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 0.000000000000000 , 0.0 , -2340.03771926 -2340.03771926 beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 0.000000000000000 , 0.0 , -1581.81045985 -1581.81045985 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 0.000000000000000 , 0.0 , -3145.99935957 -3145.99935957 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 1 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 1 , 0.0 , -1817.21117217 -1817.21117217 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 1 , 0.0 , -3047.49606705 -3047.49606705 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 1 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 1 , 0.0 , -1732.80522191 -1732.80522191 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 1 , 0.0 , -3036.44416147 -3036.44416147 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 1 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 1 , 0.0 , -1812.15345891 -1812.15345891 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 1 , 0.0 , -3157.4826845 -3157.4826845 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 10 , 0.0 , -3031.21732179 -3031.21732179 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 10 , 0.0 , -3019.13628679 -3019.13628679 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 10 , 0.0 , -3108.13197622 -3108.13197622 [Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [100, 15, 30, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -1569.6251995691184]
# dataset 2G - KINGMAN'S COALESCENT WITH GROWTH - SAMPLE SIZE n=2 set_random_seed(12456546) np.random.seed(int(787867)) n=2 N0=1 TrueBeta=0 TrueG=10.0 TrueTheta=10 NumLoci1=30 NumLoci2=100 TFS=[] for i in range(NumLoci2): tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n) TFS.append(tfs) # mean number of segregating sites across loci segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci2)]; meanSegSites=mean(segSitesAcrossLoci); print meanSegSites
3.82
beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 0.000000000000000 , 1.84642497925e-38 , -86.8849822079 -86.8849822079 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 0.000000000000000 , 3.27549135261e-43 , -97.8246911096 -97.8246911096 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 0.000000000000000 , 4.13081400383e-70 , -159.762482027 -159.762482027 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 1 , 7.19903227912e-51 , -115.457893131 -115.457893131 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 1 , 1.45992857523e-40 , -91.7250162063 -91.7250162063 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 1 , 9.10762500924e-70 , -158.971844534 -158.971844534 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 10 , 2.04450578169e-187 , -429.868256301 -429.868256301 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 10 , 7.32408674123e-32 , -71.6915545056 -71.6915545056 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 10 , 1.32896401033e-66 , -151.686216439 -151.686216439 [Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [1000, 2, 30, 0, 10.0000000000000, 10, 0.000000000000000, 10, 10, -71.691554505644334]
# dataset 3G - UNBALANCED TREE WITH GROWTH set_random_seed(124566) np.random.seed(int(787607)) n=10 N0=1 TrueBeta=-1.9 TrueG=10.0 TrueTheta=10 NumLoci1=30 NumLoci2=100 TFS=[] for i in range(NumLoci2): tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n) TFS.append(tfs) # mean number of segregating sites across loci segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci2)]; meanSegSites=mean(segSitesAcrossLoci); print meanSegSites
14.63
set_random_seed(1245655) np.random.seed(int(123545)) sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci1)] BetaList=[-1.9,0.0,10.0]; GrowthList=[0.0,1,10]; ThetaList=[1,10,100]; results=[] Reps=500; # number of particles for g in GrowthList: GrRates = GetRatesForGrowth(g,n,1000) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps) print logLAcrossAllLoci results.append([Reps,n,NumLoci1,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci]) # report the most likely values print("[Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]") MaxI=0 for i in range(len(results)): if results[i][9]>results[MaxI][9]: MaxI=i results[MaxI]
beta, mu, g, lkl, logLkl = -1.90000000000000 , 1 , 0.000000000000000 , 8.92612469103e-153 , -350.106536893 -350.106536893 beta, mu, g, lkl, logLkl = -1.90000000000000 , 10 , 0.000000000000000 , 3.72263396154e-213 , -489.136193336 -489.136193336 beta, mu, g, lkl, logLkl = -1.90000000000000 , 100 , 0.000000000000000 , 0.0 , -1373.7491213 -1373.7491213 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 0.000000000000000 , 1.04931918813e-194 , -446.653366479 -446.653366479 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 0.000000000000000 , 1.41195612133e-283 , -651.286605254 -651.286605254 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 0.000000000000000 , 0.0 , -1618.3152301 -1618.3152301 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 0.000000000000000 , 1.44084065211e-221 , -508.506078822 -508.506078822 beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 0.000000000000000 , 0.0 , -746.607157891 -746.607157891 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 0.000000000000000 , 0.0 , -1772.76619681 -1772.76619681 beta, mu, g, lkl, logLkl = -1.90000000000000 , 1 , 1 , 7.46222329377e-232 , -532.189888176 -532.189888176 beta, mu, g, lkl, logLkl = -1.90000000000000 , 10 , 1 , 1.28135329712e-208 , -478.68978256 -478.68978256 beta, mu, g, lkl, logLkl = -1.90000000000000 , 100 , 1 , 0.0 , -1371.19351795 -1371.19351795 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 1 , 2.60833233832e-272 , -625.344434228 -625.344434228 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 1 , 8.00576248154e-286 , -656.459175004 -656.459175004 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 1 , 0.0 , -1620.4503413 -1620.4503413 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 1 , 1.00165324615e-308 , -709.194556761 -709.194556761 beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 1 , 0.0 , -749.966447065 -749.966447065 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 1 , 0.0 , -1783.23476919 -1783.23476919 beta, mu, g, lkl, logLkl = -1.90000000000000 , 1 , 10 , 0.0 , -1618.83362947 -1618.83362947 beta, mu, g, lkl, logLkl = -1.90000000000000 , 10 , 10 , 1.02978520119e-228 , -524.960050965 -524.960050965 beta, mu, g, lkl, logLkl = -1.90000000000000 , 100 , 10 , 0.0 , -1346.84482594 -1346.84482594 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 10 , 0.0 , -1740.11270886 -1740.11270886 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 10 , 3.00817303193e-318 , -731.120727431 -731.120727431 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 10 , 0.0 , -1606.96795791 -1606.96795791 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 10 , 0.0 , -1739.04369827 -1739.04369827 beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 10 , 0.0 , -826.161229329 -826.161229329 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 10 , 0.0 , -1755.56846088 -1755.56846088 [Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [500, 10, 30, -1.90000000000000, 10.0000000000000, 10, -1.90000000000000, 1, 0.000000000000000, -350.10653689256128]
# dataset 4G - MORE BALANCED TREE WITH NO GROWTH set_random_seed(12456625) np.random.seed(int(78764507)) n=10 N0=1 TrueBeta=10 TrueG=0.0 TrueTheta=10 NumLoci1=30 NumLoci2=100 TFS=[] for i in range(NumLoci2): tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n) TFS.append(tfs) # mean number of segregating sites across loci segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci2)]; meanSegSites=mean(segSitesAcrossLoci); print meanSegSites
50.86
set_random_seed(1245655) np.random.seed(int(123545)) sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci1)] BetaList=[-1.9,0.0,10,20,50]; GrowthList=[0.0]; ThetaList=[1,10,100]; results=[] Reps=500; # number of particles for g in GrowthList: GrRates = GetRatesForGrowth(g,n,1000) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps) print logLAcrossAllLoci results.append([Reps,n,NumLoci1,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci]) # report the most likely values print("[Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]") MaxI=0 for i in range(len(results)): if results[i][9]>results[MaxI][9]: MaxI=i results[MaxI]
beta, mu, g, lkl, logLkl = -1.90000000000000 , 1 , 0.000000000000000 , 0.0 , -1154.54179629 -1154.54179629 beta, mu, g, lkl, logLkl = -1.90000000000000 , 10 , 0.000000000000000 , 0.0 , -780.297563836 -780.297563836 beta, mu, g, lkl, logLkl = -1.90000000000000 , 100 , 0.000000000000000 , 0.0 , -1767.67463989 -1767.67463989 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 0.000000000000000 , 0.0 , -951.140203765 -951.140203765 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 0.000000000000000 , 1.8497040141e-244 , -561.215737057 -561.215737057 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 0.000000000000000 , 0.0 , -1530.84525417 -1530.84525417 beta, mu, g, lkl, logLkl = 10 , 1 , 0.000000000000000 , 0.0 , -899.806101283 -899.806101283 beta, mu, g, lkl, logLkl = 10 , 10 , 0.000000000000000 , 2.09476181715e-241 , -554.183567556 -554.183567556 beta, mu, g, lkl, logLkl = 10 , 100 , 0.000000000000000 , 0.0 , -1511.60217057 -1511.60217057 beta, mu, g, lkl, logLkl = 20 , 1 , 0.000000000000000 , 0.0 , -883.534377758 -883.534377758 beta, mu, g, lkl, logLkl = 20 , 10 , 0.000000000000000 , 3.78782789921e-241 , -553.59121467 -553.59121467 beta, mu, g, lkl, logLkl = 20 , 100 , 0.000000000000000 , 0.0 , -1535.52126976 -1535.52126976 beta, mu, g, lkl, logLkl = 50 , 1 , 0.000000000000000 , 0.0 , -898.52707297 -898.52707297 beta, mu, g, lkl, logLkl = 50 , 10 , 0.000000000000000 , 2.07326978324e-237 , -544.983540073 -544.983540073 beta, mu, g, lkl, logLkl = 50 , 100 , 0.000000000000000 , 0.0 , -1507.09522843 -1507.09522843 [Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [500, 10, 30, 10, 0.000000000000000, 10, 50, 10, 0.000000000000000, -544.98354007314447]
# dataset 4Gbis - SAME WITH LARGER BETA set_random_seed(12456625) np.random.seed(int(78764507)) n=10 N0=1 TrueBeta=50 TrueG=0.0 TrueTheta=10 NumLoci1=30 NumLoci2=100 TFS=[] for i in range(NumLoci2): tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n) TFS.append(tfs) # mean number of segregating sites across loci segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci2)]; meanSegSites=mean(segSitesAcrossLoci); print meanSegSites
54.55
set_random_seed(1245655) np.random.seed(int(123545)) sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci1)] BetaList=[-1.9,0.0,20,50,70,100]; GrowthList=[0.0]; ThetaList=[10]; results=[] Reps=500; # number of particles for g in GrowthList: GrRates = GetRatesForGrowth(g,n,1000) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps) print logLAcrossAllLoci results.append([Reps,n,NumLoci1,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci]) # report the most likely values print("[Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]") MaxI=0 for i in range(len(results)): if results[i][9]>results[MaxI][9]: MaxI=i results[MaxI]
beta, mu, g, lkl, logLkl = -1.90000000000000 , 10 , 0.000000000000000 , 0.0 , -779.448068357 -779.448068357 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 0.000000000000000 , 1.15508278542e-247 , -568.594345952 -568.594345952 beta, mu, g, lkl, logLkl = 20 , 10 , 0.000000000000000 , 1.44022837574e-232 , -533.834939879 -533.834939879 beta, mu, g, lkl, logLkl = 50 , 10 , 0.000000000000000 , 4.92456037301e-226 , -518.78999601 -518.78999601 beta, mu, g, lkl, logLkl = 70 , 10 , 0.000000000000000 , 9.84020154202e-222 , -508.887414452 -508.887414452 beta, mu, g, lkl, logLkl = 100 , 10 , 0.000000000000000 , 5.46929618175e-225 , -516.382495984 -516.382495984 [Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [500, 10, 30, 50, 0.000000000000000, 10, 70, 10, 0.000000000000000, -508.88741445191135]
# same computations, with more loci (100 instead of 30) set_random_seed(1245655) np.random.seed(int(123545)) sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci2)] BetaList=[-1.9,0.0,20,50,70,100]; GrowthList=[0.0]; ThetaList=[10]; results=[] Reps=500; # number of particles for g in GrowthList: GrRates = GetRatesForGrowth(g,n,1000) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps) print logLAcrossAllLoci results.append([Reps,n,NumLoci2,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci]) # report the most likely values print("[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]") MaxI=0 for i in range(len(results)): if results[i][9]>results[MaxI][9]: MaxI=i results[MaxI]
beta, mu, g, lkl, logLkl = -1.90000000000000 , 10 , 0.000000000000000 , 0.0 , -2678.13043644 -2678.13043644 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 0.000000000000000 , 0.0 , -1906.68164481 -1906.68164481 beta, mu, g, lkl, logLkl = 20 , 10 , 0.000000000000000 , 0.0 , -1771.44766253 -1771.44766253 beta, mu, g, lkl, logLkl = 50 , 10 , 0.000000000000000 , 0.0 , -1771.74216851 -1771.74216851 beta, mu, g, lkl, logLkl = 70 , 10 , 0.000000000000000 , 0.0 , -1759.10585603 -1759.10585603 beta, mu, g, lkl, logLkl = 100 , 10 , 0.000000000000000 , 0.0 , -1798.32218429 -1798.32218429 [Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [500, 10, 100, 50, 0.000000000000000, 10, 70, 10, 0.000000000000000, -1759.1058560291142]
# dataset 5G - LESS EXTREME DEPARTURES FROM Beta=0 set_random_seed(12456546546) np.random.seed(int(78786767)) n=15 N0=1 TrueBeta=-1 TrueG=0.0 TrueTheta=10 NumLoci=30 TFS=[] for i in range(NumLoci): tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n) TFS.append(tfs) # infer set_random_seed(12456546546) np.random.seed(int(1234)) sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)] BetaList=[-1.0,0.0,10.0]; GrowthList=[0.0,10.0,100.0]; ThetaList=[1,10,100]; results=[] Reps=20; # number of particles for g in GrowthList: GrRates = GetRatesForGrowth(g,n,1000) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps) results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci]) # report the most likely values print("[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]") MaxI=0 for i in range(len(results)): if results[i][9]>results[MaxI][9]: MaxI=i results[MaxI]
beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 0.000000000000000 , 0.0 , -2485.44180413698 beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 0.000000000000000 , 0.0 , -1595.65973228989 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 0.000000000000000 , 0.0 , -3009.74146339138 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 0.000000000000000 , 0.0 , -2497.49538365434 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 0.000000000000000 , 0.0 , -1629.09844522608 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 0.000000000000000 , 0.0 , -3054.63957052542 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 0.000000000000000 , 0.0 , -2524.46093727679 beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 0.000000000000000 , 0.0 , -1721.68556524245 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 0.000000000000000 , 0.0 , -3233.84204749802 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 10.0000000000000 , 0.0 , -3029.81399220508 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 10.0000000000000 , 0.0 , -3063.60307631410 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 10.0000000000000 , 0.0 , -3196.15624867809 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 100.000000000000 , 0.0 , -infinity [Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [20, 15, 30, -1, 0.000000000000000, 10, -1.00000000000000, 10, 0.000000000000000, -1595.65973228989]
# dataset 6G - SAME WITH MORE BALANCED TREES set_random_seed(12456546546) np.random.seed(int(78786767)) n=15 N0=1 TrueBeta=10.0 TrueG=0.0 TrueTheta=10 NumLoci=30 TFS=[] for i in range(NumLoci): tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n) TFS.append(tfs) # infer set_random_seed(12456546546) np.random.seed(int(1234)) sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)] BetaList=[-1.0,0.0,10.0]; GrowthList=[0.0,10.0,100.0]; ThetaList=[1,10,100]; results=[] Reps=20; # number of particles for g in GrowthList: GrRates = GetRatesForGrowth(g,n,1000) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps) results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci]) # report the most likely values print("[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]") MaxI=0 for i in range(len(results)): if results[i][9]>results[MaxI][9]: MaxI=i results[MaxI]
beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 0.000000000000000 , 0.0 , -2426.00860033181 beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 0.000000000000000 , 0.0 , -1807.30350551266 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 0.000000000000000 , 0.0 , -3353.27443791154 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 0.000000000000000 , 0.0 , -2386.08740026739 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 0.000000000000000 , 0.0 , -1764.68461686015 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 0.000000000000000 , 0.0 , -3296.49162836633 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 0.000000000000000 , 0.0 , -2318.48151263816 beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 0.000000000000000 , 0.0 , -1651.92027054266 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 0.000000000000000 , 0.0 , -3295.30336629113 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 10.0000000000000 , 0.0 , -3334.17111890078 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 10.0000000000000 , 0.0 , -3336.53844782303 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 10.0000000000000 , 0.0 , -3275.05019894113 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 100.000000000000 , 0.0 , -infinity [Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [20, 15, 30, 10.0000000000000, 0.000000000000000, 10, 10.0000000000000, 10, 0.000000000000000, -1651.92027054266]
# dataset 6G' - SAME WITH slightly MORE BALANCED TREES but with large theta # this is to check against the best fitted beta and theta in the beta-splitting model fitted against the complex ms simulation from msdoc below set_random_seed(12456546546) np.random.seed(int(78786767)) n=15 N0=1 TrueBeta=5.0 TrueG=0.0 TrueTheta=100 NumLoci=30 TFS=[] for i in range(NumLoci): tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n) TFS.append(tfs) # infer set_random_seed(12456546546) np.random.seed(int(1234)) sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)] BetaList=[-1.0,0.0,5.0]; GrowthList=[0.0]; ThetaList=[10,100,1000]; results=[] Reps=20; # number of particles for g in GrowthList: GrRates = GetRatesForGrowth(g,n,1000) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps) results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci]) # report the most likely values print("[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]") MaxI=0 for i in range(len(results)): if results[i][9]>results[MaxI][9]: MaxI=i results[MaxI]
beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 0.000000000000000 , 0.0 , -5347.9254917 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 0.000000000000000 , 0.0 , -4181.90160246 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1000 , 0.000000000000000 , 0.0 , -7318.4541281 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 0.000000000000000 , 0.0 , -4506.96656944 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 0.000000000000000 , 0.0 , -3629.03506764 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1000 , 0.000000000000000 , 0.0 , -6476.40522229 beta, mu, g, lkl, logLkl = 5.00000000000000 , 10 , 0.000000000000000 , 0.0 , -4211.11864562 beta, mu, g, lkl, logLkl = 5.00000000000000 , 100 , 0.000000000000000 , 0.0 , -3316.56095862 beta, mu, g, lkl, logLkl = 5.00000000000000 , 1000 , 0.000000000000000 , 0.0 , -5760.49369151 [Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [20, 15, 30, 5.00000000000000, 0.000000000000000, 100, 5.00000000000000, 100, 0.000000000000000, -3316.5609586219052]
Sanity check: increased mutation rate (i.e., more SNPs) gives better estimates
# dataset 7G with g=2 and increased theta set_random_seed(12533) np.random.seed(np.int(1245)) n=15 N0=1 TrueBeta=0.0 TrueG=2.0 TrueTheta=100.0 NumLoci=60 TFS=[] for i in range(NumLoci): tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n) TFS.append(tfs) #tfs[2]
# mean number of segregating sites segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)]; meanSegSites=mean(segSitesAcrossLoci); print meanSegSites
364.35
set_random_seed(12451116) np.random.seed(int(1234)) sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)] BetaList=[-1.0,0.0,10.0]; GrowthList=[0.0,1.0,2.0,3.0,5.0,10.0]; ThetaList=[100.0]; results=[] Reps=200; # number of particles for g in GrowthList: GrRates = GetRatesForGrowth(g,n,1000) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps) results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 0.000000000000000 , 0.0 , -4907.78167671551 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 0.000000000000000 , 0.0 , -4770.02044249150 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 0.000000000000000 , 0.0 , -4733.57552978068 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 1.00000000000000 , 0.0 , -4858.67216675298 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 1.00000000000000 , 0.0 , -4758.11451797914 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 1.00000000000000 , 0.0 , -4754.97193836299 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 2.00000000000000 , 0.0 , -4938.48773788207 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 2.00000000000000 , 0.0 , -4692.51974709802 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 2.00000000000000 , 0.0 , -4719.45055633706 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 3.00000000000000 , 0.0 , -5014.20031830045 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 3.00000000000000 , 0.0 , -4846.31369921827 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 3.00000000000000 , 0.0 , -4836.91856516081 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 5.00000000000000 , 0.0 , -5949.63834428161 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 5.00000000000000 , 0.0 , -5767.59346562225 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 5.00000000000000 , 0.0 , -5728.97568897941 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 10.0000000000000 , 0.0 , -13897.3116586207 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 10.0000000000000 , 0.0 , -13681.8415389646 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 10.0000000000000 , 0.0 , -infinity
# report the most likely values print("[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]") MaxI=0 for i in range(len(results)): if results[i][9]>results[MaxI][9]: MaxI=i results[MaxI]
[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [200, 15, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 2.00000000000000, -4692.51974709802]
# dataset 8G - with increased n set_random_seed(122323533) np.random.seed(np.int(1211145)) n=30 N0=1 TrueBeta=0.0 TrueG=2.0 TrueTheta=100.0 NumLoci=60 TFS=[] for i in range(NumLoci): tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n) TFS.append(tfs) #tfs[2]
# mean number of segregating sites segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)]; meanSegSites=mean(segSitesAcrossLoci); print meanSegSites
462.516666667
set_random_seed(12451116) # we can find signal of growth and beta over theta (gHat=1.0 instead of trueG=2.0) np.random.seed(int(1234)) sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)] BetaList=[-1.0,0.0,10.0]; GrowthList=[0.0,1.0,2.0,3.0,5.0,10.0]; ThetaList=[1.0,10.0,100.0,1000.0]; results=[] Reps=200; # number of particles for g in GrowthList: GrRates = GetRatesForGrowth(g,n,5000) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps) results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci]) results
beta, mu, g, lkl, logLkl = -1.00000000000000 , 1.00000000000000 , 0.000000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 10.0000000000000 , 0.000000000000000 , 0.0 , -16422.3009309419 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 0.000000000000000 , 0.0 , -16028.4737429655 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1000.00000000000 , 0.000000000000000 , 0.0 , -26795.5988144158 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1.00000000000000 , 0.000000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 10.0000000000000 , 0.000000000000000 , 0.0 , -16128.6146952371 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 0.000000000000000 , 0.0 , -15662.5259045307 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1000.00000000000 , 0.000000000000000 , 0.0 , -26254.8450548033 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1.00000000000000 , 0.000000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 10.0000000000000 , 0.000000000000000 , 0.0 , -16153.5678644303 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 0.000000000000000 , 0.0 , -15850.5218021710 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1000.00000000000 , 0.000000000000000 , 0.0 , -26538.9522004612 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1.00000000000000 , 1.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 10.0000000000000 , 1.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 1.00000000000000 , 0.0 , -15961.8514298296 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1000.00000000000 , 1.00000000000000 , 0.0 , -27117.3003394868 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1.00000000000000 , 1.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 10.0000000000000 , 1.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 1.00000000000000 , 0.0 , -15537.5894574872 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1000.00000000000 , 1.00000000000000 , 0.0 , -26370.1676322233 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1.00000000000000 , 1.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 10.0000000000000 , 1.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 1.00000000000000 , 0.0 , -15705.6247847785 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1000.00000000000 , 1.00000000000000 , 0.0 , -26446.3182949772 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1.00000000000000 , 2.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 10.0000000000000 , 2.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 2.00000000000000 , 0.0 , -16152.0144288948 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1000.00000000000 , 2.00000000000000 , 0.0 , -26887.1974399624 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1.00000000000000 , 2.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 10.0000000000000 , 2.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 2.00000000000000 , 0.0 , -15768.9988127428 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1000.00000000000 , 2.00000000000000 , 0.0 , -26070.4776896200 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1.00000000000000 , 2.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 10.0000000000000 , 2.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 2.00000000000000 , 0.0 , -15698.4926490336 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1000.00000000000 , 2.00000000000000 , 0.0 , -26270.5375180653 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1.00000000000000 , 3.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 10.0000000000000 , 3.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 3.00000000000000 , 0.0 , -16327.6260523775 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1000.00000000000 , 3.00000000000000 , 0.0 , -26921.0641552768 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1.00000000000000 , 3.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 10.0000000000000 , 3.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 3.00000000000000 , 0.0 , -15834.3765202690 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1000.00000000000 , 3.00000000000000 , 0.0 , -26239.3022865899 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1.00000000000000 , 3.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 10.0000000000000 , 3.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 3.00000000000000 , 0.0 , -15929.4729365962 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1000.00000000000 , 3.00000000000000 , 0.0 , -26456.7767878908 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1.00000000000000 , 5.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 10.0000000000000 , 5.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 5.00000000000000 , 0.0 , -17140.0109560034 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1000.00000000000 , 5.00000000000000 , 0.0 , -26809.5379888034 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1.00000000000000 , 5.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 10.0000000000000 , 5.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 5.00000000000000 , 0.0 , -16710.2692409932 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1000.00000000000 , 5.00000000000000 , 0.0 , -26264.8777998098 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1.00000000000000 , 5.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 10.0000000000000 , 5.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 5.00000000000000 , 0.0 , -17005.1235072424 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1000.00000000000 , 5.00000000000000 , 0.0 , -26267.6976961831 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1.00000000000000 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 10.0000000000000 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 1000.00000000000 , 10.0000000000000 , 0.0 , -26533.4086149551 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1.00000000000000 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 10.0000000000000 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 1000.00000000000 , 10.0000000000000 , 0.0 , -26197.9966739432 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1.00000000000000 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 10.0000000000000 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 1000.00000000000 , 10.0000000000000 , 0.0 , -26294.5184967989 [[200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1.00000000000000, 0.000000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 10.0000000000000, 0.000000000000000, -16422.3009309419], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 100.000000000000, 0.000000000000000, -16028.4737429655], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1000.00000000000, 0.000000000000000, -26795.5988144158], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1.00000000000000, 0.000000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 10.0000000000000, 0.000000000000000, -16128.6146952371], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 0.000000000000000, -15662.5259045307], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1000.00000000000, 0.000000000000000, -26254.8450548033], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1.00000000000000, 0.000000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 10.0000000000000, 0.000000000000000, -16153.5678644303], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 100.000000000000, 0.000000000000000, -15850.5218021710], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1000.00000000000, 0.000000000000000, -26538.9522004612], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1.00000000000000, 1.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 10.0000000000000, 1.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 100.000000000000, 1.00000000000000, -15961.8514298296], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1000.00000000000, 1.00000000000000, -27117.3003394868], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1.00000000000000, 1.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 10.0000000000000, 1.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 1.00000000000000, -15537.5894574872], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1000.00000000000, 1.00000000000000, -26370.1676322233], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1.00000000000000, 1.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 10.0000000000000, 1.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 100.000000000000, 1.00000000000000, -15705.6247847785], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1000.00000000000, 1.00000000000000, -26446.3182949772], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1.00000000000000, 2.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 10.0000000000000, 2.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 100.000000000000, 2.00000000000000, -16152.0144288948], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1000.00000000000, 2.00000000000000, -26887.1974399624], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1.00000000000000, 2.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 10.0000000000000, 2.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 2.00000000000000, -15768.9988127428], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1000.00000000000, 2.00000000000000, -26070.4776896200], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1.00000000000000, 2.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 10.0000000000000, 2.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 100.000000000000, 2.00000000000000, -15698.4926490336], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1000.00000000000, 2.00000000000000, -26270.5375180653], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1.00000000000000, 3.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 10.0000000000000, 3.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 100.000000000000, 3.00000000000000, -16327.6260523775], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1000.00000000000, 3.00000000000000, -26921.0641552768], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1.00000000000000, 3.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 10.0000000000000, 3.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 3.00000000000000, -15834.3765202690], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1000.00000000000, 3.00000000000000, -26239.3022865899], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1.00000000000000, 3.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 10.0000000000000, 3.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 100.000000000000, 3.00000000000000, -15929.4729365962], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1000.00000000000, 3.00000000000000, -26456.7767878908], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1.00000000000000, 5.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 10.0000000000000, 5.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 100.000000000000, 5.00000000000000, -17140.0109560034], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1000.00000000000, 5.00000000000000, -26809.5379888034], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1.00000000000000, 5.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 10.0000000000000, 5.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 5.00000000000000, -16710.2692409932], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1000.00000000000, 5.00000000000000, -26264.8777998098], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1.00000000000000, 5.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 10.0000000000000, 5.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 100.000000000000, 5.00000000000000, -17005.1235072424], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1000.00000000000, 5.00000000000000, -26267.6976961831], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1.00000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 10.0000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 100.000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1000.00000000000, 10.0000000000000, -26533.4086149551], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1.00000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 10.0000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1000.00000000000, 10.0000000000000, -26197.9966739432], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1.00000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 10.0000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 100.000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1000.00000000000, 10.0000000000000, -26294.5184967989]]
# report the most likely values print("[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]") MaxI=0 for i in range(len(results)): if results[i][9]>results[MaxI][9]: MaxI=i results[MaxI]
[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 1.00000000000000, -15537.5894574872]

Producing data to test the algorithms under bottleneck model

# dataset B1 - RECENT MILD BOTTLENECK - SMALL SAMPLE SIZE set_random_seed(656005) np.random.seed(int(10043)) n=3 N0=1 TrueBeta=0 TrueA=0.05 TrueGap=0.1 TrueB=TrueA+TrueGap TrueEps=0.01 TrueTheta=50 NumLoci1=30 NumLoci2=100 TFS=[] for i in range(NumLoci2): tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n) TFS.append(tfs) # mean number of segregating sites segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci2)]; meanSegSites=mean(segSitesAcrossLoci); print meanSegSites # inference set_random_seed(12456546) np.random.seed(int(12345)) sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci2)] BetaList=[0.0]; AList=[0.005,0.05,0.5,1.0,5.0]; EpsList=[0.001,0.01, 0.1,1.0,10]; ThetaList=[TrueTheta]; results=[] Reps=500; # number of particles for a in AList: b=a+TrueGap for eps in EpsList: btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps) results.append([Reps,n,NumLoci2,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci]) # report the most likely values print("[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]") MaxI=0 for i in range(len(results)): if results[i][13]>results[MaxI][13]: MaxI=i results[MaxI]
OUTPUT: beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 0.00100000000000000 , 1 , -3608.48793736 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 0.0100000000000000 , 1 , -1141.76086703 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 0.100000000000000 , 1 , -581.719746685 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 1.00000000000000 , 1 , -762.958933525 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 10 , 1 , -997.965051971 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 0.00100000000000000 , 1 , -1502.2515838 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 0.0100000000000000 , 1 , -679.440596264 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 0.100000000000000 , 1 , -530.748263574 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 1.00000000000000 , 1 , -770.327024986 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 10 , 1 , -967.593805413 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 0.00100000000000000 , 1 , -766.899089643 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 0.0100000000000000 , 1 , -760.754601725 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 0.100000000000000 , 1 , -765.649526704 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 1.00000000000000 , 1 , -781.649732233 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 10 , 1 , -769.549791934 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 0.00100000000000000 , 1 , -773.433328349 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 0.0100000000000000 , 1 , -773.235962739 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 0.100000000000000 , 1 , -773.176668532 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 1.00000000000000 , 1 , -767.145018239 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 10 , 1 , -774.944687062 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 0.00100000000000000 , 1 , -781.470446754 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 0.0100000000000000 , 1 , -774.247268458 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 0.100000000000000 , 1 , -754.533162385 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 1.00000000000000 , 1 , -780.409887946 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 10 , 1 , -774.066273148 The ML parameters are: beta, mu, a, b, eps, N0, logLkl = 0.0, 50, 0.05, 0.15, 0.10, 1 , -530.748263574 true parameters are = 0.0, 50, 0.05, 0.15, 0.01, 1 so eps is over estimated.
# inference with less loci but more particles set_random_seed(12456546) np.random.seed(int(12345)) sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci1)] BetaList=[0.0]; AList=[0.005,0.05,0.5,1.0,5.0]; EpsList=[0.001,0.01, 0.1,1.0,10]; ThetaList=[TrueTheta]; results=[] Reps=1000; # number of particles for a in AList: b=a+TrueGap for eps in EpsList: btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps) results.append([Reps,n,NumLoci1,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])
beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 0.00100000000000000 , 1 , -1163.51882255 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 0.0100000000000000 , 1 , -365.696350645 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 0.100000000000000 , 1 , -173.175337965 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 0.00100000000000000 , 1.00000000000000 , 1 , -227.303776888 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 1 , -431.531782353 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 0.0100000000000000 , 1 , -193.513111235 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 0.100000000000000 , 1 , -152.052859489 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 1.00000000000000 , 1 , -225.725399602 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 10 , 1 , -287.570401128 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 0.00100000000000000 , 1 , -229.547239398 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 0.0100000000000000 , 1 , -229.415438751 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 0.100000000000000 , 1 , -224.735694798 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 1.00000000000000 , 1 , -229.699559894 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 10 , 1 , -224.716661467 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 0.00100000000000000 , 1 , -226.39264498 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 0.0100000000000000 , 1 , -225.275990284 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 0.100000000000000 , 1 , -231.04895718 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 1.00000000000000 , 1 , -221.720299295 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 10 , 1 , -225.739136262 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 0.00100000000000000 , 1 , -226.219278671 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 0.0100000000000000 , 1 , -226.809838563 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 0.100000000000000 , 1 , -228.359573602 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 1.00000000000000 , 1 , -218.018749341 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 10 , 1 , -229.290564857
# dataset B2 - RECENT MILD BOTTLENECK - LARGER SAMPLE SIZE set_random_seed(656005) np.random.seed(int(10043)) n=20 N0=1 TrueBeta=0 TrueA=0.05 TrueGap=0.1 TrueB=TrueA+TrueGap TrueEps=0.01 TrueTheta=50 NumLoci=30 TFS=[] for i in range(NumLoci): tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n) TFS.append(tfs) # mean number of segregating sites segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)]; meanSegSites=mean(segSitesAcrossLoci); print meanSegSites #inference set_random_seed(124546) np.random.seed(int(123545)) sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)] BetaList=[0.0]; AList=[0.005,0.05,0.5,1.0,5.0]; EpsList=[0.001,0.01, 0.1,1.0,10] ThetaList=[TrueTheta]; results=[] Reps=500; # number of particles for a in AList: b=a+TrueGap for eps in EpsList: btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps) results.append([Reps,n,NumLoci,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])
47.8333333333 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 0.00100000000000000 , 1 , -inf beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 0.0100000000000000 , 1 , -10891.5750864 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 0.100000000000000 , 1 , -4825.7295686 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 1.00000000000000 , 1 , -3846.83371819 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 10 , 1 , -4703.34086634 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 0.00100000000000000 , 1 , -7525.41896187 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 0.0100000000000000 , 1 , -5415.94989691 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 0.100000000000000 , 1 , -4043.44802175 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 1.00000000000000 , 1 , -3842.49433871 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 10 , 1 , -4268.33273154 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 0.00100000000000000 , 1 , -3846.97192437 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 0.0100000000000000 , 1 , -3865.06924946 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 0.100000000000000 , 1 , -3887.6771275 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 1.00000000000000 , 1 , -3814.16498995 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 10 , 1 , -3843.67431366 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 0.00100000000000000 , 1 , -3871.95893156 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 0.0100000000000000 , 1 , -3817.47356273 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 0.100000000000000 , 1 , -3879.2569377 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 1.00000000000000 , 1 , -3874.41545538 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 10 , 1 , -3859.62890493 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 0.00100000000000000 , 1 , -3834.15887986 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 0.0100000000000000 , 1 , -3895.44941551 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 0.100000000000000 , 1 , -3841.71901123 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 1.00000000000000 , 1 , -3827.64421938 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 10 , 1 , -3864.20955157
# dataset B3 - RECENT MILD BOTTLENECK -SMALL SAMPLE SIZE - HIGHER MUTATION RATE set_random_seed(656005) np.random.seed(int(10043)) n=10 N0=1 TrueBeta=0 TrueA=0.05 TrueGap=0.1 TrueB=TrueA+TrueGap TrueEps=0.01 TrueTheta=150 NumLoci=30 TFS=[] for i in range(NumLoci): tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n) TFS.append(tfs) # mean number of segregating sites segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)]; meanSegSites=mean(segSitesAcrossLoci); print meanSegSites #inference set_random_seed(124546) np.random.seed(int(123545)) sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)] BetaList=[0.0]; AList=[0.005,0.05,0.5,1.0,5.0]; EpsList=[0.001,0.01, 0.1,1.0,10] ThetaList=[TrueTheta]; results=[] Reps=500; # number of particles for a in AList: b=a+TrueGap for eps in EpsList: btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps) results.append([Reps,n,NumLoci,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])
100.633333333 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.00500000000000000 , 0.105000000000000 , 0.00100000000000000 , 1 , -inf beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.00500000000000000 , 0.105000000000000 , 0.0100000000000000 , 1 , -4143.05110455 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.00500000000000000 , 0.105000000000000 , 0.100000000000000 , 1 , -1565.313949 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.00500000000000000 , 0.105000000000000 , 1.00000000000000 , 1 , -1521.12915228 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.00500000000000000 , 0.105000000000000 , 10 , 1 , -1996.62453825 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.0500000000000000 , 0.150000000000000 , 0.00100000000000000 , 1 , -inf beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.0500000000000000 , 0.150000000000000 , 0.0100000000000000 , 1 , -1931.59287118 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.0500000000000000 , 0.150000000000000 , 0.100000000000000 , 1 , -1272.43522785 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.0500000000000000 , 0.150000000000000 , 1.00000000000000 , 1 , -1537.94575198 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.0500000000000000 , 0.150000000000000 , 10 , 1 , -1870.70083238 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.500000000000000 , 0.600000000000000 , 0.00100000000000000 , 1 , -1525.7970105 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.500000000000000 , 0.600000000000000 , 0.0100000000000000 , 1 , -1516.00736295 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.500000000000000 , 0.600000000000000 , 0.100000000000000 , 1 , -1532.82907382 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.500000000000000 , 0.600000000000000 , 1.00000000000000 , 1 , -1524.09678295 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.500000000000000 , 0.600000000000000 , 10 , 1 , -1508.19143274 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 1.00000000000000 , 1.10000000000000 , 0.00100000000000000 , 1 , -1516.78782758 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 1.00000000000000 , 1.10000000000000 , 0.0100000000000000 , 1 , -1504.09091572 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 1.00000000000000 , 1.10000000000000 , 0.100000000000000 , 1 , -1535.60390314 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 1.00000000000000 , 1.10000000000000 , 1.00000000000000 , 1 , -1510.11801896 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 1.00000000000000 , 1.10000000000000 , 10 , 1 , -1521.08752307 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 5.00000000000000 , 5.10000000000000 , 0.00100000000000000 , 1 , -1521.71754983 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 5.00000000000000 , 5.10000000000000 , 0.0100000000000000 , 1 , -1514.28750067 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 5.00000000000000 , 5.10000000000000 , 0.100000000000000 , 1 , -1531.01126246 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 5.00000000000000 , 5.10000000000000 , 1.00000000000000 , 1 , -1505.00522372 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 5.00000000000000 , 5.10000000000000 , 10 , 1 , -1533.90259185
# dataset B3bis - RECENT MILD BOTTLENECK -SMALL SAMPLE SIZE - EVEN HIGHER MUTATION RATE set_random_seed(656005) np.random.seed(int(10043)) n=10 N0=1 TrueBeta=0 TrueA=0.05 TrueGap=0.1 TrueB=TrueA+TrueGap TrueEps=0.01 TrueTheta=500 NumLoci=30 TFS=[] for i in range(NumLoci): tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n) TFS.append(tfs) # mean number of segregating sites segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)]; meanSegSites=mean(segSitesAcrossLoci); print meanSegSites #inference set_random_seed(124546) np.random.seed(int(123545)) sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)] BetaList=[0.0]; AList=[0.005,0.05,0.5,1.0,5.0]; EpsList=[0.001,0.01, 0.1,1.0,10] ThetaList=[TrueTheta]; results=[] Reps=500; # number of particles for a in AList: b=a+TrueGap for eps in EpsList: btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps) results.append([Reps,n,NumLoci,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])
367.7 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.00500000000000000 , 0.105000000000000 , 0.00100000000000000 , 1 , -inf beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.00500000000000000 , 0.105000000000000 , 0.0100000000000000 , 1 , -6259.71012625 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.00500000000000000 , 0.105000000000000 , 0.100000000000000 , 1 , -2004.72334607 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.00500000000000000 , 0.105000000000000 , 1.00000000000000 , 1 , -1901.35655085 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.00500000000000000 , 0.105000000000000 , 10 , 1 , -2394.39501464 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.0500000000000000 , 0.150000000000000 , 0.00100000000000000 , 1 , -inf beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.0500000000000000 , 0.150000000000000 , 0.0100000000000000 , 1 , -2854.14393912 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.0500000000000000 , 0.150000000000000 , 0.100000000000000 , 1 , -1638.08135101 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.0500000000000000 , 0.150000000000000 , 1.00000000000000 , 1 , -1900.43610638 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.0500000000000000 , 0.150000000000000 , 10 , 1 , -2352.27307662 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.500000000000000 , 0.600000000000000 , 0.00100000000000000 , 1 , -1889.43841676 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.500000000000000 , 0.600000000000000 , 0.0100000000000000 , 1 , -1887.05300588 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.500000000000000 , 0.600000000000000 , 0.100000000000000 , 1 , -1886.20850772 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.500000000000000 , 0.600000000000000 , 1.00000000000000 , 1 , -1920.82108268 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.500000000000000 , 0.600000000000000 , 10 , 1 , -1916.17760265 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 1.00000000000000 , 1.10000000000000 , 0.00100000000000000 , 1 , -1901.56034954 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 1.00000000000000 , 1.10000000000000 , 0.0100000000000000 , 1 , -1903.36337373 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 1.00000000000000 , 1.10000000000000 , 0.100000000000000 , 1 , -1892.45424539 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 1.00000000000000 , 1.10000000000000 , 1.00000000000000 , 1 , -1903.58549086 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 1.00000000000000 , 1.10000000000000 , 10 , 1 , -1875.72009448 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 5.00000000000000 , 5.10000000000000 , 0.00100000000000000 , 1 , -1904.70426324 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 5.00000000000000 , 5.10000000000000 , 0.0100000000000000 , 1 , -1883.57480008 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 5.00000000000000 , 5.10000000000000 , 0.100000000000000 , 1 , -1908.90330908 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 5.00000000000000 , 5.10000000000000 , 1.00000000000000 , 1 , -1883.73005132 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 5.00000000000000 , 5.10000000000000 , 10 , 1 , -1918.55885588
With small mutation rates
# dataset B4 - RECENT MILD BOTTLENECK WITH LOW MUTATION RATE - SAMPLE SIZE 3, 1000 LOCI set_random_seed(656005) np.random.seed(int(10043)) n=3 N0=1 # this is fixed at unity as in Hudson's ms TrueBeta=0 TrueA=0.05 TrueGap=0.1 TrueB=TrueA+TrueGap TrueEps=0.01 TrueTheta=5 NumLoci=1000 TFS=[] for i in range(NumLoci): tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n) TFS.append(tfs) # mean number of segregating sites segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)]; meanSegSites=mean(segSitesAcrossLoci); print meanSegSites # inference set_random_seed(12456546) np.random.seed(int(12345)) sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)] BetaList=[0.0]; AList=[0.005,0.05,0.5,1.0,5.0]; EpsList=[0.001,0.01, 0.1,1.0,10] ThetaList=[TrueTheta]; results=[] Reps=200; # number of particles for a in AList: b=a+TrueGap for eps in EpsList: btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps) results.append([Reps,n,NumLoci,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])
1.284 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 0.00100000000000000 , 1 , -8199.16658344159 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 0.0100000000000000 , 1 , -4845.14168480036 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 0.100000000000000 , 1 , -2003.64279794536 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 1.00000000000000 , 1 , -3325.75934805400 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 10 , 1 , -4173.86472893806 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 0.00100000000000000 , 1 , -5531.93157332962 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 0.0100000000000000 , 1 , -3343.26549075243 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 0.100000000000000 , 1 , -1990.78719179484 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 1.00000000000000 , 1 , -3324.88864932211 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 10 , 1 , -4026.85032236171 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 0.00100000000000000 , 1 , -3442.50066084301 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 0.0100000000000000 , 1 , -3356.50634363227 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 0.100000000000000 , 1 , -3233.20706100601 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 1.00000000000000 , 1 , -3336.05667148710 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 10 , 1 , -3337.73846438619 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 0.00100000000000000 , 1 , -3415.27071905189 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 0.0100000000000000 , 1 , -3376.21020027055 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 0.100000000000000 , 1 , -3315.83668731960 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 1.00000000000000 , 1 , -3348.51498307126 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 10 , 1 , -3330.94713321818 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 0.00100000000000000 , 1 , -3335.65005101939 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 0.0100000000000000 , 1 , -3309.76392134207 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 0.100000000000000 , 1 , -3328.76412119589 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 1.00000000000000 , 1 , -3350.04170366840 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 10 , 1 , -3323.10652571638
We don't detect non-existant bottlenecks
# dataset B5 - NO BOTTLENECKS - SAMPLE SIZE 5, 100 LOCI set_random_seed(656005) np.random.seed(int(10043)) n=5 N0=1 # this is fixed at unity as in Hudson's ms TrueBeta=0 TrueA=0.05 #dummy TrueGap=0.1 #dummy TrueB=TrueA+TrueGap TrueEps=1.0 #no reduction in pop size TrueTheta=5 NumLoci=100 TFS=[] for i in range(NumLoci): tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n) TFS.append(tfs) # mean number of segregating sites segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)]; meanSegSites=mean(segSitesAcrossLoci); print meanSegSites #inference set_random_seed(12456546) np.random.seed(int(12345)) sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)] BetaList=[0.0]; AList=[0.005,0.05,0.5,1.0,5.0]; EpsList=[0.001,0.01, 0.1,1.0,10] ThetaList=[TrueTheta]; results=[] Reps=1000; # number of particles for a in AList: b=a+TrueGap for eps in EpsList: btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps) results.append([Reps,n,NumLoci,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])
19.71 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 0.00100000000000000 , 1 , -14893.7528941 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 0.0100000000000000 , 1 , -5307.23388516 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 0.100000000000000 , 1 , -1017.54379745 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 1.00000000000000 , 1 , -377.626413346 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 10 , 1 , -371.38288873 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 0.00100000000000000 , 1 , -23592.9313808 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 0.0100000000000000 , 1 , -4618.22388074 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 0.100000000000000 , 1 , -833.920280682 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 1.00000000000000 , 1 , -366.136998204 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 10 , 1 , -368.620578748 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 0.00100000000000000 , 1 , -5251.96210255 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 0.0100000000000000 , 1 , -1152.24064874 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 0.100000000000000 , 1 , -469.945393142 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 1.00000000000000 , 1 , -378.677354715 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 10 , 1 , -374.894630666 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 0.00100000000000000 , 1 , -2053.68035855 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 0.0100000000000000 , 1 , -698.33914174 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 0.100000000000000 , 1 , -405.69827546 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 1.00000000000000 , 1 , -358.672249758 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 10 , 1 , -385.701546557 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 0.00100000000000000 , 1 , -377.856904102 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 0.0100000000000000 , 1 , -383.864605249 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 0.100000000000000 , 1 , -370.258771678 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 1.00000000000000 , 1 , -353.622678588 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 10 , 1 , -372.561354959
# dataset B6 - NO BOTTLENECKS - SAMPLE SIZE 10, 30 LOCI set_random_seed(656) np.random.seed(int(143)) n=10 N0=1 # this is fixed at unity as in Hudson's ms TrueBeta=0 TrueA=0.05 #dummy TrueGap=0.1 #dummy TrueB=TrueA+TrueGap TrueEps=1.0 #no reduction in pop size TrueTheta=5 NumLoci=30 TFS=[] for i in range(NumLoci): tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n) TFS.append(tfs) # mean number of segregating sites segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)]; meanSegSites=mean(segSitesAcrossLoci); print meanSegSites #inference set_random_seed(12456) np.random.seed(int(15)) sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)] BetaList=[0.0]; AList=[0.005,0.05,0.5,1.0,5.0]; EpsList=[0.001,0.01, 0.1,1.0,10] ThetaList=[TrueTheta]; results=[] Reps=1000; # number of particles for a in AList: b=a+TrueGap for eps in EpsList: btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps) results.append([Reps,n,NumLoci,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])
30.0333333333 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 0.00100000000000000 , 1 , -inf beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 0.0100000000000000 , 1 , -4020.72436877 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 0.100000000000000 , 1 , -1294.12377314 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 1.00000000000000 , 1 , -463.904646526 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 10 , 1 , -435.304203016 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 0.00100000000000000 , 1 , -inf beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 0.0100000000000000 , 1 , -4286.62612305 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 0.100000000000000 , 1 , -1046.51737715 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 1.00000000000000 , 1 , -455.09475188 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 10 , 1 , -423.612616743 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 0.00100000000000000 , 1 , -2541.73302458 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 0.0100000000000000 , 1 , -936.115534231 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 0.100000000000000 , 1 , -544.935290976 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 1.00000000000000 , 1 , -452.606864125 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 10 , 1 , -483.952790351 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 0.00100000000000000 , 1 , -1092.86592868 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 0.0100000000000000 , 1 , -659.687709815 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 0.100000000000000 , 1 , -475.99086912 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 1.00000000000000 , 1 , -438.91106984 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 10 , 1 , -465.69934625 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 0.00100000000000000 , 1 , -461.106491242 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 0.0100000000000000 , 1 , -458.676482539 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 0.100000000000000 , 1 , -453.921258108 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 1.00000000000000 , 1 , -461.540988399 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 10 , 1 , -430.72283051

Simulating Complex Demographical and Structural Scenarios for β\beta-Effective Population Size (β,Ne)(\beta, N_e)

The next cell is just testing if ms is already installed.

If not run the second cell below which will be downloading and compiling a slightly augmented version of Hudson's ms (just for extracting SFS from the the binary incidence matrices).

There is no need to run the second cell below is ms is already installed in /home/user/msdir/ms/.

1
1
%sh ms 5 2 -T
ms 5 2 -T 48467 38575 3753 // (5:1.335576295853,(3:0.165384739637,(2:0.131984353065,(1:0.024002499878,4:0.024002499878):0.107981853187):0.033400386572):1.170191526413); // ((1:0.169807076454,3:0.169807076454):0.590738654137,(4:0.208807185292,(2:0.181732580066,5:0.181732580066):0.027074605227):0.551738560200);
%sh rm -rf /home/user/msdir && cd /home/user && wget https://github.com/lamastex/lce/raw/master/msSimulations/msdir.tar.gz && tar zxvf msdir.tar.gz && cd msdir && gcc -O3 -o ms ms.c streec.c rand1.c -lm && gcc -o sample_statsSFS sample_statsSFS.c tajd.c -lm export PATH="/home/user/msdir:$PATH" ls -al
--2018-02-12 21:38:14-- https://github.com/lamastex/lce/raw/master/msSimulations/msdir.tar.gz Resolving github.com (github.com)... 192.30.253.112, 192.30.253.113 Connecting to github.com (github.com)|192.30.253.112|:443... connected. HTTP request sent, awaiting response... 302 Found Location: https://raw.githubusercontent.com/lamastex/lce/master/msSimulations/msdir.tar.gz [following] --2018-02-12 21:38:15-- https://raw.githubusercontent.com/lamastex/lce/master/msSimulations/msdir.tar.gz Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ... Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected. HTTP request sent, awaiting response... 200 OK Length: 402688 (393K) [application/octet-stream] Saving to: ‘msdir.tar.gz.6’ msdir.tar.gz.6 100%[===================>] 393.25K --.-KB/s in 0.004s 2018-02-12 21:38:15 (86.8 MB/s) - ‘msdir.tar.gz.6’ saved [402688/402688] msdir/ msdir/._rand1.c msdir/.DS_Store msdir/streec.c msdir/params msdir/rand1t.c msdir/R_example_1/ msdir/R_example_1/thetabayesfig.pdf msdir/R_example_1/._thetabayesfig.pdf msdir/R_example_1/thetabayes.R msdir/R_example_1/._thetabayes.R msdir/._microsat.c msdir/out msdir/._msdoc.pdf msdir/tst msdir/._R_example_3 msdir/._streec.c msdir/._R_example_1 msdir/sample_statsSFS.c msdir/._readms.output.R msdir/R_example_2/ msdir/R_example_2/._demog_infer_simple.R msdir/R_example_2/._demoginfersimplefig.pdf msdir/R_example_2/demoginfersimplefig.pdf msdir/R_example_2/demog_infer_simple.R msdir/._clms msdir/._stats.c msdir/seedms msdir/readms.output.R msdir/._sample_stats.c msdir/ms.c msdir/time2epoch.py msdir/._seedms msdir/ort msdir/rand2t.c msdir/._params msdir/microsat.c msdir/msdoc.pdf msdir/rand2.c msdir/tst2 msdir/._col1 msdir/._readme msdir/readme msdir/ms.h msdir/sample_stats.c msdir/tajd.c msdir/._rand2t.c msdir/._rand2.c msdir/._migmat msdir/._tajd.c msdir/._.DS_Store msdir/._ms.h msdir/dist3.c msdir/stats.c msdir/._dist3.c msdir/._ms.c msdir/R_example_3/ msdir/R_example_3/demog_infer2d.R msdir/R_example_3/demoginfer2dfig2.pdf msdir/R_example_3/._demoginfer2dfig2.pdf msdir/R_example_3/._demog_infer2d.R msdir/._rand1t.c msdir/migmat msdir/clms msdir/R_example_4/ msdir/R_example_4/._demoginfermcmcfig1.pdf msdir/R_example_4/demog_infer_mcmc.R msdir/R_example_4/demoginfermcmcfig2.pdf msdir/R_example_4/demoginfermcmcfig1.pdf msdir/R_example_4/._demoginfermcmcfig2.pdf msdir/R_example_4/._demog_infer_mcmc.R msdir/col1 msdir/._R_example_2 msdir/rand1.c msdir/ms.out msdir/ms msdir/sample_statsSFS msdir/._R_example_4 msdir/tst2Ts
ms.c: In function ‘main’:
ms.c:148:42: warning: ignoring return value of ‘scanf’, declared with attribute warn_unused_result [-Wunused-result]
if( ntbs > 0 ) for( k=0; k<ntbs; k++) scanf(" %s", tbsparamstrs[k] );
                                          ^
sample_statsSFS.c:14:1: warning: return type defaults to ‘int’ [-Wimplicit-int]
main(argc,argv)
 ^
sample_statsSFS.c: In function ‘main’:
sample_statsSFS.c:67:9: warning: implicit declaration of function ‘biggerlist’ [-Wimplicit-function-declaration]
biggerlist(nsam,maxsites, list) ;
         ^
total 428
drwxr-x--- 6 user user     62 Feb 12 21:38 .
drwx------ 6 user user     29 Feb 12 21:38 ..
-rw-r----- 1 user user 6148 Nov 8 22:39 .DS_Store -rw-r----- 1 user user 222 Nov 8 22:39 ._.DS_Store
-rwxr-x--- 1 user user    222 Nov  5  2006 ._R_example_1
-rwxr-x--- 1 user user    222 Nov  5  2006 ._R_example_2
-rwxr-x--- 1 user user    222 Nov  5  2006 ._R_example_3
-rwxr-x--- 1 user user    222 Nov  5  2006 ._R_example_4
-rwxr----- 1 user user    222 Mar  9  2003 ._clms
-rwxr----- 1 user user    222 Jan  5  2004 ._col1
-rw-r----- 1 user user 222 Nov 5 2006 ._dist3.c -rw-r----- 1 user user 222 Nov 7 2006 ._microsat.c -rw-r----- 1 user user 222 Mar 9 2003 ._migmat -rw-r--r-- 1 user user 975 Nov 8 20:23 ._ms.c -rw-r----- 1 user user 514 Nov 8 19:15 ._ms.h -rw-r--r-- 1 user user 177 Oct 16 19:03 ._msdoc.pdf -rw-r----- 1 user user 222 Mar 8 2003 ._params -rw-r----- 1 user user 222 May 3 2007 ._rand1.c -rw-r----- 1 user user 222 May 3 2007 ._rand1t.c -rw-r----- 1 user user 222 May 3 2007 ._rand2.c -rw-r----- 1 user user 222 May 3 2007 ._rand2t.c -rw-r----- 1 user user 222 Mar 8 2003 ._readme -rw-r----- 1 user user 222 May 31 2007 ._readms.output.R -rw-r----- 1 user user 222 May 21 2007 ._sample_stats.c -rw-r----- 1 user user 222 Feb 4 2010 ._seedms -rw-r----- 1 user user 222 Mar 8 2003 ._stats.c -rw-r----- 1 user user 958 Mar 22 2017 ._streec.c -rw-r----- 1 user user 222 Mar 8 2003 ._tajd.c
drwxr-x--- 2 user user      6 Jan 11 07:17 R_example_1
drwxr-x--- 2 user user      6 Jan 11 07:17 R_example_2
drwxr-x--- 2 user user      6 Jan 11 07:17 R_example_3
drwxr-x--- 2 user user      8 Jan 11 07:17 R_example_4
-rwxr----- 1 user user     43 Mar  9  2003 clms
-rwxr----- 1 user user     23 Jan  5  2004 col1
-rw-r----- 1 user user 1531 Nov 5 2006 dist3.c -rw-r----- 1 user user 2937 Nov 7 2006 microsat.c -rw-r----- 1 user user 69 Mar 9 2003 migmat
-rwxr-xr-x 1 user user  53248 Feb 12 21:38 ms
-rw-r--r-- 1 user user 36485 Jan 12 12:46 ms.c -rw-r----- 1 user user 1226 Nov 8 19:15 ms.h -rw-r--r-- 1 user user 534 Jan 11 07:19 ms.out -rw-r--r-- 1 user user 226165 Oct 16 19:03 msdoc.pdf -rw-r--r-- 1 user user 110655 Jan 12 08:12 ort -rw-r--r-- 1 user user 1050 Jan 11 15:28 out -rw-r----- 1 user user 16 Mar 8 2003 params -rw-r----- 1 user user 1225 May 3 2007 rand1.c -rw-r----- 1 user user 821 May 3 2007 rand1t.c -rw-r----- 1 user user 816 May 3 2007 rand2.c -rw-r----- 1 user user 580 May 3 2007 rand2t.c -rw-r----- 1 user user 1386 Mar 8 2003 readme -rw-r----- 1 user user 3501 May 31 2007 readms.output.R -rw-r----- 1 user user 4675 May 21 2007 sample_stats.c
-rwxr-xr-x 1 user user  17976 Feb 12 21:38 sample_statsSFS
-rw-r----- 1 user user 5723 Jan 11 16:19 sample_statsSFS.c -rw-r----- 1 user user 17 Jan 12 13:23 seedms -rw-r----- 1 user user 1751 Mar 8 2003 stats.c -rw-r----- 1 user user 22032 Mar 22 2017 streec.c -rw-r----- 1 user user 1858 Mar 8 2003 tajd.c -rw-r--r-- 1 user user 1183 Jan 12 13:20 time2epoch.py -rw-r--r-- 1 user user 2419 Jan 12 13:23 tst -rw-r--r-- 1 user user 71 Jan 12 06:31 tst2 -rw-r--r-- 1 user user 49 Jan 12 08:06 tst2Ts
%sh ms
Too few command line arguments usage: ms nsam howmany Options: -t theta (this option and/or the next must be used. Theta = 4*N0*u ) -s segsites ( fixed number of segregating sites) -T (Output gene tree.) -F minfreq Output only sites with freq of minor allele >= minfreq. -r rho nsites (rho here is 4Nc) -c f track_len (f = ratio of conversion rate to rec rate. tracklen is mean length.) if rho = 0., f = 4*N0*g, with g the gene conversion rate. -G alpha ( N(t) = N0*exp(-alpha*t) . alpha = -log(Np/Nr)/t -I npop n1 n2 ... [mig_rate] (all elements of mig matrix set to mig_rate/(npop-1) -m i j m_ij (i,j-th element of mig matrix set to m_ij.) -ma m_11 m_12 m_13 m_21 m_22 m_23 ...(Assign values to elements of migration matrix.) -n i size_i (popi has size set to size_i*N0 -g i alpha_i (If used must appear after -M option.) The following options modify parameters at the time 't' specified as the first argument: -eG t alpha (Modify growth rate of all pop's.) -eg t i alpha_i (Modify growth rate of pop i.) -eM t mig_rate (Modify the mig matrix so all elements are mig_rate/(npop-1) -em t i j m_ij (i,j-th element of mig matrix set to m_ij at time t ) -ema t npop m_11 m_12 m_13 m_21 m_22 m_23 ...(Assign values to elements of migration matrix.) -eN t size (Modify pop sizes. New sizes = size*N0 ) -en t i size_i (Modify pop size of pop i. New size of popi = size_i*N0 .) -es t i proportion (Split: pop i -> pop-i + pop-npop, npop increases by 1. proportion is probability that each lineage stays in pop-i. (p, 1-p are admixt. proport. Size of pop npop is set to N0 and alpha = 0.0 , size and alpha of pop i are unchanged. -ej t i j ( Join lineages in pop i and pop j into pop j size, alpha and M are unchanged. -eA t popi num ( num Ancient samples from popi at time t -f filename ( Read command line arguments from file filename.) -p n ( Specifies the precision of the position output. n is the number of digits after the decimal.) -a (Print out allele ages.) See msdoc.pdf for explanation of these parameters. ms

To get SFS from ms output do as follows

n=15; NumLoci=100; TrueTheta=10; # make sure these agree with `ms n NumLoci -t TrueTheta` in the `%sh mc .. ` cell below N0=1.0; TrueG=0.0; TrueBeta=0;
%sh ms 15 100 -t 10.0 | sample_statsSFS > output
%sh pwd head -n 2 output
/home/user/msdir 10 4 6 1 0 8 0 0 2 0 0 0 0 0 23 1 0 1 2 0 0 0 0 0 0 0 0 2
msSfsDataRaw = np.genfromtxt('/home/user/msdir/output', delimiter=' ') msSfsMultiLocusData = [np.insert( np.insert(a, -1, 0., axis=0), 0, 0., axis=0) for a in msSfsDataRaw] #msSfsMultiLocusData # the Watterson estimator of theta under panmictic Kingman prior print (mean([np.sum(msSfsMultiLocusData[j]) for j in range(0,NumLoci)])) / (sum(np.array([1.0/i for i in range(1,n,1)])))
9.86602647531
TrueBeta=0; TFS=[] for i in range(NumLoci): tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n) TFS.append(tfs) sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
#print sfsMultiLocusData print (mean([np.sum(msSfsMultiLocusData[j]) for j in range(0,NumLoci)])) / (sum(np.array([1.0/i for i in range(1,n,1)])))
10.1120620483
# dataset 1G (REPEATED via ms for testing purposes) - KINGMAN'S COALESCENT WITH NO GROWTH # Likelihood based on the full list of SFS set_random_seed(9811545) np.random.seed(int(918675)) BetaList=[-1.0,0.0,10.0]; GrowthList=[0.0]; ThetaList=[1,10,100]; results=[] Reps=20; # number of particles for g in GrowthList: #GrRates = GetRatesForGrowth(g,n,1000) GrRates=[0.,0.]; for i in range(2,n+1,1): GrRates.append(1.0*binomial(i,2)) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps) # !!! note the theta/4.0 !!! #print "mu*4 is Hudson's theta and this TrueTheta = "+str(TrueTheta) print([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci]) results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci]) # report the most likely values print("[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]") MaxI=0 for i in range(len(results)): if results[i][9]>results[MaxI][9]: MaxI=i results[MaxI] # from multiple replicate datasets #[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] # [20, 15, 100, 0, 0.0, 10, 0.0, 10, 0.0, -5093.2238698216834] # [20, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -5147.4560231561763]
[20, 15, 100, 0, 0.000000000000000, 10, -1.00000000000000, 1, 0.000000000000000, -8575.5514680327287] [20, 15, 100, 0, 0.000000000000000, 10, -1.00000000000000, 10, 0.000000000000000, -5217.8312013003724] [20, 15, 100, 0, 0.000000000000000, 10, -1.00000000000000, 100, 0.000000000000000, -7626.3498637938847] [20, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 1, 0.000000000000000, -8479.243014885189] [20, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -5147.4560231561763] [20, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 100, 0.000000000000000, -7780.1337279236359] [20, 15, 100, 0, 0.000000000000000, 10, 10.0000000000000, 1, 0.000000000000000, -8562.8114291459169] [20, 15, 100, 0, 0.000000000000000, 10, 10.0000000000000, 10, 0.000000000000000, -5268.6403181905471] [20, 15, 100, 0, 0.000000000000000, 10, 10.0000000000000, 100, 0.000000000000000, -8040.3624073076298] [Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [20, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -5147.4560231561763]
# dataset 1G (REPEATED via ms for testing purposes) - KINGMAN'S COALESCENT WITH NO GROWTH # Likelihood based on the full list of SFS set_random_seed(9871545) np.random.seed(int(918675)) BetaList=[-1.0,0.0,10.0]; GrowthList=[0.0]; ThetaList=[1,10,100]; results=[] Reps=100; # number of particles for g in GrowthList: #GrRates = GetRatesForGrowth(g,n,1000) GrRates=[0.,0.]; for i in range(2,n+1,1): GrRates.append(1.0*binomial(i,2)) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps) # !!! note the theta/4.0 !!! #print "mu*4 is Hudson's theta and this TrueTheta = "+str(TrueTheta) print([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci]) results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci]) # report the most likely values print("[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]") MaxI=0 for i in range(len(results)): if results[i][9]>results[MaxI][9]: MaxI=i results[MaxI] # from multiple replicate datasets #[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] # [20, 15, 100, 0, 0.0, 10, 0.0, 10, 0.0, -5093.2238698216834] # [100, 15, 100, 0, 0.0, 10, 0.0, 10, 0.0, -4719.0559858475472]
[100, 15, 100, 0, 0.000000000000000, 10, -1.00000000000000, 1, 0.000000000000000, -8084.2905567486741] [100, 15, 100, 0, 0.000000000000000, 10, -1.00000000000000, 10, 0.000000000000000, -4803.2402300140157] [100, 15, 100, 0, 0.000000000000000, 10, -1.00000000000000, 100, 0.000000000000000, -7226.7640817544534] [100, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 1, 0.000000000000000, -8034.6893234419967] [100, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -4719.0559858475472] [100, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -4719.0559858475472] [100, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 100, 0.000000000000000, -7344.1201747338246] [100, 15, 100, 0, 0.000000000000000, 10, 10.0000000000000, 1, 0.000000000000000, -8187.6802601310701] [100, 15, 100, 0, 0.000000000000000, 10, 10.0000000000000, 10, 0.000000000000000, -4881.8177457366373] [100, 15, 100, 0, 0.000000000000000, 10, 10.0000000000000, 100, 0.000000000000000, -7583.3041235116634] [Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [100, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -4719.0559858475472]
Reps=800; beta=0.0; theta=10.0; growth=0.0; GrRates=[0.,0.]; for i in range(2,n+1,1): GrRates.append(1.0*binomial(i,2)) print("[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]") for i in range(5): logLAcrossAllLoci=LklOfBetaThetaGrowthRates(beta,theta/4.0,growth,GrRates,n,msSfsMultiLocusData,Reps) print([Reps,n,NumLoci,beta,theta,logLAcrossAllLoci]) ''' [Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci] [10, 15, 100, 0.000000000000000, 10.0000000000000, -5458.7347191116787] [10, 15, 100, 0.000000000000000, 10.0000000000000, -5254.6231241170144] [10, 15, 100, 0.000000000000000, 10.0000000000000, -5297.870156684322] [10, 15, 100, 0.000000000000000, 10.0000000000000, -5396.4229009903611] [10, 15, 100, 0.000000000000000, 10.0000000000000, -5294.8534748499615] [100, 15, 100, 0.000000000000000, 10.0000000000000, -4637.1829885643956] [100, 15, 100, 0.000000000000000, 10.0000000000000, -4575.514309145261] [100, 15, 100, 0.000000000000000, 10.0000000000000, -4612.6721939365625] [100, 15, 100, 0.000000000000000, 10.0000000000000, -4643.4324787908445] [100, 15, 100, 0.000000000000000, 10.0000000000000, -4647.9556338215662] [200, 15, 100, 0.000000000000000, 10.0000000000000, -4542.2875568237114] [200, 15, 100, 0.000000000000000, 10.0000000000000, -4532.9762385300965] [200, 15, 100, 0.000000000000000, 10.0000000000000, -4538.9995541760582] [200, 15, 100, 0.000000000000000, 10.0000000000000, -4518.7981532977683] [200, 15, 100, 0.000000000000000, 10.0000000000000, -4464.6882092921878] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4344.6063514647376] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4342.8307183177949] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4354.7931999467328] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4332.2102000277955] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4352.7192419989115] [800, 15, 100, 0.000000000000000, 10.0000000000000, -4253.587709948677] [800, 15, 100, 0.000000000000000, 10.0000000000000, -4285.4570525672298] [800, 15, 100, 0.000000000000000, 10.0000000000000, -4229.0499889288321] [800, 15, 100, 0.000000000000000, 10.0000000000000, -4306.3893655985867] [800, 15, 100, 0.000000000000000, 10.0000000000000, -4315.2945672085107] '''
[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci] [800, 15, 100, 0.000000000000000, 10.0000000000000, -4253.587709948677] [800, 15, 100, 0.000000000000000, 10.0000000000000, -4285.4570525672298] [800, 15, 100, 0.000000000000000, 10.0000000000000, -4229.0499889288321] [800, 15, 100, 0.000000000000000, 10.0000000000000, -4306.3893655985867] [800, 15, 100, 0.000000000000000, 10.0000000000000, -4315.2945672085107] '\n[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]\n[10, 15, 100, 0.000000000000000, 10.0000000000000, -5458.7347191116787]\n[10, 15, 100, 0.000000000000000, 10.0000000000000, -5254.6231241170144]\n[10, 15, 100, 0.000000000000000, 10.0000000000000, -5297.870156684322]\n[10, 15, 100, 0.000000000000000, 10.0000000000000, -5396.4229009903611]\n[10, 15, 100, 0.000000000000000, 10.0000000000000, -5294.8534748499615]\n[100, 15, 100, 0.000000000000000, 10.0000000000000, -4637.1829885643956]\n[100, 15, 100, 0.000000000000000, 10.0000000000000, -4575.514309145261]\n[100, 15, 100, 0.000000000000000, 10.0000000000000, -4612.6721939365625]\n[100, 15, 100, 0.000000000000000, 10.0000000000000, -4643.4324787908445]\n[100, 15, 100, 0.000000000000000, 10.0000000000000, -4647.9556338215662]\n\n[200, 15, 100, 0.000000000000000, 10.0000000000000, -4542.2875568237114]\n[200, 15, 100, 0.000000000000000, 10.0000000000000, -4532.9762385300965]\n[200, 15, 100, 0.000000000000000, 10.0000000000000, -4538.9995541760582]\n[200, 15, 100, 0.000000000000000, 10.0000000000000, -4518.7981532977683]\n[200, 15, 100, 0.000000000000000, 10.0000000000000, -4464.6882092921878]\n\n[400, 15, 100, 0.000000000000000, 10.0000000000000, -4344.6063514647376]\n[400, 15, 100, 0.000000000000000, 10.0000000000000, -4342.8307183177949]\n[400, 15, 100, 0.000000000000000, 10.0000000000000, -4354.7931999467328]\n[400, 15, 100, 0.000000000000000, 10.0000000000000, -4332.2102000277955]\n[400, 15, 100, 0.000000000000000, 10.0000000000000, -4352.7192419989115]\n'
Reps=400 for i in range(5): logLAcrossAllLoci=LklOfBetaThetaGrowthRates(beta,theta/4.0,growth,GrRates,n,msSfsMultiLocusData,Reps) print([Reps,n,NumLoci,beta,theta,logLAcrossAllLoci])
[400, 15, 100, 0.000000000000000, 10.0000000000000, -4344.6063514647376] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4342.8307183177949] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4354.7931999467328] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4332.2102000277955] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4352.7192419989115] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4344.6063514647376] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4342.8307183177949] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4354.7931999467328] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4332.2102000277955] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4352.7192419989115]
# dataset 1G (REPEATED via ms for testing purposes) - KINGMAN'S COALESCENT WITH NO GROWTH # Likelihood based on the full list of SFS set_random_seed(987545) np.random.seed(int(18675)) BetaList=[-1.0,0.0,10.0]; GrowthList=[0.0,1.0,10.0]; ThetaList=[1,10,100]; results=[] Reps=20; # number of particles for g in GrowthList: GrRates = GetRatesForGrowth(g,n,1000) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps) # !!! note the theta/4.0 !!! print "mu*4 is Hudson's theta and this TrueTheta = "+str(TrueTheta) results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci]) # report the most likely values print("[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]") MaxI=0 for i in range(len(results)): if results[i][9]>results[MaxI][9]: MaxI=i results[MaxI]
%time # using same SFS data as above with standard coalescent from scipy.optimize import differential_evolution Reps=100; # number of particles with 100 is taking too long in this naive implementation, but REPS=20 has too much vriance in likelihood estimate! parametersAndLogLkl=[] for g in [0]: #GrRates = GetRatesForGrowth(g,n,1000) # slow - when g=0 we can do it analytically as below GrRates=[0.,0.]; for i in range(2,n+1,1): GrRates.append(1.0*binomial(i,2)) #logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps) def negLkl(BT): f = -LklOfBetaThetaGrowthRates(RR(BT[0]),RR(BT[1]/4.0),g,GrRates,n,msSfsMultiLocusData,Reps); parametersAndLogLkl.append((BT[0],BT[1], -f)) # track the parameters for each likihood evaluation #print str(BT[0])+" , "+str(BT[1])+" : "+str(f) return f #negLogLkl_SplitPairCounts2(spc,B) bounds = [(-0.99,20.0),(1.0,100.0)] def mycallback(xk, convergence): print "mycallback "+str(xk)+" , "+ str(convergence) return convergence>1.0 result = differential_evolution(negLkl, bounds, disp=True, popsize=15, callback=mycallback, recombination=0.7, maxiter=100) result.x, result.fun #### see results in markdown for various runs of the same dataset
differential_evolution step 1: f(x)= 4840.34 mycallback [-0.07549672 8.09048459] , 0.0799216743708 differential_evolution step 2: f(x)= 4717.08 mycallback [ 1.27948965 11.49985163] , 0.0890399767062 differential_evolution step 3: f(x)= 4717.08 mycallback [ 1.27948965 11.49985163] , 0.117261285127 differential_evolution step 4: f(x)= 4717.08
REPS=100 differential_evolution step 1: f(x)= 4840.34 mycallback [-0.07549672 8.09048459] , 0.0799216743708 differential_evolution step 2: f(x)= 4717.08 mycallback [ 1.27948965 11.49985163] , 0.0890399767062 differential_evolution step 3: f(x)= 4717.08 mycallback [ 1.27948965 11.49985163] , 0.117261285127 differential_evolution step 4: f(x)= 4717.08

With just 10 particles (REPS=10) there is too much uncertainty in likelihood estimates for stable convergence of the stochastic optimisation algorithm.

REPS=10 differential_evolution step 1: f(x)= 5501.14 mycallback [ 6.67340526 14.94882129] , 0.0856814618557 differential_evolution step 2: f(x)= 5454.21 mycallback [ 19.73690787 11.70403212] , 0.102796928814 differential_evolution step 3: f(x)= 5386.65 mycallback [ 3.73906694 15.47908072] , 0.198542855185 differential_evolution step 4: f(x)= 5386.65 mycallback [ 3.73906694 15.47908072] , 0.260265815629 differential_evolution step 5: f(x)= 5375.97differential_evolution step 1: f(x)= 5501.14 mycallback [ 6.67340526 14.94882129] , 0.0856814618557 differential_evolution step 2: f(x)= 5454.21 mycallback [ 19.73690787 11.70403212] , 0.102796928814 differential_evolution step 3: f(x)= 5386.65 mycallback [ 3.73906694 15.47908072] , 0.198542855185 differential_evolution step 4: f(x)= 5386.65 mycallback [ 3.73906694 15.47908072] , 0.260265815629 differential_evolution step 5: f(x)= 5375.97 mycallback [ 5.91624121 14.41968427] , 1.01879779978 (array([ 5.91624121, 14.41968427]), 5375.9717345313738) CPU time: 5724.49 s, Wall time: 5794.37 s
%time from scipy.optimize import rosen, differential_evolution Reps=100; # number of particles with 100 is taking too long in this naive implementation, but REPS=20 has too much vriance in likelihood estimate! parametersAndLogLkl=[] for g in [0]: #GrRates = GetRatesForGrowth(g,n,1000) # slow - when g=0 we can do it analytically as below GrRates=[0.,0.]; for i in range(2,n+1,1): GrRates.append(1.0*binomial(i,2)) #logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps) def negLkl(BT): f = -LklOfBetaThetaGrowthRates(RR(BT[0]),RR(BT[1]/4.0),g,GrRates,n,msSfsMultiLocusData,Reps); parametersAndLogLkl.append((BT[0],BT[1], -f)) # track the parameters for each likihood evaluation #print str(BT[0])+" , "+str(BT[1])+" : "+str(f) return f #negLogLkl_SplitPairCounts2(spc,B) bounds = [(-0.99,20.0),(1.0,150.0)] def mycallback(xk, convergence): print "mycallback "+str(xk)+" , "+ str(convergence) return convergence>1.0 result = differential_evolution(negLkl, bounds, disp=True, popsize=25, callback=mycallback, recombination=0.7, maxiter=100) # Reps=10 : popsize=15, recombination=0.7, maxiter=10 (array([ 5.89973178, 61.12517964]), 24876.259063730031) # Reps =10; beta, mu, g, lkl, logLkl = 8.27364821134604 , 17.2526514877720 , 0.000000000000000 , 0.0 , -24674.2850081 #bounds = [(0,2), (0, 2)] #result = differential_evolution(rosen, bounds) result.x, result.fun #### see results in markdown for various runs of the same dataset
from scipy.optimize import rosen, differential_evolution # this is very slow even with 10 particles .... :( but gets the ball-park answer betaHat,thetaHat, -logLkl = [ -0.02068417, 13.1918302 ]), 5217.4830580740918 Reps=2; # number of particles with 100 is taking too long in this naive implementation for g in GrowthList: #GrRates = GetRatesForGrowth(g,n,1000) # slow - when g=0 we can do it analytically as below GrRates=[0.,0.]; for i in range(2,n+1,1): GrRates.append(1.0*binomial(i,2)) #logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps) def negLkl(BT): f = -LklOfBetaThetaGrowthRates(RR(BT[0]),RR(BT[1]/4.0),g,GrRates,n,msSfsMultiLocusData,Reps) #print BT[0],BT[1],f return f #negLogLkl_SplitPairCounts2(spc,B) bounds = [(-0.99,10.0),(1.0,50.0)] result = differential_evolution(negLkl, bounds, disp=True, popsize=10,maxiter=2) #bounds = [(0,2), (0, 2)] #result = differential_evolution(rosen, bounds) result.x, result.fun
beta, mu, g, lkl, logLkl = 0.192638876372166 , 3.10305863202379 , 0.000000000000000 , 0.0 , -5959.36028135 beta, mu, g, lkl, logLkl = 2.67132512883625 , 12.4986014807133 , 0.000000000000000 , 0.0 , -7372.41751058 beta, mu, g, lkl, logLkl = 7.49799987485964 , 1.97524202923657 , 0.000000000000000 , 0.0 , -6026.14589091 beta, mu, g, lkl, logLkl = -0.0285043371578597 , 7.46666808400616 , 0.000000000000000 , 0.0 , -6685.24590411 beta, mu, g, lkl, logLkl = 3.34795982225950 , 11.6497056655876 , 0.000000000000000 , 0.0 , -7278.767204 beta, mu, g, lkl, logLkl = 2.23636457002183 , 8.67266627919945 , 0.000000000000000 , 0.0 , -6876.02919136 beta, mu, g, lkl, logLkl = 3.78436729022016 , 6.09873216236385 , 0.000000000000000 , 0.0 , -6412.68424573 beta, mu, g, lkl, logLkl = 8.50757204007902 , 9.92991203928796 , 0.000000000000000 , 0.0 , -7113.78218942 beta, mu, g, lkl, logLkl = 6.06243292729823 , 0.548945366636503 , 0.000000000000000 , 0.0 , -7565.39248692 beta, mu, g, lkl, logLkl = 4.68965901099694 , 3.66388200572125 , 0.000000000000000 , 0.0 , -6246.55179671 beta, mu, g, lkl, logLkl = 4.35688866699788 , 10.6428001005425 , 0.000000000000000 , 0.0 , -7110.14703741 beta, mu, g, lkl, logLkl = 9.16936331214881 , 6.72651133507723 , 0.000000000000000 , 0.0 , -6544.03642779 beta, mu, g, lkl, logLkl = 9.85781452108139 , 1.30484065019712 , 0.000000000000000 , 0.0 , -6328.42461171 beta, mu, g, lkl, logLkl = 0.816395524051414 , 2.36332553286671 , 0.000000000000000 , 0.0 , -6016.30390969 beta, mu, g, lkl, logLkl = 7.21413656472797 , 9.31191598328838 , 0.000000000000000 , 0.0 , -7082.62897066 beta, mu, g, lkl, logLkl = 6.33536935538389 , 10.9597156910161 , 0.000000000000000 , 0.0 , -7149.73061066 beta, mu, g, lkl, logLkl = 8.27055336661729 , 4.97902583659657 , 0.000000000000000 , 0.0 , -6311.30477347 beta, mu, g, lkl, logLkl = 1.35918783360545 , 7.96286047667110 , 0.000000000000000 , 0.0 , -6628.6018027 beta, mu, g, lkl, logLkl = -0.466719496508359 , 5.57985090985656 , 0.000000000000000 , 0.0 , -6575.92260353 beta, mu, g, lkl, logLkl = 5.18711156639093 , 4.37825362756296 , 0.000000000000000 , 0.0 , -6322.40115164 beta, mu, g, lkl, logLkl = 1.67494249329244 , 9.67447060439608 , 0.000000000000000 , 0.0 , -6985.4346397 beta, mu, g, lkl, logLkl = 7.69490521727455 , 0.689216967151480 , 0.000000000000000 , 0.0 , -7275.37049679 beta, mu, g, lkl, logLkl = 3.49039227568789 , 10.3317546849555 , 0.000000000000000 , 0.0 , -7087.19841019 beta, mu, g, lkl, logLkl = 6.92038038979015 , 5.26746915289674 , 0.000000000000000 , 0.0 , -6313.42500847 beta, mu, g, lkl, logLkl = 6.95244948857194 , 3.45260834656504 , 0.000000000000000 , 0.0 , -6130.01887935 beta, mu, g, lkl, logLkl = -0.503205832268426 , 0.397528088764742 , 0.000000000000000 , 0.0 , -8442.41312608 beta, mu, g, lkl, logLkl = 3.78436729022016 , 1.35255462808006 , 0.000000000000000 , 0.0 , -6428.62478199 beta, mu, g, lkl, logLkl = 4.67615534518670 , 3.67883562201361 , 0.000000000000000 , 0.0 , -6209.3237204 beta, mu, g, lkl, logLkl = 6.06243292729823 , 8.23010766628102 , 0.000000000000000 , 0.0 , -6811.31764995 beta, mu, g, lkl, logLkl = 2.73429535133285 , 1.56861914031829 , 0.000000000000000 , 0.0 , -6179.49330062 beta, mu, g, lkl, logLkl = 2.96991796963890 , 3.64417865440990 , 0.000000000000000 , 0.0 , -6251.37216326 beta, mu, g, lkl, logLkl = 3.96307698196270 , 6.72651133507723 , 0.000000000000000 , 0.0 , -6590.47598199 beta, mu, g, lkl, logLkl = 5.16361988756579 , 10.8458915475144 , 0.000000000000000 , 0.0 , -7221.92190556 beta, mu, g, lkl, logLkl = 1.40874977574457 , 2.84325557950820 , 0.000000000000000 , 0.0 , -5991.35443199 beta, mu, g, lkl, logLkl = 0.00365597770044435 , 2.30213555712210 , 0.000000000000000 , 0.0 , -6134.61393871 beta, mu, g, lkl, logLkl = 8.71089382474266 , 1.40613546192693 , 0.000000000000000 , 0.0 , -6361.96423749 beta, mu, g, lkl, logLkl = 7.18125606152488 , 4.97902583659657 , 0.000000000000000 , 0.0 , -6309.35420598 beta, mu, g, lkl, logLkl = 1.57668630043092 , 7.96286047667110 , 0.000000000000000 , 0.0 , -6765.12718023 beta, mu, g, lkl, logLkl = -0.744870852752415 , 9.24946143904115 , 0.000000000000000 , 0.0 , -7133.00470978 beta, mu, g, lkl, logLkl = 5.18711156639093 , 4.55582647046258 , 0.000000000000000 , 0.0 , -6297.25132715 differential_evolution step 1: f(x)= 5959.36 beta, mu, g, lkl, logLkl = 0.192638876372166 , 5.33506215577862 , 0.000000000000000 , 0.0 , -6233.43365021 beta, mu, g, lkl, logLkl = 1.66364081833354 , 5.11887655987214 , 0.000000000000000 , 0.0 , -6262.96622871 beta, mu, g, lkl, logLkl = 0.338538343347064 , 4.38826218742826 , 0.000000000000000 , 0.0 , -6367.89190528 beta, mu, g, lkl, logLkl = 3.14236397409478 , 3.35484287574148 , 0.000000000000000 , 0.0 , -6104.87506939 beta, mu, g, lkl, logLkl = 5.86812039065937 , 5.84266857414679 , 0.000000000000000 , 0.0 , -6520.56924811 beta, mu, g, lkl, logLkl = 2.23636457002183 , 0.827913436542197 , 0.000000000000000 , 0.0 , -6952.95971211 beta, mu, g, lkl, logLkl = 7.25750359508364 , 3.99293525185519 , 0.000000000000000 , 0.0 , -6087.1047052 beta, mu, g, lkl, logLkl = 3.86074216660030 , 5.37814437768196 , 0.000000000000000 , 0.0 , -6231.30999424 beta, mu, g, lkl, logLkl = 3.81423543974023 , 3.81494975942578 , 0.000000000000000 , 0.0 , -6042.67108275 beta, mu, g, lkl, logLkl = 3.69353766263250 , 2.46897426036644 , 0.000000000000000 , 0.0 , -6142.37195855 beta, mu, g, lkl, logLkl = 1.11747214582904 , 4.39525471785516 , 0.000000000000000 , 0.0 , -6180.68009054 beta, mu, g, lkl, logLkl = -0.145170418590930 , 4.01785603039970 , 0.000000000000000 , 0.0 , -6256.13525601 beta, mu, g, lkl, logLkl = 9.85781452108139 , 3.04252144682377 , 0.000000000000000 , 0.0 , -6092.50468237 beta, mu, g, lkl, logLkl = 1.45813066782271 , 6.55180558519940 , 0.000000000000000 , 0.0 , -6667.19188988 beta, mu, g, lkl, logLkl = 3.69548178858569 , 2.58566058385317 , 0.000000000000000 , 0.0 , -6176.88539186 beta, mu, g, lkl, logLkl = 2.36042425504495 , 2.71042836372860 , 0.000000000000000 , 0.0 , -6025.08222264 beta, mu, g, lkl, logLkl = -0.722998687542751 , 4.19536282629316 , 0.000000000000000 , 0.0 , -6278.88214632 beta, mu, g, lkl, logLkl = 1.35918783360545 , 4.36786205432543 , 0.000000000000000 , 0.0 , -6252.91619478 beta, mu, g, lkl, logLkl = 3.99384188106299 , 1.62152859537633 , 0.000000000000000 , 0.0 , -6417.61327055 beta, mu, g, lkl, logLkl = 4.56751725155766 , 4.55582647046258 , 0.000000000000000 , 0.0 , -6119.39907965 differential_evolution step 2: f(x)= 5959.36 beta, mu, g, lkl, logLkl = 0.192638876372166 , 3.10305863202379 , 0.000000000000000 , 0.0 , -6131.56057775 beta, mu, g, lkl, logLkl = 0.192638886372166 , 3.10305863202379 , 0.000000000000000 , 0.0 , -6078.25877917 beta, mu, g, lkl, logLkl = 0.192638876372166 , 3.10305863452379 , 0.000000000000000 , 0.0 , -6284.41688891 beta, mu, g, lkl, logLkl = 10.0000000000000 , 0.250000000000000 , 0.000000000000000 , 0.0 , -9137.76313061 beta, mu, g, lkl, logLkl = 10.0000000100000 , 0.250000000000000 , 0.000000000000000 , 0.0 , -9149.80345139 beta, mu, g, lkl, logLkl = 10.0000000000000 , 0.250000002500000 , 0.000000000000000 , 0.0 , -9309.20554021 beta, mu, g, lkl, logLkl = 2.43072387055414 , 2.45197750720723 , 0.000000000000000 , 0.0 , -6126.73018708 beta, mu, g, lkl, logLkl = 2.43072388055414 , 2.45197750720723 , 0.000000000000000 , 0.0 , -5983.82942447 beta, mu, g, lkl, logLkl = 2.43072387055414 , 2.45197750970723 , 0.000000000000000 , 0.0 , -6263.1118694 beta, mu, g, lkl, logLkl = 0.613854406619796 , 2.98052285615436 , 0.000000000000000 , 0.0 , -6088.00262364 beta, mu, g, lkl, logLkl = 0.613854416619796 , 2.98052285615436 , 0.000000000000000 , 0.0 , -6036.85110754 beta, mu, g, lkl, logLkl = 0.613854406619796 , 2.98052285865436 , 0.000000000000000 , 0.0 , -5986.87893203 beta, mu, g, lkl, logLkl = 0.354165950124406 , 3.05606880405018 , 0.000000000000000 , 0.0 , -6026.5351121 beta, mu, g, lkl, logLkl = 0.354165960124406 , 3.05606880405018 , 0.000000000000000 , 0.0 , -6062.35359389 beta, mu, g, lkl, logLkl = 0.354165950124406 , 3.05606880655018 , 0.000000000000000 , 0.0 , -6172.25697922 beta, mu, g, lkl, logLkl = 0.233351673085460 , 3.09121487562159 , 0.000000000000000 , 0.0 , -6060.43865358 beta, mu, g, lkl, logLkl = 0.233351683085460 , 3.09121487562159 , 0.000000000000000 , 0.0 , -6051.7120833 beta, mu, g, lkl, logLkl = 0.233351673085460 , 3.09121487812159 , 0.000000000000000 , 0.0 , -6019.3662997 beta, mu, g, lkl, logLkl = 0.207377943213967 , 3.09877089127978 , 0.000000000000000 , 0.0 , -6109.53214353 beta, mu, g, lkl, logLkl = 0.207377953213967 , 3.09877089127978 , 0.000000000000000 , 0.0 , -6125.10610218 beta, mu, g, lkl, logLkl = 0.207377943213967 , 3.09877089377978 , 0.000000000000000 , 0.0 , -6155.02525146 beta, mu, g, lkl, logLkl = 0.197165171051696 , 3.10174188800430 , 0.000000000000000 , 0.0 , -6245.20614406 beta, mu, g, lkl, logLkl = 0.197165181051696 , 3.10174188800430 , 0.000000000000000 , 0.0 , -6104.44659838 beta, mu, g, lkl, logLkl = 0.197165171051696 , 3.10174189050430 , 0.000000000000000 , 0.0 , -6082.23437022 beta, mu, g, lkl, logLkl = 0.194314419637385 , 3.10257119986789 , 0.000000000000000 , 0.0 , -6094.10604157 beta, mu, g, lkl, logLkl = 0.194314429637385 , 3.10257119986789 , 0.000000000000000 , 0.0 , -6019.46527039 beta, mu, g, lkl, logLkl = 0.194314419637385 , 3.10257120236789 , 0.000000000000000 , 0.0 , -6125.24844111 beta, mu, g, lkl, logLkl = 0.193080660132688 , 3.10293011274572 , 0.000000000000000 , 0.0 , -6272.78116254 beta, mu, g, lkl, logLkl = 0.193080670132688 , 3.10293011274572 , 0.000000000000000 , 0.0 , -6126.20077927 beta, mu, g, lkl, logLkl = 0.193080660132688 , 3.10293011524572 , 0.000000000000000 , 0.0 , -6054.71655418 beta, mu, g, lkl, logLkl = 0.192822702475048 , 3.10300515518706 , 0.000000000000000 , 0.0 , -6106.41518998 beta, mu, g, lkl, logLkl = 0.192822712475048 , 3.10300515518706 , 0.000000000000000 , 0.0 , -5987.8044366 beta, mu, g, lkl, logLkl = 0.192822702475048 , 3.10300515768706 , 0.000000000000000 , 0.0 , -6150.26753875 beta, mu, g, lkl, logLkl = 0.192682158303393 , 3.10304604088068 , 0.000000000000000 , 0.0 , -6182.05215543 beta, mu, g, lkl, logLkl = 0.192682168303393 , 3.10304604088068 , 0.000000000000000 , 0.0 , -6081.14160241 beta, mu, g, lkl, logLkl = 0.192682158303393 , 3.10304604338068 , 0.000000000000000 , 0.0 , -6057.26764101 beta, mu, g, lkl, logLkl = 0.192654742319382 , 3.10305401646237 , 0.000000000000000 , 0.0 , -6259.96931151 beta, mu, g, lkl, logLkl = 0.192654752319382 , 3.10305401646237 , 0.000000000000000 , 0.0 , -6226.55881597 beta, mu, g, lkl, logLkl = 0.192654742319382 , 3.10305401896237 , 0.000000000000000 , 0.0 , -6240.92287717 beta, mu, g, lkl, logLkl = 0.192644034400764 , 3.10305713150211 , 0.000000000000000 , 0.0 , -6161.27725395 beta, mu, g, lkl, logLkl = 0.192644044400764 , 3.10305713150211 , 0.000000000000000 , 0.0 , -6178.52908633 beta, mu, g, lkl, logLkl = 0.192644034400764 , 3.10305713400211 , 0.000000000000000 , 0.0 , -6189.52660829 beta, mu, g, lkl, logLkl = 0.192640537825439 , 3.10305814869055 , 0.000000000000000 , 0.0 , -6181.86672518 beta, mu, g, lkl, logLkl = 0.192640547825439 , 3.10305814869055 , 0.000000000000000 , 0.0 , -6021.01092336 beta, mu, g, lkl, logLkl = 0.192640537825439 , 3.10305815119055 , 0.000000000000000 , 0.0 , -6030.10581767 beta, mu, g, lkl, logLkl = 0.192639447831879 , 3.10305846578049 , 0.000000000000000 , 0.0 , -6017.14996387 beta, mu, g, lkl, logLkl = 0.192639457831879 , 3.10305846578049 , 0.000000000000000 , 0.0 , -6139.22015951 beta, mu, g, lkl, logLkl = 0.192639447831879 , 3.10305846828049 , 0.000000000000000 , 0.0 , -6019.72745884 beta, mu, g, lkl, logLkl = 0.192639050147600 , 3.10305846295499 , 0.000000000000000 , 0.0 , -6048.12201106 beta, mu, g, lkl, logLkl = 0.192639060147600 , 3.10305846295499 , 0.000000000000000 , 0.0 , -6290.14258722 beta, mu, g, lkl, logLkl = 0.192639050147600 , 3.10305846545499 , 0.000000000000000 , 0.0 , -6232.48324842 beta, mu, g, lkl, logLkl = 0.192639388774753 , 3.10305846536090 , 0.000000000000000 , 0.0 , -6131.48137252 beta, mu, g, lkl, logLkl = 0.192639398774753 , 3.10305846536090 , 0.000000000000000 , 0.0 , -6088.72194232 beta, mu, g, lkl, logLkl = 0.192639388774753 , 3.10305846786090 , 0.000000000000000 , 0.0 , -6106.59784861 beta, mu, g, lkl, logLkl = 0.192639430401962 , 3.10305846565666 , 0.000000000000000 , 0.0 , -5980.92387623 beta, mu, g, lkl, logLkl = 0.192639440401962 , 3.10305846565666 , 0.000000000000000 , 0.0 , -6381.52342578 beta, mu, g, lkl, logLkl = 0.192639430401962 , 3.10305846815666 , 0.000000000000000 , 0.0 , -6252.54723446 beta, mu, g, lkl, logLkl = 0.192639418026174 , 3.10305846556873 , 0.000000000000000 , 0.0 , -6133.75725714 beta, mu, g, lkl, logLkl = 0.192639428026174 , 3.10305846556873 , 0.000000000000000 , 0.0 , -6049.16364513 beta, mu, g, lkl, logLkl = 0.192639418026174 , 3.10305846806873 , 0.000000000000000 , 0.0 , -5971.18672324 beta, mu, g, lkl, logLkl = 0.192639427615362 , 3.10305846563686 , 0.000000000000000 , 0.0 , -6128.07517595 beta, mu, g, lkl, logLkl = 0.192639437615362 , 3.10305846563686 , 0.000000000000000 , 0.0 , -5994.94295566 beta, mu, g, lkl, logLkl = 0.192639427615362 , 3.10305846813686 , 0.000000000000000 , 0.0 , -6167.57086263 beta, mu, g, lkl, logLkl = 0.192639430127376 , 3.10305846565470 , 0.000000000000000 , 0.0 , -6339.1254703 beta, mu, g, lkl, logLkl = 0.192639440127376 , 3.10305846565470 , 0.000000000000000 , 0.0 , -5981.17374362 beta, mu, g, lkl, logLkl = 0.192639430127376 , 3.10305846815471 , 0.000000000000000 , 0.0 , -6157.72285945 beta, mu, g, lkl, logLkl = 0.192639430401962 , 3.10305846565666 , 0.000000000000000 , 0.0 , -6083.80831441 beta, mu, g, lkl, logLkl = 0.192639440401962 , 3.10305846565666 , 0.000000000000000 , 0.0 , -6173.33870907 beta, mu, g, lkl, logLkl = 0.192639430401962 , 3.10305846815666 , 0.000000000000000 , 0.0 , -6051.74194185 (array([ 0.19263888, 12.41223453]), 5959.3602813466023)

If the above is run long enough it goes to (0.0,10.0).

A Complex Demographic Structured Sample

We will generates SFS using Hudson's ms using the scheme described by Fig. 3 on Page 19 of msdoc.pdf and estimate the effective beta-splitting population size.

n=15; NumLoci=100; TrueTheta=10.0; # make sure these agree with `ms n NumLoci -t TrueTheta ...` in the `%sh mc .. ` cell below N0=1.0; TrueG=0.0; TrueBeta=0.0; # actually TrueBeta is unknown or not applicable
%sh #ms 30 20 -t 10.0 -I 6 0 14 0 0 16 0 -m 1 2 2.5 -m 2 1 2.5 -m 2 3 2.5 -m 3 2 2.5 -m 4 5 2.5 -m 5 4 2.5 -m 5 6 2.5 -m 6 5 2.5 -em 2.0 3 4 2.5 -em 2.0 4 3 2.5 | sample_statsSFS > output2
%sh head -n 4 /home/user/msdir/output2
96 26 16 54 22 0 14 0 0 71 0 0 0 0 30 13 61 28 2 0 5 31 0 0 0 34 0 0 32 23 2 2 2 6 42 53 0 0 0 0 0 0 35 2 12 2 0 6 100 86 0 0 0 0 0 0
%sh ms 15 100 -t 10.0 -I 6 0 7 0 0 8 0 -m 1 2 2.5 -m 2 1 2.5 -m 2 3 2.5 -m 3 2 2.5 -m 4 5 2.5 -m 5 4 2.5 -m 5 6 2.5 -m 6 5 2.5 -em 2.0 3 4 2.5 -em 2.0 4 3 2.5 | sample_statsSFS > output2
msSfsDataRaw = np.genfromtxt('/home/user/msdir/output2', delimiter=' ') msSfsMultiLocusData = [np.insert( np.insert(a, -1, 0., axis=0), 0, 0., axis=0) for a in msSfsDataRaw] #msSfsMultiLocusData # the Watterson estimator of theta under panmictic Kingman prior print (mean([np.sum(msSfsMultiLocusData[j]) for j in range(0,NumLoci)])) / (sum(np.array([1.0/i for i in range(1,n,1)])))
77.8733342835
# Likelihood based on the full list of SFS # dataset Hudson's Fig 3 - fitting with Beta-splitting Effective Population Size set_random_seed(9842245) np.random.seed(int(4115345)) BetaList=[-1.0,0.0,5.0,10.0]; GrowthList=[0.0]; ThetaList=[1,10,100,1000]; results=[] Reps=1000; # number of particles for g in GrowthList: #GrRates = GetRatesForGrowth(g,n,1000) GrRates=[0.,0.]; for i in range(2,n+1,1): GrRates.append(1.0*binomial(i,2)) for beta in BetaList: for theta in ThetaList: logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps) # !!! note the theta/4.0 !!! #print "mu*4 is Hudson's theta and this TrueTheta = "+str(TrueTheta) print([Reps,n,NumLoci,beta,theta,logLAcrossAllLoci]) results.append([Reps,n,NumLoci,beta,theta,logLAcrossAllLoci]) # report the most likely values print("[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]") MaxI=0 for i in range(len(results)): if results[i][5]>results[MaxI][5]: MaxI=i results[MaxI] #[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci] #[20, 30, 20, 5.00000000000000, 100, -4912.0378080548035] #[100, 30, 20, 5.00000000000000, 100, -4699.5032281274443]

Results of a run with the complex Fig.3 for msdoc.pdf chooses a more balanced beta and larger theta.

[1000, 15, 100, -1.00000000000000, 1, -inf] [1000, 15, 100, -1.00000000000000, 10, -12061.540375880273] [1000, 15, 100, -1.00000000000000, 100, -6100.9810259876103] [1000, 15, 100, -1.00000000000000, 1000, -11881.563131517965] [1000, 15, 100, 0.000000000000000, 1, -inf] [1000, 15, 100, 0.000000000000000, 10, -11864.359615167663] [1000, 15, 100, 0.000000000000000, 100, -5795.9160992013267] [1000, 15, 100, 0.000000000000000, 1000, -11594.056550711328] [1000, 15, 100, 5.00000000000000, 1, -inf] [1000, 15, 100, 5.00000000000000, 10, -11736.957816906755] [1000, 15, 100, 5.00000000000000, 100, -5734.6545990340501] [1000, 15, 100, 5.00000000000000, 1000, -11523.970285428481] [1000, 15, 100, 10.0000000000000, 1, -inf] [1000, 15, 100, 10.0000000000000, 10, -11734.261194892686] [1000, 15, 100, 10.0000000000000, 100, -5812.3707795154705] [1000, 15, 100, 10.0000000000000, 1000, -11545.693530758796] [Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci] [1000, 15, 100, 5.00000000000000, 100, -5734.6545990340501]

There is variance in the likelihood estimates which decreases as a function of the number of particles.

[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci] [10, 30, 20, 5.00000000000000, 100.000000000000, -5532.3529474155057] [10, 30, 20, 5.00000000000000, 100.000000000000, -5218.1004939060304] [10, 30, 20, 5.00000000000000, 100.000000000000, -5287.3870924684761] [10, 30, 20, 5.00000000000000, 100.000000000000, -5265.937482150388] [10, 30, 20, 5.00000000000000, 100.000000000000, -5280.421730323982] [100, 30, 20, 5.00000000000000, 100.000000000000, -4751.6168398897598] [100, 30, 20, 5.00000000000000, 100.000000000000, -4677.2325203708433] [100, 30, 20, 5.00000000000000, 100.000000000000, -4811.3257313336544] [100, 30, 20, 5.00000000000000, 100.000000000000, -4799.1290494359873] [100, 30, 20, 5.00000000000000, 100.000000000000, -4739.2569980055923] [1000, 30, 20, 5.00000000000000, 100.000000000000, -4443.9865339510743] [1000, 30, 20, 5.00000000000000, 100.000000000000, -4484.4008663487266] [1000, 30, 20, 5.00000000000000, 100.000000000000, -4451.5259917605035] [1000, 30, 20, 5.00000000000000, 100.000000000000, -4475.0765597760737] [1000, 30, 20, 5.00000000000000, 100.000000000000, -4441.5184195452257]
r10=[-5532.3529474155057, -5218.1004939060304, -5287.3870924684761, -5265.937482150388, -5280.421730323982] r100=[-4751.6168398897598, -4677.2325203708433, -4811.3257313336544, -4799.1290494359873, -4739.2569980055923] r1000=[-4443.9865339510743, -4484.4008663487266, -4451.5259917605035, -4475.0765597760737, -4441.5184195452257] print('num of particles, mean log lkh, std loglkh') print(10,mean(r10),std(r10)) print(100,mean(r100),std(r100)) print(1000,mean(r1000),std(r1000))
num of particles, mean log lkh, std loglkh (10, -5316.83994925288, 123.470797173940) (100, -4755.712227807167, 53.44271101877941) (1000, -4459.301674276321, 19.30074717347509)
Reps=100; beta=5.0; theta=100.0; growth=0.0; print("[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]") for i in range(5): logLAcrossAllLoci=LklOfBetaThetaGrowthRates(beta,theta/4.0,growth,GrRates,n,msSfsMultiLocusData,Reps) print([Reps,n,NumLoci,beta,theta,logLAcrossAllLoci])
[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci] [100, 30, 20, 5.00000000000000, 100.000000000000, -4751.6168398897598] [100, 30, 20, 5.00000000000000, 100.000000000000, -4677.2325203708433] [100, 30, 20, 5.00000000000000, 100.000000000000, -4811.3257313336544] [100, 30, 20, 5.00000000000000, 100.000000000000, -4799.1290494359873] [100, 30, 20, 5.00000000000000, 100.000000000000, -4739.2569980055923]
Reps=1000; beta=5.0; theta=100.0; growth=0.0; print("[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]") for i in range(5): logLAcrossAllLoci=LklOfBetaThetaGrowthRates(beta,theta/4.0,growth,GrRates,n,msSfsMultiLocusData,Reps) print([Reps,n,NumLoci,beta,theta,logLAcrossAllLoci])
[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci] [1000, 30, 20, 5.00000000000000, 100.000000000000, -4443.9865339510743] [1000, 30, 20, 5.00000000000000, 100.000000000000, -4484.4008663487266] [1000, 30, 20, 5.00000000000000, 100.000000000000, -4451.5259917605035] [1000, 30, 20, 5.00000000000000, 100.000000000000, -4475.0765597760737] [1000, 30, 20, 5.00000000000000, 100.000000000000, -4441.5184195452257]
︠5b212958-4b2c-465f-95a4-4df0ea57aa52︠