CoCalc Public Files2020-02-03 MoMath Stat.sagewsOpen with one click!
Author: Paul Zeitz
Views : 124
Description: statistics exploration for Momath mini course 2020
data=[random() for _ in range(100000)] histogram(data, bins=20)
data=[random()+random() for _ in range(100000)] histogram(data, bins=50)
data=[random()+random()+random()+random() for _ in range(100000)] histogram(data, bins=50)
data=[random()+random()+random()+random() for _ in range(100000)] histogram(data, bins=50)
def unfairCoin(nTrials, pHeads): sum = 0 for _ in range(nTrials): sum += (random()<pHeads) return sum n=2400; p =.6 sampleSize= 10000 data=[unfairCoin(n,p)/n for _ in range(sampleSize)]
b=srange(340/600,380/600,2/600) histogram(data, bins=b)
#random walk p=0.5 #prob of going R nSteps =100 location=0 path=[location] for k in range(nSteps): if random()<p: location += 1 else: location += -1 path.append(location) list_plot(path,plotjoined=true)
def rw(pWin,winPrize, losePrize,nDays): #simulate an n-day Fred/Wilma random walk sum=0 for _ in range(n): if random()<pWin: sum += winPrize else: sum += losePrize return sum nTrials=100000 p=.2; n=100 pwin=100;plose=-20 histogram([rw(p,pwin,plose,n) for _ in range(nTrials)], bins=35)
#coin simulation pHeads=0.5 nTosses=10 nTrials=10000 # use the rw function defined above, where we add 1 if "win" and 0 if "lose" # divide by n to make the output relative instead of absolute p=pHeads; n=nTosses pwin=1;plose=0 histogram([rw(p,pwin,plose,n)/n for _ in range(nTrials)], bins=n+1)
#coin simulation pHeads=0.5 nTosses=100 nTrials=100000 # use the rw function defined above, where we add 1 if "win" and 0 if "lose" # divide by n to make the output relative instead of absolute p=pHeads; n=nTosses pwin=1;plose=0 histogram([float(rw(p,pwin,plose,n)/n) for _ in range(nTrials)], bins=40)
#coin simulation pHeads=0.5 nTosses=400 nTrials=100000 # use the rw function defined above, where we add 1 if "win" and 0 if "lose" # divide by n to make the output relative instead of absolute p=pHeads; n=nTosses pwin=1;plose=0 data=[float(rw(p,pwin,plose,n)/n) for _ in range(nTrials)]
b=srange(.3,.7,.01); histogram(data, bins=b)
b=srange(.4,.6,.005); histogram(data, bins=b)
histogram([random()^2 for _ in range(100000)],bins=30)
histogram([random()-random() for _ in range(100000)],bins=40)
histogram([random()^2+random()^2+random()^2+random()^2+random()^2+random()^2 for _ in range(10000)],bins=20)
histogram([random()^2+random()^2 for _ in range(100000)],bins=20)
T = RealDistribution('gaussian', 1)
m=70 s=3 data=[m+T.get_random_element()*s for _ in range(10000)] b=srange(58, 82,1) histogram(data, bins=b)
m=66 s=3 data=[m+T.get_random_element()*s for _ in range(10000)] b=srange(58, 82,1) histogram(data, bins=b)
m=4 s=3 data=[m+T.get_random_element()*s*sqrt(2) for _ in range(10000)] b=srange(-8, 18,1) histogram(data, bins=b)
#create samples of male and female heights nsamples=1000 mSD=3 fSD=2.75 mMean=70 fMean=65.5 T = RealDistribution('gaussian', 1) femaledata=[fMean+fSD*T.get_random_element() for _ in range(nsamples)] maledata=[mMean+mSD*T.get_random_element() for _ in range(nsamples)] #pair them randomly, with no height preference pairs=zip(femaledata, maledata) pl=Graphics() pl += line([(54.5,70),(75.5,70)],color ='red') pl += line([(64.4,60),(64.5,80)], color='red') pl += line([(60,60),(76,76)], color='green') pl += text("$m=f$", (78, 78), fontsize=16, color='green') pl += text("$\mu_M$", (78, 70), fontsize=16, color='red') pl += text("$\mu_F$", (64.5, 82), fontsize=16, color='red') pl+=list_plot(pairs, axes_labels=['Female height','Male height']) pl.show(aspect_ratio=1)
match=0 for (x,y) in pairs: match += x>y match
136
#create samples of male and female heights nsamples=1000 mSD=3 fSD=2.75 mMean=70 fMean=64.5 delta=4 T = RealDistribution('gaussian', 1) femaledata=[fMean+fSD*T.get_random_element() for _ in range(nsamples)] #make males be exactly a certain amount taller than females maledata=[f+delta for f in femaledata] #pair them pairs=zip(femaledata, maledata) pl=Graphics() pl+=list_plot(pairs, axes_labels=['Female height','Male height']) pl.show(aspect_ratio=1)
#create samples of male and female heights nsamples=1000 mSD=3 fSD=2.75 fMean=64.5 delta=4 T = RealDistribution('gaussian', 1) femaledata=[fMean+fSD*T.get_random_element() for _ in range(nsamples)] #make males be normally distributed but taller than females on average #m height = f height + deltal + normal noise noiseSD=2 maledata=[f+delta +noiseSD*T.get_random_element() for f in femaledata] mMean=mean(maledata) #regression line computation f=finance.TimeSeries(femaledata); m = finance.TimeSeries(maledata) r=f.correlation(m) beta=r*std(maledata)/std(femaledata) alpha=mean(maledata)-beta*mean(femaledata) #pair them pairs=zip(femaledata, maledata) pl=Graphics() pl += plot(alpha+beta*x,(x,55,75),color='black',thickness=3) pl += line([(54.5,mMean),(75.5,mMean)],color ='red') pl += line([(64.4,60),(64.5,80)], color='red') pl += line([(60,60),(76,76)], color='green') pl += text("$m=f$", (77, 74), fontsize=16, color='green') pl += text("$m=1.007f+3.702$", (82, 80), fontsize=16, color='black') pl += text("$\mu_M$", (78, mMean), fontsize=16, color='red') pl += text("$\mu_F$", (64.5, 82), fontsize=16, color='red') pl+=list_plot(pairs, axes_labels=['Female height','Male height']) pl.show(aspect_ratio=1)
stats.std(femaledata)
2.77585776016294
std(femaledata)
2.77585776016294
f=finance.TimeSeries(femaledata); m = finance.TimeSeries(maledata) r=f.correlation(m) beta=r*std(maledata)/std(femaledata) alpha=mean(maledata)-beta*mean(femaledata)
beta,alpha
(1.00651548967066, 3.70212537918032)