CoCalc Public Files2020-02-03 MoMath Stat.sagews
Author: Paul Zeitz
Views : 124
Description: statistics exploration for Momath mini course 2020
data=[random() for _ in range(100000)]
histogram(data, bins=20)

data=[random()+random() for _ in range(100000)]
histogram(data, bins=50)

data=[random()+random()+random()+random() for _ in range(100000)]
histogram(data, bins=50)

data=[random()+random()+random()+random() for _ in range(100000)]
histogram(data, bins=50)

def unfairCoin(nTrials, pHeads):
sum = 0
for _ in range(nTrials):
return sum

n=2400; p =.6
sampleSize= 10000
data=[unfairCoin(n,p)/n for _ in range(sampleSize)]


b=srange(340/600,380/600,2/600)
histogram(data, bins=b)

#random walk
p=0.5 #prob of going R
nSteps =100
location=0
path=[location]
for k in range(nSteps):
if random()<p:
location += 1
else:
location += -1

path.append(location)
list_plot(path,plotjoined=true)

def rw(pWin,winPrize, losePrize,nDays):
#simulate an n-day Fred/Wilma random walk
sum=0
for _ in range(n):

if random()<pWin:
sum += winPrize
else:
sum += losePrize
return sum

nTrials=100000
p=.2; n=100
pwin=100;plose=-20
histogram([rw(p,pwin,plose,n) for _ in range(nTrials)], bins=35)

#coin simulation
nTosses=10
nTrials=10000
# use the rw function defined above, where we add 1 if "win" and 0 if "lose"
# divide by n to make the output relative instead of absolute
pwin=1;plose=0
histogram([rw(p,pwin,plose,n)/n for _ in range(nTrials)], bins=n+1)

#coin simulation
nTosses=100
nTrials=100000
# use the rw function defined above, where we add 1 if "win" and 0 if "lose"
# divide by n to make the output relative instead of absolute
pwin=1;plose=0
histogram([float(rw(p,pwin,plose,n)/n) for _ in range(nTrials)], bins=40)

#coin simulation
nTosses=400
nTrials=100000
# use the rw function defined above, where we add 1 if "win" and 0 if "lose"
# divide by n to make the output relative instead of absolute
pwin=1;plose=0
data=[float(rw(p,pwin,plose,n)/n) for _ in range(nTrials)]

b=srange(.3,.7,.01);
histogram(data, bins=b)

b=srange(.4,.6,.005);
histogram(data, bins=b)

histogram([random()^2 for _ in range(100000)],bins=30)

histogram([random()-random() for _ in range(100000)],bins=40)

histogram([random()^2+random()^2+random()^2+random()^2+random()^2+random()^2 for _ in range(10000)],bins=20)

histogram([random()^2+random()^2 for _ in range(100000)],bins=20)

T = RealDistribution('gaussian', 1)

m=70
s=3
data=[m+T.get_random_element()*s for _ in range(10000)]
b=srange(58, 82,1)
histogram(data, bins=b)

m=66
s=3
data=[m+T.get_random_element()*s for _ in range(10000)]
b=srange(58, 82,1)
histogram(data, bins=b)

m=4
s=3
data=[m+T.get_random_element()*s*sqrt(2) for _ in range(10000)]
b=srange(-8, 18,1)
histogram(data, bins=b)

#create samples of male and female heights

nsamples=1000
mSD=3
fSD=2.75
mMean=70
fMean=65.5
T = RealDistribution('gaussian', 1)
femaledata=[fMean+fSD*T.get_random_element() for _ in range(nsamples)]
maledata=[mMean+mSD*T.get_random_element() for _ in range(nsamples)]

#pair them randomly, with no height preference
pairs=zip(femaledata, maledata)
pl=Graphics()
pl += line([(54.5,70),(75.5,70)],color ='red')
pl += line([(64.4,60),(64.5,80)], color='red')
pl += line([(60,60),(76,76)], color='green')
pl += text("$m=f$", (78, 78), fontsize=16, color='green')
pl += text("$\mu_M$", (78, 70), fontsize=16, color='red')
pl += text("$\mu_F$", (64.5, 82), fontsize=16, color='red')
pl+=list_plot(pairs, axes_labels=['Female height','Male height'])
pl.show(aspect_ratio=1)

match=0
for (x,y) in pairs:
match += x>y
match

136
#create samples of male and female heights

nsamples=1000
mSD=3
fSD=2.75
mMean=70
fMean=64.5
delta=4
T = RealDistribution('gaussian', 1)
femaledata=[fMean+fSD*T.get_random_element() for _ in range(nsamples)]

#make males be exactly a certain amount taller than females
maledata=[f+delta for f in femaledata]

#pair them
pairs=zip(femaledata, maledata)
pl=Graphics()

pl+=list_plot(pairs, axes_labels=['Female height','Male height'])
pl.show(aspect_ratio=1)

#create samples of male and female heights

nsamples=1000
mSD=3
fSD=2.75

fMean=64.5
delta=4
T = RealDistribution('gaussian', 1)
femaledata=[fMean+fSD*T.get_random_element() for _ in range(nsamples)]

#make males be normally distributed but taller than females on average
#m height = f height + deltal + normal noise
noiseSD=2
maledata=[f+delta +noiseSD*T.get_random_element() for f in femaledata]
mMean=mean(maledata)

#regression line computation
f=finance.TimeSeries(femaledata); m = finance.TimeSeries(maledata)
r=f.correlation(m)
beta=r*std(maledata)/std(femaledata)
alpha=mean(maledata)-beta*mean(femaledata)

#pair them

pairs=zip(femaledata, maledata)
pl=Graphics()
pl += plot(alpha+beta*x,(x,55,75),color='black',thickness=3)
pl += line([(54.5,mMean),(75.5,mMean)],color ='red')
pl += line([(64.4,60),(64.5,80)], color='red')
pl += line([(60,60),(76,76)], color='green')
pl += text("$m=f$", (77, 74), fontsize=16, color='green')
pl += text("$m=1.007f+3.702$", (82, 80), fontsize=16, color='black')
pl += text("$\mu_M$", (78, mMean), fontsize=16, color='red')
pl += text("$\mu_F$", (64.5, 82), fontsize=16, color='red')
pl+=list_plot(pairs, axes_labels=['Female height','Male height'])
pl.show(aspect_ratio=1)

stats.std(femaledata)

2.77585776016294
std(femaledata)

2.77585776016294
f=finance.TimeSeries(femaledata); m = finance.TimeSeries(maledata)
r=f.correlation(m)
beta=r*std(maledata)/std(femaledata)
alpha=mean(maledata)-beta*mean(femaledata)


beta,alpha

(1.00651548967066, 3.70212537918032)