CoCalc -- 2020-02-03 MoMath Stat.sagews

statistics exploration for Momath mini course 2020

Path: 2020-02-03 MoMath Stat.sagews

Views: ³²⁶

data=[random() for _ in range(100000)]
histogram(data, bins=20)

data=[random()+random() for _ in range(100000)]
histogram(data, bins=50)

data=[random()+random()+random()+random() for _ in range(100000)]
histogram(data, bins=50)

data=[random()+random()+random()+random() for _ in range(100000)]
histogram(data, bins=50)

def unfairCoin(nTrials, pHeads):
    sum = 0
    for _ in range(nTrials):
        sum += (random()<pHeads)
    return sum

n=2400; p =.6
sampleSize= 10000
data=[unfairCoin(n,p)/n for _ in range(sampleSize)]

b=srange(340/600,380/600,2/600)
histogram(data, bins=b)

#random walk
p=0.5 #prob of going R
nSteps =100
location=0
path=[location]
for k in range(nSteps):
    if random()<p:
        location += 1
    else:
        location += -1
        
    path.append(location)
list_plot(path,plotjoined=true)

def rw(pWin,winPrize, losePrize,nDays):
    #simulate an n-day Fred/Wilma random walk
    sum=0
    for _ in range(n):
        
        if random()<pWin:
            sum += winPrize
        else:
            sum += losePrize
    return sum

nTrials=100000
p=.2; n=100
pwin=100;plose=-20
histogram([rw(p,pwin,plose,n) for _ in range(nTrials)], bins=35)

#coin simulation
pHeads=0.5
nTosses=10
nTrials=10000
# use the rw function defined above, where we add 1 if "win" and 0 if "lose"
# divide by n to make the output relative instead of absolute
p=pHeads; n=nTosses
pwin=1;plose=0
histogram([rw(p,pwin,plose,n)/n for _ in range(nTrials)], bins=n+1)

#coin simulation
pHeads=0.5
nTosses=100
nTrials=100000
# use the rw function defined above, where we add 1 if "win" and 0 if "lose"
# divide by n to make the output relative instead of absolute
p=pHeads; n=nTosses
pwin=1;plose=0
histogram([float(rw(p,pwin,plose,n)/n) for _ in range(nTrials)], bins=40)

#coin simulation
pHeads=0.5
nTosses=400
nTrials=100000
# use the rw function defined above, where we add 1 if "win" and 0 if "lose"
# divide by n to make the output relative instead of absolute
p=pHeads; n=nTosses
pwin=1;plose=0
data=[float(rw(p,pwin,plose,n)/n) for _ in range(nTrials)]

b=srange(.3,.7,.01);
histogram(data, bins=b)

b=srange(.4,.6,.005);
histogram(data, bins=b)

histogram([random()^2 for _ in range(100000)],bins=30)

histogram([random()-random() for _ in range(100000)],bins=40)

histogram([random()^2+random()^2+random()^2+random()^2+random()^2+random()^2 for _ in range(10000)],bins=20)

histogram([random()^2+random()^2 for _ in range(100000)],bins=20)

T = RealDistribution('gaussian', 1)

m=70
s=3
data=[m+T.get_random_element()*s for _ in range(10000)]
b=srange(58, 82,1)
histogram(data, bins=b)

m=66
s=3
data=[m+T.get_random_element()*s for _ in range(10000)]
b=srange(58, 82,1)
histogram(data, bins=b)

m=4
s=3
data=[m+T.get_random_element()*s*sqrt(2) for _ in range(10000)]
b=srange(-8, 18,1)
histogram(data, bins=b)

#create samples of male and female heights

nsamples=1000
mSD=3
fSD=2.75
mMean=70
fMean=65.5
T = RealDistribution('gaussian', 1)
femaledata=[fMean+fSD*T.get_random_element() for _ in range(nsamples)]
maledata=[mMean+mSD*T.get_random_element() for _ in range(nsamples)]

#pair them randomly, with no height preference
pairs=zip(femaledata, maledata)
pl=Graphics()
pl += line([(54.5,70),(75.5,70)],color ='red')
pl += line([(64.4,60),(64.5,80)], color='red')
pl += line([(60,60),(76,76)], color='green')
pl += text("$m=f$", (78, 78), fontsize=16, color='green')
pl += text("$\mu_M$", (78, 70), fontsize=16, color='red')
pl += text("$\mu_F$", (64.5, 82), fontsize=16, color='red')
pl+=list_plot(pairs, axes_labels=['Female height','Male height'])
pl.show(aspect_ratio=1)

match=0
for (x,y) in pairs:
    match += x>y
match

136

#create samples of male and female heights

nsamples=1000
mSD=3
fSD=2.75
mMean=70
fMean=64.5
delta=4
T = RealDistribution('gaussian', 1)
femaledata=[fMean+fSD*T.get_random_element() for _ in range(nsamples)]

#make males be exactly a certain amount taller than females
maledata=[f+delta for f in femaledata]

#pair them 
pairs=zip(femaledata, maledata)
pl=Graphics()

pl+=list_plot(pairs, axes_labels=['Female height','Male height'])
pl.show(aspect_ratio=1)

#create samples of male and female heights

nsamples=1000
mSD=3
fSD=2.75

fMean=64.5
delta=4
T = RealDistribution('gaussian', 1)
femaledata=[fMean+fSD*T.get_random_element() for _ in range(nsamples)]

#make males be normally distributed but taller than females on average
#m height = f height + deltal + normal noise
noiseSD=2
maledata=[f+delta +noiseSD*T.get_random_element() for f in femaledata]
mMean=mean(maledata)

#regression line computation
f=finance.TimeSeries(femaledata); m = finance.TimeSeries(maledata)
r=f.correlation(m)
beta=r*std(maledata)/std(femaledata)
alpha=mean(maledata)-beta*mean(femaledata)

#pair them 

pairs=zip(femaledata, maledata)
pl=Graphics()
pl += plot(alpha+beta*x,(x,55,75),color='black',thickness=3)
pl += line([(54.5,mMean),(75.5,mMean)],color ='red')
pl += line([(64.4,60),(64.5,80)], color='red')
pl += line([(60,60),(76,76)], color='green')
pl += text("$m=f$", (77, 74), fontsize=16, color='green')
pl += text("$m=1.007f+3.702$", (82, 80), fontsize=16, color='black')
pl += text("$\mu_M$", (78, mMean), fontsize=16, color='red')
pl += text("$\mu_F$", (64.5, 82), fontsize=16, color='red')
pl+=list_plot(pairs, axes_labels=['Female height','Male height'])
pl.show(aspect_ratio=1)

stats.std(femaledata)

2.77585776016294

std(femaledata)

2.77585776016294

f=finance.TimeSeries(femaledata); m = finance.TimeSeries(maledata)
r=f.correlation(m)
beta=r*std(maledata)/std(femaledata)
alpha=mean(maledata)-beta*mean(femaledata)

beta,alpha

(1.00651548967066, 3.70212537918032)


#210416  code for Price of a Game 210422
b=srange(0,800,100)
xm=666
#simulate die
data=[100*randint(1,6) for _ in range(60000)]
histogram(data,bins=b,align='left',xmax=xm)
float(mean(data))

350.39166666666665


#simulate lottery with two prizes z1, z2 with respective probabilities of p1, p2
def lottery(z1,z2,p1,p2):
    prize = 0
    r=random() #random spinner between 0 and 1
    if r<p1:
        prize=z1
    else:
        if r<p1+p2:
            prize=z2
    return prize

z1=5;z2=100; p1=0.1; p2=0.002
ticketPrice=1
expectedValue=z1*p1+z2*p2
nDays=1000
data=[]
wealth=0
for d in [1..nDays]:
    wealth += lottery(z1,z2, p1,p2)
    data.append((d,wealth))
lp=list_plot(data,plotjoined=true,axes_labels=['Day','Total'])
ep=plot(expectedValue*x,(x,1,nDays),color='red')
show(lp+ep)
save(lp+ep,'lottery1k.pdf')