CoCalc Public Filesgraded-Week 6 / hw6turnin.ipynbOpen with one click!
Author: Sophia Dsouza
Views : 57
Compute Environment: Ubuntu 20.04 (Default)
In [ ]:
In [6]:

Homework 6

Name: Sophia Dsouza

I collaborated with:

In [ ]:
# Question 1 #This method allows us to have an array instead of having a list of our simulation results. We can also use the chi squared test on an array which allows us to analyze our data further
In [12]:
#2 #Multiple testing is essential in order to confirm the results found in an experiment. This allows us to eliminate outside factors when the results are repeatedly similar and allows evidence for someone else to conduct the experiment.
In [ ]:
#3 The Bonferroni correction is the most conservative. A benefit of having a conservative controls have fewer chances of finding significant results however this is because when there are significant results they are very likely not due to random chance.Having less conservative controls may yield more siinificant resukts more often but they could be more likely due to random chance.
In [1]:
#4 #⍺ = 0.01. #0.001 (T1 vs T3) #0.003 (T2 vs T3) #0.006 (T3 vs T4) #0.0084 (T1 vs T4) #0.013 (T2 vs T4) #0.34 (T1 vs T2) 0.01*(1/4), 0.01*( 2/4), 0.01*(3/4), 0.01*(4/4), 0.01*(5/4), 0.01*(6/4)
(0.0025, 0.005, 0.0075, 0.01, 0.0125, 0.015)
In [2]:
#p(1) less than 𝛂⋅1/m so T1 and T3 are significantly different from eachother and we reject the null #p(2) less than 𝛂⋅1/m, therefore T2 and T3 are significantly different from eachother and we reject the null #p(3) less than 𝛂⋅1/m, therefore T3 and T4 are significantly different from eachother and we reject the null #p(4) less than 𝛂⋅1/m, therefore T1 and T4 are significantly different from eachother and we reject the null. #p(5) greater than 𝛂⋅1/m, therefore T2 and T4 are not significantly different from eachother so we cannot reject the null. #p(6) greater than 𝛂⋅1/m, therefore T1 and T2 are not significantly different from eachother so we cannot reject the null.
In [ ]:
In [ ]:
In [3]:
%matplotlib inline import matplotlib as mat import seaborn as sns import pandas as pd import numpy as np import matplotlib.pyplot as plt
In [4]:
#5
In [5]:
bath = [5,10,-4,11,-3,13,0,2,10,6,-1,8,10,-9] b_e = [6,10,0,14,0,15,4,5,11,7,20,9,11,21] exercise=[-12,-10,-7,-1,-1,0,0,0,0,0,2,4,5,5] colddata=pd.DataFrame(np.column_stack([bath, b_e, exercise])) colddata
0 1 2
0 5 6 -12
1 10 10 -10
2 -4 0 -7
3 11 14 -1
4 -3 0 -1
5 13 15 0
6 0 4 0
7 2 5 0
8 10 11 0
9 6 7 0
10 -1 20 2
11 8 9 4
12 10 11 5
13 -9 21 5
In [6]:
baths = pd.read_csv("contrast-baths.txt",sep='\t') baths
Bath Bath+Exercise Exercise
0 5 6 -12
1 10 10 -10
2 -4 0 -7
3 11 14 -1
4 -3 0 -1
5 13 15 0
6 0 4 0
7 2 5 0
8 10 11 0
9 6 7 0
10 -1 20 2
11 8 9 4
12 10 11 5
13 -9 21 5
In [7]:
baths_new = baths["Bath"] baths_new
0 5 1 10 2 -4 3 11 4 -3 5 13 6 0 7 2 8 10 9 6 10 -1 11 8 12 10 13 -9 Name: Bath, dtype: int64
In [8]:
p=sns.displot(data=bath) p.set(ylabel="count") p.set(xlabel="bath")
<seaborn.axisgrid.FacetGrid at 0x7f15a6635220>
In [9]:
p=sns.displot(data=b_e) p.set(ylabel="count") p.set(xlabel="bath and exercise")
<seaborn.axisgrid.FacetGrid at 0x7f1586a63670>
In [10]:
p=sns.displot(data=exercise) p.set(ylabel="count") p.set(xlabel="exercise")
<seaborn.axisgrid.FacetGrid at 0x7f158b398670>
In [11]:
#b There is no significant statsitical difference in hand volume between groups bath and/or exercise and the control group. We use the median and a 3 box resampling method with recentering to calculate our p values and confidence intervals. Sample size is n=14
In [12]:
list1 =["bath","b_e"] list1
['bath', 'b_e']
In [13]:
#c def Flike(df): columnmed=df.median() grandmed=np.median(df) num=(len(df))*sum(abs(columnmed-grandmed)) denom=np.sum(np.sum(abs(df-columnmed))) return(num/denom)
In [14]:
Fobs=Flike(baths) Fobs
0.75
In [15]:
Flikesnew=[] total=10000 bath = [5,10,-4,11,-3,13,0,2,10,6,-1,8,10,-9] b_e = [6,10,0,14,0,15,4,5,11,7,20,9,11,21] exercise= [-12,-10,-7,-1,-1,0,0,0,0,0,2,4,5,5] alldata=np.concatenate([bath, b_e, exercise]) for i in range(total): bathrand = np.random.choice(alldata, len(bath)) b_erand = np.random.choice(alldata, len(b_e)) exerciserand = np.random.choice(alldata, len(exercise)) column = np.column_stack((bathrand,b_erand,exerciserand)) randomdata = pd.DataFrame(column) newFlike = Flike(randomdata) Flikesnew.append(newFlike) pvalue = (sum(Flikesnew>=Fobs))/total display("P-value",pvalue)
'P-value'
0.0157
In [16]:
a=np.zeros(10000) for i in range(10000): bath1=np.random.choice(alldata,14) b_e1=np.random.choice(alldata,14) exercise1=np.random.choice(alldata,14) both=bath1, b_e1, exercise1 bothstack=np.column_stack(both) resultstack=pd.DataFrame(bothstack) f=Flike(resultstack) a[i]=f
In [ ]:
In [17]:
p=sns.displot(a,bins=30,kde=False) p.set(xlabel="Flike", ylabel="Number of times for Flike value or greater") plt.axvline(0.75, color="pink")
<matplotlib.lines.Line2D at 0x7f1586c49a00>
In [ ]:
In [ ]:
In [87]:
#d a.sort() Mlower = a[49] #find the bottom 0.5% Mupper = a[9949] #find the top 0.5% Mobs = np.median(a) #find the observed median Mupper = 2*Mobs - Mlower #find the upper pivotal median bvalue Mlower = 2*Mobs - Mupper #find the lower pivotal median value display(, Mlower, , Mupper) #display these found values p=sns.displot(a, kde=False, bins=25) #create a histogram of the resamples p.set(ylabel="Count") plt.axvline(Mlower, color="red"); plt.axvline(Mupper, color="green"); plt.axvline(Mobs, color="purple"); plt.axvline(Fobs, color="orange"); plt.axvline(-Fobs, color="orange");
'M Lower : Red Line'
0.0
'M Upper: Green Line'
0.6234817813765182
In [1]:
#e #cannot reject null because 0 is within the confidence interval
In [1]:
#f #I would recommend the bath and exercise because it showed the highest difference in results.
In [2]:
$ jupyter nbconvert --to pdf notebook.ipynb
File "<ipython-input-2-5ba3c88fe980>", line 1 $ jupyter nbconvert --to pdf notebook.ipynb ^ SyntaxError: invalid syntax
In [ ]: