# Run this code block to load the Tidyverse package
.libPaths(new = "~/Rlibs")
library(tidyverse)
library(modelr)
# Load the save file that preloads the dataset and
# the model.sin function
load("lab13.RData")
# To change the size of any plots, copy the code snippet
# below, uncomment it, and set the size of the width
# and height.
# Note: All subsequent figures will use the same size,
# unless you change the options() snippet and run it
# again.
# options(repr.plot.width=6, repr.plot.height=4)
# Test that dataset loaded
glimpse(t.data)
# Test that model.sin loaded
print(model.sin(T = 1, n = 0, x = 1/3))
ndays <- nrow(t.data)
t.data <- add_column(t.data, .before=TRUE, number_days=1:ndays)
t.data.filtered <- filter(t.data, t.avg > -99)
ggplot(t.data.filtered) + geom_point(aes(x=number_days, y=t.avg), size=0.5)
t.data.y9596<-filter(t.data.filtered, c(year==1995|year==1996))
ggplot(t.data.y9596) + geom_point(aes(x=number_days, y=t.avg), size=0.5)
In the northern hemisphere we tend to be hottest in July and coldest in January. The difference between January 1st and July 1st is roughly 180 days. The time it takes the Earth to make one rotation around the sun is roughly 365 days or a year.
model.T<-365
mod_n_0 <- lm(t.avg~model.sin(T=model.T, n=0, x=number_days),
data=t.data.filtered)
mod_n_1 <- lm(t.avg~model.sin(T=model.T, n=1, x=number_days),
data=t.data.filtered)
mod_n_2 <- lm(t.avg~model.sin(T=model.T, n=2, x=number_days),
data=t.data.filtered)
mod_n_3 <- lm(t.avg~model.sin(T=model.T, n=3, x=number_days),
data=t.data.filtered)
mod_n_4 <- lm(t.avg~model.sin(T=model.T, n=4, x=number_days),
data=t.data.filtered)
mod_n_5 <- lm(t.avg~model.sin(T=model.T, n=5, x=number_days),
data=t.data.filtered)
mod_n_5
mod_n_4
mod_n_3
mod_n_2
mod_n_1
mod_n_0
grid <- data_grid(data=t.data.y9596,
number_days=seq_range(number_days, n=1000,
expand=0.05))
grid <- gather_predictions(grid, mod_n_0, mod_n_1, mod_n_2,
mod_n_3, mod_n_4,
mod_n_5, .pred="t.avg")
ggplot(t.data.y9596) +
geom_point(aes(number_days, t.avg)) +
geom_line(data=grid, aes(number_days, t.avg),
color="red", size=1) +
facet_wrap(~model)
summary(mod_n_0)$adj.r.squared
sd(summary(mod_n_0)$residuals)
summary(mod_n_1)$adj.r.squared
sd(summary(mod_n_1)$residuals)
summary(mod_n_2)$adj.r.squared
sd(summary(mod_n_2)$residuals)
summary(mod_n_3)$adj.r.squared
sd(summary(mod_n_3)$residuals)
summary(mod_n_4)$adj.r.squared
sd(summary(mod_n_4)$residuals)
summary(mod_n_5)$adj.r.squared
sd(summary(mod_n_5)$residuals)
mod_n_3.5 <- lm(t.avg~model.sin(T=model.T, n=3.5, x=number_days),
data=t.data.filtered)
grid <- data_grid(data=t.data.y9596,
number_days=seq_range(number_days, n=1000,
expand=0.05))
grid <- add_predictions(grid, mod_n_3.5)
print(grid)
ggplot(t.data.y9596) +
geom_point(aes(number_days, t.avg)) +
geom_line(data=grid, aes(number_days, pred),
color="red", size=1)
summary(mod_n_3.5)$adj.r.squared
sd(summary(mod_n_3.5)$residuals)
model.n<-mod_n_3.5
summary(model.n)
A<- 56.19194
B<- 21.58927
n<- 3.5
#T
grid.final <- data_grid(t.data.filtered,
number_days=seq_range(number_days, n=10000,
expand=0.01))
grid.final <- add_predictions(grid.final, mod_n_3.5)
t.data.with.resid <- add_residuals(t.data.filtered, mod_n_3.5)
ggplot(t.data.with.resid) +
geom_point(aes(number_days, t.avg), size=0.5) +
geom_line(data=grid.final,
mapping=aes(number_days, pred),
color="red", size=1)
ggplot(t.data.with.resid) +
geom_point(aes(number_days, resid)) +
geom_ref_line(h=0)
ggplot(t.data.with.resid) +
geom_histogram(aes(resid), binwidth=1,
color="cyan4", fill="cyan2")