library('nycflights13')
library('tidyverse')
# Preview the first five rows of the raw flights data.
head(flights, 5)

# Mean departure and arrival delay per carrier, computed only over flights
# that have both delays recorded.
flight_delays <- flights %>%
  filter(!is.na(dep_delay), !is.na(arr_delay)) %>%
  group_by(carrier) %>%
  summarize(
    meanDepDelay = mean(dep_delay),
    meanArrDelay = mean(arr_delay)
  )

# Carriers ordered from the smallest to the largest mean arrival delay.
flight_delays %>%
  arrange(meanArrDelay)

# Join the airlines table onto the summary by carrier code, ordered from the
# largest mean arrival delay down.
# NOTE(review): the variable name `table` shadows base::table as a variable
# (function calls to table() still resolve to the base function).
table <- merge(airlines, flight_delays, by = "carrier") %>%
  arrange(desc(meanArrDelay))
table

# Scatter plot of mean arrival delay against mean departure delay, one point
# per row of `table`.
plot(table$meanDepDelay, table$meanArrDelay)
# Mean departure and arrival delay per origin airport (flights with both
# delays recorded), ordered by increasing mean departure delay.
flights %>%
  filter(!is.na(dep_delay), !is.na(arr_delay)) %>%
  group_by(origin) %>%
  summarize(
    meanDepDelay = mean(dep_delay),
    meanArrDelay = mean(arr_delay)
  ) %>%
  arrange(meanDepDelay)
# Per-destination mean delays and flight counts (flights with both delays
# recorded), ordered from the largest mean departure delay down. The
# destination column is renamed to `faa` so it matches the key column of the
# airports table.
summTable <- flights %>%
  filter(!is.na(dep_delay), !is.na(arr_delay)) %>%
  group_by(dest) %>%
  summarize(
    meanDepDelay = mean(dep_delay),
    meanArrDelay = mean(arr_delay),
    numFlights = n()
  ) %>%
  arrange(desc(meanDepDelay)) %>%
  rename(faa = dest)

# Lookup of airport code -> airport name.
airportnames <- airports[c("faa", "name")]

# Attach airport names to the summary. merge() keeps only codes present in
# both tables, so destinations without an airports entry are dropped.
merge(airportnames, summTable, by = "faa") %>%
  arrange(desc(meanDepDelay))
# Mean delays and flight volume per day of the week. The weekday name is
# derived from a "year-month-day" string built from the date columns;
# weekdays() returns names in the current locale.
flight_delays <- flights %>%
  mutate(
    date = paste(year, month, day, sep = "-"),
    weekday = weekdays(as.Date(date))
  ) %>%
  filter(!is.na(dep_delay), !is.na(arr_delay)) %>%
  group_by(weekday) %>%
  summarize(
    meanDepDelay = mean(dep_delay),
    meanArrDelay = mean(arr_delay),
    volume = n()
  )

# Weekdays ordered by increasing mean departure delay.
flight_delays %>%
  arrange(meanDepDelay)
# Mean delays and flight volume for every origin airport / weekday pair,
# computed over flights that have both delays recorded. The weekday name is
# derived from a "year-month-day" string; weekdays() is locale-dependent.
flight_delays <- flights %>%
  mutate(
    date = paste(year, month, day, sep = "-"),
    weekday = weekdays(as.Date(date))
  ) %>%
  filter(!is.na(dep_delay), !is.na(arr_delay)) %>%
  group_by(origin, weekday) %>%
  summarize(
    meanDepDelay = mean(dep_delay),
    meanArrDelay = mean(arr_delay),
    volume = n()
  ) %>%
  # summarize() only peels off the last grouping level, which would leave the
  # result grouped by `origin`; drop it so later verbs act on the whole table.
  ungroup()

# Show the column names of the summary.
names(flight_delays)

# Mean arrival delay against mean departure delay, one point per
# origin/weekday combination.
flight_delays %>%
  ggplot(aes(meanDepDelay, meanArrDelay)) +
  geom_point()
# Departure volume on 2013-01-01 per origin airport and three-hour
# departure-time bin. `dep_time` is treated as an HHMM-style number, as the
# bin labels imply.
flights_1day <- flights %>%
  filter(year == 2013, month == 1, day == 1) %>%
  mutate(
    group = cut(
      dep_time,
      breaks = seq(0, 2400, by = 300),
      labels = c(
        "0-259", "300-559", "600-859", "900-1159",
        "1200-1459", "1500-1759", "1800-2059", "2100-2359"
      ),
      # Bins must be left-closed ([0, 300), [300, 600), ...) to agree with the
      # labels; cut()'s default right-closed bins would put a 300 departure
      # into "0-259". include.lowest keeps a 2400 value in the last bin.
      right = FALSE,
      include.lowest = TRUE
    )
  ) %>%
  group_by(origin, group) %>%
  summarize(volume = n()) %>%
  # Drop the leftover `origin` grouping that summarize() keeps by default.
  ungroup()

# Flights with a missing dep_time fall into an NA bin; hide that row.
flights_1day %>%
  filter(!is.na(group))
# Departure volume per origin airport and three-hour departure-time bin over
# every date in `flights` (no date filter is applied here, despite the
# variable name). `dep_time` is treated as an HHMM-style number, as the bin
# labels imply. Flights with a missing dep_time end up in an NA bin.
flights_1day <- flights %>%
  mutate(
    group = cut(
      dep_time,
      breaks = seq(0, 2400, by = 300),
      labels = c(
        "0-259", "300-559", "600-859", "900-1159",
        "1200-1459", "1500-1759", "1800-2059", "2100-2359"
      ),
      # Bins must be left-closed ([0, 300), [300, 600), ...) to agree with the
      # labels; cut()'s default right-closed bins would put a 300 departure
      # into "0-259". include.lowest keeps a 2400 value in the last bin.
      right = FALSE,
      include.lowest = TRUE
    )
  ) %>%
  group_by(origin, group) %>%
  summarize(volume = n()) %>%
  # Drop the leftover `origin` grouping that summarize() keeps by default.
  ungroup()