if (FALSE) {
setwd("D:\\Dropbox\\Teaching\\TAU\\2014 - b\\data_viz\\code")
library(knitr)
spin("output_06.r")
}
Loading data
data(airquality)
Basic summary
summary(airquality)
## Ozone Solar.R Wind Temp
## Min. : 1.0 Min. : 7 Min. : 1.70 Min. :56.0
## 1st Qu.: 18.0 1st Qu.:116 1st Qu.: 7.40 1st Qu.:72.0
## Median : 31.5 Median :205 Median : 9.70 Median :79.0
## Mean : 42.1 Mean :186 Mean : 9.96 Mean :77.9
## 3rd Qu.: 63.2 3rd Qu.:259 3rd Qu.:11.50 3rd Qu.:85.0
## Max. :168.0 Max. :334 Max. :20.70 Max. :97.0
## NA's :37 NA's :7
## Month Day
## Min. :5.00 Min. : 1.0
## 1st Qu.:6.00 1st Qu.: 8.0
## Median :7.00 Median :16.0
## Mean :6.99 Mean :15.8
## 3rd Qu.:8.00 3rd Qu.:23.0
## Max. :9.00 Max. :31.0
##
#' Attaching the objects
attach(airquality)
Avg. Temp per Month
tapply(Temp, Month, mean)
## 5 6 7 8 9
## 65.55 79.10 83.90 83.97 76.90
boxplot(Temp ~ Month)
It looks like a 2nd order linear model
Fitting a model
Month2 <- Month^2
fit <- lm(Temp ~ Month2 + Month)
summary(fit)
##
## Call:
## lm(formula = Temp ~ Month2 + Month)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.831 -4.445 -0.445 3.597 16.169
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -95.73 15.24 -6.28 3.5e-09 ***
## Month2 -3.28 0.32 -10.26 < 2e-16 ***
## Month 48.72 4.49 10.85 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.62 on 150 degrees of freedom
## Multiple R-squared: 0.517, Adjusted R-squared: 0.51
## F-statistic: 80.2 on 2 and 150 DF, p-value: <2e-16
It is significant!
Plotting the data and adding our predictions
boxplot(Temp ~ Month, at = 5:9, xlab = "Month", ylab = "Temp", las = 1)
fit_line <- function(x) predict(fit, newdata = data.frame(Month2 = x^2, Month = x))
curve(fit_line, from = 5, to = 9, col = 2, add = TRUE, lwd = 3)
Code with output
tapply(Temp, Month, mean)
## 5 6 7 8 9
## 65.55 79.10 83.90 83.97 76.90
Code with NO output
tapply(Temp, Month, mean)
No code but WITH output
## 5 6 7 8 9
## 65.55 79.10 83.90 83.97 76.90
With modifications
boxplot(Temp ~ Month, at = 5:9, xlab = "Month", ylab = "Temp", las = 1)
fit_line <- function(x) predict(fit, newdata = data.frame(Month2 = x^2, Month = x))
curve(fit_line, from = 5, to = 9, col = 2, add = TRUE, lwd = 3)