if (FALSE) {
    setwd("D:\\Dropbox\\Teaching\\TAU\\2014 - b\\data_viz\\code")
    library(knitr)
    spin("output_06.r")
}

Reading the data

Loading data

data(airquality)

Basic summary

summary(airquality)
##      Ozone          Solar.R         Wind            Temp     
##  Min.   :  1.0   Min.   :  7   Min.   : 1.70   Min.   :56.0  
##  1st Qu.: 18.0   1st Qu.:116   1st Qu.: 7.40   1st Qu.:72.0  
##  Median : 31.5   Median :205   Median : 9.70   Median :79.0  
##  Mean   : 42.1   Mean   :186   Mean   : 9.96   Mean   :77.9  
##  3rd Qu.: 63.2   3rd Qu.:259   3rd Qu.:11.50   3rd Qu.:85.0  
##  Max.   :168.0   Max.   :334   Max.   :20.70   Max.   :97.0  
##  NA's   :37      NA's   :7                                   
##      Month           Day      
##  Min.   :5.00   Min.   : 1.0  
##  1st Qu.:6.00   1st Qu.: 8.0  
##  Median :7.00   Median :16.0  
##  Mean   :6.99   Mean   :15.8  
##  3rd Qu.:8.00   3rd Qu.:23.0  
##  Max.   :9.00   Max.   :31.0  
## 
#' Attaching the objects
attach(airquality)

Checking Temp over Month

Avg. Temp per Month

tapply(Temp, Month, mean)
##     5     6     7     8     9 
## 65.55 79.10 83.90 83.97 76.90
boxplot(Temp ~ Month)

plot of chunk unnamed-chunk-5

It looks like a 2nd order linear model

Fitting a model

Month2 <- Month^2
fit <- lm(Temp ~ Month2 + Month)
summary(fit)
## 
## Call:
## lm(formula = Temp ~ Month2 + Month)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.831  -4.445  -0.445   3.597  16.169 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -95.73      15.24   -6.28  3.5e-09 ***
## Month2         -3.28       0.32  -10.26  < 2e-16 ***
## Month          48.72       4.49   10.85  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.62 on 150 degrees of freedom
## Multiple R-squared:  0.517,  Adjusted R-squared:  0.51 
## F-statistic: 80.2 on 2 and 150 DF,  p-value: <2e-16

It is significant!

Plotting the data and adding our predictions

boxplot(Temp ~ Month, at = 5:9, xlab = "Month", ylab = "Temp", las = 1)
fit_line <- function(x) predict(fit, newdata = data.frame(Month2 = x^2, Month = x))
curve(fit_line, from = 5, to = 9, col = 2, add = TRUE, lwd = 3)

plot of chunk unnamed-chunk-7

Examples of playing with code chunks:

Code with output

tapply(Temp, Month, mean)
##     5     6     7     8     9 
## 65.55 79.10 83.90 83.97 76.90

Code with NO output

tapply(Temp, Month, mean)

No code but WITH output

##     5     6     7     8     9 
## 65.55 79.10 83.90 83.97 76.90

With modifications

boxplot(Temp ~ Month, at = 5:9, xlab = "Month", ylab = "Temp", las = 1)
fit_line <- function(x) predict(fit, newdata = data.frame(Month2 = x^2, Month = x))
curve(fit_line, from = 5, to = 9, col = 2, add = TRUE, lwd = 3)

plot of chunk unnamed-chunk-11