# Attach knitr with library() so a missing package stops the script here with a
# clear error; require() would only warn and return FALSE.
library(knitr)

# Global defaults applied to every chunk of the document:
#   cache / echo - cache chunk results and show the chunk source.
#   dev / dpi    - write each figure as both PNG and PDF at 700 dpi.
#   out.extra    - extra attribute appended to the emitted figure tag.
opts_chunk$set(
    cache = TRUE,
    echo = TRUE,
    dev = c("png", "pdf"),
    dpi = 700,
    out.extra = "WIDTH=\"50%\""
)


# Set caching:
# dir.create('cache')
# opts_knit$set(cache = TRUE, cache.path = 'cache/cache-')  # , cache.path = 'cache')

# http://stackoverflow.com/questions/10628665/how-to-set-cache-false-for-a-knitr-markdown-document-and-override-code-chunk-set

# opts_knit$get()


# http://stackoverflow.com/questions/14829791/figure-sizes-with-pandoc-conversion-from-markdown-to-docx

Reading the data

Loading data

Taking a look at the data we have

# Compact overview of the iris data frame: one line per column with its type
# and first few values.
utils::str(object = iris)

Creating a dataset without the Species labels

# Keep only the four numeric measurement columns; the Species factor (the
# fifth column of iris) is dropped by name rather than by position.
iris2 <- iris[, names(iris) != "Species"]

A linear model of petal length and width

# Simple linear regression of petal length on petal width. The call is left
# exactly as written because lm() stores it and summary() prints it back.
fit <- lm(Petal.Length ~ Petal.Width, data = iris2)
# Print the residual quantiles, coefficient table and overall fit statistics.
summary(fit)
## 
## Call:
## lm(formula = Petal.Length ~ Petal.Width, data = iris2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3354 -0.3035 -0.0295  0.2578  1.3945 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.0836     0.0730    14.8   <2e-16 ***
## Petal.Width   2.2299     0.0514    43.4   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.478 on 148 degrees of freedom
## Multiple R-squared:  0.927,  Adjusted R-squared:  0.927 
## F-statistic: 1.88e+03 on 1 and 148 DF,  p-value: <2e-16
# Scatterplot of petal length against petal width, with the fitted regression
# line overlaid as a thick dark-blue dot-dash line (lty = 4).
plot(Petal.Length ~ Petal.Width, data = iris2)
abline(fit, lty = 4, lwd = 3, col = "darkblue")

plot of chunk unnamed-chunk-6

# install.packages('pander')
# library() stops with a clear error if pander is not installed; require()
# would only warn, and the pander() call below would then fail with a less
# helpful "could not find function" message.
library(pander)
# Render the coefficient table of the fitted model as a markdown table.
pander(fit)
## 
## --------------------------------------------------------------
##      &nbsp;        Estimate   Std. Error   t value   Pr(>|t|) 
## ----------------- ---------- ------------ --------- ----------
##  **Petal.Width**     2.23       0.0514      43.39   4.675e-86 
## 
##  **(Intercept)**    1.084      0.07297      14.85   4.043e-31 
## --------------------------------------------------------------
## 
## Table: Fitting linear model: Petal.Length ~ Petal.Width
# install.packages('pander')
# library() rather than require(): fail loudly here if pander is unavailable.
library(pander)
panderOptions("table.style", "rmarkdown")  # Used in conjunction with the chunk setting results='asis'
# Show the first six rows of the measurements as an rmarkdown-style table.
pander(head(iris2))
Sepal.Length Sepal.Width Petal.Length Petal.Width
5.1 3.5 1.4 0.2
4.9 3 1.4 0.2
4.7 3.2 1.3 0.2
4.6 3.1 1.5 0.2
5 3.6 1.4 0.2
5.4 3.9 1.7 0.4

A scatterplot matrix of the 4 variables in our data

# Species factor, kept apart from iris2 so it can drive the point colours.
Species <- iris[["Species"]]
# One fill colour per species, indexed by the factor's integer codes.
col_Species <- c("#7DB0DD", "#86B875", "#E495A5")
# Upper-triangle scatterplot matrix; lower.panel = NULL leaves the lower
# triangle empty.
pairs(
    iris2,
    col = col_Species[as.integer(Species)],
    pch = 19,
    cex = 1.4,
    lower.panel = NULL
)
# Relax clipping so the legend positioned at (0, 0.4) can be drawn outside
# the current plot region.
par(xpd = TRUE)
legend(0, 0.4, legend = levels(Species), fill = col_Species, cex = 2)

plot of chunk unnamed-chunk-9

# Set clipping for subsequent drawing to the device region (xpd = NA).
# NOTE(review): this does not restore the xpd value that was in effect before
# the earlier par(xpd = TRUE) call -- confirm NA is intended for later plots.
par(xpd = NA)
# fix legend:
# https://stat.ethz.ch/pipermail/r-help/2001-September/015374.html

Clustering the flowers

Create a hierarchical clustering of the flowers

# Complete-linkage hierarchical clustering on the distances between the
# column-standardised (scale()) measurements.
hc <- hclust(dist(scale(iris2)), method = "complete")
# Turning the hclust object into a dendrogram object for plotting
dend <- as.dendrogram(hc)
# install.packages('dendextend')
# library() rather than require(): stop here with a clear error if dendextend
# is missing instead of failing later at color_branches().
library(dendextend)
col_clust <- c("burlywood4", "black", "darkgrey")
# Colour the branches of the three clusters obtained by cutting the tree at
# k = 3, labelling the groups 3, 2, 1.
dend <- color_branches(dend, k = 3, col = col_clust, groupLabels = 3:1)
# Colour each leaf label by the species of the flower at that leaf;
# order.dendrogram() maps leaf positions back to the original row indices.
dend <- color_labels(dend, col = col_Species[as.numeric(Species[order.dendrogram(dend)])])
dend <- hang.dendrogram(dend)

Viewing our dendrogram with a color for each cluster (cutting the tree into 3 clusters)

# Draw the coloured dendrogram, then add a species colour key in the top-right
# corner of the plot.
plot(dend)
legend("topright", legend = levels(Species), fill = col_Species, cex = 1.5)

plot of chunk unnamed-chunk-11

library(knitr)
# Knit the R Markdown source to HTML.
# NOTE(review): recent knitr versions point R Markdown v2 documents to
# rmarkdown::render() instead of knit2html() -- confirm this call still works
# for this file.
knit2html("output_08.rmd")
# Extract the R code from the same R Markdown source into a script.
purl("output_08.rmd")

# http://stackoverflow.com/questions/10646665/how-to-convert-r-markdown-to-html-i-e-what-does-knit-html-do-in-rstudio-0-9