This report is automatically generated with the R package knitr (version 1.5).
# Reading the data
# ================

# Load the built-in iris data set into the workspace.
data("iris")
# ?iris

# Print a compact overview of the data: dimensions, column types and the
# first few values of every column.
utils::str(iris)
## 'data.frame': 150 obs. of 5 variables: ## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... ## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... ## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... ## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... ## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Working copy of the data without the Species labels (column 5 is dropped,
# leaving the four numeric measurements).
iris2 <- iris[-5]

# Linear model of petal length as a function of petal width.
fit <- lm(formula = Petal.Length ~ Petal.Width, data = iris2)

# Print the coefficient table, residual summary and goodness-of-fit figures.
summary(fit)
## ## Call: ## lm(formula = Petal.Length ~ Petal.Width, data = iris2) ## ## Residuals: ## Min 1Q Median 3Q Max ## -1.3354 -0.3035 -0.0295 0.2578 1.3945 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 1.0836 0.0730 14.8 <2e-16 *** ## Petal.Width 2.2299 0.0514 43.4 <2e-16 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 0.478 on 148 degrees of freedom ## Multiple R-squared: 0.927, Adjusted R-squared: 0.927 ## F-statistic: 1.88e+03 on 1 and 148 DF, p-value: <2e-16
# Scatterplot of petal length against petal width, using the columns of iris2.
plot(Petal.Length ~ Petal.Width, data = iris2)

# Overlay the fitted regression line from `fit` as a thick, dark blue,
# dot-dashed line (lty = 4).
abline(fit, col = "darkblue", lwd = 3, lty = 4)
# A scatterplot matrix of the 4 numeric variables in our data,
# with one colour per species.
Species <- iris[["Species"]]
col_Species <- c("#7DB0DD", "#86B875", "#E495A5")

# The integer codes of the Species factor pick each point's colour.
# lower.panel = NULL leaves the lower triangle of the matrix empty.
pairs(
  iris[, -5],
  lower.panel = NULL,
  col = col_Species[as.integer(Species)],
  pch = 19,
  cex = 1.4
)

# Turn clipping to the plot region off before adding the legend.
par(xpd = TRUE)
legend(
  x = 0,
  y = 0.4,
  cex = 2,
  legend = levels(Species),
  fill = col_Species
)
par(xpd = NA) # fix legend: https://stat.ethz.ch/pipermail/r-help/2001-September/015374.html

# Clustering the flowers
# =======================

# Create a hierarchical clustering of the flowers: the four measurements are
# standardised with scale(), pairwise distances are computed with dist(), and
# the tree is built with complete linkage.
hc <- hclust(dist(scale(iris2)), method = "complete")

# Turning the hclust object into a dendrogram object for plotting
dend <- as.dendrogram(hc)

# install.packages('dendextend')
# library() stops immediately with a clear error when dendextend is not
# installed; require() would only return FALSE and let the script fail later
# at the first dendextend call.
library(dendextend)

# One branch colour per cluster when the tree is cut into k = 3 clusters.
col_clust <- c("burlywood4", "black", "darkgrey")
dend <- color_branches(dend, k = 3, col = col_clust, groupLabels = 3:1)

# Colour every leaf label by the true species of that flower.
# order.dendrogram() gives the original row index of each leaf in plotting
# order, so the colours line up with the leaves.
dend <- color_labels(
  dend,
  col = col_Species[as.numeric(Species[order.dendrogram(dend)])]
)
dend <- hang.dendrogram(dend)

# Viewing our dendrogram with colors per each cluster (cutting for 3 clusters)
plot(dend)
legend(
  "topright",
  cex = 1.5,
  legend = as.character(levels(Species)),
  fill = col_Species
)
The R session information (including the OS info, R version and all packages used):
# Print the R version, platform, locale and the attached/loaded packages
# for reproducibility.
utils::sessionInfo()
## R version 3.0.3 (2014-03-06) ## Platform: x86_64-w64-mingw32/x64 (64-bit) ## ## locale: ## [1] LC_COLLATE=Hebrew_Israel.1255 LC_CTYPE=Hebrew_Israel.1255 ## [3] LC_MONETARY=Hebrew_Israel.1255 LC_NUMERIC=C ## [5] LC_TIME=Hebrew_Israel.1255 ## ## attached base packages: ## [1] stats graphics grDevices datasets utils methods base ## ## other attached packages: ## [1] knitr_1.5 dendextendRcpp_0.5.1 Rcpp_0.11.1 dendextend_0.14.2 ## [5] colorspace_1.2-4 installr_0.14.5 ## ## loaded via a namespace (and not attached): ## [1] ape_3.1-1 evaluate_0.5.1 formatR_0.10 grid_3.0.3 highr_0.3 ## [6] lattice_0.20-27 nlme_3.1-115 stringr_0.6.2 tools_3.0.3
# Print the date and time at which the report was generated.
base::Sys.time()
## [1] "2014-05-29 09:03:14 IDT"