Saturday, October 17, 2015

Tree

# Classification tree on the ISLR Carseats data: fit a tree on a random half
# of the rows, measure the test error, then prune using cross-validation.
# install.packages("ISLR")  # run once if the package is not installed
# install.packages("tree")  # run once if the package is not installed
library(ISLR)
library(tree)

# Columns are referenced through Carseats$... rather than attach(), so they
# can never be masked by, or confused with, variables in the workspace.
range(Carseats$Sales)

# Binary response: "YES" when Sales is at least 8, otherwise "NO".
# It has to be a factor: tree() fits a classification tree only for a factor
# response, and data.frame() no longer converts strings to factors (R >= 4.0).
high <- factor(ifelse(Carseats$Sales >= 8, "YES", "NO"))
Carseats <- data.frame(Carseats, high)
range(Carseats$Sales)
set.seed(2)
str(Carseats)
range(as.character(high))
names(Carseats)

# Drop the first column (Sales): `high` is derived from it, so keeping it as
# a predictor would leak the answer into the model.
Carseats <- Carseats[, -1]

# Random half of the rows for training; the negated indices select the rest.
train <- sample(seq_len(nrow(Carseats)), nrow(Carseats) / 2)
test <- -train
training_data <- Carseats[train, ]
testing_data <- Carseats[test, ]
testing_high <- testing_data$high

tree_model <- tree(high ~ ., training_data)
plot(tree_model)
text(tree_model, pretty = 0)

# Predict the class of each held-out row.
tree_pred <- predict(tree_model, testing_data, type = "class")

# Test misclassification rate. The original author reported about 28%; the
# exact figure depends on the random split and may differ between R versions.
error_rate <- mean(tree_pred != testing_high)

## Pruning the tree. Cross-validate to see where to stop pruning.
set.seed(9)
cv_tree <- cv.tree(tree_model, FUN = prune.misclass)
cv_tree
names(cv_tree) # dev = cross-validation misclassification error per tree size
plot(cv_tree$size, cv_tree$dev, type = "b")

# Pruned model: keep the 9-leaf subtree (size chosen by the author from the
# cross-validation plot above).
pruned_model <- prune.misclass(tree_model, best = 9)
plot(pruned_model)
text(pruned_model)

# Test misclassification rate of the pruned tree (author reported about 23%).
tree_pred2 <- predict(pruned_model, testing_data, type = "class")
error_rate2 <- mean(tree_pred2 != testing_high)

Monday, October 12, 2015

R 3.2.2 Error

I got this peculiar error: after installing ggplot2, I couldn't load it. R reported that there was no such package.
Solution: install the package ‘Rcpp’:
# Install Rcpp -- the fix described in the note above for ggplot2 failing to load.
install.packages("Rcpp")

Sunday, October 11, 2015

Subsetting

# Subsetting examples on the built-in iris data frame.

# Rows 1 through 9, all columns.
iris[1:9, ]
# Columns 1 through 4, all rows.
iris[, 1:4]
# Column sums of the four numeric columns, and row sums of the first two rows.
colSums(iris[, 1:4])
rowSums(iris[1:2, 1:4])
dim(iris) # 150 rows x 5 columns

# Select columns by name (returns a data frame).
iris[c("Sepal.Length", "Petal.Length")]
# iris$Sepal.Length < 5 creates a logical vector of TRUE/FALSE values,
# which is then used to keep only the matching rows.
iris[iris$Sepal.Length < 5, ]

# Redo -- same as the previous snippet, with the logical mask stored first.
tf <- iris$Sepal.Length < 5
iris[tf, ]
str(tf) # inspect the mask (the original referred to an undefined `th`)
# Redo with subset().
subset(iris, Sepal.Length < 5)

# Random sample of about a third of the rows, without replacement.
# floor() makes explicit the truncation that sample() applies to a
# fractional size (0.33 * 150 = 49.5 -> 49 rows).
x <- sample(seq_len(nrow(iris)), floor(0.33 * nrow(iris)), replace = FALSE)
iris[x, ]

Saturday, October 10, 2015

GGPlot2 Lesson1

Ref: http://www.statmethods.net/advgraphs/ggplot2.html
library(ggplot2)

# Scatter plot of sepal width against sepal length.
# qplot() is deprecated as of ggplot2 3.4.0, so the plot is built with
# ggplot() + geom_point(); mapping columns by name (instead of passing
# iris$... vectors) also gives clean axis and legend titles.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point()

# Same scatter plot, with points coloured by species.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
  geom_point()