https://riskprep.com/all-tutorials/36-exam-2/131-regression-alanysis
dataminer
Monday, February 22, 2016
Sunday, February 21, 2016
Visualization with ggvis
library(ggvis)

# Base visualisation: weight on x, miles-per-gallon on y, fill mapped to cyl.
p <- mtcars %>%
  ggvis(x = ~wt, y = ~mpg, fill = ~cyl)

# Draw the points on top of the base visualisation.
p %>% layer_points()

# The same output written as a pipeline; `%>%` can be read as "then".
# A smooth curve is layered over the points.
p %>%
  layer_points() %>%
  layer_smooths()

# Add some interactivity: an input slider (0.2 to 1) controls the span.
p %>%
  layer_points() %>%
  layer_smooths(span = input_slider(0.2, 1))

# With more options: standard-error band plus fixed (`:=`) colours and width.
p %>%
  layer_points() %>%
  layer_smooths(
    span = input_slider(0.2, 1),
    se = TRUE,
    fill := "green",
    stroke := "red",
    strokeWidth := 4
  )

# Play on: the same plot again, to experiment with the slider.
p %>%
  layer_points() %>%
  layer_smooths(
    span = input_slider(0.2, 1),
    se = TRUE,
    fill := "green",
    stroke := "red",
    strokeWidth := 4
  )
# Base visualisation: weight on x, miles-per-gallon on y, fill mapped to cyl.
p <- mtcars %>%
  ggvis(x = ~wt, y = ~mpg, fill = ~cyl)

# Draw the points on top of the base visualisation.
p %>% layer_points()

# The same output written as a pipeline; `%>%` can be read as "then".
# A smooth curve is layered over the points.
p %>%
  layer_points() %>%
  layer_smooths()

# Add some interactivity: an input slider (0.2 to 1) controls the span.
p %>%
  layer_points() %>%
  layer_smooths(span = input_slider(0.2, 1))

# With more options: standard-error band plus fixed (`:=`) colours and width.
p %>%
  layer_points() %>%
  layer_smooths(
    span = input_slider(0.2, 1),
    se = TRUE,
    fill := "green",
    stroke := "red",
    strokeWidth := 4
  )

# Play on: the same plot again, to experiment with the slider.
p %>%
  layer_points() %>%
  layer_smooths(
    span = input_slider(0.2, 1),
    se = TRUE,
    fill := "green",
    stroke := "red",
    strokeWidth := 4
  )
Saturday, October 17, 2015
Tree
# Classification tree on the ISLR Carseats data: predict whether Sales is at
# least 8 ("YES") from the remaining columns, then prune the tree using
# cross-validation and compare test error rates.
# install.packages("ISLR") # if you don't have the package, install it.
# install.packages("tree") # if you don't have the package, install it.
library(ISLR)
library(tree)

# Columns are accessed through Carseats$ instead of attach(), so the script
# does not depend on (or pollute) the search path.
range(Carseats$Sales)

# Binary response. It must be a factor: tree() fits a classification tree only
# for a factor response, and since R 4.0 data.frame() keeps strings as
# character instead of converting them.
high <- factor(ifelse(Carseats$Sales >= 8, "YES", "NO"))
Carseats <- data.frame(Carseats, high)
range(Carseats$Sales)
set.seed(2)
str(Carseats)
levels(high)
names(Carseats)

# Drop Sales by name: `high` is derived from it, so keeping it as a predictor
# would leak the answer into the model.
Carseats <- Carseats[, names(Carseats) != "Sales"]

# Random half of the rows for training; the negative index selects the rest.
train <- sample(seq_len(nrow(Carseats)), nrow(Carseats) / 2)
test <- -train
training_data <- Carseats[train, ]
testing_data <- Carseats[test, ]
testing_high <- high[test]

tree_model <- tree(high ~ ., training_data)
plot(tree_model)
text(tree_model, pretty = 0)

# Predict
tree_pred <- predict(tree_model, testing_data, type = "class")

# Check model accuracy (misclassification rate on the held-out rows).
# The original post reported about 28%; the exact figure depends on the
# random split, which can differ between R versions.
error_rate <- mean(tree_pred != testing_high)

## Pruning the tree. Cross-validate to see where to stop pruning
set.seed(9)
cv_tree <- cv.tree(tree_model, FUN = prune.misclass)
cv_tree
names(cv_tree) # dev = cross-validation error
plot(cv_tree$size, cv_tree$dev, type = "b")

# Pruned model
# NOTE(review): best = 9 is the size chosen in the original post, presumably
# read off the plot above -- re-check it against cv_tree for your own run.
pruned_model <- prune.misclass(tree_model, best = 9)
plot(pruned_model)
text(pruned_model, pretty = 0)

# Check accuracy of the pruned tree (original post reported about 23%).
tree_pred2 <- predict(pruned_model, testing_data, type = "class")
error_rate2 <- mean(tree_pred2 != testing_high)
# Classification tree on the ISLR Carseats data: predict whether Sales is at
# least 8 ("YES") from the remaining columns, then prune the tree using
# cross-validation and compare test error rates.
# install.packages("tree") # if you don't have the package, install it.
library(ISLR)
library(tree)

# Columns are accessed through Carseats$ instead of attach(), so the script
# does not depend on (or pollute) the search path.
range(Carseats$Sales)

# Binary response. It must be a factor: tree() fits a classification tree only
# for a factor response, and since R 4.0 data.frame() keeps strings as
# character instead of converting them.
high <- factor(ifelse(Carseats$Sales >= 8, "YES", "NO"))
Carseats <- data.frame(Carseats, high)
range(Carseats$Sales)
set.seed(2)
str(Carseats)
levels(high)
names(Carseats)

# Drop Sales by name: `high` is derived from it, so keeping it as a predictor
# would leak the answer into the model.
Carseats <- Carseats[, names(Carseats) != "Sales"]

# Random half of the rows for training; the negative index selects the rest.
train <- sample(seq_len(nrow(Carseats)), nrow(Carseats) / 2)
test <- -train
training_data <- Carseats[train, ]
testing_data <- Carseats[test, ]
testing_high <- high[test]

tree_model <- tree(high ~ ., training_data)
plot(tree_model)
text(tree_model, pretty = 0)

# Predict
tree_pred <- predict(tree_model, testing_data, type = "class")

# Check model accuracy (misclassification rate on the held-out rows).
# The original post reported about 28%; the exact figure depends on the
# random split, which can differ between R versions.
error_rate <- mean(tree_pred != testing_high)

## Pruning the tree. Cross-validate to see where to stop pruning
set.seed(9)
cv_tree <- cv.tree(tree_model, FUN = prune.misclass)
cv_tree
names(cv_tree) # dev = cross-validation error
plot(cv_tree$size, cv_tree$dev, type = "b")

# Pruned model
# NOTE(review): best = 9 is the size chosen in the original post, presumably
# read off the plot above -- re-check it against cv_tree for your own run.
pruned_model <- prune.misclass(tree_model, best = 9)
plot(pruned_model)
text(pruned_model, pretty = 0)

# Check accuracy of the pruned tree (original post reported about 23%).
tree_pred2 <- predict(pruned_model, testing_data, type = "class")
error_rate2 <- mean(tree_pred2 != testing_high)
Monday, October 12, 2015
R 3.2.2 Error
I got this peculiar error: after installing ggplot2, I couldn't use it. R said there is no such package.
Solution: install the ‘Rcpp’ package:
# Install the Rcpp package.
install.packages(pkgs = "Rcpp")
Solution: install the ‘Rcpp’ package:
# Install the Rcpp package.
install.packages(pkgs = "Rcpp")
Sunday, October 11, 2015
Subsetting
# Subsetting examples on the built-in iris data frame.

# Select rows 1 through 9, all columns.
iris[1:9, ]

# Select columns 1 through 4, all rows.
iris[, 1:4]

# Column sums of the four measurement columns, and row sums of the first
# two rows over the same columns.
colSums(iris[, 1:4])
rowSums(iris[1:2, 1:4])

dim(iris) # 150 rows x 5 columns

# Select columns by name.
iris[c("Sepal.Length", "Petal.Length")]

# iris$Sepal.Length < 5 creates a logical vector (TRUE/FALSE per row);
# indexing rows with it keeps only the TRUE rows.
iris[iris$Sepal.Length < 5, ]

# Redo -- same as the previous snippet, with the mask stored first.
tf <- iris$Sepal.Length < 5
iris[tf, ]
str(tf) # inspect the mask (the original referenced an undefined `th`)

# Redo with subset().
subset(iris, Sepal.Length < 5)

# Random sample of about a third of the rows, without replacement.
# floor() makes the truncation of the fractional size (49.5) explicit.
x <- sample(seq_len(nrow(iris)), floor(0.33 * nrow(iris)), replace = FALSE)
iris[x, ]
# Subsetting examples on the built-in iris data frame.

# Select rows 1 through 9, all columns.
iris[1:9, ]

# Select columns 1 through 4, all rows.
iris[, 1:4]

# Column sums of the four measurement columns, and row sums of the first
# two rows over the same columns.
colSums(iris[, 1:4])
rowSums(iris[1:2, 1:4])

dim(iris) # 150 rows x 5 columns

# Select columns by name.
iris[c("Sepal.Length", "Petal.Length")]

# iris$Sepal.Length < 5 creates a logical vector (TRUE/FALSE per row);
# indexing rows with it keeps only the TRUE rows.
iris[iris$Sepal.Length < 5, ]

# Redo -- same as the previous snippet, with the mask stored first.
tf <- iris$Sepal.Length < 5
iris[tf, ]
str(tf) # inspect the mask (the original referenced an undefined `th`)

# Redo with subset().
subset(iris, Sepal.Length < 5)

# Random sample of about a third of the rows, without replacement.
# floor() makes the truncation of the fractional size (49.5) explicit.
x <- sample(seq_len(nrow(iris)), floor(0.33 * nrow(iris)), replace = FALSE)
iris[x, ]
Saturday, October 10, 2015
GGPlot2 Lesson1
Ref: http://www.statmethods.net/advgraphs/ggplot2.html
library(ggplot2)

# Scatter plot of sepal width against sepal length.
# qplot() is deprecated as of ggplot2 3.4.0, so the plot is built with
# ggplot() + geom_point(); passing the data frame once also gives clean
# axis labels (Sepal.Length rather than iris$Sepal.Length).
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point()

# The same scatter plot with points coloured by species.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
  geom_point()

library(ggplot2)

# Scatter plot of sepal width against sepal length.
# qplot() is deprecated as of ggplot2 3.4.0, so the plot is built with
# ggplot() + geom_point(); passing the data frame once also gives clean
# axis labels (Sepal.Length rather than iris$Sepal.Length).
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point()

# The same scatter plot with points coloured by species.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
  geom_point()
Friday, October 9, 2015
Subscribe to:
Posts (Atom)