-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathIntroduction to Random Forest in R_1.R
57 lines (48 loc) · 1.84 KB
/
Introduction to Random Forest in R_1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Set random seed to make results reproducible:
set.seed(17)
# Calculate the size of each of the data sets:
data_set_size <- floor(nrow(iris)/2)
# Generate a random sample of "data_set_size" indexes
indexes <- sample(1:nrow(iris), size = data_set_size)
# Assign the data to the correct sets
training <- iris[indexes,]
validation1 <- iris[-indexes,]
#import the package
library(randomForest)
# Perform training:
rf_classifier = randomForest(Species ~ ., data=training, ntree=100, mtry=2, importance=TRUE)
rf_classifier
varImpPlot(rf_classifier)
# Validation set assessment #1: looking at confusion matrix
prediction_for_table <- predict(rf_classifier,validation1[,-5])
table(observed=validation1[,5],predicted=prediction_for_table)
# Validation set assessment #2: ROC curves and AUC
# Needs to import ROCR package for ROC curve plotting:
library(ROCR)
# Calculate the probability of new observations belonging to each class
# prediction_for_roc_curve will be a matrix with dimensions data_set_size x number_of_classes
prediction_for_roc_curve <- predict(rf_classifier,validation1[,-5],type="prob")
# Use pretty colours:
pretty_colours <- c("#F8766D","#00BA38","#619CFF")
# Specify the different classes
classes <- levels(validation1$Species)
# For each class
for (i in 1:3)
{
# Define which observations belong to class[i]
true_values <- ifelse(validation1[,5]==classes[i],1,0)
# Assess the performance of classifier for class[i]
pred <- prediction(prediction_for_roc_curve[,i],true_values)
perf <- performance(pred, "tpr", "fpr")
if (i==1)
{
plot(perf,main="ROC Curve",col=pretty_colours[i])
}
else
{
plot(perf,main="ROC Curve",col=pretty_colours[i],add=TRUE)
}
# Calculate the AUC and print it to screen
auc.perf <- performance(pred, measure = "auc")
print(auc.perf@y.values)
}