library(caret)
library(tidymodels)
library(ggplot2)
data(segmentationData)
<-
segmentationData %>%
segmentationData ::select(EqSphereAreaCh1, PerimCh1, Class, Case) %>%
dplyrsetNames(c("PredictorA", "PredictorB", "Class", "Case")) %>%
mutate(Class = factor(ifelse(Class == "PS", "One", "Two")))
<-
example_train %>%
segmentationData ::filter(Case == "Train") %>%
dplyr::select(-Case)
dplyr
<-
example_test %>%
segmentationData ::filter(Case == "Test") %>%
dplyr::select(-Case) dplyr
Welcome to rtichoke blog!
This blog will be dedicated to the {rtichoke} package, which means that it will contain posts that are related to performance metrics and the possible related usability of {rtichoke}.
Replications
To make the package easier to use I plan to reproduce other people’s code with rtichoke, posts of this kind will be available under the category “replications”.
My first choice is to replicate the first example from the book “Feature Engineering and Selection: A Practical Approach for Predictive Models by Max Kuhn and Kjell Johnson”.
In this example you can see how Box-Cox transformation improves the discrimination capability of the logistic regression model without using any additional information.
Original Code
The code is almost identical to the original code that can be found on github.
Preparing the Data
Training the Models
<-
example_ctrl trainControl(method = "none",
classProbs = TRUE,
summaryFunction = twoClassSummary)
<- train(Class ~ PredictorA + PredictorB,
natural_terms data = example_train,
method = "glm",
metric = "ROC",
trControl = example_ctrl)
<- train(Class ~ PredictorA + PredictorB,
trans_terms data = example_train,
method = "glm",
preProc = "BoxCox",
metric = "ROC",
trControl = example_ctrl)
Creating Predictions
<- predict(natural_terms, example_test, type = "prob")[,1]
original_probs
<- predict(trans_terms, example_test, type = "prob")[,1]
transformed_probs
<- example_test$Class == "One" outcomes
Creating ROC Curve with yardstick
<-
natural_dat %>%
example_test mutate(
prob = original_probs) %>%
roc_curve(Class, prob) %>%
mutate(Format = "Natural Units")
<-
trans_dat %>%
example_test mutate(
prob = transformed_probs) %>%
roc_curve(Class, prob) %>%
mutate(Format = "Inverse Units")
<-
both_dat bind_rows(natural_dat, trans_dat) %>%
mutate(
Format = factor(Format, levels = c("Natural Units", "Inverse Units")))
<-
trans_roc_plot ggplot(both_dat) +
geom_step(aes(x = 1 - specificity, y = sensitivity, color = Format)) +
coord_equal() +
xlab("False Positive Rate") +
ylab("True Positive Rate") +
theme(legend.position = c(.8, .2)) +
scale_colour_manual(
values = c("Natural Units" = "grey",
"Inverse Units" = "black")) +
geom_abline(intercept = 0, slope = 1, col = "grey", lty = 2) +
theme_classic()
trans_roc_plot
rtichoke code
Creating ROC Curve with rtichoke
library(rtichoke)
create_roc_curve(
probs = list(
"Natural Units" = original_probs,
"Inverse Units" = transformed_probs
),reals = list(
outcomes
),size = 350,
col_values = c("grey", "black")
)
library(rtichoke)
create_roc_curve(
probs = list(
"Natural Units" = original_probs,
"Inverse Units" = transformed_probs
),reals = list(
outcomes
),stratified_by = "ppcr",
size = 350,
col_values = c("grey", "black")
)