Introduction to Statistical Learning with Tidy Models

The following Support Vector Machine example is taken from Introduction to Statistical Learning, a great introductory book on data science by Gareth James, Daniela Witten, Trevor Hastie, and Rob Tibshirani.

The original code is taken from the ISLR tidymodels labs by Emil Hvitfeldt.

Simulating the Data

library(tidymodels)
library(dplyr)

set.seed(1)
sim_data2 <- tibble(
  x1 = rnorm(200) + rep(c(2, -2, 0), c(100, 50, 50)),
  x2 = rnorm(200) + rep(c(2, -2, 0), c(100, 50, 50)),
  y = factor(rep(c(1, 2), c(150, 50)))
)

set.seed(2)
sim_data2_test <- tibble(
  x1 = rnorm(200) + rep(c(2, -2, 0), c(100, 50, 50)),
  x2 = rnorm(200) + rep(c(2, -2, 0), c(100, 50, 50)),
  y = factor(rep(c(1, 2), c(150, 50)))
)
Fitting Support Vector Machine Model
svm_rbf_spec <- svm_rbf() |>
  set_mode("classification") |>
  set_engine("kernlab")

svm_rbf_fit <- svm_rbf_spec |>
  fit(y ~ ., data = sim_data2)

Performance Data
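As a quick sanity check on the fitted model, the hard class predictions can be scored before looking at the full curves. This is a minimal sketch, not part of the original lab, assuming yardstick's accuracy() metric and the .pred_class column that augment() adds:

# Sketch (not in the original code): overall accuracy of the hard class
# predictions on the test set.
augment(svm_rbf_fit, new_data = sim_data2_test) |>
  accuracy(truth = y, estimate = .pred_class)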
yardstick_roc <- augment(svm_rbf_fit, new_data = sim_data2_test) |>
  roc_curve(truth = y, estimate = .pred_1)

yardstick_roc
# A tibble: 202 × 3
   .threshold specificity sensitivity
        <dbl>       <dbl>       <dbl>
 1    -Inf         0               1
 2       0.0763    0               1
 3       0.0799    0.0200          1
 4       0.0854    0.0400          1
 5       0.0961    0.0600          1
 6       0.0991    0.0800          1
 7       0.101     0.1             1
 8       0.104     0.12            1
 9       0.107     0.14            1
10       0.108     0.16            1
# ℹ 192 more rows
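The curve can also be reduced to a single summary number. A minimal sketch, not in the original code, assuming yardstick's roc_auc() with the same inputs:

# Sketch: area under the ROC curve on the test set.
augment(svm_rbf_fit, new_data = sim_data2_test) |>
  roc_auc(truth = y, .pred_1)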
library(rtichoke)

svm_probs <- predict(svm_rbf_fit,
                     new_data = sim_data2_test,
                     type = "prob") |>
  pull(.pred_1)

svm_performance_data <- prepare_performance_data(
  probs = list(svm_probs),
  reals = list(sim_data2_test$y == 1)
)
svm_performance_data |>
  select(probability_threshold, specificity, sensitivity)
# A tibble: 101 × 3
   probability_threshold specificity sensitivity
                   <dbl>       <dbl>       <dbl>
 1                  0           0              1
 2                  0.01        0              1
 3                  0.02        0              1
 4                  0.03        0              1
 5                  0.04        0              1
 6                  0.05        0              1
 7                  0.06        0              1
 8                  0.07        0              1
 9                  0.08        0.04           1
10                  0.09        0.06           1
# ℹ 91 more rows
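yardstick evaluates every distinct predicted probability as a threshold (plus the endpoints, giving 202 rows here), while rtichoke uses a fixed grid of 101 thresholds from 0 to 1 in steps of 0.01. The metrics themselves are defined the same way; this sketch, not in the original post, recomputes them by hand at one illustrative threshold:

# Sketch: sensitivity and specificity computed directly at a single threshold.
threshold <- 0.5
predicted_pos <- svm_probs >= threshold
actual_pos <- sim_data2_test$y == 1

tibble(
  sensitivity = sum(predicted_pos & actual_pos) / sum(actual_pos),
  specificity = sum(!predicted_pos & !actual_pos) / sum(!actual_pos)
)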
ROC Curve
yardstick_roc |>
  autoplot() +
  theme_classic()

svm_performance_data |>
  plot_roc_curve(size = 350)
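The same yardstick curve can also be drawn by hand from the yardstick_roc tibble with ggplot2 when more control over the plot is needed; a minimal sketch:

library(ggplot2)

# Sketch: draw the ROC curve directly from the yardstick_roc tibble.
yardstick_roc |>
  ggplot(aes(x = 1 - specificity, y = sensitivity)) +
  geom_path() +
  geom_abline(lty = 2) +
  coord_equal() +
  theme_classic()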