Here is another way to do PCA-LDA (a.k.a. DAPC) in R when you need to find the best number of retained principal components for LDA, as you typically do for large datasets with many predictors.
This solution uses the tidymodels framework for hyperparameter tuning, applied to the Glass dataset.
Dependencies
library(mlbench)
data(Glass)
library(tidymodels)
library(discrim) # provides the tidymodels wrapper for MASS::lda
Specify tidymodels LDA classification model using the MASS engine
mod <- discrim::discrim_linear(mode = "classification", engine = "MASS")
mod %>% translate() # this is what the discrim::discrim_linear wrapper will do
#Linear Discriminant Model Specification (classification)
#
#Computational engine: MASS
#
#Model fit template:
#MASS::lda(formula = missing_arg(), data = missing_arg())
Create tidymodels preprocessing recipe (scale+center, PCA)
# specify missing_arg() [formula, data] using a tidymodels recipe
rec <- recipe(formula = Type ~ ., data = Glass) %>%
  step_normalize(all_numeric_predictors()) %>%
  step_pca(all_numeric_predictors(), num_comp = tune())
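To see what this preprocessing produces before tuning, you can temporarily fix num_comp (the value 3 below is an arbitrary choice for illustration) and bake the recipe — a quick sketch:
rec %>%
  finalize_recipe(tibble(num_comp = 3)) %>% # plug in a fixed value for the tune() placeholder
  prep() %>%
  bake(new_data = NULL) %>% # processed training data: 3 PCs plus the outcome
  head()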
Specify tuning grid and control parameters
# tuning grid with hyperparameters in columns
# column name(s) must match the tune() placeholder in the recipe above
# note: ncol(rec$template) is 10 (9 predictors + the outcome), so the grid also tries num_comp = 10
tuneGrid <- expand.grid(num_comp = 1:ncol(rec$template))
# control tune_grid() process below
trControl <- control_grid(verbose = TRUE, allow_par = FALSE)
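If you prefer to stay within the tidymodels tooling, the same grid can be built with dials — a sketch (the range 1–10 mirrors the expand.grid() call above; tuneGrid_dials is just an illustrative name):
tuneGrid_dials <- grid_regular(num_comp(range = c(1L, 10L)), levels = 10)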
Define tidymodels workflow (preprocessing steps, model fitting)
wflow <- workflow(preprocessor = rec, spec = mod)
Specify folds for cross-validation
set.seed(8482)
folds <- vfold_cv(rec$template, v = 5, repeats = 1, strata = "Type", pool = 0.1)
Fit a range of LDA models
# takes a while to process, decrease v and repeats above to speed up
fit_train <- wflow %>%
  tune_grid(resamples = folds,
            grid = tuneGrid,
            metrics = metric_set(accuracy), # or specify multiple metrics
            control = trControl)
Extract and plot performance metrics
met_train <- fit_train %>% collect_metrics()
ggplot(met_train, aes(x = num_comp, y = mean)) +
  geom_line(color = "#3E4A89FF", size = 2, alpha = 0.6) +
  scale_x_continuous(breaks = 1:ncol(rec$template)) +
  facet_wrap(~ .metric) +
  theme_bw()
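tune also provides an autoplot() method for tuning results that produces a comparable plot directly, if you don't need the custom styling:
autoplot(fit_train) # performance metric vs. num_comp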
Seven PCs appear to be enough for classification:

Update workflow with the best hyperparameter
# show best 5
fit_train %>% show_best(metric = "accuracy")
## A tibble: 5 × 7
# num_comp .metric .estimator mean n std_err .config
# <int> <chr> <chr> <dbl> <int> <dbl> <chr>
#1 9 accuracy multiclass 0.626 5 0.0207 Preprocessor09_Model1
#2 10 accuracy multiclass 0.626 5 0.0207 Preprocessor10_Model1
#3 7 accuracy multiclass 0.626 5 0.0220 Preprocessor07_Model1
#4 8 accuracy multiclass 0.598 5 0.0225 Preprocessor08_Model1
#5 6 accuracy multiclass 0.579 5 0.0221 Preprocessor06_Model1
# select best, e.g. by applying the one-standard-error-rule
(bestTune <- fit_train %>% select_by_one_std_err(num_comp, metric = "accuracy"))
## A tibble: 1 × 9
# num_comp .metric .estimator mean n std_err .config .best .bound
# <int> <chr> <chr> <dbl> <int> <dbl> <chr> <dbl> <dbl>
#1 7 accuracy multiclass 0.626 5 0.0220 Preprocessor07_Model1 0.626 0.605
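# alternatively (a sketch), select_best() picks the configuration with the
# highest mean accuracy, without the parsimony adjustment of the 1-SE rule:
# fit_train %>% select_best(metric = "accuracy")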
# finalize workflow
wflow_final <- wflow %>% finalize_workflow(bestTune)
# verify that the workflow was updated correctly
wflow$pre$actions$recipe$recipe$steps[[2]]$num_comp # should be tune()
# tune()
wflow_final$pre$actions$recipe$recipe$steps[[2]]$num_comp # should be 7
#7
Fit final PCA-LDA model to the full (training) dataset
fit_final <- wflow_final %>% fit(Glass)
class(fit_final$fit$fit$fit) # here is the MASS::lda object
#lda
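Instead of digging through the nested list, the workflows extractor functions return the same object — a sketch:
extract_fit_engine(fit_final) # the underlying MASS::lda fit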
Visualize fitted PCA-LDA model
# use predict() to get predicted class, posterior probs, and LDA scores
Glass.PCALDA <- tibble(Glass,
                       predict(fit_final, new_data = Glass, type = "class"), # predicted class
                       predict(fit_final, new_data = Glass, type = "prob"),  # posterior probabilities per class
                       as_tibble(predict(fit_final, new_data = Glass, type = "raw")$x)) # LD scores
# verify that tidymodels did it right
Own.PCALDA <- MASS::lda(prcomp(Glass[,-10], center = TRUE, scale. = TRUE)$x[,1:7],
                        grouping = Glass[,10])
Own.PCALDA$x <- predict(Own.PCALDA)$x
all.equal(as_tibble(Own.PCALDA$x),
          Glass.PCALDA %>% dplyr::select(starts_with("LD"))) # it's the same!
#TRUE
# plot
ggplot(Glass.PCALDA, aes(x = LD1, y = LD2)) +
  geom_point(aes(color = Type, shape = .pred_class)) +
  theme_bw() +
  ggtitle("PCA-LDA (DAPC) on Glass dataset, using 7 PCs")

Compare PCA-LDA and LDA
Own.LDA <- MASS::lda(scale(Glass[,-10], center = TRUE, scale = TRUE), grouping = Glass[,10])
Own.LDA$predict <- predict(Own.LDA)
Glass.LDA <- tibble(Glass,
                    .pred_class = Own.LDA$predict$class,
                    as_tibble(Own.LDA$predict$posterior) %>%
                      rename_all(list(function(x) paste0(".pred_", x))),
                    as_tibble(Own.LDA$predict$x))
# plot
ggplot(Glass.LDA, aes(x = LD1, y = LD2)) +
  geom_point(aes(color = Type, shape = .pred_class)) +
  theme_bw() +
  ggtitle("LDA on Glass dataset")
# compare model accuracies
accuracy(Glass.PCALDA, truth = Type, estimate = .pred_class) # 66.8%
accuracy(Glass.LDA, truth = Type, estimate = .pred_class) # 67.3%
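Accuracy alone can hide class-specific errors on the imbalanced Glass classes; a confusion matrix from yardstick makes the comparison more informative — a sketch:
conf_mat(Glass.PCALDA, truth = Type, estimate = .pred_class)
conf_mat(Glass.LDA, truth = Type, estimate = .pred_class)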

For this small dataset, dimensionality reduction via PCA does not improve the classification results. However, the PCA-LDA procedure makes LDA applicable to very large datasets with a high degree of multicollinearity (many highly correlated predictors). The PCA step removes the multicollinearity, which can be a problem for LDA, and the cross-validation procedure shown above identifies the optimal degree of dimensionality reduction (here, 7 PCs) for classification.
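As a quick illustration of that multicollinearity (a sketch), the pairwise correlations among the nine Glass predictors show several strongly correlated pairs:
round(cor(Glass[, -10]), 2) # e.g. RI and Ca are highly correlated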