# Fetch golub.RData from the datamicroarray GitHub repository and store it
# under data/.
# download.file() does not create missing directories, so make sure the
# destination folder exists before writing to it.
dir.create("data", showWarnings = FALSE, recursive = TRUE)
download.file(
  url = "https://github.com/ramhiser/datamicroarray/raw/refs/heads/master/data/golub.RData",
  destfile = "data/golub.RData",
  mode = "wb" # .RData is a binary format; request a binary write explicitly
)

# Tidy PCA ----
# Lorem
# Load the downloaded .RData file into the current workspace.
# NOTE(review): presumably this is what provides the `golub` object used
# further down -- confirm against the contents of the file.
load("data/golub.RData")

library("tidyverse")
# Start-up message printed when tidyverse was attached (rendered output, kept
# as comments so that the script parses):
#> ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
#> ✔ dplyr 1.1.4 ✔ readr 2.1.5
#> ✔ forcats 1.0.0 ✔ stringr 1.5.1
#> ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
#> ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
#> ✔ purrr 1.0.4
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("broom")

# Combine the `x` and `y` elements of `golub` into a single tibble, with `y`
# moved to the first column.
golub_tidy <- golub |>
  pluck("x") |>
  as_tibble() |>
  mutate(y = pluck(golub, "y")) |>
  relocate(y)

# Principal component analysis on every column except `y`, with centring and
# scaling switched on.
pca_data <- golub_tidy |>
  select(-y) |>
  prcomp(center = TRUE, scale. = TRUE)

# Eigenvalue summary of the PCA. `label` is a ready-made axis title of the
# form "PC<n> (<p>%)", where <p> is `percent` times 100 rounded to one decimal.
scree_data <- pca_data |>
  tidy(matrix = "eigenvalues") |>
  mutate(label = str_c("PC", PC, " (", round(percent * 100, 1), "%)"))

# Scree plot: one bar per principal component, bar height given by `percent`.
scree_data |>
  ggplot(aes(x = PC, y = percent)) +
  geom_col(colour = "black", alpha = 0.5) +
  geom_hline(yintercept = 0) +
  theme_minimal()
# Scatter plot of the first two fitted principal components, coloured by `y`.
# augment() attaches the fitted PC columns to `golub_tidy`; the axis titles
# reuse the labels stored in `scree_data` for PC 1 and PC 2.
augment(pca_data, golub_tidy) |>
  ggplot(
    mapping = aes(x = .fittedPC1, y = .fittedPC2, colour = y)
  ) +
  # Reference lines through the origin are added first so that the points
  # and ellipses are drawn on top of them.
  geom_vline(xintercept = 0) +
  geom_hline(yintercept = 0) +
  geom_point() +
  stat_ellipse() +
  scale_colour_manual(
    values = c("ALL" = "#0072B2", "AML" = "#D55E00")
  ) +
  theme_minimal() +
  labs(
    x = pull(filter(scree_data, PC == 1), label),
    y = pull(filter(scree_data, PC == 2), label)
  )