Show setup code
library(dplyr)
library(data.table)
library(ggplot2)
library(patchwork)
library(colorspace)
library(PolyGenius)We generate the PGS models from the Bellenguez et al. summary statistics (Bellenguez et al. 2022) from IEU OpenGWAS (Elsworth et al. 2020), to which we apply multiple different algorithms. The ClumpingThresholding runs use the EUR reference panel from the 1000 Genomes Project (The 1000 Genomes Project Consortium 2015). The key point is that a single generate$models(...) call can request several algorithms at once, while the execution engine resolves and reuses shared intermediate tasks such as summary-statistics retrieval, reference-panel preparation, clumping, and LD-matrix construction.
# Generate AD-PGS models
models <- generate$models(
sources = generate$sources$opengwas("ebi-a-GCST90027158"),
algorithms = c(
generate$algorithms$ClumpingThresholding(pval = c(5e-7, 5e-8, 5e-9), reference.panel = "EUR", variant.space = "HapMap3+"),
generate$algorithms$LDpred2(reference.panel = "EUR", ld.size = 3000, ld.thr = .01, ncores = 8, variant.space = "HapMap3+"),
generate$algorithms$lassosum2(reference.panel = "EUR", ld.size = 3000, ld.thr = .01, ncores = 8, variant.space = "HapMap3+")
)
)
models <- generate$models(
sources = generate$sources$opengwas("ebi-a-GCST90027158"),
algorithms = c(
generate$algorithms$ClumpingThresholding(pval = c(5e-7, 5e-8, 5e-9)),
generate$algorithms$LDpred2(ld.size = 3000, ld.thr = .01, ncores = 8),
generate$algorithms$lassosum2(ld.size = 3000, ld.thr = .01, ncores = 8),
),
reference.panel = "EUR",
variant.space = "HapMap3+"
)
modelsOnce the candidate models have been generated, we create a single PolyGeniusData object that combines the models, the cohort genotypes, and the phenotype table required for benchmarking. Here the evaluation cohort consists of two genotype datasets, the Amsterdam Dementia Cohort (ADC) (Flier and Scheltens 2018) and the Longitudinal Aging Study Amsterdam (LASA) (Hoogendijk et al. 2016, 2025). Since both cohort datasets are defined with the GRCh38 genome build, we first liftover the generated models.
liftover(models, "GRCh38")
data <- PolyGeniusData(
name = "evaluate",
models = models,
genotypes = c(
GenotypeInfo(path = "resources/cohorts/ADC/genotypes", format = "pfile", build = "GRCh38", name = "ADC"),
GenotypeInfo(path = "resources/cohorts/LASA/genotypes", format = "pfile", build = "GRCh38", name = "LASA")
),
obsm = list(
phenotypes = rbind(
fread("resources/cohorts/ADC/phenotypes/phenotypes.csv")[, c("demented", "sex")],
fread("resources/cohorts/LASA/phenotypes/phenotypes.csv")[, c("demented", "sex")]
) %>% mutate(demented = factor(demented, 0:1))
)
)
dataWe then run evaluate$benchmark(...) to orchestrate a joint model-assessment workflow which: - Assesses models’ ability to distinguish between demented and non demented samples - calibration - similarity - compare - risk strata - incremental
using dementia status as the outcome and sex as the baseline covariate.
# results <- evaluate$benchmark(
# on = data,
# outcome = demented,
# baseline.covariates = sex
# )
results1 <- evaluate$similarity(on = data)
results2 <- evaluate$discrimination(on = data, outcome = demented)
results3 <- evaluate$incremental(on = data, outcome = demented, baseline.covariates = sex)
results <- merge(merge(results1, results2), results3)
corr.p <- visualize$evaluate$similarity$heatmap(results,
center.zero = TRUE,
symmetrize.range = TRUE,
border = TRUE,
name = "Pearson correlation",
column_names_rot = 45,
width = data$n_mod * unit(8, "mm"),
height = data$n_mod * unit(8, "mm"))
ROC.p <- visualize$evaluate$discrimination$roc(results) +
scale_color_discrete_sequential("BuPu") +
theme(legend.position = c(.75, .2))The benchmark result can now be visualized from several complementary angles. The plotting block below collects overview, discrimination, calibration, comparison, similarity, incremental-value, and risk-stratification views that together support model ranking and prioritization.
pdf("Rplots.pdf", width = 7, height = 5)
print(visualize$evaluate$overview$metric.heatmap(results))
print(visualize$evaluate$overview$leaderboard(results))
print(visualize$evaluate$discrimination$curve(results))
print(
visualize$evaluate$discrimination$roc(results) +
ggplot2::theme(legend.position = "right") +
ggplot2::theme_classic()
)
print(visualize$evaluate$discrimination$pr(results))
print(visualize$evaluate$discrimination$confusion(results))
print(visualize$evaluate$discrimination$distribution(results))
print(visualize$evaluate$discrimination$forest(results))
print(visualize$evaluate$calibration$curve(results))
print(visualize$evaluate$calibration$slope.intercept(results))
print(visualize$evaluate$compare$delta.forest(results))
print(visualize$evaluate$compare$delta.heatmap(results))
print(visualize$evaluate$similarity$heatmap(results, border = TRUE))
print(visualize$evaluate$incremental$delta(results))
print(visualize$evaluate$risk.strata$profile(results))
print(visualize$evaluate$risk.strata$lift(results))
dev.off()