## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
    echo = TRUE,
    message = FALSE,
    warning = FALSE,
    cache = FALSE,
    dev = "png",
    dev.args = list(type = "cairo")
)
library(CLAMP)
library(bigstatsr)

## ----load-data----------------------------------------------------------------
data("dataWholeBlood")
data("majorCellTypes")
data("celltypeTargets")

# Scale each gene to mean 0 and variance 1
dataWholeBlood <- tscale(dataWholeBlood)

## ----load-priors-download, eval=FALSE-----------------------------------------
# # How to download pathway and cell marker libraries from Enrichr.
# # Not run during vignette build to avoid network calls; pre-fetched
# # .rds files are loaded in the next chunk instead.
# enrichr_url <- "https://maayanlab.cloud/Enrichr/geneSetLibrary"
# gmtList <- list(
#     CellMarkers = getGMT(
#         paste0(enrichr_url, "?mode=text&libraryName=CellMarker_2024"),
#         "CellMarker_2024"
#     ),
#     KEGG = getGMT(
#         paste0(enrichr_url, "?mode=text&libraryName=KEGG_2021_Human"),
#         "KEGG_2021_Human"
#     )
# )

## ----load-priors--------------------------------------------------------------
# Load pre-fetched gene set libraries bundled with the package
gmtList <- list(
    CellMarkers = readRDS(
        system.file("extdata", "CellMarker_2024.rds", package = "CLAMP")
    ),
    KEGG = readRDS(
        system.file("extdata", "KEGG_2021_Human.rds", package = "CLAMP")
    )
)

# Combine into a single sparse matrix
pathMatCell <- gmtListToSparseMat(gmtList)

# Load additional xCell reference matrix
data("xCell")

# Match pathways to the gene space of whole blood
matchedPathsWB <- getMatchedPathwayMatList(
    pathMatCell,
    xCell,
    new.genes = rownames(dataWholeBlood),
    min.genes = 2
)

## ----compute-svd--------------------------------------------------------------
set.seed(1)
wb_svd_k <- select_svd_k(dataWholeBlood)
wb_svd <- compute_svd(dataWholeBlood, k = wb_svd_k)
wb_clamp_k <- select_clamp_k(wb_svd,
    n_samples = ncol(dataWholeBlood),
    svd_k = wb_svd_k
)
wb_clamp_k

## ----fit-clampbase------------------------------------------------------------
wb_clamp_base <- CLAMPbase(
    dataWholeBlood,
    svdres     = wb_svd,
    clamp_k    = wb_clamp_k,
    trace      = FALSE,
    adaptive.p = 0.05
)

## ----fit-clampfull------------------------------------------------------------
wb_clamp_full <- CLAMPfull(
    dataWholeBlood,
    priorMat          = matchedPathsWB,
    svdres            = wb_svd,
    clamp.base.result = wb_clamp_base,
    clamp_k           = wb_clamp_k,
    trace             = TRUE,
    use_cpp           = TRUE
)

## ----compare-models, fig.width=6, fig.height=5--------------------------------
output <- compareBs(
    wb_clamp_base,
    wb_clamp_full,
    celltypeTargets,
    method = "s",
    xlab   = "CLAMPbase",
    ylab   = "CLAMPfull"
)

output$plot

## ----named-matrices-----------------------------------------------------------
# B: gene loadings (LVs × genes)
dim(wb_clamp_full$B)
wb_clamp_full$B[1:3, 1:4]

# Z: sample scores (LVs × samples)
dim(wb_clamp_full$Z)
wb_clamp_full$Z[1:3, 1:4]

## ----fit-clampfull-fbm--------------------------------------------------------
dataWholeBloodFBM <- bigstatsr::as_FBM(dataWholeBlood)

wb_clamp_full_fbm <- CLAMPfull(
    dataWholeBloodFBM,
    priorMat          = matchedPathsWB,
    svdres            = wb_svd,
    clamp.base.result = wb_clamp_base,
    clamp_k           = wb_clamp_k,
    trace             = TRUE,
    use_cpp           = TRUE
)

## ----compare-fbm, fig.width=6, fig.height=5-----------------------------------
output <- compareBs(
    wb_clamp_full,
    wb_clamp_full_fbm,
    celltypeTargets,
    method = "s",
    xlab   = "CLAMPfull (matrix)",
    ylab   = "CLAMPfull (FBM)"
)

output$plot

## ----session-info-------------------------------------------------------------
sessionInfo()

