## ----setup, include=FALSE-----------------------------------------------------
options(width=80)
knitr::opts_chunk$set(collapse=TRUE,
                      message=TRUE,
                      warning=TRUE,
                      comment="",
                      fig.align="center",
                      fig.wide=TRUE)

## ----message=FALSE------------------------------------------------------------
library(SummarizedExperiment)
library(GSVA)
library(GSVAdata)

data(geneprotExpCostaEtAl2021)
ls()
protExpCostaEtAl2021
assayNames(protExpCostaEtAl2021)

## ----eval=FALSE---------------------------------------------------------------
# library(GSEABase)
# 
# URL <- "https://data.broadinstitute.org/gsea-msigdb/msigdb/release/2024.1.Hs/c7.immunesigdb.v2024.1.Hs.symbols.gmt"
# c7.genesets <- readGMT(URL)

## ----echo=FALSE, message=FALSE, warning=FALSE---------------------------------
library(GSEABase)

gmtfname <- system.file("extdata", "c7.immunesigdb.v2024.1.Hs.symbols.gmt.gz",
                        package="GSVAdata")
c7.genesets <- readGMT(gmtfname)
c7.genesets

## -----------------------------------------------------------------------------
innatepat <- c("NKCELL_VS_.+_UP", "MAST_CELL_VS_.+_UP",
               "EOSINOPHIL_VS_.+_UP", "BASOPHIL_VS_.+_UP",
               "MACROPHAGE_VS_.+_UP", "NEUTROPHIL_VS_.+_UP")
innatepat <- paste(innatepat, collapse="|")
innategsets <- names(c7.genesets)[grep(innatepat, names(c7.genesets))]
length(innategsets)

adaptivepat <- c("CD4_TCELL_VS_.+_UP", "CD8_TCELL_VS_.+_UP", "BCELL_VS_.+_UP")
adaptivepat <- paste(adaptivepat, collapse="|")
adaptivegsets <- names(c7.genesets)[grep(adaptivepat, names(c7.genesets))]
excludepat <- c("NAIVE", "LUPUS", "MYELOID")
excludepat <- paste(excludepat, collapse="|")
adaptivegsets <- adaptivegsets[-grep(excludepat, adaptivegsets)]
length(adaptivegsets)

c7.genesets.filt <- c7.genesets[c(innategsets, adaptivegsets)]
length(c7.genesets.filt)

## -----------------------------------------------------------------------------
logCPMsWithNAs <- assay(protExpCostaEtAl2021, "logCPM")
logCPMsWithNAs[1:5, 1:5]
logCPMsWithNAs[is.na(assay(protExpCostaEtAl2021, "log2protexp"))] <- NA
logCPMsWithNAs[1:5, 1:5]

## -----------------------------------------------------------------------------
gsvaAnnotation(logCPMsWithNAs) <- EntrezIdentifier("org.Hs.eg.db")

## -----------------------------------------------------------------------------
gsvapar <- gsvaParam(logCPMsWithNAs, c7.genesets.filt, minSize=5)

## -----------------------------------------------------------------------------
es_gsva_everything <- gsva(gsvapar)
es_gsva_everything[1:10, 1:4]
all(is.na(es_gsva_everything))

## ----error=TRUE---------------------------------------------------------------
try({
gsvapar <- gsvaParam(logCPMsWithNAs, c7.genesets.filt, minSize=5,
                     use="all.obs")
})

## -----------------------------------------------------------------------------
gsvapar <- gsvaParam(logCPMsWithNAs, c7.genesets.filt, minSize=5,
                     use="na.rm")
es_gsva_narm <- gsva(gsvapar)
round(es_gsva_narm[1:10, 1:4], digits=2)

## -----------------------------------------------------------------------------
ssgseapar <- ssgseaParam(logCPMsWithNAs, c7.genesets.filt, minSize=5,
                         use="na.rm")
es_ssgsea_narm <- gsva(ssgseapar)
round(es_ssgsea_narm[1:10, 1:4], digits=2)

## -----------------------------------------------------------------------------
gsvaAnnotation(protExpCostaEtAl2021) <- EntrezIdentifier("org.Hs.eg.db")
gsvapar <- gsvaParam(protExpCostaEtAl2021, c7.genesets.filt,
                     assay="logCPM", minSize=5)
es_gsva <- gsva(gsvapar)
ssgseapar <- ssgseaParam(protExpCostaEtAl2021, c7.genesets.filt,
                         assay="logCPM", minSize=5)
es_ssgsea <- gsva(ssgseapar)

## ----missingdatacomp, echo=TRUE, fig.width=5, fig.height=5, out.width="600px", dpi=100, fig.cap="Correlation between enrichment scores calculated from RNA-seq expression profiles with missing values, and those calculated from the complete version of the same data."----
r_es_gsva <- r_es_ssgsea <- numeric(nrow(es_gsva))
for (i in 1:nrow(es_gsva)) {
    r_es_gsva[i] <- cor(assay(es_gsva)[i, ], es_gsva_narm[i, ], use="complete.obs")
    r_es_ssgsea[i] <- cor(assay(es_ssgsea)[i, ], es_ssgsea_narm[i, ], use="complete.obs")
}
boxplot(list(GSVA=r_es_gsva, ssGSEA=r_es_ssgsea),
	ylab="Correlation with complete data", las=1)

## ----session_info, cache=FALSE------------------------------------------------
sessionInfo()

