## ----preliminaries, echo=FALSE, results='hide', message=FALSE, warning=FALSE----
library(sva)
library(xtable)
library(Biobase)

## ----example1, message=FALSE, warning=FALSE-----------------------------------
# Attach curatedCRCData so that its datasets and bundled files can be found.
library(curatedCRCData)

## ----example1tcgastep2--------------------------------------------------------
# List the datasets available in the curatedCRCData package.
data(package="curatedCRCData")

## ----example1_load_tcga-------------------------------------------------------
# Load a single dataset into the workspace and print it.
data(TCGA.COAD_eset)
TCGA.COAD_eset

## ----example2loadstep1--------------------------------------------------------
# Run the patient-selection configuration file found in the package's extdata
# directory, then list the objects now present in the workspace.
# NOTE(review): presumably the config only defines selection settings as
# variables -- confirm against the file itself.
source(system.file("extdata", 
"patientselection_all.config",package="curatedCRCData"))
ls()

## ----showls-------------------------------------------------------------------
# Show the value of every object currently listed by ls().
sapply(ls(), get)

## ----example2loadstep2--------------------------------------------------------
# Run the package's createEsetList.R script. The next chunk relies on an object
# called `esets` that is not created in this file, so it presumably comes from
# this script -- confirm.
source(system.file("extdata", "createEsetList.R", package = "curatedCRCData"))

## ----example2loadstep3--------------------------------------------------------
# Names of the elements of `esets`.
names(esets)

## ----expand-------------------------------------------------------------------
# Expand features whose name bundles several identifiers into one row each.
#
# A feature name such as "A///B" is split on `sep`, and the feature's row is
# repeated once per identifier. Each identifier ends up exactly once in the
# result: features are first ordered by how many identifiers they carry
# (fewest first), and only the first occurrence of an identifier is kept, so a
# feature naming a single identifier takes precedence over a multi-identifier
# feature that also mentions it.
#
# Arguments:
#   eset  Object supporting featureNames(), `featureNames<-` and row
#         subsetting with `[i, ]` (e.g. a Biobase ExpressionSet).
#   sep   Separator handed to strsplit() as `split`; it is therefore
#         interpreted as a regular expression.
#
# Returns `eset` restricted/expanded to one row per unique identifier, with the
# identifiers as feature names. An object without features is returned as is.
expandProbesets <- function (eset, sep = "///") 
{
    ids <- strsplit(featureNames(eset), sep)
    # Nothing to expand; also avoids indexing with an empty selection below.
    if (length(ids) == 0L) {
        return(eset)
    }

    # Put features with the fewest identifiers first so that they win when the
    # same identifier appears in several features.
    ord <- order(lengths(ids))
    eset <- eset[ord, ]
    ids <- ids[ord]

    # row_idx[k] is the row of `eset` that the k-th flattened identifier
    # came from.
    row_idx <- rep(seq_along(ids), times = lengths(ids))
    flat_ids <- unlist(ids, use.names = FALSE)

    # Keep only the first occurrence of every identifier.
    keep <- !duplicated(flat_ids)
    eset <- eset[row_idx[keep], ]
    featureNames(eset) <- flat_ids[keep]
    eset
}

# Demonstrate the expansion on (at most) the first six features whose name
# contains "AA": print the first three columns before and after expansion.
X <- TCGA.COAD_eset[
    head(grep("AA", featureNames(TCGA.COAD_eset), fixed = TRUE), n = 6L),
]
exprs(X)[, 1:3]
exprs(expandProbesets(X))[, 1:3]

## ----heatmap, echo=FALSE, fig.cap="Available clinical annotation. This heatmap visualizes for each curated clinical characteristic (rows) the availability in each dataset (columns). Red indicates that the corresponding characteristic is available for at least one sample in the dataset."----
# Build a logical matrix telling, for every covariate (rows) and every dataset
# in `esets` (columns), whether the dataset has at least one non-missing value
# for that covariate. The covariates are the varLabels() of the first dataset.
.esetsStats <- function(esets) {
    covariates <- varLabels(esets[[1]])
    # One logical per dataset: does `covar` hold any non-NA value there?
    availableIn <- function(covar) {
        vapply(esets, function(eset) any(!is.na(eset[[covar]])), logical(1))
    }
    availability <- lapply(covariates, availableIn)
    names(availability) <- covariates
    do.call(rbind, availability)
}

# Availability of every clinical covariate (rows) in every dataset (columns).
df.r <- .esetsStats(esets)
# Recode for plotting: available (TRUE) becomes 0, unavailable (FALSE) becomes 1.
M <- ifelse(df.r, 0, 1)
colnames(M) <- sub("_eset$", "", colnames(M))
# The first two rows are dropped: no need to show the sample ids.
M <- M[-c(1, 2), ]
# Rows are reversed before plotting; Rowv = NA keeps that order.
heatmap(M[nrow(M):1, ], Rowv = NA, scale = "none", margins = c(8, 10))

## ----esetToTableFuns----------------------------------------------------------
# Run the package's summarizeEsets.R script. getEsetData(), used below, is not
# defined in this file, so it presumably comes from this script -- confirm.
source(system.file("extdata", "summarizeEsets.R", package = "curatedCRCData"))

## ----esettable, echo=FALSE----------------------------------------------------
# Apply getEsetData() to every dataset and transpose the result so that each
# dataset becomes one row (assumes getEsetData() returns results of equal
# length so that sapply() simplifies to a matrix -- TODO confirm).
summary.table <- t(sapply(esets, getEsetData))
# Remove the first "_eset" from each row name.
rownames(summary.table) <- sub("_eset", "", rownames(summary.table))

## ----writeesettable-----------------------------------------------------------
# Create a temporary file path (the surrounding parentheses print it) and write
# the table to it as comma-separated, quoted text.
# NOTE(review): row.names=FALSE omits the row names, which is where the dataset
# names were just stored -- confirm this is intended.
(myfile <- tempfile())
write.table(summary.table, file=myfile, row.names=FALSE, quote=TRUE, sep=",")

## ----xtable, echo=FALSE, results='asis'---------------------------------------
# Render the summary table with knitr's kable().
library(knitr)
kable(summary.table, caption="Datasets provided by curatedCRCData.")

## ----simplygetdata, eval=FALSE------------------------------------------------
# library(curatedCRCData)
# library(Biobase)
# data(TCGA.COAD_eset)
# write.csv(exprs(TCGA.COAD_eset), file="TCGA.COAD_eset_exprs.csv")
# write.csv(pData(TCGA.COAD_eset), file="TCGA.COAD_eset_clindata.csv")

## ----simplyseveraldatasets, eval=FALSE----------------------------------------
# data.to.fetch <- c("TCGA.COAD_eset", "GSE37317_eset")
# for (onedata in data.to.fetch){
#     print(paste("Fetching", onedata))
#     data(list=onedata)
#     write.csv(exprs(get(onedata)), file=paste(onedata, "_exprs.csv", sep=""))
#     write.csv(pData(get(onedata)), file=paste(onedata, "_clindata.csv", sep=""))
# }

## ----sessioninfo, echo=FALSE--------------------------------------------------
# Print the R version and the attached/loaded packages used for this run.
sessionInfo()

