createMAE {ELMER} | R Documentation |
This function will receive a gene expression and DNA methylation data objects and create a Multi Assay Experiment.
createMAE(exp, met, colData, sampleMap, linearize.exp = FALSE, filter.probes = NULL, met.na.cut = 0.2, filter.genes = NULL, met.platform = "450K", genome = NULL, save = TRUE, save.filename, TCGA = FALSE)
exp |
A Summaerized Experiment, a matrix or path of rda file only containing the data. Rownames should be either Ensembl gene id (ensembl_gene_id) or gene symbol (external_gene_name) |
met |
A Summaerized Experiment, a matrix or path of rda file only containing the data. |
colData |
A DataFrame or data.frame of the phenotype data for all participants |
sampleMap |
A DataFrame or data.frame of the matching samples and colnames of the gene expression and DNA methylation matrix. This should be used if your matrix have different columns names. This object must have columns primary (sample ID) and colname (names of the columns of the matrix). |
linearize.exp |
Take log2(exp + 1) in order to linearize relation between methylation and expression |
filter.probes |
A GRanges object contains the coordinate of probes which locate
within promoter regions or distal feature regions such as union enhancer from REMC and FANTOM5.
See |
met.na.cut |
Define the percentage of NA that the line should have to remove the probes for humanmethylation platforms. |
filter.genes |
List of genes ensemble ids to filter from object |
met.platform |
DNA methylation platform "450K" or "EPIC" |
genome |
Which is the default genome to make gene information. Options hg19 and hg38 |
save |
If TRUE, MAE object will be saved into a file named as the argument save.file if this was set, otherwise as mae_genome_met.platform.rda. |
save.filename |
Name of the rda file to save the object (must end in .rda) |
TCGA |
A logical. FALSE indicate data is not from TCGA (FALSE is default). TRUE indicates data is from TCGA and sample section will automatically filled in. |
A MultiAssayExperiment object
# NON TCGA example: matrices has diffetrent column names gene.exp <- S4Vectors::DataFrame(sample1.exp = c("ENSG00000141510"=2.3,"ENSG00000171862"=5.4), sample2.exp = c("ENSG00000141510"=1.6,"ENSG00000171862"=2.3)) dna.met <- S4Vectors::DataFrame(sample1.met = c("cg14324200"=0.5,"cg23867494"=0.1), sample2.met = c("cg14324200"=0.3,"cg23867494"=0.9)) sample.info <- S4Vectors::DataFrame(primary = c("sample1","sample2"), sample.type = c("Normal", "Tumor")) sampleMap <- S4Vectors::DataFrame(primary = c("sample1","sample1","sample2","sample2"), colname = c("sample1.exp","sample1.met", "sample2.exp","sample2.met")) mae <- createMAE(exp = gene.exp, met = dna.met, sampleMap = sampleMap, met.platform ="450K", colData = sample.info, genome = "hg38") # You can also use sample Mapping and Sample information tables from a tsv file # You can use the createTSVTemplates function to create the tsv files readr::write_tsv(as.data.frame(sampleMap), path = "sampleMap.tsv") readr::write_tsv(as.data.frame(sample.info), path = "sample.info.tsv") mae <- createMAE(exp = gene.exp, met = dna.met, sampleMap = "sampleMap.tsv", met.platform ="450K", colData = "sample.info.tsv", genome = "hg38") ## Not run: # TCGA example using TCGAbiolinks # Testing creating MultyAssayExperiment object # Load library library(TCGAbiolinks) library(SummarizedExperiment) samples <- c("TCGA-BA-4074", "TCGA-BA-4075", "TCGA-BA-4077", "TCGA-BA-5149", "TCGA-UF-A7JK", "TCGA-UF-A7JS", "TCGA-UF-A7JT", "TCGA-UF-A7JV") #1) Get gene expression matrix query.exp <- GDCquery(project = "TCGA-HNSC", data.category = "Transcriptome Profiling", data.type = "Gene Expression Quantification", workflow.type = "HTSeq - FPKM-UQ", barcode = samples) GDCdownload(query.exp) exp.hg38 <- GDCprepare(query = query.exp) # Aligned against Hg19 query.exp.hg19 <- GDCquery(project = "TCGA-HNSC", data.category = "Gene expression", data.type = "Gene expression quantification", platform = "Illumina HiSeq", file.type = "normalized_results", experimental.strategy = "RNA-Seq", barcode = samples, legacy = TRUE) GDCdownload(query.exp.hg19) exp.hg19 <- GDCprepare(query.exp.hg19) # Our object needs to have emsembl gene id as rownames rownames(exp.hg19) <- values(exp.hg19)$ensembl_gene_id # DNA Methylation query.met <- GDCquery(project = "TCGA-HNSC", legacy = TRUE, data.category = "DNA methylation", barcode = samples, platform = "Illumina Human Methylation 450") GDCdownload(query.met) met <- GDCprepare(query = query.met) distal.enhancer <- get.feature.probe(genome = "hg19",platform = "450k") # Consisering it is TCGA and SE mae.hg19 <- createMAE(exp = exp.hg19, met = met, TCGA = TRUE, genome = "hg19", filter.probes = distal.enhancer) values(getExp(mae.hg19)) mae.hg38 <- createMAE(exp = exp.hg38, met = met, TCGA = TRUE, genome = "hg38", filter.probes = distal.enhancer) values(getExp(mae.hg38)) # Consisering it is TCGA and not SE mae.hg19.test <- createMAE(exp = assay(exp.hg19), met = assay(met), TCGA = TRUE, genome = "hg19", filter.probes = distal.enhancer) mae.hg38 <- createMAE(exp = assay(exp.hg38), met = assay(met), TCGA = TRUE, genome = "hg38", filter.probes = distal.enhancer) values(getExp(mae.hg38)) # Consisering it is not TCGA and SE # DNA methylation and gene expression Objects should have same sample names in columns not.tcga.exp <- exp.hg19 colnames(not.tcga.exp) <- substr(colnames(not.tcga.exp),1,15) not.tcga.met <- met colnames(not.tcga.met) <- substr(colnames(not.tcga.met),1,15) phenotype.data <- data.frame(row.names = colnames(not.tcga.exp), samples = colnames(not.tcga.exp), group = c(rep("group1",4),rep("group2",4))) distal.enhancer <- get.feature.probe(genome = "hg19",platform = "450k") mae.hg19 <- createMAE(exp = not.tcga.exp, met = not.tcga.met, TCGA = FALSE, filter.probes = distal.enhancer, genome = "hg19", colData = phenotype.data) ## End(Not run) createMAE