hpGeneListComp {PCAN} | R Documentation |
This function compares a whole gene list to a set of HP terms using matrices of semantic similarity.
hpGeneListComp(geneList, ssMatByGene, geneSSScore = NULL, ...)
geneList |
a vector providing the genes of interest. |
ssMatByGene |
a list (one element per gene) of matrices
of semantic similarity between HP terms as returned by
compareHPSets. |
geneSSScore |
a vector of semantic similarity scores for all the genes in ssMatByGene list. If not provided these scores are computed from ssMatByGene. |
... |
parameters for hpSetCompSummary. |
A list with the following elements:
The original HP of interest.
The distribution of scores for all genes for the HP of interest.
The semantic similarity by gene.
For each gene, the related HP term that best fits each HP of interest (the HP of interest are the colnames of the elements of ssMatByGene).
The median of scores.
The p-value according to a wilcox.test
comparing the scores of the genes
of interest to those of all the other genes.
Gene with the highest score among the genes of interest.
Maximum score.
Quantile of the scores compared to the whole list of genes.
Adjusted quantiles according to Benjamini-Hochberg
(p.adjust
).
Patrice Godard
hpGeneHeatmap
, compareHPSets
,
hpSetCompSummary
and hpSetCompBestMatch
## Example script for hpGeneListComp().
## It scores every gene against the HP terms of a disease, then tests whether
## two gene lists related to a candidate gene (its pathway, then its
## interaction partners) score higher than the rest of the genes.
## Requires the PCAN package (and its dependencies) to be attached.

data(geneByHp, hp_descendants, package = "PCAN")
data(hp_ancestors, hpDef, package = "PCAN")
data(traitDef, geneDef, package = "PCAN")
data(hpByTrait, package = "PCAN")
geneByHp <- unstack(geneByHp, entrez ~ hp)

###########################################
## Compute information content of each HP according to associated genes
ic <- computeHpIC(geneByHp, hp_descendants)

###########################################
## Use case: comparing a gene and a disease
omim <- "612285"
traitDef[which(traitDef$id == omim), ]
entrez <- "57545"
geneDef[which(geneDef$entrez == entrez), ]

## Get HP terms associated to the disease
hpOfInterest <- hpByTrait$hp[which(hpByTrait$id == omim)]

## Get HP terms associated to the gene
hpByGene <- unstack(stack(geneByHp), ind ~ values)
geneHps <- hpByGene[[entrez]]

## HP Comparison
hpGeneResnik <- compareHPSets(
  hpSet1 = names(ic), hpSet2 = hpOfInterest,
  IC = ic,
  ancestors = hp_ancestors,
  method = "Resnik",
  BPPARAM = SerialParam()
)
## One similarity matrix per gene: rows are the HP terms of the gene,
## columns are the HP terms of interest.
hpMatByGene <- lapply(
  hpByGene,
  function(x) {
    hpGeneResnik[x, , drop = FALSE]
  }
)
resnSss <- unlist(lapply(
  hpMatByGene,
  hpSetCompSummary,
  method = "bma", direction = "symSim"
))
candScore <- resnSss[entrez]

###########################################
## The pathway consensus approach
## What about genes belonging to the same pathways as the candidate
data(rPath, hsEntrezByRPath, package = "PCAN")
candPath <- names(hsEntrezByRPath)[which(unlist(lapply(
  hsEntrezByRPath,
  function(x) entrez %in% x
)))]
rPath[which(rPath$Pathway %in% candPath), ]
## Pathway name reused as title for the plots below
pathName <- rPath[which(rPath$Pathway %in% candPath), "Pathway_name"]
## NOTE: `[[` needs a single name, so this assumes that the candidate gene
## belongs to exactly one pathway.
rPathRes <- hpGeneListComp(
  geneList = hsEntrezByRPath[[candPath]],
  ssMatByGene = hpMatByGene,
  geneSSScore = resnSss
)
hist(
  resnSss,
  breaks = 100, col = "grey",
  ylim = c(0, 5),
  xlab = expression(Sim[sym]),
  ylab = "Density",
  main = paste(
    "Distribution of symmetric semantic similarity scores for all the",
    length(resnSss), "genes"
  ),
  probability = TRUE
)
## Overlay the score density of the pathway genes, one bar per bin
toAdd <- hist(
  rPathRes$scores,
  breaks = 100,
  plot = FALSE
)
## seq_along() rather than 1:length() so that nothing is drawn if empty
for (i in seq_along(toAdd$density)) {
  polygon(
    x = toAdd$breaks[c(i, i + 1, i + 1, i)],
    y = c(0, 0, rep(toAdd$density[i], 2)),
    col = "#BE000040",
    border = "#800000FF"
  )
}
legend(
  "topright",
  paste0(
    "Genes belonging to the ", candPath, " pathway:\n'",
    pathName,
    "'\nand with a symmetric semantic similarity score (",
    sum(!is.na(rPathRes$scores)),
    "/",
    length(rPathRes$scores),
    ")\n",
    "p-value: ", signif(rPathRes$p.value, 2)
  ),
  pch = 15,
  col = "#BE000040",
  bty = "n",
  cex = 0.6
)

## Assessing the symmetric semantic similarity for each gene in the pathway
pathSss <- rPathRes$scores[which(!is.na(rPathRes$scores))]
names(pathSss) <- geneDef[match(names(pathSss), geneDef$entrez), "symbol"]
## Larger bottom margin for the gene symbols; restored by par(opar) below
opar <- par(mar = c(7.1, 4.1, 4.1, 2.1))
barplot(
  sort(pathSss),
  las = 2,
  ylab = expression(Sim[sym]),
  main = pathName
)
p <- c(0.25, 0.5, 0.75, 0.95)
abline(
  h = quantile(resnSss, probs = p),
  col = "#BE0000",
  lty = c(2, 1, 2, 2),
  lwd = c(2, 2, 2, 1)
)
text(
  rep(0, 4),
  quantile(resnSss, probs = p),
  p,
  pos = 3,
  offset = 0,
  col = "#BE0000",
  cex = 0.6
)
legend(
  "topleft",
  paste0(
    "Quantiles of the distribution of symmetric semantic similarity\n",
    "scores for all the genes."
  ),
  lty = 1,
  col = "#BE0000",
  cex = 0.6
)
par(opar)

## A heatmap showing the best HP match for each gene in the pathway
geneLabels <- geneDef$symbol[which(!duplicated(geneDef$entrez))]
names(geneLabels) <- geneDef$entrez[which(!duplicated(geneDef$entrez))]
hpLabels <- hpDef$name
names(hpLabels) <- hpDef$id
hpGeneHeatmap(
  rPathRes,
  genesOfInterest = entrez,
  geneLabels = geneLabels,
  hpLabels = hpLabels,
  clustByGene = TRUE,
  clustByHp = TRUE,
  palFun = colorRampPalette(c("white", "red")),
  goiCol = "blue",
  main = pathName
)

###########################################
## What about genes interacting with the candidate (including itself)
data(hqStrNw, package = "PCAN")
neighbors <- unique(c(
  hqStrNw$gene1[which(hqStrNw$gene2 == entrez)],
  hqStrNw$gene2[which(hqStrNw$gene1 == entrez)],
  entrez
))
## Title shared by the barplot and the heatmap of interacting genes
neighTitle <- paste0(
  "Genes interacting with ",
  geneDef[which(geneDef$entrez == entrez), "symbol"],
  " (", entrez, ")"
)
neighRes <- hpGeneListComp(
  geneList = neighbors,
  ssMatByGene = hpMatByGene,
  geneSSScore = resnSss
)
hist(
  resnSss,
  breaks = 100, col = "grey",
  ylim = c(0, 10),
  xlab = expression(Sim[sym]),
  ylab = "Density",
  main = paste(
    "Distribution of symmetric semantic similarity scores for all the",
    length(resnSss), "genes"
  ),
  probability = TRUE
)
## Overlay the score density of the interacting genes, one bar per bin
toAdd <- hist(
  neighRes$scores,
  breaks = 100,
  plot = FALSE
)
for (i in seq_along(toAdd$density)) {
  polygon(
    x = toAdd$breaks[c(i, i + 1, i + 1, i)],
    y = c(0, 0, rep(toAdd$density[i], 2)),
    col = "#BE000040",
    border = "#800000FF"
  )
}
legend(
  "topright",
  paste0(
    "Genes interacting with ",
    geneDef[which(geneDef$entrez == entrez), "symbol"],
    " (", entrez, ")",
    "\nand with a symmetric semantic similarity score (",
    sum(!is.na(neighRes$scores)),
    "/",
    length(neighRes$scores),
    ")\n",
    "p-value: ", signif(neighRes$p.value, 2)
  ),
  pch = 15,
  col = "#BE000040",
  bty = "n",
  cex = 0.6
)

## Assessing the symmetric semantic similarity score for each interacting gene
neighSss <- neighRes$scores[which(!is.na(neighRes$scores))]
names(neighSss) <- geneDef[match(names(neighSss), geneDef$entrez), "symbol"]
opar <- par(mar = c(7.1, 4.1, 4.1, 2.1))
barplot(
  sort(neighSss),
  las = 2,
  ylab = expression(Sim[sym]),
  main = neighTitle
)
p <- c(0.25, 0.5, 0.75, 0.95)
abline(
  h = quantile(resnSss, probs = p),
  col = "#BE0000",
  lty = c(2, 1, 2, 2),
  lwd = c(2, 2, 2, 1)
)
text(
  rep(0, 4),
  quantile(resnSss, probs = p),
  p,
  pos = 3,
  offset = 0,
  col = "#BE0000",
  cex = 0.6
)
legend(
  "topleft",
  paste0(
    "Quantiles of the distribution of symmetric semantic similarity\n",
    "scores for all the genes."
  ),
  lty = 1,
  col = "#BE0000",
  cex = 0.6
)
par(opar)

## A heatmap showing the best HP match for each neighbor gene
hpGeneHeatmap(
  neighRes,
  genesOfInterest = entrez,
  geneLabels = geneLabels,
  hpLabels = hpLabels,
  clustByGene = TRUE,
  clustByHp = TRUE,
  palFun = colorRampPalette(c("white", "red")),
  goiCol = "blue",
  ## Titled after the interacting genes, not the pathway shown earlier
  main = neighTitle
)