## ----style, echo = FALSE, results = 'asis'------------------------------------
    BiocStyle::markdown()

## ----install, eval=FALSE------------------------------------------------------
# if(!requireNamespace("BiocManager", quietly=TRUE))
#     install.packages("BiocManager")
# BiocManager::install("queeems")

## ----TL queeems, eval=TRUE----------------------------------------------------
# ><>< # Load package into current R session
library("queeems")

# ><>< # Create useful summary function
infoests <- function(xdata){
    serr <- sd(xdata) / sqrt( length(xdata))
    avg <- mean( xdata )
    low <- avg  -  serr
    upp <- avg  +  serr
    outs <- c(low=low, avg=avg, upp=upp)
    return( outs )
}

# ><>< # Retrieve path to simulated tutorial data folder
datadir <- system.file("extdata/example/stemL/", package="queeems")

# ><>< # Indicate number of simulated data replicates
nreps <- 5

# ><>< # Set preferred Bayes factor threshold
bflimit <- 3

# ><>< # Set preferred saturated sites tolerance
ssprop <- 0.01

# Branch length
blent <- seq(0.03, 0.15, 0.03)
btags <- sprintf("%02.0f", blent*100)

# ><>< # Create data frames to save interesting outputs
cnames <- paste("bl", btags, sep="")
rnames <- c("low","avg","upp")
vnames <- paste("rep.", seq(1,nreps), sep="")
homoBF <- matrix(NA, nreps, length(btags), dimnames=list(vnames,cnames))
sbayes <- matrix(NA, nreps, length(btags), dimnames=list(vnames,cnames))
smrybx <- matrix(NA, 3, length(btags), dimnames=list(rnames,cnames))
cdnTmp <- aasTmp <- vector("numeric", 100*nreps)
nucTmp <- vector("numeric", 300*nreps)
nucent <- codent <- aasent <- smrybx

for(a1 in seq(1,length(btags))){
    # ><>< # Read merged molecular sequences data as text
    seqsname <- file.path(datadir, paste("seqs",btags[a1],".txt", sep=""))
    seqstext <- readLines(seqsname)

    # ><>< # Extract information about number of simulated replicates
    sbreaks <- which(seqstext == "  64  300")

    # ><>< # Analyse data replicates captured from merged data file
    for(a0 in seq(1,nreps)){
        nucentID <- seq(1+(a0-1)*300, a0*300)
        entropyID <- seq(1+(a0-1)*100, a0*100)

        # ><>< # Extract and temporarily save a sequence replicate
        init <- sbreaks[a0] + 1
        halt <- sbreaks[a0] + 128
        seqrep <- seqstext[seq(init,halt)]
        tempfile <- file.path(tempdir(), "iseqs.fasta")
        write.table(seqrep, file=tempfile, append=FALSE,
            quote=FALSE, row.names=FALSE, col.names=FALSE)

        # ><>< # Execute saturation analysis
        satest <- seqSaturation(tempfile, "A", 6, 0.4, 7, bflimit, ssprop)

        # ><>< # Extract Bayes factors
        homoBF[a0,a1] <- as.numeric( summary(satest)["fullBF",])
        sbayes[a0,a1] <- sum(BFs(satest) > bflimit)

        # ><>< # Execute codon entropy analysis
        tempentropy <- molentropy(tempfile, "codon", "shannon")
        cdnTmp[entropyID] <- sitentropies(tempentropy)

        # ><>< # Process nucleotide entropy analysis
        tempentropy <- molentropy(tempfile, "dna", "shannon")
        nucTmp[nucentID] <- sitentropies(tempentropy)

        # ><>< # Transform nucleotide to amino acid sequences
        dnaformat <- Biostrings::readDNAStringSet(tempfile)
        aaformat <- Biostrings::translate(dnaformat)
        Biostrings::writeXStringSet(aaformat, tempfile)

        # ><>< # Implement amino acid entropy analysis
        tempentropy <- molentropy(tempfile, "aa", "shannon")
        aasTmp[entropyID] <- sitentropies(tempentropy)
    }
    # ><>< # Summarise and save entropy values
    nucent[rnames,a1] <- infoests( nucTmp )[rnames]
    codent[rnames,a1] <- infoests( cdnTmp )[rnames]
    aasent[rnames,a1] <- infoests( aasTmp )[rnames]

    # ><>< # Provide analysis progress update
    message("Completed analyses of stem length 0.", btags[a1])
}
# ><>< # Site-wise Bayes factors peak
print( head( BFs(satest)))

# ><>< # Saturated site counts
print(sbayes)

# ><>< # Overall Bayes factor estimates
print(homoBF)

## ----treeLength BFs, eval=TRUE------------------------------------------------
# ><>< # Function that makes plotting easier
tfunc <- function(i, pty, clr, cx, mat, agl=90){
    arrows(i, mat["low",i], i, mat["upp",i], .1, agl, 3, colors()[clr], lwd=3)
    points(i,mat["avg",i],pch=pty,lwd=2,col=colors()[clr],cex=cx,bg="white")
    return(0)
}

# ><>< # Summarise overall Bayes factor values
bfsumm <- apply(homoBF,2,infoests)

par(mar=c(4.2,4.2,0.2,0.2))
plot(0, 0, col="white", bty="n", xlab="", ylab="",
    xlim=c(1.0,5.0), ylim=c(0.0,7.0), xaxt="n", yaxt="n")
phold <- vapply(seq(1,5), tfunc, 15, 50, 2.5, bfsumm, FUN.VALUE=0)
axis(1, seq(1,5), seq(3,15,3)/100*7, tck=-.01, lwd=2, padj=-.2, cex.axis=1.5)
axis(2, las=1, tck=-0.01, lwd=2, cex.axis=1.5, hadj=0.2)
text(1.3, 6.5, "A", cex=2.5, font=2)
mtext("Bayes factor", 2, 2.2, cex=2)
mtext("Tree Length", 1, 2.4, cex=2)
box(lwd=2)

## ----entropy, eval=TRUE-------------------------------------------------------
# ><>< # Scale entropy estimates by the maximum value
eScaledNuc <- nucent / (-log(1/4))
eScaledCodon <- codent / (-log(1/61))
eScaledAmino <- aasent / (-log(1/20))

# ><>< # Plot distributions of entropy estimates
par(mar=c(4.2,4.2,0.2,0.2))
bases <- c("Nucleotide","Codon","Amino acid")
plot(0, 0, col="white", bty="n", xlab="", ylab="",
    xlim=c(1.0,5.0), ylim=c(.19,0.57), xaxt="n", yaxt="n")
phold <- vapply(seq(1,5), tfunc, 0, 17, 1.5, eScaledNuc, FUN.VALUE=0)
phold <- vapply(seq(1,5), tfunc, 1, 60, 1.5, eScaledCodon, FUN.VALUE=0)
phold <- vapply(seq(1,5), tfunc, 8, 74, 1.5, eScaledAmino, FUN.VALUE=0)
axis(1, seq(1,5), seq(3,15,3)/100*7, tck=-.01, lwd=2, padj=-.5, cex.axis=1.5)
text(rep(3.7,3), c(.27,.24,.21)-.002, bases, cex=1.5, pos=4)
axis(2, las=1, tck=-0.01, lwd=2, cex.axis=1.5, hadj=0.7)
points(rep(3.6,3), c(.27,.24,.21), cex=2.5, lwd=2,
    pch=c(0,1,8), col=colors()[c(17,60,74)])
text(3.8, 0.30, "Base", cex=2, font=2)
mtext("Scaled Entropy", 2, 2.4, cex=2)
text(1.3, 0.54, "A.1", cex=2.5, font=2)
mtext("Tree Length", 1, 2.2, cex=2)
box(lwd=2)

## ----site BFs, eval=TRUE------------------------------------------------------
# ><>< # Site-wise Bayes factors from replicate 5 of length 1.05
par(mar=c(4.2,4.2,0.2,0.2))
plot(0, 0, col="white", bty="n", xlab="", ylab="",
    xlim=c(.6,4.4), ylim=c(.03,0.85), xaxt="n", yaxt="n")
mtext(c("Site-Wise Bayes Factors","Density"), c(1,2), 2.4, cex=2)
axis(2, las=1, tck=-0.01, lwd=2, cex.axis=1.5, hadj=0.7)
hist(BFs(satest), freq=FALSE, add=TRUE, xaxt="n",
    yaxt="n", main="", col=colors()[17], lwd=2)
axis(1, tck=-.01, lwd=2, padj=-.5, cex.axis=1.5)
text(4.1, 0.81, "A.2", cex=2, font=2)
box(lwd=2)

## ----dnds test, eval=TRUE-----------------------------------------------------
# ><>< # Retrieve path to simulated tutorial data folder
datadir <- system.file("extdata/example/varNS/", package="queeems")

# Non-synonymous variance value
nsvary <- seq(1, 5) / 10
ntags <- sprintf("%.0f", nsvary*10)

# ><>< # Create data frames to save interesting outputs
cnames <- paste("ns", ntags, sep="")
rnames <- c("low","avg","upp")
homoBF <- matrix(NA, nreps, length(ntags), dimnames=list(NULL,cnames))
sbayes <- matrix(NA, nreps, length(ntags), dimnames=list(NULL,cnames))

for(a1 in seq(1,length(ntags))){
    # ><>< # Read merged molecular sequences data as text
    seqsname <- file.path(datadir, paste("gdata",ntags[a1],".txt",sep=""))
    seqstext <- readLines(seqsname)

    # ><>< # Extract information about number of simulated replicates
    sbreaks <- which(seqstext == "  64  300")

    # ><>< # Analyse data replicates captured from merged data file
    for(a0 in seq(1,nreps)){

        # ><>< # Extract and temporarily save a sequence replicate
        init <- sbreaks[a0] + 1
        halt <- sbreaks[a0] + 128
        seqrep <- seqstext[seq(init,halt)]
        tempfile <- file.path(tempdir(), "iseqs.fasta")
        write.table(seqrep, file=tempfile, append=FALSE,
            quote=FALSE, row.names=FALSE, col.names=FALSE)

        # ><>< # Execute saturation analysis
        satest <- seqSaturation(tempfile, "A", 6, 0.4, 7, bflimit, ssprop)

        # ><>< # Extract Bayes factors
        homoBF[a0,a1] <- as.numeric( summary(satest)["fullBF",])
        sbayes[a0,a1] <- sum(BFs(satest) > bflimit)
    }
    # ><>< # Provide analysis progress update
    message("Completed analyses for \U03c3\U00b2 = 0.", ntags[a1])
}
# ><>< # Overall log(Bayes factor) estimates
print( log(homoBF))

# ><>< # Summarise overall Bayes factor and dN/dS values
bfsumm <- apply(log(homoBF),2,infoests)
dndsPath <- file.path(datadir, "dnds.csv")
dndses <- read.table(dndsPath, sep=";", header=TRUE)

# ><>< # Facilitate super-imposition of dN/dS and BFs
transplot <- function(x, xrng, newrng){
    xmin <- xrng[1]
    ymin <- newrng[1]
    xdiff <- diff(xrng)
    ydiff <- diff(newrng)
    newx <- (((x - xmin) / xdiff) * ydiff) + ymin
    return(newx)
}
newaxs <- seq(4,9)
oldaxs <- transplot(newaxs, c(5,9), c(-.4,2.3))
newDnDs <- transplot(dndses, c(5,9), c(-.4,2.3))
dndsumm <- apply(newDnDs, 2, infoests)

# ><>< # Add summaries of the BF values to plot
par(mar=c(4.4,4.2,0.2,3.0))
plot(0, 0, col="white", bty="n", xlab="", ylab="",
    xlim=c(1.0,5.0), ylim=c(-.4,2.3), xaxt="n", yaxt="n")
phold <- vapply(seq(1,5), tfunc, 15, 50, 2.5, bfsumm, FUN.VALUE=0)
axis(1, seq(1,5), nsvary, tck=-.01, lwd=2, padj=-.5, cex.axis=1.5)
axis(2, las=1, tck=-0.01, lwd=2, cex.axis=1.5, hadj=0.7)
mtext(expression(sigma[n]^2), 1, 3.4, cex=2)
mtext("Log(Bayes factor)", 2, 2.5, cex=2)
text(1.2, 2.2, "B", cex=2.5, font=2)

# ><>< # Include summaries of the dN/dS values
phold <- vapply(seq(1,5), tfunc, 23, 17, 2, dndsumm, 60, FUN.VALUE=0)
axis(4, oldaxs, newaxs, las=1, tck=-0.01, lwd=2, cex.axis=1.5, hadj=0.7)
mtext("dN/dS", 4, 2.0, cex=2)

# ><>< # Add legend to plot
text(c(2,2), c(2.2,2.0), c("BF","dN/dS"), cex=1.8, pos=4)
arrows(1.7, 2.21, 2.0, 2.21, .1, 90, 3, colors()[50], 1, 3)
arrows(1.7, 2.0, 2.0, 2.0, .1, 60, 3, colors()[17], 1, 3)
points(c(1.85,1.85), c(2.21,2.0), cex=2, bg="white",
    lwd=3, pch=c(15,23), col=colors()[c(50,17)])
box(lwd=2)

## ----sessionInfo, echo=FALSE--------------------------------------------------
sessionInfo()

