## ----setup, include=TRUE------------------------------------------------------
# Global knitr configuration
knitr::opts_chunk$set(echo = TRUE, collapse = TRUE, message = FALSE, warning = FALSE)

# Load libraries required for the pipeline workflow
library(lncRna)
library(rtracklayer)
library(GenomicRanges)
library(IRanges)
library(gprofiler2)
library(plotly)
library(fmsb)
library(patchwork)

## ----create-mock-data---------------------------------------------------------
# 1. Sample GRanges object (mimicking an imported GTF file)
mockGtf <- GRanges(
  seqnames = "chr1",
  ranges = IRanges(start = c(100, 200, 400, 500), width = c(50, 60, 100, 20)),
  strand = "+",
  type = c("exon", "exon", "transcript", "exon"),
  gene_id = c("G1", "G1", "G2", "G2"),
  gene_biotype = c("protein_coding", "protein_coding", "lncRNA", "lncRNA"),
  transcript_id = c("tx1", "tx1", "tx2", "tx2"),
  transcript_biotype = c("protein_coding", "protein_coding", "lncRNA", "lncRNA"),
  exon_number = c("1", "2", NA, "1")
)

# 2. Partitioning sequences using createTrainTestSets
# This ensures we have ground truth sets for performance evaluation
all_ids <- c("tx1", "tx2", "tx3", "tx4", "tx5", "tx6")
set.seed(123)
sets <- createTrainTestSets(sequences = all_ids, percentTrain = 0.5, prefix = "demo")

# Define independent test sets (nc = non-coding, cds = coding)
ncTestSet <- sets$demo.test
cdsTestSet <- sets$demo.train

## ----extract-stats------------------------------------------------------------
# Calculate transcript length and exon count
transcriptStats <- getGtfStats(gtfObject = mockGtf)

# Extract biotype information at the gene level
geneBiotypes <- getBiotypes(refGtf = mockGtf, level = "gene")

print(head(transcriptStats))

## ----aggregate-and-venn-------------------------------------------------------
# Mocking aggregated results from CPC2, PLEK, and CPAT
mockCodPotList <- list(
  seqIDs = c("tx1", "tx2", "tx3", "tx4", "tx5", "tx6"),
  tools = list(
    CPC2 = c(0, 1, 1, 0, 1, 0),
    PLEK = c(0, 1, 0, 0, 1, 1),
    CPAT = c(0, 1, 1, 1, 0, 0)
  )
)

# Visualize tool agreement (Venn)
if (requireNamespace("venn", quietly = TRUE)) {
    plotVennCodPot(codPot = mockCodPotList)
}

## ----evaluation-pipeline------------------------------------------------------
# 1. Prepare evaluation sets by annotating predictions with ground truth
evaluationSummary <- prepareEvaluationSets(
  codPotList = mockCodPotList,
  ncTest = ncTestSet,
  cdsTest = cdsTestSet
)

# 2. Evaluate performance for each tool combination (e.g., "CPC2+PLEK")
combSummary <- evaluateToolCombinations(summaryList = evaluationSummary)

# 3. Calculate summary metrics
combStats <- bestToolCombination(combinationSummaryList = combSummary)
print(combStats)

## ----calculate-cm-------------------------------------------------------------
# Filter for methods with Accuracy >= 0.5 (low threshold for demo purposes)
# Use metricsData to provide values for filtering
allCms <- calculateCM(
  combinationSummaryList = combSummary,
  metricsData = combStats, 
  threshold = 0.5,
  returnOnlyHigh = TRUE,
  metricToExtract = "Accuracy",
  printMetricThresholds = TRUE
)

## ----viz-performance, fig.width=10, fig.height=6------------------------------
# Comparison of top combinations (Radar Plot)
methodsToPlot <- names(allCms)[1:min(3, length(allCms))]
plotRadarMetrics(cmList = allCms, methods = methodsToPlot, plotTitle = "Top Methods Comparison")

# Detailed metrics view (Clock Plot) in 'multiple' layout
plotClockMetrics(cmList = allCms, plotTitle = "Performance Metrics Breakdown", layout = "multiple")

## ----interactions-------------------------------------------------------------
# Identify Cis-interactions (genomic proximity)
feelncFile <- tempfile()
write.table(data.frame(isBest=1, lncRNA_gene="LNC1", partnerRNA_gene="GENE_A", distance=500), 
            feelncFile, sep="\t", row.names=FALSE)
cisInteractions <- findCisInteractions(FEELncClassesFile = feelncFile, maxDist = 1000)

# Identify Trans-interactions via expression correlation
mockExpr <- matrix(rnorm(50), nrow = 5, 
                   dimnames = list(c("LNC1", "LNC2", "GENE_A", "GENE_B", "GENE_C"), paste0("S", 1:10)))
mockExpr["LNC1",] <- mockExpr["GENE_A",] + rnorm(10, 0, 0.1)

transInteractions <- findTransInteractions(exprMatrix = mockExpr, rval = 0.8, pval = 0.05)

## ----sankey-viz---------------------------------------------------------------
# Sample enrichment result (mimicking gprofiler2::gost)
mockGost <- data.frame(term_id="GO:01", term_name="stress response", intersection="GENE_A")

# Annotate interactions with functional terms
interactionsProcessed <- annotateInteractions(gostResult = mockGost, 
                                               interactionTable = transInteractions, type = "trans")

# Generate interactive Sankey diagram
plotSankeyInteractions(interactionData = interactionsProcessed, groupBy = "term", 
                       selectIds = "stress response", title = "Functional Interaction Flow")

## ----session-info-------------------------------------------------------------
sessionInfo()

