## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  message = FALSE,
  warning = FALSE,
  tidy = FALSE
)
library(BiocStyle)

python_available <- tryCatch({
    proc <- basilisk::basiliskStart(immLynx::immLynxEnv)
    on.exit(basilisk::basiliskStop(proc))
    TRUE
}, error = function(e) {
    FALSE
})

## ----load---------------------------------------------------------------------
library(immLynx)
library(scran)
library(scater)
library(ggplot2)

data("immLynx_example")

## ----clustcr-compare, eval=python_available-----------------------------------
# sce_mcl_2 <- runClustTCR(immLynx_example,
#                             inflation = 2.0,
#                             column_prefix = "mcl_2")
# sce_mcl_3 <- runClustTCR(sce_mcl_2,
#                             inflation = 3.0,
#                             column_prefix = "mcl_3")
# 
# cat("MCL (inflation=2):",
#     length(unique(na.omit(sce_mcl_3$mcl_2_TRB))),
#     "clusters\n")
# cat("MCL (inflation=3):",
#     length(unique(na.omit(sce_mcl_3$mcl_3_TRB))),
#     "clusters\n")

## ----esm-models, eval=FALSE---------------------------------------------------
# sce_small <- runEmbeddings(
#   immLynx_example,
#   model_name = "facebook/esm2_t12_35M_UR50D",
#   reduction_name = "esm_small"
# )
# 
# sce_med <- runEmbeddings(
#   sce_small,
#   model_name = "facebook/esm2_t33_650M_UR50D",
#   reduction_name = "esm_medium"
# )
# 
# sce_med <- scater::runUMAP(sce_med,
#                       dimred = "esm_small",
#                       name = "umap_small")
# sce_med <- scater::runUMAP(sce_med,
#                       dimred = "esm_medium",
#                       name = "umap_medium")
# 
# p1 <- scater::plotReducedDim(sce_med,
#               dimred = "umap_small") +
#   ggtitle("ESM-2 35M")
# p2 <- scater::plotReducedDim(sce_med,
#               dimred = "umap_medium") +
#   ggtitle("ESM-2 650M")
# p1 + p2

## ----both-chains, eval=FALSE--------------------------------------------------
# sce_paired <- runEmbeddings(
#   immLynx_example,
#   chains = "both",
#   reduction_name = "tcr_paired"
# )
# 
# sce_paired <- scater::runUMAP(sce_paired,
#                          dimred = "tcr_paired")
# scater::plotReducedDim(sce_paired, dimred = "UMAP")

## ----screpertoire-integration, eval=python_available--------------------------
# sce_clust <- runClustTCR(immLynx_example,
#                             chains = "TRB")
# 
# comparison <- table(
#   clustcr = sce_clust$clustcr_TRB,
#   clonotype = sce_clust$CTstrict
# )
# 
# cat("Number of clustcr clusters:", nrow(comparison), "\n")
# cat("Number of unique clonotypes:", ncol(comparison), "\n")

## ----selection, eval=python_available-----------------------------------------
# sce_pgen <- runOLGA(immLynx_example,
#                        chains = "TRB")
# 
# ggplot(as.data.frame(colData(sce_pgen)), aes(x = Type,
#                             y = olga_pgen_log10_TRB)) +
#   geom_boxplot() +
#   labs(title = "TCR Generation Probability by Sample Type",
#        x = "Sample Type",
#        y = "log10(Pgen)")
# 
# low_pgen_cells <- which(colData(sce_pgen)$olga_pgen_log10_TRB < -15)
# cat("Cells with Pgen < 10^-15:", length(low_pgen_cells), "\n")

## ----combined-distance, eval=python_available---------------------------------
# dist_results <- runTCRdist(immLynx_example, chains = "beta")
# 
# d <- as.dist(dist_results$distances$pw_beta)
# hc <- hclust(d, method = "complete")
# n_unique <- nrow(dist_results$distances$pw_beta)
# clusters <- cutree(hc, k = min(50, n_unique))
# 
# cat("Distance matrix:", n_unique, "x", n_unique, "\n")
# cat("Hierarchical clusters:", length(unique(clusters)), "\n")

## ----large-data, eval=FALSE---------------------------------------------------
# sce_large <- runEmbeddings(
#   large_sce,
#   chunk_size = 16,
#   pool = "mean"
# )
# 
# sce_large <- runClustTCR(
#   large_sce,
#   inflation = 2.0
# )
# 
# sample_cells <- sample(colnames(large_sce), 5000)
# subset_obj <- large_sce[, sample_cells]
# dist_results <- runTCRdist(subset_obj)

## ----hla, eval=python_available-----------------------------------------------
# metaclones <- runMetaclonotypist(immLynx_example,
#                                  return_input = FALSE)
# 
# hla_data <- data.frame(
#   barcode = metaclones$barcode,
#   HLA_A_01_01 = sample(c(TRUE, FALSE),
#                        nrow(metaclones),
#                        replace = TRUE),
#   HLA_A_02_01 = sample(c(TRUE, FALSE),
#                        nrow(metaclones),
#                        replace = TRUE),
#   HLA_B_07_02 = sample(c(TRUE, FALSE),
#                        nrow(metaclones),
#                        replace = TRUE)
# )
# 
# hla_results <- runHLAassociation(metaclones,
#                                  hla_data)
# head(hla_results)

## ----export, eval=FALSE-------------------------------------------------------
# tcr_data <- extractTCRdata(immLynx_example,
#                            chains = "both",
#                            format = "wide")
# tcrdist_format <- convertToTcrdist(tcr_data,
#                                    chains = "both")
# write.csv(tcrdist_format, "tcr_for_tcrdist.csv", row.names = FALSE)
# 
# clusters <- data.frame(
#   barcode = colnames(sce_clust),
#   clustcr = sce_clust$clustcr_TRB
# )
# write.csv(clusters, "cluster_assignments.csv", row.names = FALSE)
# 
# embeddings <- reducedDim(sce_small, "esm_small")
# write.csv(embeddings, "tcr_embeddings.csv")

## ----validate, eval=FALSE-----------------------------------------------------
# validation <- validateTCRdata(tcr_data, check_sequences = TRUE)
# if (!validation$valid) {
#   warning(validation$errors)
# }

## ----seed, eval=FALSE---------------------------------------------------------
# set.seed(42)
# sce <- runClustTCR(sce)

## ----session, eval=TRUE-------------------------------------------------------
sessionInfo()