## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  warning = FALSE,
  message = TRUE
)

## ----load---------------------------------------------------------------------
library(scTypeEval)

## ----minimal_workflow---------------------------------------------------------
library(Matrix)

# Generate example data
set.seed(123)
counts <- Matrix(rpois(50000, 5), nrow=500, ncol=100, sparse=TRUE)
rownames(counts) <- paste0("Gene", seq_len(500))
colnames(counts) <- paste0("Cell", seq_len(100))

metadata <- data.frame(
  celltype = rep(c("TypeA", "TypeB", "TypeC", "TypeD"), each=25),
  sample = rep(paste0("S", seq_len(5)), times=20),
  row.names = colnames(counts)
)

# Create object
sceval <- create_scTypeEval(matrix=counts, metadata=metadata)

# Process data
sceval <- run_processing_data(
  sceval,
  ident = "celltype",
  sample = "sample",
  min_samples = 3,
  min_cells = 5
)

# Identify features
sceval <- run_hvg(sceval,
                  var_method = "basic",
                  ngenes = 1000)

# Run PCA
sceval <- run_pca(sceval, ndim = 20)

# Compute dissimilarity
sceval <- run_dissimilarity(
  sceval,
  method = "Pseudobulk:Euclidean",
  reduction = TRUE
)

# Get consistency
results <- get_consistency(
  sceval,
  dissimilarity_slot = "Pseudobulk:Euclidean",
  consistency_metric = "silhouette"
)
print(results)

## ----seurat_example, message= F-----------------------------------------------
library(Seurat)

# Create Seurat object with example data generated earlier
seurat_obj <- Seurat::CreateSeuratObject(
  counts = counts,
  meta.data = metadata
)

sceval_seurat <- create_scTypeEval(seurat_obj)

# Continue with standard workflow

## ----sce_example, message = F-------------------------------------------------
library(SingleCellExperiment)

# Create SCE object with example data generated earlier
sce <- SingleCellExperiment::SingleCellExperiment(
  assays = list(counts = counts),
  colData = metadata
)

sceval_sce <- create_scTypeEval(sce)

# Continue with workflow as above

## ----compare_methods----------------------------------------------------------
# Compute different dissimilarity methods
sceval <- run_dissimilarity(
  sceval,
  method = "Pseudobulk:Euclidean",
  reduction = TRUE
)
sceval <- run_dissimilarity(
  sceval,
  method = "Pseudobulk:Cosine",
  reduction = TRUE
)
sceval <- run_dissimilarity(
  sceval,
  method = "WasserStein",
  reduction = TRUE
)

# Compare consistency across methods
dissimilarity_methods <- c("Pseudobulk:Euclidean",
                           "Pseudobulk:Cosine",
                           "WasserStein")
results_df <- 
  get_consistency(
    sceval,
    dissimilarity_slot = dissimilarity_methods, # compute for multiple dissimilarities
    consistency_metric = "silhouette"
  )

results_df

## ----multiple_metrics---------------------------------------------------------
# Compute multiple consistency metrics
consistency_metrics <- c("silhouette",
                         "NeighborhoodPurity",
                         "Average_similarity")

all_metrics <- 
  get_consistency(
    sceval,
    dissimilarity_slot = "Pseudobulk:Euclidean",
    consistency_metric = consistency_metrics
  )

all_metrics

## ----visualize----------------------------------------------------------------
# Heatmap of dissimilarities
plot_heatmap(
  sceval,
  dissimilarity_slot = "Pseudobulk:Euclidean",
  sort_consistency = "silhouette"
)

# Pseudobulk PCA per sample & cell type
plot_pca(
  sceval,
  reduction_slot = "pseudobulk"
)

## ----markers------------------------------------------------------------------
# Identify cell type markers
sceval <- run_gene_markers(
  sceval,
  method = "scran.findMarkers",
  ngenes_celltype = 50
)

# Use markers for dissimilarity calculation
sceval <- run_dissimilarity(
  sceval,
  method = "Pseudobulk:Euclidean",
  gene_list = "scran.findMarkers", # gene list recently added
  reduction = FALSE
)

## ----custom_gene_sets---------------------------------------------------------
# Add custom gene list
immune_genes <- c("CD3D", "CD4", "CD8A", "CD19", "CD14", "NCAM1")
sceval <- add_gene_list(
  sceval,
  gene_list = list("immune_markers" = immune_genes) # add a named list
)

# Run analysis on custom genes
sceval <- run_dissimilarity(
  sceval,
  method = "Pseudobulk:Euclidean",
  gene_list = "immune_markers" # name of the list to use
)

## ----session_info-------------------------------------------------------------
sessionInfo()

