## ----vignette-options, include = FALSE----------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----setup--------------------------------------------------------------------
library(fastRanges)
library(GenomicRanges)
library(S4Vectors)

data("fast_ranges_example", package = "fastRanges")
query <- fast_ranges_example$query
subject <- fast_ranges_example$subject

## ----inspect-example----------------------------------------------------------
names(fast_ranges_example)
query
subject

## ----example-files------------------------------------------------------------
system.file("extdata", "query_peaks.bed", package = "fastRanges")
system.file("extdata", "subject_genes.bed", package = "fastRanges")

## ----core-overlaps------------------------------------------------------------
hits <- fast_find_overlaps(query, subject, threads = 2)
hits

## ----core-overlaps-inspect----------------------------------------------------
cbind(
  query_id = S4Vectors::queryHits(hits),
  subject_id = S4Vectors::subjectHits(hits)
)

## ----core-counts--------------------------------------------------------------
counts <- fast_count_overlaps(query, subject, threads = 2)
counts

## ----core-any-----------------------------------------------------------------
any_hits <- fast_overlaps_any(query, subject, threads = 2)
any_hits

## ----compatibility-check------------------------------------------------------
ref <- GenomicRanges::findOverlaps(query, subject, ignore.strand = FALSE)

identical(
  cbind(S4Vectors::queryHits(hits), S4Vectors::subjectHits(hits)),
  cbind(S4Vectors::queryHits(ref), S4Vectors::subjectHits(ref))
)

## ----return-types-------------------------------------------------------------
ret_classes <- c(
  hits = class(fast_find_overlaps(query, subject, threads = 1))[1],
  count = class(fast_count_overlaps(query, subject, threads = 1))[1],
  any = class(fast_overlaps_any(query, subject, threads = 1))[1],
  nearest = class(fast_nearest(query, subject))[1],
  join = class(fast_overlap_join(query, subject, threads = 1))[1],
  coverage = class(fast_coverage(query))[1]
)

ret_classes

## ----index-build--------------------------------------------------------------
subject_index <- fast_build_index(subject)
subject_index

## ----index-use----------------------------------------------------------------
hits_from_index <- fast_find_overlaps(query, subject_index, threads = 2)

identical(
  cbind(S4Vectors::queryHits(hits_from_index), S4Vectors::subjectHits(hits_from_index)),
  cbind(S4Vectors::queryHits(hits), S4Vectors::subjectHits(hits))
)

## ----join-inner---------------------------------------------------------------
joined_inner <- fast_overlap_join(query, subject, join = "inner", threads = 2)
head(joined_inner)

## ----join-left----------------------------------------------------------------
joined_left <- fast_left_overlap_join(query, subject, threads = 2)
head(joined_left)

## ----join-semi-anti-----------------------------------------------------------
semi_tbl <- fast_semi_overlap_join(query, subject, threads = 2)
anti_tbl <- fast_anti_overlap_join(query, subject, threads = 2)

head(semi_tbl)
head(anti_tbl)

## ----nearest-queries----------------------------------------------------------
nearest_tbl <- fast_nearest(query, subject)
head(as.data.frame(nearest_tbl))

## ----directional-queries------------------------------------------------------
fast_precede(query, subject)
fast_follow(query, subject)

## ----grouped-setup------------------------------------------------------------
set.seed(1)
S4Vectors::mcols(subject)$group <- sample(
  c("promoter", "enhancer"),
  length(subject),
  replace = TRUE
)
S4Vectors::mcols(subject)$score <- seq_len(length(subject))

## ----grouped-counts-----------------------------------------------------------
by_group <- fast_count_overlaps_by_group(
  query,
  subject,
  group_col = "group",
  threads = 2
)

by_group

## ----grouped-aggregate--------------------------------------------------------
sum_score <- fast_overlap_aggregate(
  query,
  subject,
  value_col = "score",
  fun = "sum",
  threads = 2
)

sum_score

## ----self-overlaps------------------------------------------------------------
self_hits <- fast_self_overlaps(query, threads = 2)
self_hits

## ----overlap-clusters---------------------------------------------------------
clusters <- fast_cluster_overlaps(query, return = "data.frame")
head(clusters)

## ----overlap-windows----------------------------------------------------------
window_counts <- fast_window_count_overlaps(
  query,
  subject,
  window_width = 1000L,
  step_width = 500L,
  threads = 2
)

head(window_counts)

## ----range-algebra------------------------------------------------------------
query_reduced <- fast_reduce(query)
query_disjoint <- fast_disjoin(query)
query_gaps <- fast_gaps(query)

c(
  original = length(query),
  reduced = length(query_reduced),
  disjoint = length(query_disjoint),
  gaps = length(query_gaps)
)

## ----range-set-ops------------------------------------------------------------
union_ranges <- fast_range_union(query, subject)
intersect_ranges <- fast_range_intersect(query, subject)
setdiff_ranges <- fast_range_setdiff(query, subject)

c(
  union = length(union_ranges),
  intersect = length(intersect_ranges),
  setdiff = length(setdiff_ranges)
)

## ----coverage-----------------------------------------------------------------
query_cov <- fast_coverage(query)
query_cov

## ----tile-coverage------------------------------------------------------------
query_tiles <- fast_tile_coverage(
  query,
  tile_width = 1000L,
  step_width = 1000L
)

head(query_tiles)

## ----iterator-----------------------------------------------------------------
iter <- fast_find_overlaps_iter(query, subject_index, chunk_size = 2L, threads = 2)

fast_iter_has_next(iter)
chunk1 <- fast_iter_next(iter)
chunk1
fast_iter_has_next(iter)

## ----iterator-reset-----------------------------------------------------------
fast_iter_reset(iter)
all_iter_hits <- fast_iter_collect(iter)
length(all_iter_hits)

## ----index-io-----------------------------------------------------------------
tmp_index <- tempfile(fileext = ".rds")

fast_save_index(subject_index, tmp_index)
loaded_index <- fast_load_index(tmp_index)
fast_index_stats(loaded_index)

unlink(tmp_index)

## ----benchmark-demo-----------------------------------------------------------
base_time <- system.time({
  h_base <- GenomicRanges::findOverlaps(query, subject, ignore.strand = FALSE)
})

fast_time <- system.time({
  h_fast <- fast_find_overlaps(query, subject_index, ignore_strand = FALSE, threads = 2)
})

rbind(
  genomic_ranges = base_time[1:3],
  fast_ranges = fast_time[1:3]
)

c(base_hits = length(h_base), fast_hits = length(h_fast))

## ----extdata------------------------------------------------------------------
c(
  list.files(system.file("extdata", package = "fastRanges")),
  list.files(system.file("benchmarks", package = "fastRanges"))
)

## ----session-info-------------------------------------------------------------
sessionInfo()

