Contents

1 Introduction

Sequence-based TF affinity scoring can be conducted with the FIMO suite; see Sonawane et al. (2017). We have serialized an object, `fimo16`, that holds references to FIMO outputs for 16 TFs.

# Attach the packages used in this document, silencing their startup banners.
suppressPackageStartupMessages(library(TFutils))
suppressPackageStartupMessages(library(GenomicRanges))

# Auto-print the serialized GenomicFiles object that references the FIMO outputs.
fimo16
## GenomicFiles object with 0 ranges and 16 files: 
## files: M0635_1.02sort.bed.gz, M3433_1.02sort.bed.gz, ..., M6159_1.02sort.bed.gz, M6497_1.02sort.bed.gz 
## detail: use files(), rowRanges(), colData(), ...

Although the token `bed` appears in the filenames, the files are not actually in BED format!

2 Importing with scanTabix

We can use reduceByRange to import selected scans.

# Import the FIMO scan records of two TFs (POU2F1, VDR) that fall inside a
# query interval on chr17. The chunk is skipped on Windows.
if (.Platform$OS.type != "windows") {
  si <- TFutils::seqinfo_hg19_chr17
  # Query interval: chr17, 38.07 Mb - 38.09 Mb.
  myg <- GRanges("chr17", IRanges(38.07e6, 38.09e6), seqinfo = si)
  # Label the files by HGNC symbol so they can be selected by TF name.
  colnames(fimo16) <- fimo16$HGNC
  # fimo16 itself carries 0 ranges, and reduceByRange() iterates over the
  # object's row ranges. Attach the query to the two-file subset first;
  # otherwise there is nothing to scan and `myg` is never used.
  tf_pair <- fimo16[, c("POU2F1", "VDR")]
  rowRanges(tf_pair) <- myg
  # MAP receives one range `r` and one file `f`; scanTabix() returns the raw
  # text records of `f` overlapping `r`.
  lk2 <- reduceByRange(tf_pair,
    MAP = function(r, f) scanTabix(f, param = r))
  str(lk2)
}

This result can be massaged into a GRanges or other desirable structure. fimo_granges takes care of this.

#fimo_granges = function(gf, query) { # prototypical code
# rowRanges(gf) = query
# ans = reduceByRange(gf, MAP=function(r,f) scanTabix(f, param=r))
# ans = unlist(ans, recursive=FALSE)  # drop top list structure
# tabs = lapply(ans, lapply, function(x) {
#     con = textConnection(x)
#     on.exit(close(con))
#     dtf = read.delim(con, h=FALSE, stringsAsFactors=FALSE, sep="\t")
#     colnames(dtf) = c("chr", "start", "end", "rname", "score", "dir", "pval")
#     ans = with(dtf, GRanges(seqnames=chr, IRanges(start, end),
#            rname=rname, score=score, dir=dir, pval=pval))
#     ans
#     })
# GRangesList(unlist(tabs, recursive=FALSE))
#}
# Use the packaged helper to obtain the POU2F1 and VDR scan records over the
# query interval `myg` as range objects, then display them.
# As with the earlier chunk, this is not run on Windows.
if (!identical(.Platform$OS.type, "windows")) {
  rr <- fimo_granges(fimo16[, c("POU2F1", "VDR")], myg)
  rr
}

# Record the R version and package versions used to build this document.
sessionInfo()
## R version 4.3.1 (2023-06-16 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows Server 2022 x64 (build 20348)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=C                          
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] UpSetR_1.4.0                magrittr_2.0.3             
##  [3] dplyr_1.1.3                 gwascat_2.34.0             
##  [5] GSEABase_1.64.0             graph_1.80.0               
##  [7] annotate_1.80.0             XML_3.99-0.14              
##  [9] png_0.1-8                   ggplot2_3.4.4              
## [11] knitr_1.44                  data.table_1.14.8          
## [13] GO.db_3.18.0                GenomicFiles_1.38.0        
## [15] rtracklayer_1.62.0          Rsamtools_2.18.0           
## [17] Biostrings_2.70.0           XVector_0.42.0             
## [19] BiocParallel_1.36.0         SummarizedExperiment_1.32.0
## [21] GenomicRanges_1.54.0        GenomeInfoDb_1.38.0        
## [23] MatrixGenerics_1.14.0       matrixStats_1.0.0          
## [25] org.Hs.eg.db_3.18.0         AnnotationDbi_1.64.0       
## [27] IRanges_2.36.0              S4Vectors_0.40.0           
## [29] Biobase_2.62.0              BiocGenerics_0.48.0        
## [31] TFutils_1.22.0              BiocStyle_2.30.0           
## 
## loaded via a namespace (and not attached):
##  [1] jsonlite_1.8.7           GenomicFeatures_1.54.0   farver_2.1.1            
##  [4] rmarkdown_2.25           BiocIO_1.12.0            zlibbioc_1.48.0         
##  [7] vctrs_0.6.4              memoise_2.0.1            RCurl_1.98-1.12         
## [10] htmltools_0.5.6.1        S4Arrays_1.2.0           progress_1.2.2          
## [13] curl_5.1.0               cellranger_1.1.0         SparseArray_1.2.0       
## [16] sass_0.4.7               bslib_0.5.1              plyr_1.8.9              
## [19] cachem_1.0.8             GenomicAlignments_1.38.0 mime_0.12               
## [22] lifecycle_1.0.3          pkgconfig_2.0.3          Matrix_1.6-1.1          
## [25] R6_2.5.1                 fastmap_1.1.1            GenomeInfoDbData_1.2.11 
## [28] shiny_1.7.5.1            digest_0.6.33            colorspace_2.1-0        
## [31] RSQLite_2.3.1            filelock_1.0.2           labeling_0.4.3          
## [34] fansi_1.0.5              httr_1.4.7               abind_1.4-5             
## [37] compiler_4.3.1           bit64_4.0.5              withr_2.5.1             
## [40] DBI_1.1.3                biomaRt_2.58.0           rappdirs_0.3.3          
## [43] DelayedArray_0.28.0      rjson_0.2.21             tools_4.3.1             
## [46] httpuv_1.6.12            glue_1.6.2               restfulr_0.0.15         
## [49] promises_1.2.1           generics_0.1.3           gtable_0.3.4            
## [52] BSgenome_1.70.0          tzdb_0.4.0               hms_1.1.3               
## [55] xml2_1.3.5               utf8_1.2.4               pillar_1.9.0            
## [58] stringr_1.5.0            later_1.3.1              splines_4.3.1           
## [61] BiocFileCache_2.10.0     lattice_0.22-5           survival_3.5-7          
## [64] bit_4.0.5                tidyselect_1.2.0         miniUI_0.1.1.1          
## [67] gridExtra_2.3            bookdown_0.36            snpStats_1.52.0         
## [70] xfun_0.40                stringi_1.7.12           yaml_2.3.7              
## [73] evaluate_0.22            codetools_0.2-19         tibble_3.2.1            
## [76] BiocManager_1.30.22      cli_3.6.1                xtable_1.8-4            
## [79] munsell_0.5.0            jquerylib_0.1.4          Rcpp_1.0.11             
## [82] readxl_1.4.3             dbplyr_2.3.4             parallel_4.3.1          
## [85] ellipsis_0.3.2           readr_2.1.4              blob_1.2.4              
## [88] prettyunits_1.2.0        bitops_1.0-7             VariantAnnotation_1.48.0
## [91] scales_1.2.1             crayon_1.5.2             rlang_1.1.1             
## [94] KEGGREST_1.42.0