extractCytosinesFromFASTA {methimpute} | R Documentation |
Extract cytosine coordinates and context information from a FASTA file. Cytosines in ambiguous reference contexts are not reported.
extractCytosinesFromFASTA(file, contexts = c("CG", "CHG", "CHH"), anchor.C = NULL)
file |
A character with the file name. |
contexts |
The contexts that should be extracted. If the contexts are named, the returned object will use those names for the contexts. |
anchor.C |
A named vector with positions of the anchoring C in the contexts; the names must correspond to the entries of `contexts`, e.g. `c(DCG=2, CCG=2)`. |
A GRanges-class object with coordinates of extracted cytosines and a meta-data column 'context'.
## Read a FASTA file (the example file shipped with the package is gzip-compressed):
filepath <- system.file("extdata", "arabidopsis_sequence.fa.gz", package="methimpute")

## Only CG context
cytosines <- extractCytosinesFromFASTA(filepath, contexts = 'CG')
table(cytosines$context)

## Split CG context into subcontexts
cytosines <- extractCytosinesFromFASTA(filepath, contexts = c('DCG', 'CCG'), anchor.C = c(DCG=2, CCG=2))
table(cytosines$context)

## With contexts that differ only by anchor
cytosines <- extractCytosinesFromFASTA(filepath, contexts = c('DCG', 'CCG', 'CCG', 'CWG', 'CHH'), anchor.C = c(DCG=2, CCG=2, CCG=1, CWG=1, CHH=1))
table(cytosines$context)

## With named contexts
contexts <- c(CG='DCG', CG='CCG', CHG='CCG', CHG='CWG', CHH='CHH')
cytosines <- extractCytosinesFromFASTA(filepath, contexts = contexts, anchor.C = c(DCG=2, CCG=2, CCG=1, CWG=1, CHH=1))
table(cytosines$context)