library(NanoMethViz)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
This package comes with helper functions that import exon annotations
from the Bioconductor packages `Homo.sapiens` and `Mus.musculus`. The
functions `get_exons_homo_sapiens()` and `get_exons_mus_musculus()` simply
take data from the respective packages and reorganise the columns such
that we have seven columns: `gene_id`, `chr`, `strand`, `start`, `end`,
`transcript_id` and `symbol`.
This table is used to provide gene annotations for the gene or region plots.
Other annotations can most likely be imported using
`rtracklayer::import()` and then manipulated into the desired format. As
an example, we can use a small sample of the C. elegans gene annotation
provided by ENSEMBL. `rtracklayer` will import the annotation as a
`GRanges` object; this can be coerced into a `data.frame` and manipulated
using `dplyr`.
# Read the example C. elegans GTF that ships with NanoMethViz.
anno <- rtracklayer::import(
  system.file("c_elegans.gtf.gz", package = "NanoMethViz")
)
head(anno)
## GRanges object with 6 ranges and 13 metadata columns:
## seqnames ranges strand | source type score phase
## <Rle> <IRanges> <Rle> | <factor> <factor> <numeric> <integer>
## [1] IV 9601517-9601695 - | WormBase exon NA <NA>
## [2] IV 9601040-9601345 - | WormBase exon NA <NA>
## [3] IV 9600828-9600953 - | WormBase exon NA <NA>
## [4] IV 9600627-9600780 - | WormBase exon NA <NA>
## [5] IV 9600002-9600392 - | WormBase exon NA <NA>
## [6] IV 9599702-9599873 - | WormBase exon NA <NA>
## gene_id transcript_id exon_number gene_name gene_source
## <character> <character> <character> <character> <character>
## [1] WBGene00000002 F27C8.1.1 1 aat-1 WormBase
## [2] WBGene00000002 F27C8.1.1 2 aat-1 WormBase
## [3] WBGene00000002 F27C8.1.1 3 aat-1 WormBase
## [4] WBGene00000002 F27C8.1.1 4 aat-1 WormBase
## [5] WBGene00000002 F27C8.1.1 5 aat-1 WormBase
## [6] WBGene00000002 F27C8.1.1 6 aat-1 WormBase
## gene_biotype transcript_source transcript_biotype exon_id
## <character> <character> <character> <character>
## [1] protein_coding WormBase protein_coding F27C8.1.1.e1
## [2] protein_coding WormBase protein_coding F27C8.1.1.e2
## [3] protein_coding WormBase protein_coding F27C8.1.1.e3
## [4] protein_coding WormBase protein_coding F27C8.1.1.e4
## [5] protein_coding WormBase protein_coding F27C8.1.1.e5
## [6] protein_coding WormBase protein_coding F27C8.1.1.e6
## -------
## seqinfo: 3 sequences from an unspecified genome; no seqlengths
# Coerce the imported annotation to a data.frame, then keep only the seven
# annotation columns, renaming seqnames -> chr and gene_name -> symbol as
# they are selected.
anno <- as.data.frame(anno)
anno <- dplyr::select(
  anno,
  gene_id,
  chr = seqnames,
  strand,
  start,
  end,
  transcript_id,
  symbol = gene_name
)
head(anno)
## gene_id chr strand start end transcript_id symbol
## 1 WBGene00000002 IV - 9601517 9601695 F27C8.1.1 aat-1
## 2 WBGene00000002 IV - 9601040 9601345 F27C8.1.1 aat-1
## 3 WBGene00000002 IV - 9600828 9600953 F27C8.1.1 aat-1
## 4 WBGene00000002 IV - 9600627 9600780 F27C8.1.1 aat-1
## 5 WBGene00000002 IV - 9600002 9600392 F27C8.1.1 aat-1
## 6 WBGene00000002 IV - 9599702 9599873 F27C8.1.1 aat-1
Annotations can be simplified if full exon and isoform information is
not required. For instance, gene body annotation can be represented as
single-exon genes. Here we take the example dataset and transform the
isoform annotations of Peg3 into a single gene body block. The helper
function `exons_to_genes()` can help with this common conversion.
# Load the example dataset and plot Peg3 with its original exon annotation.
nmr <- load_example_nanomethresult()
plot_gene(nmr, "Peg3")

# Convert the exon annotation with exons_to_genes(), then reuse gene_id as
# the transcript_id of each resulting row.
new_exons <- exons_to_genes(NanoMethViz::exons(nmr))
new_exons <- mutate(new_exons, transcript_id = gene_id)

# Replace the annotation stored in the result object and plot Peg3 again.
NanoMethViz::exons(nmr) <- new_exons
plot_gene(nmr, "Peg3")