MsDataHub 1.6.0
The MsDataHub package provides example mass spectrometry data,
peptide spectrum matches or quantitative data from proteomics and
metabolomics experiments. The data are served through the
ExperimentHub infrastructure, which allows downloading them only once
and caching them for further use. Currently available data are summarised
in the table below and detailed in the next section.
library("MsDataHub")
DT::datatable(MsDataHub())
To install the package:
if (!require("BiocManager"))
    install.packages("BiocManager")
BiocManager::install("MsDataHub")
PestMix1_DDA.mzML and PestMix1_SWATH.mzML
?TripleTOF
Load with
f <- PestMix1_DDA.mzML()
## see ?MsDataHub and browseVignettes('MsDataHub') for documentation
## loading from cache
library(Spectra)
Spectra(f)
## MSn data (Spectra) with 7602 spectra in a MsBackendMzR backend:
##        msLevel     rtime scanIndex
##      <integer> <numeric> <integer>
## 1            1     0.231         1
## 2            1     0.351         2
## 3            1     0.471         3
## 4            1     0.591         4
## 5            1     0.711         5
## ...        ...       ...       ...
## 7598         1   899.491      7598
## 7599         1   899.613      7599
## 7600         1   899.747      7600
## 7601         1   899.872      7601
## 7602         1   899.993      7602
##  ... 33 more variables/columns.
## 
## file(s):
## 1e0e954878e0b6_7861
f <- PestMix1_SWATH.mzML()
## see ?MsDataHub and browseVignettes('MsDataHub') for documentation
## loading from cache
Spectra(f)
## MSn data (Spectra) with 8999 spectra in a MsBackendMzR backend:
##        msLevel     rtime scanIndex
##      <integer> <numeric> <integer>
## 1            2     0.203         1
## 2            2     0.300         2
## 3            2     0.397         3
## 4            2     0.494         4
## 5            2     0.591         5
## ...        ...       ...       ...
## 8995         2   899.527      8995
## 8996         2   899.624      8996
## 8997         2   899.721      8997
## 8998         2   899.818      8998
## 8999         2   899.915      8999
##  ... 33 more variables/columns.
## 
## file(s):
## 1e0e955f137cc1_7862
20171016_POOL_POS_1_105-134.mzML and 20171016_POOL_POS_3_105-134.mzML
?sciex
Load with
f <- X20171016_POOL_POS_1_105.134.mzML()
## see ?MsDataHub and browseVignettes('MsDataHub') for documentation
## loading from cache
Spectra(f)
## MSn data (Spectra) with 931 spectra in a MsBackendMzR backend:
##       msLevel     rtime scanIndex
##     <integer> <numeric> <integer>
## 1           1     0.280         1
## 2           1     0.559         2
## 3           1     0.838         3
## 4           1     1.117         4
## 5           1     1.396         5
## ...       ...       ...       ...
## 927         1   258.641       927
## 928         1   258.920       928
## 929         1   259.199       929
## 930         1   259.478       930
## 931         1   259.757       931
##  ... 33 more variables/columns.
## 
## file(s):
## 1e0e957f85077d_7859
f <- X20171016_POOL_POS_3_105.134.mzML()
## see ?MsDataHub and browseVignettes('MsDataHub') for documentation
## loading from cache
Spectra(f)
## MSn data (Spectra) with 931 spectra in a MsBackendMzR backend:
##       msLevel     rtime scanIndex
##     <integer> <numeric> <integer>
## 1           1     0.275         1
## 2           1     0.554         2
## 3           1     0.833         3
## 4           1     1.112         4
## 5           1     1.391         5
## ...       ...       ...       ...
## 927         1   258.636       927
## 928         1   258.915       928
## 929         1   259.194       929
## 930         1   259.473       930
## 931         1   259.752       931
##  ... 33 more variables/columns.
## 
## file(s):
## 1e0e9547eaa1e7_7860
TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML.gz
and
TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzid
?PDX000001
Load with
f <- TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.20141210.mzML.gz()
## see ?MsDataHub and browseVignettes('MsDataHub') for documentation
## loading from cache
Spectra(f)
## MSn data (Spectra) with 7534 spectra in a MsBackendMzR backend:
##        msLevel     rtime scanIndex
##      <integer> <numeric> <integer>
## 1            1    0.4584         1
## 2            1    0.9725         2
## 3            1    1.8524         3
## 4            1    2.7424         4
## 5            1    3.6124         5
## ...        ...       ...       ...
## 7530         2   3600.47      7530
## 7531         2   3600.83      7531
## 7532         2   3601.18      7532
## 7533         2   3601.57      7533
## 7534         2   3601.98      7534
##  ... 33 more variables/columns.
## 
## file(s):
## 1e0e95128647c4_7858
f <- TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.20141210.mzid()
## see ?MsDataHub and browseVignettes('MsDataHub') for documentation
## loading from cache
library(PSMatch)
PSM(f)
## PSM with 5802 rows and 35 columns.
## names(35): sequence spectrumID ... subReplacementResidue subLocation
cptac_a_b_c_peptides.txt, cptac_a_b_peptides.txt and
cptac_peptides.txt
?cptac
Load with
library(QFeatures)
f <- cptac_peptides.txt()
## see ?MsDataHub and browseVignettes('MsDataHub') for documentation
## loading from cache
ecols <- grep("Intensity\\.", names(read.delim(f)))
readSummarizedExperiment(f, ecols, sep = "\t")
## class: SummarizedExperiment
## dim: 11466 45 
## metadata(0):
## assays(1): ''
## rownames(11466): 1 2 ... 11465 11466
## rowData names(143): Sequence N.term.cleavage.window ...
##   Oxidation..M..site.IDs MS.MS.Count
## colnames(45): Intensity.6A_1 Intensity.6A_2 ... Intensity.6E_8
##   Intensity.6E_9
## colData names(0):
cptac_a_b_c_peptides.txt()
## see ?MsDataHub and browseVignettes('MsDataHub') for documentation
## loading from cache
##                                                       EH7804 
## "/home/biocbuild/.cache/R/ExperimentHub/1e0e95147925cd_7854"
cptac_a_b_peptides.txt()
## see ?MsDataHub and browseVignettes('MsDataHub') for documentation
## loading from cache
##                                                       EH7805 
## "/home/biocbuild/.cache/R/ExperimentHub/1e0e9541f34035_7855"
ko15.CDF
?cdf
Load with
f <- ko15.CDF()
## see ?MsDataHub and browseVignettes('MsDataHub') for documentation
## loading from cache
Spectra(f)
## MSn data (Spectra) with 1278 spectra in a MsBackendMzR backend:
##        msLevel     rtime scanIndex
##      <integer> <numeric> <integer>
## 1            1   2501.38         1
## 2            1   2502.94         2
## 3            1   2504.51         3
## 4            1   2506.07         4
## 5            1   2507.64         5
## ...        ...       ...       ...
## 1274         1   4493.56      1274
## 1275         1   4495.13      1275
## 1276         1   4496.69      1276
## 1277         1   4498.26      1277
## 1278         1   4499.82      1278
##  ... 33 more variables/columns.
## 
## file(s):
## 1e0e951bb72970_7853
benchmarkingDIA.tsv
Report.Derks2022.plexDIA.tsv
?benchmarkingDIA.tsv and
?Report.Derks2022.plexDIA.tsv
Load with
library(QFeatures)
lfdia <- read.delim(MsDataHub::benchmarkingDIA.tsv())
readQFeaturesFromDIANN(lfdia)
## An instance of class QFeatures containing 24 assays:
##  [1] U:\712006-Proteomics\Issues\Issue 253\DIANN\raw-data\RD139_Overlap_UPS1_0_1fmol_inj1.mzML: SummarizedExperiment with 28980 rows and 1 columns 
##  [2] U:\712006-Proteomics\Issues\Issue 253\DIANN\raw-data\RD139_Overlap_UPS1_0_1fmol_inj2.mzML: SummarizedExperiment with 29495 rows and 1 columns 
##  [3] U:\712006-Proteomics\Issues\Issue 253\DIANN\raw-data\RD139_Overlap_UPS1_0_1fmol_inj3.mzML: SummarizedExperiment with 29210 rows and 1 columns 
##  ...
##  [22] U:\712006-Proteomics\Issues\Issue 253\DIANN\raw-data\RD139_Overlap_UPS1_5fmol_inj1.mzML: SummarizedExperiment with 30941 rows and 1 columns 
##  [23] U:\712006-Proteomics\Issues\Issue 253\DIANN\raw-data\RD139_Overlap_UPS1_5fmol_inj2.mzML: SummarizedExperiment with 30321 rows and 1 columns 
##  [24] U:\712006-Proteomics\Issues\Issue 253\DIANN\raw-data\RD139_Overlap_UPS1_5fmol_inj3.mzML: SummarizedExperiment with 24168 rows and 1 columns
plexdia <- read.delim(MsDataHub::Report.Derks2022.plexDIA.tsv())
readQFeaturesFromDIANN(plexdia, multiplexing = "mTRAQ")
## An instance of class QFeatures containing 54 assays:
##  [1] F:\JD\plexDIA\nPOP\wJD1146.raw: SummarizedExperiment with 2635 rows and 3 columns 
##  [2] F:\JD\plexDIA\nPOP\wJD1147.raw: SummarizedExperiment with 3000 rows and 3 columns 
##  [3] F:\JD\plexDIA\nPOP\wJD1148.raw: SummarizedExperiment with 2676 rows and 3 columns 
##  ...
##  [52] F:\JD\plexDIA\nPOP\wJD1203.raw: SummarizedExperiment with 4441 rows and 3 columns 
##  [53] F:\JD\plexDIA\nPOP\wJD1204.raw: SummarizedExperiment with 4416 rows and 3 columns 
##  [54] F:\JD\plexDIA\nPOP\wJD1205.raw: SummarizedExperiment with 4492 rows and 3 columns
Adding new data to MsDataHub
To add new data to MsDataHub, start by
opening an
issue
in the package’s GitHub repository and describe the new data. In
particular, provide information about its provenance, its use, its
format(s) and acknowledge that the data may be shared freely with
the community without any restrictions. You may provide an open
licence specifying the terms under which it can be re-used, typically a
CC-BY-SA licence.
If you are familiar with ExperimentHub packages and GitHub pull requests, you may
directly send one that adds your data to the package. Make sure (1)
to add appropriate references in the manual page and (2) to add
yourself as a contributor of the package in the DESCRIPTION file.
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.1 LTS
## 
## Matrix products: default
## BLAS:   /home/biocbuild/bbs-3.20-bioc/R/lib/libRblas.so 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB              LC_COLLATE=C              
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: America/New_York
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] MsDataHub_1.6.0             QFeatures_1.16.0           
##  [3] MultiAssayExperiment_1.32.0 SummarizedExperiment_1.36.0
##  [5] Biobase_2.66.0              GenomicRanges_1.58.0       
##  [7] GenomeInfoDb_1.42.0         IRanges_2.40.0             
##  [9] MatrixGenerics_1.18.0       matrixStats_1.4.1          
## [11] PSMatch_1.10.0              Spectra_1.16.0             
## [13] BiocParallel_1.40.0         S4Vectors_0.44.0           
## [15] BiocGenerics_0.52.0         BiocStyle_2.34.0           
## 
## loaded via a namespace (and not attached):
##  [1] DBI_1.2.3               rlang_1.1.4             magrittr_2.0.3         
##  [4] clue_0.3-65             compiler_4.4.1          RSQLite_2.3.7          
##  [7] png_0.1-8               vctrs_0.6.5             reshape2_1.4.4         
## [10] stringr_1.5.1           ProtGenerics_1.38.0     pkgconfig_2.0.3        
## [13] MetaboCoreUtils_1.14.0  crayon_1.5.3            fastmap_1.2.0          
## [16] dbplyr_2.5.0            XVector_0.46.0          utf8_1.2.4             
## [19] rmarkdown_2.28          UCSC.utils_1.2.0        purrr_1.0.2            
## [22] bit_4.5.0               xfun_0.48               zlibbioc_1.52.0        
## [25] cachem_1.1.0            jsonlite_1.8.9          blob_1.2.4             
## [28] DelayedArray_0.32.0     parallel_4.4.1          cluster_2.1.6          
## [31] R6_2.5.1                bslib_0.8.0             stringi_1.8.4          
## [34] jquerylib_0.1.4         Rcpp_1.0.13             bookdown_0.41          
## [37] knitr_1.48              Matrix_1.7-1            igraph_2.1.1           
## [40] tidyselect_1.2.1        abind_1.4-8             yaml_2.3.10            
## [43] codetools_0.2-20        curl_5.2.3              lattice_0.22-6         
## [46] tibble_3.2.1            plyr_1.8.9              withr_3.0.2            
## [49] KEGGREST_1.46.0         evaluate_1.0.1          BiocFileCache_2.14.0   
## [52] ExperimentHub_2.14.0    Biostrings_2.74.0       pillar_1.9.0           
## [55] BiocManager_1.30.25     filelock_1.0.3          DT_0.33                
## [58] ncdf4_1.23              generics_0.1.3          BiocVersion_3.20.0     
## [61] glue_1.8.0              lazyeval_0.2.2          tools_4.4.1            
## [64] AnnotationHub_3.14.0    mzR_2.40.0              fs_1.6.4               
## [67] grid_4.4.1              tidyr_1.3.1             crosstalk_1.2.1        
## [70] MsCoreUtils_1.18.0      AnnotationDbi_1.68.0    GenomeInfoDbData_1.2.13
## [73] cli_3.6.3               rappdirs_0.3.3          fansi_1.0.6            
## [76] S4Arrays_1.6.0          dplyr_1.1.4             AnnotationFilter_1.30.0
## [79] sass_0.4.9              digest_0.6.37           SparseArray_1.6.0      
## [82] htmlwidgets_1.6.4       memoise_2.0.1           htmltools_0.5.8.1      
## [85] lifecycle_1.0.4         httr_1.4.7              mime_0.12              
## [88] bit64_4.5.2             MASS_7.3-61