chihaya 1.2.0
The chihaya package saves DelayedArray objects for efficient, portable and stable reproduction of delayed operations in a new R session or other programming frameworks.
Check out the specification for more details.
Make a DelayedArray object with some operations:
library(DelayedArray)
x <- DelayedArray(matrix(runif(1000), ncol=10))
x <- x[11:15,] / runif(5) 
x <- log2(x + 1)
x## <5 x 10> DelayedMatrix object of type "double":
##            [,1]       [,2]       [,3] ...       [,9]      [,10]
## [1,] 0.73973673 0.58482598 0.50105643   . 1.22763756 0.35689949
## [2,] 1.17609331 0.62296999 1.60920698   . 0.77382301 0.10414956
## [3,] 2.47977444 2.43294886 0.12887398   . 2.33256361 1.58847481
## [4,] 1.79550175 1.00687937 1.95446087   . 1.15837758 2.06716155
## [5,] 1.19091060 0.89691132 0.03755208   . 0.03353628 0.28871154showtree(x)## 5x10 double: DelayedMatrix object
## └─ 5x10 double: Stack of 2 unary iso op(s)
##    └─ 5x10 double: Unary iso op with args
##       └─ 5x10 double: Subset
##          └─ 100x10 double: [seed] matrix objectSave it into a HDF5 file with saveDelayed():
library(chihaya)
tmp <- tempfile(fileext=".h5")
saveDelayed(x, tmp)
rhdf5::h5ls(tmp)##                            group    name       otype  dclass      dim
## 0                              / delayed   H5I_GROUP                 
## 1                       /delayed    base H5I_DATASET   FLOAT    ( 0 )
## 2                       /delayed  method H5I_DATASET  STRING    ( 0 )
## 3                       /delayed    seed   H5I_GROUP                 
## 4                  /delayed/seed  method H5I_DATASET  STRING    ( 0 )
## 5                  /delayed/seed    seed   H5I_GROUP                 
## 6             /delayed/seed/seed   along H5I_DATASET INTEGER    ( 0 )
## 7             /delayed/seed/seed  method H5I_DATASET  STRING    ( 0 )
## 8             /delayed/seed/seed    seed   H5I_GROUP                 
## 9        /delayed/seed/seed/seed   index   H5I_GROUP                 
## 10 /delayed/seed/seed/seed/index       0 H5I_DATASET INTEGER        5
## 11       /delayed/seed/seed/seed    seed   H5I_GROUP                 
## 12  /delayed/seed/seed/seed/seed    data H5I_DATASET   FLOAT 100 x 10
## 13  /delayed/seed/seed/seed/seed  native H5I_DATASET INTEGER    ( 0 )
## 14            /delayed/seed/seed    side H5I_DATASET  STRING    ( 0 )
## 15            /delayed/seed/seed   value H5I_DATASET   FLOAT        5
## 16                 /delayed/seed    side H5I_DATASET  STRING    ( 0 )
## 17                 /delayed/seed   value H5I_DATASET   FLOAT    ( 0 )And then load it back in later:
y <- loadDelayed(tmp)
y## <5 x 10> DelayedMatrix object of type "double":
##            [,1]       [,2]       [,3] ...       [,9]      [,10]
## [1,] 0.73973673 0.58482598 0.50105643   . 1.22763756 0.35689949
## [2,] 1.17609331 0.62296999 1.60920698   . 0.77382301 0.10414956
## [3,] 2.47977444 2.43294886 0.12887398   . 2.33256361 1.58847481
## [4,] 1.79550175 1.00687937 1.95446087   . 1.15837758 2.06716155
## [5,] 1.19091060 0.89691132 0.03755208   . 0.03353628 0.28871154Of course, this is not a particularly interesting case as we end up saving the original array inside our HDF5 file anyway. The real fun begins when you have some more interesting seeds.
We can use the delayed nature of the operations to avoid breaking sparsity. For example:
library(Matrix)
x <- rsparsematrix(1000, 1000, density=0.01)
x <- DelayedArray(x) + runif(1000)
tmp <- tempfile(fileext=".h5")
saveDelayed(x, tmp)
rhdf5::h5ls(tmp)##            group     name       otype  dclass   dim
## 0              /  delayed   H5I_GROUP              
## 1       /delayed    along H5I_DATASET INTEGER ( 0 )
## 2       /delayed   method H5I_DATASET  STRING ( 0 )
## 3       /delayed     seed   H5I_GROUP              
## 4  /delayed/seed     data H5I_DATASET   FLOAT 10000
## 5  /delayed/seed dimnames   H5I_GROUP              
## 6  /delayed/seed  indices H5I_DATASET INTEGER 10000
## 7  /delayed/seed   indptr H5I_DATASET INTEGER  1001
## 8  /delayed/seed    shape H5I_DATASET INTEGER     2
## 9       /delayed     side H5I_DATASET  STRING ( 0 )
## 10      /delayed    value H5I_DATASET   FLOAT  1000file.info(tmp)[["size"]]## [1] 101996# Compared to a dense array.
tmp2 <- tempfile(fileext=".h5")
out <- HDF5Array::writeHDF5Array(x, tmp2, "data")
file.info(tmp2)[["size"]]## [1] 279459# Loading it back in.
y <- loadDelayed(tmp)
showtree(y)## 1000x1000 double: DelayedMatrix object
## └─ 1000x1000 double: Unary iso op with args
##    └─ 1000x1000 double, sparse: [seed] dgCMatrix objectWe can also store references to external files, thus avoiding data duplication:
library(HDF5Array)
test <- HDF5Array(tmp2, "data")
stuff <- log2(test + 1)
stuff## <1000 x 1000> DelayedMatrix object of type "double":
##               [,1]       [,2]       [,3] ...    [,999]   [,1000]
##    [1,]  0.8900077  0.8900077  0.8900077   . 0.8900077 0.8900077
##    [2,]  0.6783019  0.6783019 -0.1840064   . 0.6783019 0.6783019
##    [3,]  0.6541059  0.6541059  0.6541059   . 0.6541059 0.6541059
##    [4,]  0.6387384  0.6387384  0.6387384   . 0.6387384 0.6387384
##    [5,]  0.1126565  0.1126565  0.1126565   . 0.1126565 0.1126565
##     ...          .          .          .   .         .         .
##  [996,]  0.8909218  0.8909218  0.8909218   . 0.8909218 0.8909218
##  [997,]  0.5090896  0.5090896  0.5090896   . 0.5090896 0.5090896
##  [998,]  0.5549427  0.5549427  0.5549427   . 0.5549427 0.5549427
##  [999,]  0.3001307  0.3001307  0.3001307   . 0.3001307 0.3001307
## [1000,]  0.8263590  0.8263590  0.8263590   . 0.8263590 0.8263590tmp <- tempfile(fileext=".h5")
saveDelayed(stuff, tmp)
rhdf5::h5ls(tmp)##                 group       name       otype  dclass   dim
## 0                   /    delayed   H5I_GROUP              
## 1            /delayed       base H5I_DATASET   FLOAT ( 0 )
## 2            /delayed     method H5I_DATASET  STRING ( 0 )
## 3            /delayed       seed   H5I_GROUP              
## 4       /delayed/seed     method H5I_DATASET  STRING ( 0 )
## 5       /delayed/seed       seed   H5I_GROUP              
## 6  /delayed/seed/seed dimensions H5I_DATASET INTEGER     2
## 7  /delayed/seed/seed       file H5I_DATASET  STRING ( 0 )
## 8  /delayed/seed/seed       name H5I_DATASET  STRING ( 0 )
## 9  /delayed/seed/seed     sparse H5I_DATASET INTEGER ( 0 )
## 10 /delayed/seed/seed       type H5I_DATASET  STRING ( 0 )
## 11      /delayed/seed       side H5I_DATASET  STRING ( 0 )
## 12      /delayed/seed      value H5I_DATASET   FLOAT ( 0 )file.info(tmp)[["size"]] # size of the delayed operations + pointer to the actual file## [1] 49642y <- loadDelayed(tmp)
y## <1000 x 1000> DelayedMatrix object of type "double":
##               [,1]       [,2]       [,3] ...    [,999]   [,1000]
##    [1,]  0.8900077  0.8900077  0.8900077   . 0.8900077 0.8900077
##    [2,]  0.6783019  0.6783019 -0.1840064   . 0.6783019 0.6783019
##    [3,]  0.6541059  0.6541059  0.6541059   . 0.6541059 0.6541059
##    [4,]  0.6387384  0.6387384  0.6387384   . 0.6387384 0.6387384
##    [5,]  0.1126565  0.1126565  0.1126565   . 0.1126565 0.1126565
##     ...          .          .          .   .         .         .
##  [996,]  0.8909218  0.8909218  0.8909218   . 0.8909218 0.8909218
##  [997,]  0.5090896  0.5090896  0.5090896   . 0.5090896 0.5090896
##  [998,]  0.5549427  0.5549427  0.5549427   . 0.5549427 0.5549427
##  [999,]  0.3001307  0.3001307  0.3001307   . 0.3001307 0.3001307
## [1000,]  0.8263590  0.8263590  0.8263590   . 0.8263590 0.8263590sessionInfo()## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
## 
## Matrix products: default
## BLAS:   /home/biocbuild/bbs-3.18-bioc/R/lib/libRblas.so 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB              LC_COLLATE=C              
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: America/New_York
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] HDF5Array_1.30.0      rhdf5_2.46.0          chihaya_1.2.0        
##  [4] DelayedArray_0.28.0   SparseArray_1.2.0     S4Arrays_1.2.0       
##  [7] abind_1.4-5           IRanges_2.36.0        S4Vectors_0.40.0     
## [10] MatrixGenerics_1.14.0 matrixStats_1.0.0     BiocGenerics_0.48.0  
## [13] Matrix_1.6-1.1        BiocStyle_2.30.0     
## 
## loaded via a namespace (and not attached):
##  [1] crayon_1.5.2        cli_3.6.1           knitr_1.44         
##  [4] rlang_1.1.1         xfun_0.40           jsonlite_1.8.7     
##  [7] htmltools_0.5.6.1   sass_0.4.7          rmarkdown_2.25     
## [10] grid_4.3.1          evaluate_0.22       jquerylib_0.1.4    
## [13] fastmap_1.1.1       Rhdf5lib_1.24.0     yaml_2.3.7         
## [16] bookdown_0.36       BiocManager_1.30.22 compiler_4.3.1     
## [19] Rcpp_1.0.11         rhdf5filters_1.14.0 XVector_0.42.0     
## [22] lattice_0.22-5      digest_0.6.33       R6_2.5.1           
## [25] bslib_0.5.1         tools_4.3.1         zlibbioc_1.48.0    
## [28] cachem_1.0.8