We will start off with our example dataset pulled from the ISGlobal Exposome Data Challenge 2021 (Maitre et al., 2022).
# Load the packaged example dataset into the session
data("tidyexposomics_example")

# Build the exposomic set from the example data: a codebook, the exposure
# table, and two omics layers. The `omics` and `row_data` lists are keyed by
# the same layer names ("Gene Expression", "Methylation").
expom <- create_exposomicset(
  codebook = tidyexposomics_example$annotated_cb,
  exposure = tidyexposomics_example$meta,
  omics = list(
    "Gene Expression" = tidyexposomics_example$exp_filt,
    "Methylation" = tidyexposomics_example$methyl_filt
  ),
  row_data = list(
    "Gene Expression" = tidyexposomics_example$exp_fdata,
    "Methylation" = tidyexposomics_example$methyl_fdata
  )
)
## Ensuring all omics datasets are matrices with column names.
## Creating SummarizedExperiment objects.
## Creating MultiAssayExperiment object.
## MultiAssayExperiment created successfully.
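We will focus on a few exposure variable categories; the names of the selected exposure columns are held in exp_vars, which the imputation and transformation steps below take as input.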
As in the main vignette, we will impute exposure data using
missforest.
# Fill in missing exposure values using the "missforest" method.
# NOTE(review): `exp_vars` is not defined anywhere in the visible code; it
# must already exist when this call runs -- confirm where it is created.
expom <- run_impute_missing(
  exposomicset = expom,
  exposure_cols = exp_vars,
  exposure_impute_method = "missforest"
)
## Imputing exposure data using method: missforest
Next, we will transform the exposure data with the boxcox_best
method so that it is closer to normally distributed.
# Apply the "boxcox_best" transformation to the selected exposure columns.
# NOTE(review): `exp_vars` is not defined anywhere in the visible code; it
# must already exist when this call runs -- confirm where it is created.
expom <- transform_exposure(
  exposomicset = expom,
  exposure_cols = exp_vars,
  transform_method = "boxcox_best"
)
## Applying the boxcox_best transformation.
We can calculate exposome scores, which are summary measures of
exposure, with the run_exposome_score function. The exposure_cols
argument sets the columns that go into the score, and the score_type
argument sets the type of score to calculate. Here we could use:
median: Calculates the median of the exposure
variables.
mean: Calculates the mean of the exposure
variables.
sum: Calculates the sum of the exposure
variables.
pca: Calculates the first principal component of the
exposure variables.
irt: Uses Item Response Theory to calculate the
exposome score.
quantile: Calculates the quantile of the exposure
variables.
var: Calculates the variance of the exposure
variables.
The score_column_name argument is used to set the name
of the column to store the exposome score in. Here we will define a
score for aerosols using a variety of different methods and demonstrate
their use in association with asthma status.
# Aerosol exposure columns that feed every score below
aerosols <- c("h_pm25_ratio_preg_None", "h_pm10_ratio_preg_None")

# Compute one score per method from the same aerosol columns, each stored
# under its own column name. The output of every call is piped into the next
# call, and `expom` is reassigned once at the end of the chain.
expom <- expom |>
  run_exposome_score(
    score_type = "median",
    score_column_name = "exposome_median_score",
    exposure_cols = aerosols
  ) |>
  run_exposome_score(
    score_type = "pca",
    score_column_name = "exposome_pca_score",
    exposure_cols = aerosols
  ) |>
  run_exposome_score(
    score_type = "irt",
    score_column_name = "exposome_irt_score",
    exposure_cols = aerosols
  ) |>
  run_exposome_score(
    score_type = "quantile",
    score_column_name = "exposome_quantile_score",
    exposure_cols = aerosols
  ) |>
  run_exposome_score(
    score_type = "var",
    score_column_name = "exposome_var_score",
    exposure_cols = aerosols
  )
## Extracting exposure data...
## Extracting exposure data...
## Extracting exposure data...
## Extracting exposure data...
## Extracting exposure data...
## Calculating median exposure scores...
## Calculating PCA exposure scores...
## Calculating IRT exposure scores...
## Warning: EM cycles terminated after 500 iterations.
## Calculating quantile exposure scores...
## Calculating variance exposure scores...
We can then associate these exposome scores with asthma status using
the run_association function, just like we did before.
However, this time we specify our feature_set to be the
exposome scores we just calculated.
# Test each exposome score for association with asthma status (`hs_asthma`)
# using the binomial family. The feature set is restricted to the five score
# columns created above.
# NOTE(review): `action = "add"` presumably stores the results on the
# returned object -- confirm against the package documentation.
expom <- run_association(
  exposomicset = expom,
  source = "exposures",
  outcome = "hs_asthma",
  family = "binomial",
  feature_set = c(
    "exposome_median_score",
    "exposome_pca_score",
    "exposome_irt_score",
    "exposome_quantile_score",
    "exposome_var_score"
  ),
  action = "add"
)
## Running GLMs.
# Forest plot of the exposure associations, limited to the five exposome
# score terms.
# NOTE(review): `filter_col` / `filter_thresh` presumably apply a
# p.value < 0.05 cutoff and `r2_col` names the R-squared column to display --
# confirm against the package documentation.
plot_association(
  exposomicset = expom,
  source = "exposures",
  terms = c(
    "exposome_median_score",
    "exposome_pca_score",
    "exposome_irt_score",
    "exposome_quantile_score",
    "exposome_var_score"
  ),
  r2_col = "r2",
  filter_col = "p.value",
  filter_thresh = 0.05
)
Associations of aerosol exposome scores with asthma status. The variance-based score has the strongest association with asthma status.
## R version 4.5.3 (2026-03-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Etc/UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] tidyexposomics_0.99.16 MultiAssayExperiment_1.37.4
## [3] SummarizedExperiment_1.41.1 Biobase_2.71.0
## [5] GenomicRanges_1.63.2 Seqinfo_1.1.0
## [7] IRanges_2.45.0 S4Vectors_0.49.2
## [9] BiocGenerics_0.57.1 generics_0.1.4
## [11] MatrixGenerics_1.23.0 matrixStats_1.5.0
## [13] lubridate_1.9.5 forcats_1.0.1
## [15] stringr_1.6.0 dplyr_1.2.1
## [17] purrr_1.2.2 readr_2.2.0
## [19] tidyr_1.3.2 tibble_3.3.1
## [21] ggplot2_4.0.2 tidyverse_2.0.0
## [23] BiocStyle_2.39.0
##
## loaded via a namespace (and not attached):
## [1] naniar_1.1.0 httr_1.4.8 RColorBrewer_1.1-3
## [4] ggsci_5.0.0 tools_4.5.3 doRNG_1.8.6.3
## [7] backports_1.5.1 utf8_1.2.6 R6_2.6.1
## [10] DT_0.34.0 vegan_2.7-3 mgcv_1.9-4
## [13] permute_0.9-10 withr_3.0.2 gridExtra_2.3
## [16] progressr_0.19.0 cli_3.6.6 factoextra_2.0.0
## [19] RGCCA_3.0.3 labeling_0.4.3 sass_0.4.10
## [22] S7_0.2.1-1 randomForest_4.7-1.2 proxy_0.4-29
## [25] pbapply_1.7-4 foreign_0.8-91 R.utils_2.13.0
## [28] sessioninfo_1.2.3 parallelly_1.47.0 itertools_0.1-3
## [31] limma_3.67.2 rstudioapi_0.18.0 RSQLite_2.4.6
## [34] car_3.1-5 Matrix_1.7-5 clipr_0.8.0
## [37] abind_1.4-8 R.methodsS3_1.8.2 lifecycle_1.0.5
## [40] yaml_2.3.12 carData_3.0-6 recipes_1.3.2
## [43] SparseArray_1.11.13 BiocFileCache_3.1.0 grid_4.5.3
## [46] blob_1.3.0 promises_1.5.0 crayon_1.5.3
## [49] lattice_0.22-9 sys_3.4.3 maketools_1.3.2
## [52] pillar_1.11.1 knitr_1.51 corpcor_1.6.10
## [55] future.apply_1.20.2 mixOmics_6.35.1 codetools_0.2-20
## [58] glue_1.8.1 beepr_2.0 data.table_1.18.2.1
## [61] vctrs_0.7.3 Rdpack_2.6.6 testthat_3.3.2
## [64] gtable_0.3.6 assertthat_0.2.1 cachem_1.1.0
## [67] gower_1.0.2 xfun_0.57 rbibutils_2.4.1
## [70] S4Arrays_1.11.1 mime_0.13 prodlim_2026.03.11
## [73] survival_3.8-6 timeDate_4052.112 audio_0.1-12
## [76] iterators_1.0.14 hardhat_1.4.3 lava_1.9.0
## [79] statmod_1.5.1 ipred_0.9-15 nlme_3.1-169
## [82] fenr_1.9.2 bit64_4.6.0-1 filelock_1.0.3
## [85] splines2_0.5.4 bslib_0.10.0 Deriv_4.2.0
## [88] otel_0.2.0 rpart_4.1.27 colorspace_2.1-2
## [91] DBI_1.3.0 Hmisc_5.2-5 nnet_7.3-20
## [94] tidyselect_1.2.1 bit_4.6.0 compiler_4.5.3
## [97] curl_7.0.0 httr2_1.2.2 htmlTable_2.4.3
## [100] DelayedArray_0.37.1 stringfish_0.18.0 checkmate_2.3.4
## [103] scales_1.4.0 rappdirs_0.3.4 digest_0.6.39
## [106] mirai_2.6.1 rmarkdown_2.31 XVector_0.51.0
## [109] htmltools_0.5.9 pkgconfig_2.0.3 base64enc_0.1-6
## [112] SimDesign_2.25 dbplyr_2.5.2 fastmap_1.2.0
## [115] rlang_1.2.0 htmlwidgets_1.6.4 shiny_1.13.0
## [118] farver_2.1.2 jquerylib_0.1.4 jsonlite_2.0.0
## [121] BiocParallel_1.45.0 dcurver_0.9.3 ModelMetrics_1.2.2.2
## [124] R.oo_1.27.1 magrittr_2.0.5 Formula_1.2-5
## [127] patchwork_1.3.2 Rcpp_1.1.1-1 visdat_0.6.0
## [130] stringi_1.8.7 pROC_1.19.0.1 brio_1.1.5
## [133] MASS_7.3-65 plyr_1.8.9 parallel_4.5.3
## [136] listenv_0.10.1 ggrepel_0.9.8 splines_4.5.3
## [139] hms_1.1.4 igraph_2.2.3 ggpubr_0.6.3
## [142] ranger_0.18.0 ggsignif_0.6.4 rngtools_1.5.2
## [145] buildtools_1.0.0 reshape2_1.4.5 qs2_0.1.7
## [148] GPArotation_2025.3-1 tidybulk_2.1.2 evaluate_1.0.5
## [151] RcppParallel_5.1.11-2 BiocManager_1.30.27 tzdb_0.5.0
## [154] nanonext_1.8.2 foreach_1.5.2 missForest_1.6.1
## [157] httpuv_1.6.17 future_1.70.0 mirt_1.46.1
## [160] BiocBaseUtils_1.13.0 broom_1.0.12 xtable_1.8-8
## [163] e1071_1.7-17 RSpectra_0.16-2 rstatix_0.7.3
## [166] later_1.4.8 class_7.3-23 rARPACK_0.11-0
## [169] memoise_2.0.1 ellipse_0.5.0 cluster_2.1.8.2
## [172] timechange_0.4.0 globals_0.19.1 caret_7.0-1