Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: gDRutils
Type: Package
Title: A package with helper functions for processing drug response data
Version: 1.9.6
Date: 2026-03-23
Version: 1.9.7
Date: 2026-04-14
Authors@R: c(person("Bartosz", "Czech", role=c("aut"),
comment = c(ORCID = "0000-0002-9908-3007")),
person("Arkadiusz", "Gladki", role=c("cre", "aut"), email="gladki.arkadiusz@gmail.com",
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## gDRutils 1.9.7 - 2026-04-14
* add support for custom experiment metadata (`title`, `description`, `source_name`, `source_id`) in `merge_MAE`

## gDRutils 1.9.6 - 2026-03-23
* `standardize_MAE` also standardizes internal identifiers

Expand Down
146 changes: 136 additions & 10 deletions R/merge_SE.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,16 @@
#' that can arise from multiple projects.
#' @param discard_keys Character vector of strings that will be discarded
#' during creating BumpyMatrix object.
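#' @param title String specifying the final DataSetDB title. If NULL, a title is auto-generated from the names of the merged MAEs.
#' @param description String specifying the final DataSetDB description. If NULL, a description is auto-generated from the names of the merged MAEs and the titles found in their experiment metadata.
#' @param source_name String specifying the standard DSDB source name. If NULL, the single source name shared by all input experiments is used; if there is no such unique name, "merged_analysis" is used.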
#' @param source_id String specifying the unique DSDB source ID. If NULL, uses "merged_dataset".
#' @keywords SE_operators
#'
#' @examples
#' mae1 <- get_synthetic_data("finalMAE_combo_2dose_nonoise")
#' mae2 <- get_synthetic_data("finalMAE_combo_2dose_nonoise")
#' merge_MAE(list(mae1 = mae1, mae2 = mae2))
#' merge_MAE(list(mae1 = mae1, mae2 = mae2), title = "Test", description = "Test MAE")
#'
#' @return Merged MultiAssayExperiment object.
#' @export
Expand All @@ -25,28 +29,112 @@ merge_MAE <- function(MAElist,
"control_type",
"iso_level",
"conc_1",
"conc_2")) {
"conc_2"),
title = NULL,
description = NULL,
source_name = NULL,
source_id = NULL) {

checkmate::assert_list(MAElist, types = "MultiAssayExperiment")
checkmate::assert_string(title, null.ok = TRUE)
checkmate::assert_string(description, null.ok = TRUE)
checkmate::assert_string(source_name, null.ok = TRUE)
checkmate::assert_string(source_id, null.ok = TRUE)

experiments <- unique(unlist(lapply(MAElist, names)))

merged_SE_assays <- lapply(experiments, function(exp_name) {
exp_list <- lapply(MAElist, function(mae) {
if (exp_name %in% names(mae)) {
mae[[exp_name]]
} else {
NULL
}
if (exp_name %in% names(mae)) mae[[exp_name]] else NULL
})
exp_list <- exp_list[!vapply(exp_list, is.null, FUN.VALUE = logical(1))]
merge_SE(exp_list)
})
names(merged_SE_assays) <- experiments

mae_names <- names(MAElist)
if (is.null(mae_names) || all(trimws(mae_names) == "")) {
mae_names <- paste0("Dataset_", seq_along(MAElist))
}
Comment thread
bczech marked this conversation as resolved.

all_sources <- list()
original_titles <- c()

for (mae in MAElist) {
for (exp in names(mae)) {
meta <- as.list(S4Vectors::metadata(mae[[exp]])$experiment_metadata)
if (length(meta) > 0) {
if (is.list(meta$sources)) all_sources <- c(all_sources, meta$sources)
if (!is.null(meta$title)) original_titles <- c(original_titles, meta$title)
}
}
}


if (is.null(title)) {
title <- sprintf("Merged MAE: %s", paste(mae_names, collapse = " + "))
}

if (is.null(description)) {
description <- sprintf("Synthetically merged dataset originating from: %s.", paste(mae_names, collapse = ", "))
unique_titles <- unique(original_titles)
if (length(unique_titles) > 0) {
description <- paste0(description, " Original Titles: [", paste(unique_titles, collapse = " | "), "]")
}
}

if (is.null(source_name)) {
if (length(all_sources) > 0) {
unique_names <- unique(vapply(all_sources, function(s) {
if (!is.null(s$name)) s$name else "unknown"
}, character(1)))

source_name <- if (length(unique_names) == 1 && unique_names[1] != "unknown") {
unique_names[1]
} else {
"merged_analysis"
}
} else {
source_name <- "merged_analysis"
}
}
Comment thread
bczech marked this conversation as resolved.

if (is.null(source_id)) {
source_id <- "merged_dataset"
}

synthetic_experiment_metadata <- list(
title = title,
description = description,
experimentalist = Sys.info()[["user"]],
sources = list(list(name = source_name, id = source_id))
)

for (i in seq_along(merged_SE_assays)) {
meta_list <- as.list(S4Vectors::metadata(merged_SE_assays[[i]]))
meta_list$experiment_metadata <- synthetic_experiment_metadata
S4Vectors::metadata(merged_SE_assays[[i]]) <- meta_list
}
Comment thread
bczech marked this conversation as resolved.

base_metadata <- as.list(S4Vectors::metadata(MAElist[[1]]))
if (length(base_metadata) == 0) base_metadata <- list()

if (!is.null(base_metadata$.internal$DataSetDB$dataset)) {
ds_meta <- as.list(base_metadata$.internal$DataSetDB$dataset)
ds_meta$title <- synthetic_experiment_metadata$title
ds_meta$description <- synthetic_experiment_metadata$description
ds_meta$sources <- synthetic_experiment_metadata$sources

internal_meta <- as.list(base_metadata$.internal)
internal_meta$DataSetDB <- as.list(internal_meta$DataSetDB)
internal_meta$DataSetDB$dataset <- ds_meta

base_metadata$.internal <- internal_meta
}

MultiAssayExperiment::MultiAssayExperiment(
experiments = MultiAssayExperiment::ExperimentList(merged_SE_assays),
metadata = Reduce(c, lapply(MAElist, S4Vectors::metadata))
Comment thread
bczech marked this conversation as resolved.
metadata = base_metadata
Comment thread
bczech marked this conversation as resolved.
)
}

Expand Down Expand Up @@ -207,7 +295,6 @@ merge_assay <- function(SElist,
list(DT = DT, BM = BM)
}


#' Identify unique metadata fields from a list of \code{SummarizedExperiment}s
#'
#' @param SElist named list of \code{SummarizedExperiment}s
Expand All @@ -233,7 +320,6 @@ identify_unique_se_metadata_fields <- function(SElist) {
})))
}


#' Merge metadata
#'
#' @param SElist named list of \code{SummarizedExperiment}s
Expand Down Expand Up @@ -261,12 +347,52 @@ merge_metadata <- function(SElist,
checkmate::assert_character(metadata_fields)

all_metadata <- lapply(metadata_fields, function(x) {

if (x %in% c("experiment_metadata", ".internal")) {

valid_metas <- lapply(SElist, function(se) S4Vectors::metadata(se)[[x]])
valid_metas <- valid_metas[!vapply(valid_metas, is.null, FUN.VALUE = logical(1))]

if (length(valid_metas) == 0) return(list())

if (x == "experiment_metadata") {
synth <- as.list(valid_metas[[1]])

all_sources <- list()
for (vm in valid_metas) {
vm_list <- as.list(vm)
if (is.list(vm_list$sources)) all_sources <- c(all_sources, vm_list$sources)
}

if (length(all_sources) > 0) {
unique_names <- unique(vapply(all_sources, function(s) {
if (!is.null(s$name)) s$name else "unknown"
}, character(1)))

std_name <- if (length(unique_names) == 1 && unique_names[1] != "unknown") {
unique_names[1]
} else {
"merged_analysis"
}

synth$sources <- list(list(name = std_name, id = "merged_dataset"))
} else {
synth$sources <- list()
}

return(synth)
}

return(as.list(valid_metas[[1]]))
}
Comment thread
bczech marked this conversation as resolved.

do.call(c, lapply(names(SElist), function(SE) {
meta <- list(S4Vectors::metadata(SElist[[SE]])[[x]])
names(meta) <- SE
meta
}))
})

names(all_metadata) <- metadata_fields
all_metadata
}
16 changes: 14 additions & 2 deletions man/merge_MAE.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 30 additions & 7 deletions tests/testthat/test-merge_SE.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ listSE <- lapply(listMAE, function(x) x[[2]])
names(listSE) <- c("combo1", "combo2")

listMAE2 <- lapply(list.files(system.file(package = "gDRtestData", "testdata"),
"final", full.names = TRUE)[1:2], qs::qread)
"final", full.names = TRUE)[1:2], qs::qread)
listSE2 <- lapply(listMAE, function(x) x[[1]])
names(listSE2) <- c("combo1", "combo2")

Expand All @@ -28,8 +28,11 @@ test_that("merge_metadata and identify_unique_se_metadata_fields work as expecte
metadata_fields <- identify_unique_se_metadata_fields(listSE)
mergedMetadata <- merge_metadata(listSE, metadata_fields)
expect_identical(names(mergedMetadata), metadata_fields)
expect_identical(names(mergedMetadata$experiment_metadata), names(listSE))

if ("experiment_metadata" %in% names(mergedMetadata)) {
expect_true("sources" %in% names(mergedMetadata$experiment_metadata))
}

listSE2 <- listSE
newMetaName <- "dummy_meta"
S4Vectors::metadata(listSE2$combo1)[[newMetaName]] <- list()
Expand All @@ -45,29 +48,31 @@ test_that("merge_SE works as expected", {
checkmate::expect_class(mergedSE$result, "SummarizedExperiment")
S4Vectors::metadata(mergedSE$result)[["df_raw_data"]] <- list(NULL)
validate_SE(mergedSE$result)

additional_col_name <- "QCS"
mergedSE2 <- purrr::quietly(merge_SE)(listSE, additional_col_name)
assayNormalized <- convert_se_assay_to_dt(mergedSE2$result, "Metrics")
expect_true(additional_col_name %in% names(assayNormalized))
expect_identical(unique(assayNormalized[[additional_col_name]]), names(listSE))
expect_identical(SummarizedExperiment::assayNames(listSE[[1]]),
SummarizedExperiment::assayNames(mergedSE[[1]]))
SummarizedExperiment::assayNames(mergedSE$result))
reset_env_identifiers()
})
})


# Verifies that merge_SE() merges the SummarizedExperiments in `listSE2`
# into a single valid SE, both with the default arguments and with a custom
# name for the additional identifier column.
test_that("merge_SE works as expected with combo matrix data", {
  # purrr::quietly() wraps the call; the merged SE is read from `$result`.
  mergedSE <- purrr::quietly(merge_SE)(listSE2)
  checkmate::expect_class(mergedSE$result, "SummarizedExperiment")
  # NOTE(review): "df_raw_data" is overwritten before validation, presumably
  # because validate_SE() does not accept the merged value -- confirm.
  S4Vectors::metadata(mergedSE$result)[["df_raw_data"]] <- list(NULL)
  validate_SE(mergedSE$result)

  additional_col_name <- "QCS"
  mergedSE2 <- purrr::quietly(merge_SE)(listSE2, additional_col_name)
  assayNormalized <- convert_se_assay_to_dt(mergedSE2$result, "Metrics")
  expect_true(additional_col_name %in% names(assayNormalized))
  # The additional column is expected to carry the names of the merged SEs,
  # so compare against the list that was actually merged in this test.
  expect_identical(
    unique(assayNormalized[[additional_col_name]]),
    names(listSE2)
  )
  # Merging must not add or drop assays relative to the inputs.
  expect_identical(
    SummarizedExperiment::assayNames(listSE2[[1]]),
    SummarizedExperiment::assayNames(mergedSE$result)
  )
})

test_that("merge_SE works as expected with mixed data types", {
Expand All @@ -85,10 +90,28 @@ test_that("merge_SE works with data with additional perturbations", {
expect_equal(dim(mergedSE$result), c(10, 5))
})

test_that("merge_MAE works as expected", {
mergedMAE <- purrr::quietly(merge_MAE)(listMAE)
test_that("merge_MAE works as expected with synthetic metadata injection", {
custom_title <- "Unit Test Merged MAE"
custom_source_id <- "test_dataset_001"

mergedMAE <- purrr::quietly(merge_MAE)(
listMAE,
title = custom_title,
source_id = custom_source_id
)
checkmate::expect_class(mergedMAE$result, "MultiAssayExperiment")
validate_MAE(mergedMAE$result)

mae_meta <- S4Vectors::metadata(mergedMAE$result)
if (!is.null(mae_meta$.internal$DataSetDB$dataset)) {
expect_equal(mae_meta$.internal$DataSetDB$dataset$title, custom_title)
expect_equal(mae_meta$.internal$DataSetDB$dataset$sources[[1]]$id, custom_source_id)
}

se_meta <- S4Vectors::metadata(mergedMAE$result[[1]])$experiment_metadata
expect_equal(se_meta$title, custom_title)
expect_equal(se_meta$sources[[1]]$id, custom_source_id)

expect_identical(
SummarizedExperiment::assayNames(MultiAssayExperiment::experiments(listMAE[[1]])[[1]]),
SummarizedExperiment::assayNames(MultiAssayExperiment::experiments(mergedMAE$result)[[1]])
Expand Down
Loading