diff --git a/DESCRIPTION b/DESCRIPTION index e108588..f0d7439 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: tidySingleCellExperiment Title: Brings SingleCellExperiment to the Tidyverse -Version: 1.19.1 +Version: 1.19.2 Authors@R: c(person("Stefano", "Mangiola", comment=c(ORCID="0000-0001-7474-836X"), email="mangiolastefano@gmail.com", @@ -60,7 +60,8 @@ Suggests: celldex, dittoSeq, plotly, - rbibutils + rbibutils, + prettydoc VignetteBuilder: knitr RdMacros: @@ -70,6 +71,6 @@ biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, SingleCell, GeneExpression, Normalization, Clustering, QualityControl, Sequencing Encoding: UTF-8 -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 URL: https://github.com/stemangiola/tidySingleCellExperiment BugReports: https://github.com/stemangiola/tidySingleCellExperiment/issues diff --git a/NAMESPACE b/NAMESPACE index c8e6f8e..423ce68 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ S3method(add_count,SingleCellExperiment) S3method(anti_join,SingleCellExperiment) +S3method(append_samples,SingleCellExperiment) S3method(arrange,SingleCellExperiment) S3method(as_tibble,SingleCellExperiment) S3method(bind_cols,SingleCellExperiment) @@ -82,6 +83,7 @@ importFrom(dplyr,group_split) importFrom(dplyr,inner_join) importFrom(dplyr,left_join) importFrom(dplyr,mutate) +importFrom(dplyr,n) importFrom(dplyr,pull) importFrom(dplyr,rename) importFrom(dplyr,right_join) @@ -158,6 +160,7 @@ importFrom(tidyr,unnest) importFrom(tidyselect,all_of) importFrom(tidyselect,eval_select) importFrom(ttservice,aggregate_cells) +importFrom(ttservice,append_samples) importFrom(ttservice,bind_cols) importFrom(ttservice,bind_rows) importFrom(ttservice,join_features) diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index baec5fc..8b77667 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -29,14 +29,7 @@ arrange.SingleCellExperiment <- function(.data, ..., .by_group=FALSE) { #' @name bind_rows #' @rdname bind_rows 
#' @inherit ttservice::bind_rows -#' -#' @examples -#' data(pbmc_small) -#' tt <- pbmc_small -#' bind_rows(tt, tt) -#' -#' tt_bind <- tt |> select(nCount_RNA, nFeature_RNA) -#' tt |> bind_cols(tt_bind) +#' @noRd #' #' @references #' Hutchison, W.J., Keyes, T.J., The tidyomics Consortium. et al. The tidyomics ecosystem: enhancing omic data analyses. Nat Methods 21, 1166–1170 (2024). https://doi.org/10.1038/s41592-024-02299-2 @@ -48,6 +41,13 @@ arrange.SingleCellExperiment <- function(.data, ..., .by_group=FALSE) { #' @importFrom SingleCellExperiment cbind #' @export bind_rows.SingleCellExperiment <- function(..., .id=NULL, add.cell.ids=NULL) { + lifecycle::deprecate_warn( + when = "1.19.2", + what = "bind_rows()", + with = "append_samples()", + details = "bind_rows is not a generic method in dplyr and may cause conflicts. Use append_samples." + ) + tts <- flatten_if(dots_values(...), is_spliced) new_obj <- SingleCellExperiment::cbind(tts[[1]], tts[[2]]) @@ -62,6 +62,45 @@ bind_rows.SingleCellExperiment <- function(..., .id=NULL, add.cell.ids=NULL) { new_obj } +#' @name append_samples +#' @rdname append_samples +#' @title Append samples from multiple SingleCellExperiment objects +#' +#' @description +#' Append samples from multiple SingleCellExperiment objects by column-binding them. +#' This function is equivalent to `cbind` but provides a tidyverse-like interface. +#' +#' @param x First SingleCellExperiment object to combine +#' @param ... 
Additional SingleCellExperiment objects to combine by samples +#' @param .id Object identifier (currently not used) +#' +#' @return A combined SingleCellExperiment object +#' +#' @examples +#' data(pbmc_small) +#' append_samples(pbmc_small, pbmc_small) +#' +#' @importFrom ttservice append_samples +#' @importFrom rlang flatten_if +#' @importFrom rlang is_spliced +#' @importFrom SingleCellExperiment cbind +#' @export +append_samples.SingleCellExperiment <- function(x, ..., .id = NULL) { + # Gather every input object into one list (.id is accepted but never read here) + tts <- flatten_if(list(x, ...), is_spliced) + new_obj <- do.call(cbind, tts) + + # Cell names must stay unique: warn, then disambiguate repeats via make.unique() + if (anyDuplicated(colnames(new_obj)) > 0) { + warning("tidySingleCellExperiment says:", + " you have duplicated cell names, they will be made unique.", call. = FALSE) + unique_colnames <- make.unique(colnames(new_obj), sep = "_") + colnames(new_obj) <- unique_colnames + } + + new_obj +} + #' @importFrom rlang flatten_if #' @importFrom rlang is_spliced #' @importFrom rlang dots_values @@ -159,7 +198,7 @@ filter.SingleCellExperiment <- function(.data, ..., .preserve=FALSE) { #' @name group_by #' @rdname group_by #' @inherit dplyr::group_by -#' @seealso \code{} +#' @seealso \code{\link[dplyr]{group_by}} #' #' @examples #' data(pbmc_small) diff --git a/R/methods.R b/R/methods.R index 9d2c679..8c2b7f2 100755 --- a/R/methods.R +++ b/R/methods.R @@ -40,7 +40,7 @@ setClass("tidySingleCellExperiment", contains="SingleCellExperiment") #' @importFrom stringr str_subset #' @export setMethod("join_features", "SingleCellExperiment", function(.data, - features=NULL, all=FALSE, exclude_zeros=FALSE, shape="long", ...) + features=NULL, all=FALSE, exclude_zeros=FALSE, shape="wide", ...) 
{ # CRAN Note .cell <- NULL .feature <- NULL @@ -157,7 +157,7 @@ tidy.SingleCellExperiment <- function(object) { #' @importFrom dplyr mutate #' @importFrom dplyr pull #' @importFrom dplyr left_join -#' @importFrom dplyr unnest +#' @importFrom tidyr unnest #' @importFrom S4Vectors DataFrame #' @importFrom methods as #' @@ -170,6 +170,9 @@ setMethod("aggregate_cells", "SingleCellExperiment", function(.data, # Fix NOTEs feature <- NULL + .feature <- NULL + my_id_to_split_by___ <- NULL + assay_name <- NULL .sample <- enquo(.sample) # Subset only wanted assays diff --git a/R/methods_DEPRECATED.R b/R/methods_DEPRECATED.R index b832e7f..38a6019 100644 --- a/R/methods_DEPRECATED.R +++ b/R/methods_DEPRECATED.R @@ -35,7 +35,7 @@ join_transcripts <- transcripts=NULL, all=FALSE, exclude_zeros=FALSE, - shape="long", ...) + shape="wide", ...) { UseMethod("join_transcripts", .data) } @@ -45,7 +45,7 @@ join_transcripts.default <- transcripts=NULL, all=FALSE, exclude_zeros=FALSE, - shape="long", ...) + shape="wide", ...) { print("tidySingleCellExperiment says:", " This function cannot be applied to this object") @@ -56,7 +56,7 @@ join_transcripts.Seurat <- transcripts=NULL, all=FALSE, exclude_zeros=FALSE, - shape="long", ...) + shape="wide", ...) 
{ deprecate_warn( "1.1.2", "join_transcripts()", diff --git a/R/tidyr_methods.R b/R/tidyr_methods.R index c2af43e..da6e406 100755 --- a/R/tidyr_methods.R +++ b/R/tidyr_methods.R @@ -93,6 +93,7 @@ unnest_single_cell_experiment <- function(data, cols, ..., #' @importFrom tidyr nest #' @importFrom rlang enquos #' @importFrom rlang := +#' @importFrom dplyr n #' @export nest.SingleCellExperiment <- function(.data, ..., .names_sep = NULL) { diff --git a/README.Rmd b/README.Rmd index c3d990f..228cdf0 100755 --- a/README.Rmd +++ b/README.Rmd @@ -461,7 +461,7 @@ pbmc_small_nested_interactions <- cell_signaling(genes=rownames(data), cluster=cluster) |> inter_network(data=data, signal=_, genes=rownames(data), cluster=cluster) %$% `individual-networks` |> - map_dfr(~ bind_rows(as_tibble(.x))) + map_dfr(~ append_samples(as_tibble(.x))) })) pbmc_small_nested_interactions |> diff --git a/README.md b/README.md index 9edd55f..b766b72 100755 --- a/README.md +++ b/README.md @@ -314,10 +314,6 @@ pbmc_small_pca <- ## TRUE, : You're computing too large a percentage of total singular values, use a ## standard svd instead. 
- ## Warning in (function (A, nv = 5, nu = nv, maxit = 1000, work = nv + 7, reorth = - ## TRUE, : did not converge--results might be invalid!; try increasing work or - ## maxit - ``` r pbmc_small_pca ``` @@ -742,7 +738,7 @@ pbmc_small_nested_interactions <- cell_signaling(genes=rownames(data), cluster=cluster) |> inter_network(data=data, signal=_, genes=rownames(data), cluster=cluster) %$% `individual-networks` |> - map_dfr(~ bind_rows(as_tibble(.x))) + map_dfr(~ append_samples(as_tibble(.x))) })) pbmc_small_nested_interactions |> diff --git a/inst/NEWS.rd b/inst/NEWS.rd index c4c99f7..23d1858 100644 --- a/inst/NEWS.rd +++ b/inst/NEWS.rd @@ -1,6 +1,14 @@ \name{NEWS} \title{News for Package \pkg{tidySingleCellExperiment}} +\section{Changes in version 1.19.2, Bioconductor 3.22 Release}{ +\itemize{ + \item Soft deprecated \code{bind_rows()} in favor of \code{append_samples()} from ttservice. + \item Added \code{append_samples()} method for SingleCellExperiment objects. + \item \code{bind_rows()} is not a generic method in dplyr and may cause conflicts. + \item Users are encouraged to use \code{append_samples()} instead. +}} + \section{Changes in version 1.4.0, Bioconductor 3.14 Release}{ \itemize{ \item Improved sample_n, and sample_frac functions. @@ -15,3 +23,10 @@ \item Use .cell for cell column name to avoid errors when cell column is defined by the user }} +\section{Changes in version 1.19.2, Bioconductor 3.22 Release}{ +\itemize{ + \item \strong{BREAKING CHANGE}: Changed default shape parameter in \code{join_features()} from "long" to "wide". + This means that \code{join_features()} now returns a SingleCellExperiment object by default instead of a tibble. + To get the old behavior, explicitly specify \code{shape="long"}. 
+}} + diff --git a/man/append_samples.Rd b/man/append_samples.Rd new file mode 100644 index 0000000..dc54941 --- /dev/null +++ b/man/append_samples.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr_methods.R +\name{append_samples} +\alias{append_samples} +\alias{append_samples.SingleCellExperiment} +\title{Append samples from multiple SingleCellExperiment objects} +\usage{ +\method{append_samples}{SingleCellExperiment}(x, ..., .id = NULL) +} +\arguments{ +\item{x}{First SingleCellExperiment object to combine} + +\item{...}{Additional SingleCellExperiment objects to combine by samples} + +\item{.id}{Object identifier (currently not used)} +} +\value{ +A combined SingleCellExperiment object +} +\description{ +Append samples from multiple SingleCellExperiment objects by column-binding them. +This function is equivalent to `cbind` but provides a tidyverse-like interface. +} +\examples{ +data(pbmc_small) +append_samples(pbmc_small, pbmc_small) + +} diff --git a/man/bind_rows.Rd b/man/bind_rows.Rd deleted file mode 100644 index ed34bb5..0000000 --- a/man/bind_rows.Rd +++ /dev/null @@ -1,72 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dplyr_methods.R -\name{bind_rows} -\alias{bind_rows} -\alias{bind_rows.SingleCellExperiment} -\alias{bind_cols.SingleCellExperiment} -\alias{bind_cols} -\title{Efficiently bind multiple data frames by row and column} -\usage{ -\method{bind_rows}{SingleCellExperiment}(..., .id = NULL, add.cell.ids = NULL) - -\method{bind_cols}{SingleCellExperiment}(..., .id = NULL) -} -\arguments{ -\item{...}{Data frames to combine. - - Each argument can either be a data frame, a list that could be a data - frame, or a list of data frames. - - When row-binding, columns are matched by name, and any missing - columns will be filled with NA. - - When column-binding, rows are matched by position, so all data - frames must have the same number of rows. 
To match by value, not - position, see mutate-joins.} - -\item{.id}{Data frame identifier. - - When `.id` is supplied, a new column of identifiers is - created to link each row to its original data frame. The labels - are taken from the named arguments to `bind_rows()`. When a - list of data frames is supplied, the labels are taken from the - names of the list. If no names are found a numeric sequence is - used instead.} - -\item{add.cell.ids}{from Seurat 3.0 A character vector of length(x = c(x, y)). Appends the corresponding values to the start of each objects' cell names.} -} -\value{ -`bind_rows()` and `bind_cols()` return the same type as - the first input, either a data frame, `tbl_df`, or `grouped_df`. - -`bind_rows()` and `bind_cols()` return the same type as - the first input, either a data frame, `tbl_df`, or `grouped_df`. -} -\description{ -This is an efficient implementation of the common pattern of -`do.call(rbind, dfs)` or `do.call(cbind, dfs)` for binding many -data frames into one. - -This is an efficient implementation of the common pattern of -`do.call(rbind, dfs)` or `do.call(cbind, dfs)` for binding many -data frames into one. -} -\details{ -The output of `bind_rows()` will contain a column if that column -appears in any of the inputs. - -The output of `bind_rows()` will contain a column if that column -appears in any of the inputs. -} -\examples{ -data(pbmc_small) -tt <- pbmc_small -bind_rows(tt, tt) - -tt_bind <- tt |> select(nCount_RNA, nFeature_RNA) -tt |> bind_cols(tt_bind) - -} -\references{ -Hutchison, W.J., Keyes, T.J., The tidyomics Consortium. et al. The tidyomics ecosystem: enhancing omic data analyses. Nat Methods 21, 1166–1170 (2024). https://doi.org/10.1038/s41592-024-02299-2 -} diff --git a/man/group_by.Rd b/man/group_by.Rd index b88b8a3..35cbbc3 100644 --- a/man/group_by.Rd +++ b/man/group_by.Rd @@ -101,5 +101,5 @@ Hutchison, W.J., Keyes, T.J., The tidyomics Consortium. et al. 
The tidyomics eco Wickham H, Averick M, Bryan J, Chang W, McGowan LD, François R, et al. Welcome to the tidyverse. Journal of Open Source Software. 2019;4(43):1686. https://doi.org/10.21105/joss.01686 } \seealso{ -\code{} +\code{\link[dplyr]{group_by}} } diff --git a/man/join_features.Rd b/man/join_features.Rd index fad2c8c..62f2dc0 100644 --- a/man/join_features.Rd +++ b/man/join_features.Rd @@ -10,7 +10,7 @@ features = NULL, all = FALSE, exclude_zeros = FALSE, - shape = "long", + shape = "wide", ... ) } diff --git a/man/join_transcripts.Rd b/man/join_transcripts.Rd index c4dfa6a..13337d7 100644 --- a/man/join_transcripts.Rd +++ b/man/join_transcripts.Rd @@ -9,7 +9,7 @@ join_transcripts( transcripts = NULL, all = FALSE, exclude_zeros = FALSE, - shape = "long", + shape = "wide", ... ) } diff --git a/tests/testthat/test-dplyr_methods.R b/tests/testthat/test-dplyr_methods.R index 46cb83f..755b475 100755 --- a/tests/testthat/test-dplyr_methods.R +++ b/tests/testthat/test-dplyr_methods.R @@ -24,9 +24,9 @@ df$factor <- sample( # expect_identical(fd, df) # }) -test_that("bind_rows()", { +test_that("append_samples()", { # warn about duplicated cells names - expect_warning(fd <- bind_rows(df, df)) + expect_warning(fd <- append_samples(df, df), "duplicated cell names") # cell names should be unique after binding expect_true(!any(duplicated(pull(fd, .cell)))) }) diff --git a/tests/testthat/test-methods.R b/tests/testthat/test-methods.R index b42540e..2d8e52e 100644 --- a/tests/testthat/test-methods.R +++ b/tests/testthat/test-methods.R @@ -12,6 +12,13 @@ test_that("show()", { test_that("join_features()", { gs <- sample(rownames(df), 3) + # wide (default) + fd <- join_features(df, gs, assay="counts") + expect_s4_class(fd, "SingleCellExperiment") + expect_null(fd$.feature) + expect_identical( + unname(t(as.matrix(as_tibble(fd)[, make.names(gs)]))), + as.matrix(unname(counts(df)[gs, ]))) # long fd <- join_features(df, gs, shape="long") expect_s3_class(fd, "tbl_df") @@ -20,13 +27,6 @@ 
test_that("join_features()", { expect_identical( matrix(fd$.abundance_counts, nrow=length(gs)), as.matrix(unname(counts(df)[fd$.feature[seq_along(gs)], ]))) - # wide - fd <- join_features(df, gs, shape="wide", assay="counts") - expect_s4_class(fd, "SingleCellExperiment") - expect_null(fd$.feature) - expect_identical( - unname(t(as.matrix(as_tibble(fd)[, make.names(gs)]))), - as.matrix(unname(counts(df)[gs, ]))) }) test_that("as_tibble()", { diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index 53230ce..5213f27 100755 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -3,8 +3,13 @@ title: "Overview of the tidySingleCellExperiment package" package: "`r BiocStyle::pkg_ver('tidySingleCellExperiment')`" author: "Stefano Mangiola" output: - BiocStyle::html_document: - toc_float: true + prettydoc::html_pretty: + theme: cayman + toc: true + toc_depth: 2 + number_sections: true + fig_caption: true + df_print: paged bibliography: tidySingleCellExperiment.bib vignette: > %\VignetteIndexEntry{Overview of the tidySingleCellExperiment package} @@ -179,7 +184,7 @@ Here we plot abundance of two features for each group. ```{r} pbmc_small_polished %>% - join_features(features=c("HLA-DRA", "LYZ")) %>% + join_features(features=c("HLA-DRA", "LYZ"), shape="long") %>% ggplot(aes(groups, .abundance_counts + 1, fill=groups)) + geom_boxplot(outlier.shape=NA) + geom_jitter(aes(size=nCount_RNA), alpha=0.5, width=0.2) + @@ -262,17 +267,36 @@ marker_genes <- map(~ .x %>% head(10) %>% rownames()) %>% - unlist() + unlist() %>% + unique() # Plot heatmap pbmc_small_cluster %>% - join_features(features=marker_genes) %>% + join_features(features=marker_genes, shape="long") %>% group_by(label) %>% heatmap( .row=.feature, .column=.cell, .value=.abundance_counts, scale="column") ``` +# Combining datasets + +We can use `append_samples()` to combine multiple SingleCellExperiment objects by samples. 
+This is useful when you have multiple datasets that you want to analyze together. + +```{r} +# Create two subsets of the data +pbmc_subset1 <- pbmc_small_cluster %>% + filter(groups == "g1") + +pbmc_subset2 <- pbmc_small_cluster %>% + filter(groups == "g2") + +# Combine them using append_samples +combined_data <- append_samples(pbmc_subset1, pbmc_subset2) +combined_data +``` + # Reduce dimensions We can calculate the first 3 UMAP dimensions using `r BiocStyle::Biocpkg("scater")`. @@ -454,7 +478,7 @@ pbmc_small_nested_interactions <- cell_signaling(genes=rownames(data), cluster=cluster) %>% inter_network(data=data, signal=., genes=rownames(data), cluster=cluster) %$% `individual-networks` %>% - map_dfr(~ bind_rows(as_tibble(.x))) + map_dfr(~ append_samples(as_tibble(.x))) })) pbmc_small_nested_interactions %>%