diff --git a/DESCRIPTION b/DESCRIPTION index a3c52e55..31e61414 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: SpatialData Title: Representation of Python's SpatialData in R Depends: R (>= 4.5) -Version: 0.99.26 +Version: 0.99.27 Description: Interface to Python's 'SpatialData', currently including: reticulate-based use of 'spatialdata-io' for reading of manufacturer data and writing to .zarr, on-disk representation of images/labels as diff --git a/NAMESPACE b/NAMESPACE index e0822d18..9fafee78 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -52,6 +52,7 @@ exportMethods(data) exportMethods(data_type) exportMethods(dim) exportMethods(element) +exportMethods(feature_key) exportMethods(getTable) exportMethods(hasTable) exportMethods(image) @@ -93,14 +94,13 @@ importFrom(DelayedArray,ConstantArray) importFrom(DelayedArray,DelayedArray) importFrom(DelayedArray,cbind) importFrom(DelayedArray,rbind) -importFrom(DelayedArray,realize) importFrom(Matrix,rowSums) +importFrom(Matrix,sparseMatrix) importFrom(Matrix,sparseVector) importFrom(Matrix,t) importFrom(RBGL,sp.between) importFrom(Rarr,read_zarr_attributes) importFrom(Rarr,zarr_overview) -importFrom(S4Arrays,as.array.Array) importFrom(S4Vectors,"metadata<-") importFrom(S4Vectors,coolcat) importFrom(S4Vectors,isSequence) @@ -112,6 +112,8 @@ importFrom(SingleCellExperiment,"int_metadata<-") importFrom(SingleCellExperiment,SingleCellExperiment) importFrom(SingleCellExperiment,int_colData) importFrom(SingleCellExperiment,int_metadata) +importFrom(SummarizedExperiment,"assay<-") +importFrom(SummarizedExperiment,"assayNames<-") importFrom(SummarizedExperiment,"colData<-") importFrom(SummarizedExperiment,assay) importFrom(SummarizedExperiment,colData) @@ -150,12 +152,14 @@ importFrom(methods,setReplaceMethod) importFrom(reticulate,import) importFrom(sf,"st_geometry<-") importFrom(sf,st_as_sf) +importFrom(sf,st_bbox) importFrom(sf,st_coordinates) +importFrom(sf,st_crop) importFrom(sf,st_distance) importFrom(sf,st_geometry) importFrom(sf,st_geometry_type) -importFrom(sf,st_point) -importFrom(sf,st_sfc) +importFrom(sf,st_intersects) +importFrom(sf,st_polygon) importFrom(utils,.DollarNames) importFrom(utils,head) importFrom(utils,tail) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index c92b0aa0..5c9f6541 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -57,6 +57,10 @@ setGeneric("rotate", \(x, t, ...) standardGeneric("rotate")) setGeneric("transform", \(x, ...) standardGeneric("transform")) setGeneric("translation", \(x, t, ...) standardGeneric("translation")) +# sda ---- + +setGeneric("feature_key", \(x, ...) standardGeneric("feature_key")) + # uts ---- setGeneric("layer", \(x, i, ...) standardGeneric("layer")) diff --git a/R/Zattrs.R b/R/Zattrs.R index 2d69664c..d21bec29 100644 --- a/R/Zattrs.R +++ b/R/Zattrs.R @@ -1,6 +1,8 @@ #' @name Zattrs #' @title The `Zattrs` class -#' +#' +#' @aliases feature_key +#' #' @param x list extracted from a OME-NGFF compliant .zattrs file. #' @param name character string for extraction (see ?base::`$`). #' @@ -16,6 +18,8 @@ #' CTname(z) #' CTtype(z) #' CTdata(z, "scale") +#' +#' feature_key(point(x)) #' #' @export Zattrs <- \(x=list()) { @@ -75,3 +79,9 @@ setMethod("$", "Zattrs", \(x, name) x[[name]]) if (!is.null(cs)) coolcat("channels(%d): %s\n", cs) } setMethod("show", "Zattrs", .showZattrs) + +#' @export +setMethod("feature_key", "Zattrs", \(x) x$spatialdata_attrs$feature_key) + +#' @export +setMethod("feature_key", "SpatialDataElement", \(x) feature_key(meta(x))) \ No newline at end of file diff --git a/R/coord_utils.R b/R/coord_utils.R index 83451fc4..440ad77c 100644 --- a/R/coord_utils.R +++ b/R/coord_utils.R @@ -1,6 +1,6 @@ #' @name coord-utils #' @title Coordinate transformations -#' @aliases axes CTname CTtype CTdata CTpath CTgraph addCT rmvCT +#' @aliases axes CTlist CTname CTtype CTdata CTpath CTgraph addCT rmvCT #' #' @param x \code{SpatialData}, an element, or \code{Zattrs}. #' @param i for \code{CTpath}, source node label; else, string or diff --git a/R/mask.R b/R/mask.R index 9f62a532..7a6658a4 100644 --- a/R/mask.R +++ b/R/mask.R @@ -1,15 +1,21 @@ #' @name mask #' @title Masking #' -#' @description ... +#' @description +#' Masking operations serve to aggregate data across layers, e.g., +#' counting points in shapes, averaging image channels by labels, etc. +#' For added flexibility, these may be carried out directly between elements, +#' or using an input \code{SpatialData} object and specifying element names. #' #' @param x \code{\link{SpatialData}} object. #' @param i,j character string; names of elements to mask, #' specifically, \code{i} will be masked by \code{j}, #' adding a \code{table} for \code{j} in \code{x}. +#' @param how character string; statistic to use for masking. +#' @param name function use to generate the new \code{table}'s name. #' @param ... optional arguments passed to and from other methods. #' -#' @return \code{\link{SingleCellExperiment}} +#' @return Input \code{SpatialData} object \code{x} with an additional table. #' #' @examples #' library(SingleCellExperiment) @@ -17,88 +23,144 @@ #' x <- system.file(x, package="SpatialData") #' x <- readSpatialData(x, tables=FALSE) #' -#' # count points in circles -#' x <- mask(x, "blobs_points", "blobs_circles") -#' x <- mask(x, "blobs_image", "blobs_labels") -#' tables(x) +#' # count points in shapes +#' y <- mask(x, "blobs_points", "blobs_circles") +#' tail(tables(y), 1) +#' +#' # average image channels by labels +#' y <- mask(x, "blobs_image", "blobs_labels") +#' tail(tables(y), 1) #' +#' library(SpatialData.data) +#' x <- get_demo_SDdata("merfish") +#' x <- readSpatialData(x) +#' +#' # sum table counts by shapes +#' y <- mask(x, "cells", "anatomical") +#' tail(tables(y), 1) +#' #' @export NULL -# TODO: table from point + shape, image + label etc. etc. etc. +.check_ij <- \(x, .) stopifnot(length(.) == 1, is.character(.), . %in% unlist(colnames(x))) #' @rdname mask +#' @importFrom methods as +#' @importFrom SummarizedExperiment assay assay<- #' @importFrom SingleCellExperiment int_colData int_colData<- int_metadata<- #' @export -setMethod("mask", "SpatialData", \(x, i, j, ...) { - stopifnot(length(i) == 1, is.character(i), i %in% unlist(colnames(x))) - stopifnot(length(j) == 1, is.character(j), j %in% unlist(colnames(x))) - # get element types - ls <- vapply( - list(i, j), \(e) rownames(x)[vapply(colnames(x), - \(es) e %in% es, logical(1))], character(1)) - a <- element(x, ls[[1]], i) - b <- element(x, ls[[2]], j) - t <- .mask(a, b, ...) - md <- list(region=j, - region_key="region", - instance_key="instance") - int_metadata(t)$spatialdata_attrs <- md - cd <- data.frame(region=j, instance=colnames(t)) - int_colData(t) <- cbind(int_colData(t), cd) - nm <- paste0(i, "_masked_by_", j) - `table<-`(x, nm, value=t) +setMethod("mask", c("SpatialData", "ANY", "ANY"), \(x, i, j, + how=NULL, name=\(i, j) sprintf("%s_by_%s", i, j), ...) { + .check_ij(x, i); .check_ij(x, j) + #if (!is.null(how)) how <- match.arg(how, c("sum", "mean")) + ok <- is.character(name) && length(name) == 1 && !name %in% tableNames(x) + nm <- if (is.function(name)) name(i, j) else if (ok) name else stop( + "Invalid 'name'; should be a function or a ", + "character string not yet in 'tableNames(x)'") + f <- \(i) names(which(rapply(colnames(x), \(.) i %in% ., "character"))) + .i <- element(x, f(i), i) + .j <- element(x, f(j), j) + t <- tryCatch(error=\(.) NULL, getTable(x, i)) + se <- .mask(.i, .j, how=how, table=t, ...) + md <- list(region=j, region_key="region", instance_key="instance") + int_metadata(se)$spatialdata_attrs <- md + assay(se) <- as(assay(se), "dgCMatrix") + cd <- int_colData(se) + cd$region <- j + cd$instance <- colnames(se) + int_colData(se) <- cd + `table<-`(x, nm, value=se) }) -setGeneric(".mask", \(a, b, ...) standardGeneric(".mask")) +setGeneric(".mask", \(i, j, ...) standardGeneric(".mask")) +#' @noRd #' @importFrom methods as -#' @importFrom Matrix rowSums sparseVector t +#' @importFrom Matrix sparseVector +#' @importFrom SummarizedExperiment assayNames<- #' @importFrom SingleCellExperiment SingleCellExperiment -#' @importFrom sf st_as_sf st_geometry_type st_sfc st_point st_distance -setMethod(".mask", c("PointFrame", "ShapeFrame"), \(a, b) { - n <- nrow(b <- st_as_sf(data(b))) - fk <- meta(a)$spatialdata_attrs$feature_key - switch(paste(st_geometry_type(b)[1]), - POINT={ - # realize one feature at a time - is <- split(seq_len(length(a)), a[[fk]]) - ns <- lapply(is, \(.) { - # make points 'sf'-compliant - xy <- as.data.frame(a[., c("x", "y")]) - ps <- st_sfc(lapply(asplit(xy, 1), st_point)) - # for each circle, count points within radius - z <- rowSums(st_distance(b, ps) < b$radius) - # sparsify counts - sv <- sparseVector(z[i <- z > 0], which(i), n) - sm <- as(sv, "sparseMatrix") - }) - # collect intro matrix w/ dim. features x circles - ns <- t(as(do.call(cbind, ns), "dgCMatrix")) - rownames(ns) <- names(is) - colnames(ns) <- seq(ncol(ns)) - }) - SingleCellExperiment(list(counts=ns)) +setMethod(".mask", c("ImageArray", "LabelArray"), \(i, j, how=NULL, ...) { + if (is.null(how)) { how <- "mean"; message("Missing 'how'; defaulting to 'mean'") } + stopifnot(dim(i)[-1] == dim(j)) + .j <- as(data(j), "sparseVector") + .j <- as.vector(.j[ok <- .j > 0]) + mx <- apply(data(i), 1, \(.i) { + .i <- as(.i, "sparseVector") + .i <- as.vector(.i[ok]) + tapply(.i, .j, how) + }) + colnames(mx) <- channels(i) + se <- SingleCellExperiment(list(t(mx))) + assayNames(se) <- how + return(se) }) +#' @noRd #' @importFrom methods as -#' @importFrom DelayedArray realize -#' @importFrom S4Arrays as.array.Array +#' @importFrom Matrix t rowSums sparseVector sparseMatrix #' @importFrom SingleCellExperiment SingleCellExperiment -setMethod(".mask", c("ImageArray", "LabelArray"), \(a, b, fun=mean) { - # TODO: somehow rewrite w/o realizing everything - # at once (maybe w/ 'DelayedArray::blockApply'?) - .a2v <- \(.) as.vector(as.array.Array(.)) - stopifnot(dim(a)[-1] == dim(b)) - w <- .a2v(data(b)); w[w == 0] <- NA - n <- length(i <- unique(w[!is.na(w)])) - ns <- vapply(seq_len(dim(a)[1]), \(.) { - v <- .a2v(data(a, 1)[., , ]) - tapply(v, w, sum, na.rm=TRUE) - }, numeric(n)) - ns <- t(as(ns, "dgCMatrix")) - dimnames(ns) <- list(seq(dim(a)[1]), i) +#' @importFrom sf st_as_sf st_geometry_type st_distance +setMethod(".mask", c("PointFrame", "ShapeFrame"), \(i, j, how=NULL, ...) { + if (!is.null(how)) warning("Can only count when masking points; ignoring 'how'") + n <- nrow(j <- st_as_sf(data(j))) + fun <- switch(as.character(st_geometry_type(j[1, ])), + POINT=\(i, j) rowSums(st_distance(j, i) <= j$radius), + \(i, j) vapply(st_intersects(j, i), length, integer(1))) + # realize one feature at i time + is <- split(seq_len(length(i)), i[[feature_key(i)]]) + ns <- lapply(is, \(.) { + # make points 'sf'-compliant + i <- as.data.frame(i[., c("x", "y")]) + i <- st_as_sf(i, coords=c("x", "y")) + # for each shape, count intersecting points + z <- fun(i, j) + # sparsify counts + sv <- sparseVector(z[i <- z > 0], which(i), n) + sm <- as(sv, "sparseMatrix") + }) + # collect into matrix w/ dim. features x shapes + ns <- t(do.call(cbind, ns)) + rownames(ns) <- names(is) + colnames(ns) <- seq(ncol(ns)) SingleCellExperiment(list(counts=ns)) }) -setMethod(".mask", c("ANY", "ANY"), \(a, b) - stop("'mask'ing between these element types not supported.")) + +#' @noRd +#' @importFrom methods as +#' @importFrom Matrix sparseMatrix +#' @importFrom SummarizedExperiment assay +#' @importFrom SingleCellExperiment SingleCellExperiment +setMethod(".mask", c("ShapeFrame", "ShapeFrame"), \(i, j, how=NULL, table=NULL, value=NULL, assay=1, ...) { + # validity + if (is.null(table)) stop("Missing 'table'; can't mask shapes without") + ok <- is.null(value) || (is.character(value) && all(value %in% rownames(table))) + if (!ok) stop("Invalid 'value'; should be in 'rownames(table(x, i))'") + if (is.null(how)) { how <- "sum"; message("Missing 'how'; defaulting to 'sum'") } + if (is.character(how)) how <- match.arg(how, c("sum", "mean", "detected", "prop.detected")) + # grouping + js <- st_intersects(st_as_sf(data(j)), st_as_sf(data(i))) + is <- factor(integer(nrow(i)), seq(0, nrow(j))) + is[unlist(js)] <- rep(seq_along(js), lengths(js)) + ns <- tabulate(is, ni <- nlevels(is)) + # aggregation + mx <- assay(table, assay) + if (grepl("detected$", how)) mx <- mx > 0 + my <- sparseMatrix( + x=rep(1, length(is)), + i=seq_along(is), j=is, + dims=c(ncol(table), ni)) + mx <- mx %*% my + if (grepl("mean|prop", how)) mx <- t(t(mx)/ns) + # wrangling + mx <- as(mx, "dgCMatrix") + colnames(mx) <- levels(is) + mx <- list(mx); names(mx) <- how + se <- SingleCellExperiment(mx) + nm <- paste0("n_", meta(table)$region) + se[[nm]] <- ns + return(se) +}) + +#' @noRd +setMethod(".mask", c("ANY", "ANY"), \(i, j, ...) + stop("'mask'ing between these element types not yet supported")) diff --git a/R/query.R b/R/query.R index d75183a6..f5fa6bd3 100644 --- a/R/query.R +++ b/R/query.R @@ -1,125 +1,184 @@ #' @name query #' @title spatial queries #' +#' @description Spatial queries serve to subset \code{SpatialData} elements +#' according to a rectangular bounding box or arbitrary polygonal shapes. +#' Queries rely on lesser-/greater-equal and \code{sf::st_intersects} for +#' spatial operations (i.e., instances that intersect the query region +#' in any way are kept). For circle shapes, radii are currently ignored +#' (i.e., a circle is kept if its centroid intersects the query region). +#' #' @param x \code{SpatialData} element. -#' @param j scalar character or integer; index or name of coordinate space. +#' @param y query specification; +#' bounding box: length-4 numeric list with names 'xmin/xmax/ymin/ymax' +#' (order is irrelevant); polygon: numeric matrix with ≥ 3 rows and 2 columns. +#' @param i for \code{SpatialData}, index or name of table to query. #' @param ... optional arguments passed to and from other methods. #' #' @return same as input #' #' @examples -#' x <- file.path("extdata", "blobs.zarr") -#' x <- system.file(x, package="SpatialData") -#' x <- readSpatialData(x, tables=FALSE) +#' zs <- file.path("extdata", "blobs.zarr") +#' zs <- system.file(zs, package="SpatialData") +#' sd <- readSpatialData(zs, tables=FALSE) +#' +#' # helper for visualizing point coordinates +#' .xy <- \(.) data.frame(data(.)[c("x", "y")]) +#' +#' # bounding box +#' y <- list(xmin=11, xmax=44, ymin=22, ymax=55) +#' q <- query(p <- point(sd), y) +#' +#' plot(.xy(p), asp=1) +#' points(.xy(q), col="red") +#' rect(y$xmin, y$ymin, y$xmax, y$ymax, border="blue") #' -#' image(x, "box") <- query(image(x), xmin=0, xmax=30, ymin=30, ymax=50) +#' # polygon +#' y <- rbind(c(20,10), c(50,30), c(20,50), c(30,30)) +#' q <- query(p <- point(sd), y) #' -#' image(x) -#' image(x, "box") +#' plot(.xy(p), asp=1) +#' points(.xy(q), col="red") +#' lines(rbind(y, y[1, ]), col="blue") +#' +#' # shapes that intersect the query region are kept +#' y <- rbind(c(30,45), c(40,45), c(35,50)) +#' t <- query(s <- shape(sd, 3), y) +#' +#' require(sf, quietly=TRUE) +#' df <- st_coordinates(st_as_sf(data(s))) +#' fd <- st_coordinates(st_as_sf(data(t))) +#' plot( +#' asp=1, xlim=c(15, 60), ylim=c(15, 60), +#' rbind(y, y[1, ]), type="l", col="blue") +#' foo <- by(df, df[, "L2"], \(x) points(x, type="b", col="black")) +#' foo <- by(fd, fd[, "L2"], \(x) points(x, type="b", col="red")) NULL -.check_bb <- \(args) { - m <- match(names(args), c("xmin", "xmax", "ymin", "ymax")) - if (any(is.na(m)) || !identical(sort(m), seq_len(4))) - stop("currently only supporting bounding box query;", - " please provide 'xmin/xmax/ymin/ymax' as ...") -} - #' @rdname query +#' @importFrom dplyr filter #' @export -setMethod("query", "SpatialData", \(x, j=NULL, ...) { - # check validity of dots - args <- list(...) - .check_bb(args) - # guess coordinate space - stopifnot(length(j) == 1) - j <- if (is.null(j)) { - .guess_space(x) - } else { - if (is.character(j)) { - match.arg(j, CTname(x)) - } else if (is.numeric(j)) { - stopifnot(j > 0, j == round(j)) - CTname(x)[j] - } +setMethod("query", "SpatialData", \(x, ..., i) { + # TODO: need more example data to properly implement this; + # for now, just a proof of concept using 'spatialdata_attrs' + if (missing(i)) i <- 1 + if (!length(tables(x))) + stop("There aren't any tables") + if (is.numeric(i)) { + i <- tableNames(x)[i] + } else if (is.character(i)) { + i <- match.arg(i, tableNames(x)) } - # execute query - for (l in rownames(x)) - for (e in colnames(x)[[l]]) - x[[l]][[e]] <- query(x[[l]][[e]], j, ...) - return(x) + t <- x$tables[[i]] + ns <- vapply(nm <- colnames(x), length, integer(1)) + nm <- data.frame(layer=rep.int(names(nm), ns), region=unlist(nm)) + nm <- filter(nm, ...) + i <- match(nm$layer, .LAYERS) + j <- split(nm$region, nm$layer) + x <- x[i, j] + x$tables$table <- t + return(x) }) +.check_box <- \(bb) { + xy <- c("xmin", "xmax", "ymin", "ymax") + ok <- c(is.list(bb), + length(bb) == 4, setequal(names(bb), xy), + bb$xmin <= bb$xmax, bb$ymin <= bb$ymax, + is.numeric(bb <- unlist(bb)), !is.na(bb)) + if (!all(ok)) stop( + "Invalid bounding box query; should be length-4 ", + "numeric list with names 'xmin/xmax/ymin/ymax'") +} + +.check_pol <- \(mx) { + ok <- c( + is.matrix(mx), is.numeric(mx), + nrow(mx) >= 3, ncol(mx) == 2, + !is.na(mx), is.finite(mx)) + if (!all(ok)) stop( + "Invalid polygon query; should be numeric matrix ", + "with ≥ 3 rows and 2 columns (= xy-coordinates)") + # ensure polygon is closed + top <- mx[1, ] + bot <- mx[nrow(mx), ] + if (!all(top == bot)) + mx <- rbind(mx, top) + dup <- duplicated(as.data.frame(mx[-1, , drop=FALSE])) + if (any(dup)) stop("Invalid polygon query; found duplicated vertices") + return(mx) +} + +.query_sdArray <- \(x, y) { + if (is.matrix(y)) stop( + "Polygon query not supported for ", + "element of type 'image/labelArray'") + .check_box(y) + # protect image channels (i.e., + # only query spatial dimensions) + n <- length(d <- dim(x)) + if (n == 3) d <- d[-1] + # assure query is within bounds + y$xmin <- max(y$xmin, 0) + y$ymin <- max(y$ymin, 0) + y$ymax <- min(y$ymax, d[1]) + y$xmax <- min(y$xmax, d[2]) + # subset spatial dimensions + i <- seq(y$ymin, y$ymax) + j <- seq(y$xmin, y$xmax) + if (n == 3) { + return(x[, i, j]) + } else { + return(x[i, j]) + } +} + #' @rdname query #' @export -setMethod("query", "ImageArray", \(x, j, ...) { - qu <- list(...) - .check_bb(qu) - if (missing(j)) j <- 1 - if (is.numeric(j)) j <- CTname(x)[j] - stopifnot(length(j) == 1) - . <- grep(j, CTname(x)) - if (!length(.) || is.na(.)) stop("invalid 'j'") - # transform query into target space - ts <- CTpath(x, j) - xy <- list(c(qu$xmin, qu$xmax), c(qu$ymin, qu$ymax)) - xy <- data.frame(xy); names(xy) <- c("x", "y") - xy <- .trans_xy(xy, ts, TRUE) - xy <- lapply(xy, \(.) as.list(round(.))) - x <- x[, # crop (i.e., subset) array dimensions 2-3 - do.call(seq, xy[[2]]), - do.call(seq, xy[[1]])] - # transform array dimensions into target space - os <- data.frame(x=c(0, dim(x)[3]), y=c(0, dim(x)[2])) - os <- vapply(.trans_xy(os, ts), min, numeric(1)) - # add transformation of type translation as to - # offset origin by difference between new & old - os <- unlist(qu[c("xmin", "ymin")]) - os - if (!all(os == 0)) x <- addCT(x, - name=j, type="translation", - data=c(0, os[2], os[1])) - return(x) -}) +setMethod("query", "ImageArray", \(x, y) .query_sdArray(x, y)) #' @rdname query #' @export -setMethod("query", "LabelArray", \(x, ...) { - args <- list(...) - .check_bb(args) - d <- dim(x) - if (args$ymax > d[1]) args$ymax <- d[1] - if (args$xmax > d[2]) args$xmax <- d[2] - a <- data(x)[ - seq(args$ymin, args$ymax), - seq(args$xmin, args$xmax)] - x@data <- a - return(x) -}) +setMethod("query", "LabelArray", \(x, y) .query_sdArray(x, y)) #' @rdname query +#' @importFrom sf st_as_sf st_intersects st_polygon st_bbox st_crop #' @export -setMethod("query", "ShapeFrame", \(x, ...) { - args <- list(...) - .check_bb(args) - df <- st_as_sf(data(x)) - xy <- st_coordinates(df) - i <- - xy[, 1] >= args$xmin & - xy[, 1] <= args$xmax & - xy[, 2] >= args$ymin & - xy[, 2] <= args$ymax - x@data <- data(x)[which(i), ] +setMethod("query", "ShapeFrame", \(x, y) { + # TODO: this will drop geometries where any coordinate + # is out of bounds; keep but crop to boundary region? + if (is.matrix(y)) { + # TODO: currently ignoring 'radius' for circles (i.e., + # query based on centroids only); what does Python do? + mx <- .check_pol(y) + sf <- st_as_sf(data(x)) + ok <- st_intersects(sf, st_polygon(list(mx)), sparse=FALSE) + x@data <- x@data[which(ok), ] + return(x) + } + # note: non-spatial attributes (e.g., radius) give warnings? + .check_box(y) + sf <- st_as_sf(data(x)) + bb <- st_bbox(unlist(y)) + suppressWarnings(sf <- st_crop(sf, bb)) + x@data <- sf[names(x)] return(x) }) #' @rdname query +#' @importFrom sf st_as_sf st_polygon st_intersects +#' @importFrom dplyr collect filter #' @export -setMethod("query", "PointFrame", \(x, j, ...) { - args <- list(...) - .check_bb(args) - y <- filter(x, - x >= args$xmin, x <= args$xmax, - y >= args$ymin, y <= args$ymax) - x@data <- y@data - return(x) +setMethod("query", "PointFrame", \(x, y) { + if (is.matrix(y)) { + mx <- .check_pol(y) + xy <- st_as_sf(collect(data(x)[c("x", "y")]), coords=c("x", "y")) + ok <- st_intersects(xy, st_polygon(list(mx)), sparse=FALSE) + return(x[which(ok[, 1])]) + } else { + .check_box(bb <- y) + filter(x, + x >= bb$xmin, x <= bb$xmax, + y >= bb$ymin, y <= bb$ymax) + } }) diff --git a/R/sdArray.R b/R/sdArray.R index 8e005e5f..c1893892 100644 --- a/R/sdArray.R +++ b/R/sdArray.R @@ -8,6 +8,8 @@ #' dim,LabelArray-method #' length,ImageArray-method #' length,LabelArray-method +#' data_type,ImageArray-method +#' data_type,LabelArray-method #' #' @param x \code{ImageArray} or \code{LabelArray} #' @param k scalar index specifying which scale to extract. diff --git a/inst/NEWS b/inst/NEWS index 4c35e07a..f1cf28bc 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,3 +1,12 @@ +changes in version 0.99.27 + +- spatial queries (aka subsetting) by bounding box + (all element types) or polygons (points and shapes), + including unit tests, documentation, visual examples +- masking draft to aggregate information across layers + (image by label, point by shape, shape by shape; + the latter aggregates values in an associated table) + changes in version 0.99.26 - added unit tests for existing transformations diff --git a/man/Array-methods.Rd b/man/Array-methods.Rd index 1990122e..2be1a08a 100644 --- a/man/Array-methods.Rd +++ b/man/Array-methods.Rd @@ -8,6 +8,8 @@ \alias{dim,LabelArray-method} \alias{length,ImageArray-method} \alias{length,LabelArray-method} +\alias{data_type,ImageArray-method} +\alias{data_type,LabelArray-method} \alias{data,sdArray-method} \alias{dim,sdArray-method} \alias{length,sdArray-method} diff --git a/man/SpatialData.Rd b/man/SpatialData.Rd index 2eb34b55..5c16f970 100644 --- a/man/SpatialData.Rd +++ b/man/SpatialData.Rd @@ -50,8 +50,8 @@ \alias{element,SpatialData,ANY,numeric-method} \alias{element,SpatialData,ANY,missing-method} \alias{element,SpatialData,ANY,ANY-method} -\alias{[[<-,SpatialData,numeric,ANY-method} -\alias{[[<-,SpatialData,character,ANY-method} +\alias{[[<-,SpatialData,numeric,ANY,ANY-method} +\alias{[[<-,SpatialData,character,ANY,ANY-method} \title{The `SpatialData` class} \usage{ SpatialData(images, labels, points, shapes, tables) @@ -88,9 +88,9 @@ SpatialData(images, labels, points, shapes, tables) \S4method{element}{SpatialData,ANY,ANY}(x, i, j) -\S4method{[[}{SpatialData,numeric,ANY}(x, i) <- value +\S4method{[[}{SpatialData,numeric,ANY,ANY}(x, i) <- value -\S4method{[[}{SpatialData,character,ANY}(x, i) <- value +\S4method{[[}{SpatialData,character,ANY,ANY}(x, i) <- value } \arguments{ \item{images}{list of \code{\link{ImageArray}}s} diff --git a/man/Zattrs.Rd b/man/Zattrs.Rd index 366844b1..06ddea16 100644 --- a/man/Zattrs.Rd +++ b/man/Zattrs.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/Zattrs.R \name{Zattrs} \alias{Zattrs} +\alias{feature_key} \alias{$,Zattrs-method} \title{The `Zattrs` class} \usage{ @@ -31,4 +32,6 @@ CTname(z) CTtype(z) CTdata(z, "scale") +feature_key(point(x)) + } diff --git a/man/coord-utils.Rd b/man/coord-utils.Rd index b1d3efd7..aab39e96 100644 --- a/man/coord-utils.Rd +++ b/man/coord-utils.Rd @@ -3,6 +3,7 @@ \name{coord-utils} \alias{coord-utils} \alias{axes} +\alias{CTlist} \alias{CTname} \alias{CTtype} \alias{CTdata} diff --git a/man/mask.Rd b/man/mask.Rd index 2caa01cf..9c248c12 100644 --- a/man/mask.Rd +++ b/man/mask.Rd @@ -5,7 +5,14 @@ \alias{mask,SpatialData-method} \title{Masking} \usage{ -\S4method{mask}{SpatialData}(x, i, j, ...) +\S4method{mask}{SpatialData}( + x, + i, + j, + how = NULL, + name = function(i, j) sprintf("\%s_by_\%s", i, j), + ... +) } \arguments{ \item{x}{\code{\link{SpatialData}} object.} @@ -14,13 +21,20 @@ specifically, \code{i} will be masked by \code{j}, adding a \code{table} for \code{j} in \code{x}.} +\item{how}{character string; statistic to use for masking.} + +\item{name}{function use to generate the new \code{table}'s name.} + \item{...}{optional arguments passed to and from other methods.} } \value{ -\code{\link{SingleCellExperiment}} +Input \code{SpatialData} object \code{x} with an additional table. } \description{ -... +Masking operations serve to aggregate data across layers, e.g., +counting points in shapes, averaging image channels by labels, etc. +For added flexibility, these may be carried out directly between elements, +or using an input \code{SpatialData} object and specifying element names. } \examples{ library(SingleCellExperiment) @@ -28,9 +42,20 @@ x <- file.path("extdata", "blobs.zarr") x <- system.file(x, package="SpatialData") x <- readSpatialData(x, tables=FALSE) -# count points in circles -x <- mask(x, "blobs_points", "blobs_circles") -x <- mask(x, "blobs_image", "blobs_labels") -tables(x) +# count points in shapes +y <- mask(x, "blobs_points", "blobs_circles") +tail(tables(y), 1) + +# average image channels by labels +y <- mask(x, "blobs_image", "blobs_labels") +tail(tables(y), 1) + +library(SpatialData.data) +x <- get_demo_SDdata("merfish") +x <- readSpatialData(x) + +# sum table counts by shapes +y <- mask(x, "cells", "anatomical") +tail(tables(y), 1) } diff --git a/man/query.Rd b/man/query.Rd index 66b8b488..83d23cf4 100644 --- a/man/query.Rd +++ b/man/query.Rd @@ -9,36 +9,72 @@ \alias{query,PointFrame-method} \title{spatial queries} \usage{ -\S4method{query}{SpatialData}(x, j = NULL, ...) +\S4method{query}{SpatialData}(x, ..., i) -\S4method{query}{ImageArray}(x, j, ...) +\S4method{query}{ImageArray}(x, y) -\S4method{query}{LabelArray}(x, ...) +\S4method{query}{LabelArray}(x, y) -\S4method{query}{ShapeFrame}(x, ...) +\S4method{query}{ShapeFrame}(x, y) -\S4method{query}{PointFrame}(x, j, ...) +\S4method{query}{PointFrame}(x, y) } \arguments{ \item{x}{\code{SpatialData} element.} -\item{j}{scalar character or integer; index or name of coordinate space.} - \item{...}{optional arguments passed to and from other methods.} + +\item{i}{for \code{SpatialData}, index or name of table to query.} + +\item{y}{query specification; +bounding box: length-4 numeric list with names 'xmin/xmax/ymin/ymax' +(order is irrelevant); polygon: numeric matrix with ≥ 3 rows and 2 columns.} } \value{ same as input } \description{ -spatial queries +Spatial queries serve to subset \code{SpatialData} elements +according to a rectangular bounding box or arbitrary polygonal shapes. +Queries rely on lesser-/greater-equal and \code{sf::st_intersects} for +spatial operations (i.e., instances that intersect the query region +in any way are kept). For circle shapes, radii are currently ignored +(i.e., a circle is kept if its centroid intersects the query region). } \examples{ -x <- file.path("extdata", "blobs.zarr") -x <- system.file(x, package="SpatialData") -x <- readSpatialData(x, tables=FALSE) +zs <- file.path("extdata", "blobs.zarr") +zs <- system.file(zs, package="SpatialData") +sd <- readSpatialData(zs, tables=FALSE) + +# helper for visualizing point coordinates +.xy <- \(.) data.frame(data(.)[c("x", "y")]) + +# bounding box +y <- list(xmin=11, xmax=44, ymin=22, ymax=55) +q <- query(p <- point(sd), y) + +plot(.xy(p), asp=1) +points(.xy(q), col="red") +rect(y$xmin, y$ymin, y$xmax, y$ymax, border="blue") + +# polygon +y <- rbind(c(20,10), c(50,30), c(20,50), c(30,30)) +q <- query(p <- point(sd), y) + +plot(.xy(p), asp=1) +points(.xy(q), col="red") +lines(rbind(y, y[1, ]), col="blue") -image(x, "box") <- query(image(x), xmin=0, xmax=30, ymin=30, ymax=50) +# shapes that intersect the query region are kept +y <- rbind(c(30,45), c(40,45), c(35,50)) +t <- query(s <- shape(sd, 3), y) -image(x) -image(x, "box") +require(sf, quietly=TRUE) +df <- st_coordinates(st_as_sf(data(s))) +fd <- st_coordinates(st_as_sf(data(t))) +plot( + asp=1, xlim=c(15, 60), ylim=c(15, 60), + rbind(y, y[1, ]), type="l", col="blue") +foo <- by(df, df[, "L2"], \(x) points(x, type="b", col="black")) +foo <- by(fd, fd[, "L2"], \(x) points(x, type="b", col="red")) } diff --git a/tests/testthat/test-mask.R b/tests/testthat/test-mask.R index 9039bfe6..1eebe0d0 100644 --- a/tests/testthat/test-mask.R +++ b/tests/testthat/test-mask.R @@ -3,24 +3,66 @@ x <- file.path("extdata", "blobs.zarr") x <- system.file(x, package="SpatialData") x <- readSpatialData(x, anndataR=TRUE) -test_that("mask(),ImageArray,LabelArray", { +test_that("mask,unsupported", { + nm <- list( + c(imageNames(x)[1], imageNames(x)[2]), # image,image + c(labelNames(x)[1], labelNames(x)[2]), # label,label + c(labelNames(x)[1], imageNames(x)[1]), # label,image + c(shapeNames(x)[1], pointNames(x)[1])) # shape,point + for (ij in nm) expect_error(mask(x, ij[1], ij[2])) +}) + +test_that("mask,ImageArray,LabelArray", { i <- "blobs_image" j <- "blobs_labels" - x <- SpatialData::mask(x, i, j, fun=sum) + # reproduce example data + y <- mask(x, i, j, how="sum") expect_equivalent( - assay(SpatialData::table(x, 1)), - assay(SpatialData::table(x, 2))) + assay(tables(y)[[2]]), + assay(tables(x)[[1]])) + # default to 'mean' with a message + expect_message(y <- mask(x, i, j)) + expect_silent(z <- mask(x, i, j, how="mean")) + expect_identical(y, z) }) -test_that("mask(),PointFrame,ShapeFrame", { +test_that("mask,PointFrame,ShapeFrame", { i <- "blobs_points" j <- "blobs_circles" - x <- SpatialData::mask(x, i, j) - t <- getTable(x, j) - md <- meta(point(x, i)) - md <- md$spatialdata_attrs - fk <- md$feature_key - nr <- length(unique(point(x, i)[[fk]])) + y <- mask(x, i, j) + t <- getTable(y, j) + fk <- feature_key(p <- point(x, i)) + np <- length(unique(p[[fk]])) nc <- nrow(shape(x, j)) - expect_equal(dim(t), c(nr, nc)) + expect_equal(dim(t), c(np, nc)) + # ignore 'how' with a warning + expect_warning(mask(x, i, j, how="sum")) +}) + +require(SpatialData.data, quietly=TRUE) +x <- get_demo_SDdata("merfish") +x <- readSpatialData(x) + +test_that("mask,ShapeFrame,ShapeFrame", { + i <- "cells" + j <- "anatomical" + # error without 'table' + y <- x; tables(y) <- list() + expect_error(mask(y, i, j)) + # default to 'sum' with a message + expect_message(y <- mask(x, i, j)) + expect_silent(z <- mask(x, i, j, how="sum")) + expect_identical(y, z) + old <- getTable(y, i) + new <- getTable(y, j) + expect_equal(dim(new), c(nrow(old), nrow(shape(x, j))+1)) + expect_equal(sum(assay(new)), sum(assay(old))) + expect_identical(rownames(new), rownames(old)) + expect_identical(meta(new)$region, j) + # 'value' should be a character + # vector of rownames in 'table(x, i)' + v <- sample(rownames(old), 5) + new <- getTable(mask(x, i, j, value=v), j) + expect_equal(sum(assay(new[v, ])), sum(assay(old[v, ]))) + expect_error(mask(x, i, j, value=`[<-`(v, i=1, "x"))) }) diff --git a/tests/testthat/test-query.R b/tests/testthat/test-query.R index ad46ca48..7964b12a 100644 --- a/tests/testthat/test-query.R +++ b/tests/testthat/test-query.R @@ -1,45 +1,93 @@ -suppressPackageStartupMessages(library(sf)) +require(sf, quietly=TRUE) x <- file.path("extdata", "blobs.zarr") x <- system.file(x, package="SpatialData") -x <- readSpatialData(x, tables=FALSE) +x <- readSpatialData(x, anndataR=TRUE) -test_that("query,...", { - # extract 1st element from every layer - lys <- lapply(seq_len(4), \(.) x[.,1][[.]][[1]]) - for (y in lys) { - # missing bounding box coordinates - expect_error(query(y, xmin=0, xmax=1, ymin=0)) - # # invalid coordinate space - # expect_error(query(y, ".", xmin=0, xmax=1, ymin=0, ymax=1)) - # expect_error(query(y, 100, xmin=0, xmax=1, ymin=0, ymax=1)) - } +test_that("query,.check_box", { + # valid + q <- list( + list(xmin=0, xmax=1, ymin=0, ymax=1), + list(xmin=-1, xmax=0, ymin=-1, ymax=0), + list(xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf)) + for (. in q) expect_silent(.check_box(.)) + # invalid + q <- list( + list(xmin=0, xmax=1, ymin=0), + list(xmin=1, xmax=0, ymin=1, ymax=0), + list(xmin=0, xmax=-1, ymin=0, ymax=-1), + list(xmin=0, xmax=1, ymin=10, ymax=NA), + list(xmin=Inf, xmax=-Inf, ymin=Inf, ymax=-Inf)) + for (. in q) expect_error(.check_box(.)) +}) + +test_that("query,.check_pol", { + # valid + q <- list( + m <- matrix(seq_len(8), 4, 2), + rbind(c(1,1), c(2,2), c(3,3)), # open + rbind(c(1,1), c(2,2), c(3,3), c(1,1))) + for (. in q) expect_silent(.check_pol(.)) + # invalid + q <- list( + matrix(seq_len(6), 2, 3), # wrong dim. + matrix(numeric(6), 3, 2), # duplicates + `[<-`(m, i=1, j=1, value=Inf), # not finite + `[<-`(m, i=1, j=1, value=NA)) # missing value + for (. in q) expect_error(.check_pol(.)) }) test_that("query,ImageArray", { d <- dim(i <- image(x)) - # neither crop nor shift - expect_identical(query(i, xmin=0, xmax=d[3], ymin=0, ymax=d[2]), i) + # query equals dimensions + y <- list(xmin=0, xmax=d[3], ymin=0, ymax=d[2]) + expect_identical(query(i, y), i) # order is irrelevant - expect_identical(query(i, ymax=d[2], xmax=d[3], xmin=0, ymin=0), i) + y <- list(ymax=d[2], xmax=d[3], xmin=0, ymin=0) + expect_identical(query(i, y), i) # crop but don't shift - j <- query(i, xmin=0, xmax=w <- d[3]/2, ymin=0, ymax=h <- d[2]/4) - expect_equal(dim(j), c(3, h, w)) + y <- list(xmin=0, xmax=w <- d[3]/2, ymin=0, ymax=h <- d[2]/4) + expect_equal(dim(j <- query(i, y)), c(3, h, w)) expect_identical(CTlist(i), CTlist(j)) # crop and shift - j <- query(i, xmin=1, xmax=w <- d[3]/2, ymin=2, ymax=h <- d[2]/4) - expect_equal(dim(j), c(3, 1+h-2, 1+w-1)) - expect_equal(CTtype(j), t <- "translation") - expect_equivalent(CTlist(j)[[1]][[t]][[1]], c(0, 2, 1)) + y <- list( + xmin=dx <- 3, xmax=w <- d[3]/2, + ymin=dy <- 5, ymax=h <- d[2]/4) + expect_equal(dim(query(i, y)), c(3, 1+h-dy, 1+w-dx)) + # non-finite boundaries + y <- list(xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) + expect_silent(query(i, y)) +}) + +test_that("query,LabelArray", { + d <- dim(l <- label(x)) + # query equals dimensions + y <- list(xmin=0, xmax=d[2], ymin=0, ymax=d[1]) + expect_identical(query(l, y), l) + # order is irrelevant + y <- list(ymax=d[1], xmax=d[2], xmin=0, ymin=0) + expect_identical(query(l, y), l) + # crop but don't shift + y <- list(xmin=0, xmax=w <- d[2]/2, ymin=0, ymax=h <- d[1]/4) + expect_equal(dim(m <- query(l, y)), c(h, w)) + expect_identical(CTlist(l), CTlist(m)) + # crop and shift + y <- list( + xmin=dx <- 3, xmax=w <- d[2]/2, + ymin=dy <- 5, ymax=h <- d[1]/4) + expect_equal(dim(query(l, y)), c(1+h-dy, 1+w-dx)) + # non-finite boundaries + y <- list(xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) + expect_silent(query(l, y)) }) -test_that("query,PointFrame", { +test_that("query-box,PointFrame", { n <- length(p <- point(x)) # this shouldn't do anything - q <- query(p, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) + q <- query(p, list(xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf)) expect_is(data(q), "arrow_dplyr_query") expect_identical(collect(data(p)), collect(data(q))) # this should drop everything - q <- query(p, xmin=Inf, xmax=-Inf, ymin=Inf, ymax=-Inf) + q <- query(p, list(xmin=0, xmax=1e-3, ymin=0, ymax=1e-3)) expect_equal(nrow(collect(data(q))), 0) # proper query bb <- lapply(c("x", "y"), \(.) { @@ -47,22 +95,44 @@ test_that("query,PointFrame", { d <- c((d <- diff(range(v)))/4, d/2) names(d) <- paste0(., c("min", "max")) as.list(d) }) |> Reduce(f=c) - q <- do.call(query, c(list(x=p), bb)) + q <- do.call(query, c(list(x=p), list(bb))) df <- collect(data(p)) fd <- collect(data(q)) - i <- df$x >= bb$xmin & df$x <= bb$xmax & + i <- + df$x >= bb$xmin & df$x <= bb$xmax & df$y >= bb$ymin & df$y <= bb$ymax expect_identical(df[i, ], fd) }) -test_that("query,ShapeFrame", { +test_that("query-pol,PointFrame", { + n <- length(p <- point(x)) + f <- \(.) collect(data(.)) + # mock all-inclusive query + xy <- rbind(c(0,0), c(0,1e6), c(1e6,0)) + expect_identical(f(query(p, xy)), f(p)) + # sample random points & + # query tiny polygon around them + replicate(5, { + i <- sample(n, 1) + xy <- c(p[i]$x, p[i]$y) + i <- p$x == xy[1] & p$y == xy[2] + xy <- rbind( + xy+c(0, d <- 1e-6), + xy+c(-d,-d), xy+c(+d,-d)) + q <- query(p, xy) + expect_length(q, sum(i)) + expect_identical(f(q), f(p[which(i)])) + }) +}) + +test_that("query-box,ShapeFrame", { n <- length(s <- shape(x)) # mock query without any effect - t <- query(s, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) - expect_equal(collect(data(s)), collect(data(t))) + t <- query(s, list(xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf)) + expect_equal(nrow(data(t)), nrow(data(s))) # this should drop everything - t <- query(s, xmin=Inf, xmax=-Inf, ymin=Inf, ymax=-Inf) - expect_equal(nrow(collect(data(t))), 0) + t <- query(s, list(xmin=0, xmax=1e-3, ymin=0, ymax=1e-3)) + expect_equal(nrow(t), 0) # proper query xy <- st_coordinates(st_as_sf(data(s))) xy <- data.frame(xy); names(xy) <- c("x", "y") @@ -70,6 +140,26 @@ test_that("query,ShapeFrame", { bb <- lapply(xy, \(.) c(.-1e-9, .+1e-9)) bb <- data.frame(t(unlist(bb))) names(bb) <- c("xmin", "xmax", "ymin", "ymax") - t <- do.call(query, c(list(x=s), bb)) + t <- do.call(query, c(list(x=s), list(bb))) expect_equal(s[i], t) }) + +test_that("query-pol,ShapeFrame", { + n <- length(s <- shape(x)) + # mock all-inclusive query + xy <- rbind(c(0,0), c(0,1e6), c(1e6,0)) + expect_equal(query(s, xy), s) + # sample random shapes & + # query tiny polygon around them + xy <- st_coordinates(st_as_sf(data(s))) + replicate(5, { + i <- sample(n, 1) + xy <- xy[i, ] + xy <- rbind( + xy+c(0, d <- 1e-6), + xy+c(-d,-d), xy+c(+d,-d)) + t <- query(s, xy) + expect_length(t, 1) + expect_equal(t, s[i]) + }) +})