From d9e6ff76fa408198eddc30ceec1ae37302e5b907 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Mon, 6 Apr 2026 20:03:27 +0200 Subject: [PATCH 01/35] use sf_crop for query on shapes --- NAMESPACE | 1 + R/query.R | 79 +++++++++++++++---------------------- man/query.Rd | 14 +++---- tests/testthat/test-query.R | 10 ++--- 4 files changed, 43 insertions(+), 61 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index e0822d18..d4318fe4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -151,6 +151,7 @@ importFrom(reticulate,import) importFrom(sf,"st_geometry<-") importFrom(sf,st_as_sf) importFrom(sf,st_coordinates) +importFrom(sf,st_crop) importFrom(sf,st_distance) importFrom(sf,st_geometry) importFrom(sf,st_geometry_type) diff --git a/R/query.R b/R/query.R index d75183a6..8bbb0a88 100644 --- a/R/query.R +++ b/R/query.R @@ -8,21 +8,24 @@ #' @return same as input #' #' @examples -#' x <- file.path("extdata", "blobs.zarr") -#' x <- system.file(x, package="SpatialData") -#' x <- readSpatialData(x, tables=FALSE) +#' zs <- file.path("extdata", "blobs.zarr") +#' zs <- system.file(zs, package="SpatialData") +#' sd <- readSpatialData(zs, tables=FALSE) #' -#' image(x, "box") <- query(image(x), xmin=0, xmax=30, ymin=30, ymax=50) +#' image(sd, "box") <- query(image(sd), xmin=0, xmax=30, ymin=30, ymax=50) #' -#' image(x) -#' image(x, "box") +#' image(sd) +#' image(sd, "box") NULL +# TODO: query with polygonal boundary region + .check_bb <- \(args) { m <- match(names(args), c("xmin", "xmax", "ymin", "ymax")) if (any(is.na(m)) || !identical(sort(m), seq_len(4))) stop("currently only supporting bounding box query;", " please provide 'xmin/xmax/ymin/ymax' as ...") + stopifnot(length(args) == 4, is.numeric(unlist(args))) } #' @rdname query @@ -52,33 +55,15 @@ setMethod("query", "SpatialData", \(x, j=NULL, ...) { #' @rdname query #' @export -setMethod("query", "ImageArray", \(x, j, ...) { - qu <- list(...) - .check_bb(qu) - if (missing(j)) j <- 1 - if (is.numeric(j)) j <- CTname(x)[j] - stopifnot(length(j) == 1) - . <- grep(j, CTname(x)) - if (!length(.) || is.na(.)) stop("invalid 'j'") - # transform query into target space - ts <- CTpath(x, j) - xy <- list(c(qu$xmin, qu$xmax), c(qu$ymin, qu$ymax)) - xy <- data.frame(xy); names(xy) <- c("x", "y") - xy <- .trans_xy(xy, ts, TRUE) - xy <- lapply(xy, \(.) as.list(round(.))) - x <- x[, # crop (i.e., subset) array dimensions 2-3 - do.call(seq, xy[[2]]), - do.call(seq, xy[[1]])] - # transform array dimensions into target space - os <- data.frame(x=c(0, dim(x)[3]), y=c(0, dim(x)[2])) - os <- vapply(.trans_xy(os, ts), min, numeric(1)) - # add transformation of type translation as to - # offset origin by difference between new & old - os <- unlist(qu[c("xmin", "ymin")]) - os - if (!all(os == 0)) x <- addCT(x, - name=j, type="translation", - data=c(0, os[2], os[1])) - return(x) +setMethod("query", "ImageArray", \(x, ...) { + args <- list(...) + .check_bb(args) + d <- dim(x) + args$ymax <- min(args$ymax, d[2]) + args$xmax <- min(args$xmax, d[3]) + j <- seq(args$ymin, args$ymax) + k <- seq(args$xmin, args$xmax) + return(x[, j, k]) }) #' @rdname query @@ -87,28 +72,26 @@ setMethod("query", "LabelArray", \(x, ...) { args <- list(...) .check_bb(args) d <- dim(x) - if (args$ymax > d[1]) args$ymax <- d[1] - if (args$xmax > d[2]) args$xmax <- d[2] - a <- data(x)[ - seq(args$ymin, args$ymax), - seq(args$xmin, args$xmax)] - x@data <- a - return(x) + args$ymax <- min(args$ymax, d[1]) + args$xmax <- min(args$xmax, d[2]) + i <- seq(args$ymin, args$ymax) + j <- seq(args$xmin, args$xmax) + return(x[i, j]) }) #' @rdname query +#' @importFrom sf st_as_sf st_crop #' @export setMethod("query", "ShapeFrame", \(x, ...) { + # TODO: this will drop geometries where any coordinate + # is out of bounds; keep but crop to boundary region? args <- list(...) .check_bb(args) - df <- st_as_sf(data(x)) - xy <- st_coordinates(df) - i <- - xy[, 1] >= args$xmin & - xy[, 1] <= args$xmax & - xy[, 2] >= args$ymin & - xy[, 2] <= args$ymax - x@data <- data(x)[which(i), ] + sf <- st_as_sf(data(x)) + bb <- st_bbox(unlist(args)) + # note: non-spatial attributes (e.g., radius) gives warnings? + suppressWarnings(sf <- st_crop(sf, bb)) + x@data <- sf[names(x)] return(x) }) diff --git a/man/query.Rd b/man/query.Rd index 66b8b488..bc799592 100644 --- a/man/query.Rd +++ b/man/query.Rd @@ -11,7 +11,7 @@ \usage{ \S4method{query}{SpatialData}(x, j = NULL, ...) -\S4method{query}{ImageArray}(x, j, ...) +\S4method{query}{ImageArray}(x, ...) \S4method{query}{LabelArray}(x, ...) @@ -33,12 +33,12 @@ same as input spatial queries } \examples{ -x <- file.path("extdata", "blobs.zarr") -x <- system.file(x, package="SpatialData") -x <- readSpatialData(x, tables=FALSE) +zs <- file.path("extdata", "blobs.zarr") +zs <- system.file(zs, package="SpatialData") +sd <- readSpatialData(zs, tables=FALSE) -image(x, "box") <- query(image(x), xmin=0, xmax=30, ymin=30, ymax=50) +image(sd, "box") <- query(image(sd), xmin=0, xmax=30, ymin=30, ymax=50) -image(x) -image(x, "box") +image(sd) +image(sd, "box") } diff --git a/tests/testthat/test-query.R b/tests/testthat/test-query.R index ad46ca48..bdee8e41 100644 --- a/tests/testthat/test-query.R +++ b/tests/testthat/test-query.R @@ -1,4 +1,4 @@ -suppressPackageStartupMessages(library(sf)) +require(sf, quietly=TRUE) x <- file.path("extdata", "blobs.zarr") x <- system.file(x, package="SpatialData") x <- readSpatialData(x, tables=FALSE) @@ -28,8 +28,6 @@ test_that("query,ImageArray", { # crop and shift j <- query(i, xmin=1, xmax=w <- d[3]/2, ymin=2, ymax=h <- d[2]/4) expect_equal(dim(j), c(3, 1+h-2, 1+w-1)) - expect_equal(CTtype(j), t <- "translation") - expect_equivalent(CTlist(j)[[1]][[t]][[1]], c(0, 2, 1)) }) test_that("query,PointFrame", { @@ -59,10 +57,10 @@ test_that("query,ShapeFrame", { n <- length(s <- shape(x)) # mock query without any effect t <- query(s, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) - expect_equal(collect(data(s)), collect(data(t))) + expect_equal(nrow(data(t)), nrow(data(s))) # this should drop everything - t <- query(s, xmin=Inf, xmax=-Inf, ymin=Inf, ymax=-Inf) - expect_equal(nrow(collect(data(t))), 0) + t <- query(s, xmin=0, xmax=1e-3, ymin=0, ymax=1e-3) + expect_equal(nrow(t), 0) # proper query xy <- st_coordinates(st_as_sf(data(s))) xy <- data.frame(xy); names(xy) <- c("x", "y") From 9a07c576642c7e0da9d632874010dfdd9cc96c00 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Mon, 6 Apr 2026 20:06:02 +0200 Subject: [PATCH 02/35] import sf::st_bbox --- NAMESPACE | 1 + R/query.R | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index d4318fe4..ddf360b1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -150,6 +150,7 @@ importFrom(methods,setReplaceMethod) importFrom(reticulate,import) importFrom(sf,"st_geometry<-") importFrom(sf,st_as_sf) +importFrom(sf,st_bbox) importFrom(sf,st_coordinates) importFrom(sf,st_crop) importFrom(sf,st_distance) diff --git a/R/query.R b/R/query.R index 8bbb0a88..ea4bf4e7 100644 --- a/R/query.R +++ b/R/query.R @@ -80,7 +80,7 @@ setMethod("query", "LabelArray", \(x, ...) { }) #' @rdname query -#' @importFrom sf st_as_sf st_crop +#' @importFrom sf st_as_sf st_bbox st_crop #' @export setMethod("query", "ShapeFrame", \(x, ...) { # TODO: this will drop geometries where any coordinate From a8670050dbcb3b8547cb67e47eab4228cef952d4 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Mon, 6 Apr 2026 20:06:23 +0200 Subject: [PATCH 03/35] fix comm typo --- R/query.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/query.R b/R/query.R index ea4bf4e7..64b38f67 100644 --- a/R/query.R +++ b/R/query.R @@ -89,7 +89,7 @@ setMethod("query", "ShapeFrame", \(x, ...) { .check_bb(args) sf <- st_as_sf(data(x)) bb <- st_bbox(unlist(args)) - # note: non-spatial attributes (e.g., radius) gives warnings? + # note: non-spatial attributes (e.g., radius) give warnings? suppressWarnings(sf <- st_crop(sf, bb)) x@data <- sf[names(x)] return(x) From 27a1b5c8f8a0f28abd0ac8351b6694d88a2a13bd Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Mon, 6 Apr 2026 22:24:37 +0200 Subject: [PATCH 04/35] +polygon query for point/shape --- NAMESPACE | 2 ++ R/query.R | 54 ++++++++++++++++++++++++++++++++++++++++------------ man/query.Rd | 2 +- 3 files changed, 45 insertions(+), 13 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index ddf360b1..1699d4d5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -156,7 +156,9 @@ importFrom(sf,st_crop) importFrom(sf,st_distance) importFrom(sf,st_geometry) importFrom(sf,st_geometry_type) +importFrom(sf,st_intersects) importFrom(sf,st_point) +importFrom(sf,st_polygon) importFrom(sf,st_sfc) importFrom(utils,.DollarNames) importFrom(utils,head) diff --git a/R/query.R b/R/query.R index 64b38f67..ce331df8 100644 --- a/R/query.R +++ b/R/query.R @@ -20,20 +20,34 @@ NULL # TODO: query with polygonal boundary region -.check_bb <- \(args) { +.check_box <- \(args) { m <- match(names(args), c("xmin", "xmax", "ymin", "ymax")) if (any(is.na(m)) || !identical(sort(m), seq_len(4))) stop("currently only supporting bounding box query;", " please provide 'xmin/xmax/ymin/ymax' as ...") stopifnot(length(args) == 4, is.numeric(unlist(args))) } +.check_pol <- \(mx) { + ok <- c( + is.matrix(mx), is.numeric(mx), + nrow(mx) >= 3, ncol(mx) == 2) + if (!all(ok)) stop( + "Invalid polygon query; should be a numeric matrix ", + "with ≥ 3 rows and 2 columns (= xy-coordinates)") + # ensure polygon is closed + top <- mx[1, ] + bot <- mx[nrow(mx), ] + if (!all(top == bot)) + mx <- rbind(mx, top) + return(mx) +} #' @rdname query #' @export setMethod("query", "SpatialData", \(x, j=NULL, ...) { # check validity of dots args <- list(...) - .check_bb(args) + .check_box(args) # guess coordinate space stopifnot(length(j) == 1) j <- if (is.null(j)) { @@ -57,7 +71,7 @@ setMethod("query", "SpatialData", \(x, j=NULL, ...) { #' @export setMethod("query", "ImageArray", \(x, ...) { args <- list(...) - .check_bb(args) + .check_box(args) d <- dim(x) args$ymax <- min(args$ymax, d[2]) args$xmax <- min(args$xmax, d[3]) @@ -70,7 +84,7 @@ setMethod("query", "ImageArray", \(x, ...) { #' @export setMethod("query", "LabelArray", \(x, ...) { args <- list(...) - .check_bb(args) + .check_box(args) d <- dim(x) args$ymax <- min(args$ymax, d[1]) args$xmax <- min(args$xmax, d[2]) @@ -80,13 +94,20 @@ setMethod("query", "LabelArray", \(x, ...) { }) #' @rdname query -#' @importFrom sf st_as_sf st_bbox st_crop +#' @importFrom sf st_as_sf st_intersects st_polygon st_bbox st_crop #' @export setMethod("query", "ShapeFrame", \(x, ...) { # TODO: this will drop geometries where any coordinate # is out of bounds; keep but crop to boundary region? args <- list(...) - .check_bb(args) + if (length(args) == 1) { + mx <- .check_pol(mx <- args[[1]]) + sf <- st_as_sf(data(x)) + ok <- st_intersects(sf, st_polygon(list(mx)), sparse=FALSE) + x@data <- x@data[which(ok), ] + return(x) + } + .check_box(args) sf <- st_as_sf(data(x)) bb <- st_bbox(unlist(args)) # note: non-spatial attributes (e.g., radius) give warnings? @@ -96,13 +117,22 @@ setMethod("query", "ShapeFrame", \(x, ...) { }) #' @rdname query +#' @importFrom sf st_as_sf st_polygon st_intersects +#' @importFrom dplyr collect #' @export -setMethod("query", "PointFrame", \(x, j, ...) { +setMethod("query", "PointFrame", \(x, ...) { args <- list(...) - .check_bb(args) - y <- filter(x, - x >= args$xmin, x <= args$xmax, - y >= args$ymin, y <= args$ymax) - x@data <- y@data + if (length(args) == 1) { + mx <- .check_pol(mx <- args[[1]]) + xy <- st_as_sf(collect(data(x)[c("x", "y")]), coords=c("x", "y")) + ok <- st_intersects(xy, st_polygon(list(mx)), sparse=FALSE) + x@data <- x@data[which(ok), ] + } else { + .check_box(args) + y <- filter(x, + x >= args$xmin, x <= args$xmax, + y >= args$ymin, y <= args$ymax) + x@data <- y@data + } return(x) }) diff --git a/man/query.Rd b/man/query.Rd index bc799592..81a4fea2 100644 --- a/man/query.Rd +++ b/man/query.Rd @@ -17,7 +17,7 @@ \S4method{query}{ShapeFrame}(x, ...) -\S4method{query}{PointFrame}(x, j, ...) +\S4method{query}{PointFrame}(x, ...) } \arguments{ \item{x}{\code{SpatialData} element.} From 699897df15c066783eeb67610b3e66ccc4b18984 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Mon, 6 Apr 2026 22:41:25 +0200 Subject: [PATCH 05/35] tests for poly query --- R/query.R | 4 +++- tests/testthat/test-query.R | 37 +++++++++++++++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/R/query.R b/R/query.R index ce331df8..3e85ee38 100644 --- a/R/query.R +++ b/R/query.R @@ -101,6 +101,8 @@ setMethod("query", "ShapeFrame", \(x, ...) { # is out of bounds; keep but crop to boundary region? args <- list(...) if (length(args) == 1) { + # TODO: currently ignoring 'radius' for circles (i.e., + # query based on centroids only); what does Python do? mx <- .check_pol(mx <- args[[1]]) sf <- st_as_sf(data(x)) ok <- st_intersects(sf, st_polygon(list(mx)), sparse=FALSE) @@ -118,7 +120,7 @@ setMethod("query", "ShapeFrame", \(x, ...) { #' @rdname query #' @importFrom sf st_as_sf st_polygon st_intersects -#' @importFrom dplyr collect +#' @importFrom dplyr collect filter #' @export setMethod("query", "PointFrame", \(x, ...) { args <- list(...) diff --git a/tests/testthat/test-query.R b/tests/testthat/test-query.R index bdee8e41..7a3e63b1 100644 --- a/tests/testthat/test-query.R +++ b/tests/testthat/test-query.R @@ -30,7 +30,7 @@ test_that("query,ImageArray", { expect_equal(dim(j), c(3, 1+h-2, 1+w-1)) }) -test_that("query,PointFrame", { +test_that("query-box,PointFrame", { n <- length(p <- point(x)) # this shouldn't do anything q <- query(p, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) @@ -53,7 +53,23 @@ test_that("query,PointFrame", { expect_identical(df[i, ], fd) }) -test_that("query,ShapeFrame", { +test_that("query-pol,PointFrame", { + n <- length(p <- point(x)) + # sample random points & + # query tiny polygon around them + replicate(5, { + i <- sample(n, 1) + xy <- c(p[i]$x, p[i]$y) + xy <- rbind( + xy+c(0, d <- 1e-6), + xy+c(-d,-d), xy+c(+d,-d)) + q <- query(p, xy) + expect_length(q, 1) + expect_equal(q, p[i]) + }) +}) + +test_that("query-box,ShapeFrame", { n <- length(s <- shape(x)) # mock query without any effect t <- query(s, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) @@ -71,3 +87,20 @@ test_that("query,ShapeFrame", { t <- do.call(query, c(list(x=s), bb)) expect_equal(s[i], t) }) + +test_that("query-pol,ShapeFrame", { + n <- length(s <- shape(x)) + xy <- st_coordinates(st_as_sf(data(s))) + # sample random shapes & + # query tiny polygon around them + replicate(5, { + i <- sample(n, 1) + xy <- xy[i, ] + xy <- rbind( + xy+c(0, d <- 1e-6), + xy+c(-d,-d), xy+c(+d,-d)) + t <- query(s, xy) + expect_length(t, 1) + expect_equal(t, s[i]) + }) +}) From 557878865cb54029215a1f200513271739de1bd5 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Mon, 6 Apr 2026 22:59:22 +0200 Subject: [PATCH 06/35] cleaner box/pol validity --- R/query.R | 19 +++++++++++-------- tests/testthat/test-query.R | 8 +++++++- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/R/query.R b/R/query.R index 3e85ee38..ae296251 100644 --- a/R/query.R +++ b/R/query.R @@ -20,19 +20,22 @@ NULL # TODO: query with polygonal boundary region -.check_box <- \(args) { - m <- match(names(args), c("xmin", "xmax", "ymin", "ymax")) - if (any(is.na(m)) || !identical(sort(m), seq_len(4))) - stop("currently only supporting bounding box query;", - " please provide 'xmin/xmax/ymin/ymax' as ...") - stopifnot(length(args) == 4, is.numeric(unlist(args))) +.check_box <- \(bb) { + xy <- c("xmin", "xmax", "ymin", "ymax") + ok <- c( + length(bb) == 4, setequal(names(bb), xy), + is.numeric(bb <- unlist(bb)), !is.na(bb)) + if (!all(ok)) stop( + "Invalid bounding box query; should be length-4 ", + "numeric vector with names 'xmin/xmax/ymin/ymax'") } + .check_pol <- \(mx) { ok <- c( is.matrix(mx), is.numeric(mx), nrow(mx) >= 3, ncol(mx) == 2) if (!all(ok)) stop( - "Invalid polygon query; should be a numeric matrix ", + "Invalid polygon query; should be numeric matrix ", "with ≥ 3 rows and 2 columns (= xy-coordinates)") # ensure polygon is closed top <- mx[1, ] @@ -109,10 +112,10 @@ setMethod("query", "ShapeFrame", \(x, ...) { x@data <- x@data[which(ok), ] return(x) } + # note: non-spatial attributes (e.g., radius) give warnings? .check_box(args) sf <- st_as_sf(data(x)) bb <- st_bbox(unlist(args)) - # note: non-spatial attributes (e.g., radius) give warnings? suppressWarnings(sf <- st_crop(sf, bb)) x@data <- sf[names(x)] return(x) diff --git a/tests/testthat/test-query.R b/tests/testthat/test-query.R index 7a3e63b1..0e27dede 100644 --- a/tests/testthat/test-query.R +++ b/tests/testthat/test-query.R @@ -55,6 +55,9 @@ test_that("query-box,PointFrame", { test_that("query-pol,PointFrame", { n <- length(p <- point(x)) + # mock all-inclusive query + xy <- rbind(c(0,0), c(0,1e6), c(1e6,0)) + expect_equal(query(p, xy), p) # sample random points & # query tiny polygon around them replicate(5, { @@ -90,9 +93,12 @@ test_that("query-box,ShapeFrame", { test_that("query-pol,ShapeFrame", { n <- length(s <- shape(x)) - xy <- st_coordinates(st_as_sf(data(s))) + # mock all-inclusive query + xy <- rbind(c(0,0), c(0,1e6), c(1e6,0)) + expect_equal(query(s, xy), s) # sample random shapes & # query tiny polygon around them + xy <- st_coordinates(st_as_sf(data(s))) replicate(5, { i <- sample(n, 1) xy <- xy[i, ] From ba6f8e028035eff257279ccb76e1b9ae47656907 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Mon, 6 Apr 2026 23:13:18 +0200 Subject: [PATCH 07/35] bug fix test edge case --- tests/testthat/test-query.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-query.R b/tests/testthat/test-query.R index 0e27dede..191c3ca5 100644 --- a/tests/testthat/test-query.R +++ b/tests/testthat/test-query.R @@ -63,12 +63,13 @@ test_that("query-pol,PointFrame", { replicate(5, { i <- sample(n, 1) xy <- c(p[i]$x, p[i]$y) + i <- p$x == xy[1] & p$y == xy[2] xy <- rbind( xy+c(0, d <- 1e-6), xy+c(-d,-d), xy+c(+d,-d)) q <- query(p, xy) - expect_length(q, 1) - expect_equal(q, p[i]) + expect_length(q, sum(i)) + expect_equal(q, p[which(i)]) }) }) From 9c083b91a2daf0ff9d95c56e09923b8ca506a309 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Mon, 6 Apr 2026 23:20:47 +0200 Subject: [PATCH 08/35] make copi happier --- R/query.R | 4 ++-- tests/testthat/test-query.R | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/R/query.R b/R/query.R index ae296251..3e4bdec1 100644 --- a/R/query.R +++ b/R/query.R @@ -131,13 +131,13 @@ setMethod("query", "PointFrame", \(x, ...) { mx <- .check_pol(mx <- args[[1]]) xy <- st_as_sf(collect(data(x)[c("x", "y")]), coords=c("x", "y")) ok <- st_intersects(xy, st_polygon(list(mx)), sparse=FALSE) - x@data <- x@data[which(ok), ] + return(x[which(ok[, 1])]) } else { .check_box(args) y <- filter(x, x >= args$xmin, x <= args$xmax, y >= args$ymin, y <= args$ymax) x@data <- y@data + return(x) } - return(x) }) diff --git a/tests/testthat/test-query.R b/tests/testthat/test-query.R index 191c3ca5..7c3032b1 100644 --- a/tests/testthat/test-query.R +++ b/tests/testthat/test-query.R @@ -55,9 +55,10 @@ test_that("query-box,PointFrame", { test_that("query-pol,PointFrame", { n <- length(p <- point(x)) + f <- \(.) collect(data(.)) # mock all-inclusive query xy <- rbind(c(0,0), c(0,1e6), c(1e6,0)) - expect_equal(query(p, xy), p) + expect_identical(f(query(p, xy)), f(p)) # sample random points & # query tiny polygon around them replicate(5, { @@ -69,7 +70,7 @@ test_that("query-pol,PointFrame", { xy+c(-d,-d), xy+c(+d,-d)) q <- query(p, xy) expect_length(q, sum(i)) - expect_equal(q, p[which(i)]) + expect_identical(f(q), f(p[which(i)])) }) }) From 374a0664ddc106c27970b337e6e37bcb293334a3 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Tue, 7 Apr 2026 18:23:22 +0200 Subject: [PATCH 09/35] more tests; more validity; documentation --- R/query.R | 93 +++++++++++++++++++++---------------- man/query.Rd | 36 +++++++++----- tests/testthat/test-query.R | 82 +++++++++++++++++++++++--------- 3 files changed, 137 insertions(+), 74 deletions(-) diff --git a/R/query.R b/R/query.R index 3e4bdec1..f55faaca 100644 --- a/R/query.R +++ b/R/query.R @@ -1,9 +1,15 @@ #' @name query #' @title spatial queries #' +#' @description Spatial queries serve to subset \code{SpatialData} elements +#' according to a rectangular bounding box or arbitrary polygonal shapes. +#' Queries rely on lesser-/greater-equal and \code{sf::st_intersects} for +#' spatial operations, so that points on the boundary are included as well. +#' #' @param x \code{SpatialData} element. -#' @param j scalar character or integer; index or name of coordinate space. -#' @param ... optional arguments passed to and from other methods. +#' @param y query specification; +#' bounding box: length-4 numeric list with names 'xmin/xmax/ymin/ymax' +#' (order is irrelevant); polygon: numeric matrix with ≥ 3 rows and 2 columns. #' #' @return same as input #' @@ -12,28 +18,39 @@ #' zs <- system.file(zs, package="SpatialData") #' sd <- readSpatialData(zs, tables=FALSE) #' -#' image(sd, "box") <- query(image(sd), xmin=0, xmax=30, ymin=30, ymax=50) +#' # bounding box +#' y <- list(xmin=11, xmax=44, ymin=22, ymax=55) +#' q <- query(p <- point(sd), y) +#' +#' plot(data.frame(data(p))[c("x", "y")], asp=1) +#' points(data.frame(data(q))[c("x", "y")], col="red") +#' rect(y$xmin, y$ymin, y$xmax, y$ymax, border="blue") #' -#' image(sd) -#' image(sd, "box") +#' # polygon +#' y <- rbind(c(20,10), c(50,30), c(20,50), c(30,30)) +#' q <- query(p <- point(sd), y) +#' +#' plot(data.frame(data(p))[c("x", "y")], asp=1) +#' points(data.frame(data(q))[c("x", "y")], col="red") +#' lines(rbind(y, y[1, ]), col="blue") NULL -# TODO: query with polygonal boundary region - .check_box <- \(bb) { xy <- c("xmin", "xmax", "ymin", "ymax") - ok <- c( + ok <- c(is.list(bb), length(bb) == 4, setequal(names(bb), xy), + bb$xmin <= bb$xmax, bb$ymin <= bb$ymax, is.numeric(bb <- unlist(bb)), !is.na(bb)) if (!all(ok)) stop( "Invalid bounding box query; should be length-4 ", - "numeric vector with names 'xmin/xmax/ymin/ymax'") + "numeric list with names 'xmin/xmax/ymin/ymax'") } .check_pol <- \(mx) { ok <- c( is.matrix(mx), is.numeric(mx), - nrow(mx) >= 3, ncol(mx) == 2) + nrow(mx) >= 3, ncol(mx) == 2, + !is.na(mx), is.finite(mx)) if (!all(ok)) stop( "Invalid polygon query; should be numeric matrix ", "with ≥ 3 rows and 2 columns (= xy-coordinates)") @@ -42,6 +59,8 @@ NULL bot <- mx[nrow(mx), ] if (!all(top == bot)) mx <- rbind(mx, top) + dup <- any(duplicated(mx[-1, ])) + if (dup) stop("Invalid polygon query; found duplicated vertices") return(mx) } @@ -72,50 +91,47 @@ setMethod("query", "SpatialData", \(x, j=NULL, ...) { #' @rdname query #' @export -setMethod("query", "ImageArray", \(x, ...) { - args <- list(...) - .check_box(args) +setMethod("query", "ImageArray", \(x, y) { + .check_box(y) d <- dim(x) - args$ymax <- min(args$ymax, d[2]) - args$xmax <- min(args$xmax, d[3]) - j <- seq(args$ymin, args$ymax) - k <- seq(args$xmin, args$xmax) + y$ymax <- min(y$ymax, d[2]) + y$xmax <- min(y$xmax, d[3]) + j <- seq(y$ymin, y$ymax) + k <- seq(y$xmin, y$xmax) return(x[, j, k]) }) #' @rdname query #' @export -setMethod("query", "LabelArray", \(x, ...) { - args <- list(...) - .check_box(args) +setMethod("query", "LabelArray", \(x, y) { + .check_box(y) d <- dim(x) - args$ymax <- min(args$ymax, d[1]) - args$xmax <- min(args$xmax, d[2]) - i <- seq(args$ymin, args$ymax) - j <- seq(args$xmin, args$xmax) + y$ymax <- min(y$ymax, d[1]) + y$xmax <- min(y$xmax, d[2]) + i <- seq(y$ymin, y$ymax) + j <- seq(y$xmin, y$xmax) return(x[i, j]) }) #' @rdname query #' @importFrom sf st_as_sf st_intersects st_polygon st_bbox st_crop #' @export -setMethod("query", "ShapeFrame", \(x, ...) { +setMethod("query", "ShapeFrame", \(x, y) { # TODO: this will drop geometries where any coordinate # is out of bounds; keep but crop to boundary region? - args <- list(...) - if (length(args) == 1) { + if (is.matrix(y)) { # TODO: currently ignoring 'radius' for circles (i.e., # query based on centroids only); what does Python do? - mx <- .check_pol(mx <- args[[1]]) + mx <- .check_pol(y) sf <- st_as_sf(data(x)) ok <- st_intersects(sf, st_polygon(list(mx)), sparse=FALSE) x@data <- x@data[which(ok), ] return(x) } # note: non-spatial attributes (e.g., radius) give warnings? - .check_box(args) + .check_box(y) sf <- st_as_sf(data(x)) - bb <- st_bbox(unlist(args)) + bb <- st_bbox(unlist(y)) suppressWarnings(sf <- st_crop(sf, bb)) x@data <- sf[names(x)] return(x) @@ -125,19 +141,16 @@ setMethod("query", "ShapeFrame", \(x, ...) { #' @importFrom sf st_as_sf st_polygon st_intersects #' @importFrom dplyr collect filter #' @export -setMethod("query", "PointFrame", \(x, ...) { - args <- list(...) - if (length(args) == 1) { - mx <- .check_pol(mx <- args[[1]]) +setMethod("query", "PointFrame", \(x, y) { + if (is.matrix(y)) { + mx <- .check_pol(y) xy <- st_as_sf(collect(data(x)[c("x", "y")]), coords=c("x", "y")) ok <- st_intersects(xy, st_polygon(list(mx)), sparse=FALSE) return(x[which(ok[, 1])]) } else { - .check_box(args) - y <- filter(x, - x >= args$xmin, x <= args$xmax, - y >= args$ymin, y <= args$ymax) - x@data <- y@data - return(x) + .check_box(bb <- y) + filter(x, + x >= bb$xmin, x <= bb$xmax, + y >= bb$ymin, y <= bb$ymax) } }) diff --git a/man/query.Rd b/man/query.Rd index 81a4fea2..1931623c 100644 --- a/man/query.Rd +++ b/man/query.Rd @@ -11,34 +11,48 @@ \usage{ \S4method{query}{SpatialData}(x, j = NULL, ...) -\S4method{query}{ImageArray}(x, ...) +\S4method{query}{ImageArray}(x, y) -\S4method{query}{LabelArray}(x, ...) +\S4method{query}{LabelArray}(x, y) -\S4method{query}{ShapeFrame}(x, ...) +\S4method{query}{ShapeFrame}(x, y) -\S4method{query}{PointFrame}(x, ...) +\S4method{query}{PointFrame}(x, y) } \arguments{ \item{x}{\code{SpatialData} element.} -\item{j}{scalar character or integer; index or name of coordinate space.} - -\item{...}{optional arguments passed to and from other methods.} +\item{y}{query specification; +bounding box: length-4 numeric list with names 'xmin/xmax/ymin/ymax' +(order is irrelevant); polygon: numeric matrix with ≥ 3 rows and 2 columns.} } \value{ same as input } \description{ -spatial queries +Spatial queries serve to subset \code{SpatialData} elements +according to a rectangular bounding box or arbitrary polygonal shapes. +Queries rely on lesser-/greater-equal and \code{sf::st_intersects} for +spatial operations, so that points on the boundary are included as well. } \examples{ zs <- file.path("extdata", "blobs.zarr") zs <- system.file(zs, package="SpatialData") sd <- readSpatialData(zs, tables=FALSE) -image(sd, "box") <- query(image(sd), xmin=0, xmax=30, ymin=30, ymax=50) +# bounding box +y <- list(xmin=11, xmax=44, ymin=22, ymax=55) +q <- query(p <- point(sd), y) + +plot(data.frame(data(p))[c("x", "y")], asp=1) +points(data.frame(data(q))[c("x", "y")], col="red") +rect(y$xmin, y$ymin, y$xmax, y$ymax, border="blue") + +# polygon +y <- rbind(c(20,10), c(50,30), c(20,50), c(30,30)) +q <- query(p <- point(sd), y) -image(sd) -image(sd, "box") +plot(data.frame(data(p))[c("x", "y")], asp=1) +points(data.frame(data(q))[c("x", "y")], col="red") +lines(rbind(y, y[1, ]), col="blue") } diff --git a/tests/testthat/test-query.R b/tests/testthat/test-query.R index 7c3032b1..5b2ae3f5 100644 --- a/tests/testthat/test-query.R +++ b/tests/testthat/test-query.R @@ -3,41 +3,76 @@ x <- file.path("extdata", "blobs.zarr") x <- system.file(x, package="SpatialData") x <- readSpatialData(x, tables=FALSE) -test_that("query,...", { - # extract 1st element from every layer - lys <- lapply(seq_len(4), \(.) x[.,1][[.]][[1]]) - for (y in lys) { - # missing bounding box coordinates - expect_error(query(y, xmin=0, xmax=1, ymin=0)) - # # invalid coordinate space - # expect_error(query(y, ".", xmin=0, xmax=1, ymin=0, ymax=1)) - # expect_error(query(y, 100, xmin=0, xmax=1, ymin=0, ymax=1)) - } +test_that("query,.check_box", { + # valid + q <- list( + list(xmin=0, xmax=1, ymin=0, ymax=1), + list(xmin=-1, xmax=0, ymin=-1, ymax=0), + list(xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf)) + for (. in q) expect_silent(.check_box(.)) + # invalid + q <- list( + list(xmin=0, xmax=1, ymin=0), + list(xmin=1, xmax=0, ymin=1, ymax=0), + list(xmin=0, xmax=-1, ymin=0, ymax=-1), + list(xmin=0, xmax=1, ymin=10, ymax=NA), + list(xmin=Inf, xmax=-Inf, ymin=Inf, ymax=-Inf)) + for (. in q) expect_error(.check_box(.)) }) +test_that("query,.check_pol", { + # valid + q <- list( + m <- matrix(seq_len(8), 4, 2), + rbind(c(1,1), c(2,2), c(3,3)), # open + rbind(c(1,1), c(2,2), c(3,3), c(1,1))) + for (. in q) expect_silent(.check_pol(.)) + # invalid + q <- list( + `[<-`(m, i=1, j=1, value=NA), # missing value + `[<-`(m, i=1, j=1, value=Inf), # not finite + matrix(numeric(6), 3, 2), # duplicates + matrix(seq_len(6), 2, 3)) # wrong dim. + for (. in q) expect_error(.check_pol(.)) +}) + +# test_that("query,...", { +# # extract 1st element from every layer +# lys <- lapply(seq_len(4), \(.) x[.,1][[.]][[1]]) +# for (y in lys) { +# # missing bounding box coordinates +# expect_error(query(y, xmin=0, xmax=1, ymin=0)) +# # # invalid coordinate space +# # expect_error(query(y, ".", xmin=0, xmax=1, ymin=0, ymax=1)) +# # expect_error(query(y, 100, xmin=0, xmax=1, ymin=0, ymax=1)) +# } +# }) + test_that("query,ImageArray", { d <- dim(i <- image(x)) # neither crop nor shift - expect_identical(query(i, xmin=0, xmax=d[3], ymin=0, ymax=d[2]), i) + y <- list(xmin=0, xmax=d[3], ymin=0, ymax=d[2]) + expect_identical(query(i, y), i) # order is irrelevant - expect_identical(query(i, ymax=d[2], xmax=d[3], xmin=0, ymin=0), i) + y <- list(ymax=d[2], xmax=d[3], xmin=0, ymin=0) + expect_identical(query(i, y), i) # crop but don't shift - j <- query(i, xmin=0, xmax=w <- d[3]/2, ymin=0, ymax=h <- d[2]/4) - expect_equal(dim(j), c(3, h, w)) + y <- list(xmin=0, xmax=w <- d[3]/2, ymin=0, ymax=h <- d[2]/4) + expect_equal(dim(j <- query(i, y)), c(3, h, w)) expect_identical(CTlist(i), CTlist(j)) # crop and shift - j <- query(i, xmin=1, xmax=w <- d[3]/2, ymin=2, ymax=h <- d[2]/4) - expect_equal(dim(j), c(3, 1+h-2, 1+w-1)) + y <- list(xmin=1, xmax=w <- d[3]/2, ymin=2, ymax=h <- d[2]/4) + expect_equal(dim(query(i, y)), c(3, 1+h-2, 1+w-1)) }) test_that("query-box,PointFrame", { n <- length(p <- point(x)) # this shouldn't do anything - q <- query(p, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) + q <- query(p, list(xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf)) expect_is(data(q), "arrow_dplyr_query") expect_identical(collect(data(p)), collect(data(q))) # this should drop everything - q <- query(p, xmin=Inf, xmax=-Inf, ymin=Inf, ymax=-Inf) + q <- query(p, list(xmin=0, xmax=1e-3, ymin=0, ymax=1e-3)) expect_equal(nrow(collect(data(q))), 0) # proper query bb <- lapply(c("x", "y"), \(.) { @@ -45,10 +80,11 @@ test_that("query-box,PointFrame", { d <- c((d <- diff(range(v)))/4, d/2) names(d) <- paste0(., c("min", "max")) as.list(d) }) |> Reduce(f=c) - q <- do.call(query, c(list(x=p), bb)) + q <- do.call(query, c(list(x=p), list(bb))) df <- collect(data(p)) fd <- collect(data(q)) - i <- df$x >= bb$xmin & df$x <= bb$xmax & + i <- + df$x >= bb$xmin & df$x <= bb$xmax & df$y >= bb$ymin & df$y <= bb$ymax expect_identical(df[i, ], fd) }) @@ -77,10 +113,10 @@ test_that("query-pol,PointFrame", { test_that("query-box,ShapeFrame", { n <- length(s <- shape(x)) # mock query without any effect - t <- query(s, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) + t <- query(s, list(xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf)) expect_equal(nrow(data(t)), nrow(data(s))) # this should drop everything - t <- query(s, xmin=0, xmax=1e-3, ymin=0, ymax=1e-3) + t <- query(s, list(xmin=0, xmax=1e-3, ymin=0, ymax=1e-3)) expect_equal(nrow(t), 0) # proper query xy <- st_coordinates(st_as_sf(data(s))) @@ -89,7 +125,7 @@ test_that("query-box,ShapeFrame", { bb <- lapply(xy, \(.) c(.-1e-9, .+1e-9)) bb <- data.frame(t(unlist(bb))) names(bb) <- c("xmin", "xmax", "ymin", "ymax") - t <- do.call(query, c(list(x=s), bb)) + t <- do.call(query, c(list(x=s), list(bb))) expect_equal(s[i], t) }) From 29f51a31e6dcfced4f2bbb177e08528ccc66d05a Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Tue, 7 Apr 2026 18:48:40 +0200 Subject: [PATCH 10/35] more docs --- R/query.R | 16 ++++++++++++++++ man/query.Rd | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/R/query.R b/R/query.R index f55faaca..daa768ba 100644 --- a/R/query.R +++ b/R/query.R @@ -5,6 +5,9 @@ #' according to a rectangular bounding box or arbitrary polygonal shapes. #' Queries rely on lesser-/greater-equal and \code{sf::st_intersects} for #' spatial operations, so that points on the boundary are included as well. +#' Note: shape queries ignore non-spatial attributes (e.g., radius for circles) +#' such that a circle is included if its centroid intersects the query region; +#' similarly, a polygon is included if any vertex intersects the query region. #' #' @param x \code{SpatialData} element. #' @param y query specification; @@ -33,6 +36,19 @@ #' plot(data.frame(data(p))[c("x", "y")], asp=1) #' points(data.frame(data(q))[c("x", "y")], col="red") #' lines(rbind(y, y[1, ]), col="blue") +#' +#' # shapes that intersect the query region are kept +#' y <- rbind(c(30,45), c(40,45), c(35,50)) +#' t <- query(s <- shape(sd, 3), y) +#' +#' require(sf, quietly=TRUE) +#' df <- st_coordinates(st_as_sf(data(s))) +#' fd <- st_coordinates(st_as_sf(data(t))) +#' plot( +#' asp=1, xlim=c(15, 60), ylim=c(15, 60), +#' rbind(pol, pol[1, ]), type="l", col="blue") +#' foo <- by(df, df[, "L2"], \(x) points(x, type="b", col="black")) +#' foo <- by(fd, fd[, "L2"], \(x) points(x, type="b", col="red")) NULL .check_box <- \(bb) { diff --git a/man/query.Rd b/man/query.Rd index 1931623c..2076a792 100644 --- a/man/query.Rd +++ b/man/query.Rd @@ -34,6 +34,9 @@ Spatial queries serve to subset \code{SpatialData} elements according to a rectangular bounding box or arbitrary polygonal shapes. Queries rely on lesser-/greater-equal and \code{sf::st_intersects} for spatial operations, so that points on the boundary are included as well. +Note: shape queries ignore non-spatial attributes (e.g., radius for circles) +such that a circle is included if its centroid intersects the query region; +similarly, a polygon is included if any vertex intersects the query region. } \examples{ zs <- file.path("extdata", "blobs.zarr") @@ -55,4 +58,17 @@ q <- query(p <- point(sd), y) plot(data.frame(data(p))[c("x", "y")], asp=1) points(data.frame(data(q))[c("x", "y")], col="red") lines(rbind(y, y[1, ]), col="blue") + +# shapes that intersect the query region are kept +y <- rbind(c(30,45), c(40,45), c(35,50)) +t <- query(s <- shape(sd, 3), y) + +require(sf, quietly=TRUE) +df <- st_coordinates(st_as_sf(data(s))) +fd <- st_coordinates(st_as_sf(data(t))) +plot( + asp=1, xlim=c(15, 60), ylim=c(15, 60), + rbind(pol, pol[1, ]), type="l", col="blue") +foo <- by(df, df[, "L2"], \(x) points(x, type="b", col="black")) +foo <- by(fd, fd[, "L2"], \(x) points(x, type="b", col="red")) } From d8130ed97429bf947a28e478f632397d3ad0afd3 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Tue, 7 Apr 2026 19:24:14 +0200 Subject: [PATCH 11/35] cleaner docs --- R/query.R | 45 ++++++++++--------------------------- man/query.Rd | 21 +++++++++-------- tests/testthat/test-query.R | 12 ---------- 3 files changed, 22 insertions(+), 56 deletions(-) diff --git a/R/query.R b/R/query.R index daa768ba..fdf5fe54 100644 --- a/R/query.R +++ b/R/query.R @@ -4,10 +4,9 @@ #' @description Spatial queries serve to subset \code{SpatialData} elements #' according to a rectangular bounding box or arbitrary polygonal shapes. #' Queries rely on lesser-/greater-equal and \code{sf::st_intersects} for -#' spatial operations, so that points on the boundary are included as well. -#' Note: shape queries ignore non-spatial attributes (e.g., radius for circles) -#' such that a circle is included if its centroid intersects the query region; -#' similarly, a polygon is included if any vertex intersects the query region. +#' spatial operations (i.e., instances that intersect the query region +#' in any way are kept). For circle shapes, radii are currently ignored +#' (i.e., a circle is kept if its centroid intersects the query region). #' #' @param x \code{SpatialData} element. #' @param y query specification; @@ -21,20 +20,23 @@ #' zs <- system.file(zs, package="SpatialData") #' sd <- readSpatialData(zs, tables=FALSE) #' +#' # helper for visualizing point coordinates +#' .xy <- \(.) data.frame(data(.)[c("x", "y")]) +#' #' # bounding box #' y <- list(xmin=11, xmax=44, ymin=22, ymax=55) #' q <- query(p <- point(sd), y) #' -#' plot(data.frame(data(p))[c("x", "y")], asp=1) -#' points(data.frame(data(q))[c("x", "y")], col="red") +#' plot(.xy(p), asp=1) +#' points(.xy(q), col="red") #' rect(y$xmin, y$ymin, y$xmax, y$ymax, border="blue") #' #' # polygon #' y <- rbind(c(20,10), c(50,30), c(20,50), c(30,30)) #' q <- query(p <- point(sd), y) #' -#' plot(data.frame(data(p))[c("x", "y")], asp=1) -#' points(data.frame(data(q))[c("x", "y")], col="red") +#' plot(.xy(p), asp=1) +#' points(.xy(q), col="red") #' lines(rbind(y, y[1, ]), col="blue") #' #' # shapes that intersect the query region are kept @@ -80,34 +82,10 @@ NULL return(mx) } -#' @rdname query -#' @export -setMethod("query", "SpatialData", \(x, j=NULL, ...) { - # check validity of dots - args <- list(...) - .check_box(args) - # guess coordinate space - stopifnot(length(j) == 1) - j <- if (is.null(j)) { - .guess_space(x) - } else { - if (is.character(j)) { - match.arg(j, CTname(x)) - } else if (is.numeric(j)) { - stopifnot(j > 0, j == round(j)) - CTname(x)[j] - } - } - # execute query - for (l in rownames(x)) - for (e in colnames(x)[[l]]) - x[[l]][[e]] <- query(x[[l]][[e]], j, ...) - return(x) -}) - #' @rdname query #' @export setMethod("query", "ImageArray", \(x, y) { + if (is.matrix(y)) stop("Polygon query not supported for images") .check_box(y) d <- dim(x) y$ymax <- min(y$ymax, d[2]) @@ -120,6 +98,7 @@ setMethod("query", "ImageArray", \(x, y) { #' @rdname query #' @export setMethod("query", "LabelArray", \(x, y) { + if (is.matrix(y)) stop("Polygon query not supported for labels") .check_box(y) d <- dim(x) y$ymax <- min(y$ymax, d[1]) diff --git a/man/query.Rd b/man/query.Rd index 2076a792..f7455ecf 100644 --- a/man/query.Rd +++ b/man/query.Rd @@ -2,15 +2,12 @@ % Please edit documentation in R/query.R \name{query} \alias{query} -\alias{query,SpatialData-method} \alias{query,ImageArray-method} \alias{query,LabelArray-method} \alias{query,ShapeFrame-method} \alias{query,PointFrame-method} \title{spatial queries} \usage{ -\S4method{query}{SpatialData}(x, j = NULL, ...) - \S4method{query}{ImageArray}(x, y) \S4method{query}{LabelArray}(x, y) @@ -33,30 +30,32 @@ same as input Spatial queries serve to subset \code{SpatialData} elements according to a rectangular bounding box or arbitrary polygonal shapes. Queries rely on lesser-/greater-equal and \code{sf::st_intersects} for -spatial operations, so that points on the boundary are included as well. -Note: shape queries ignore non-spatial attributes (e.g., radius for circles) -such that a circle is included if its centroid intersects the query region; -similarly, a polygon is included if any vertex intersects the query region. +spatial operations (i.e., instances that intersect the query region +in any way are kept). For circle shapes, radii are currently ignored +(i.e., a circle is kept if its centroid intersects the query region). } \examples{ zs <- file.path("extdata", "blobs.zarr") zs <- system.file(zs, package="SpatialData") sd <- readSpatialData(zs, tables=FALSE) +# helper for visualizing point coordinates +.xy <- \(.) data.frame(data(.)[c("x", "y")]) + # bounding box y <- list(xmin=11, xmax=44, ymin=22, ymax=55) q <- query(p <- point(sd), y) -plot(data.frame(data(p))[c("x", "y")], asp=1) -points(data.frame(data(q))[c("x", "y")], col="red") +plot(.xy(p), asp=1) +points(.xy(q), col="red") rect(y$xmin, y$ymin, y$xmax, y$ymax, border="blue") # polygon y <- rbind(c(20,10), c(50,30), c(20,50), c(30,30)) q <- query(p <- point(sd), y) -plot(data.frame(data(p))[c("x", "y")], asp=1) -points(data.frame(data(q))[c("x", "y")], col="red") +plot(.xy(p), asp=1) +points(.xy(q), col="red") lines(rbind(y, y[1, ]), col="blue") # shapes that intersect the query region are kept diff --git a/tests/testthat/test-query.R b/tests/testthat/test-query.R index 5b2ae3f5..73637861 100644 --- a/tests/testthat/test-query.R +++ b/tests/testthat/test-query.R @@ -36,18 +36,6 @@ test_that("query,.check_pol", { for (. in q) expect_error(.check_pol(.)) }) -# test_that("query,...", { -# # extract 1st element from every layer -# lys <- lapply(seq_len(4), \(.) x[.,1][[.]][[1]]) -# for (y in lys) { -# # missing bounding box coordinates -# expect_error(query(y, xmin=0, xmax=1, ymin=0)) -# # # invalid coordinate space -# # expect_error(query(y, ".", xmin=0, xmax=1, ymin=0, ymax=1)) -# # expect_error(query(y, 100, xmin=0, xmax=1, ymin=0, ymax=1)) -# } -# }) - test_that("query,ImageArray", { d <- dim(i <- image(x)) # neither crop nor shift From 319b0bb12e108fea56a6382b05cbe937e7db7d8b Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Tue, 7 Apr 2026 19:26:50 +0200 Subject: [PATCH 12/35] track changes --- inst/NEWS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/inst/NEWS b/inst/NEWS index 4c35e07a..59a27e8b 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,3 +1,9 @@ +changes in version 0.99.27 + +- spatial queries (aka subsetting) by bounding box + (all element types) or polygons (points and shapes), + including unit tests, documentation, visual examples + changes in version 0.99.26 - added unit tests for existing transformations From 5676dd0e31039610248d89f9c051c712708f89b7 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Tue, 7 Apr 2026 19:26:57 +0200 Subject: [PATCH 13/35] v0.99.27 bump --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index a3c52e55..31e61414 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: SpatialData Title: Representation of Python's SpatialData in R Depends: R (>= 4.5) -Version: 0.99.26 +Version: 0.99.27 Description: Interface to Python's 'SpatialData', currently including: reticulate-based use of 'spatialdata-io' for reading of manufacturer data and writing to .zarr, on-disk representation of images/labels as From 334841791cb763a31dbbdf59d2467f909622fdcb Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Wed, 8 Apr 2026 00:01:54 +0200 Subject: [PATCH 14/35] init mask revision --- NAMESPACE | 2 - R/AllGenerics.R | 4 ++ R/Zattrs.R | 3 + R/mask.R | 113 +++++++++++++++++++----------------- R/query.R | 25 ++++++++ man/mask.Rd | 24 +++++--- man/query.Rd | 3 + tests/testthat/test-query.R | 2 +- 8 files changed, 112 insertions(+), 64 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 1699d4d5..c39d44ee 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -157,9 +157,7 @@ importFrom(sf,st_distance) importFrom(sf,st_geometry) importFrom(sf,st_geometry_type) importFrom(sf,st_intersects) -importFrom(sf,st_point) importFrom(sf,st_polygon) -importFrom(sf,st_sfc) importFrom(utils,.DollarNames) importFrom(utils,head) importFrom(utils,tail) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index c92b0aa0..5c9f6541 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -57,6 +57,10 @@ setGeneric("rotate", \(x, t, ...) standardGeneric("rotate")) setGeneric("transform", \(x, ...) standardGeneric("transform")) setGeneric("translation", \(x, t, ...) standardGeneric("translation")) +# sda ---- + +setGeneric("feature_key", \(x, ...) standardGeneric("feature_key")) + # uts ---- setGeneric("layer", \(x, i, ...) standardGeneric("layer")) diff --git a/R/Zattrs.R b/R/Zattrs.R index 2d69664c..dc42e53d 100644 --- a/R/Zattrs.R +++ b/R/Zattrs.R @@ -75,3 +75,6 @@ setMethod("$", "Zattrs", \(x, name) x[[name]]) if (!is.null(cs)) coolcat("channels(%d): %s\n", cs) } setMethod("show", "Zattrs", .showZattrs) + +setMethod("feature_key", "Zattrs", \(x) x$spatialdata_attrs$feature_key) +setMethod("feature_key", "SpatialDataElement", \(x) feature_key(meta(x))) \ No newline at end of file diff --git a/R/mask.R b/R/mask.R index 9f62a532..9285febe 100644 --- a/R/mask.R +++ b/R/mask.R @@ -7,9 +7,14 @@ #' @param i,j character string; names of elements to mask, #' specifically, \code{i} will be masked by \code{j}, #' adding a \code{table} for \code{j} in \code{x}. +#' @param how character string; statistic to use for masking. +#' Defaults to "mean" when masking images, ignored when masking points. #' @param ... optional arguments passed to and from other methods. #' -#' @return \code{\link{SingleCellExperiment}} +#' @return +#' Input \code{SpatialData} object \code{x} with an additional table named +#' \code{_masking_}; or a \code{SingleCellExperiment} object when +#' masking elements directly (i.e., without \code{x} as input). #' #' @examples #' library(SingleCellExperiment) @@ -17,10 +22,15 @@ #' x <- system.file(x, package="SpatialData") #' x <- readSpatialData(x, tables=FALSE) #' -#' # count points in circles -#' x <- mask(x, "blobs_points", "blobs_circles") -#' x <- mask(x, "blobs_image", "blobs_labels") -#' tables(x) +#' # count points in shapes +#' y <- mask(x, "blobs_points", "blobs_circles") +#' (sce <- mask(i=point(x), j=shape(x, 1))) +#' identical(assay(table(y)), assay(sce)) +#' +#' # average image channels by labels +#' y <- mask(x, "blobs_image", "blobs_labels") +#' (sce <- mask(i=image(x), j=label(x))) +#' identical(assay(table(y)), assay(sce)) #' #' @export NULL @@ -30,54 +40,48 @@ NULL #' @rdname mask #' @importFrom SingleCellExperiment int_colData int_colData<- int_metadata<- #' @export -setMethod("mask", "SpatialData", \(x, i, j, ...) { +setMethod("mask", c("SpatialData", "ANY", "ANY"), \(x, i, j, how=NULL) { stopifnot(length(i) == 1, is.character(i), i %in% unlist(colnames(x))) stopifnot(length(j) == 1, is.character(j), j %in% unlist(colnames(x))) # get element types - ls <- vapply( - list(i, j), \(e) rownames(x)[vapply(colnames(x), - \(es) e %in% es, logical(1))], character(1)) - a <- element(x, ls[[1]], i) - b <- element(x, ls[[2]], j) - t <- .mask(a, b, ...) + f <- \(i) names(which(rapply(colnames(x), \(.) i %in% ., "character"))) + t <- mask(i=element(x, f(i), i), j=element(x, f(j), j), how=how) md <- list(region=j, region_key="region", instance_key="instance") int_metadata(t)$spatialdata_attrs <- md cd <- data.frame(region=j, instance=colnames(t)) int_colData(t) <- cbind(int_colData(t), cd) - nm <- paste0(i, "_masked_by_", j) + nm <- paste0(j, "_masking_", i) `table<-`(x, nm, value=t) }) -setGeneric(".mask", \(a, b, ...) standardGeneric(".mask")) - #' @importFrom methods as #' @importFrom Matrix rowSums sparseVector t #' @importFrom SingleCellExperiment SingleCellExperiment -#' @importFrom sf st_as_sf st_geometry_type st_sfc st_point st_distance -setMethod(".mask", c("PointFrame", "ShapeFrame"), \(a, b) { - n <- nrow(b <- st_as_sf(data(b))) - fk <- meta(a)$spatialdata_attrs$feature_key - switch(paste(st_geometry_type(b)[1]), - POINT={ - # realize one feature at a time - is <- split(seq_len(length(a)), a[[fk]]) - ns <- lapply(is, \(.) { - # make points 'sf'-compliant - xy <- as.data.frame(a[., c("x", "y")]) - ps <- st_sfc(lapply(asplit(xy, 1), st_point)) - # for each circle, count points within radius - z <- rowSums(st_distance(b, ps) < b$radius) - # sparsify counts - sv <- sparseVector(z[i <- z > 0], which(i), n) - sm <- as(sv, "sparseMatrix") - }) - # collect intro matrix w/ dim. features x circles - ns <- t(as(do.call(cbind, ns), "dgCMatrix")) - rownames(ns) <- names(is) - colnames(ns) <- seq(ncol(ns)) - }) +#' @importFrom sf st_as_sf st_geometry_type st_distance +setMethod("mask", c("missing", "PointFrame", "ShapeFrame"), \(x, i, j, how=NULL) { + if (!is.null(how)) warning("Can only count when masking points; ignoring 'how'") + n <- nrow(j <- st_as_sf(data(j))) + fun <- switch(as.character(st_geometry_type(j[1, ])), + POINT=\(i, j) rowSums(st_distance(j, i) <= j$radius), + \(i, j) vapply(st_intersects(j, i), length, integer(1))) + # realize one feature at i time + is <- split(seq_len(length(i)), i[[feature_key(i)]]) + ns <- lapply(is, \(.) { + # make points 'sf'-compliant + i <- as.data.frame(i[., c("x", "y")]) + i <- st_as_sf(i, coords=c("x", "y")) + # for each shape, count intersecting points + z <- fun(i, j) + # sparsify counts + sv <- sparseVector(z[i <- z > 0], which(i), n) + sm <- as(sv, "sparseMatrix") + }) + # collect into matrix w/ dim. features x shapes + ns <- t(do.call(cbind, ns)) + rownames(ns) <- names(is) + colnames(ns) <- seq(ncol(ns)) SingleCellExperiment(list(counts=ns)) }) @@ -85,20 +89,21 @@ setMethod(".mask", c("PointFrame", "ShapeFrame"), \(a, b) { #' @importFrom DelayedArray realize #' @importFrom S4Arrays as.array.Array #' @importFrom SingleCellExperiment SingleCellExperiment -setMethod(".mask", c("ImageArray", "LabelArray"), \(a, b, fun=mean) { - # TODO: somehow rewrite w/o realizing everything - # at once (maybe w/ 'DelayedArray::blockApply'?) - .a2v <- \(.) as.vector(as.array.Array(.)) - stopifnot(dim(a)[-1] == dim(b)) - w <- .a2v(data(b)); w[w == 0] <- NA - n <- length(i <- unique(w[!is.na(w)])) - ns <- vapply(seq_len(dim(a)[1]), \(.) { - v <- .a2v(data(a, 1)[., , ]) - tapply(v, w, sum, na.rm=TRUE) - }, numeric(n)) - ns <- t(as(ns, "dgCMatrix")) - dimnames(ns) <- list(seq(dim(a)[1]), i) - SingleCellExperiment(list(counts=ns)) +setMethod("mask", c("missing", "ImageArray", "LabelArray"), \(x, i, j, how=NULL) { + if (is.null(how)) how <- "mean" + stopifnot(dim(i)[-1] == dim(j)) + .j <- as(data(j), "sparseVector") + .j <- as.vector(.j[ok <- .j > 0]) + mx <- apply(data(i), 1, \(.i) { + .i <- as(.i, "sparseVector") + .i <- as.vector(.i[ok]) + tapply(.i, .j, how) + }) + colnames(mx) <- channels(i) + se <- SingleCellExperiment(list(t(mx))) + assayNames(se) <- how + return(se) }) -setMethod(".mask", c("ANY", "ANY"), \(a, b) - stop("'mask'ing between these element types not supported.")) + +setMethod("mask", c("missing", "ANY", "ANY"), \(x, i, j, how=NULL) + stop("'mask'ing between these element types not yet supported")) diff --git a/R/query.R b/R/query.R index fdf5fe54..a50cb834 100644 --- a/R/query.R +++ b/R/query.R @@ -82,6 +82,31 @@ NULL return(mx) } +#' @rdname query +#' @importFrom dplyr filter +#' @export +setMethod("query", "SpatialData", \(x, ..., i) { + # TODO: need more example data to properly implement this; + # for now, just a proof of concept using 'spatialdata_attrs' + if (missing(i)) i <- 1 + if (!length(tables(x))) + stop("There aren't any tables") + if (is.numeric(i)) { + i <- tableNames(x)[i] + } else if (is.character(i)) { + i <- match.arg(i, tableNames(x)) + } + t <- x$tables[[i]] + ns <- vapply(nm <- colnames(x), length, integer(1)) + nm <- data.frame(layer=rep.int(names(nm), ns), region=unlist(nm)) + nm <- filter(nm, ...) + i <- match(nm$layer, .LAYERS) + j <- split(nm$region, nm$layer) + x <- x[i, j] + x$tables$table <- t + return(x) +}) + #' @rdname query #' @export setMethod("query", "ImageArray", \(x, y) { diff --git a/man/mask.Rd b/man/mask.Rd index 2caa01cf..a64865cd 100644 --- a/man/mask.Rd +++ b/man/mask.Rd @@ -2,10 +2,10 @@ % Please edit documentation in R/mask.R \name{mask} \alias{mask} -\alias{mask,SpatialData-method} +\alias{mask,SpatialData,ANY,ANY-method} \title{Masking} \usage{ -\S4method{mask}{SpatialData}(x, i, j, ...) +\S4method{mask}{SpatialData,ANY,ANY}(x, i, j, how = NULL) } \arguments{ \item{x}{\code{\link{SpatialData}} object.} @@ -14,10 +14,15 @@ specifically, \code{i} will be masked by \code{j}, adding a \code{table} for \code{j} in \code{x}.} +\item{how}{character string; statistic to use for masking. +Defaults to "mean" when masking images, ignored when masking points.} + \item{...}{optional arguments passed to and from other methods.} } \value{ -\code{\link{SingleCellExperiment}} +Input \code{SpatialData} object \code{x} with an additional table named +\code{_masking_}; or a \code{SingleCellExperiment} object when +masking elements directly (i.e., without \code{x} as input). } \description{ ... @@ -28,9 +33,14 @@ x <- file.path("extdata", "blobs.zarr") x <- system.file(x, package="SpatialData") x <- readSpatialData(x, tables=FALSE) -# count points in circles -x <- mask(x, "blobs_points", "blobs_circles") -x <- mask(x, "blobs_image", "blobs_labels") -tables(x) +# count points in shapes +y <- mask(x, "blobs_points", "blobs_circles") +(sce <- mask(i=point(x), j=shape(x, 1))) +identical(assay(table(y)), assay(sce)) + +# average image channels by labels +y <- mask(x, "blobs_image", "blobs_labels") +(sce <- mask(i=image(x), j=label(x))) +identical(assay(table(y)), assay(sce)) } diff --git a/man/query.Rd b/man/query.Rd index f7455ecf..cb50c9b3 100644 --- a/man/query.Rd +++ b/man/query.Rd @@ -2,12 +2,15 @@ % Please edit documentation in R/query.R \name{query} \alias{query} +\alias{query,SpatialData-method} \alias{query,ImageArray-method} \alias{query,LabelArray-method} \alias{query,ShapeFrame-method} \alias{query,PointFrame-method} \title{spatial queries} \usage{ +\S4method{query}{SpatialData}(x, ..., i) + \S4method{query}{ImageArray}(x, y) \S4method{query}{LabelArray}(x, y) diff --git a/tests/testthat/test-query.R b/tests/testthat/test-query.R index 73637861..2c161788 100644 --- a/tests/testthat/test-query.R +++ b/tests/testthat/test-query.R @@ -1,7 +1,7 @@ require(sf, quietly=TRUE) x <- file.path("extdata", "blobs.zarr") x <- system.file(x, package="SpatialData") -x <- readSpatialData(x, tables=FALSE) +x <- readSpatialData(x, anndataR=TRUE) test_that("query,.check_box", { # valid From 2e70594c98fd64526506368067ed10dad8e11229 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Wed, 8 Apr 2026 00:04:08 +0200 Subject: [PATCH 15/35] more docs --- R/mask.R | 6 +++++- man/mask.Rd | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/R/mask.R b/R/mask.R index 9285febe..f0b8a206 100644 --- a/R/mask.R +++ b/R/mask.R @@ -1,7 +1,11 @@ #' @name mask #' @title Masking #' -#' @description ... +#' @description +#' Masking operations serve to aggregate data across layers, e.g., +#' counting points in shapes, averaging image channels by labels, etc. +#' For added flexibility, these may be carried out directly between elements, +#' or using an input \code{SpatialData} object and specifying element names. #' #' @param x \code{\link{SpatialData}} object. #' @param i,j character string; names of elements to mask, diff --git a/man/mask.Rd b/man/mask.Rd index a64865cd..da716523 100644 --- a/man/mask.Rd +++ b/man/mask.Rd @@ -25,7 +25,10 @@ Input \code{SpatialData} object \code{x} with an additional table named masking elements directly (i.e., without \code{x} as input). } \description{ -... +Masking operations serve to aggregate data across layers, e.g., +counting points in shapes, averaging image channels by labels, etc. +For added flexibility, these may be carried out directly between elements, +or using an input \code{SpatialData} object and specifying element names. } \examples{ library(SingleCellExperiment) From ecca7df8d51b1dc59ef7bea53bd36f13c4cf4e97 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Wed, 8 Apr 2026 09:42:15 +0200 Subject: [PATCH 16/35] fix doc typo --- R/query.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/query.R b/R/query.R index a50cb834..e3ea8716 100644 --- a/R/query.R +++ b/R/query.R @@ -48,7 +48,7 @@ #' fd <- st_coordinates(st_as_sf(data(t))) #' plot( #' asp=1, xlim=c(15, 60), ylim=c(15, 60), -#' rbind(pol, pol[1, ]), type="l", col="blue") +#' rbind(y, y[1, ]), type="l", col="blue") #' foo <- by(df, df[, "L2"], \(x) points(x, type="b", col="black")) #' foo <- by(fd, fd[, "L2"], \(x) points(x, type="b", col="red")) NULL From 4d9539deb67b16dbc1d338bef2d42daacba4e948 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Wed, 8 Apr 2026 12:44:59 +0200 Subject: [PATCH 17/35] some progress on masking with tables --- NAMESPACE | 2 + R/mask.R | 100 ++++++++++++++++++++++++++++++--------------- man/SpatialData.Rd | 8 ++-- man/mask.Rd | 31 ++++++++++---- man/query.Rd | 2 +- 5 files changed, 97 insertions(+), 46 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index c39d44ee..1cbd069d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -114,6 +114,7 @@ importFrom(SingleCellExperiment,int_colData) importFrom(SingleCellExperiment,int_metadata) importFrom(SummarizedExperiment,"colData<-") importFrom(SummarizedExperiment,assay) +importFrom(SummarizedExperiment,assayNames) importFrom(SummarizedExperiment,colData) importFrom(ZarrArray,ZarrArray) importFrom(ZarrArray,path) @@ -148,6 +149,7 @@ importFrom(methods,new) importFrom(methods,setClassUnion) importFrom(methods,setReplaceMethod) importFrom(reticulate,import) +importFrom(scuttle,aggregateAcrossCells) importFrom(sf,"st_geometry<-") importFrom(sf,st_as_sf) importFrom(sf,st_bbox) diff --git a/R/mask.R b/R/mask.R index f0b8a206..7682e5fe 100644 --- a/R/mask.R +++ b/R/mask.R @@ -12,7 +12,7 @@ #' specifically, \code{i} will be masked by \code{j}, #' adding a \code{table} for \code{j} in \code{x}. #' @param how character string; statistic to use for masking. -#' Defaults to "mean" when masking images, ignored when masking points. +#' @param name function use to generate the new \code{table}'s name. #' @param ... optional arguments passed to and from other methods. #' #' @return @@ -28,14 +28,21 @@ #' #' # count points in shapes #' y <- mask(x, "blobs_points", "blobs_circles") -#' (sce <- mask(i=point(x), j=shape(x, 1))) -#' identical(assay(table(y)), assay(sce)) +#' t <- y$tables$blobs_circles_masking_blobs_points +#' (sce <- .mask(i=point(x), j=shape(x, 1))) +#' identical(assay(t), assay(sce)) #' #' # average image channels by labels #' y <- mask(x, "blobs_image", "blobs_labels") -#' (sce <- mask(i=image(x), j=label(x))) -#' identical(assay(table(y)), assay(sce)) +#' t <- y$tables$blobs_labels_masking_blobs_image +#' (sce <- .mask(i=image(x), j=label(x))) +#' identical(assay(t), assay(sce)) #' +#' library(SpatialData.data) +#' x <- get_demo_SDdata("merfish") +#' x <- readSpatialData(x) +#' y <- mask(x, "cells", "anatomical") +#' tail(tables(y), 1) #' @export NULL @@ -44,27 +51,52 @@ NULL #' @rdname mask #' @importFrom SingleCellExperiment int_colData int_colData<- int_metadata<- #' @export -setMethod("mask", c("SpatialData", "ANY", "ANY"), \(x, i, j, how=NULL) { +setMethod("mask", c("SpatialData", "ANY", "ANY"), \(x, i, j, how=NULL, name=\(i, j) sprintf("%s_by_%s", i, j), ...) { + if (!is.null(how)) how <- match.arg(how, c("sum", "mean")) stopifnot(length(i) == 1, is.character(i), i %in% unlist(colnames(x))) stopifnot(length(j) == 1, is.character(j), j %in% unlist(colnames(x))) - # get element types + ok <- is.character(name) && length(name) == 1 && !name %in% tableNames(x) + nm <- if (is.function(name)) name(i, j) else if (ok) name else stop( + "Invalid 'name'; should be a function or a ", + "character string not yet in 'tableNames(x)'") + t <- tryCatch(error=\(.) NULL, getTable(x, i)) f <- \(i) names(which(rapply(colnames(x), \(.) i %in% ., "character"))) - t <- mask(i=element(x, f(i), i), j=element(x, f(j), j), how=how) - md <- list(region=j, - region_key="region", - instance_key="instance") - int_metadata(t)$spatialdata_attrs <- md - cd <- data.frame(region=j, instance=colnames(t)) - int_colData(t) <- cbind(int_colData(t), cd) - nm <- paste0(j, "_masking_", i) - `table<-`(x, nm, value=t) + se <- .mask(i=element(x, f(i), i), j=element(x, f(j), j), how=how, table=t) + md <- list(region=j, region_key="region", instance_key="instance") + int_metadata(se)$spatialdata_attrs <- md + cd <- data.frame(region=j, instance=colnames(se)) + int_colData(se) <- cbind(int_colData(se), cd) + `table<-`(x, nm, value=se) +}) + +setGeneric(".mask", \(i, j, ...) standardGeneric(".mask")) + +#' @importFrom methods as +#' @importFrom DelayedArray realize +#' @importFrom S4Arrays as.array.Array +#' @importFrom SummarizedExperiment assayNames +#' @importFrom SingleCellExperiment SingleCellExperiment +setMethod(".mask", c("ImageArray", "LabelArray"), \(i, j, how=NULL) { + if (is.null(how)) { how <- "mean"; message("Missing 'how'; defaulting to 'mean'") } + stopifnot(dim(i)[-1] == dim(j)) + .j <- as(data(j), "sparseVector") + .j <- as.vector(.j[ok <- .j > 0]) + mx <- apply(data(i), 1, \(.i) { + .i <- as(.i, "sparseVector") + .i <- as.vector(.i[ok]) + tapply(.i, .j, how) + }) + colnames(mx) <- channels(i) + se <- SingleCellExperiment(list(t(mx))) + assayNames(se) <- how + return(se) }) #' @importFrom methods as #' @importFrom Matrix rowSums sparseVector t #' @importFrom SingleCellExperiment SingleCellExperiment #' @importFrom sf st_as_sf st_geometry_type st_distance -setMethod("mask", c("missing", "PointFrame", "ShapeFrame"), \(x, i, j, how=NULL) { +setMethod(".mask", c("PointFrame", "ShapeFrame"), \(i, j, how=NULL) { if (!is.null(how)) warning("Can only count when masking points; ignoring 'how'") n <- nrow(j <- st_as_sf(data(j))) fun <- switch(as.character(st_geometry_type(j[1, ])), @@ -89,25 +121,27 @@ setMethod("mask", c("missing", "PointFrame", "ShapeFrame"), \(x, i, j, how=NULL) SingleCellExperiment(list(counts=ns)) }) -#' @importFrom methods as -#' @importFrom DelayedArray realize -#' @importFrom S4Arrays as.array.Array +#' @importFrom scuttle aggregateAcrossCells +#' @importFrom SummarizedExperiment assayNames #' @importFrom SingleCellExperiment SingleCellExperiment -setMethod("mask", c("missing", "ImageArray", "LabelArray"), \(x, i, j, how=NULL) { - if (is.null(how)) how <- "mean" - stopifnot(dim(i)[-1] == dim(j)) - .j <- as(data(j), "sparseVector") - .j <- as.vector(.j[ok <- .j > 0]) - mx <- apply(data(i), 1, \(.i) { - .i <- as(.i, "sparseVector") - .i <- as.vector(.i[ok]) - tapply(.i, .j, how) - }) - colnames(mx) <- channels(i) - se <- SingleCellExperiment(list(t(mx))) +setMethod(".mask", c("ShapeFrame", "ShapeFrame"), \(i, j, table=NULL, value=NULL, how=NULL) { + #how <- value <- NULL; i <- shape(x, "cells"); j <- shape(x, "anatomical") + if (is.null(how)) { how <- "sum"; message("Missing 'how'; defaulting to 'sum'") } + if (is.null(table)) stop("Missing 'table'; can't mask shapes without") + if (is.null(value)) value <- rownames(table) + idx <- st_intersects( + st_as_sf(data(j)), + st_as_sf(data(i))) + foo <- integer(nrow(i)) + foo[unlist(idx)] <- rep(seq_along(idx), lengths(idx)) + se <- aggregateAcrossCells(table, + ids=foo, statistics=how, use.assay.type=1, + store_number=paste0("n_", meta(table)$region)) + colnames(se) <- se[[1]]; se[[1]] <- NULL assayNames(se) <- how return(se) }) -setMethod("mask", c("missing", "ANY", "ANY"), \(x, i, j, how=NULL) +setMethod(".mask", c("ANY", "ANY"), \(x, i, j, ...) stop("'mask'ing between these element types not yet supported")) + diff --git a/man/SpatialData.Rd b/man/SpatialData.Rd index 2eb34b55..5c16f970 100644 --- a/man/SpatialData.Rd +++ b/man/SpatialData.Rd @@ -50,8 +50,8 @@ \alias{element,SpatialData,ANY,numeric-method} \alias{element,SpatialData,ANY,missing-method} \alias{element,SpatialData,ANY,ANY-method} -\alias{[[<-,SpatialData,numeric,ANY-method} -\alias{[[<-,SpatialData,character,ANY-method} +\alias{[[<-,SpatialData,numeric,ANY,ANY-method} +\alias{[[<-,SpatialData,character,ANY,ANY-method} \title{The `SpatialData` class} \usage{ SpatialData(images, labels, points, shapes, tables) @@ -88,9 +88,9 @@ SpatialData(images, labels, points, shapes, tables) \S4method{element}{SpatialData,ANY,ANY}(x, i, j) -\S4method{[[}{SpatialData,numeric,ANY}(x, i) <- value +\S4method{[[}{SpatialData,numeric,ANY,ANY}(x, i) <- value -\S4method{[[}{SpatialData,character,ANY}(x, i) <- value +\S4method{[[}{SpatialData,character,ANY,ANY}(x, i) <- value } \arguments{ \item{images}{list of \code{\link{ImageArray}}s} diff --git a/man/mask.Rd b/man/mask.Rd index da716523..d5488532 100644 --- a/man/mask.Rd +++ b/man/mask.Rd @@ -2,10 +2,17 @@ % Please edit documentation in R/mask.R \name{mask} \alias{mask} -\alias{mask,SpatialData,ANY,ANY-method} +\alias{mask,SpatialData-method} \title{Masking} \usage{ -\S4method{mask}{SpatialData,ANY,ANY}(x, i, j, how = NULL) +\S4method{mask}{SpatialData}( + x, + i, + j, + how = NULL, + name = function(i, j) sprintf("\%s_by_\%s", i, j), + ... +) } \arguments{ \item{x}{\code{\link{SpatialData}} object.} @@ -14,8 +21,9 @@ specifically, \code{i} will be masked by \code{j}, adding a \code{table} for \code{j} in \code{x}.} -\item{how}{character string; statistic to use for masking. -Defaults to "mean" when masking images, ignored when masking points.} +\item{how}{character string; statistic to use for masking.} + +\item{name}{function use to generate the new \code{table}'s name.} \item{...}{optional arguments passed to and from other methods.} } @@ -38,12 +46,19 @@ x <- readSpatialData(x, tables=FALSE) # count points in shapes y <- mask(x, "blobs_points", "blobs_circles") -(sce <- mask(i=point(x), j=shape(x, 1))) -identical(assay(table(y)), assay(sce)) +t <- y$tables$blobs_circles_masking_blobs_points +(sce <- .mask(i=point(x), j=shape(x, 1))) +identical(assay(t), assay(sce)) # average image channels by labels y <- mask(x, "blobs_image", "blobs_labels") -(sce <- mask(i=image(x), j=label(x))) -identical(assay(table(y)), assay(sce)) +t <- y$tables$blobs_labels_masking_blobs_image +(sce <- .mask(i=image(x), j=label(x))) +identical(assay(t), assay(sce)) +library(SpatialData.data) +x <- get_demo_SDdata("merfish") +x <- readSpatialData(x) +y <- mask(x, "cells", "anatomical") +tail(tables(y), 1) } diff --git a/man/query.Rd b/man/query.Rd index cb50c9b3..b3a383bd 100644 --- a/man/query.Rd +++ b/man/query.Rd @@ -70,7 +70,7 @@ df <- st_coordinates(st_as_sf(data(s))) fd <- st_coordinates(st_as_sf(data(t))) plot( asp=1, xlim=c(15, 60), ylim=c(15, 60), - rbind(pol, pol[1, ]), type="l", col="blue") + rbind(y, y[1, ]), type="l", col="blue") foo <- by(df, df[, "L2"], \(x) points(x, type="b", col="black")) foo <- by(fd, fd[, "L2"], \(x) points(x, type="b", col="red")) } From 2216dae31608808be039f2903fc5d791d3e08dda Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Wed, 8 Apr 2026 13:30:09 +0200 Subject: [PATCH 18/35] masking tests --- R/mask.R | 29 ++++++++++++-------- tests/testthat/test-mask.R | 56 ++++++++++++++++++++++++++++++-------- 2 files changed, 61 insertions(+), 24 deletions(-) diff --git a/R/mask.R b/R/mask.R index 7682e5fe..09d408c9 100644 --- a/R/mask.R +++ b/R/mask.R @@ -46,26 +46,29 @@ #' @export NULL -# TODO: table from point + shape, image + label etc. etc. etc. +.check_ij <- \(x, .) stopifnot(length(.) == 1, is.character(.), . %in% unlist(colnames(x))) #' @rdname mask #' @importFrom SingleCellExperiment int_colData int_colData<- int_metadata<- #' @export -setMethod("mask", c("SpatialData", "ANY", "ANY"), \(x, i, j, how=NULL, name=\(i, j) sprintf("%s_by_%s", i, j), ...) { +setMethod("mask", c("SpatialData", "ANY", "ANY"), \(x, i, j, + how=NULL, name=\(i, j) sprintf("%s_by_%s", i, j), ...) { + .check_ij(x, i); .check_ij(x, j) if (!is.null(how)) how <- match.arg(how, c("sum", "mean")) - stopifnot(length(i) == 1, is.character(i), i %in% unlist(colnames(x))) - stopifnot(length(j) == 1, is.character(j), j %in% unlist(colnames(x))) ok <- is.character(name) && length(name) == 1 && !name %in% tableNames(x) nm <- if (is.function(name)) name(i, j) else if (ok) name else stop( "Invalid 'name'; should be a function or a ", "character string not yet in 'tableNames(x)'") t <- tryCatch(error=\(.) NULL, getTable(x, i)) f <- \(i) names(which(rapply(colnames(x), \(.) i %in% ., "character"))) - se <- .mask(i=element(x, f(i), i), j=element(x, f(j), j), how=how, table=t) + se <- .mask(i=element(x, f(i), i), j=element(x, f(j), j), how=how, table=t, ...) md <- list(region=j, region_key="region", instance_key="instance") int_metadata(se)$spatialdata_attrs <- md - cd <- data.frame(region=j, instance=colnames(se)) - int_colData(se) <- cbind(int_colData(se), cd) + assay(se) <- as(assay(se), "dgCMatrix") + cd <- int_colData(se) + cd$region <- j + cd$instance <- colnames(se) + int_colData(se) <- cd `table<-`(x, nm, value=se) }) @@ -76,7 +79,7 @@ setGeneric(".mask", \(i, j, ...) standardGeneric(".mask")) #' @importFrom S4Arrays as.array.Array #' @importFrom SummarizedExperiment assayNames #' @importFrom SingleCellExperiment SingleCellExperiment -setMethod(".mask", c("ImageArray", "LabelArray"), \(i, j, how=NULL) { +setMethod(".mask", c("ImageArray", "LabelArray"), \(i, j, how=NULL, ...) { if (is.null(how)) { how <- "mean"; message("Missing 'how'; defaulting to 'mean'") } stopifnot(dim(i)[-1] == dim(j)) .j <- as(data(j), "sparseVector") @@ -96,7 +99,7 @@ setMethod(".mask", c("ImageArray", "LabelArray"), \(i, j, how=NULL) { #' @importFrom Matrix rowSums sparseVector t #' @importFrom SingleCellExperiment SingleCellExperiment #' @importFrom sf st_as_sf st_geometry_type st_distance -setMethod(".mask", c("PointFrame", "ShapeFrame"), \(i, j, how=NULL) { +setMethod(".mask", c("PointFrame", "ShapeFrame"), \(i, j, how=NULL, ...) { if (!is.null(how)) warning("Can only count when masking points; ignoring 'how'") n <- nrow(j <- st_as_sf(data(j))) fun <- switch(as.character(st_geometry_type(j[1, ])), @@ -124,18 +127,20 @@ setMethod(".mask", c("PointFrame", "ShapeFrame"), \(i, j, how=NULL) { #' @importFrom scuttle aggregateAcrossCells #' @importFrom SummarizedExperiment assayNames #' @importFrom SingleCellExperiment SingleCellExperiment -setMethod(".mask", c("ShapeFrame", "ShapeFrame"), \(i, j, table=NULL, value=NULL, how=NULL) { +setMethod(".mask", c("ShapeFrame", "ShapeFrame"), \(i, j, table=NULL, value=NULL, how=NULL, ...) { #how <- value <- NULL; i <- shape(x, "cells"); j <- shape(x, "anatomical") if (is.null(how)) { how <- "sum"; message("Missing 'how'; defaulting to 'sum'") } if (is.null(table)) stop("Missing 'table'; can't mask shapes without") - if (is.null(value)) value <- rownames(table) + ok <- is.null(value) || (is.character(value) && all(value %in% rownames(table))) + if (!ok) stop("Invalid 'value'; should be in 'rownames(table(x, i))'") idx <- st_intersects( st_as_sf(data(j)), st_as_sf(data(i))) foo <- integer(nrow(i)) foo[unlist(idx)] <- rep(seq_along(idx), lengths(idx)) se <- aggregateAcrossCells(table, - ids=foo, statistics=how, use.assay.type=1, + ids=foo, subset.row=value, + statistics=how, use.assay.type=1, store_number=paste0("n_", meta(table)$region)) colnames(se) <- se[[1]]; se[[1]] <- NULL assayNames(se) <- how diff --git a/tests/testthat/test-mask.R b/tests/testthat/test-mask.R index 9039bfe6..93c6cf3c 100644 --- a/tests/testthat/test-mask.R +++ b/tests/testthat/test-mask.R @@ -3,24 +3,56 @@ x <- file.path("extdata", "blobs.zarr") x <- system.file(x, package="SpatialData") x <- readSpatialData(x, anndataR=TRUE) -test_that("mask(),ImageArray,LabelArray", { +test_that("mask,ImageArray,LabelArray", { i <- "blobs_image" j <- "blobs_labels" - x <- SpatialData::mask(x, i, j, fun=sum) + # reproduce example data + y <- mask(x, i, j, how="sum") expect_equivalent( - assay(SpatialData::table(x, 1)), - assay(SpatialData::table(x, 2))) + assay(tables(y)[[2]]), + assay(tables(x)[[1]])) + # default to 'mean' with a message + expect_message(y <- mask(x, i, j)) + expect_silent(z <- mask(x, i, j, how="mean")) + expect_identical(y, z) }) -test_that("mask(),PointFrame,ShapeFrame", { +test_that("mask,PointFrame,ShapeFrame", { i <- "blobs_points" j <- "blobs_circles" - x <- SpatialData::mask(x, i, j) - t <- getTable(x, j) - md <- meta(point(x, i)) - md <- md$spatialdata_attrs - fk <- md$feature_key - nr <- length(unique(point(x, i)[[fk]])) + y <- mask(x, i, j) + t <- getTable(y, j) + fk <- feature_key(p <- point(x, i)) + np <- length(unique(p[[fk]])) nc <- nrow(shape(x, j)) - expect_equal(dim(t), c(nr, nc)) + expect_equal(dim(t), c(np, nc)) + # ignore 'how' with a warning + expect_warning(mask(x, i, j, how="sum")) +}) + +require(SpatialData.data, quietly=TRUE) +x <- get_demo_SDdata("merfish") +x <- readSpatialData(x) + +test_that("mask,ShapeFrame,ShapeFrame", { + i <- "cells" + j <- "anatomical" + # error without 'table' + y <- x; tables(y) <- list() + expect_error(mask(y, i, j)) + # default to 'sum' with a message + expect_message(y <- mask(x, i, j)) + expect_silent(z <- mask(x, i, j, how="sum")) + expect_identical(y, z) + old <- getTable(y, i) + new <- getTable(y, j) + expect_equal(dim(new), c(nrow(old), nrow(shape(x, j))+1)) + expect_equal(sum(assay(new)), sum(assay(old))) + expect_identical(rownames(new), rownames(old)) + expect_identical(meta(new)$region, j) + # 'value' should be a character + # vector of rownames in 'table(x, i)' + expect_error(mask(x, i, j, value="x")) + y <- mask(x, i, j, value=v <- sample(rownames(old), 5)) + expect_equal(sum(assay(new[v, ])), sum(assay(old[v, ]))) }) From 8c8e4ef5813099eaf3ba419e958e9f9bf21a8119 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Wed, 8 Apr 2026 13:34:41 +0200 Subject: [PATCH 19/35] +Imports:scuttle --- DESCRIPTION | 1 + R/mask.R | 2 +- tests/testthat/test-mask.R | 5 +++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 31e61414..cbaf5789 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -47,6 +47,7 @@ Imports: RBGL, reticulate, anndataR, + scuttle, sf, S4Arrays, S4Vectors, diff --git a/R/mask.R b/R/mask.R index 09d408c9..a89c0f8d 100644 --- a/R/mask.R +++ b/R/mask.R @@ -129,10 +129,10 @@ setMethod(".mask", c("PointFrame", "ShapeFrame"), \(i, j, how=NULL, ...) { #' @importFrom SingleCellExperiment SingleCellExperiment setMethod(".mask", c("ShapeFrame", "ShapeFrame"), \(i, j, table=NULL, value=NULL, how=NULL, ...) { #how <- value <- NULL; i <- shape(x, "cells"); j <- shape(x, "anatomical") - if (is.null(how)) { how <- "sum"; message("Missing 'how'; defaulting to 'sum'") } if (is.null(table)) stop("Missing 'table'; can't mask shapes without") ok <- is.null(value) || (is.character(value) && all(value %in% rownames(table))) if (!ok) stop("Invalid 'value'; should be in 'rownames(table(x, i))'") + if (is.null(how)) { how <- "sum"; message("Missing 'how'; defaulting to 'sum'") } idx <- st_intersects( st_as_sf(data(j)), st_as_sf(data(i))) diff --git a/tests/testthat/test-mask.R b/tests/testthat/test-mask.R index 93c6cf3c..da27a205 100644 --- a/tests/testthat/test-mask.R +++ b/tests/testthat/test-mask.R @@ -52,7 +52,8 @@ test_that("mask,ShapeFrame,ShapeFrame", { expect_identical(meta(new)$region, j) # 'value' should be a character # vector of rownames in 'table(x, i)' - expect_error(mask(x, i, j, value="x")) - y <- mask(x, i, j, value=v <- sample(rownames(old), 5)) + v <- sample(rownames(old), 5) + new <- getTable(mask(x, i, j, value=v), j) expect_equal(sum(assay(new[v, ])), sum(assay(old[v, ]))) + expect_error(mask(x, i, j, value=`[<-`(v, i=1, "x"))) }) From d9b2a20098baa3b3f3e6f89718baf433b656b87e Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Wed, 8 Apr 2026 13:37:02 +0200 Subject: [PATCH 20/35] note on masking --- inst/NEWS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/inst/NEWS b/inst/NEWS index 59a27e8b..f1cf28bc 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -3,6 +3,9 @@ changes in version 0.99.27 - spatial queries (aka subsetting) by bounding box (all element types) or polygons (points and shapes), including unit tests, documentation, visual examples +- masking draft to aggregate information across layers + (image by label, point by shape, shape by shape; + the latter aggregates values in an associated table) changes in version 0.99.26 From 00fc7ee4d2ccb0eabb589764857640efad9817ca Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Wed, 8 Apr 2026 13:43:29 +0200 Subject: [PATCH 21/35] bug fix doc --- R/mask.R | 4 ++-- man/mask.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/mask.R b/R/mask.R index a89c0f8d..bf173a57 100644 --- a/R/mask.R +++ b/R/mask.R @@ -28,13 +28,13 @@ #' #' # count points in shapes #' y <- mask(x, "blobs_points", "blobs_circles") -#' t <- y$tables$blobs_circles_masking_blobs_points +#' t <- tail(y$tables, 1)[[1]] #' (sce <- .mask(i=point(x), j=shape(x, 1))) #' identical(assay(t), assay(sce)) #' #' # average image channels by labels #' y <- mask(x, "blobs_image", "blobs_labels") -#' t <- y$tables$blobs_labels_masking_blobs_image +#' t <- tail(y$tables, 1)[[1]] #' (sce <- .mask(i=image(x), j=label(x))) #' identical(assay(t), assay(sce)) #' diff --git a/man/mask.Rd b/man/mask.Rd index d5488532..5bfe22e8 100644 --- a/man/mask.Rd +++ b/man/mask.Rd @@ -46,13 +46,13 @@ x <- readSpatialData(x, tables=FALSE) # count points in shapes y <- mask(x, "blobs_points", "blobs_circles") -t <- y$tables$blobs_circles_masking_blobs_points +t <- tail(y$tables, 1)[[1]] (sce <- .mask(i=point(x), j=shape(x, 1))) identical(assay(t), assay(sce)) # average image channels by labels y <- mask(x, "blobs_image", "blobs_labels") -t <- y$tables$blobs_labels_masking_blobs_image +t <- tail(y$tables, 1)[[1]] (sce <- .mask(i=image(x), j=label(x))) identical(assay(t), assay(sce)) From c04939f3a9b514214ffa2afbe43178cf41f4dafc Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Wed, 8 Apr 2026 14:04:19 +0200 Subject: [PATCH 22/35] fix roxy imports --- NAMESPACE | 3 ++- R/mask.R | 10 ++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 1cbd069d..7070fd1b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -112,9 +112,10 @@ importFrom(SingleCellExperiment,"int_metadata<-") importFrom(SingleCellExperiment,SingleCellExperiment) importFrom(SingleCellExperiment,int_colData) importFrom(SingleCellExperiment,int_metadata) +importFrom(SummarizedExperiment,"assay<-") +importFrom(SummarizedExperiment,"assayNames<-") importFrom(SummarizedExperiment,"colData<-") importFrom(SummarizedExperiment,assay) -importFrom(SummarizedExperiment,assayNames) importFrom(SummarizedExperiment,colData) importFrom(ZarrArray,ZarrArray) importFrom(ZarrArray,path) diff --git a/R/mask.R b/R/mask.R index bf173a57..589b0f79 100644 --- a/R/mask.R +++ b/R/mask.R @@ -49,6 +49,8 @@ NULL .check_ij <- \(x, .) stopifnot(length(.) == 1, is.character(.), . %in% unlist(colnames(x))) #' @rdname mask +#' @importFrom methods as +#' @importFrom SummarizedExperiment assay assay<- #' @importFrom SingleCellExperiment int_colData int_colData<- int_metadata<- #' @export setMethod("mask", c("SpatialData", "ANY", "ANY"), \(x, i, j, @@ -74,10 +76,11 @@ setMethod("mask", c("SpatialData", "ANY", "ANY"), \(x, i, j, setGeneric(".mask", \(i, j, ...) standardGeneric(".mask")) +#' @noRd #' @importFrom methods as #' @importFrom DelayedArray realize #' @importFrom S4Arrays as.array.Array -#' @importFrom SummarizedExperiment assayNames +#' @importFrom SummarizedExperiment assayNames<- #' @importFrom SingleCellExperiment SingleCellExperiment setMethod(".mask", c("ImageArray", "LabelArray"), \(i, j, how=NULL, ...) { if (is.null(how)) { how <- "mean"; message("Missing 'how'; defaulting to 'mean'") } @@ -95,6 +98,7 @@ setMethod(".mask", c("ImageArray", "LabelArray"), \(i, j, how=NULL, ...) { return(se) }) +#' @noRd #' @importFrom methods as #' @importFrom Matrix rowSums sparseVector t #' @importFrom SingleCellExperiment SingleCellExperiment @@ -124,8 +128,9 @@ setMethod(".mask", c("PointFrame", "ShapeFrame"), \(i, j, how=NULL, ...) { SingleCellExperiment(list(counts=ns)) }) +#' @noRd #' @importFrom scuttle aggregateAcrossCells -#' @importFrom SummarizedExperiment assayNames +#' @importFrom SummarizedExperiment assayNames<- #' @importFrom SingleCellExperiment SingleCellExperiment setMethod(".mask", c("ShapeFrame", "ShapeFrame"), \(i, j, table=NULL, value=NULL, how=NULL, ...) { #how <- value <- NULL; i <- shape(x, "cells"); j <- shape(x, "anatomical") @@ -147,6 +152,7 @@ setMethod(".mask", c("ShapeFrame", "ShapeFrame"), \(i, j, table=NULL, value=NULL return(se) }) +#' @noRd setMethod(".mask", c("ANY", "ANY"), \(x, i, j, ...) stop("'mask'ing between these element types not yet supported")) From 4843dfdf12a226b377cfcaa4dfb6fd4b5f663510 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Wed, 8 Apr 2026 14:18:55 +0200 Subject: [PATCH 23/35] fix R CMD check error/warnings --- R/coord_utils.R | 2 +- R/mask.R | 12 +++++------- R/query.R | 2 ++ R/sdArray.R | 2 ++ man/Array-methods.Rd | 2 ++ man/coord-utils.Rd | 1 + man/mask.Rd | 11 +++++------ man/query.Rd | 4 ++++ tests/testthat/test-mask.R | 9 +++++++++ 9 files changed, 31 insertions(+), 14 deletions(-) diff --git a/R/coord_utils.R b/R/coord_utils.R index 83451fc4..440ad77c 100644 --- a/R/coord_utils.R +++ b/R/coord_utils.R @@ -1,6 +1,6 @@ #' @name coord-utils #' @title Coordinate transformations -#' @aliases axes CTname CTtype CTdata CTpath CTgraph addCT rmvCT +#' @aliases axes CTlist CTname CTtype CTdata CTpath CTgraph addCT rmvCT #' #' @param x \code{SpatialData}, an element, or \code{Zattrs}. #' @param i for \code{CTpath}, source node label; else, string or diff --git a/R/mask.R b/R/mask.R index 589b0f79..4196a4b3 100644 --- a/R/mask.R +++ b/R/mask.R @@ -28,21 +28,20 @@ #' #' # count points in shapes #' y <- mask(x, "blobs_points", "blobs_circles") -#' t <- tail(y$tables, 1)[[1]] -#' (sce <- .mask(i=point(x), j=shape(x, 1))) -#' identical(assay(t), assay(sce)) +#' tail(tables(y), 1) #' #' # average image channels by labels #' y <- mask(x, "blobs_image", "blobs_labels") -#' t <- tail(y$tables, 1)[[1]] -#' (sce <- .mask(i=image(x), j=label(x))) -#' identical(assay(t), assay(sce)) +#' tail(tables(y), 1) #' #' library(SpatialData.data) #' x <- get_demo_SDdata("merfish") #' x <- readSpatialData(x) +#' +#' # sum assay data from table by shapes #' y <- mask(x, "cells", "anatomical") #' tail(tables(y), 1) +#' #' @export NULL @@ -133,7 +132,6 @@ setMethod(".mask", c("PointFrame", "ShapeFrame"), \(i, j, how=NULL, ...) { #' @importFrom SummarizedExperiment assayNames<- #' @importFrom SingleCellExperiment SingleCellExperiment setMethod(".mask", c("ShapeFrame", "ShapeFrame"), \(i, j, table=NULL, value=NULL, how=NULL, ...) { - #how <- value <- NULL; i <- shape(x, "cells"); j <- shape(x, "anatomical") if (is.null(table)) stop("Missing 'table'; can't mask shapes without") ok <- is.null(value) || (is.character(value) && all(value %in% rownames(table))) if (!ok) stop("Invalid 'value'; should be in 'rownames(table(x, i))'") diff --git a/R/query.R b/R/query.R index e3ea8716..2630d05d 100644 --- a/R/query.R +++ b/R/query.R @@ -12,6 +12,8 @@ #' @param y query specification; #' bounding box: length-4 numeric list with names 'xmin/xmax/ymin/ymax' #' (order is irrelevant); polygon: numeric matrix with ≥ 3 rows and 2 columns. +#' @param i for \code{SpatialData}, index or name of table to query. +#' @param ... optional arguments passed to and from other methods. #' #' @return same as input #' diff --git a/R/sdArray.R b/R/sdArray.R index 8e005e5f..c1893892 100644 --- a/R/sdArray.R +++ b/R/sdArray.R @@ -8,6 +8,8 @@ #' dim,LabelArray-method #' length,ImageArray-method #' length,LabelArray-method +#' data_type,ImageArray-method +#' data_type,LabelArray-method #' #' @param x \code{ImageArray} or \code{LabelArray} #' @param k scalar index specifying which scale to extract. diff --git a/man/Array-methods.Rd b/man/Array-methods.Rd index 1990122e..2be1a08a 100644 --- a/man/Array-methods.Rd +++ b/man/Array-methods.Rd @@ -8,6 +8,8 @@ \alias{dim,LabelArray-method} \alias{length,ImageArray-method} \alias{length,LabelArray-method} +\alias{data_type,ImageArray-method} +\alias{data_type,LabelArray-method} \alias{data,sdArray-method} \alias{dim,sdArray-method} \alias{length,sdArray-method} diff --git a/man/coord-utils.Rd b/man/coord-utils.Rd index b1d3efd7..aab39e96 100644 --- a/man/coord-utils.Rd +++ b/man/coord-utils.Rd @@ -3,6 +3,7 @@ \name{coord-utils} \alias{coord-utils} \alias{axes} +\alias{CTlist} \alias{CTname} \alias{CTtype} \alias{CTdata} diff --git a/man/mask.Rd b/man/mask.Rd index 5bfe22e8..73c55451 100644 --- a/man/mask.Rd +++ b/man/mask.Rd @@ -46,19 +46,18 @@ x <- readSpatialData(x, tables=FALSE) # count points in shapes y <- mask(x, "blobs_points", "blobs_circles") -t <- tail(y$tables, 1)[[1]] -(sce <- .mask(i=point(x), j=shape(x, 1))) -identical(assay(t), assay(sce)) +tail(tables(y), 1) # average image channels by labels y <- mask(x, "blobs_image", "blobs_labels") -t <- tail(y$tables, 1)[[1]] -(sce <- .mask(i=image(x), j=label(x))) -identical(assay(t), assay(sce)) +tail(tables(y), 1) library(SpatialData.data) x <- get_demo_SDdata("merfish") x <- readSpatialData(x) + +# sum assay data from table by shapes y <- mask(x, "cells", "anatomical") tail(tables(y), 1) + } diff --git a/man/query.Rd b/man/query.Rd index b3a383bd..83d23cf4 100644 --- a/man/query.Rd +++ b/man/query.Rd @@ -22,6 +22,10 @@ \arguments{ \item{x}{\code{SpatialData} element.} +\item{...}{optional arguments passed to and from other methods.} + +\item{i}{for \code{SpatialData}, index or name of table to query.} + \item{y}{query specification; bounding box: length-4 numeric list with names 'xmin/xmax/ymin/ymax' (order is irrelevant); polygon: numeric matrix with ≥ 3 rows and 2 columns.} diff --git a/tests/testthat/test-mask.R b/tests/testthat/test-mask.R index da27a205..1eebe0d0 100644 --- a/tests/testthat/test-mask.R +++ b/tests/testthat/test-mask.R @@ -3,6 +3,15 @@ x <- file.path("extdata", "blobs.zarr") x <- system.file(x, package="SpatialData") x <- readSpatialData(x, anndataR=TRUE) +test_that("mask,unsupported", { + nm <- list( + c(imageNames(x)[1], imageNames(x)[2]), # image,image + c(labelNames(x)[1], labelNames(x)[2]), # label,label + c(labelNames(x)[1], imageNames(x)[1]), # label,image + c(shapeNames(x)[1], pointNames(x)[1])) # shape,point + for (ij in nm) expect_error(mask(x, ij[1], ij[2])) +}) + test_that("mask,ImageArray,LabelArray", { i <- "blobs_image" j <- "blobs_labels" From 60c8c4611ade0ac3c923176a97600f19cb709400 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Wed, 8 Apr 2026 14:22:51 +0200 Subject: [PATCH 24/35] make copi happier --- R/mask.R | 7 ++----- man/mask.Rd | 4 +--- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/R/mask.R b/R/mask.R index 4196a4b3..5a93763d 100644 --- a/R/mask.R +++ b/R/mask.R @@ -15,10 +15,7 @@ #' @param name function use to generate the new \code{table}'s name. #' @param ... optional arguments passed to and from other methods. #' -#' @return -#' Input \code{SpatialData} object \code{x} with an additional table named -#' \code{_masking_}; or a \code{SingleCellExperiment} object when -#' masking elements directly (i.e., without \code{x} as input). +#' @return Input \code{SpatialData} object \code{x} with an additional table. #' #' @examples #' library(SingleCellExperiment) @@ -151,6 +148,6 @@ setMethod(".mask", c("ShapeFrame", "ShapeFrame"), \(i, j, table=NULL, value=NULL }) #' @noRd -setMethod(".mask", c("ANY", "ANY"), \(x, i, j, ...) +setMethod(".mask", c("ANY", "ANY"), \(i, j, ...) stop("'mask'ing between these element types not yet supported")) diff --git a/man/mask.Rd b/man/mask.Rd index 73c55451..6e960757 100644 --- a/man/mask.Rd +++ b/man/mask.Rd @@ -28,9 +28,7 @@ adding a \code{table} for \code{j} in \code{x}.} \item{...}{optional arguments passed to and from other methods.} } \value{ -Input \code{SpatialData} object \code{x} with an additional table named -\code{_masking_}; or a \code{SingleCellExperiment} object when -masking elements directly (i.e., without \code{x} as input). +Input \code{SpatialData} object \code{x} with an additional table. } \description{ Masking operations serve to aggregate data across layers, e.g., From 0ad8f952e26cf7eafc386efb9a0e1d4b2fbe7ad8 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Wed, 8 Apr 2026 14:27:09 +0200 Subject: [PATCH 25/35] added roxy imports --- NAMESPACE | 3 +-- R/mask.R | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 7070fd1b..86d000b5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -93,14 +93,13 @@ importFrom(DelayedArray,ConstantArray) importFrom(DelayedArray,DelayedArray) importFrom(DelayedArray,cbind) importFrom(DelayedArray,rbind) -importFrom(DelayedArray,realize) importFrom(Matrix,rowSums) +importFrom(Matrix,sparseMatrix) importFrom(Matrix,sparseVector) importFrom(Matrix,t) importFrom(RBGL,sp.between) importFrom(Rarr,read_zarr_attributes) importFrom(Rarr,zarr_overview) -importFrom(S4Arrays,as.array.Array) importFrom(S4Vectors,"metadata<-") importFrom(S4Vectors,coolcat) importFrom(S4Vectors,isSequence) diff --git a/R/mask.R b/R/mask.R index 5a93763d..2f3038d0 100644 --- a/R/mask.R +++ b/R/mask.R @@ -74,8 +74,7 @@ setGeneric(".mask", \(i, j, ...) standardGeneric(".mask")) #' @noRd #' @importFrom methods as -#' @importFrom DelayedArray realize -#' @importFrom S4Arrays as.array.Array +#' @importFrom Matrix sparseVector #' @importFrom SummarizedExperiment assayNames<- #' @importFrom SingleCellExperiment SingleCellExperiment setMethod(".mask", c("ImageArray", "LabelArray"), \(i, j, how=NULL, ...) { @@ -96,7 +95,7 @@ setMethod(".mask", c("ImageArray", "LabelArray"), \(i, j, how=NULL, ...) { #' @noRd #' @importFrom methods as -#' @importFrom Matrix rowSums sparseVector t +#' @importFrom Matrix t rowSums sparseVector sparseMatrix #' @importFrom SingleCellExperiment SingleCellExperiment #' @importFrom sf st_as_sf st_geometry_type st_distance setMethod(".mask", c("PointFrame", "ShapeFrame"), \(i, j, how=NULL, ...) { From 8505a1f7a8c43676a082d648d4017a4c97e0f95e Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Wed, 8 Apr 2026 14:28:13 +0200 Subject: [PATCH 26/35] rephrase comment --- R/mask.R | 2 +- man/mask.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/mask.R b/R/mask.R index 2f3038d0..f0a9de7f 100644 --- a/R/mask.R +++ b/R/mask.R @@ -35,7 +35,7 @@ #' x <- get_demo_SDdata("merfish") #' x <- readSpatialData(x) #' -#' # sum assay data from table by shapes +#' # sum table counts by shapes #' y <- mask(x, "cells", "anatomical") #' tail(tables(y), 1) #' diff --git a/man/mask.Rd b/man/mask.Rd index 6e960757..9c248c12 100644 --- a/man/mask.Rd +++ b/man/mask.Rd @@ -54,7 +54,7 @@ library(SpatialData.data) x <- get_demo_SDdata("merfish") x <- readSpatialData(x) -# sum assay data from table by shapes +# sum table counts by shapes y <- mask(x, "cells", "anatomical") tail(tables(y), 1) From febdf4c738474e1c1e0f11527102ce0f642913ee Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Wed, 8 Apr 2026 14:33:06 +0200 Subject: [PATCH 27/35] bug fix: check for row-wise duplicates to assure polygon vertices are unique --- R/AllGenerics.R | 4 ---- R/mask.R | 6 ++++-- R/query.R | 4 ++-- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 5c9f6541..c92b0aa0 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -57,10 +57,6 @@ setGeneric("rotate", \(x, t, ...) standardGeneric("rotate")) setGeneric("transform", \(x, ...) standardGeneric("transform")) setGeneric("translation", \(x, t, ...) standardGeneric("translation")) -# sda ---- - -setGeneric("feature_key", \(x, ...) standardGeneric("feature_key")) - # uts ---- setGeneric("layer", \(x, i, ...) standardGeneric("layer")) diff --git a/R/mask.R b/R/mask.R index f0a9de7f..dc64dec6 100644 --- a/R/mask.R +++ b/R/mask.R @@ -57,9 +57,11 @@ setMethod("mask", c("SpatialData", "ANY", "ANY"), \(x, i, j, nm <- if (is.function(name)) name(i, j) else if (ok) name else stop( "Invalid 'name'; should be a function or a ", "character string not yet in 'tableNames(x)'") - t <- tryCatch(error=\(.) NULL, getTable(x, i)) f <- \(i) names(which(rapply(colnames(x), \(.) i %in% ., "character"))) - se <- .mask(i=element(x, f(i), i), j=element(x, f(j), j), how=how, table=t, ...) + .i <- element(x, f(i), i) + .j <- element(x, f(j), j) + t <- tryCatch(error=\(.) NULL, getTable(x, i)) + se <- .mask(.i, .j, how=how, table=t, ...) md <- list(region=j, region_key="region", instance_key="instance") int_metadata(se)$spatialdata_attrs <- md assay(se) <- as(assay(se), "dgCMatrix") diff --git a/R/query.R b/R/query.R index 2630d05d..4a84d1ed 100644 --- a/R/query.R +++ b/R/query.R @@ -79,8 +79,8 @@ NULL bot <- mx[nrow(mx), ] if (!all(top == bot)) mx <- rbind(mx, top) - dup <- any(duplicated(mx[-1, ])) - if (dup) stop("Invalid polygon query; found duplicated vertices") + dup <- duplicated(as.data.frame(mx[-1, , drop=FALSE])) + if (any(dup)) stop("Invalid polygon query; found duplicated vertices") return(mx) } From 73539d9ef2a5db3ce25fdc80239cbfc0f376c8b0 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Wed, 8 Apr 2026 14:34:23 +0200 Subject: [PATCH 28/35] fix method export --- NAMESPACE | 1 + R/AllGenerics.R | 4 ++++ R/Zattrs.R | 9 ++++++++- man/Zattrs.Rd | 3 +++ 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 86d000b5..3cc8f38f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -52,6 +52,7 @@ exportMethods(data) exportMethods(data_type) exportMethods(dim) exportMethods(element) +exportMethods(feature_key) exportMethods(getTable) exportMethods(hasTable) exportMethods(image) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index c92b0aa0..5c9f6541 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -57,6 +57,10 @@ setGeneric("rotate", \(x, t, ...) standardGeneric("rotate")) setGeneric("transform", \(x, ...) standardGeneric("transform")) setGeneric("translation", \(x, t, ...) standardGeneric("translation")) +# sda ---- + +setGeneric("feature_key", \(x, ...) standardGeneric("feature_key")) + # uts ---- setGeneric("layer", \(x, i, ...) standardGeneric("layer")) diff --git a/R/Zattrs.R b/R/Zattrs.R index dc42e53d..d21bec29 100644 --- a/R/Zattrs.R +++ b/R/Zattrs.R @@ -1,6 +1,8 @@ #' @name Zattrs #' @title The `Zattrs` class -#' +#' +#' @aliases feature_key +#' #' @param x list extracted from a OME-NGFF compliant .zattrs file. #' @param name character string for extraction (see ?base::`$`). #' @@ -16,6 +18,8 @@ #' CTname(z) #' CTtype(z) #' CTdata(z, "scale") +#' +#' feature_key(point(x)) #' #' @export Zattrs <- \(x=list()) { @@ -76,5 +80,8 @@ setMethod("$", "Zattrs", \(x, name) x[[name]]) } setMethod("show", "Zattrs", .showZattrs) +#' @export setMethod("feature_key", "Zattrs", \(x) x$spatialdata_attrs$feature_key) + +#' @export setMethod("feature_key", "SpatialDataElement", \(x) feature_key(meta(x))) \ No newline at end of file diff --git a/man/Zattrs.Rd b/man/Zattrs.Rd index 366844b1..06ddea16 100644 --- a/man/Zattrs.Rd +++ b/man/Zattrs.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/Zattrs.R \name{Zattrs} \alias{Zattrs} +\alias{feature_key} \alias{$,Zattrs-method} \title{The `Zattrs` class} \usage{ @@ -31,4 +32,6 @@ CTname(z) CTtype(z) CTdata(z, "scale") +feature_key(point(x)) + } From 5ba105587c365aa2e5a5177c63bf327edc488c1c Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Thu, 9 Apr 2026 09:37:39 +0200 Subject: [PATCH 29/35] assure bb is within bounds; add tests of query,labelArray; fractorize image/label query --- R/query.R | 46 +++++++++++++++++++++---------------- tests/testthat/test-query.R | 33 +++++++++++++++++++++++--- 2 files changed, 56 insertions(+), 23 deletions(-) diff --git a/R/query.R b/R/query.R index 4a84d1ed..135d139c 100644 --- a/R/query.R +++ b/R/query.R @@ -109,31 +109,37 @@ setMethod("query", "SpatialData", \(x, ..., i) { return(x) }) -#' @rdname query -#' @export -setMethod("query", "ImageArray", \(x, y) { - if (is.matrix(y)) stop("Polygon query not supported for images") +.query_sdArray <- \(x, y) { + if (is.matrix(y)) stop( + "Polygon query not supported for ", + "element of type 'image/labelArray'") .check_box(y) - d <- dim(x) - y$ymax <- min(y$ymax, d[2]) - y$xmax <- min(y$xmax, d[3]) - j <- seq(y$ymin, y$ymax) - k <- seq(y$xmin, y$xmax) - return(x[, j, k]) -}) - -#' @rdname query -#' @export -setMethod("query", "LabelArray", \(x, y) { - if (is.matrix(y)) stop("Polygon query not supported for labels") - .check_box(y) - d <- dim(x) + # protect image channels (i.e., + # only query spatial dimensions) + n <- length(d <- dim(x)) + if (n == 3) d <- d[-1] + # assure query is within bounds + y$xmin <- max(y$xmin, 0) + y$ymin <- max(y$ymin, 0) y$ymax <- min(y$ymax, d[1]) y$xmax <- min(y$xmax, d[2]) + # subset spatial dimensions i <- seq(y$ymin, y$ymax) j <- seq(y$xmin, y$xmax) - return(x[i, j]) -}) + if (n == 3) { + return(x[, i, j]) + } else { + return(x[i, j]) + } +} + +#' @rdname query +#' @export +setMethod("query", "ImageArray", \(x, y) .query_sdArray(x, y)) + +#' @rdname query +#' @export +setMethod("query", "LabelArray", \(x, y) .query_sdArray(x, y)) #' @rdname query #' @importFrom sf st_as_sf st_intersects st_polygon st_bbox st_crop diff --git a/tests/testthat/test-query.R b/tests/testthat/test-query.R index 2c161788..f6ac2633 100644 --- a/tests/testthat/test-query.R +++ b/tests/testthat/test-query.R @@ -38,7 +38,7 @@ test_that("query,.check_pol", { test_that("query,ImageArray", { d <- dim(i <- image(x)) - # neither crop nor shift + # query equals dimensions y <- list(xmin=0, xmax=d[3], ymin=0, ymax=d[2]) expect_identical(query(i, y), i) # order is irrelevant @@ -49,8 +49,35 @@ test_that("query,ImageArray", { expect_equal(dim(j <- query(i, y)), c(3, h, w)) expect_identical(CTlist(i), CTlist(j)) # crop and shift - y <- list(xmin=1, xmax=w <- d[3]/2, ymin=2, ymax=h <- d[2]/4) - expect_equal(dim(query(i, y)), c(3, 1+h-2, 1+w-1)) + y <- list( + xmin=dx <- 3, xmax=w <- d[3]/2, + ymin=dy <- 5, ymax=h <- d[2]/4) + expect_equal(dim(query(i, y)), c(3, 1+h-dy, 1+w-dx)) + # non-finite boundaries + y <- list(xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) + expect_silent(query(i, y)) +}) + +test_that("query,LabelArray", { + d <- dim(l <- label(x)) + # query equals dimensions + y <- list(xmin=0, xmax=d[2], ymin=0, ymax=d[1]) + expect_identical(query(l, y), l) + # order is irrelevant + y <- list(ymax=d[1], xmax=d[2], xmin=0, ymin=0) + expect_identical(query(l, y), l) + # crop but don't shift + y <- list(xmin=0, xmax=w <- d[2]/2, ymin=0, ymax=h <- d[1]/4) + expect_equal(dim(m <- query(l, y)), c(h, w)) + expect_identical(CTlist(l), CTlist(m)) + # crop and shift + y <- list( + xmin=dx <- 3, xmax=w <- d[2]/2, + ymin=dy <- 5, ymax=h <- d[1]/4) + expect_equal(dim(query(l, y)), c(1+h-dy, 1+w-dx)) + # non-finite boundaries + y <- list(xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) + expect_silent(query(i, y)) }) test_that("query-box,PointFrame", { From 1c147d706b8e526273208e68f737711601faaebf Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Thu, 9 Apr 2026 09:47:43 +0200 Subject: [PATCH 30/35] code rearrangement; fix typo --- R/query.R | 50 ++++++++++++++++++------------------- tests/testthat/test-query.R | 8 +++--- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/R/query.R b/R/query.R index 135d139c..f5fa6bd3 100644 --- a/R/query.R +++ b/R/query.R @@ -55,6 +55,31 @@ #' foo <- by(fd, fd[, "L2"], \(x) points(x, type="b", col="red")) NULL +#' @rdname query +#' @importFrom dplyr filter +#' @export +setMethod("query", "SpatialData", \(x, ..., i) { + # TODO: need more example data to properly implement this; + # for now, just a proof of concept using 'spatialdata_attrs' + if (missing(i)) i <- 1 + if (!length(tables(x))) + stop("There aren't any tables") + if (is.numeric(i)) { + i <- tableNames(x)[i] + } else if (is.character(i)) { + i <- match.arg(i, tableNames(x)) + } + t <- x$tables[[i]] + ns <- vapply(nm <- colnames(x), length, integer(1)) + nm <- data.frame(layer=rep.int(names(nm), ns), region=unlist(nm)) + nm <- filter(nm, ...) + i <- match(nm$layer, .LAYERS) + j <- split(nm$region, nm$layer) + x <- x[i, j] + x$tables$table <- t + return(x) +}) + .check_box <- \(bb) { xy <- c("xmin", "xmax", "ymin", "ymax") ok <- c(is.list(bb), @@ -84,31 +109,6 @@ NULL return(mx) } -#' @rdname query -#' @importFrom dplyr filter -#' @export -setMethod("query", "SpatialData", \(x, ..., i) { - # TODO: need more example data to properly implement this; - # for now, just a proof of concept using 'spatialdata_attrs' - if (missing(i)) i <- 1 - if (!length(tables(x))) - stop("There aren't any tables") - if (is.numeric(i)) { - i <- tableNames(x)[i] - } else if (is.character(i)) { - i <- match.arg(i, tableNames(x)) - } - t <- x$tables[[i]] - ns <- vapply(nm <- colnames(x), length, integer(1)) - nm <- data.frame(layer=rep.int(names(nm), ns), region=unlist(nm)) - nm <- filter(nm, ...) - i <- match(nm$layer, .LAYERS) - j <- split(nm$region, nm$layer) - x <- x[i, j] - x$tables$table <- t - return(x) -}) - .query_sdArray <- \(x, y) { if (is.matrix(y)) stop( "Polygon query not supported for ", diff --git a/tests/testthat/test-query.R b/tests/testthat/test-query.R index f6ac2633..7964b12a 100644 --- a/tests/testthat/test-query.R +++ b/tests/testthat/test-query.R @@ -29,10 +29,10 @@ test_that("query,.check_pol", { for (. in q) expect_silent(.check_pol(.)) # invalid q <- list( - `[<-`(m, i=1, j=1, value=NA), # missing value - `[<-`(m, i=1, j=1, value=Inf), # not finite + matrix(seq_len(6), 2, 3), # wrong dim. matrix(numeric(6), 3, 2), # duplicates - matrix(seq_len(6), 2, 3)) # wrong dim. + `[<-`(m, i=1, j=1, value=Inf), # not finite + `[<-`(m, i=1, j=1, value=NA)) # missing value for (. in q) expect_error(.check_pol(.)) }) @@ -77,7 +77,7 @@ test_that("query,LabelArray", { expect_equal(dim(query(l, y)), c(1+h-dy, 1+w-dx)) # non-finite boundaries y <- list(xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) - expect_silent(query(i, y)) + expect_silent(query(l, y)) }) test_that("query-box,PointFrame", { From 2c48a058d68fc7d00b6675c201f3f5c596bf9955 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Thu, 9 Apr 2026 11:10:06 +0200 Subject: [PATCH 31/35] scuttle -> scrapper --- DESCRIPTION | 2 +- NAMESPACE | 2 +- R/mask.R | 17 ++++++++--------- tests/testthat/test-mask.R | 4 ++-- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index cbaf5789..a694c81c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -47,7 +47,7 @@ Imports: RBGL, reticulate, anndataR, - scuttle, + scrapper, sf, S4Arrays, S4Vectors, diff --git a/NAMESPACE b/NAMESPACE index 3cc8f38f..4441913d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -150,7 +150,7 @@ importFrom(methods,new) importFrom(methods,setClassUnion) importFrom(methods,setReplaceMethod) importFrom(reticulate,import) -importFrom(scuttle,aggregateAcrossCells) +importFrom(scrapper,aggregateAcrossCells.se) importFrom(sf,"st_geometry<-") importFrom(sf,st_as_sf) importFrom(sf,st_bbox) diff --git a/R/mask.R b/R/mask.R index dc64dec6..62881d10 100644 --- a/R/mask.R +++ b/R/mask.R @@ -126,29 +126,28 @@ setMethod(".mask", c("PointFrame", "ShapeFrame"), \(i, j, how=NULL, ...) { }) #' @noRd -#' @importFrom scuttle aggregateAcrossCells +#' @importFrom methods as +#' @importFrom scrapper aggregateAcrossCells.se #' @importFrom SummarizedExperiment assayNames<- #' @importFrom SingleCellExperiment SingleCellExperiment setMethod(".mask", c("ShapeFrame", "ShapeFrame"), \(i, j, table=NULL, value=NULL, how=NULL, ...) { if (is.null(table)) stop("Missing 'table'; can't mask shapes without") ok <- is.null(value) || (is.character(value) && all(value %in% rownames(table))) if (!ok) stop("Invalid 'value'; should be in 'rownames(table(x, i))'") - if (is.null(how)) { how <- "sum"; message("Missing 'how'; defaulting to 'sum'") } + if (!is.null(how)) { message("Can only 'sum' when masking shapes; ignoring 'sum'") } idx <- st_intersects( st_as_sf(data(j)), st_as_sf(data(i))) foo <- integer(nrow(i)) foo[unlist(idx)] <- rep(seq_along(idx), lengths(idx)) - se <- aggregateAcrossCells(table, - ids=foo, subset.row=value, - statistics=how, use.assay.type=1, - store_number=paste0("n_", meta(table)$region)) + se <- aggregateAcrossCells.se(table, foo, assay.type=1, + counts.name=paste0("n_", meta(table)$region)) colnames(se) <- se[[1]]; se[[1]] <- NULL - assayNames(se) <- how - return(se) + assayNames(se)[1] <- "counts" + metadata(se) <- list() + as(se, "SingleCellExperiment") }) #' @noRd setMethod(".mask", c("ANY", "ANY"), \(i, j, ...) stop("'mask'ing between these element types not yet supported")) - diff --git a/tests/testthat/test-mask.R b/tests/testthat/test-mask.R index 1eebe0d0..f69b34ec 100644 --- a/tests/testthat/test-mask.R +++ b/tests/testthat/test-mask.R @@ -50,8 +50,8 @@ test_that("mask,ShapeFrame,ShapeFrame", { y <- x; tables(y) <- list() expect_error(mask(y, i, j)) # default to 'sum' with a message - expect_message(y <- mask(x, i, j)) - expect_silent(z <- mask(x, i, j, how="sum")) + expect_silent(y <- mask(x, i, j)) + expect_message(z <- mask(x, i, j, how="sum")) expect_identical(y, z) old <- getTable(y, i) new <- getTable(y, j) From fa2b29e8b5f24608ddb9876ebaaadf422c33ccca Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Thu, 9 Apr 2026 11:52:41 +0200 Subject: [PATCH 32/35] omit deps by aggregating via mtx mul --- DESCRIPTION | 1 - NAMESPACE | 1 - R/mask.R | 45 ++++++++++++++++++++++++++++---------- tests/testthat/test-mask.R | 4 ++-- 4 files changed, 35 insertions(+), 16 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a694c81c..31e61414 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -47,7 +47,6 @@ Imports: RBGL, reticulate, anndataR, - scrapper, sf, S4Arrays, S4Vectors, diff --git a/NAMESPACE b/NAMESPACE index 4441913d..9fafee78 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -150,7 +150,6 @@ importFrom(methods,new) importFrom(methods,setClassUnion) importFrom(methods,setReplaceMethod) importFrom(reticulate,import) -importFrom(scrapper,aggregateAcrossCells.se) importFrom(sf,"st_geometry<-") importFrom(sf,st_as_sf) importFrom(sf,st_bbox) diff --git a/R/mask.R b/R/mask.R index 62881d10..2e59d630 100644 --- a/R/mask.R +++ b/R/mask.R @@ -52,7 +52,7 @@ NULL setMethod("mask", c("SpatialData", "ANY", "ANY"), \(x, i, j, how=NULL, name=\(i, j) sprintf("%s_by_%s", i, j), ...) { .check_ij(x, i); .check_ij(x, j) - if (!is.null(how)) how <- match.arg(how, c("sum", "mean")) + #if (!is.null(how)) how <- match.arg(how, c("sum", "mean")) ok <- is.character(name) && length(name) == 1 && !name %in% tableNames(x) nm <- if (is.function(name)) name(i, j) else if (ok) name else stop( "Invalid 'name'; should be a function or a ", @@ -127,25 +127,46 @@ setMethod(".mask", c("PointFrame", "ShapeFrame"), \(i, j, how=NULL, ...) { #' @noRd #' @importFrom methods as -#' @importFrom scrapper aggregateAcrossCells.se -#' @importFrom SummarizedExperiment assayNames<- +#' @importFrom Matrix sparseMatrix +#' @importFrom SummarizedExperiment assay #' @importFrom SingleCellExperiment SingleCellExperiment setMethod(".mask", c("ShapeFrame", "ShapeFrame"), \(i, j, table=NULL, value=NULL, how=NULL, ...) { + # validity if (is.null(table)) stop("Missing 'table'; can't mask shapes without") ok <- is.null(value) || (is.character(value) && all(value %in% rownames(table))) if (!ok) stop("Invalid 'value'; should be in 'rownames(table(x, i))'") - if (!is.null(how)) { message("Can only 'sum' when masking shapes; ignoring 'sum'") } + if (is.null(how)) { how <- "sum"; message("Missing 'how'; defaulting to 'sum'") } + if (is.character(how)) how <- match.arg(how, c("sum", "mean", "detected", "prop.detected")) + # grouping idx <- st_intersects( st_as_sf(data(j)), st_as_sf(data(i))) - foo <- integer(nrow(i)) - foo[unlist(idx)] <- rep(seq_along(idx), lengths(idx)) - se <- aggregateAcrossCells.se(table, foo, assay.type=1, - counts.name=paste0("n_", meta(table)$region)) - colnames(se) <- se[[1]]; se[[1]] <- NULL - assayNames(se)[1] <- "counts" - metadata(se) <- list() - as(se, "SingleCellExperiment") + ids <- integer(nrow(i)) + ids[unlist(idx)] <- rep(seq_along(idx), lengths(idx)) + ids <- factor(ids, seq(0, nrow(j))) + nid <- nlevels(ids) + # aggregation + mx <- assay(table) + if (grepl("detected$", how)) { + mx <- mx > 0 + } + my <- sparseMatrix( + x=rep(1, length(ids)), + i=seq_along(ids), j=ids, + dims=c(ncol(table), nid)) + mx <- mx %*% my + if (grepl("mean|prop", how)) { + ns <- tabulate(ids, nid) + mx <- t(t(mx)/ns) + } + # wrangling + mx <- as(mx, "dgCMatrix") + colnames(mx) <- levels(ids) + mx <- list(mx); names(mx) <- how + se <- SingleCellExperiment(mx) + nm <- paste0("n_", meta(table)$region) + se[[nm]] <- ns + return(se) }) #' @noRd diff --git a/tests/testthat/test-mask.R b/tests/testthat/test-mask.R index f69b34ec..1eebe0d0 100644 --- a/tests/testthat/test-mask.R +++ b/tests/testthat/test-mask.R @@ -50,8 +50,8 @@ test_that("mask,ShapeFrame,ShapeFrame", { y <- x; tables(y) <- list() expect_error(mask(y, i, j)) # default to 'sum' with a message - expect_silent(y <- mask(x, i, j)) - expect_message(z <- mask(x, i, j, how="sum")) + expect_message(y <- mask(x, i, j)) + expect_silent(z <- mask(x, i, j, how="sum")) expect_identical(y, z) old <- getTable(y, i) new <- getTable(y, j) From 6847822ded2585d481ca2c876df5c93064c49542 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Thu, 9 Apr 2026 12:29:51 +0200 Subject: [PATCH 33/35] bug fix; code cleaning --- R/mask.R | 29 +++++++++++------------------ man/SpatialData.Rd | 8 ++++---- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/R/mask.R b/R/mask.R index 2e59d630..96c73473 100644 --- a/R/mask.R +++ b/R/mask.R @@ -138,30 +138,23 @@ setMethod(".mask", c("ShapeFrame", "ShapeFrame"), \(i, j, table=NULL, value=NULL if (is.null(how)) { how <- "sum"; message("Missing 'how'; defaulting to 'sum'") } if (is.character(how)) how <- match.arg(how, c("sum", "mean", "detected", "prop.detected")) # grouping - idx <- st_intersects( - st_as_sf(data(j)), - st_as_sf(data(i))) - ids <- integer(nrow(i)) - ids[unlist(idx)] <- rep(seq_along(idx), lengths(idx)) - ids <- factor(ids, seq(0, nrow(j))) - nid <- nlevels(ids) + js <- st_intersects(st_as_sf(data(j)), st_as_sf(data(i))) + is <- factor(integer(nrow(i)), seq(0, nrow(j))) + is[unlist(js)] <- rep(seq_along(js), lengths(js)) + is <- factor(is, seq(0, nrow(j))) + ns <- tabulate(is, ni <- nlevels(is)) # aggregation mx <- assay(table) - if (grepl("detected$", how)) { - mx <- mx > 0 - } + if (grepl("detected$", how)) mx <- mx > 0 my <- sparseMatrix( - x=rep(1, length(ids)), - i=seq_along(ids), j=ids, - dims=c(ncol(table), nid)) + x=rep(1, length(is)), + i=seq_along(is), j=is, + dims=c(ncol(table), ni)) mx <- mx %*% my - if (grepl("mean|prop", how)) { - ns <- tabulate(ids, nid) - mx <- t(t(mx)/ns) - } + if (grepl("mean|prop", how)) mx <- t(t(mx)/ns) # wrangling mx <- as(mx, "dgCMatrix") - colnames(mx) <- levels(ids) + colnames(mx) <- levels(is) mx <- list(mx); names(mx) <- how se <- SingleCellExperiment(mx) nm <- paste0("n_", meta(table)$region) diff --git a/man/SpatialData.Rd b/man/SpatialData.Rd index 5c16f970..2eb34b55 100644 --- a/man/SpatialData.Rd +++ b/man/SpatialData.Rd @@ -50,8 +50,8 @@ \alias{element,SpatialData,ANY,numeric-method} \alias{element,SpatialData,ANY,missing-method} \alias{element,SpatialData,ANY,ANY-method} -\alias{[[<-,SpatialData,numeric,ANY,ANY-method} -\alias{[[<-,SpatialData,character,ANY,ANY-method} +\alias{[[<-,SpatialData,numeric,ANY-method} +\alias{[[<-,SpatialData,character,ANY-method} \title{The `SpatialData` class} \usage{ SpatialData(images, labels, points, shapes, tables) @@ -88,9 +88,9 @@ SpatialData(images, labels, points, shapes, tables) \S4method{element}{SpatialData,ANY,ANY}(x, i, j) -\S4method{[[}{SpatialData,numeric,ANY,ANY}(x, i) <- value +\S4method{[[}{SpatialData,numeric,ANY}(x, i) <- value -\S4method{[[}{SpatialData,character,ANY,ANY}(x, i) <- value +\S4method{[[}{SpatialData,character,ANY}(x, i) <- value } \arguments{ \item{images}{list of \code{\link{ImageArray}}s} From 60e0ddb891eadfe86f2a3a355172f5fcabf7d256 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Thu, 9 Apr 2026 12:30:13 +0200 Subject: [PATCH 34/35] remove duplicated factoring --- R/mask.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/mask.R b/R/mask.R index 96c73473..a755989e 100644 --- a/R/mask.R +++ b/R/mask.R @@ -141,7 +141,6 @@ setMethod(".mask", c("ShapeFrame", "ShapeFrame"), \(i, j, table=NULL, value=NULL js <- st_intersects(st_as_sf(data(j)), st_as_sf(data(i))) is <- factor(integer(nrow(i)), seq(0, nrow(j))) is[unlist(js)] <- rep(seq_along(js), lengths(js)) - is <- factor(is, seq(0, nrow(j))) ns <- tabulate(is, ni <- nlevels(is)) # aggregation mx <- assay(table) From 0fd9fe3aaaa7e454c643123f42dd812a1c3408c3 Mon Sep 17 00:00:00 2001 From: HelenaLC Date: Thu, 9 Apr 2026 12:39:07 +0200 Subject: [PATCH 35/35] expose 'assay' arg --- R/mask.R | 4 ++-- man/SpatialData.Rd | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/mask.R b/R/mask.R index a755989e..7a6658a4 100644 --- a/R/mask.R +++ b/R/mask.R @@ -130,7 +130,7 @@ setMethod(".mask", c("PointFrame", "ShapeFrame"), \(i, j, how=NULL, ...) { #' @importFrom Matrix sparseMatrix #' @importFrom SummarizedExperiment assay #' @importFrom SingleCellExperiment SingleCellExperiment -setMethod(".mask", c("ShapeFrame", "ShapeFrame"), \(i, j, table=NULL, value=NULL, how=NULL, ...) { +setMethod(".mask", c("ShapeFrame", "ShapeFrame"), \(i, j, how=NULL, table=NULL, value=NULL, assay=1, ...) { # validity if (is.null(table)) stop("Missing 'table'; can't mask shapes without") ok <- is.null(value) || (is.character(value) && all(value %in% rownames(table))) @@ -143,7 +143,7 @@ setMethod(".mask", c("ShapeFrame", "ShapeFrame"), \(i, j, table=NULL, value=NULL is[unlist(js)] <- rep(seq_along(js), lengths(js)) ns <- tabulate(is, ni <- nlevels(is)) # aggregation - mx <- assay(table) + mx <- assay(table, assay) if (grepl("detected$", how)) mx <- mx > 0 my <- sparseMatrix( x=rep(1, length(is)), diff --git a/man/SpatialData.Rd b/man/SpatialData.Rd index 2eb34b55..5c16f970 100644 --- a/man/SpatialData.Rd +++ b/man/SpatialData.Rd @@ -50,8 +50,8 @@ \alias{element,SpatialData,ANY,numeric-method} \alias{element,SpatialData,ANY,missing-method} \alias{element,SpatialData,ANY,ANY-method} -\alias{[[<-,SpatialData,numeric,ANY-method} -\alias{[[<-,SpatialData,character,ANY-method} +\alias{[[<-,SpatialData,numeric,ANY,ANY-method} +\alias{[[<-,SpatialData,character,ANY,ANY-method} \title{The `SpatialData` class} \usage{ SpatialData(images, labels, points, shapes, tables) @@ -88,9 +88,9 @@ SpatialData(images, labels, points, shapes, tables) \S4method{element}{SpatialData,ANY,ANY}(x, i, j) -\S4method{[[}{SpatialData,numeric,ANY}(x, i) <- value +\S4method{[[}{SpatialData,numeric,ANY,ANY}(x, i) <- value -\S4method{[[}{SpatialData,character,ANY}(x, i) <- value +\S4method{[[}{SpatialData,character,ANY,ANY}(x, i) <- value } \arguments{ \item{images}{list of \code{\link{ImageArray}}s}