Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ Imports:
graph,
Matrix,
methods,
Rarr,
Rarr (>= 2.1.9),
RBGL,
rlang,
sf,
Expand All @@ -87,7 +87,7 @@ biocViews:
SingleCell,
Spatial
Remotes:
Huber-group-EMBL/Rarr@b0c8174
Bioconductor/ZarrArray#7
License: Artistic-2.0
Encoding: UTF-8
VignetteBuilder: knitr
Expand Down
37 changes: 29 additions & 8 deletions R/read.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,15 @@ NULL
# https://ngff.openmicroscopy.org/specifications/0.5/index.html#images
# The name of the array is arbitrary with the ordering defined by
# the "multiscales" metadata, but is often a sequence starting at 0.
ds <- .validate_multiscales_paths(x, datasets(mdattr))
ds <- file.path(x, as.character(ds))
if (!any(startsWith(x, c("http://", "https://", "s3://")))) {
# Until we have a complete store interface (https://github.com/Huber-group-EMBL/Rarr/pull/176),
# only local objects can be fully validated.
Comment on lines +55 to +56
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will be handled by rome eventually anyway.

ds <- .validate_multiscales_paths(x, datasets(mdattr))
} else {
# For remote objects, we skip validation and assume that the datasets are in the expected location.
ds <- datasets(mdattr)
}
ds <- paste0(x, ds)
as <- lapply(ds, ZarrArray)
list(array=as, mdattr=mdattr)
}
Expand All @@ -77,7 +84,7 @@ readLabel <- function(x, ...) {
#' @importFrom dplyr sql
#' @export
readPoint <- function(x, ...) {
pq <- list.files(x, "\\.parquet$", full.names=TRUE)
pq <- paste0(x, file.path("points.parquet", "part.0.parquet"))
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: open an issue in Spatialdata python. Why do points have an extra nesting level?

  • shapes are shapes/layer_name/shapes.parquet
  • points are points/layer_name/points.parquet/part.0.parquet

Is this really intended? If so, is this desirable?

md <- read_zarr_attributes(x)
ax <- unlist(md$axes)
df <- ddbs_open_dataset(pq, conn=.conn()) |>
Expand All @@ -93,7 +100,8 @@ readPoint <- function(x, ...) {
#' @export
readShape <- function(x, ...) {
md <- read_zarr_attributes(x)
pq <- list.files(x, "\\.parquet$", full.names=TRUE)
# "shapes.parquet" currently hardcoded in SpatialData.io
pq <- paste0(x, "shapes.parquet")
df <- ddbs_open_dataset(pq, conn=.conn(), crs=NA_character_)
SpatialDataShape(data=df, meta=SpatialDataAttrs(md))
}
Expand Down Expand Up @@ -131,10 +139,23 @@ readSpatialData <- function(x,
args <- as.list(environment())[.LAYERS]
skip <- vapply(args, isFALSE, logical(1))

x <- Rarr:::.normalize_array_path(x)
store_meta <- Rarr:::.read_consolidated_metadata(x)$metadata
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In Bioconductor, it seems acceptable to use internal functions (see, e.g., ZarrArray), but ideally this will eventually be exported: Huber-group-EMBL/Rarr#116

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: Spatialdata python uses zmetadata instead of .zmetadata. Is there a special reason for this? AFAIK, zarr python uses the hidden version by default.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See https://github.com/scverse/spatialdata/blob/7604a3d2325079293ff523c5dff4483dffc890cb/src/spatialdata/_core/spatialdata.py#L1442-L1445.

The best way to deal with this here is to address this in the same way as we deal with datasets that don't have consolidated metadata: by recreating it.


# We have to treat v2 and v3 separately in the next 3 lines but we unify them again as `store_groups`.
store_groups_v3 <- store_meta[vapply(store_meta, \(.) !is.null(.$node_type) && .$node_type == "group", logical(1))]
store_groups_v2 <- store_meta[endsWith(names(store_meta), ".zgroup")]
names(store_groups_v2) <- dirname(names(store_groups_v2))
store_groups <- names(c(store_groups_v3, store_groups_v2))

# helper for layer reading
.readLayer <- \(l) {
j <- list.dirs(file.path(x, l), recursive=FALSE, full.names=TRUE)
names(j) <- basename(j)
j <- store_groups[startsWith(store_groups, paste0(l, "/"))]
j <- setNames(
paste0(x, j, "/", recycle0 = TRUE),
basename(j)
)

opt <- args[[l]]
if (!isTRUE(opt)) {
if (is.numeric(opt) && opt > (. <- length(j)))
Expand All @@ -143,8 +164,8 @@ readSpatialData <- function(x,
stop("couldn't find ", l, " of name", .)
j <- j[opt]
}
f <- get(paste0("read", toupper(substr(l, 1, 1)), substr(l, 2, nchar(l)-1)))
lapply(j, \(.) do.call(f, list(.)))
reader <- get(paste0("read", toupper(substr(l, 1, 1)), substr(l, 2, nchar(l)-1)))
lapply(j, reader)
}

names(ls) <- ls <- .LAYERS[!skip]
Expand Down
File renamed without changes.
1 change: 1 addition & 0 deletions tests/testthat/test-read.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ test_that("readElement()", {
for (l in names(typ)) {
f <- paste0(toupper(substr(l, 1, 1)), substr(l, 2, nchar(l)-1))
y <- list.files(file.path(x, l), full.names=TRUE)[1]
y <- paste0(y, "/", recycle0 = TRUE)
expect_is(get(paste0("read", f))(y), typ[l])
}
})
Expand Down
Loading