From e9e71bbb0a73056f4ead7d3a4ad7ed3c5e0ebf48 Mon Sep 17 00:00:00 2001 From: Maximilian Girlich Date: Wed, 18 Oct 2023 06:01:40 +0000 Subject: [PATCH 1/3] Remove `collapseBuffer()` regularly collapsing the buffer is quite memorey inefficient and slow --- R/utils.R | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/R/utils.R b/R/utils.R index 259c6b51..5edbcb99 100644 --- a/R/utils.R +++ b/R/utils.R @@ -15,10 +15,6 @@ # # Text is automatically converted to UTF-8 before being written. #' @param bufferSize The initial size of the buffer in which writes are stored. -#' The buffer will be periodically cleared, if possible, to cache the writes -#' as a string. If the buffer cannot be cleared (because of the need to be -#' able to backtrack to fulfill an `eatWS()` call), then the buffer size will -#' be doubled. #' @noRd WSTextWriter <- function(bufferSize=1024) { if (bufferSize < 3) { @@ -37,31 +33,6 @@ WSTextWriter <- function(bufferSize=1024) { # TRUE if we're eating whitespace right now, in which case calls to writeWS are no-ops. suppressing <- FALSE - # Collapses the text in the buffer to create space for more writes. The first - # element in the buffer will be the concatenation of any writes up to the - # current marker. The second element in the buffer will be the concatenation - # of all writes after the marker. - collapseBuffer <- function() { - # Collapse the writes in the buffer up to the marked position into the first buffer entry - nonWS <- "" - if (marked > 0) { - nonWS <- paste(buffer[seq_len(marked)], collapse="") - } - - # Collapse any remaining whitespace - ws <- "" - remaining <- position - marked - if (remaining > 0) { - # We have some whitespace to collapse. Collapse it into the second buffer entry. - ws <- paste(buffer[seq(from=marked+1,to=marked+remaining)], collapse="") - } - - buffer[1] <<- nonWS - buffer[2] <<- ws - position <<- 2 - marked <<- 1 - } - # Logic to do the actual write writeImpl <- function(text) { # force `text` to evaluate and check that it's the right shape @@ -72,11 +43,6 @@ WSTextWriter <- function(bufferSize=1024) { stop("Text to be written must be a length-one character vector") } - # Are we at the end of our buffer? - if (position == length(buffer)) { - collapseBuffer() - } - # The text that is written to this writer will be converted to # UTF-8 using enc2utf8. The rendered output will always be UTF-8 # encoded. From 50435eb2e8b87b1edbad0dce36eac74695613d23 Mon Sep 17 00:00:00 2001 From: Maximilian Girlich Date: Wed, 18 Oct 2023 06:19:54 +0000 Subject: [PATCH 2/3] Allow writing characters with multiple elements --- R/utils.R | 12 ++++++------ tests/testthat/test-textwriter.r | 4 +++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/R/utils.R b/R/utils.R index 5edbcb99..354685cb 100644 --- a/R/utils.R +++ b/R/utils.R @@ -36,10 +36,7 @@ WSTextWriter <- function(bufferSize=1024) { # Logic to do the actual write writeImpl <- function(text) { # force `text` to evaluate and check that it's the right shape - # TODO: We could support vectors with multiple elements here and perhaps - # find some way to combine with `paste8()`. See - # https://github.com/rstudio/htmltools/pull/132#discussion_r302280588 - if (length(text) != 1 || !is.character(text)) { + if (!is.character(text)) { stop("Text to be written must be a length-one character vector") } @@ -49,8 +46,11 @@ WSTextWriter <- function(bufferSize=1024) { enc <- enc2utf8(text) # Move the position pointer and store the (encoded) write - position <<- position + 1 - buffer[position] <<- enc + n <- length(text) + # TODO is it faster if we special case for n = 1? + new_position <- position + n + buffer[(position + 1):new_position] <<- enc + position <<- new_position } # The actual object returned diff --git a/tests/testthat/test-textwriter.r b/tests/testthat/test-textwriter.r index 8cf83f7f..da354ff9 100644 --- a/tests/testthat/test-textwriter.r +++ b/tests/testthat/test-textwriter.r @@ -18,9 +18,11 @@ describe("WSTextWriter", { wsw$write("more content") expect_identical(wsw$readAll(), "line one\nanother linemore content") + wsw$write(c("\n", "write", "three", "elements")) + expect_identical(wsw$readAll(), "line one\nanother linemore content\nwritethreeelements") + # Non-character writes expect_error(wsw$write(1)) - expect_error(wsw$write(letters[1:2])) expect_error(WSTextWriter(bufferSize=2)) }) From feffc686c1b80d214cc633ba88fbbfeebdf4fb1d Mon Sep 17 00:00:00 2001 From: Maximilian Girlich Date: Wed, 18 Oct 2023 06:50:22 +0000 Subject: [PATCH 3/3] Write multiple elements instead of pasting them --- R/tags.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/tags.R b/R/tags.R index e35691bb..4463e5a6 100644 --- a/R/tags.R +++ b/R/tags.R @@ -856,7 +856,7 @@ tagWrite <- function(tag, textWriter, indent=0, eol = "\n") { } # write tag name - textWriter$write(concat8("<", tag$name)) + textWriter$write(c("<", tag$name)) # Convert all attribs to chars explicitly; prevents us from messing up factors attribs <- flattenTagAttribs(lapply(tag$attribs, as.character)) @@ -880,10 +880,10 @@ tagWrite <- function(tag, textWriter, indent=0, eol = "\n") { attribValue <- tolower(attribValue) } text <- htmlEscape(attribValue, attribute=TRUE) - textWriter$write(concat8(" ", attrib,"=\"", text, "\"")) + textWriter$write(c(" ", attrib,"=\"", text, "\"")) } else { - textWriter$write(concat8(" ", attrib)) + textWriter$write(c(" ", attrib)) } } @@ -894,7 +894,7 @@ tagWrite <- function(tag, textWriter, indent=0, eol = "\n") { # special case for a single child text node (skip newlines and indentation) if ((length(children) == 1) && is.character(children[[1]]) ) { - textWriter$write(concat8(normalizeText(children[[1]]), "")) + textWriter$write(c(normalizeText(children[[1]]), "")) } else { if ("after-begin" %in% .noWS || "inside" %in% .noWS) { @@ -907,7 +907,7 @@ tagWrite <- function(tag, textWriter, indent=0, eol = "\n") { if ("before-end" %in% .noWS || "inside" %in% .noWS) { textWriter$eatWS() } - textWriter$write(concat8("")) + textWriter$write(c("")) } } else { @@ -919,7 +919,7 @@ tagWrite <- function(tag, textWriter, indent=0, eol = "\n") { textWriter$write("/>") } else { - textWriter$write(concat8(">")) + textWriter$write(c(">")) } } if ("after" %in% .noWS || "outside" %in% .noWS) {